Skip to content

Commit

Permalink
fix: single_distinct_aggregation_to_group_by fail (#7997)
Browse files Browse the repository at this point in the history
* fix: single_distinct_aggregation_to_group_by fail

* fix

* move test to groupby.slt
  • Loading branch information
haohuaijin authored Nov 2, 2023
1 parent 5634cce commit 7f3f465
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 1 deletion.
2 changes: 1 addition & 1 deletion datafusion/optimizer/src/single_distinct_to_groupby.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ fn is_single_distinct_agg(plan: &LogicalPlan) -> Result<bool> {
distinct_count += 1;
}
for e in args {
fields_set.insert(e.display_name()?);
fields_set.insert(e.canonical_name());
}
}
}
Expand Down
58 changes: 58 additions & 0 deletions datafusion/sqllogictest/test_files/groupby.slt
Original file line number Diff line number Diff line change
Expand Up @@ -3733,3 +3733,61 @@ AggregateExec: mode=FinalPartitioned, gby=[c@0 as c, b@1 as b], aggr=[SUM(multip
--------AggregateExec: mode=Partial, gby=[c@1 as c, b@0 as b], aggr=[SUM(multiple_ordered_table_with_pk.d)], ordering_mode=PartiallyOrdered
----------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[b, c, d], output_ordering=[c@1 ASC NULLS LAST], has_header=true

# Tests for single distinct to group by optimization rule
# Fixture: a small int table in which the value 1 appears twice.
statement ok
CREATE TABLE t(x int) AS VALUES (1), (2), (1);

# Fixture: two rows with different bigint x values that share the same y.
statement ok
create table t1(x bigint,y int) as values (9223372036854775807,2), (9223372036854775806,2);

# Two DISTINCT aggregates over the same column, grouped by that same column.
query II
SELECT SUM(DISTINCT x), MAX(DISTINCT x) from t GROUP BY x ORDER BY x;
----
1 1
2 2

# Same query with the aggregate order swapped; the results must be unchanged.
query II
SELECT MAX(DISTINCT x), SUM(DISTINCT x) from t GROUP BY x ORDER BY x;
----
1 1
2 2

# The DISTINCT arguments differ here (CAST(x AS DOUBLE) vs. plain x), so the
# expected plan keeps one Aggregate with both DISTINCT aggregates intact.
# Note that the projected column names render both arguments as t1.x even
# though only one of them is cast.
query TT
EXPLAIN SELECT SUM(DISTINCT CAST(x AS DOUBLE)), MAX(DISTINCT x) FROM t1 GROUP BY y;
----
logical_plan
Projection: SUM(DISTINCT t1.x), MAX(DISTINCT t1.x)
--Aggregate: groupBy=[[t1.y]], aggr=[[SUM(DISTINCT CAST(t1.x AS Float64)), MAX(DISTINCT t1.x)]]
----TableScan: t1 projection=[x, y]
physical_plan
ProjectionExec: expr=[SUM(DISTINCT t1.x)@1 as SUM(DISTINCT t1.x), MAX(DISTINCT t1.x)@2 as MAX(DISTINCT t1.x)]
--AggregateExec: mode=FinalPartitioned, gby=[y@0 as y], aggr=[SUM(DISTINCT t1.x), MAX(DISTINCT t1.x)]
----CoalesceBatchesExec: target_batch_size=2
------RepartitionExec: partitioning=Hash([y@0], 8), input_partitions=8
--------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
----------AggregateExec: mode=Partial, gby=[y@1 as y], aggr=[SUM(DISTINCT t1.x), MAX(DISTINCT t1.x)]
------------MemoryExec: partitions=1, partition_sizes=[1]

# Both DISTINCT aggregates share the same argument expression, so the expected
# plan shows the rewrite: an inner Aggregate groups by y and the cast
# expression (aliased as alias1), and the outer Aggregate applies plain
# SUM/MAX over alias1.
query TT
EXPLAIN SELECT SUM(DISTINCT CAST(x AS DOUBLE)), MAX(DISTINCT CAST(x AS DOUBLE)) FROM t1 GROUP BY y;
----
logical_plan
Projection: SUM(alias1) AS SUM(DISTINCT t1.x), MAX(alias1) AS MAX(DISTINCT t1.x)
--Aggregate: groupBy=[[t1.y]], aggr=[[SUM(alias1), MAX(alias1)]]
----Aggregate: groupBy=[[t1.y, CAST(t1.x AS Float64)t1.x AS t1.x AS alias1]], aggr=[[]]
------Projection: CAST(t1.x AS Float64) AS CAST(t1.x AS Float64)t1.x, t1.y
--------TableScan: t1 projection=[x, y]
physical_plan
ProjectionExec: expr=[SUM(alias1)@1 as SUM(DISTINCT t1.x), MAX(alias1)@2 as MAX(DISTINCT t1.x)]
--AggregateExec: mode=FinalPartitioned, gby=[y@0 as y], aggr=[SUM(alias1), MAX(alias1)]
----CoalesceBatchesExec: target_batch_size=2
------RepartitionExec: partitioning=Hash([y@0], 8), input_partitions=8
--------AggregateExec: mode=Partial, gby=[y@0 as y], aggr=[SUM(alias1), MAX(alias1)]
----------AggregateExec: mode=FinalPartitioned, gby=[y@0 as y, alias1@1 as alias1], aggr=[]
------------CoalesceBatchesExec: target_batch_size=2
--------------RepartitionExec: partitioning=Hash([y@0, alias1@1], 8), input_partitions=8
----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
------------------AggregateExec: mode=Partial, gby=[y@1 as y, CAST(t1.x AS Float64)t1.x@0 as alias1], aggr=[]
--------------------ProjectionExec: expr=[CAST(x@0 AS Float64) as CAST(t1.x AS Float64)t1.x, y@1 as y]
----------------------MemoryExec: partitions=1, partition_sizes=[1]

0 comments on commit 7f3f465

Please sign in to comment.