@@ -218,6 +218,23 @@ impl PhysicalGroupBy {
218
218
pub fn is_single ( & self ) -> bool {
219
219
self . null_expr . is_empty ( )
220
220
}
221
+
222
+ /// Calculate GROUP BY expressions according to input schema.
223
+ pub fn input_exprs ( & self ) -> Vec < Arc < dyn PhysicalExpr > > {
224
+ self . expr
225
+ . iter ( )
226
+ . map ( |( expr, _alias) | expr. clone ( ) )
227
+ . collect ( )
228
+ }
229
+
230
+ /// Return grouping expressions as they occur in the output schema.
231
+ fn output_exprs ( & self ) -> Vec < Arc < dyn PhysicalExpr > > {
232
+ self . expr
233
+ . iter ( )
234
+ . enumerate ( )
235
+ . map ( |( index, ( _, name) ) | Arc :: new ( Column :: new ( name, index) ) as _ )
236
+ . collect ( )
237
+ }
221
238
}
222
239
223
240
impl PartialEq for PhysicalGroupBy {
@@ -319,11 +336,7 @@ fn get_working_mode(
319
336
// Since direction of the ordering is not important for GROUP BY columns,
320
337
// we convert PhysicalSortExpr to PhysicalExpr in the existing ordering.
321
338
let ordering_exprs = convert_to_expr ( output_ordering) ;
322
- let groupby_exprs = group_by
323
- . expr
324
- . iter ( )
325
- . map ( |( item, _) | item. clone ( ) )
326
- . collect :: < Vec < _ > > ( ) ;
339
+ let groupby_exprs = group_by. input_exprs ( ) ;
327
340
// Find where each expression of the GROUP BY clause occurs in the existing
328
341
// ordering (if it occurs):
329
342
let mut ordered_indices =
@@ -363,7 +376,7 @@ fn calc_aggregation_ordering(
363
376
) -> Option < AggregationOrdering > {
364
377
get_working_mode ( input, group_by) . map ( |( mode, order_indices) | {
365
378
let existing_ordering = input. output_ordering ( ) . unwrap_or ( & [ ] ) ;
366
- let out_group_expr = output_group_expr_helper ( group_by) ;
379
+ let out_group_expr = group_by. output_exprs ( ) ;
367
380
// Calculate output ordering information for the operator:
368
381
let out_ordering = order_indices
369
382
. iter ( )
@@ -381,18 +394,6 @@ fn calc_aggregation_ordering(
381
394
} )
382
395
}
383
396
384
- /// This function returns grouping expressions as they occur in the output schema.
385
- fn output_group_expr_helper ( group_by : & PhysicalGroupBy ) -> Vec < Arc < dyn PhysicalExpr > > {
386
- // Update column indices. Since the group by columns come first in the output schema, their
387
- // indices are simply 0..self.group_expr(len).
388
- group_by
389
- . expr ( )
390
- . iter ( )
391
- . enumerate ( )
392
- . map ( |( index, ( _, name) ) | Arc :: new ( Column :: new ( name, index) ) as _ )
393
- . collect ( )
394
- }
395
-
396
397
/// This function returns the ordering requirement of the first non-reversible
397
398
/// order-sensitive aggregate function such as ARRAY_AGG. This requirement serves
398
399
/// as the initial requirement while calculating the finest requirement among all
@@ -591,11 +592,7 @@ fn group_by_contains_all_requirements(
591
592
group_by : & PhysicalGroupBy ,
592
593
requirement : & LexOrdering ,
593
594
) -> bool {
594
- let physical_exprs = group_by
595
- . expr ( )
596
- . iter ( )
597
- . map ( |( expr, _alias) | expr. clone ( ) )
598
- . collect :: < Vec < _ > > ( ) ;
595
+ let physical_exprs = group_by. input_exprs ( ) ;
599
596
// When we have multiple groups (grouping set)
600
597
// since group by may be calculated on the subset of the group_by.expr()
601
598
// it is not guaranteed to have all of the requirements among group by expressions.
@@ -735,7 +732,7 @@ impl AggregateExec {
735
732
736
733
/// Grouping expressions as they occur in the output schema
737
734
pub fn output_group_expr ( & self ) -> Vec < Arc < dyn PhysicalExpr > > {
738
- output_group_expr_helper ( & self . group_by )
735
+ self . group_by . output_exprs ( )
739
736
}
740
737
741
738
/// Aggregate expressions
@@ -894,28 +891,24 @@ impl ExecutionPlan for AggregateExec {
894
891
895
892
/// Get the output partitioning of this plan
896
893
fn output_partitioning ( & self ) -> Partitioning {
897
- match & self . mode {
898
- AggregateMode :: Partial | AggregateMode :: Single => {
899
- // Partial and Single Aggregation will not change the output partitioning but need to respect the Alias
900
- let input_partition = self . input . output_partitioning ( ) ;
901
- match input_partition {
902
- Partitioning :: Hash ( exprs, part) => {
903
- let normalized_exprs = exprs
904
- . into_iter ( )
905
- . map ( |expr| {
906
- normalize_out_expr_with_columns_map (
907
- expr,
908
- & self . columns_map ,
909
- )
910
- } )
911
- . collect :: < Vec < _ > > ( ) ;
912
- Partitioning :: Hash ( normalized_exprs, part)
913
- }
914
- _ => input_partition,
915
- }
894
+ let input_partition = self . input . output_partitioning ( ) ;
895
+ if self . mode . is_first_stage ( ) {
896
+ // First stage Aggregation will not change the output partitioning but need to respect the Alias
897
+ let input_partition = self . input . output_partitioning ( ) ;
898
+ if let Partitioning :: Hash ( exprs, part) = input_partition {
899
+ let normalized_exprs = exprs
900
+ . into_iter ( )
901
+ . map ( |expr| {
902
+ normalize_out_expr_with_columns_map ( expr, & self . columns_map )
903
+ } )
904
+ . collect :: < Vec < _ > > ( ) ;
905
+ Partitioning :: Hash ( normalized_exprs, part)
906
+ } else {
907
+ input_partition
916
908
}
909
+ } else {
917
910
// Final Aggregation's output partitioning is the same as its real input
918
- _ => self . input . output_partitioning ( ) ,
911
+ input_partition
919
912
}
920
913
}
921
914
0 commit comments