Skip to content

feat: Run (logical) optimizers on subqueries #13066

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 4 additions & 10 deletions datafusion/optimizer/src/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use log::{debug, warn};
use datafusion_common::alias::AliasGenerator;
use datafusion_common::config::ConfigOptions;
use datafusion_common::instant::Instant;
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter};
use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
use datafusion_common::{internal_err, DFSchema, DataFusionError, Result};
use datafusion_expr::logical_plan::LogicalPlan;

Expand Down Expand Up @@ -250,10 +250,6 @@ impl Optimizer {
Arc::new(DecorrelatePredicateSubquery::new()),
Arc::new(ScalarSubqueryToJoin::new()),
Arc::new(ExtractEquijoinPredicate::new()),
// simplify expressions does not simplify expressions in subqueries, so we
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❤️

This may also make planning non-trivially faster, as SimplifyExpressions is quite expensive.

// run it again after running the optimizations that potentially converted
// subqueries to joins
Arc::new(SimplifyExpressions::new()),
Arc::new(EliminateDuplicatedExpr::new()),
Arc::new(EliminateFilter::new()),
Arc::new(EliminateCrossJoin::new()),
Expand Down Expand Up @@ -384,11 +380,9 @@ impl Optimizer {

let result = match rule.apply_order() {
// optimizer handles recursion
Some(apply_order) => new_plan.rewrite(&mut Rewriter::new(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Such a simple change :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we owe a significant debt to @peter-toth for his work on the tree node API to sort out how to handle subqueries

apply_order,
rule.as_ref(),
config,
)),
Some(apply_order) => new_plan.rewrite_with_subqueries(
&mut Rewriter::new(apply_order, rule.as_ref(), config),
),
// rule handles recursion itself
None => optimize_plan_node(new_plan, rule.as_ref(), config),
}
Expand Down
2 changes: 0 additions & 2 deletions datafusion/sqllogictest/test_files/explain.slt
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ logical_plan after eliminate_join SAME TEXT AS ABOVE
logical_plan after decorrelate_predicate_subquery SAME TEXT AS ABOVE
logical_plan after scalar_subquery_to_join SAME TEXT AS ABOVE
logical_plan after extract_equijoin_predicate SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after eliminate_duplicated_expr SAME TEXT AS ABOVE
logical_plan after eliminate_filter SAME TEXT AS ABOVE
logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
Expand All @@ -214,7 +213,6 @@ logical_plan after eliminate_join SAME TEXT AS ABOVE
logical_plan after decorrelate_predicate_subquery SAME TEXT AS ABOVE
logical_plan after scalar_subquery_to_join SAME TEXT AS ABOVE
logical_plan after extract_equijoin_predicate SAME TEXT AS ABOVE
logical_plan after simplify_expressions SAME TEXT AS ABOVE
logical_plan after eliminate_duplicated_expr SAME TEXT AS ABOVE
logical_plan after eliminate_filter SAME TEXT AS ABOVE
logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
Expand Down
28 changes: 14 additions & 14 deletions datafusion/sqllogictest/test_files/subquery.slt
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ logical_plan
01)Filter: EXISTS (<subquery>)
02)--Subquery:
03)----Projection: t1.t1_int
04)------Filter: t1.t1_id > t1.t1_int
04)------Filter: t1.t1_int < t1.t1_id
05)--------TableScan: t1
06)--TableScan: t1 projection=[t1_id, t1_name, t1_int]

Expand Down Expand Up @@ -462,8 +462,8 @@ explain SELECT t1_id, (SELECT t2_int FROM t2 WHERE t2.t2_int = t1.t1_int limit 1
logical_plan
01)Projection: t1.t1_id, (<subquery>) AS t2_int
02)--Subquery:
03)----Limit: skip=0, fetch=1
04)------Projection: t2.t2_int
03)----Projection: t2.t2_int
04)------Limit: skip=0, fetch=1
05)--------Filter: t2.t2_int = outer_ref(t1.t1_int)
06)----------TableScan: t2
07)--TableScan: t1 projection=[t1_id, t1_int]
Expand All @@ -475,8 +475,8 @@ logical_plan
01)Projection: t1.t1_id
02)--Filter: t1.t1_int = (<subquery>)
03)----Subquery:
04)------Limit: skip=0, fetch=1
05)--------Projection: t2.t2_int
04)------Projection: t2.t2_int
05)--------Limit: skip=0, fetch=1
06)----------Filter: t2.t2_int = outer_ref(t1.t1_int)
07)------------TableScan: t2
08)----TableScan: t1 projection=[t1_id, t1_int]
Expand Down Expand Up @@ -542,13 +542,13 @@ query TT
explain SELECT t0_id, t0_name FROM t0 WHERE EXISTS (SELECT 1 FROM t1 INNER JOIN t2 ON(t1.t1_id = t2.t2_id and t1.t1_name = t0.t0_name))
----
logical_plan
01)Filter: EXISTS (<subquery>)
02)--Subquery:
03)----Projection: Int64(1)
04)------Inner Join: Filter: t1.t1_id = t2.t2_id AND t1.t1_name = outer_ref(t0.t0_name)
05)--------TableScan: t1
06)--------TableScan: t2
07)--TableScan: t0 projection=[t0_id, t0_name]
01)LeftSemi Join: t0.t0_name = __correlated_sq_2.t1_name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎉

02)--TableScan: t0 projection=[t0_id, t0_name]
03)--SubqueryAlias: __correlated_sq_2
04)----Projection: t1.t1_name
05)------Inner Join: t1.t1_id = t2.t2_id
06)--------TableScan: t1 projection=[t1_id, t1_name]
07)--------TableScan: t2 projection=[t2_id]

#subquery_contains_join_contains_correlated_columns
query TT
Expand Down Expand Up @@ -656,8 +656,8 @@ explain SELECT t1_id, t1_name FROM t1 WHERE t1_id in (SELECT t2_id FROM t2 where
logical_plan
01)Filter: t1.t1_id IN (<subquery>)
02)--Subquery:
03)----Limit: skip=0, fetch=10
04)------Projection: t2.t2_id
03)----Projection: t2.t2_id
04)------Limit: skip=0, fetch=10
05)--------Filter: outer_ref(t1.t1_name) = t2.t2_name
06)----------TableScan: t2
07)--TableScan: t1 projection=[t1_id, t1_name]
Expand Down