-
Notifications
You must be signed in to change notification settings - Fork 1.5k
feat: Run (logical) optimizers on subqueries #13066
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,7 @@ use log::{debug, warn}; | |
use datafusion_common::alias::AliasGenerator; | ||
use datafusion_common::config::ConfigOptions; | ||
use datafusion_common::instant::Instant; | ||
use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; | ||
use datafusion_common::tree_node::{Transformed, TreeNodeRewriter}; | ||
use datafusion_common::{internal_err, DFSchema, DataFusionError, Result}; | ||
use datafusion_expr::logical_plan::LogicalPlan; | ||
|
||
|
@@ -250,10 +250,6 @@ impl Optimizer { | |
Arc::new(DecorrelatePredicateSubquery::new()), | ||
Arc::new(ScalarSubqueryToJoin::new()), | ||
Arc::new(ExtractEquijoinPredicate::new()), | ||
// simplify expressions does not simplify expressions in subqueries, so we | ||
// run it again after running the optimizations that potentially converted | ||
// subqueries to joins | ||
Arc::new(SimplifyExpressions::new()), | ||
Arc::new(EliminateDuplicatedExpr::new()), | ||
Arc::new(EliminateFilter::new()), | ||
Arc::new(EliminateCrossJoin::new()), | ||
|
@@ -384,11 +380,9 @@ impl Optimizer { | |
|
||
let result = match rule.apply_order() { | ||
// optimizer handles recursion | ||
Some(apply_order) => new_plan.rewrite(&mut Rewriter::new( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Such a simple change :) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we owe a significant debt to @peter-toth for his work on the tree node API to sort out how to handle subqueries. |
||
apply_order, | ||
rule.as_ref(), | ||
config, | ||
)), | ||
Some(apply_order) => new_plan.rewrite_with_subqueries( | ||
&mut Rewriter::new(apply_order, rule.as_ref(), config), | ||
), | ||
// rule handles recursion itself | ||
None => optimize_plan_node(new_plan, rule.as_ref(), config), | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -391,7 +391,7 @@ logical_plan | |
01)Filter: EXISTS (<subquery>) | ||
02)--Subquery: | ||
03)----Projection: t1.t1_int | ||
04)------Filter: t1.t1_id > t1.t1_int | ||
04)------Filter: t1.t1_int < t1.t1_id | ||
05)--------TableScan: t1 | ||
06)--TableScan: t1 projection=[t1_id, t1_name, t1_int] | ||
|
||
|
@@ -462,8 +462,8 @@ explain SELECT t1_id, (SELECT t2_int FROM t2 WHERE t2.t2_int = t1.t1_int limit 1 | |
logical_plan | ||
01)Projection: t1.t1_id, (<subquery>) AS t2_int | ||
02)--Subquery: | ||
03)----Limit: skip=0, fetch=1 | ||
04)------Projection: t2.t2_int | ||
03)----Projection: t2.t2_int | ||
04)------Limit: skip=0, fetch=1 | ||
05)--------Filter: t2.t2_int = outer_ref(t1.t1_int) | ||
06)----------TableScan: t2 | ||
07)--TableScan: t1 projection=[t1_id, t1_int] | ||
|
@@ -475,8 +475,8 @@ logical_plan | |
01)Projection: t1.t1_id | ||
02)--Filter: t1.t1_int = (<subquery>) | ||
03)----Subquery: | ||
04)------Limit: skip=0, fetch=1 | ||
05)--------Projection: t2.t2_int | ||
04)------Projection: t2.t2_int | ||
05)--------Limit: skip=0, fetch=1 | ||
06)----------Filter: t2.t2_int = outer_ref(t1.t1_int) | ||
07)------------TableScan: t2 | ||
08)----TableScan: t1 projection=[t1_id, t1_int] | ||
|
@@ -542,13 +542,13 @@ query TT | |
explain SELECT t0_id, t0_name FROM t0 WHERE EXISTS (SELECT 1 FROM t1 INNER JOIN t2 ON(t1.t1_id = t2.t2_id and t1.t1_name = t0.t0_name)) | ||
---- | ||
logical_plan | ||
01)Filter: EXISTS (<subquery>) | ||
02)--Subquery: | ||
03)----Projection: Int64(1) | ||
04)------Inner Join: Filter: t1.t1_id = t2.t2_id AND t1.t1_name = outer_ref(t0.t0_name) | ||
05)--------TableScan: t1 | ||
06)--------TableScan: t2 | ||
07)--TableScan: t0 projection=[t0_id, t0_name] | ||
01)LeftSemi Join: t0.t0_name = __correlated_sq_2.t1_name | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 🎉 |
||
02)--TableScan: t0 projection=[t0_id, t0_name] | ||
03)--SubqueryAlias: __correlated_sq_2 | ||
04)----Projection: t1.t1_name | ||
05)------Inner Join: t1.t1_id = t2.t2_id | ||
06)--------TableScan: t1 projection=[t1_id, t1_name] | ||
07)--------TableScan: t2 projection=[t2_id] | ||
|
||
#subquery_contains_join_contains_correlated_columns | ||
query TT | ||
|
@@ -656,8 +656,8 @@ explain SELECT t1_id, t1_name FROM t1 WHERE t1_id in (SELECT t2_id FROM t2 where | |
logical_plan | ||
01)Filter: t1.t1_id IN (<subquery>) | ||
02)--Subquery: | ||
03)----Limit: skip=0, fetch=10 | ||
04)------Projection: t2.t2_id | ||
03)----Projection: t2.t2_id | ||
04)------Limit: skip=0, fetch=10 | ||
05)--------Filter: outer_ref(t1.t1_name) = t2.t2_name | ||
06)----------TableScan: t2 | ||
07)--TableScan: t1 projection=[t1_id, t1_name] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
❤️
This may also make planning non-trivially faster, as SimplifyExpressions is quite expensive.