diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c index 3b59ce017fb..6f6b01de97b 100644 --- a/src/backend/optimizer/path/clausesel.c +++ b/src/backend/optimizer/path/clausesel.c @@ -407,14 +407,7 @@ clauselist_selectivity_ext(PlannerInfo *root, } pfree(rgsel); - /* - * For Anti Semi Join, selectivity is determined by the fraction of - * tuples that do no match - */ - if (JOIN_ANTI == jointype || JOIN_LASJ_NOTIN == jointype) - { - s1 = (1 - s1); - } + return s1; } diff --git a/src/test/regress/expected/join_gp.out b/src/test/regress/expected/join_gp.out index f7e2b50ecf9..713d02765e3 100644 --- a/src/test/regress/expected/join_gp.out +++ b/src/test/regress/expected/join_gp.out @@ -47,6 +47,34 @@ select * from l l1 join l l2 on l1.a = l2.a left join l l3 on l1.a = l3.a and l1 2 | 2 | 2 (5 rows) +-- +-- test anti_join/left_anti_semi_join selectivities +-- +create table aj_t1(a int, b int, c int) distributed by (a); +create table aj_t2(a int, b int, c int) distributed by (a); +insert into aj_t1 values(1,1,1); +insert into aj_t2 values(1,1,1),(2,2,2); +explain(costs off) select t1.a from aj_t1 t1 where not exists (select 1 from aj_t2 t2 where t1.b = t2.b and t1.c = t2.c); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Anti Join + Hash Cond: ((t1.b = t2.b) AND (t1.c = t2.c)) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Hash Key: t1.b, t1.c + -> Seq Scan on aj_t1 t1 + -> Hash + -> Redistribute Motion 3:3 (slice3; segments: 3) + Hash Key: t2.b, t2.c + -> Seq Scan on aj_t2 t2 + Optimizer: Postgres query optimizer +(11 rows) + +select t1.a from aj_t1 t1 where not exists (select 1 from aj_t2 t2 where t1.b = t2.b and t1.c = t2.c); + a +--- +(0 rows) + -- -- test hash join -- diff --git a/src/test/regress/expected/join_gp_optimizer.out b/src/test/regress/expected/join_gp_optimizer.out index 4221cc5fcd5..bd105318837 100644 --- a/src/test/regress/expected/join_gp_optimizer.out +++ b/src/test/regress/expected/join_gp_optimizer.out @@ -47,6 +47,31 @@ select * from l l1 join l l2 on l1.a = l2.a left join l l3 on l1.a = l3.a and l1 2 | 2 | 2 (5 rows) +-- +-- test anti_join/left_anti_semi_join selectivities +-- +create table aj_t1(a int, b int, c int) distributed by (a); +create table aj_t2(a int, b int, c int) distributed by (a); +insert into aj_t1 values(1,1,1); +insert into aj_t2 values(1,1,1),(2,2,2); +explain(costs off) select t1.a from aj_t1 t1 where not exists (select 1 from aj_t2 t2 where t1.b = t2.b and t1.c = t2.c); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Hash Anti Join + Hash Cond: ((aj_t1.b = aj_t2.b) AND (aj_t1.c = aj_t2.c)) + -> Seq Scan on aj_t1 + -> Hash + -> Broadcast Motion 3:3 (slice2; segments: 3) + -> Seq Scan on aj_t2 + Optimizer: GPORCA +(8 rows) + +select t1.a from aj_t1 t1 where not exists (select 1 from aj_t2 t2 where t1.b = t2.b and t1.c = t2.c); + a +--- +(0 rows) + -- -- test hash join -- diff --git a/src/test/regress/expected/notin.out b/src/test/regress/expected/notin.out index 7baac477d8f..e8d69ab293d 100644 --- a/src/test/regress/expected/notin.out +++ b/src/test/regress/expected/notin.out @@ -317,19 +317,19 @@ select c1 from t1 where c1 > 6 and c1 not in -- explain select c1 from t1,t2 where c1 not in (select c3 from t3) and c1 = c2; - QUERY PLAN -------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=5.38..8.62 rows=4 width=4) - -> Hash Join (cost=5.38..8.62 rows=2 width=4) - Hash Cond: (t1.c1 = t2.c2) - -> Hash Left Anti Semi (Not-In) Join (cost=2.26..5.46 rows=2 width=4) - Hash Cond: (t1.c1 = t3.c3) - -> Seq Scan on t1 (cost=0.00..3.10 rows=4 width=4) - -> Hash (cost=2.15..2.15 rows=3 width=4) - -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..2.15 rows=3 width=4) - -> Seq Scan on t3 (cost=0.00..2.03 rows=1 width=4) - -> Hash (cost=3.05..3.05 rows=2 width=4) - -> Seq Scan on t2 (cost=0.00..3.05 rows=2 width=4) + QUERY PLAN +------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=2.12..3.25 rows=4 width=4) + -> Hash Left Anti Semi (Not-In) Join (cost=2.12..3.20 rows=1 width=4) + Hash Cond: (t1.c1 = t3.c3) + -> Hash Join (cost=1.04..2.10 rows=2 width=4) + Hash Cond: (t1.c1 = t2.c2) + -> Seq Scan on t1 (cost=0.00..1.03 rows=3 width=4) + -> Hash (cost=1.02..1.02 rows=2 width=4) + -> Seq Scan on t2 (cost=0.00..1.02 rows=2 width=4) + -> Hash (cost=1.05..1.05 rows=3 width=4) + -> Broadcast Motion 3:3 (slice2; segments: 3) (cost=0.00..1.05 rows=3 width=4) + -> Seq Scan on t3 (cost=0.00..1.01 rows=1 width=4) Optimizer: Postgres query optimizer (12 rows) diff --git a/src/test/regress/sql/join_gp.sql b/src/test/regress/sql/join_gp.sql index bf278e8f3f8..1498748c5b0 100644 --- a/src/test/regress/sql/join_gp.sql +++ b/src/test/regress/sql/join_gp.sql @@ -24,6 +24,18 @@ create temp table l(a int); insert into l values (1), (1), (2); select * from l l1 join l l2 on l1.a = l2.a left join l l3 on l1.a = l3.a and l1.a = 2 order by 1,2,3; +-- +-- test anti_join/left_anti_semi_join selectivities +-- +create table aj_t1(a int, b int, c int) distributed by (a); +create table aj_t2(a int, b int, c int) distributed by (a); +insert into aj_t1 values(1,1,1); +insert into aj_t2 values(1,1,1),(2,2,2); + +explain(costs off) select t1.a from aj_t1 t1 where not exists (select 1 from aj_t2 t2 where t1.b = t2.b and t1.c = t2.c); + +select t1.a from aj_t1 t1 where not exists (select 1 from aj_t2 t2 where t1.b = t2.b and t1.c = t2.c); + -- -- test hash join --