
[SPARK-32820][SQL] Remove redundant shuffle exchanges inserted by EnsureRequirements #29677

Closed · wants to merge 10 commits
@@ -52,7 +52,10 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] {
       case (child, distribution) =>
         val numPartitions = distribution.requiredNumPartitions
           .getOrElse(conf.numShufflePartitions)
-        ShuffleExchangeExec(distribution.createPartitioning(numPartitions), child)
+        // Just as optimizer.CollapseRepartition removes adjacent repartition operations,
+        // adjacent repartitions performed by shuffle can also be removed.
+        val newChild = if (child.isInstanceOf[ShuffleExchangeExec]) child.children.head else child
Member:

Could you add some comments about why we can remove it?

Member:

This reminds me of #26946. cc @cloud-fan, @maryannxue and @stczwd FYI

Member:

To avoid the case @HyukjinKwon pointed out above, it seems we need to check whether outputPartitioning is the same, to narrow down the scope of this optimization.

Btw, in my opinion, to avoid complicating the EnsureRequirements rule further, it would be better to remove these kinds of redundant shuffles in a new rule that runs after EnsureRequirements, like #27096.
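
For illustration, a minimal sketch of what such a standalone rule might look like (hypothetical code, not from this PR; the rule name is made up, and whether blindly collapsing stacked shuffles is always safe is exactly what is discussed below):

import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec

// Hypothetical rule that could run after EnsureRequirements: a shuffle stacked
// directly on another shuffle re-partitions the data anyway, so the inner
// exchange does no useful work and is dropped.
object RemoveRedundantShuffleExchange extends Rule[SparkPlan] {
  override def apply(plan: SparkPlan): SparkPlan = plan.transformUp {
    case parent: ShuffleExchangeExec =>
      parent.child match {
        case inner: ShuffleExchangeExec => parent.withNewChildren(inner.child :: Nil)
        case _ => parent
      }
  }
}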

@sarutak (Member, Author) on Sep 29, 2020:

> it seems we need to check whether outputPartitioning is the same, to narrow down the scope of this optimization.

Do you mean we should check whether the outputPartitioning of the ShuffleExchangeExec to be inserted matches that of the existing ShuffleExchangeExec?
If that is what you mean, it should already satisfy that condition.

Just removing the existing ShuffleExchange and inserting a new ShuffleExchange whose outputPartitioning satisfies the required Distribution works, doesn't it?

> Btw, in my opinion, to avoid complicating the EnsureRequirements rule further, it would be better to remove these kinds of redundant shuffles in a new rule that runs after EnsureRequirements, like #27096.

Having a new rule sounds better.

Member:

> Do you mean we should check whether the outputPartitioning of the ShuffleExchangeExec to be inserted matches that of the existing ShuffleExchangeExec?

I didn't mean that; have you checked #26946? This PR currently removes shuffles incorrectly:

scala> spark.range(1).selectExpr("id as a").write.saveAsTable("test")
scala> sql("SELECT /*+ REPARTITION(5) */ * FROM test ORDER BY a").explain()
== Physical Plan ==
*(2) Sort [a#5L ASC NULLS FIRST], true, 0
+- Exchange rangepartitioning(a#5L ASC NULLS FIRST, 200), true, [id=#53]
   +- Exchange RoundRobinPartitioning(5), false, [id=#52] <--- !!! Removed? !!!
      +- *(1) ColumnarToRow
         +- FileScan parquet default.test[a#5L] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[file:/Users/maropu/Repositories/spark/spark-master/spark-warehouse/test], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<a:bigint>

@sarutak (Member, Author) on Sep 29, 2020:

I have considered such a case, but if a shuffle/partitioning is performed directly on top of another shuffle/partitioning as its immediate child, is the child shuffle/partitioning meaningful?
In the example above, the result is the same whether the RoundRobinPartitioning is removed or not, right?

I checked #26946, and I understand that the root cause of that issue was a bug in how hints are handled in the parser, so the optimizer-based approach was wrong and incomplete for that issue.

The solutions in this PR and that PR are similar, but the issues are different. This PR just focuses on removing redundant shuffles.

I might be misunderstanding your point and that PR; please correct me if my understanding is wrong.
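
For reference, a quick way to sanity-check this claim in spark-shell (a sketch; it assumes the `test` table created in the example above):

// Because ORDER BY adds a range shuffle on top, the sorted result is the same
// whether the round-robin shuffle from the REPARTITION(5) hint is kept or removed.
val withHint = sql("SELECT /*+ REPARTITION(5) */ * FROM test ORDER BY a").collect()
val withoutHint = sql("SELECT * FROM test ORDER BY a").collect()
assert(withHint.sameElements(withoutHint))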

Member:

Ah, I got your point and it makes sense. Could you update the PR description? The current one only describes the cases where the output partitioning is the same.

@sarutak (Member, Author):

Yeah, I've updated it. Thanks.

+        ShuffleExchangeExec(distribution.createPartitioning(numPartitions), newChild)
       }

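To illustrate the change to EnsureRequirements above (a spark-shell sketch that mirrors the new test added below; `value` is the default column name produced by toDF): the user's repartition creates one shuffle, and the join then requires hash partitioning on a different expression, so EnsureRequirements would otherwise stack a second shuffle directly on top of the first.

// Disable broadcast joins so a shuffled join is planned.
spark.conf.set("spark.sql.autoBroadcastJoinThreshold", "0")
val left = Seq(1, 2, 3).toDF.repartition(10, $"value")
val right = Seq(1, 2, 3).toDF
// With this patch, the 10-partition exchange under `left` is removed;
// only the exchanges required by the join remain in the final plan.
left.join(right, left("value") + 1 === right("value")).explain()
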
@@ -1001,6 +1001,38 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper {
     val numPartitions = range.rdd.getNumPartitions
     assert(numPartitions == 0)
   }
+
+  test("SPARK-32820: Remove redundant shuffle exchange") {
+    withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "0") {
+      withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "200") {
+        val ordered = spark.range(1, 100).repartitionByRange(10, $"id".desc).orderBy($"id")
+        val orderedPlan = ordered.queryExecution.executedPlan
+        val exchangesInOrdered =
+          orderedPlan.collect { case s: ShuffleExchangeExec => s }
+        assert(exchangesInOrdered.size == 1)
+
+        val partitioning = exchangesInOrdered.head.outputPartitioning
+        assert(partitioning.numPartitions == 200)
+        assert(partitioning.isInstanceOf[RangePartitioning])
+
+        val left = Seq(1, 2, 3).toDF.repartition(10, $"value")
+        val right = Seq(1, 2, 3).toDF
+        val joined = left.join(right, left("value") + 1 === right("value"))
+        val joinedPlan = joined.queryExecution.executedPlan
+        val exchangesInJoined =
+          joinedPlan.collect { case s: ShuffleExchangeExec => s }
+        assert(exchangesInJoined.size == 2)
+
+        val leftPartitioning = exchangesInJoined(0).outputPartitioning
+        assert(leftPartitioning.numPartitions == 200)
+        assert(leftPartitioning.isInstanceOf[HashPartitioning])
+
+        val rightPartitioning = exchangesInJoined(1).outputPartitioning
+        assert(rightPartitioning.numPartitions == 200)
+        assert(rightPartitioning.isInstanceOf[HashPartitioning])
+      }
+    }
+  }
 }

 // Used for unit-testing EnsureRequirements