From c5983c1691f20590abf80b17bdc029b584b89521 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Thu, 7 Jul 2022 22:51:40 +0800 Subject: [PATCH] [SPARK-38018][SQL][3.2] Fix ColumnVectorUtils.populate to handle CalendarIntervalType correctly ### What changes were proposed in this pull request? This is a backport of https://github.com/apache/spark/pull/35314 to branch 3.2. See that original PR for context. ### Why are the changes needed? To fix a potential correctness issue. ### Does this PR introduce _any_ user-facing change? No, but it fixes the existing correctness issue when reading a partition column with CalendarInterval type. ### How was this patch tested? Added a unit test in `ColumnVectorSuite.scala`. Closes #37114 from c21/branch-3.2. Authored-by: Cheng Su Signed-off-by: Wenchen Fan --- .../sql/execution/vectorized/ColumnVectorUtils.java | 3 ++- .../sql/execution/vectorized/ColumnVectorSuite.scala | 11 ++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java index 2010d5cfa58a8..814902f72fa7f 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java @@ -91,7 +91,8 @@ public static void populate(WritableColumnVector col, InternalRow row, int field } else if (t instanceof CalendarIntervalType) { CalendarInterval c = (CalendarInterval)row.get(fieldIdx, t); col.getChild(0).putInts(0, capacity, c.months); - col.getChild(1).putLongs(0, capacity, c.microseconds); + col.getChild(1).putInts(0, capacity, c.days); + col.getChild(2).putLongs(0, capacity, c.microseconds); } else if (t instanceof DateType) { col.putInts(0, capacity, row.getInt(fieldIdx)); } else if (t instanceof TimestampType || t instanceof TimestampNTZType) { diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala index 43f48abb9734f..1c5b092f80512 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.execution.columnar.ColumnAccessor import org.apache.spark.sql.execution.columnar.compression.ColumnBuilderHelper import org.apache.spark.sql.types._ import org.apache.spark.sql.vectorized.ColumnarArray -import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { private def withVector( @@ -536,5 +536,14 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { } } } + + test("SPARK-38018: ColumnVectorUtils.populate to handle CalendarIntervalType correctly") { + val vector = new OnHeapColumnVector(5, CalendarIntervalType) + val row = new SpecificInternalRow(Array(CalendarIntervalType)) + val interval = new CalendarInterval(3, 5, 1000000) + row.setInterval(0, interval) + ColumnVectorUtils.populate(vector, row, 0) + assert(vector.getInterval(0) === interval) + } }