From 9a5eeb73dc69c0054823c81272235a842485957c Mon Sep 17 00:00:00 2001 From: lzlfred Date: Thu, 7 Sep 2023 16:56:40 -0700 Subject: [PATCH] Fix Delta Uniform to support converting Delta null partition values to Iceberg The existing Delta-to-Iceberg conversion has a bug: it does not handle null partition values. It writes the string "null" into the partition path, and "null" cannot be converted to numeric types. The fix uses a special marker from the Iceberg library so that the null value is recognized and converted correctly. GitOrigin-RevId: 667e795ead753803565340abcc23ae01d9738a2c --- .../sql/delta/icebergShaded/IcebergTransactionUtils.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergTransactionUtils.scala b/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergTransactionUtils.scala index bd850c0a0d7..bb6d1f6288c 100644 --- a/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergTransactionUtils.scala +++ b/iceberg/src/main/scala/org/apache/spark/sql/delta/icebergShaded/IcebergTransactionUtils.scala @@ -150,6 +150,7 @@ object IcebergTransactionUtils .withFormat(FileFormat.PARQUET) if (partitionSpec.isPartitioned) { + val ICEBERG_NULL_PARTITION_VALUE = "__HIVE_DEFAULT_PARTITION__" val partitionPath = partitionSpec .fields() .asScala @@ -158,7 +159,11 @@ object IcebergTransactionUtils // The Iceberg Schema and PartitionSpec all use the column logical names. // Delta FileAction::partitionValues, however, uses physical names. val physicalPartKey = logicalToPhysicalPartitionNames(logicalPartCol) - s"$logicalPartCol=${f.partitionValues(physicalPartKey)}" + + // ICEBERG_NULL_PARTITION_VALUE is referred in Iceberg lib to mark NULL partition value + val partValue = Option(f.partitionValues(physicalPartKey)) + .getOrElse(ICEBERG_NULL_PARTITION_VALUE) + s"$logicalPartCol=$partValue" } .mkString("/")