From ce6d83d5f0e98fbc2c64dfa8a5abd385c5085e33 Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Thu, 26 Sep 2024 14:42:22 +0900 Subject: [PATCH 01/12] [SPARK-49773][SQL] uncaught Java exception from make_timestamp() with bad timezone --- .../utils/src/main/resources/error/error-conditions.json | 6 ++++++ .../spark/sql/catalyst/util/SparkDateTimeUtils.scala | 7 ++++++- .../org/apache/spark/sql/errors/ExecutionErrors.scala | 8 ++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index e83202d9e5ee3..e8bd377ee064f 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -3267,6 +3267,12 @@ }, "sqlState" : "42000" }, + "INVALID_ZONE_OFFSET": { + "message": [ + ". ZoneOffset represents the time difference from UTC and must be in the range from -18:00 to +18:00." + ], + "sqlState" : "22009" + }, "JOIN_CONDITION_IS_NOT_BOOLEAN_TYPE" : { "message" : [ "The join condition has the invalid type , expected \"BOOLEAN\"." diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala index 4e94bc6617357..a876139fd4cff 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala @@ -46,7 +46,12 @@ trait SparkDateTimeUtils { // To support the (+|-)hh:m format because it was supported before Spark 3.0. formattedZoneId = singleMinuteTz.matcher(formattedZoneId).replaceFirst("$1$2:0$3") - ZoneId.of(formattedZoneId, ZoneId.SHORT_IDS) + try { + ZoneId.of(formattedZoneId, ZoneId.SHORT_IDS) + } catch { + case e: java.time.DateTimeException => + throw ExecutionErrors.zoneOffsetError(e.getMessage) + } } def getTimeZone(timeZoneId: String): TimeZone = TimeZone.getTimeZone(getZoneId(timeZoneId)) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala index 698a7b096e1a5..2ec6f5d43fc84 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala @@ -238,6 +238,14 @@ private[sql] trait ExecutionErrors extends DataTypeErrorsBase { "encoderType" -> encoder.getClass.getName, "docroot" -> SparkBuildInfo.spark_doc_root)) } + + def zoneOffsetError(message: String): SparkDateTimeException = { + new SparkDateTimeException( + errorClass = "INVALID_ZONE_OFFSET", + messageParameters = Map("message" -> message), + context = Array.empty, + summary = "") + } } private[sql] object ExecutionErrors extends ExecutionErrors From 58ecafb42953ac19ea33345e13cc04bb1e290476 Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Mon, 30 Sep 2024 10:55:50 +0900 Subject: [PATCH 02/12] Adjusted comments --- .../resources/error/error-conditions.json | 12 +++++----- .../org/apache/spark/SparkException.scala | 23 ++++++++++++++++--- .../catalyst/util/SparkDateTimeUtils.scala | 2 +- .../spark/sql/errors/ExecutionErrors.scala | 9 ++++---- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index e8bd377ee064f..0229b923c3c81 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -3135,6 +3135,12 @@ }, "sqlState" : "42K0E" }, + "INVALID_TIMEZONE": { + "message": [ + "ZoneOffset represents the time difference from UTC and must be in the range from -18:00 to +18:00." + ], + "sqlState" : "22009" + }, "INVALID_TYPED_LITERAL" : { "message" : [ "The value of the typed literal is invalid: ." @@ -3267,12 +3273,6 @@ }, "sqlState" : "42000" }, - "INVALID_ZONE_OFFSET": { - "message": [ - ". ZoneOffset represents the time difference from UTC and must be in the range from -18:00 to +18:00." - ], - "sqlState" : "22009" - }, "JOIN_CONDITION_IS_NOT_BOOLEAN_TYPE" : { "message" : [ "The join condition has the invalid type , expected \"BOOLEAN\"." diff --git a/common/utils/src/main/scala/org/apache/spark/SparkException.scala b/common/utils/src/main/scala/org/apache/spark/SparkException.scala index 398cb1fad6726..974c5875de233 100644 --- a/common/utils/src/main/scala/org/apache/spark/SparkException.scala +++ b/common/utils/src/main/scala/org/apache/spark/SparkException.scala @@ -306,8 +306,9 @@ private[spark] class SparkDateTimeException private( message: String, errorClass: Option[String], messageParameters: Map[String, String], - context: Array[QueryContext]) - extends DateTimeException(message) with SparkThrowable { + context: Array[QueryContext], + cause: Option[Throwable]) + extends DateTimeException(message, cause.orNull) with SparkThrowable { def this( errorClass: String, @@ -318,7 +319,23 @@ private[spark] class SparkDateTimeException private( SparkThrowableHelper.getMessage(errorClass, messageParameters, summary), Option(errorClass), messageParameters, - context + context, + cause = null + ) + } + + def this( + errorClass: String, + messageParameters: Map[String, String], + context: Array[QueryContext], + summary: String, + cause: Option[Throwable]) = { + this( + SparkThrowableHelper.getMessage(errorClass, messageParameters, summary), + Option(errorClass), + messageParameters, + context, + cause ) } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala index a876139fd4cff..db76c6aa8da69 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala @@ -50,7 +50,7 @@ trait SparkDateTimeUtils { ZoneId.of(formattedZoneId, ZoneId.SHORT_IDS) } catch { case e: java.time.DateTimeException => - throw ExecutionErrors.zoneOffsetError(e.getMessage) + throw ExecutionErrors.zoneOffsetError(e) } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala index 2ec6f5d43fc84..4c62d119861fc 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala @@ -239,12 +239,13 @@ private[sql] trait ExecutionErrors extends DataTypeErrorsBase { "docroot" -> SparkBuildInfo.spark_doc_root)) } - def zoneOffsetError(message: String): SparkDateTimeException = { + def zoneOffsetError(e: java.time.DateTimeException): SparkDateTimeException = { new SparkDateTimeException( - errorClass = "INVALID_ZONE_OFFSET", - messageParameters = Map("message" -> message), + errorClass = "INVALID_TIMEZONE", + messageParameters = Map.empty, context = Array.empty, - summary = "") + summary = "", + cause = Some(e)) } } From 10a2117244a6f1724341f70e0df41a50504a505b Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Mon, 30 Sep 2024 13:53:38 +0900 Subject: [PATCH 03/12] Update common/utils/src/main/resources/error/error-conditions.json Co-authored-by: Serge Rielau --- common/utils/src/main/resources/error/error-conditions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 0229b923c3c81..554f7b0671136 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -3137,7 +3137,7 @@ }, "INVALID_TIMEZONE": { "message": [ - "ZoneOffset represents the time difference from UTC and must be in the range from -18:00 to +18:00." + "The timezone: is invalid. The timezone must be either a region-based zone ID or a zone offset. Region IDs must have the form ‘area/city’, such as ‘America/Los_Angeles’. Zone offsets must be in the format ‘(+|-)HH’, ‘(+|-)HH:mm’ or ‘(+|-)HH:mm:ss’, e.g ‘-08’, ‘+01:00’ or ‘-13:33:33’., and must be in the range from -18:00 to +18:00. 'Z' and 'UTC' are accepted as synonyms for '+00:00'." ], "sqlState" : "22009" }, From 9b79a1be7f5db86812bebf4c94b4273da0b908b1 Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Mon, 30 Sep 2024 14:16:36 +0900 Subject: [PATCH 04/12] provide bad value for error message --- .../apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala | 2 +- .../scala/org/apache/spark/sql/errors/ExecutionErrors.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala index db76c6aa8da69..492b5cc4fa909 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala @@ -50,7 +50,7 @@ trait SparkDateTimeUtils { ZoneId.of(formattedZoneId, ZoneId.SHORT_IDS) } catch { case e: java.time.DateTimeException => - throw ExecutionErrors.zoneOffsetError(e) + throw ExecutionErrors.zoneOffsetError(timeZoneId, e) } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala index 4c62d119861fc..3cbb9ba3c3649 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala @@ -239,10 +239,10 @@ private[sql] trait ExecutionErrors extends DataTypeErrorsBase { "docroot" -> SparkBuildInfo.spark_doc_root)) } - def zoneOffsetError(e: java.time.DateTimeException): SparkDateTimeException = { + def zoneOffsetError(timeZone: String, e: java.time.DateTimeException): SparkDateTimeException = { new SparkDateTimeException( errorClass = "INVALID_TIMEZONE", - messageParameters = Map.empty, + messageParameters = Map("timeZone" -> timeZone), context = Array.empty, summary = "", cause = Some(e)) From cf78542ca04d568a878b937c7731865707c466f2 Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Mon, 30 Sep 2024 14:49:25 +0900 Subject: [PATCH 05/12] fix test --- .../src/main/scala/org/apache/spark/SparkException.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/utils/src/main/scala/org/apache/spark/SparkException.scala b/common/utils/src/main/scala/org/apache/spark/SparkException.scala index 974c5875de233..fcaee787fd8d3 100644 --- a/common/utils/src/main/scala/org/apache/spark/SparkException.scala +++ b/common/utils/src/main/scala/org/apache/spark/SparkException.scala @@ -320,7 +320,7 @@ private[spark] class SparkDateTimeException private( Option(errorClass), messageParameters, context, - cause = null + cause = None ) } @@ -335,7 +335,7 @@ private[spark] class SparkDateTimeException private( Option(errorClass), messageParameters, context, - cause + cause.orElse(None) ) } From 128eea7c91d198b8d4df3c9519801224853f0b87 Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Mon, 30 Sep 2024 19:31:55 +0900 Subject: [PATCH 06/12] reformat --- .../scala/org/apache/spark/sql/errors/ExecutionErrors.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala index 3cbb9ba3c3649..3527a10496862 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/ExecutionErrors.scala @@ -239,7 +239,9 @@ private[sql] trait ExecutionErrors extends DataTypeErrorsBase { "docroot" -> SparkBuildInfo.spark_doc_root)) } - def zoneOffsetError(timeZone: String, e: java.time.DateTimeException): SparkDateTimeException = { + def zoneOffsetError( + timeZone: String, + e: java.time.DateTimeException): SparkDateTimeException = { new SparkDateTimeException( errorClass = "INVALID_TIMEZONE", messageParameters = Map("timeZone" -> timeZone), From d41df0669e4b85929a9eb1783901e66e91d423de Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Mon, 30 Sep 2024 19:36:06 +0900 Subject: [PATCH 07/12] reformat error class --- .../src/main/resources/error/error-conditions.json | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 554f7b0671136..4c466032320f4 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -3101,6 +3101,12 @@ ], "sqlState" : "42K0F" }, + "INVALID_TIMEZONE" : { + "message" : [ + "The timezone: is invalid. The timezone must be either a region-based zone ID or a zone offset. Region IDs must have the form ‘area/city’, such as ‘America/Los_Angeles’. Zone offsets must be in the format ‘(+|-)HH’, ‘(+|-)HH:mm’ or ‘(+|-)HH:mm:ss’, e.g ‘-08’, ‘+01:00’ or ‘-13:33:33’., and must be in the range from -18:00 to +18:00. 'Z' and 'UTC' are accepted as synonyms for '+00:00'." + ], + "sqlState" : "22009" + }, "INVALID_TIME_TRAVEL_SPEC" : { "message" : [ "Cannot specify both version and timestamp when time travelling the table." @@ -3135,12 +3141,6 @@ }, "sqlState" : "42K0E" }, - "INVALID_TIMEZONE": { - "message": [ - "The timezone: is invalid. The timezone must be either a region-based zone ID or a zone offset. Region IDs must have the form ‘area/city’, such as ‘America/Los_Angeles’. Zone offsets must be in the format ‘(+|-)HH’, ‘(+|-)HH:mm’ or ‘(+|-)HH:mm:ss’, e.g ‘-08’, ‘+01:00’ or ‘-13:33:33’., and must be in the range from -18:00 to +18:00. 'Z' and 'UTC' are accepted as synonyms for '+00:00'." - ], - "sqlState" : "22009" - }, "INVALID_TYPED_LITERAL" : { "message" : [ "The value of the typed literal is invalid: ." From b84ad30caa57793a961ad90951e2f5dbfd36ba7a Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Wed, 2 Oct 2024 09:05:20 +0900 Subject: [PATCH 08/12] Update common/utils/src/main/resources/error/error-conditions.json Co-authored-by: Serge Rielau --- common/utils/src/main/resources/error/error-conditions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 4c466032320f4..a67edb2e9b06e 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -3103,7 +3103,7 @@ }, "INVALID_TIMEZONE" : { "message" : [ - "The timezone: is invalid. The timezone must be either a region-based zone ID or a zone offset. Region IDs must have the form ‘area/city’, such as ‘America/Los_Angeles’. Zone offsets must be in the format ‘(+|-)HH’, ‘(+|-)HH:mm’ or ‘(+|-)HH:mm:ss’, e.g ‘-08’, ‘+01:00’ or ‘-13:33:33’., and must be in the range from -18:00 to +18:00. 'Z' and 'UTC' are accepted as synonyms for '+00:00'." + "The timezone: is invalid. The timezone must be either a region-based zone ID or a zone offset. Region IDs must have the form 'area/city', such as 'America/Los_Angeles'. Zone offsets must be in the format '(+|-)HH', '(+|-)HH:mm’ or '(+|-)HH:mm:ss', e.g '-08' , '+01:00' or '-13:33:33', and must be in the range from -18:00 to +18:00. 'Z' and 'UTC' are accepted as synonyms for '+00:00'." ], "sqlState" : "22009" }, From 888f0ac7fd22b12105c0625c422bb6842b178c39 Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Wed, 2 Oct 2024 21:35:09 +0900 Subject: [PATCH 09/12] Retrigger From ded0e65ddf1ecd054a4281f63ee3f2798b0234f4 Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Thu, 3 Oct 2024 22:35:07 +0900 Subject: [PATCH 10/12] adjusted comments --- .../spark/sql/catalyst/util/SparkDateTimeUtils.scala | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala index 492b5cc4fa909..29d0eb9ad312c 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala @@ -41,16 +41,17 @@ trait SparkDateTimeUtils { final val singleMinuteTz = Pattern.compile("(\\+|\\-)(\\d\\d):(\\d)$") def getZoneId(timeZoneId: String): ZoneId = { - // To support the (+|-)h:mm format because it was supported before Spark 3.0. - var formattedZoneId = singleHourTz.matcher(timeZoneId).replaceFirst("$10$2:") - // To support the (+|-)hh:m format because it was supported before Spark 3.0. - formattedZoneId = singleMinuteTz.matcher(formattedZoneId).replaceFirst("$1$2:0$3") - try { + // To support the (+|-)h:mm format because it was supported before Spark 3.0. + var formattedZoneId = singleHourTz.matcher(timeZoneId).replaceFirst("$10$2:") + // To support the (+|-)hh:m format because it was supported before Spark 3.0. + formattedZoneId = singleMinuteTz.matcher(formattedZoneId).replaceFirst("$1$2:0$3") ZoneId.of(formattedZoneId, ZoneId.SHORT_IDS) } catch { case e: java.time.DateTimeException => throw ExecutionErrors.zoneOffsetError(timeZoneId, e) + case e: java.time.zone.ZoneRulesException => + throw ExecutionErrors.zoneOffsetError(timeZoneId, e) } } From 8859ab788ea8203d185230c7354ed78d291d3a0c Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Thu, 3 Oct 2024 23:38:35 +0900 Subject: [PATCH 11/12] Catch DateTimeException alone --- .../org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala index 29d0eb9ad312c..4d05f9079548c 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala @@ -50,8 +50,6 @@ trait SparkDateTimeUtils { } catch { case e: java.time.DateTimeException => throw ExecutionErrors.zoneOffsetError(timeZoneId, e) - case e: java.time.zone.ZoneRulesException => - throw ExecutionErrors.zoneOffsetError(timeZoneId, e) } } From 05328c5be51fcd6f40b0fd2bf623d1d229ed00e1 Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Sat, 5 Oct 2024 16:58:26 +0900 Subject: [PATCH 12/12] Add test --- .../sql/errors/QueryExecutionAnsiErrorsSuite.scala | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala index ec92e0b700e31..2e0983fe0319c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala @@ -391,4 +391,14 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest } } } + + test("SPARK-49773: INVALID_TIMEZONE for bad timezone") { + checkError( + exception = intercept[SparkDateTimeException] { + sql("select make_timestamp(1, 2, 28, 23, 1, 1, -100)").collect() + }, + condition = "INVALID_TIMEZONE", + parameters = Map("timeZone" -> "-100") + ) + } }