diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 063417a254be..0073ed94b97d 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -263,10 +263,6 @@ impl TypeSignatureClass { self: &TypeSignatureClass, logical_type: &NativeType, ) -> bool { - if logical_type == &NativeType::Null { - return true; - } - match self { TypeSignatureClass::Native(t) if t.native() == logical_type => true, TypeSignatureClass::Timestamp if logical_type.is_timestamp() => true, diff --git a/datafusion/functions-nested/src/string.rs b/datafusion/functions-nested/src/string.rs index d60d1a6e4de0..7ffa684a83a4 100644 --- a/datafusion/functions-nested/src/string.rs +++ b/datafusion/functions-nested/src/string.rs @@ -43,7 +43,7 @@ use arrow::datatypes::DataType::{ }; use datafusion_common::cast::{as_large_list_array, as_list_array}; use datafusion_common::exec_err; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignature, TypeSignatureClass, Volatility, @@ -255,11 +255,19 @@ impl StringToArray { vec![ TypeSignature::Coercible(vec![ Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), ]), TypeSignature::Coercible(vec![ Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), Coercion::new_exact(TypeSignatureClass::Native(logical_string())), ]), ], diff --git 
a/datafusion/functions/src/crypto/digest.rs b/datafusion/functions/src/crypto/digest.rs index 2840006169be..7ddc3ad3cd93 100644 --- a/datafusion/functions/src/crypto/digest.rs +++ b/datafusion/functions/src/crypto/digest.rs @@ -19,7 +19,7 @@ use super::basic::{digest, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::{ - types::{logical_binary, logical_string}, + types::{logical_binary, logical_null, logical_string, NativeType}, Result, }; use datafusion_expr::{ @@ -72,12 +72,28 @@ impl DigestFunc { signature: Signature::one_of( vec![ TypeSignature::Coercible(vec![ - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), ]), TypeSignature::Coercible(vec![ - Coercion::new_exact(TypeSignatureClass::Native(logical_binary())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_binary()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::Binary, + ), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), ]), ], Volatility::Immutable, diff --git a/datafusion/functions/src/crypto/md5.rs b/datafusion/functions/src/crypto/md5.rs index c1540450029c..3af0550cb8cd 100644 --- a/datafusion/functions/src/crypto/md5.rs +++ b/datafusion/functions/src/crypto/md5.rs @@ -20,7 +20,7 @@ use crate::crypto::basic::md5; use arrow::datatypes::DataType; use datafusion_common::{ plan_err, - types::{logical_binary, logical_string, NativeType}, + types::{logical_binary, logical_null, 
logical_string, NativeType}, Result, }; use datafusion_expr::{ @@ -62,12 +62,18 @@ impl Md5Func { vec![ TypeSignature::Coercible(vec![Coercion::new_implicit( TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_string())], + vec![ + TypeSignatureClass::Native(logical_string()), + TypeSignatureClass::Native(logical_null()), + ], NativeType::String, )]), TypeSignature::Coercible(vec![Coercion::new_implicit( TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_binary())], + vec![ + TypeSignatureClass::Native(logical_binary()), + TypeSignatureClass::Native(logical_null()), + ], NativeType::Binary, )]), ], diff --git a/datafusion/functions/src/crypto/sha224.rs b/datafusion/functions/src/crypto/sha224.rs index a64a3ef80319..8b38a75e0af9 100644 --- a/datafusion/functions/src/crypto/sha224.rs +++ b/datafusion/functions/src/crypto/sha224.rs @@ -19,7 +19,7 @@ use super::basic::{sha224, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::{ - types::{logical_binary, logical_string, NativeType}, + types::{logical_binary, logical_null, logical_string, NativeType}, Result, }; use datafusion_expr::{ @@ -62,12 +62,18 @@ impl SHA224Func { vec![ TypeSignature::Coercible(vec![Coercion::new_implicit( TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_string())], + vec![ + TypeSignatureClass::Native(logical_string()), + TypeSignatureClass::Native(logical_null()), + ], NativeType::String, )]), TypeSignature::Coercible(vec![Coercion::new_implicit( TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_binary())], + vec![ + TypeSignatureClass::Native(logical_binary()), + TypeSignatureClass::Native(logical_null()), + ], NativeType::Binary, )]), ], diff --git a/datafusion/functions/src/crypto/sha256.rs b/datafusion/functions/src/crypto/sha256.rs index 94f3ea3b49fa..89d4b1071f27 100644 --- 
a/datafusion/functions/src/crypto/sha256.rs +++ b/datafusion/functions/src/crypto/sha256.rs @@ -19,7 +19,7 @@ use super::basic::{sha256, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::{ - types::{logical_binary, logical_string, NativeType}, + types::{logical_binary, logical_null, logical_string, NativeType}, Result, }; use datafusion_expr::{ @@ -61,12 +61,18 @@ impl SHA256Func { vec![ TypeSignature::Coercible(vec![Coercion::new_implicit( TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_string())], + vec![ + TypeSignatureClass::Native(logical_string()), + TypeSignatureClass::Native(logical_null()), + ], NativeType::String, )]), TypeSignature::Coercible(vec![Coercion::new_implicit( TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_binary())], + vec![ + TypeSignatureClass::Native(logical_binary()), + TypeSignatureClass::Native(logical_null()), + ], NativeType::Binary, )]), ], diff --git a/datafusion/functions/src/crypto/sha384.rs b/datafusion/functions/src/crypto/sha384.rs index 023730469c7b..5de5657227b5 100644 --- a/datafusion/functions/src/crypto/sha384.rs +++ b/datafusion/functions/src/crypto/sha384.rs @@ -19,7 +19,7 @@ use super::basic::{sha384, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::{ - types::{logical_binary, logical_string, NativeType}, + types::{logical_binary, logical_null, logical_string, NativeType}, Result, }; use datafusion_expr::{ @@ -61,12 +61,18 @@ impl SHA384Func { vec![ TypeSignature::Coercible(vec![Coercion::new_implicit( TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_string())], + vec![ + TypeSignatureClass::Native(logical_string()), + TypeSignatureClass::Native(logical_null()), + ], NativeType::String, )]), TypeSignature::Coercible(vec![Coercion::new_implicit( TypeSignatureClass::Native(logical_binary()), - 
vec![TypeSignatureClass::Native(logical_binary())], + vec![ + TypeSignatureClass::Native(logical_binary()), + TypeSignatureClass::Native(logical_null()), + ], NativeType::Binary, )]), ], diff --git a/datafusion/functions/src/crypto/sha512.rs b/datafusion/functions/src/crypto/sha512.rs index f48737e5751f..f423dc88a9d7 100644 --- a/datafusion/functions/src/crypto/sha512.rs +++ b/datafusion/functions/src/crypto/sha512.rs @@ -19,7 +19,7 @@ use super::basic::{sha512, utf8_or_binary_to_binary_type}; use arrow::datatypes::DataType; use datafusion_common::{ - types::{logical_binary, logical_string, NativeType}, + types::{logical_binary, logical_null, logical_string, NativeType}, Result, }; use datafusion_expr::{ @@ -61,12 +61,18 @@ impl SHA512Func { vec![ TypeSignature::Coercible(vec![Coercion::new_implicit( TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_string())], + vec![ + TypeSignatureClass::Native(logical_string()), + TypeSignatureClass::Native(logical_null()), + ], NativeType::String, )]), TypeSignature::Coercible(vec![Coercion::new_implicit( TypeSignatureClass::Native(logical_binary()), - vec![TypeSignatureClass::Native(logical_binary())], + vec![ + TypeSignatureClass::Native(logical_binary()), + TypeSignatureClass::Native(logical_null()), + ], NativeType::Binary, )]), ], diff --git a/datafusion/functions/src/regex/regexplike.rs b/datafusion/functions/src/regex/regexplike.rs index 2080bb9fe818..f2a062c5a197 100644 --- a/datafusion/functions/src/regex/regexplike.rs +++ b/datafusion/functions/src/regex/regexplike.rs @@ -21,7 +21,7 @@ use arrow::array::{Array, ArrayRef, AsArray, GenericStringArray}; use arrow::compute::kernels::regexp; use arrow::datatypes::DataType; use arrow::datatypes::DataType::{LargeUtf8, Utf8, Utf8View}; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::{ arrow_datafusion_err, exec_err, internal_err, plan_err, 
DataFusionError, Result, ScalarValue, @@ -84,12 +84,28 @@ impl RegexpLikeFunc { signature: Signature::one_of( vec![ TypeSignature::Coercible(vec![ - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), ]), TypeSignature::Coercible(vec![ - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), Coercion::new_exact(TypeSignatureClass::Native(logical_string())), ]), ], diff --git a/datafusion/functions/src/string/ascii.rs b/datafusion/functions/src/string/ascii.rs index 006492a0e07a..702148c71550 100644 --- a/datafusion/functions/src/string/ascii.rs +++ b/datafusion/functions/src/string/ascii.rs @@ -19,7 +19,7 @@ use crate::utils::make_scalar_function; use arrow::array::{ArrayAccessor, ArrayIter, ArrayRef, AsArray, Int32Array}; use arrow::datatypes::DataType; use arrow::error::ArrowError; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::{internal_err, Result}; use datafusion_expr::{ColumnarValue, Documentation, TypeSignatureClass}; use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility}; @@ -64,9 +64,11 @@ impl AsciiFunc { pub fn new() -> Self { Self { signature: Signature::coercible( - 
vec![Coercion::new_exact(TypeSignatureClass::Native( - logical_string(), - ))], + vec![Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + )], Volatility::Immutable, ), } diff --git a/datafusion/functions/src/string/bit_length.rs b/datafusion/functions/src/string/bit_length.rs index f8740aa4178b..ad20f3338680 100644 --- a/datafusion/functions/src/string/bit_length.rs +++ b/datafusion/functions/src/string/bit_length.rs @@ -20,7 +20,7 @@ use arrow::datatypes::DataType; use std::any::Any; use crate::utils::utf8_to_int_type; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::utils::take_function_args; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ @@ -60,9 +60,11 @@ impl BitLengthFunc { pub fn new() -> Self { Self { signature: Signature::coercible( - vec![Coercion::new_exact(TypeSignatureClass::Native( - logical_string(), - ))], + vec![Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + )], Volatility::Immutable, ), } diff --git a/datafusion/functions/src/string/btrim.rs b/datafusion/functions/src/string/btrim.rs index 2f1711c9962a..8851b2f24df7 100644 --- a/datafusion/functions/src/string/btrim.rs +++ b/datafusion/functions/src/string/btrim.rs @@ -19,7 +19,7 @@ use crate::string::common::*; use crate::utils::{make_scalar_function, utf8_to_str_type}; use arrow::array::{ArrayRef, OffsetSizeTrait}; use arrow::datatypes::DataType; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::{exec_err, Result}; use datafusion_expr::function::Hint; use datafusion_expr::{ @@ -83,11 +83,21 @@ impl BTrimFunc { signature: Signature::one_of( vec![ TypeSignature::Coercible(vec![ - 
Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), ]), - TypeSignature::Coercible(vec![Coercion::new_exact( + TypeSignature::Coercible(vec![Coercion::new_implicit( TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, )]), ], Volatility::Immutable, diff --git a/datafusion/functions/src/string/contains.rs b/datafusion/functions/src/string/contains.rs index 05a3edf61c5a..5bf2ef7e33f1 100644 --- a/datafusion/functions/src/string/contains.rs +++ b/datafusion/functions/src/string/contains.rs @@ -20,7 +20,7 @@ use arrow::array::{Array, ArrayRef, AsArray}; use arrow::compute::contains as arrow_contains; use arrow::datatypes::DataType; use arrow::datatypes::DataType::{Boolean, LargeUtf8, Utf8, Utf8View}; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::{exec_err, DataFusionError, Result}; use datafusion_expr::binary::{binary_to_string_coercion, string_coercion}; use datafusion_expr::{ @@ -63,7 +63,11 @@ impl ContainsFunc { signature: Signature::coercible( vec![ Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), ], Volatility::Immutable, ), diff --git a/datafusion/functions/src/string/ends_with.rs b/datafusion/functions/src/string/ends_with.rs index eafc310236ee..5939bd46aaed 100644 --- 
a/datafusion/functions/src/string/ends_with.rs +++ b/datafusion/functions/src/string/ends_with.rs @@ -22,7 +22,7 @@ use arrow::array::ArrayRef; use arrow::datatypes::DataType; use crate::utils::make_scalar_function; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::{internal_err, Result}; use datafusion_expr::binary::{binary_to_string_coercion, string_coercion}; use datafusion_expr::{ @@ -68,8 +68,16 @@ impl EndsWithFunc { Self { signature: Signature::coercible( vec![ - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), ], Volatility::Immutable, ), diff --git a/datafusion/functions/src/string/levenshtein.rs b/datafusion/functions/src/string/levenshtein.rs index a1a486c7d3cf..211adc9665b2 100644 --- a/datafusion/functions/src/string/levenshtein.rs +++ b/datafusion/functions/src/string/levenshtein.rs @@ -23,7 +23,7 @@ use arrow::datatypes::DataType; use crate::utils::{make_scalar_function, utf8_to_int_type}; use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::utils::datafusion_strsim; use datafusion_common::utils::take_function_args; use datafusion_common::{exec_err, Result}; @@ -73,8 +73,16 @@ impl LevenshteinFunc { Self { signature: Signature::coercible( vec![ - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( 
+ TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), ], Volatility::Immutable, ), diff --git a/datafusion/functions/src/string/lower.rs b/datafusion/functions/src/string/lower.rs index 226275b13999..c7a427768bbd 100644 --- a/datafusion/functions/src/string/lower.rs +++ b/datafusion/functions/src/string/lower.rs @@ -20,7 +20,7 @@ use std::any::Any; use crate::string::common::to_lower; use crate::utils::utf8_to_str_type; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::Result; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, @@ -59,9 +59,11 @@ impl LowerFunc { pub fn new() -> Self { Self { signature: Signature::coercible( - vec![Coercion::new_exact(TypeSignatureClass::Native( - logical_string(), - ))], + vec![Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + )], Volatility::Immutable, ), } diff --git a/datafusion/functions/src/string/ltrim.rs b/datafusion/functions/src/string/ltrim.rs index 65849202efc6..30375a961964 100644 --- a/datafusion/functions/src/string/ltrim.rs +++ b/datafusion/functions/src/string/ltrim.rs @@ -22,7 +22,7 @@ use std::sync::Arc; use crate::string::common::*; use crate::utils::{make_scalar_function, utf8_to_str_type}; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::{exec_err, Result}; use datafusion_expr::function::Hint; use datafusion_expr::{ @@ -88,11 +88,21 @@ impl LtrimFunc { signature: Signature::one_of( vec![ TypeSignature::Coercible(vec![ - 
Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), ]), - TypeSignature::Coercible(vec![Coercion::new_exact( + TypeSignature::Coercible(vec![Coercion::new_implicit( TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, )]), ], Volatility::Immutable, diff --git a/datafusion/functions/src/string/octet_length.rs b/datafusion/functions/src/string/octet_length.rs index 17ea2726b071..71c124a823a0 100644 --- a/datafusion/functions/src/string/octet_length.rs +++ b/datafusion/functions/src/string/octet_length.rs @@ -20,7 +20,7 @@ use arrow::datatypes::DataType; use std::any::Any; use crate::utils::utf8_to_int_type; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::utils::take_function_args; use datafusion_common::{Result, ScalarValue}; use datafusion_expr::{ @@ -60,9 +60,11 @@ impl OctetLengthFunc { pub fn new() -> Self { Self { signature: Signature::coercible( - vec![Coercion::new_exact(TypeSignatureClass::Native( - logical_string(), - ))], + vec![Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + )], Volatility::Immutable, ), } diff --git a/datafusion/functions/src/string/repeat.rs b/datafusion/functions/src/string/repeat.rs index 2d36cb8356a0..6f53462b3067 100644 --- a/datafusion/functions/src/string/repeat.rs +++ b/datafusion/functions/src/string/repeat.rs @@ -26,7 +26,7 @@ use arrow::array::{ use arrow::datatypes::DataType; use 
arrow::datatypes::DataType::{LargeUtf8, Utf8, Utf8View}; use datafusion_common::cast::as_int64_array; -use datafusion_common::types::{logical_int64, logical_string, NativeType}; +use datafusion_common::types::{logical_int64, logical_null, logical_string, NativeType}; use datafusion_common::{exec_err, DataFusionError, Result}; use datafusion_expr::{ColumnarValue, Documentation, Volatility}; use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature}; @@ -67,7 +67,11 @@ impl RepeatFunc { Self { signature: Signature::coercible( vec![ - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), // Accept all integer types but cast them to i64 Coercion::new_implicit( TypeSignatureClass::Native(logical_int64()), diff --git a/datafusion/functions/src/string/replace.rs b/datafusion/functions/src/string/replace.rs index de70215c49c7..222bf2c0f328 100644 --- a/datafusion/functions/src/string/replace.rs +++ b/datafusion/functions/src/string/replace.rs @@ -23,7 +23,7 @@ use arrow::datatypes::DataType; use crate::utils::{make_scalar_function, utf8_to_str_type}; use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::{exec_err, Result}; use datafusion_expr::type_coercion::binary::{ binary_to_string_coercion, string_coercion, @@ -68,9 +68,21 @@ impl ReplaceFunc { Self { signature: Signature::coercible( vec![ - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + 
NativeType::String, + ), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), ], Volatility::Immutable, ), diff --git a/datafusion/functions/src/string/rtrim.rs b/datafusion/functions/src/string/rtrim.rs index bb33274978da..9aa0be7db4c5 100644 --- a/datafusion/functions/src/string/rtrim.rs +++ b/datafusion/functions/src/string/rtrim.rs @@ -22,7 +22,7 @@ use std::sync::Arc; use crate::string::common::*; use crate::utils::{make_scalar_function, utf8_to_str_type}; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::{exec_err, Result}; use datafusion_expr::function::Hint; use datafusion_expr::{ @@ -88,11 +88,21 @@ impl RtrimFunc { signature: Signature::one_of( vec![ TypeSignature::Coercible(vec![ - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), ]), - TypeSignature::Coercible(vec![Coercion::new_exact( + TypeSignature::Coercible(vec![Coercion::new_implicit( TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, )]), ], Volatility::Immutable, diff --git a/datafusion/functions/src/string/starts_with.rs b/datafusion/functions/src/string/starts_with.rs index 71df83352f96..310155fb835d 100644 --- a/datafusion/functions/src/string/starts_with.rs +++ 
b/datafusion/functions/src/string/starts_with.rs @@ -26,7 +26,7 @@ use datafusion_expr::type_coercion::binary::{ }; use crate::utils::make_scalar_function; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::{internal_err, Result, ScalarValue}; use datafusion_expr::{ cast, Coercion, ColumnarValue, Documentation, Expr, Like, ScalarFunctionArgs, @@ -90,8 +90,16 @@ impl StartsWithFunc { Self { signature: Signature::coercible( vec![ - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), ], Volatility::Immutable, ), diff --git a/datafusion/functions/src/string/upper.rs b/datafusion/functions/src/string/upper.rs index 2fec7305d183..f2d9b5aee502 100644 --- a/datafusion/functions/src/string/upper.rs +++ b/datafusion/functions/src/string/upper.rs @@ -18,7 +18,7 @@ use crate::string::common::to_upper; use crate::utils::utf8_to_str_type; use arrow::datatypes::DataType; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::Result; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature, @@ -58,9 +58,11 @@ impl UpperFunc { pub fn new() -> Self { Self { signature: Signature::coercible( - vec![Coercion::new_exact(TypeSignatureClass::Native( - logical_string(), - ))], + vec![Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + )], Volatility::Immutable, ), } diff --git 
a/datafusion/functions/src/unicode/initcap.rs b/datafusion/functions/src/unicode/initcap.rs index c9b0cb77b096..8b8f17846471 100644 --- a/datafusion/functions/src/unicode/initcap.rs +++ b/datafusion/functions/src/unicode/initcap.rs @@ -25,7 +25,7 @@ use arrow::datatypes::DataType; use crate::utils::{make_scalar_function, utf8_to_str_type}; use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::{exec_err, Result}; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignatureClass, @@ -65,9 +65,11 @@ impl InitcapFunc { pub fn new() -> Self { Self { signature: Signature::coercible( - vec![Coercion::new_exact(TypeSignatureClass::Native( - logical_string(), - ))], + vec![Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + )], Volatility::Immutable, ), } diff --git a/datafusion/functions/src/unicode/strpos.rs b/datafusion/functions/src/unicode/strpos.rs index b3bc73a29585..7eaf35446492 100644 --- a/datafusion/functions/src/unicode/strpos.rs +++ b/datafusion/functions/src/unicode/strpos.rs @@ -23,7 +23,7 @@ use arrow::array::{ ArrayRef, ArrowPrimitiveType, AsArray, PrimitiveArray, StringArrayType, }; use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type}; -use datafusion_common::types::logical_string; +use datafusion_common::types::{logical_null, logical_string, NativeType}; use datafusion_common::{exec_err, internal_err, Result}; use datafusion_expr::{ Coercion, ColumnarValue, Documentation, ScalarUDFImpl, Signature, TypeSignatureClass, @@ -64,8 +64,16 @@ impl StrposFunc { Self { signature: Signature::coercible( vec![ - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), - Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + 
Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + ), ], Volatility::Immutable, ), diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 496f24abf6ed..70dd68590d9a 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -736,8 +736,11 @@ select specific_name, data_type, ordinal_position, parameter_mode, rid from info repeat Int64 2 IN 0 repeat Int64 2 IN 1 repeat Int64 2 IN 2 +repeat Int64 2 IN 3 repeat LargeUtf8 1 IN 1 repeat LargeUtf8 1 OUT 1 +repeat Null 1 IN 3 +repeat Null 1 OUT 3 repeat Utf8 1 IN 0 repeat Utf8 1 OUT 0 repeat Utf8 1 OUT 2 diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index 11fd49566522..4449a07faf3b 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -19,6 +19,90 @@ # Upgrade Guides +## DataFusion `47.0.0` + +### Specifying nullable arguments for coercible signatures in UDFs + +DataFusion 47 changes how UDFs handle nullable arguments in [`TypeSignatureClass`] (primarily used in coercible signatures). + +Previously, DataFusion treated [`NativeType::Null`] as a special case when matching native types to a signature class. 
+However, this led to inconsistent behavior and internal errors — when the signature class was not `TypeSignatureClass::Native`, passing `NULL` to a UDF would result in an error such as: + +```text +May miss the matching logic in `matches_native_type` +``` + +If your function uses a coercible signature ([`Signature::Coercible`]) and should accept `NULL` as a valid input, you need to convert it to use [`Coercion::Implicit`] (if it doesn't already) and explicitly include `TypeSignatureClass::Native(logical_null())` in the `allowed_source_types`. + +--- + +#### Example: Updating an exact coercible signature + +Before: + +```rust +# /* comment out so they don't run +pub fn new() -> Self { + Self { + signature: Signature::coercible( + vec![Coercion::new_exact(TypeSignatureClass::Native( + logical_string(), + ))], + Volatility::Immutable, + ), + } +} +# */ +``` + +After: + +```rust +# /* comment out so they don't run +use datafusion_common::types::{logical_null, NativeType}; + +pub fn new() -> Self { + Self { + signature: Signature::coercible( + vec![Coercion::new_implicit( + TypeSignatureClass::Native(logical_string()), + vec![TypeSignatureClass::Native(logical_null())], + NativeType::String, + )], + Volatility::Immutable, + ), + } +} +# */ +``` + +--- + +#### Example: Updating an existing implicit coercion + +Even if you're already using `Coercion::Implicit`, you still need to explicitly allow `NULL` values. +Without this, passing `NULL` will fail to match the signature and result in an internal error. + +```diff +TypeSignature::Coercible(vec![Coercion::new_implicit( + TypeSignatureClass::Native(logical_binary()), +- vec![TypeSignatureClass::Native(logical_string())], ++ vec![ ++ TypeSignatureClass::Native(logical_string()), ++ TypeSignatureClass::Native(logical_null()), ++ ], + NativeType::String, +)]) +``` + +You can view [PR #15404] for more examples and implementation details. 
+ +[`typesignatureclass`]: https://docs.rs/datafusion-expr/latest/datafusion_expr/enum.TypeSignatureClass.html +[`nativetype::null`]: https://docs.rs/datafusion-common/latest/datafusion_common/types/enum.NativeType.html#variant.Null +[`signature::coercible`]: https://docs.rs/datafusion-expr/latest/datafusion_expr/enum.TypeSignature.html#variant.Coercible +[`coercion::implicit`]: https://docs.rs/datafusion-expr/latest/datafusion_expr/enum.Coercion.html#variant.Implicit +[pr #15404]: https://github.com/apache/datafusion/pull/15404 + ## DataFusion `46.0.0` ### Use `invoke_with_args` instead of `invoke()` and `invoke_batch()`