From ced176a24c06eab50494c73478d5ce23cced7d1a Mon Sep 17 00:00:00 2001 From: veeupup Date: Wed, 29 Nov 2023 22:47:41 +0800 Subject: [PATCH 1/2] Implement Aliases for ScalarUDF Signed-off-by: veeupup --- datafusion/core/src/execution/context/mod.rs | 10 ++++- .../user_defined_scalar_functions.rs | 37 +++++++++++++++++++ datafusion/expr/src/udf.rs | 34 +++++++++++++++++ 3 files changed, 79 insertions(+), 2 deletions(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 46388f990a9a..8241f986afdc 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -811,8 +811,14 @@ impl SessionContext { /// - `SELECT MY_FUNC(x)...` will look for a function named `"my_func"` /// - `SELECT "my_FUNC"(x)` will look for a function named `"my_FUNC"` pub fn register_udf(&self, f: ScalarUDF) { - self.state - .write() + let mut state = self.state.write(); + let aliases = f.aliases(); + for alias in aliases { + state + .scalar_functions + .insert(alias.to_string(), Arc::new(f.clone())); + } + state .scalar_functions .insert(f.name().to_string(), Arc::new(f)); } diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index 1c7e7137290f..985b0bd5bc76 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -341,6 +341,43 @@ async fn case_sensitive_identifiers_user_defined_functions() -> Result<()> { Ok(()) } +#[tokio::test] +async fn test_user_defined_functions_with_alias() -> Result<()> { + let ctx = SessionContext::new(); + let arr = Int32Array::from(vec![1]); + let batch = RecordBatch::try_from_iter(vec![("i", Arc::new(arr) as _)])?; + ctx.register_batch("t", batch).unwrap(); + + let myfunc = |args: &[ArrayRef]| Ok(Arc::clone(&args[0])); + let myfunc = make_scalar_function(myfunc); + + let udf = create_udf( + "dummy", + vec![DataType::Int32], + Arc::new(DataType::Int32), + Volatility::Immutable, + myfunc, + ) + .with_aliases(vec!["dummy_alias"]); + + ctx.register_udf(udf); + + let expected = [ + "+------------+", + "| dummy(t.i) |", + "+------------+", + "| 1 |", + "+------------+", + ]; + let result = plan_and_collect(&ctx, "SELECT dummy(i) FROM t").await?; + assert_batches_eq!(expected, &result); + + let alias_result = plan_and_collect(&ctx, "SELECT dummy_alias(i) FROM t").await?; + assert_batches_eq!(expected, &alias_result); + + Ok(()) +} + fn create_udf_context() -> SessionContext { let ctx = SessionContext::new(); // register a custom UDF diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index bc910b928a5d..3bbfd08637cb 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -49,6 +49,8 @@ pub struct ScalarUDF { /// the batch's row count (so that the generative zero-argument function can know /// the result array size). fun: ScalarFunctionImplementation, + /// Optional aliases for the function + aliases: Vec, } impl Debug for ScalarUDF { @@ -89,6 +91,34 @@ impl ScalarUDF { signature: signature.clone(), return_type: return_type.clone(), fun: fun.clone(), + aliases: vec![], + } + } + + /// Create a new ScalarUDF with aliases + pub fn new_with_aliases( + name: &str, + signature: &Signature, + return_type: &ReturnTypeFunction, + fun: &ScalarFunctionImplementation, + aliases: Vec, + ) -> Self { + Self { + name: name.to_owned(), + signature: signature.clone(), + return_type: return_type.clone(), + fun: fun.clone(), + aliases, + } + } + + pub fn with_aliases(self, aliases: Vec<&str>) -> Self { + Self { + name: self.name, + signature: self.signature, + return_type: self.return_type, + fun: self.fun, + aliases: aliases.iter().map(|s| s.to_string()).collect(), } } @@ -106,6 +136,10 @@ impl ScalarUDF { &self.name } + pub fn aliases(&self) -> &[String] { + &self.aliases + } + /// Returns this function's signature (what input types are accepted) pub fn signature(&self) -> &Signature { &self.signature From f495f7762a8b9ed0fe6c2ea1522306b154a23849 Mon Sep 17 00:00:00 2001 From: veeupup Date: Thu, 30 Nov 2023 09:03:20 +0800 Subject: [PATCH 2/2] fix comments Signed-off-by: veeupup --- datafusion/core/src/execution/context/mod.rs | 1 + datafusion/expr/src/udf.rs | 34 ++++++-------------- 2 files changed, 10 insertions(+), 25 deletions(-) diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 8241f986afdc..dbebedce3c97 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -810,6 +810,7 @@ impl SessionContext { /// /// - `SELECT MY_FUNC(x)...` will look for a function named `"my_func"` /// - `SELECT "my_FUNC"(x)` will look for a function named `"my_FUNC"` + /// Any functions registered with the udf name or its aliases will be overwritten with this new function pub fn register_udf(&self, f: ScalarUDF) { let mut state = self.state.write(); let aliases = f.aliases(); diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 3bbfd08637cb..3a18ca2d25e8 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -49,7 +49,7 @@ pub struct ScalarUDF { /// the batch's row count (so that the generative zero-argument function can know /// the result array size). fun: ScalarFunctionImplementation, - /// Optional aliases for the function + /// Optional aliases for the function. This list should NOT include the value of `name` as well aliases: Vec, } @@ -95,31 +95,14 @@ impl ScalarUDF { } } - /// Create a new ScalarUDF with aliases - pub fn new_with_aliases( - name: &str, - signature: &Signature, - return_type: &ReturnTypeFunction, - fun: &ScalarFunctionImplementation, - aliases: Vec, + /// Adds additional names that can be used to invoke this function, in addition to `name` + pub fn with_aliases( + mut self, + aliases: impl IntoIterator, ) -> Self { - Self { - name: name.to_owned(), - signature: signature.clone(), - return_type: return_type.clone(), - fun: fun.clone(), - aliases, - } - } - - pub fn with_aliases(self, aliases: Vec<&str>) -> Self { - Self { - name: self.name, - signature: self.signature, - return_type: self.return_type, - fun: self.fun, - aliases: aliases.iter().map(|s| s.to_string()).collect(), - } + self.aliases + .extend(aliases.into_iter().map(|s| s.to_string())); + self } /// creates a logical expression with a call of the UDF @@ -136,6 +119,7 @@ impl ScalarUDF { &self.name } + /// Returns the aliases for this function. See [`ScalarUDF::with_aliases`] for more details pub fn aliases(&self) -> &[String] { &self.aliases }