From b52714cb32d32bd9fe3b455d8973ca35c86b8a63 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Fri, 15 Nov 2024 00:26:34 +0800 Subject: [PATCH 1/5] unparse construct and access --- datafusion/functions-nested/src/planner.rs | 4 +- datafusion/sql/src/unparser/expr.rs | 55 +++++++++++++++++++++- 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/datafusion/functions-nested/src/planner.rs b/datafusion/functions-nested/src/planner.rs index 9ae2fa781d87..82787a65d506 100644 --- a/datafusion/functions-nested/src/planner.rs +++ b/datafusion/functions-nested/src/planner.rs @@ -34,7 +34,7 @@ use crate::{ make_array::make_array, }; -#[derive(Debug)] +#[derive(Default, Debug)] pub struct NestedFunctionPlanner; impl ExprPlanner for NestedFunctionPlanner { @@ -131,7 +131,7 @@ impl ExprPlanner for NestedFunctionPlanner { } } -#[derive(Debug)] +#[derive(Default, Debug)] pub struct FieldAccessPlanner; impl ExprPlanner for FieldAccessPlanner { diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 8f6ffa51f76a..573134519594 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -18,8 +18,8 @@ use datafusion_expr::expr::Unnest; use sqlparser::ast::Value::SingleQuotedString; use sqlparser::ast::{ - self, BinaryOperator, Expr as AstExpr, Function, Ident, Interval, ObjectName, - TimezoneInfo, UnaryOperator, + self, Array, BinaryOperator, Expr as AstExpr, Function, Ident, Interval, ObjectName, + Subscript, TimezoneInfo, UnaryOperator, }; use std::sync::Arc; use std::vec; @@ -476,6 +476,19 @@ impl Unparser<'_> { &self, func_name: &str, args: &[Expr], + ) -> Result { + match func_name { + "make_array" => self.make_array_to_sql(args), + "array_element" => self.array_element_to_sql(args), + // TODO: support for the construct and access functions of the `map` and `struct` types + _ => self.scalar_function_to_sql_internal(func_name, args), + } + } + + fn scalar_function_to_sql_internal( + &self, + func_name: &str, + args: &[Expr], ) -> Result { let args = self.function_args_to_sql(args)?; Ok(ast::Expr::Function(Function { @@ -496,6 +509,29 @@ impl Unparser<'_> { })) } + fn make_array_to_sql(&self, args: &[Expr]) -> Result { + let args = args + .iter() + .map(|e| self.expr_to_sql(e)) + .collect::>>()?; + Ok(ast::Expr::Array(Array { + elem: args, + named: false, + })) + } + + fn array_element_to_sql(&self, args: &[Expr]) -> Result { + if args.len() != 2 { + return internal_err!("array_element must have exactly 2 arguments"); + } + let array = self.expr_to_sql(&args[0])?; + let index = self.expr_to_sql(&args[1])?; + Ok(ast::Expr::Subscript { + expr: Box::new(array), + subscript: Box::new(Subscript::Index { index }), + }) + } + pub fn sort_to_sql(&self, sort: &Sort) -> Result { let Sort { expr, @@ -1483,8 +1519,10 @@ mod tests { Signature, Volatility, WindowFrame, WindowFunctionDefinition, }; use datafusion_expr::{interval_month_day_nano_lit, ExprFunctionExt}; + use datafusion_functions::core::named_struct; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::expr_fn::sum; + use datafusion_functions_nested::expr_fn::{array_element, make_array}; use datafusion_functions_window::row_number::row_number_udwf; use crate::unparser::dialect::{ @@ -1889,6 +1927,19 @@ mod tests { }), r#"UNNEST("table".array_col)"#, ), + (make_array(vec![lit(1), lit(2), lit(3)]), "[1, 2, 3]"), + (array_element(col("array_col"), lit(1)), "array_col[1]"), + ( + array_element(make_array(vec![lit(1), lit(2), lit(3)]), lit(1)), + "[1, 2, 3][1]", + ), + ( + Expr::ScalarFunction(ScalarFunction { + func: named_struct(), + args: vec![lit("c0"), lit(1), lit("c1"), lit(2)], + }), + "{c0: 1, c1: 2}", + ), ]; for (expr, expected) in tests { From ac2e8b4f37ac89833d303b2c4d7a4dea19b31bb9 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Fri, 15 Nov 2024 00:26:55 +0800 Subject: [PATCH 2/5] add sql e2e roundtrip --- datafusion/sql/tests/cases/plan_to_sql.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 94e420066d8b..00473430c6ad 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -40,6 +40,8 @@ use datafusion_expr::builder::{ table_scan_with_filter_and_fetch, table_scan_with_filters, }; use datafusion_functions::core::planner::CoreFunctionPlanner; +use datafusion_functions_nested::extract::array_element_udf; +use datafusion_functions_nested::planner::{FieldAccessPlanner, NestedFunctionPlanner}; use sqlparser::dialect::{Dialect, GenericDialect, MySqlDialect}; use sqlparser::parser::Parser; @@ -182,6 +184,11 @@ fn roundtrip_statement() -> Result<()> { SUM(id) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total FROM person GROUP BY GROUPING SETS ((id, first_name, last_name), (first_name, last_name), (last_name))"#, + "SELECT ARRAY[1, 2, 3]", + "SELECT ARRAY[1, 2, 3][1]", + "SELECT [1, 2, 3]", + "SELECT [1, 2, 3][1]", + "SELECT left[1] FROM array" ]; // For each test sql string, we transform as follows: @@ -195,10 +202,14 @@ fn roundtrip_statement() -> Result<()> { .try_with_sql(query)? .parse_statement()?; let state = MockSessionState::default() + .with_scalar_function(make_array_udf()) + .with_scalar_function(array_element_udf()) .with_aggregate_function(sum_udaf()) .with_aggregate_function(count_udaf()) .with_aggregate_function(max_udaf()) - .with_expr_planner(Arc::new(CoreFunctionPlanner::default())); + .with_expr_planner(Arc::new(CoreFunctionPlanner::default())) + .with_expr_planner(Arc::new(NestedFunctionPlanner::default())) + .with_expr_planner(Arc::new(FieldAccessPlanner::default())); let context = MockContextProvider { state }; let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); From 16ff14bbea18f93b73c8b986be51e727f40e3e7c Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Fri, 15 Nov 2024 00:37:25 +0800 Subject: [PATCH 3/5] remove unused tests --- datafusion/sql/src/unparser/expr.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index 573134519594..bdefef02cb75 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1933,13 +1933,6 @@ mod tests { array_element(make_array(vec![lit(1), lit(2), lit(3)]), lit(1)), "[1, 2, 3][1]", ), - ( - Expr::ScalarFunction(ScalarFunction { - func: named_struct(), - args: vec![lit("c0"), lit(1), lit("c1"), lit(2)], - }), - "{c0: 1, c1: 2}", - ), ]; for (expr, expected) in tests { From e7ec97353a2153617efb8f2c7d8dfa5161f6cea1 Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Fri, 15 Nov 2024 01:17:25 +0800 Subject: [PATCH 4/5] fix test and clippy --- datafusion/sql/src/unparser/expr.rs | 1 - datafusion/sql/tests/cases/plan_to_sql.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index bdefef02cb75..8664abd6543e 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -1519,7 +1519,6 @@ mod tests { Signature, Volatility, WindowFrame, WindowFunctionDefinition, }; use datafusion_expr::{interval_month_day_nano_lit, ExprFunctionExt}; - use datafusion_functions::core::named_struct; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::expr_fn::sum; use datafusion_functions_nested::expr_fn::{array_element, make_array}; diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 00473430c6ad..4b84395f197e 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -1250,6 +1250,6 @@ fn test_unnest_to_sql() { sql_round_trip( GenericDialect {}, r#"SELECT unnest(make_array(1, 2, 2, 5, NULL)) as u1"#, - r#"SELECT UNNEST(make_array(1, 2, 2, 5, NULL)) AS u1"#, + r#"SELECT UNNEST([1, 2, 2, 5, NULL]) AS u1"#, ); } From 8630128c62a3c7b5e37f74b95e53ef7c557b783f Mon Sep 17 00:00:00 2001 From: Jia-Xuan Liu Date: Fri, 15 Nov 2024 09:30:57 +0800 Subject: [PATCH 5/5] fix clippy --- datafusion/functions-nested/src/planner.rs | 5 ++--- datafusion/sql/tests/cases/plan_to_sql.rs | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/datafusion/functions-nested/src/planner.rs b/datafusion/functions-nested/src/planner.rs index 82787a65d506..1929b8222a1b 100644 --- a/datafusion/functions-nested/src/planner.rs +++ b/datafusion/functions-nested/src/planner.rs @@ -34,7 +34,7 @@ use crate::{ make_array::make_array, }; -#[derive(Default, Debug)] +#[derive(Debug)] pub struct NestedFunctionPlanner; impl ExprPlanner for NestedFunctionPlanner { @@ -131,9 +131,8 @@ impl ExprPlanner for NestedFunctionPlanner { } } -#[derive(Default, Debug)] +#[derive(Debug)] pub struct FieldAccessPlanner; - impl ExprPlanner for FieldAccessPlanner { fn plan_field_access( &self, diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 4b84395f197e..4f43d7333dd1 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -208,8 +208,8 @@ fn roundtrip_statement() -> Result<()> { .with_aggregate_function(count_udaf()) .with_aggregate_function(max_udaf()) .with_expr_planner(Arc::new(CoreFunctionPlanner::default())) - .with_expr_planner(Arc::new(NestedFunctionPlanner::default())) - .with_expr_planner(Arc::new(FieldAccessPlanner::default())); + .with_expr_planner(Arc::new(NestedFunctionPlanner)) + .with_expr_planner(Arc::new(FieldAccessPlanner)); let context = MockContextProvider { state }; let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap();