From 556b4824754e5ee38f96680f7968d1f2557908e0 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Tue, 2 Jul 2024 21:12:16 +0800 Subject: [PATCH 1/7] struct literal Signed-off-by: jayzhan211 --- .../core/src/execution/session_state.rs | 5 +++ datafusion/expr/src/planner.rs | 15 +++++++ datafusion/functions/src/core/mod.rs | 1 + datafusion/functions/src/core/planner.rs | 40 +++++++++++++++++++ datafusion/sql/src/expr/identifier.rs | 1 + datafusion/sql/src/expr/mod.rs | 37 +++++++++++++++++ datafusion/sqllogictest/test_files/struct.slt | 19 +++++++++ 7 files changed, 118 insertions(+) create mode 100644 datafusion/functions/src/core/planner.rs diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index aa81d77cf682..32a524fc2d39 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -958,6 +958,11 @@ impl SessionState { query = query.with_user_defined_planner(planner.clone()); } + let core_function_planner = + Arc::new(functions::core::planner::CoreFunctionPlanner::default()) as _; + let query = query + .with_user_defined_planner(core_function_planner); + // register crate of array expressions (if enabled) #[cfg(feature = "array_expressions")] { diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index c928ab39194d..442f5e8aead6 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -110,6 +110,15 @@ pub trait UserDefinedSQLPlanner: Send + Sync { ) -> Result>> { Ok(PlannerResult::Original(exprs)) } + + // Plan the dictionaray literal { key: value, ...} + fn plan_dictionary_literal( + &self, + expr: RawDictionaryExpr, + _schema: &DFSchema, + ) -> Result> { + Ok(PlannerResult::Original(expr)) + } } /// An operator with two arguments to plan @@ -136,6 +145,12 @@ pub struct RawFieldAccessExpr { pub expr: Expr, } +/// A dictionary expression { key: value, ...} +pub struct RawDictionaryExpr { + pub keys: Vec, + pub values: Vec, +} + /// Result of planning a raw expr with [`UserDefinedSQLPlanner`] #[derive(Debug, Clone)] pub enum PlannerResult { diff --git a/datafusion/functions/src/core/mod.rs b/datafusion/functions/src/core/mod.rs index a2742220f3e9..5761a4bd39cc 100644 --- a/datafusion/functions/src/core/mod.rs +++ b/datafusion/functions/src/core/mod.rs @@ -29,6 +29,7 @@ pub mod named_struct; pub mod nullif; pub mod nvl; pub mod nvl2; +pub mod planner; pub mod r#struct; // create UDFs diff --git a/datafusion/functions/src/core/planner.rs b/datafusion/functions/src/core/planner.rs new file mode 100644 index 000000000000..71f6c7f8ac72 --- /dev/null +++ b/datafusion/functions/src/core/planner.rs @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_common::DFSchema; +use datafusion_common::Result; +use datafusion_expr::planner::{PlannerResult, RawDictionaryExpr, UserDefinedSQLPlanner}; + +use super::named_struct; + +#[derive(Default)] +pub struct CoreFunctionPlanner {} + +impl UserDefinedSQLPlanner for CoreFunctionPlanner { + fn plan_dictionary_literal( + &self, + expr: RawDictionaryExpr, + _schema: &DFSchema, + ) -> Result> { + let mut args = vec![]; + for (k, v) in expr.keys.into_iter().zip(expr.values.into_iter()) { + args.push(k); + args.push(v); + } + Ok(PlannerResult::Planned(named_struct().call(args))) + } +} diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs index d297b2e4df5b..12fb5457130a 100644 --- a/datafusion/sql/src/expr/identifier.rs +++ b/datafusion/sql/src/expr/identifier.rs @@ -135,6 +135,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let nested_name = nested_names[0].to_string(); let col = Expr::Column(Column::from((qualifier, field))); + println!("col: {:?} nested_name: {:?}", col, nested_name); if let Some(udf) = self.context_provider.get_function_meta("get_field") { diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 786ea288fa0e..5cd2e5b7c77d 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -18,6 +18,7 @@ use arrow_schema::DataType; use arrow_schema::TimeUnit; use datafusion_expr::planner::PlannerResult; +use datafusion_expr::planner::RawDictionaryExpr; use datafusion_expr::planner::RawFieldAccessExpr; use sqlparser::ast::{CastKind, Expr as SQLExpr, Subscript, TrimWhereField, Value}; @@ -619,10 +620,46 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { } }, ))), + SQLExpr::Dictionary(fields) => { + let mut keys = vec![]; + let mut values = vec![]; + for field in fields { + // convert ident to literal + let key = lit(field.key.value); + let value = self.sql_expr_to_logical_expr( + *field.value, + schema, + planner_context, + )?; + keys.push(key); + values.push(value); + } + + let expr = RawDictionaryExpr { keys, values }; + self.try_plan_dictionary_literal(expr, schema) + } _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), } } + fn try_plan_dictionary_literal( + &self, + expr: RawDictionaryExpr, + schema: &DFSchema, + ) -> Result { + let mut raw_expr = expr; + for planner in self.planners.iter() { + match planner.plan_dictionary_literal(raw_expr, schema)? { + PlannerResult::Planned(expr) => { + return Ok(expr); + } + PlannerResult::Original(expr) => raw_expr = expr, + } + } + + internal_err!("Expected a simplified result, but none was found") + } + /// Parses a struct(..) expression fn parse_struct( &self, diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index 749daa7e20e7..af64973378a0 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -162,6 +162,13 @@ select named_struct('scalar', 27, 'array', values.a, 'null', NULL) from values; {scalar: 27, array: 2, null: } {scalar: 27, array: 3, null: } +query ? +select {'scalar': 27, 'array': values.a, 'null': NULL} from values; +---- +{scalar: 27, array: 1, null: } +{scalar: 27, array: 2, null: } +{scalar: 27, array: 3, null: } + # named_struct with mixed scalar and array values #2 query ? select named_struct('array', values.a, 'scalar', 27, 'null', NULL) from values; @@ -170,6 +177,13 @@ select named_struct('array', values.a, 'scalar', 27, 'null', NULL) from values; {array: 2, scalar: 27, null: } {array: 3, scalar: 27, null: } +query ? +select {'array': values.a, 'scalar': 27, 'null': NULL} from values; +---- +{array: 1, scalar: 27, null: } +{array: 2, scalar: 27, null: } +{array: 3, scalar: 27, null: } + # named_struct with mixed scalar and array values #3 query ? select named_struct('null', NULL, 'array', values.a, 'scalar', 27) from values; @@ -207,3 +221,8 @@ query T select arrow_typeof(named_struct('first', 1, 'second', 2, 'third', 3)); ---- Struct([Field { name: "first", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "second", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "third", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) + +query T +select arrow_typeof({'first': 1, 'second': 2, 'third': 3}); +---- +Struct([Field { name: "first", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "second", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "third", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) From 9498da082ab35347b81a88d766c26026373773ec Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Tue, 2 Jul 2024 21:18:46 +0800 Subject: [PATCH 2/7] add nested Signed-off-by: jayzhan211 --- datafusion/sql/src/expr/identifier.rs | 1 - datafusion/sqllogictest/test_files/struct.slt | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/expr/identifier.rs b/datafusion/sql/src/expr/identifier.rs index 12fb5457130a..d297b2e4df5b 100644 --- a/datafusion/sql/src/expr/identifier.rs +++ b/datafusion/sql/src/expr/identifier.rs @@ -135,7 +135,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let nested_name = nested_names[0].to_string(); let col = Expr::Column(Column::from((qualifier, field))); - println!("col: {:?} nested_name: {:?}", col, nested_name); if let Some(udf) = self.context_provider.get_function_meta("get_field") { diff --git a/datafusion/sqllogictest/test_files/struct.slt b/datafusion/sqllogictest/test_files/struct.slt index af64973378a0..fd6e25ea749d 100644 --- a/datafusion/sqllogictest/test_files/struct.slt +++ b/datafusion/sqllogictest/test_files/struct.slt @@ -226,3 +226,9 @@ query T select arrow_typeof({'first': 1, 'second': 2, 'third': 3}); ---- Struct([Field { name: "first", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "second", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "third", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }]) + +# test nested struct literal +query ? +select {'animal': {'cat': 1, 'dog': 2, 'bird': {'parrot': 3, 'canary': 1}}, 'genre': {'fiction': ['mystery', 'sci-fi', 'fantasy'], 'non-fiction': {'biography': 5, 'history': 7, 'science': {'physics': 2, 'biology': 3}}}, 'vehicle': {'car': {'sedan': 4, 'suv': 2}, 'bicycle': 3, 'boat': ['sailboat', 'motorboat']}, 'weather': {'sunny': True, 'temperature': 25.5, 'wind': {'speed': 10, 'direction': 'NW'}}}; +---- +{animal: {cat: 1, dog: 2, bird: {parrot: 3, canary: 1}}, genre: {fiction: [mystery, sci-fi, fantasy], non-fiction: {biography: 5, history: 7, science: {physics: 2, biology: 3}}}, vehicle: {car: {sedan: 4, suv: 2}, bicycle: 3, boat: [sailboat, motorboat]}, weather: {sunny: true, temperature: 25.5, wind: {speed: 10, direction: NW}}} From 5a046da58556d6653afe8316e309a28793d9c8e2 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Tue, 2 Jul 2024 22:00:25 +0800 Subject: [PATCH 3/7] fmt Signed-off-by: jayzhan211 --- datafusion/core/src/execution/session_state.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 32a524fc2d39..26278ca43ee3 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -960,8 +960,7 @@ impl SessionState { let core_function_planner = Arc::new(functions::core::planner::CoreFunctionPlanner::default()) as _; - let query = query - .with_user_defined_planner(core_function_planner); + let query = query.with_user_defined_planner(core_function_planner); // register crate of array expressions (if enabled) #[cfg(feature = "array_expressions")] From 904575a078036d337dd3e468c76adcaab632f6e0 Mon Sep 17 00:00:00 2001 From: jayzhan211 Date: Tue, 2 Jul 2024 22:01:39 +0800 Subject: [PATCH 4/7] rm useless comment Signed-off-by: jayzhan211 --- datafusion/sql/src/expr/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 5cd2e5b7c77d..0151518fa17b 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -624,7 +624,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let mut keys = vec![]; let mut values = vec![]; for field in fields { - // convert ident to literal let key = lit(field.key.value); let value = self.sql_expr_to_logical_expr( *field.value, From 8c3fdc66c470dfb860dc11414e85ddc73c1aae3b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 3 Jul 2024 16:36:37 -0400 Subject: [PATCH 5/7] switch to NYI error, derive debug/clone --- datafusion/expr/src/planner.rs | 1 + datafusion/sql/src/expr/mod.rs | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index 736a8a0f1ec5..3281df00c035 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -152,6 +152,7 @@ pub struct RawFieldAccessExpr { } /// A dictionary expression { key: value, ...} +#[derive(Debug, Clone)] pub struct RawDictionaryExpr { pub keys: Vec, pub values: Vec, diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index ec06a86eda3a..d9623b57cad1 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -655,8 +655,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { PlannerResult::Original(expr) => raw_expr = expr, } } - - internal_err!("Expected a simplified result, but none was found") + not_impl_err!("Unsupported dictionary literal: {raw_expr:?}") } /// Parses a struct(..) expression From 3da39fda9c822744885c10cb83c894c99f59c4fa Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 3 Jul 2024 16:39:27 -0400 Subject: [PATCH 6/7] improve documentation strings --- datafusion/expr/src/planner.rs | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs index 3281df00c035..bba0228ae0aa 100644 --- a/datafusion/expr/src/planner.rs +++ b/datafusion/expr/src/planner.rs @@ -84,7 +84,8 @@ pub trait ContextProvider { /// This trait allows users to customize the behavior of the SQL planner pub trait UserDefinedSQLPlanner: Send + Sync { - /// Plan the binary operation between two expressions, returns OriginalBinaryExpr if not possible + /// Plan the binary operation between two expressions, returns original + /// BinaryExpr if not possible fn plan_binary_op( &self, expr: RawBinaryExpr, @@ -93,7 +94,9 @@ pub trait UserDefinedSQLPlanner: Send + Sync { Ok(PlannerResult::Original(expr)) } - /// Plan the field access expression, returns OriginalFieldAccessExpr if not possible + /// Plan the field access expression + /// + /// returns original FieldAccessExpr if not possible fn plan_field_access( &self, expr: RawFieldAccessExpr, @@ -102,7 +105,9 @@ pub trait UserDefinedSQLPlanner: Send + Sync { Ok(PlannerResult::Original(expr)) } - // Plan the array literal, returns OriginalArray if not possible + /// Plan the array literal, returns OriginalArray if not possible + /// + /// Returns origin expression arguments if not possible fn plan_array_literal( &self, exprs: Vec, @@ -111,7 +116,9 @@ pub trait UserDefinedSQLPlanner: Send + Sync { Ok(PlannerResult::Original(exprs)) } - // Plan the dictionaray literal { key: value, ...} + /// Plan the dictionary literal `{ key: value, ...}` + /// + /// Returns origin expression arguments if not possible fn plan_dictionary_literal( &self, expr: RawDictionaryExpr, @@ -120,8 +127,9 @@ pub trait UserDefinedSQLPlanner: Send + Sync { Ok(PlannerResult::Original(expr)) } - // Plan the Extract expression, e.g., EXTRACT(month FROM foo) - // returns origin expression arguments if not possible + /// Plan an extract expression, e.g., `EXTRACT(month FROM foo)` + /// + /// Returns origin expression arguments if not possible fn plan_extract(&self, args: Vec) -> Result>> { Ok(PlannerResult::Original(args)) } @@ -151,7 +159,10 @@ pub struct RawFieldAccessExpr { pub expr: Expr, } -/// A dictionary expression { key: value, ...} +/// A Dictionary literal expression `{ key: value, ...}` +/// +/// This structure is used by [`UserDefinedSQLPlanner`] to plan operators with +/// custom expressions. #[derive(Debug, Clone)] pub struct RawDictionaryExpr { pub keys: Vec, From 1f91f0ff5f15f6471ede0cf945144ac04b1e6ed2 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 3 Jul 2024 17:46:21 -0400 Subject: [PATCH 7/7] Avoid stack overflow by putting code in a new function --- datafusion/sql/src/expr/mod.rs | 36 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index d9623b57cad1..2ddd2d22c022 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -20,7 +20,9 @@ use arrow_schema::TimeUnit; use datafusion_expr::planner::PlannerResult; use datafusion_expr::planner::RawDictionaryExpr; use datafusion_expr::planner::RawFieldAccessExpr; -use sqlparser::ast::{CastKind, Expr as SQLExpr, Subscript, TrimWhereField, Value}; +use sqlparser::ast::{ + CastKind, DictionaryField, Expr as SQLExpr, Subscript, TrimWhereField, Value, +}; use datafusion_common::{ internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, Result, @@ -621,21 +623,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { }, ))), SQLExpr::Dictionary(fields) => { - let mut keys = vec![]; - let mut values = vec![]; - for field in fields { - let key = lit(field.key.value); - let value = self.sql_expr_to_logical_expr( - *field.value, - schema, - planner_context, - )?; - keys.push(key); - values.push(value); - } - - let expr = RawDictionaryExpr { keys, values }; - self.try_plan_dictionary_literal(expr, schema) + self.try_plan_dictionary_literal(fields, schema, planner_context) } _ => not_impl_err!("Unsupported ast node in sqltorel: {sql:?}"), } @@ -643,10 +631,22 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { fn try_plan_dictionary_literal( &self, - expr: RawDictionaryExpr, + fields: Vec, schema: &DFSchema, + planner_context: &mut PlannerContext, ) -> Result { - let mut raw_expr = expr; + let mut keys = vec![]; + let mut values = vec![]; + for field in fields { + let key = lit(field.key.value); + let value = + self.sql_expr_to_logical_expr(*field.value, schema, planner_context)?; + keys.push(key); + values.push(value); + } + + let mut raw_expr = RawDictionaryExpr { keys, values }; + for planner in self.planners.iter() { match planner.plan_dictionary_literal(raw_expr, schema)? { PlannerResult::Planned(expr) => {