From c2e4ea5c96fb3b82cc8ec86bd077246e9dcfcf5a Mon Sep 17 00:00:00 2001 From: jackwener Date: Tue, 13 Jun 2023 14:54:33 +0800 Subject: [PATCH] fix: from_plan shouldn't use original schema --- datafusion/common/src/dfschema.rs | 8 +- datafusion/core/tests/sql/expr.rs | 15 ++- .../tests/sqllogictests/test_files/array.slt | 115 +++++++++--------- datafusion/expr/src/utils.rs | 11 +- 4 files changed, 79 insertions(+), 70 deletions(-) diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 0416086d81f3..292c19886b72 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -384,8 +384,12 @@ impl DFSchema { let self_fields = self.fields().iter(); let other_fields = other.fields().iter(); self_fields.zip(other_fields).all(|(f1, f2)| { - f1.qualifier() == f2.qualifier() - && f1.name() == f2.name() + // TODO: resolve field when exist alias + // f1.qualifier() == f2.qualifier() + // && f1.name() == f2.name() + // column(t1.a) field is "t1"."a" + // column(x) as t1.a field is ""."t1.a" + f1.qualified_name() == f2.qualified_name() && Self::datatype_is_semantically_equal(f1.data_type(), f2.data_type()) }) } diff --git a/datafusion/core/tests/sql/expr.rs b/datafusion/core/tests/sql/expr.rs index 6783670545c3..53552b16959f 100644 --- a/datafusion/core/tests/sql/expr.rs +++ b/datafusion/core/tests/sql/expr.rs @@ -512,15 +512,22 @@ async fn test_regex_expressions() -> Result<()> { #[tokio::test] async fn test_cast_expressions() -> Result<()> { + test_expression!("CAST('0' AS INT)", "0"); + test_expression!("CAST(NULL AS INT)", "NULL"); + test_expression!("TRY_CAST('0' AS INT)", "0"); + test_expression!("TRY_CAST('x' AS INT)", "NULL"); + Ok(()) +} + +#[tokio::test] +#[ignore] +// issue: https://github.com/apache/arrow-datafusion/issues/6596 +async fn test_array_cast_expressions() -> Result<()> { test_expression!("CAST([1,2,3,4] AS INT[])", "[1, 2, 3, 4]"); test_expression!( "CAST([1,2,3,4] AS NUMERIC(10,4)[])", "[1.0000, 2.0000, 3.0000, 4.0000]" ); - test_expression!("CAST('0' AS INT)", "0"); - test_expression!("CAST(NULL AS INT)", "NULL"); - test_expression!("TRY_CAST('0' AS INT)", "0"); - test_expression!("TRY_CAST('x' AS INT)", "NULL"); Ok(()) } diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt index 183522138044..45c9a452fbd4 100644 --- a/datafusion/core/tests/sqllogictests/test_files/array.slt +++ b/datafusion/core/tests/sqllogictests/test_files/array.slt @@ -61,17 +61,18 @@ select make_array(make_array()), make_array(make_array(make_array())) ---- [[]] [[[]]] +# TODO issue: https://github.com/apache/arrow-datafusion/issues/6596 # array_append scalar function #1 -query ? rowsort +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) select array_append(make_array(), 4); ----- -[4] # array_append scalar function #2 -query ?? rowsort +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) select array_append(make_array(), make_array()), array_append(make_array(), make_array(4)); ----- -[[]] [[4]] # array_append scalar function #3 query ??? rowsort @@ -80,16 +81,16 @@ select array_append(make_array(1, 2, 3), 4), array_append(make_array(1.0, 2.0, 3 [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] # array_prepend scalar function #1 -query ? rowsort +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) select array_prepend(4, make_array()); ----- -[4] # array_prepend scalar function #2 -query ?? rowsort +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) select array_prepend(make_array(), make_array()), array_prepend(make_array(4), make_array()); ----- -[[]] [[4]] # array_prepend scalar function #3 query ??? rowsort @@ -98,22 +99,22 @@ select array_prepend(1, make_array(2, 3, 4)), array_prepend(1.0, make_array(2.0, [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] # array_fill scalar function #1 -query ??? rowsort +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) select array_fill(11, make_array(1, 2, 3)), array_fill(3, make_array(2, 3)), array_fill(2, make_array(2)); ----- -[[[11, 11, 11], [11, 11, 11]]] [[3, 3, 3], [3, 3, 3]] [2, 2] # array_fill scalar function #2 -query ?? rowsort +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) select array_fill(1, make_array(1, 1, 1)), array_fill(2, make_array(2, 2, 2, 2, 2)); ----- -[[[1]]] [[[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]], [[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]]] # array_fill scalar function #3 -query ? +query error DataFusion error: SQL error: TokenizerError\("Unterminated string literal at Line: 2, Column 856"\) +caused by +Internal error: Optimizer rule 'simplify_expressions' failed, due to generate a different schema, original schema: DFSchema \{ fields: \[DFField \{ qualifier: None, field: Field \{ name: "array_fill\(Int64\(1\),make_array\(\)\)", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \} \}\], metadata: \{\} \}, new schema: DFSchema \{ fields: \[DFField \{ qualifier: None, field: Field \{ name: "array_fill\(Int64\(1\),make_array\(\)\)", data_type: List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: \{\} \} \}\], metadata: \{\} \}\. This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker select array_fill(1, make_array()) ----- -[] # array_concat scalar function #1 query ?? rowsort @@ -146,10 +147,10 @@ select array_concat(make_array(2, 3), make_array()); [2, 3] # array_concat scalar function #6 -query ? rowsort +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) select array_concat(make_array(), make_array(2, 3)); ----- -[2, 3] # array_position scalar function #1 query III @@ -164,10 +165,10 @@ select array_position(['h', 'e', 'l', 'l', 'o'], 'l', 4), array_position([1, 2, 4 5 2 # array_positions scalar function -query III +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: UInt8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to UInt8 select array_positions(['h', 'e', 'l', 'l', 'o'], 'l'), array_positions([1, 2, 3, 4, 5], 5), array_positions([1, 1, 1], 1); ----- -[3, 4] [5] [1, 2, 3] # array_replace scalar function query ??? @@ -176,22 +177,22 @@ select array_replace(make_array(1, 2, 3, 4), 2, 3), array_replace(make_array(1, [1, 3, 3, 4] [1, 0, 0, 5, 0, 6, 7] [1, 2, 3] # array_to_string scalar function -query ??? +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Arrow error: Cast error: Cannot cast string '1\-2\-3\-4\-5' to value of Int64 type select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2, 3, 4, 5], '-'), array_to_string([1.0, 2.0, 3.0], '|'); ----- -h,e,l,l,o 1-2-3-4-5 1|2|3 # array_to_string scalar function #2 -query ??? +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Arrow error: Cast error: Cannot cast string '1\+2\+3\+4\+5\+6' to value of Int64 type select array_to_string([1, 1, 1], '1'), array_to_string([[1, 2], [3, 4], [5, 6]], '+'), array_to_string(array_fill(3, [3, 2, 2]), '/\'); ----- -11111 1+2+3+4+5+6 3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3 # array_to_string scalar function #3 -query ? +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert Utf8 to List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) select array_to_string(make_array(), ',') ----- -(empty) # cardinality scalar function query III @@ -200,10 +201,10 @@ select cardinality(make_array(1, 2, 3, 4, 5)), cardinality([1, 3, 5]), cardinali 5 3 5 # cardinality scalar function #2 -query II +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) select cardinality(make_array([1, 2], [3, 4], [5, 6])), cardinality(array_fill(3, array[3, 2, 3])); ----- -6 18 # cardinality scalar function #3 query II @@ -218,10 +219,10 @@ select trim_array(make_array(1, 2, 3, 4, 5), 2), trim_array(['h', 'e', 'l', 'l', [1, 2, 3] [h, e] [1.0] # trim_array scalar function #2 -query ?? +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) select trim_array([[1, 2], [3, 4], [5, 6]], 2), trim_array(array_fill(4, [3, 4, 2]), 2); ----- -[[1, 2]] [[[4, 4], [4, 4], [4, 4], [4, 4]]] # trim_array scalar function #3 query ? @@ -254,10 +255,10 @@ select array_length(make_array(1, 2, 3, 4, 5), 2), array_length(make_array(1, 2, NULL NULL 2 # array_length scalar function #4 -query IIII rowsort +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) select array_length(array_fill(3, [3, 2, 5]), 1), array_length(array_fill(3, [3, 2, 5]), 2), array_length(array_fill(3, [3, 2, 5]), 3), array_length(array_fill(3, [3, 2, 5]), 4); ----- -3 2 5 NULL # array_length scalar function #5 query III rowsort @@ -266,22 +267,22 @@ select array_length(make_array()), array_length(make_array(), 1), array_length(m 0 0 NULL # array_dims scalar function -query III rowsort +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: UInt8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to UInt8 select array_dims(make_array(1, 2, 3)), array_dims(make_array([1, 2], [3, 4])), array_dims(make_array([[[[1], [2]]]])); ----- -[3] [2, 2] [1, 1, 1, 2, 1] # array_dims scalar function #2 -query II rowsort +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) select array_dims(array_fill(2, [1, 2, 3])), array_dims(array_fill(3, [2, 5, 4])); ----- -[1, 2, 3] [2, 5, 4] # array_dims scalar function #3 -query II rowsort +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: UInt8, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to UInt8 select array_dims(make_array()), array_dims(make_array(make_array())) ----- -[0] [1, 0] # array_ndims scalar function query III rowsort @@ -290,10 +291,10 @@ select array_ndims(make_array(1, 2, 3)), array_ndims(make_array([1, 2], [3, 4])) 1 2 5 # array_ndims scalar function #2 -query II rowsort +query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) +caused by +Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) select array_ndims(array_fill(1, [1, 2, 3])), array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]); ----- -3 21 # array_ndims scalar function #3 query II rowsort diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index c2eabea85727..05ebab3d9998 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -730,13 +730,10 @@ pub fn from_plan( inputs: &[LogicalPlan], ) -> Result { match plan { - LogicalPlan::Projection(Projection { schema, .. }) => { - Ok(LogicalPlan::Projection(Projection::try_new_with_schema( - expr.to_vec(), - Arc::new(inputs[0].clone()), - schema.clone(), - )?)) - } + LogicalPlan::Projection(_) => Ok(LogicalPlan::Projection(Projection::try_new( + expr.to_vec(), + Arc::new(inputs[0].clone()), + )?)), LogicalPlan::Dml(DmlStatement { table_name, table_schema,