diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt index cf20c14cac76..e2c83a14cbfb 100644 --- a/datafusion/core/tests/sqllogictests/test_files/array.slt +++ b/datafusion/core/tests/sqllogictests/test_files/array.slt @@ -344,22 +344,22 @@ select array_prepend(100.1, column2), array_prepend('.', column3) from arrays; ## array_fill # array_fill scalar function #1 -query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) -caused by -Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) +query ??? select array_fill(11, make_array(1, 2, 3)), array_fill(3, make_array(2, 3)), array_fill(2, make_array(2)); +---- +[[[11, 11, 11], [11, 11, 11]]] [[3, 3, 3], [3, 3, 3]] [2, 2] # array_fill scalar function #2 -query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) -caused by -Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) +query ?? select array_fill(1, make_array(1, 1, 1)), array_fill(2, make_array(2, 2, 2, 2, 2)); +---- +[[[1]]] [[[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]], [[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]]] # array_fill scalar function #3 -query error DataFusion error: SQL error: TokenizerError\("Unterminated string literal at Line: 2, Column 856"\) -caused by -Internal error: Optimizer rule 'simplify_expressions' failed, due to generate a different schema, original schema: DFSchema \{ fields: \[DFField \{ qualifier: None, field: Field \{ name: "array_fill\(Int64\(1\),make_array\(\)\)", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \} \}\], metadata: \{\} \}, new schema: DFSchema \{ fields: \[DFField \{ qualifier: None, field: Field \{ name: "array_fill\(Int64\(1\),make_array\(\)\)", data_type: List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: \{\} \} \}\], metadata: \{\} \}\. This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker +query ? select array_fill(1, make_array()) +---- +[] ## array_concat @@ -498,16 +498,16 @@ select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2, 3 h,e,l,l,o 1-2-3-4-5 1|2|3 # array_to_string scalar function #2 -query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) -caused by -Arrow error: Cast error: Cannot cast string '1\+2\+3\+4\+5\+6' to value of Int64 type +query TTT select array_to_string([1, 1, 1], '1'), array_to_string([[1, 2], [3, 4], [5, 6]], '+'), array_to_string(array_fill(3, [3, 2, 2]), '/\'); +---- +11111 1+2+3+4+5+6 3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3 # array_to_string scalar function #3 -query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) -caused by -Error during planning: Cannot automatically convert Utf8 to List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) +query T select array_to_string(make_array(), ',') +---- +(empty) # array_to_string scalar function with nulls #1 query TTT @@ -521,6 +521,56 @@ select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_ ---- h,-,-,-,o nil-2-nil-4-5 1|0|3 +# array_to_string with columns #1 + +# For reference +# select column1, column4 from arrays_values; +# ---- +# [, 2, 3, 4, 5, 6, 7, 8, 9, 10] , +# [11, 12, 13, 14, 15, 16, 17, 18, , 20] . +# [21, 22, 23, , 25, 26, 27, 28, 29, 30] - +# [31, 32, 33, 34, 35, , 37, 38, 39, 40] ok +# NULL @ +# [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] $ +# [51, 52, , 54, 55, 56, 57, 58, 59, 60] ^ +# [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] NULL + +query T +select array_to_string(column1, column4) from arrays_values; +---- +2,3,4,5,6,7,8,9,10 +11.12.13.14.15.16.17.18.20 +21-22-23-25-26-27-28-29-30 +31ok32ok33ok34ok35ok37ok38ok39ok40 +NULL +41$42$43$44$45$46$47$48$49$50 +51^52^54^55^56^57^58^59^60 +NULL + +query TT +select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from arrays_values; +---- +2_3_4_5_6_7_8_9_10 1/2/3 +11_12_13_14_15_16_17_18_20 1/2/3 +21_22_23_25_26_27_28_29_30 1/2/3 +31_32_33_34_35_37_38_39_40 1/2/3 +NULL 1/2/3 +41_42_43_44_45_46_47_48_49_50 1/2/3 +51_52_54_55_56_57_58_59_60 1/2/3 +61_62_63_64_65_66_67_68_69_70 1/2/3 + +query TT +select array_to_string(column1, '_', '*'), array_to_string(make_array(make_array(1,2,3)), '.') from arrays_values; +---- +*_2_3_4_5_6_7_8_9_10 1.2.3 +11_12_13_14_15_16_17_18_*_20 1.2.3 +21_22_23_*_25_26_27_28_29_30 1.2.3 +31_32_33_34_35_*_37_38_39_40 1.2.3 +NULL 1.2.3 +41_42_43_44_45_46_47_48_49_50 1.2.3 +51_52_*_54_55_56_57_58_59_60 1.2.3 +61_62_63_64_65_66_67_68_69_70 1.2.3 + ## cardinality # cardinality scalar function @@ -530,10 +580,10 @@ select cardinality(make_array(1, 2, 3, 4, 5)), cardinality([1, 3, 5]), cardinali 5 3 5 # cardinality scalar function #2 -query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) -caused by -Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) +query II select cardinality(make_array([1, 2], [3, 4], [5, 6])), cardinality(array_fill(3, array[3, 2, 3])); +---- +6 18 # cardinality scalar function #3 query II @@ -562,10 +612,10 @@ select trim_array(make_array(1, 2, 3, 4, 5), 2), trim_array(['h', 'e', 'l', 'l', [1, 2, 3] [h, e] [1.0] # trim_array scalar function #2 -query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) -caused by -Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) +query ?? select trim_array([[1, 2], [3, 4], [5, 6]], 2), trim_array(array_fill(4, [3, 4, 2]), 2); +---- +[[1, 2]] [[[4, 4], [4, 4], [4, 4], [4, 4]]] # trim_array scalar function #3 query ? @@ -600,10 +650,10 @@ select array_length(make_array(1, 2, 3, 4, 5), 2), array_length(make_array(1, 2, NULL NULL 2 # array_length scalar function #4 -query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) -caused by -Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) +query IIII select array_length(array_fill(3, [3, 2, 5]), 1), array_length(array_fill(3, [3, 2, 5]), 2), array_length(array_fill(3, [3, 2, 5]), 3), array_length(array_fill(3, [3, 2, 5]), 4); +---- +3 2 5 NULL # array_length scalar function #5 query III @@ -646,10 +696,10 @@ select array_dims(make_array(1, 2, 3)), array_dims(make_array([1, 2], [3, 4])), [3] [2, 2] [1, 1, 1, 2, 1] # array_dims scalar function #2 -query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) -caused by -Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) +query ?? select array_dims(array_fill(2, [1, 2, 3])), array_dims(array_fill(3, [2, 5, 4])); +---- +[1, 2, 3] [2, 5, 4] # array_dims scalar function #3 query ?? @@ -678,10 +728,10 @@ select array_ndims(make_array(1, 2, 3)), array_ndims(make_array([1, 2], [3, 4])) 1 2 5 # array_ndims scalar function #2 -query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) -caused by -Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) +query II select array_ndims(array_fill(1, [1, 2, 3])), array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]); +---- +3 21 # array_ndims scalar function #3 query II @@ -763,10 +813,8 @@ select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' || make_a ---- [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] - ### Array casting tests - ## make_array # make_array scalar function #1 diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index a0fed8f90889..cbfebb74249a 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -1040,7 +1040,6 @@ macro_rules! to_string { } } } - Ok($ARG) }}; } @@ -1048,9 +1047,10 @@ macro_rules! to_string { /// Array_to_string SQL function pub fn array_to_string(args: &[ArrayRef]) -> Result { let arr = &args[0]; - let delimeter = as_generic_string_array::(&args[1])? - .value(0) - .to_string(); + + let delimeters = as_generic_string_array::(&args[1])?; + let delimeters: Vec> = delimeters.iter().collect(); + let mut null_string = String::from(""); let mut with_null_string = false; if args.len() == 3 { @@ -1195,21 +1195,56 @@ pub fn array_to_string(args: &[ArrayRef]) -> Result { } let mut arg = String::from(""); - let mut res = compute_array_to_string( - &mut arg, - arr.clone(), - delimeter.clone(), - null_string, - with_null_string, - )? - .clone(); - match res.as_str() { - "" => Ok(Arc::new(StringArray::from(vec![Some(res)]))), + let mut res: Vec> = Vec::new(); + + match arr.data_type() { + DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _) => { + let list_array = arr.as_list::(); + for (arr, &delimeter) in list_array.iter().zip(delimeters.iter()) { + if let (Some(arr), Some(delimeter)) = (arr, delimeter) { + arg = String::from(""); + let s = compute_array_to_string( + &mut arg, + arr, + delimeter.to_string(), + null_string.clone(), + with_null_string, + )? + .clone(); + + if let Some(s) = s.strip_suffix(delimeter) { + res.push(Some(s.to_string())); + } else { + res.push(Some(s)); + } + } else { + res.push(None); + } + } + } _ => { - res.truncate(res.len() - delimeter.len()); - Ok(Arc::new(StringArray::from(vec![Some(res)]))) + // delimeter length is 1 + assert_eq!(delimeters.len(), 1); + let delimeter = delimeters[0].unwrap(); + let s = compute_array_to_string( + &mut arg, + arr.clone(), + delimeter.to_string(), + null_string, + with_null_string, + )? + .clone(); + + if !s.is_empty() { + let s = s.strip_suffix(delimeter).unwrap().to_string(); + res.push(Some(s)); + } else { + res.push(Some(s)); + } } } + + Ok(Arc::new(StringArray::from(res))) } /// Trim_array SQL function