-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Column support for array_to_string #6940
New issue
Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? # to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -344,22 +344,22 @@ select array_prepend(100.1, column2), array_prepend('.', column3) from arrays; | |
## array_fill | ||
|
||
# array_fill scalar function #1 | ||
query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) | ||
caused by | ||
Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) | ||
query ??? | ||
select array_fill(11, make_array(1, 2, 3)), array_fill(3, make_array(2, 3)), array_fill(2, make_array(2)); | ||
---- | ||
[[[11, 11, 11], [11, 11, 11]]] [[3, 3, 3], [3, 3, 3]] [2, 2] | ||
|
||
# array_fill scalar function #2 | ||
query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) | ||
caused by | ||
Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) | ||
query ?? | ||
select array_fill(1, make_array(1, 1, 1)), array_fill(2, make_array(2, 2, 2, 2, 2)); | ||
---- | ||
[[[1]]] [[[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]], [[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], [2, 2]]]]] | ||
|
||
# array_fill scalar function #3 | ||
query error DataFusion error: SQL error: TokenizerError\("Unterminated string literal at Line: 2, Column 856"\) | ||
caused by | ||
Internal error: Optimizer rule 'simplify_expressions' failed, due to generate a different schema, original schema: DFSchema \{ fields: \[DFField \{ qualifier: None, field: Field \{ name: "array_fill\(Int64\(1\),make_array\(\)\)", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \} \}\], metadata: \{\} \}, new schema: DFSchema \{ fields: \[DFField \{ qualifier: None, field: Field \{ name: "array_fill\(Int64\(1\),make_array\(\)\)", data_type: List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: \{\} \} \}\], metadata: \{\} \}\. This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker | ||
query ? | ||
select array_fill(1, make_array()) | ||
---- | ||
[] | ||
|
||
## array_concat | ||
|
||
|
@@ -498,16 +498,16 @@ select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), array_to_string([1, 2, 3 | |
h,e,l,l,o 1-2-3-4-5 1|2|3 | ||
|
||
# array_to_string scalar function #2 | ||
query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) | ||
caused by | ||
Arrow error: Cast error: Cannot cast string '1\+2\+3\+4\+5\+6' to value of Int64 type | ||
query TTT | ||
select array_to_string([1, 1, 1], '1'), array_to_string([[1, 2], [3, 4], [5, 6]], '+'), array_to_string(array_fill(3, [3, 2, 2]), '/\'); | ||
---- | ||
11111 1+2+3+4+5+6 3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3 | ||
|
||
# array_to_string scalar function #3 | ||
query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) | ||
caused by | ||
Error during planning: Cannot automatically convert Utf8 to List\(Field \{ name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) | ||
query T | ||
select array_to_string(make_array(), ',') | ||
---- | ||
(empty) | ||
|
||
# array_to_string scalar function with nulls #1 | ||
query TTT | ||
|
@@ -521,6 +521,56 @@ select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_ | |
---- | ||
h,-,-,-,o nil-2-nil-4-5 1|0|3 | ||
|
||
# array_to_string with columns #1 | ||
|
||
# For reference | ||
# select column1, column4 from arrays_values; | ||
# ---- | ||
# [, 2, 3, 4, 5, 6, 7, 8, 9, 10] , | ||
# [11, 12, 13, 14, 15, 16, 17, 18, , 20] . | ||
# [21, 22, 23, , 25, 26, 27, 28, 29, 30] - | ||
# [31, 32, 33, 34, 35, , 37, 38, 39, 40] ok | ||
# NULL @ | ||
# [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] $ | ||
# [51, 52, , 54, 55, 56, 57, 58, 59, 60] ^ | ||
# [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] NULL | ||
|
||
query T | ||
select array_to_string(column1, column4) from arrays_values; | ||
---- | ||
2,3,4,5,6,7,8,9,10 | ||
11.12.13.14.15.16.17.18.20 | ||
21-22-23-25-26-27-28-29-30 | ||
31ok32ok33ok34ok35ok37ok38ok39ok40 | ||
NULL | ||
41$42$43$44$45$46$47$48$49$50 | ||
51^52^54^55^56^57^58^59^60 | ||
NULL | ||
|
||
query TT | ||
select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from arrays_values; | ||
---- | ||
2_3_4_5_6_7_8_9_10 1/2/3 | ||
11_12_13_14_15_16_17_18_20 1/2/3 | ||
21_22_23_25_26_27_28_29_30 1/2/3 | ||
31_32_33_34_35_37_38_39_40 1/2/3 | ||
NULL 1/2/3 | ||
41_42_43_44_45_46_47_48_49_50 1/2/3 | ||
51_52_54_55_56_57_58_59_60 1/2/3 | ||
61_62_63_64_65_66_67_68_69_70 1/2/3 | ||
|
||
query TT | ||
select array_to_string(column1, '_', '*'), array_to_string(make_array(make_array(1,2,3)), '.') from arrays_values; | ||
---- | ||
*_2_3_4_5_6_7_8_9_10 1.2.3 | ||
11_12_13_14_15_16_17_18_*_20 1.2.3 | ||
21_22_23_*_25_26_27_28_29_30 1.2.3 | ||
31_32_33_34_35_*_37_38_39_40 1.2.3 | ||
NULL 1.2.3 | ||
41_42_43_44_45_46_47_48_49_50 1.2.3 | ||
51_52_*_54_55_56_57_58_59_60 1.2.3 | ||
61_62_63_64_65_66_67_68_69_70 1.2.3 | ||
|
||
## cardinality | ||
|
||
# cardinality scalar function | ||
|
@@ -530,10 +580,10 @@ select cardinality(make_array(1, 2, 3, 4, 5)), cardinality([1, 3, 5]), cardinali | |
5 3 5 | ||
|
||
# cardinality scalar function #2 | ||
query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) | ||
caused by | ||
Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) | ||
query II | ||
select cardinality(make_array([1, 2], [3, 4], [5, 6])), cardinality(array_fill(3, array[3, 2, 3])); | ||
---- | ||
6 18 | ||
|
||
# cardinality scalar function #3 | ||
query II | ||
|
@@ -562,10 +612,10 @@ select trim_array(make_array(1, 2, 3, 4, 5), 2), trim_array(['h', 'e', 'l', 'l', | |
[1, 2, 3] [h, e] [1.0] | ||
|
||
# trim_array scalar function #2 | ||
query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) | ||
caused by | ||
Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) | ||
query ?? | ||
select trim_array([[1, 2], [3, 4], [5, 6]], 2), trim_array(array_fill(4, [3, 4, 2]), 2); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
---- | ||
[[1, 2]] [[[4, 4], [4, 4], [4, 4], [4, 4]]] | ||
|
||
# trim_array scalar function #3 | ||
query ? | ||
|
@@ -600,10 +650,10 @@ select array_length(make_array(1, 2, 3, 4, 5), 2), array_length(make_array(1, 2, | |
NULL NULL 2 | ||
|
||
# array_length scalar function #4 | ||
query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) | ||
caused by | ||
Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) | ||
query IIII | ||
select array_length(array_fill(3, [3, 2, 5]), 1), array_length(array_fill(3, [3, 2, 5]), 2), array_length(array_fill(3, [3, 2, 5]), 3), array_length(array_fill(3, [3, 2, 5]), 4); | ||
---- | ||
3 2 5 NULL | ||
|
||
# array_length scalar function #5 | ||
query III | ||
|
@@ -646,10 +696,10 @@ select array_dims(make_array(1, 2, 3)), array_dims(make_array([1, 2], [3, 4])), | |
[3] [2, 2] [1, 1, 1, 2, 1] | ||
|
||
# array_dims scalar function #2 | ||
query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) | ||
caused by | ||
Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) | ||
query ?? | ||
select array_dims(array_fill(2, [1, 2, 3])), array_dims(array_fill(3, [2, 5, 4])); | ||
---- | ||
[1, 2, 3] [2, 5, 4] | ||
|
||
# array_dims scalar function #3 | ||
query ?? | ||
|
@@ -678,10 +728,10 @@ select array_ndims(make_array(1, 2, 3)), array_ndims(make_array([1, 2], [3, 4])) | |
1 2 5 | ||
|
||
# array_ndims scalar function #2 | ||
query error DataFusion error: SQL error: ParserError\("Expected an SQL statement, found: caused"\) | ||
caused by | ||
Error during planning: Cannot automatically convert List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\) | ||
query II | ||
select array_ndims(array_fill(1, [1, 2, 3])), array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]); | ||
---- | ||
3 21 | ||
|
||
# array_ndims scalar function #3 | ||
query II | ||
|
@@ -763,10 +813,8 @@ select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 3.0, 4.0), 'h' || make_a | |
---- | ||
[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o] | ||
|
||
|
||
### Array casting tests | ||
|
||
|
||
## make_array | ||
|
||
# make_array scalar function #1 | ||
|
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -1040,17 +1040,17 @@ macro_rules! to_string { | |||||||||
} | ||||||||||
} | ||||||||||
} | ||||||||||
|
||||||||||
Ok($ARG) | ||||||||||
}}; | ||||||||||
} | ||||||||||
|
||||||||||
/// Array_to_string SQL function | ||||||||||
pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> { | ||||||||||
let arr = &args[0]; | ||||||||||
let delimeter = as_generic_string_array::<i32>(&args[1])? | ||||||||||
.value(0) | ||||||||||
.to_string(); | ||||||||||
|
||||||||||
let delimeters = as_generic_string_array::<i32>(&args[1])?; | ||||||||||
let delimeters: Vec<Option<&str>> = delimeters.iter().collect(); | ||||||||||
|
||||||||||
let mut null_string = String::from(""); | ||||||||||
let mut with_null_string = false; | ||||||||||
if args.len() == 3 { | ||||||||||
|
@@ -1195,21 +1195,56 @@ pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> { | |||||||||
} | ||||||||||
|
||||||||||
let mut arg = String::from(""); | ||||||||||
let mut res = compute_array_to_string( | ||||||||||
&mut arg, | ||||||||||
arr.clone(), | ||||||||||
delimeter.clone(), | ||||||||||
null_string, | ||||||||||
with_null_string, | ||||||||||
)? | ||||||||||
.clone(); | ||||||||||
match res.as_str() { | ||||||||||
"" => Ok(Arc::new(StringArray::from(vec![Some(res)]))), | ||||||||||
let mut res: Vec<Option<String>> = Vec::new(); | ||||||||||
|
||||||||||
match arr.data_type() { | ||||||||||
DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _) => { | ||||||||||
let list_array = arr.as_list::<i32>(); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this is going to work (will However, for now I think it looks reasonable and a step forward to me |
||||||||||
for (arr, &delimeter) in list_array.iter().zip(delimeters.iter()) { | ||||||||||
if let (Some(arr), Some(delimeter)) = (arr, delimeter) { | ||||||||||
arg = String::from(""); | ||||||||||
let s = compute_array_to_string( | ||||||||||
&mut arg, | ||||||||||
arr, | ||||||||||
delimeter.to_string(), | ||||||||||
null_string.clone(), | ||||||||||
with_null_string, | ||||||||||
)? | ||||||||||
.clone(); | ||||||||||
|
||||||||||
if let Some(s) = s.strip_suffix(delimeter) { | ||||||||||
res.push(Some(s.to_string())); | ||||||||||
} else { | ||||||||||
res.push(Some(s)); | ||||||||||
} | ||||||||||
} else { | ||||||||||
res.push(None); | ||||||||||
} | ||||||||||
} | ||||||||||
} | ||||||||||
_ => { | ||||||||||
res.truncate(res.len() - delimeter.len()); | ||||||||||
Ok(Arc::new(StringArray::from(vec![Some(res)]))) | ||||||||||
// delimeter length is 1 | ||||||||||
assert_eq!(delimeters.len(), 1); | ||||||||||
let delimeter = delimeters[0].unwrap(); | ||||||||||
let s = compute_array_to_string( | ||||||||||
&mut arg, | ||||||||||
arr.clone(), | ||||||||||
delimeter.to_string(), | ||||||||||
null_string, | ||||||||||
with_null_string, | ||||||||||
)? | ||||||||||
.clone(); | ||||||||||
|
||||||||||
if !s.is_empty() { | ||||||||||
let s = s.strip_suffix(delimeter).unwrap().to_string(); | ||||||||||
res.push(Some(s)); | ||||||||||
Comment on lines
+1239
to
+1240
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not quite sure if it is sure that Perhaps it could be something like the following to aavoid the unwrap?
Suggested change
|
||||||||||
} else { | ||||||||||
res.push(Some(s)); | ||||||||||
} | ||||||||||
} | ||||||||||
} | ||||||||||
|
||||||||||
Ok(Arc::new(StringArray::from(res))) | ||||||||||
} | ||||||||||
|
||||||||||
/// Trim_array SQL function | ||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
array_fill
will not be used in future versions (due to the inability to properly set the data type)It will be replaced with
array_repeat
.