Skip to content

Commit 574dfeb

Browse files
authored
Fix: generate_series function support string type (#12002)
* fix: sqllogictest
* Revert "fix: sqllogictest" This reverts commit 4957a1d.
* fix: sqllogictest
* remove any type signature
* coerce type from null to date32
* fmt
* slt
* Revert "coerce type from null to date32" This reverts commit bccdc2e.
* replace type coerce by `coerce_types` method
* fmt
* fix underscored param
1 parent a91be04 commit 574dfeb

File tree

2 files changed

+67
-35
lines changed

2 files changed

+67
-35
lines changed

datafusion/functions-nested/src/range.rs

+62-31
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,12 @@ use arrow::datatypes::{DataType, Field};
2323
use arrow_array::types::{Date32Type, IntervalMonthDayNanoType};
2424
use arrow_array::NullArray;
2525
use arrow_buffer::{BooleanBufferBuilder, NullBuffer, OffsetBuffer};
26-
use arrow_schema::DataType::{Date32, Int64, Interval, List};
26+
use arrow_schema::DataType::*;
2727
use arrow_schema::IntervalUnit::MonthDayNano;
2828
use datafusion_common::cast::{as_date32_array, as_int64_array, as_interval_mdn_array};
2929
use datafusion_common::{exec_err, not_impl_datafusion_err, Result};
30-
use datafusion_expr::{
31-
ColumnarValue, ScalarUDFImpl, Signature, TypeSignature, Volatility,
32-
};
30+
use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
31+
use itertools::Itertools;
3332
use std::any::Any;
3433
use std::iter::from_fn;
3534
use std::sync::Arc;
@@ -49,16 +48,7 @@ pub(super) struct Range {
4948
impl Range {
5049
pub fn new() -> Self {
5150
Self {
52-
signature: Signature::one_of(
53-
vec![
54-
TypeSignature::Exact(vec![Int64]),
55-
TypeSignature::Exact(vec![Int64, Int64]),
56-
TypeSignature::Exact(vec![Int64, Int64, Int64]),
57-
TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]),
58-
TypeSignature::Any(3),
59-
],
60-
Volatility::Immutable,
61-
),
51+
signature: Signature::user_defined(Volatility::Immutable),
6252
aliases: vec![],
6353
}
6454
}
@@ -75,9 +65,34 @@ impl ScalarUDFImpl for Range {
7565
&self.signature
7666
}
7767

68+
fn coerce_types(&self, arg_types: &[DataType]) -> Result<Vec<DataType>> {
69+
arg_types
70+
.iter()
71+
.map(|arg_type| match arg_type {
72+
Null => Ok(Null),
73+
Int8 => Ok(Int64),
74+
Int16 => Ok(Int64),
75+
Int32 => Ok(Int64),
76+
Int64 => Ok(Int64),
77+
UInt8 => Ok(Int64),
78+
UInt16 => Ok(Int64),
79+
UInt32 => Ok(Int64),
80+
UInt64 => Ok(Int64),
81+
Timestamp(_, _) => Ok(Date32),
82+
Date32 => Ok(Date32),
83+
Date64 => Ok(Date32),
84+
Utf8 => Ok(Date32),
85+
LargeUtf8 => Ok(Date32),
86+
Utf8View => Ok(Date32),
87+
Interval(_) => Ok(Interval(MonthDayNano)),
88+
_ => exec_err!("Unsupported DataType"),
89+
})
90+
.try_collect()
91+
}
92+
7893
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
79-
if arg_types.iter().any(|t| t.eq(&DataType::Null)) {
80-
Ok(DataType::Null)
94+
if arg_types.iter().any(|t| t.is_null()) {
95+
Ok(Null)
8196
} else {
8297
Ok(List(Arc::new(Field::new(
8398
"item",
@@ -88,7 +103,7 @@ impl ScalarUDFImpl for Range {
88103
}
89104

90105
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
91-
if args.iter().any(|arg| arg.data_type() == DataType::Null) {
106+
if args.iter().any(|arg| arg.data_type().is_null()) {
92107
return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1))));
93108
}
94109
match args[0].data_type() {
@@ -120,16 +135,7 @@ pub(super) struct GenSeries {
120135
impl GenSeries {
121136
pub fn new() -> Self {
122137
Self {
123-
signature: Signature::one_of(
124-
vec![
125-
TypeSignature::Exact(vec![Int64]),
126-
TypeSignature::Exact(vec![Int64, Int64]),
127-
TypeSignature::Exact(vec![Int64, Int64, Int64]),
128-
TypeSignature::Exact(vec![Date32, Date32, Interval(MonthDayNano)]),
129-
TypeSignature::Any(3),
130-
],
131-
Volatility::Immutable,
132-
),
138+
signature: Signature::user_defined(Volatility::Immutable),
133139
aliases: vec![],
134140
}
135141
}
@@ -146,9 +152,34 @@ impl ScalarUDFImpl for GenSeries {
146152
&self.signature
147153
}
148154

155+
fn coerce_types(&self, _arg_types: &[DataType]) -> Result<Vec<DataType>> {
156+
_arg_types
157+
.iter()
158+
.map(|arg_type| match arg_type {
159+
Null => Ok(Null),
160+
Int8 => Ok(Int64),
161+
Int16 => Ok(Int64),
162+
Int32 => Ok(Int64),
163+
Int64 => Ok(Int64),
164+
UInt8 => Ok(Int64),
165+
UInt16 => Ok(Int64),
166+
UInt32 => Ok(Int64),
167+
UInt64 => Ok(Int64),
168+
Timestamp(_, _) => Ok(Date32),
169+
Date32 => Ok(Date32),
170+
Date64 => Ok(Date32),
171+
Utf8 => Ok(Date32),
172+
LargeUtf8 => Ok(Date32),
173+
Utf8View => Ok(Date32),
174+
Interval(_) => Ok(Interval(MonthDayNano)),
175+
_ => exec_err!("Unsupported DataType"),
176+
})
177+
.try_collect()
178+
}
179+
149180
fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
150-
if arg_types.iter().any(|t| t.eq(&DataType::Null)) {
151-
Ok(DataType::Null)
181+
if arg_types.iter().any(|t| t.is_null()) {
182+
Ok(Null)
152183
} else {
153184
Ok(List(Arc::new(Field::new(
154185
"item",
@@ -159,15 +190,15 @@ impl ScalarUDFImpl for GenSeries {
159190
}
160191

161192
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
162-
if args.iter().any(|arg| arg.data_type() == DataType::Null) {
193+
if args.iter().any(|arg| arg.data_type().is_null()) {
163194
return Ok(ColumnarValue::Array(Arc::new(NullArray::new(1))));
164195
}
165196
match args[0].data_type() {
166197
Int64 => make_scalar_function(|args| gen_range_inner(args, true))(args),
167198
Date32 => make_scalar_function(|args| gen_range_date(args, true))(args),
168199
dt => {
169200
exec_err!(
170-
"unsupported type for range. Expected Int64 or Date32, got: {}",
201+
"unsupported type for gen_series. Expected Int64 or Date32, got: {}",
171202
dt
172203
)
173204
}

datafusion/sqllogictest/test_files/array.slt

+5-4
Original file line numberDiff line numberDiff line change
@@ -5804,7 +5804,7 @@ select generate_series(5),
58045804
----
58055805
[0, 1, 2, 3, 4, 5] [2, 3, 4, 5] [2, 5, 8] [1, 2, 3, 4, 5] [5, 4, 3, 2, 1] [10, 7, 4] [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01, 1993-03-01] [1993-02-01, 1993-01-31, 1993-01-30, 1993-01-29, 1993-01-28, 1993-01-27, 1993-01-26, 1993-01-25, 1993-01-24, 1993-01-23, 1993-01-22, 1993-01-21, 1993-01-20, 1993-01-19, 1993-01-18, 1993-01-17, 1993-01-16, 1993-01-15, 1993-01-14, 1993-01-13, 1993-01-12, 1993-01-11, 1993-01-10, 1993-01-09, 1993-01-08, 1993-01-07, 1993-01-06, 1993-01-05, 1993-01-04, 1993-01-03, 1993-01-02, 1993-01-01] [1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01]
58065806

5807-
query error DataFusion error: Execution error: unsupported type for range. Expected Int64 or Date32, got: Timestamp\(Nanosecond, None\)
5807+
query error DataFusion error: Execution error: Cannot generate date range less than 1 day\.
58085808
select generate_series('2021-01-01'::timestamp, '2021-01-02'::timestamp, INTERVAL '1' HOUR);
58095809

58105810
## should return NULL
@@ -5936,11 +5936,12 @@ select generate_series(start, '1993-03-01'::date, INTERVAL '1 year') from date_t
59365936

59375937

59385938
# https://github.com/apache/datafusion/issues/11922
5939-
query error
5939+
query ?
59405940
select generate_series(start, '1993-03-01', INTERVAL '1 year') from date_table;
59415941
----
5942-
DataFusion error: Internal error: could not cast value to arrow_array::array::primitive_array::PrimitiveArray<arrow_array::types::Date32Type>.
5943-
This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker
5942+
[1992-01-01, 1993-01-01]
5943+
[1993-02-01]
5944+
[1989-04-01, 1990-04-01, 1991-04-01, 1992-04-01]
59445945

59455946

59465947
## array_except

0 commit comments

Comments
 (0)