Skip to content

Commit cf0c55a

Browse files
committed
Convert approx_distinct to UDAF
1 parent e8fdc09 commit cf0c55a

File tree

15 files changed

+578
-262
lines changed

15 files changed

+578
-262
lines changed

datafusion/expr/src/aggregate_function.rs

+4 -10
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,6 @@ pub enum AggregateFunction {
4141
Max,
4242
/// Average
4343
Avg,
44-
/// Approximate distinct function
45-
ApproxDistinct,
4644
/// Aggregation into an array
4745
ArrayAgg,
4846
/// N'th value in a group according to some ordering
@@ -97,7 +95,6 @@ impl AggregateFunction {
9795
Min => "MIN",
9896
Max => "MAX",
9997
Avg => "AVG",
100-
ApproxDistinct => "APPROX_DISTINCT",
10198
ArrayAgg => "ARRAY_AGG",
10299
NthValue => "NTH_VALUE",
103100
Correlation => "CORR",
@@ -160,7 +157,6 @@ impl FromStr for AggregateFunction {
160157
"regr_syy" => AggregateFunction::RegrSYY,
161158
"regr_sxy" => AggregateFunction::RegrSXY,
162159
// approximate
163-
"approx_distinct" => AggregateFunction::ApproxDistinct,
164160
"approx_median" => AggregateFunction::ApproxMedian,
165161
"approx_percentile_cont" => AggregateFunction::ApproxPercentileCont,
166162
"approx_percentile_cont_with_weight" => {
@@ -198,9 +194,7 @@ impl AggregateFunction {
198194
})?;
199195

200196
match self {
201-
AggregateFunction::Count | AggregateFunction::ApproxDistinct => {
202-
Ok(DataType::Int64)
203-
}
197+
AggregateFunction::Count => Ok(DataType::Int64),
204198
AggregateFunction::Max | AggregateFunction::Min => {
205199
// For min and max agg function, the returned type is same as input type.
206200
// The coerced_data_types is same with input_types.
@@ -261,9 +255,9 @@ impl AggregateFunction {
261255
// note: the physical expression must accept the type returned by this function or the execution panics.
262256
match self {
263257
AggregateFunction::Count => Signature::variadic_any(Volatility::Immutable),
264-
AggregateFunction::ApproxDistinct
265-
| AggregateFunction::Grouping
266-
| AggregateFunction::ArrayAgg => Signature::any(1, Volatility::Immutable),
258+
AggregateFunction::Grouping | AggregateFunction::ArrayAgg => {
259+
Signature::any(1, Volatility::Immutable)
260+
}
267261
AggregateFunction::Min | AggregateFunction::Max => {
268262
let valid = STRINGS
269263
.iter()

datafusion/expr/src/expr_fn.rs

-18
Original file line number | Diff line number | Diff line change
@@ -266,24 +266,6 @@ pub fn in_list(expr: Expr, list: Vec<Expr>, negated: bool) -> Expr {
266266
Expr::InList(InList::new(Box::new(expr), list, negated))
267267
}
268268

269-
/// Returns the approximate number of distinct input values.
270-
/// This function provides an approximation of count(DISTINCT x).
271-
/// Zero is returned if all input values are null.
272-
/// This function should produce a standard error of 0.81%,
273-
/// which is the standard deviation of the (approximately normal)
274-
/// error distribution over all possible sets.
275-
/// It does not guarantee an upper bound on the error for any specific input set.
276-
pub fn approx_distinct(expr: Expr) -> Expr {
277-
Expr::AggregateFunction(AggregateFunction::new(
278-
aggregate_function::AggregateFunction::ApproxDistinct,
279-
vec![expr],
280-
false,
281-
None,
282-
None,
283-
None,
284-
))
285-
}
286-
287269
/// Calculate an approximation of the median for `expr`.
288270
pub fn approx_median(expr: Expr) -> Expr {
289271
Expr::AggregateFunction(AggregateFunction::new(

datafusion/expr/src/type_coercion/aggregates.rs

+1 -4
Original file line number | Diff line number | Diff line change
@@ -96,9 +96,7 @@ pub fn coerce_types(
9696
check_arg_count(agg_fun.name(), input_types, &signature.type_signature)?;
9797

9898
match agg_fun {
99-
AggregateFunction::Count | AggregateFunction::ApproxDistinct => {
100-
Ok(input_types.to_vec())
101-
}
99+
AggregateFunction::Count => Ok(input_types.to_vec()),
102100
AggregateFunction::ArrayAgg => Ok(input_types.to_vec()),
103101
AggregateFunction::Min | AggregateFunction::Max => {
104102
// min and max support the dictionary data type
@@ -539,7 +537,6 @@ mod tests {
539537
let funs = vec![
540538
AggregateFunction::Count,
541539
AggregateFunction::ArrayAgg,
542-
AggregateFunction::ApproxDistinct,
543540
AggregateFunction::Min,
544541
AggregateFunction::Max,
545542
];

0 commit comments

Comments
 (0)