From 9d09fe562c65d4f52cdccb253690b5533f6cc23f Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Wed, 14 Jun 2023 15:14:26 +0100 Subject: [PATCH] Faster unpacking of Int32Type dictionary (#4406) --- arrow-cast/src/cast.rs | 43 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 32f422768dc3..dea3f2acfaf8 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -49,7 +49,7 @@ use crate::parse::{ use arrow_array::{ builder::*, cast::*, temporal_conversions::*, timezone::Tz, types::*, *, }; -use arrow_buffer::{i256, ArrowNativeType, Buffer, MutableBuffer}; +use arrow_buffer::{i256, ArrowNativeType, Buffer, MutableBuffer, ScalarBuffer}; use arrow_data::ArrayData; use arrow_schema::*; use arrow_select::take::take; @@ -3466,34 +3466,21 @@ fn unpack_dictionary( where K: ArrowDictionaryKeyType, { - let dict_array = array - .as_any() - .downcast_ref::>() - .ok_or_else(|| { - ArrowError::ComputeError( - "Internal Error: Cannot cast dictionary to DictionaryArray of expected type".to_string(), - ) - })?; - - // attempt to cast the dict values to the target type - // use the take kernel to expand out the dictionary + let dict_array = array.as_dictionary::(); let cast_dict_values = cast_with_options(dict_array.values(), to_type, cast_options)?; - - // Note take requires first casting the indices to u32 - let keys_array: ArrayRef = - Arc::new(PrimitiveArray::::from(dict_array.keys().to_data())); - let indices = cast_with_options(&keys_array, &DataType::UInt32, cast_options)?; - let u32_indices = - indices - .as_any() - .downcast_ref::() - .ok_or_else(|| { - ArrowError::ComputeError( - "Internal Error: Cannot cast dict indices to UInt32".to_string(), - ) - })?; - - take(cast_dict_values.as_ref(), u32_indices, None) + let keys = dict_array.keys(); + match K::DATA_TYPE { + DataType::Int32 => { + // Dictionary guarantees all non-null keys >= 0 + let buffer = ScalarBuffer::new(keys.values().inner().clone(), 0, keys.len()); + let indices = PrimitiveArray::new(buffer, keys.nulls().cloned()); + take::(cast_dict_values.as_ref(), &indices, None) + } + _ => { + let indices = cast_with_options(keys, &DataType::UInt32, cast_options)?; + take::(cast_dict_values.as_ref(), indices.as_primitive(), None) + } + } } /// Attempts to encode an array into an `ArrayDictionary` with index