Skip to content

Commit

Permalink
refactor: remove remaining uses of arrow_array and use reexport in …
Browse files Browse the repository at this point in the history
…`arrow` instead (#14528)

* refactor: remove remaining uses of arrow_array and use reexport in arrow instead

* fix: remove blank

* fix: run cargo fmt

* fix: import record_batch

* fix: update cargo.lock

* fix: add chrono-tz feature in arrow

* fix: remove remaining arrow-array & update cargo.lock

---------

Co-authored-by: Cheng-Yuan-Lai <a186235@gmail.com>
Co-authored-by: Ian Lai <Ian.Lai@senao.com>
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
4 people authored Feb 8, 2025
1 parent 91c0975 commit faa8c1b
Show file tree
Hide file tree
Showing 132 changed files with 297 additions and 354 deletions.
2 changes: 0 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,6 @@ ahash = { version = "0.8", default-features = false, features = [
] }
arrow = { version = "54.1.0", features = [
"prettyprint",
] }
arrow-array = { version = "54.1.0", default-features = false, features = [
"chrono-tz",
] }
arrow-buffer = { version = "54.1.0", default-features = false }
Expand Down
21 changes: 8 additions & 13 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ publish = false
[dependencies]
abi_stable = "0.11.3"
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-schema = { workspace = true }
datafusion = { workspace = true }
datafusion-ffi = { workspace = true }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,9 @@
use std::sync::Arc;

use abi_stable::{export_root_module, prefix_type::PrefixTypeTrait};
use arrow_array::RecordBatch;
use datafusion::{
arrow::datatypes::{DataType, Field, Schema},
common::record_batch,
datasource::MemTable,
};
use arrow::array::RecordBatch;
use arrow::datatypes::{DataType, Field, Schema};
use datafusion::{common::record_batch, datasource::MemTable};
use datafusion_ffi::table_provider::FFI_TableProvider;
use ffi_module_interface::{TableProviderModule, TableProviderModuleRef};

Expand Down
1 change: 0 additions & 1 deletion datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ apache-avro = { version = "0.17", default-features = false, features = [
"zstandard",
], optional = true }
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-ipc = { workspace = true }
arrow-schema = { workspace = true }
base64 = "0.22.1"
Expand Down
8 changes: 4 additions & 4 deletions datafusion/common/src/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
//! kernels in arrow-rs such as `as_boolean_array` do.
use crate::{downcast_value, DataFusionError, Result};
use arrow::array::{
BinaryViewArray, Float16Array, Int16Array, Int8Array, LargeBinaryArray,
LargeStringArray, StringViewArray, UInt16Array,
};
use arrow::{
array::{
Array, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array,
Expand All @@ -36,10 +40,6 @@ use arrow::{
},
datatypes::{ArrowDictionaryKeyType, ArrowPrimitiveType},
};
use arrow_array::{
BinaryViewArray, Float16Array, Int16Array, Int8Array, LargeBinaryArray,
LargeStringArray, StringViewArray, UInt16Array,
};

// Downcast ArrayRef to Date32Array
pub fn as_date32_array(array: &dyn Array) -> Result<&Date32Array> {
Expand Down
3 changes: 1 addition & 2 deletions datafusion/common/src/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@

//! Conversions between PyArrow and DataFusion types
use arrow::array::ArrayData;
use arrow::array::{Array, ArrayData};
use arrow::pyarrow::{FromPyArrow, ToPyArrow};
use arrow_array::Array;
use pyo3::exceptions::PyException;
use pyo3::prelude::PyErr;
use pyo3::types::{PyAnyMethods, PyList};
Expand Down
34 changes: 18 additions & 16 deletions datafusion/common/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,24 @@ use crate::cast::{
use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err};
use crate::hash_utils::create_hashes;
use crate::utils::SingleRowListArrayBuilder;
use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano};
use arrow::array::{
types::{IntervalDayTime, IntervalMonthDayNano},
*,
};
use arrow::buffer::ScalarBuffer;
use arrow::compute::kernels::numeric::*;
use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
use arrow::{
array::*,
compute::kernels::cast::{cast_with_options, CastOptions},
datatypes::{
i256, ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType,
Date32Type, Date64Type, Field, Float32Type, Int16Type, Int32Type, Int64Type,
Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit,
IntervalYearMonthType, TimeUnit, TimestampMicrosecondType,
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
UInt16Type, UInt32Type, UInt64Type, UInt8Type, DECIMAL128_MAX_PRECISION,
},
use arrow::compute::kernels::{
cast::{cast_with_options, CastOptions},
numeric::*,
};
use arrow::datatypes::{
i256, ArrowDictionaryKeyType, ArrowNativeType, ArrowTimestampType, DataType,
Date32Type, Date64Type, Field, Float32Type, Int16Type, Int32Type, Int64Type,
Int8Type, IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit,
IntervalYearMonthType, TimeUnit, TimestampMicrosecondType, TimestampMillisecondType,
TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type,
UInt8Type, DECIMAL128_MAX_PRECISION,
};
use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions};
use arrow_schema::{UnionFields, UnionMode};

use crate::format::DEFAULT_CAST_OPTIONS;
Expand Down Expand Up @@ -165,7 +167,7 @@ pub use struct_builder::ScalarStructBuilder;
/// ```
/// # use std::sync::Arc;
/// # use arrow::datatypes::{DataType, Field, Fields};
/// # use arrow_array::{ArrayRef, Int32Array, StructArray, StringArray};
/// # use arrow::array::{ArrayRef, Int32Array, StructArray, StringArray};
/// # use datafusion_common::ScalarValue;
/// // Build a struct like: {a: 1, b: "foo"}
/// // Field description
Expand Down Expand Up @@ -1674,7 +1676,7 @@ impl ScalarValue {
///
/// assert_eq!(&result, &expected);
/// ```
/// [`Datum`]: arrow_array::Datum
/// [`Datum`]: arrow::array::Datum
pub fn to_scalar(&self) -> Result<Scalar<ArrayRef>> {
// Materialize this value via `to_array_of_size(1)` (any error is propagated
// to the caller by `?`) and wrap the resulting array in `Scalar`.
Ok(Scalar::new(self.to_array_of_size(1)?))
}
Expand Down
6 changes: 3 additions & 3 deletions datafusion/common/src/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use std::{error::Error, path::PathBuf};
/// ```
/// # use std::sync::Arc;
/// # use arrow::record_batch::RecordBatch;
/// # use arrow_array::{ArrayRef, Int32Array};
/// # use arrow::array::{ArrayRef, Int32Array};
/// # use datafusion_common::assert_batches_eq;
/// let col: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
/// let batch = RecordBatch::try_from_iter([("column", col)]).unwrap();
Expand Down Expand Up @@ -344,7 +344,7 @@ macro_rules! record_batch {
)*
]));

let batch = arrow_array::RecordBatch::try_new(
let batch = arrow::array::RecordBatch::try_new(
schema,
vec![$(
$crate::create_array!($type, $values),
Expand Down Expand Up @@ -416,7 +416,7 @@ mod tests {

#[test]
fn test_create_record_batch() -> Result<()> {
use arrow_array::Array;
use arrow::array::Array;

let batch = record_batch!(
("a", Int32, vec![1, 2, 3, 4]),
Expand Down
13 changes: 6 additions & 7 deletions datafusion/common/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,13 @@ pub mod string_utils;

use crate::error::{_internal_datafusion_err, _internal_err};
use crate::{DataFusionError, Result, ScalarValue};
use arrow::array::ArrayRef;
use arrow::array::{
cast::AsArray, Array, ArrayRef, FixedSizeListArray, LargeListArray, ListArray,
OffsetSizeTrait,
};
use arrow::buffer::OffsetBuffer;
use arrow::compute::{partition, SortColumn, SortOptions};
use arrow::datatypes::{Field, SchemaRef};
use arrow_array::cast::AsArray;
use arrow_array::{
Array, FixedSizeListArray, LargeListArray, ListArray, OffsetSizeTrait,
};
use arrow_schema::DataType;
use sqlparser::ast::Ident;
use sqlparser::dialect::GenericDialect;
Expand Down Expand Up @@ -329,8 +328,8 @@ pub fn longest_consecutive_prefix<T: Borrow<usize>>(
/// # Example
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::{Array, ListArray};
/// # use arrow_array::types::Int64Type;
/// # use arrow::array::{Array, ListArray};
/// # use arrow::array::types::Int64Type;
/// # use datafusion_common::utils::SingleRowListArrayBuilder;
/// // Array is [1, 2, 3]
/// let arr = ListArray::from_iter_primitive::<Int64Type, _, _>(vec![
Expand Down
4 changes: 2 additions & 2 deletions datafusion/common/src/utils/string_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

//! Utilities for working with strings
use arrow::{array::AsArray, datatypes::DataType};
use arrow_array::Array;
use arrow::array::{Array, AsArray};
use arrow::datatypes::DataType;

/// Convenient function to convert an Arrow string array to a vector of strings
pub fn string_array_to_vec(array: &dyn Array) -> Vec<Option<&str>> {
Expand Down
1 change: 0 additions & 1 deletion datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ extended_tests = []
[dependencies]
apache-avro = { version = "0.17", optional = true }
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-ipc = { workspace = true }
arrow-schema = { workspace = true }
async-compression = { version = "0.4.0", features = [
Expand Down
12 changes: 4 additions & 8 deletions datafusion/core/benches/data_utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,11 @@

//! This module provides the in-memory table for more realistic benchmarking.
use arrow::{
array::Float32Array,
array::Float64Array,
array::StringArray,
array::UInt64Array,
datatypes::{DataType, Field, Schema, SchemaRef},
record_batch::RecordBatch,
use arrow::array::{
builder::{Int64Builder, StringBuilder},
Float32Array, Float64Array, RecordBatch, StringArray, UInt64Array,
};
use arrow_array::builder::{Int64Builder, StringBuilder};
use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion_common::DataFusionError;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/benches/map_query_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

use std::sync::Arc;

use arrow_array::{ArrayRef, Int32Array, RecordBatch};
use arrow::array::{ArrayRef, Int32Array, RecordBatch};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use parking_lot::Mutex;
use rand::prelude::ThreadRng;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/benches/sql_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ extern crate datafusion;
mod data_utils;

use crate::criterion::Criterion;
use arrow::array::{ArrayRef, RecordBatch};
use arrow::datatypes::{DataType, Field, Fields, Schema};
use arrow_array::{ArrayRef, RecordBatch};
use criterion::Bencher;
use datafusion::datasource::MemTable;
use datafusion::execution::context::SessionContext;
Expand Down
3 changes: 1 addition & 2 deletions datafusion/core/benches/sql_query_with_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@

use std::{fmt::Write, sync::Arc, time::Duration};

use arrow::array::{Int64Builder, UInt64Builder};
use arrow_array::RecordBatch;
use arrow::array::{Int64Builder, RecordBatch, UInt64Builder};
use arrow_schema::{DataType, Field, Schema, SchemaRef};
use bytes::Bytes;
use criterion::{criterion_group, criterion_main, Criterion, SamplingMode};
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/file_format/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -760,10 +760,10 @@ mod tests {
use crate::prelude::{CsvReadOptions, SessionConfig, SessionContext};
use crate::test_util::arrow_test_data;

use arrow::array::{BooleanArray, Float64Array, Int32Array, StringArray};
use arrow::compute::concat_batches;
use arrow::csv::ReaderBuilder;
use arrow::util::pretty::pretty_format_batches;
use arrow_array::{BooleanArray, Float64Array, Int32Array, StringArray};
use datafusion_common::cast::as_string_array;
use datafusion_common::internal_err;
use datafusion_common::stats::Precision;
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/file_format/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ use crate::physical_plan::{
DisplayAs, DisplayFormatType, SendableRecordBatchStream, Statistics,
};

use arrow::array::RecordBatch;
use arrow::datatypes::Schema;
use arrow::datatypes::SchemaRef;
use arrow::json;
use arrow::json::reader::{infer_json_schema_from_iterator, ValueIter};
use arrow_array::RecordBatch;
use arrow_schema::ArrowError;
use datafusion_catalog::Session;
use datafusion_common::config::{ConfigField, ConfigFileType, JsonOptions};
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/file_format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ use std::fmt::{self, Debug, Display};
use std::sync::Arc;
use std::task::Poll;

use crate::arrow::array::RecordBatch;
use crate::arrow::datatypes::SchemaRef;
use crate::datasource::physical_plan::{FileScanConfig, FileSinkConfig};
use crate::error::Result;
use crate::physical_plan::{ExecutionPlan, Statistics};

use arrow_array::RecordBatch;
use arrow_schema::{ArrowError, DataType, Field, FieldRef, Schema};
use datafusion_catalog::Session;
use datafusion_common::file_options::file_type::FileType;
Expand Down
7 changes: 4 additions & 3 deletions datafusion/core/src/datasource/file_format/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1309,9 +1309,10 @@ mod tests {
use crate::datasource::file_format::parquet::test_util::store_parquet;
use crate::physical_plan::metrics::MetricValue;
use crate::prelude::{ParquetReadOptions, SessionConfig, SessionContext};
use arrow::array::{Array, ArrayRef, StringArray};
use arrow_array::types::Int32Type;
use arrow_array::{DictionaryArray, Int32Array, Int64Array};
use arrow::array::{
types::Int32Type, Array, ArrayRef, DictionaryArray, Int32Array, Int64Array,
StringArray,
};
use arrow_schema::{DataType, Field};
use async_trait::async_trait;
use datafusion_common::cast::{
Expand Down
Loading

0 comments on commit faa8c1b

Please sign in to comment.