Skip to content

Commit 9b78efa

Browse files
tushushu and alamb authored
Add serde support for Arrow FileTypeWriterOptions (#8850)
* refactor
* generated files
* feat
* feat
* feat
* feat
* tests
* clippy

---------

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent d14f766 commit 9b78efa

File tree

7 files changed

+170
-1
lines changed

7 files changed

+170
-1
lines changed

datafusion/common/src/file_options/arrow_writer.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,18 @@ use super::StatementOptions;
2727
/// Writer options for the Arrow file type.
///
/// The struct currently declares no fields, so every instance is equivalent:
/// [`ArrowWriterOptions::new`] and [`Default::default`] produce the same value.
#[derive(Clone, Debug, Default)]
pub struct ArrowWriterOptions {}

impl ArrowWriterOptions {
    /// Creates a new, empty set of Arrow writer options.
    ///
    /// Equivalent to [`ArrowWriterOptions::default`].
    pub fn new() -> Self {
        Self::default()
    }
}
41+
3042
impl TryFrom<(&ConfigOptions, &StatementOptions)> for ArrowWriterOptions {
3143
type Error = DataFusionError;
3244

datafusion/proto/proto/datafusion.proto

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1213,6 +1213,7 @@ message FileTypeWriterOptions {
12131213
JsonWriterOptions json_options = 1;
12141214
ParquetWriterOptions parquet_options = 2;
12151215
CsvWriterOptions csv_options = 3;
1216+
ArrowWriterOptions arrow_options = 4;
12161217
}
12171218
}
12181219

@@ -1243,6 +1244,8 @@ message CsvWriterOptions {
12431244
string null_value = 8;
12441245
}
12451246

1247+
// Writer options for the Arrow file type; this message currently declares no fields.
message ArrowWriterOptions {}
1248+
12461249
message WriterProperties {
12471250
uint64 data_page_size_limit = 1;
12481251
uint64 dictionary_page_size_limit = 2;

datafusion/proto/src/generated/pbjson.rs

Lines changed: 85 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/generated/prost.rs

Lines changed: 6 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion/proto/src/logical_plan/mod.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
// under the License.
1717

1818
use arrow::csv::WriterBuilder;
19+
use datafusion_common::file_options::arrow_writer::ArrowWriterOptions;
1920
use std::collections::HashMap;
2021
use std::fmt::Debug;
2122
use std::str::FromStr;
@@ -858,6 +859,13 @@ impl AsLogicalPlan for LogicalPlanNode {
858859
Some(copy_to_node::CopyOptions::WriterOptions(opt)) => {
859860
match &opt.file_type {
860861
Some(ft) => match ft {
862+
file_type_writer_options::FileType::ArrowOptions(_) => {
863+
CopyOptions::WriterOptions(Box::new(
864+
FileTypeWriterOptions::Arrow(
865+
ArrowWriterOptions::new(),
866+
),
867+
))
868+
}
861869
file_type_writer_options::FileType::CsvOptions(
862870
writer_options,
863871
) => {
@@ -1659,6 +1667,17 @@ impl AsLogicalPlan for LogicalPlanNode {
16591667
}
16601668
CopyOptions::WriterOptions(opt) => {
16611669
match opt.as_ref() {
1670+
FileTypeWriterOptions::Arrow(_) => {
1671+
let arrow_writer_options =
1672+
file_type_writer_options::FileType::ArrowOptions(
1673+
protobuf::ArrowWriterOptions {},
1674+
);
1675+
Some(copy_to_node::CopyOptions::WriterOptions(
1676+
protobuf::FileTypeWriterOptions {
1677+
file_type: Some(arrow_writer_options),
1678+
},
1679+
))
1680+
}
16621681
FileTypeWriterOptions::CSV(csv_opts) => {
16631682
let csv_options = &csv_opts.writer_options;
16641683
let csv_writer_options = csv_writer_options_to_proto(

datafusion/proto/src/physical_plan/from_proto.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ use datafusion::physical_plan::windows::create_window_expr;
4242
use datafusion::physical_plan::{
4343
functions, ColumnStatistics, Partitioning, PhysicalExpr, Statistics, WindowExpr,
4444
};
45+
use datafusion_common::file_options::arrow_writer::ArrowWriterOptions;
4546
use datafusion_common::file_options::csv_writer::CsvWriterOptions;
4647
use datafusion_common::file_options::json_writer::JsonWriterOptions;
4748
use datafusion_common::file_options::parquet_writer::ParquetWriterOptions;
@@ -834,6 +835,10 @@ impl TryFrom<&protobuf::FileTypeWriterOptions> for FileTypeWriterOptions {
834835
.ok_or_else(|| proto_error("Missing required file_type field in protobuf"))?;
835836

836837
match file_type {
838+
protobuf::file_type_writer_options::FileType::ArrowOptions(_) => {
839+
Ok(Self::Arrow(ArrowWriterOptions::new()))
840+
}
841+
837842
protobuf::file_type_writer_options::FileType::JsonOptions(opts) => {
838843
let compression: CompressionTypeVariant = opts.compression().into();
839844
Ok(Self::JSON(JsonWriterOptions::new(compression)))

datafusion/proto/tests/cases/roundtrip_logical_plan.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use arrow::datatypes::{
2727
IntervalUnit, Schema, SchemaRef, TimeUnit, UnionFields, UnionMode,
2828
};
2929

30+
use datafusion_common::file_options::arrow_writer::ArrowWriterOptions;
3031
use prost::Message;
3132

3233
use datafusion::datasource::provider::TableProviderFactory;
@@ -394,6 +395,45 @@ async fn roundtrip_logical_plan_copy_to_writer_options() -> Result<()> {
394395
Ok(())
395396
}
396397

398+
#[tokio::test]
399+
async fn roundtrip_logical_plan_copy_to_arrow() -> Result<()> {
400+
let ctx = SessionContext::new();
401+
402+
let input = create_csv_scan(&ctx).await?;
403+
404+
let plan = LogicalPlan::Copy(CopyTo {
405+
input: Arc::new(input),
406+
output_url: "test.arrow".to_string(),
407+
file_format: FileType::ARROW,
408+
single_file_output: true,
409+
copy_options: CopyOptions::WriterOptions(Box::new(FileTypeWriterOptions::Arrow(
410+
ArrowWriterOptions::new(),
411+
))),
412+
});
413+
414+
let bytes = logical_plan_to_bytes(&plan)?;
415+
let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx)?;
416+
assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}"));
417+
418+
match logical_round_trip {
419+
LogicalPlan::Copy(copy_to) => {
420+
assert_eq!("test.arrow", copy_to.output_url);
421+
assert_eq!(FileType::ARROW, copy_to.file_format);
422+
assert!(copy_to.single_file_output);
423+
match &copy_to.copy_options {
424+
CopyOptions::WriterOptions(y) => match y.as_ref() {
425+
FileTypeWriterOptions::Arrow(_) => {}
426+
_ => panic!(),
427+
},
428+
_ => panic!(),
429+
}
430+
}
431+
_ => panic!(),
432+
}
433+
434+
Ok(())
435+
}
436+
397437
#[tokio::test]
398438
async fn roundtrip_logical_plan_copy_to_csv() -> Result<()> {
399439
let ctx = SessionContext::new();

0 commit comments

Comments
 (0)