From 8a8850cc8846c4295667fc6e96ae037b27f5b11d Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 25 Mar 2025 18:26:08 -0400 Subject: [PATCH 1/8] Change default explain plan to `tree` --- datafusion-cli/src/main.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index e21006312d85..3a0e7bd319d7 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -155,6 +155,8 @@ async fn main_inner() -> Result<()> { if let Some(batch_size) = args.batch_size { session_config = session_config.with_batch_size(batch_size); }; + // use easier to understand "tree" mode by default + session_config.options_mut().explain.format = String::from("tree"); let mut rt_builder = RuntimeEnvBuilder::new(); // set memory pool size @@ -437,4 +439,5 @@ mod tests { Ok(()) } + } From 4b9eeb280c637946b4349651285c3c5826f47bd4 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 25 Mar 2025 18:27:38 -0400 Subject: [PATCH 2/8] Add tests --- datafusion-cli/tests/cli_integration.rs | 28 ++++++++++++++++- .../snapshots/cli_quick_test@backslash.snap | 1 + .../snapshots/cli_quick_test@batch_size.snap | 3 +- .../tests/snapshots/cli_quick_test@files.snap | 3 +- .../snapshots/cli_quick_test@statements.snap | 3 +- .../cli_quick_test_explain@batch_size-2.snap | 31 +++++++++++++++++++ .../cli_quick_test_explain@batch_size.snap | 27 ++++++++++++++++ 7 files changed, 92 insertions(+), 4 deletions(-) create mode 100644 datafusion-cli/tests/snapshots/cli_quick_test_explain@batch_size-2.snap create mode 100644 datafusion-cli/tests/snapshots/cli_quick_test_explain@batch_size.snap diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index a54a920e97bb..55ad388862c1 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -19,7 +19,7 @@ use std::process::Command; use rstest::rstest; -use insta::{glob, Settings}; +use insta::{assert_snapshot, glob, Settings}; use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; use std::{env, fs}; @@ -59,6 +59,7 @@ fn init() { "batch_size", ["--command", "show datafusion.execution.batch_size", "-q", "-b", "1"], )] + #[test] fn cli_quick_test<'a>( #[case] snapshot_name: &'a str, @@ -74,6 +75,31 @@ fn cli_quick_test<'a>( assert_cmd_snapshot!(cmd); } +#[rstest] +#[case::default_explain_plan( + "batch_size", + // default explain format should be tree + ["--command", "EXPLAIN SELECT 123"], +)] +#[case::can_see_indent_format( + "batch_size", + // can choose the old explain format too + ["--command", "EXPLAIN FORMAT indent SELECT 123"], +)] +#[test] +fn cli_quick_test_explain<'a>( + #[case] snapshot_name: &'a str, + #[case] args: impl IntoIterator, +) { + let mut settings = make_settings(); + settings.set_snapshot_suffix(snapshot_name); + let _bound = settings.bind_to_scope(); + + let mut cmd = cli(); + cmd.args(args); + + assert_cmd_snapshot!(cmd); +} #[rstest] #[case("csv")] #[case("tsv")] diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@backslash.snap b/datafusion-cli/tests/snapshots/cli_quick_test@backslash.snap index c01699146aa8..9915320a7a5e 100644 --- a/datafusion-cli/tests/snapshots/cli_quick_test@backslash.snap +++ b/datafusion-cli/tests/snapshots/cli_quick_test@backslash.snap @@ -8,6 +8,7 @@ info: - "--format" - json - "-q" +snapshot_kind: text --- success: true exit_code: 0 diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap b/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap index c27d527df0b6..40af7acefd00 100644 --- a/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap +++ b/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap @@ -1,5 +1,5 @@ --- -source: tests/cli_integration.rs +source: datafusion-cli/tests/cli_integration.rs info: program: datafusion-cli args: @@ -8,6 +8,7 @@ info: - "-q" - "-b" - "1" +snapshot_kind: text --- success: true exit_code: 0 diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@files.snap b/datafusion-cli/tests/snapshots/cli_quick_test@files.snap index 7c44e41729a1..e29494242143 100644 --- a/datafusion-cli/tests/snapshots/cli_quick_test@files.snap +++ b/datafusion-cli/tests/snapshots/cli_quick_test@files.snap @@ -1,11 +1,12 @@ --- -source: tests/cli_integration.rs +source: datafusion-cli/tests/cli_integration.rs info: program: datafusion-cli args: - "--file" - tests/sql/select.sql - "-q" +snapshot_kind: text --- success: true exit_code: 0 diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap b/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap index 3b975bb6a927..d2949f66921b 100644 --- a/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap +++ b/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap @@ -1,11 +1,12 @@ --- -source: tests/cli_integration.rs +source: datafusion-cli/tests/cli_integration.rs info: program: datafusion-cli args: - "--command" - select 1; select 2; - "-q" +snapshot_kind: text --- success: true exit_code: 0 diff --git a/datafusion-cli/tests/snapshots/cli_quick_test_explain@batch_size-2.snap b/datafusion-cli/tests/snapshots/cli_quick_test_explain@batch_size-2.snap new file mode 100644 index 000000000000..46ee6be64f62 --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_quick_test_explain@batch_size-2.snap @@ -0,0 +1,31 @@ +--- +source: datafusion-cli/tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - EXPLAIN SELECT 123 +snapshot_kind: text +--- +success: true +exit_code: 0 +----- stdout ----- +[CLI_VERSION] ++---------------+-------------------------------+ +| plan_type | plan | ++---------------+-------------------------------+ +| physical_plan | ┌───────────────────────────┐ | +| | │ ProjectionExec │ | +| | │ -------------------- │ | +| | │ Int64(123): 123 │ | +| | └─────────────┬─────────────┘ | +| | ┌─────────────┴─────────────┐ | +| | │ PlaceholderRowExec │ | +| | └───────────────────────────┘ | +| | | ++---------------+-------------------------------+ +1 row(s) fetched. +[ELAPSED] + + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_quick_test_explain@batch_size.snap b/datafusion-cli/tests/snapshots/cli_quick_test_explain@batch_size.snap new file mode 100644 index 000000000000..b2fb64709974 --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_quick_test_explain@batch_size.snap @@ -0,0 +1,27 @@ +--- +source: datafusion-cli/tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - EXPLAIN FORMAT indent SELECT 123 +snapshot_kind: text +--- +success: true +exit_code: 0 +----- stdout ----- +[CLI_VERSION] ++---------------+------------------------------------------+ +| plan_type | plan | ++---------------+------------------------------------------+ +| logical_plan | Projection: Int64(123) | +| | EmptyRelation | +| physical_plan | ProjectionExec: expr=[123 as Int64(123)] | +| | PlaceholderRowExec | +| | | ++---------------+------------------------------------------+ +2 row(s) fetched. +[ELAPSED] + + +----- stderr ----- From cfa9171dedf12285f2b4c3e9be59adcbe7146018 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 25 Mar 2025 18:40:35 -0400 Subject: [PATCH 3/8] remove unecessary changes --- datafusion-cli/src/main.rs | 1 - datafusion-cli/tests/cli_integration.rs | 3 +-- datafusion-cli/tests/snapshots/cli_quick_test@backslash.snap | 1 - datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap | 1 - datafusion-cli/tests/snapshots/cli_quick_test@files.snap | 1 - datafusion-cli/tests/snapshots/cli_quick_test@statements.snap | 1 - 6 files changed, 1 insertion(+), 7 deletions(-) diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 3a0e7bd319d7..136da590635c 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -439,5 +439,4 @@ mod tests { Ok(()) } - } diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index 55ad388862c1..b0b09ff8132a 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -19,7 +19,7 @@ use std::process::Command; use rstest::rstest; -use insta::{assert_snapshot, glob, Settings}; +use insta::{glob, Settings}; use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; use std::{env, fs}; @@ -59,7 +59,6 @@ fn init() { "batch_size", ["--command", "show datafusion.execution.batch_size", "-q", "-b", "1"], )] - #[test] fn cli_quick_test<'a>( #[case] snapshot_name: &'a str, diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@backslash.snap b/datafusion-cli/tests/snapshots/cli_quick_test@backslash.snap index 9915320a7a5e..c01699146aa8 100644 --- a/datafusion-cli/tests/snapshots/cli_quick_test@backslash.snap +++ b/datafusion-cli/tests/snapshots/cli_quick_test@backslash.snap @@ -8,7 +8,6 @@ info: - "--format" - json - "-q" -snapshot_kind: text --- success: true exit_code: 0 diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap b/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap index 40af7acefd00..9fd07fa6f4e1 100644 --- a/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap +++ b/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap @@ -8,7 +8,6 @@ info: - "-q" - "-b" - "1" -snapshot_kind: text --- success: true exit_code: 0 diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@files.snap b/datafusion-cli/tests/snapshots/cli_quick_test@files.snap index e29494242143..df3a10b6bb54 100644 --- a/datafusion-cli/tests/snapshots/cli_quick_test@files.snap +++ b/datafusion-cli/tests/snapshots/cli_quick_test@files.snap @@ -6,7 +6,6 @@ info: - "--file" - tests/sql/select.sql - "-q" -snapshot_kind: text --- success: true exit_code: 0 diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap b/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap index d2949f66921b..a394458768d1 100644 --- a/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap +++ b/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap @@ -6,7 +6,6 @@ info: - "--command" - select 1; select 2; - "-q" -snapshot_kind: text --- success: true exit_code: 0 From 489fc4da9078a8b3b7f0c1b7dde702b949b0c51e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Tue, 25 Mar 2025 18:41:49 -0400 Subject: [PATCH 4/8] more unecessary changes --- datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap | 2 +- datafusion-cli/tests/snapshots/cli_quick_test@files.snap | 2 +- datafusion-cli/tests/snapshots/cli_quick_test@statements.snap | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap b/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap index 9fd07fa6f4e1..c27d527df0b6 100644 --- a/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap +++ b/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap @@ -1,5 +1,5 @@ --- -source: datafusion-cli/tests/cli_integration.rs +source: tests/cli_integration.rs info: program: datafusion-cli args: diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@files.snap b/datafusion-cli/tests/snapshots/cli_quick_test@files.snap index df3a10b6bb54..7c44e41729a1 100644 --- a/datafusion-cli/tests/snapshots/cli_quick_test@files.snap +++ b/datafusion-cli/tests/snapshots/cli_quick_test@files.snap @@ -1,5 +1,5 @@ --- -source: datafusion-cli/tests/cli_integration.rs +source: tests/cli_integration.rs info: program: datafusion-cli args: diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap b/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap index a394458768d1..3b975bb6a927 100644 --- a/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap +++ b/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap @@ -1,5 +1,5 @@ --- -source: datafusion-cli/tests/cli_integration.rs +source: tests/cli_integration.rs info: program: datafusion-cli args: From dd7dfdfd1b9b90141fa0b0a0a443a19a3842facb Mon Sep 17 00:00:00 2001 From: blaginin Date: Wed, 26 Mar 2025 18:44:10 +0000 Subject: [PATCH 5/8] Move test cases --- datafusion-cli/tests/cli_integration.rs | 23 ++++--------------- ...cli_quick_test@can_see_indent_format.snap} | 0 ... cli_quick_test@default_explain_plan.snap} | 0 3 files changed, 4 insertions(+), 19 deletions(-) rename datafusion-cli/tests/snapshots/{cli_quick_test_explain@batch_size.snap => cli_quick_test@can_see_indent_format.snap} (100%) rename datafusion-cli/tests/snapshots/{cli_quick_test_explain@batch_size-2.snap => cli_quick_test@default_explain_plan.snap} (100%) diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index b0b09ff8132a..eca51d851060 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -59,34 +59,18 @@ fn init() { "batch_size", ["--command", "show datafusion.execution.batch_size", "-q", "-b", "1"], )] -#[test] -fn cli_quick_test<'a>( - #[case] snapshot_name: &'a str, - #[case] args: impl IntoIterator, -) { - let mut settings = make_settings(); - settings.set_snapshot_suffix(snapshot_name); - let _bound = settings.bind_to_scope(); - - let mut cmd = cli(); - cmd.args(args); - - assert_cmd_snapshot!(cmd); -} - -#[rstest] #[case::default_explain_plan( - "batch_size", + "default_explain_plan", // default explain format should be tree ["--command", "EXPLAIN SELECT 123"], )] #[case::can_see_indent_format( - "batch_size", + "can_see_indent_format", // can choose the old explain format too ["--command", "EXPLAIN FORMAT indent SELECT 123"], )] #[test] -fn cli_quick_test_explain<'a>( +fn cli_quick_test<'a>( #[case] snapshot_name: &'a str, #[case] args: impl IntoIterator, ) { @@ -99,6 +83,7 @@ fn cli_quick_test_explain<'a>( assert_cmd_snapshot!(cmd); } + #[rstest] #[case("csv")] #[case("tsv")] diff --git a/datafusion-cli/tests/snapshots/cli_quick_test_explain@batch_size.snap b/datafusion-cli/tests/snapshots/cli_quick_test@can_see_indent_format.snap similarity index 100% rename from datafusion-cli/tests/snapshots/cli_quick_test_explain@batch_size.snap rename to datafusion-cli/tests/snapshots/cli_quick_test@can_see_indent_format.snap diff --git a/datafusion-cli/tests/snapshots/cli_quick_test_explain@batch_size-2.snap b/datafusion-cli/tests/snapshots/cli_quick_test@default_explain_plan.snap similarity index 100% rename from datafusion-cli/tests/snapshots/cli_quick_test_explain@batch_size-2.snap rename to datafusion-cli/tests/snapshots/cli_quick_test@default_explain_plan.snap From f570f0ffb210933537b3e805b4d5793d26735bf1 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 30 Mar 2025 07:59:36 -0400 Subject: [PATCH 6/8] Respect environment variable for explain plan --- datafusion-cli/src/main.rs | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 136da590635c..0b7a98f65201 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -37,6 +37,8 @@ use datafusion_cli::{ }; use clap::Parser; +use datafusion::common::config_err; +use datafusion::config::ConfigOptions; use mimalloc::MiMalloc; #[global_allocator] @@ -150,13 +152,7 @@ async fn main_inner() -> Result<()> { env::set_current_dir(p).unwrap(); }; - let mut session_config = SessionConfig::from_env()?.with_information_schema(true); - - if let Some(batch_size) = args.batch_size { - session_config = session_config.with_batch_size(batch_size); - }; - // use easier to understand "tree" mode by default - session_config.options_mut().explain.format = String::from("tree"); + let session_config = get_session_config(&args)?; let mut rt_builder = RuntimeEnvBuilder::new(); // set memory pool size @@ -228,6 +224,30 @@ async fn main_inner() -> Result<()> { Ok(()) } +/// Get the session configuration based on the provided arguments +/// and environment settings. +fn get_session_config(args: &Args) -> Result { + // Read options from environment variables and merge with command line options + let mut config_options = ConfigOptions::from_env()?; + + if let Some(batch_size) = args.batch_size { + if batch_size == 0 { + return config_err!("batch_size must be greater than 0"); + } + config_options.execution.batch_size = batch_size; + }; + + // use easier to understand "tree" mode by default + // if the user hasn't specified an explain format in the environment + if env::var_os("DATAFUSION_EXPLAIN_FORMAT").is_none() { + config_options.explain.format = String::from("tree"); + } + + let session_config = + SessionConfig::from(config_options).with_information_schema(true); + Ok(session_config) +} + fn parse_valid_file(dir: &str) -> Result { if Path::new(dir).is_file() { Ok(dir.to_string()) From 8eecfb1778c914dda735f44fcc14b8834a96828b Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 30 Mar 2025 08:06:01 -0400 Subject: [PATCH 7/8] Fix format --- datafusion-cli/tests/cli_integration.rs | 17 +++++++ ...es@explain_plan_environment_overrides.snap | 44 +++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 datafusion-cli/tests/snapshots/cli_explain_environment_overrides@explain_plan_environment_overrides.snap diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index eca51d851060..58f7d611eb65 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -84,6 +84,23 @@ fn cli_quick_test<'a>( assert_cmd_snapshot!(cmd); } +#[test] +fn cli_explain_environment_overrides() { + let mut settings = make_settings(); + settings.set_snapshot_suffix("explain_plan_environment_overrides"); + let _bound = settings.bind_to_scope(); + + let mut cmd = cli(); + + // should use the environment variable to override the default explain plan + cmd + .env("DATAFUSION_EXPLAIN_FORMAT", "pgjson") + .args(["--command", "EXPLAIN SELECT 123"]); + + assert_cmd_snapshot!(cmd); +} + + #[rstest] #[case("csv")] #[case("tsv")] diff --git a/datafusion-cli/tests/snapshots/cli_explain_environment_overrides@explain_plan_environment_overrides.snap b/datafusion-cli/tests/snapshots/cli_explain_environment_overrides@explain_plan_environment_overrides.snap new file mode 100644 index 000000000000..6b3a247dd7b8 --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_explain_environment_overrides@explain_plan_environment_overrides.snap @@ -0,0 +1,44 @@ +--- +source: datafusion-cli/tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - EXPLAIN SELECT 123 + env: + DATAFUSION_EXPLAIN_FORMAT: pgjson +snapshot_kind: text +--- +success: true +exit_code: 0 +----- stdout ----- +[CLI_VERSION] ++--------------+-----------------------------------------+ +| plan_type | plan | ++--------------+-----------------------------------------+ +| logical_plan | [ | +| | { | +| | "Plan": { | +| | "Expressions": [ | +| | "Int64(123)" | +| | ], | +| | "Node Type": "Projection", | +| | "Output": [ | +| | "Int64(123)" | +| | ], | +| | "Plans": [ | +| | { | +| | "Node Type": "EmptyRelation", | +| | "Output": [], | +| | "Plans": [] | +| | } | +| | ] | +| | } | +| | } | +| | ] | ++--------------+-----------------------------------------+ +1 row(s) fetched. +[ELAPSED] + + +----- stderr ----- From cc5236853afaf77fba89b140566bce72bc5a2eb1 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Sun, 30 Mar 2025 08:06:08 -0400 Subject: [PATCH 8/8] fmt --- datafusion-cli/tests/cli_integration.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index 58f7d611eb65..9ac09955512b 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -93,14 +93,12 @@ fn cli_explain_environment_overrides() { let mut cmd = cli(); // should use the environment variable to override the default explain plan - cmd - .env("DATAFUSION_EXPLAIN_FORMAT", "pgjson") + cmd.env("DATAFUSION_EXPLAIN_FORMAT", "pgjson") .args(["--command", "EXPLAIN SELECT 123"]); assert_cmd_snapshot!(cmd); } - #[rstest] #[case("csv")] #[case("tsv")]