|
| 1 | +<!--- |
| 2 | + Licensed to the Apache Software Foundation (ASF) under one |
| 3 | + or more contributor license agreements. See the NOTICE file |
| 4 | + distributed with this work for additional information |
| 5 | + regarding copyright ownership. The ASF licenses this file |
| 6 | + to you under the Apache License, Version 2.0 (the |
| 7 | + "License"); you may not use this file except in compliance |
| 8 | + with the License. You may obtain a copy of the License at |
| 9 | +
|
| 10 | + http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | +
|
| 12 | + Unless required by applicable law or agreed to in writing, |
| 13 | + software distributed under the License is distributed on an |
| 14 | + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 15 | + KIND, either express or implied. See the License for the |
| 16 | + specific language governing permissions and limitations |
| 17 | + under the License. |
| 18 | +--> |
| 19 | + |
| 20 | +# Crate Configuration |
| 21 | + |
| 22 | +This section contains information on how to configure DataFusion in your Rust |
| 23 | +project. See the [Configuration Settings] section for a list of options that |
| 24 | +control DataFusion's behavior. |
| 25 | + |
| 26 | +[configuration settings]: configs.md |
| 27 | + |
| 28 | +## Add latest non-published DataFusion dependency |
| 29 | + |
| 30 | +DataFusion changes are published to `crates.io` according to the [release schedule](https://github.com/apache/datafusion/blob/main/dev/release/README.md#release-process). |
| 31 | + |
| 32 | +If you would like to test out DataFusion changes which are merged but not yet |
| 33 | +published, Cargo supports adding a dependency directly from a GitHub branch: |
| 34 | + |
| 35 | +```toml |
| 36 | +datafusion = { git = "https://github.com/apache/datafusion", branch = "main"} |
| 37 | +``` |
| 38 | + |
| 39 | +This also works at the package level: |
| 40 | + |
| 41 | +```toml |
| 42 | +datafusion-common = { git = "https://github.com/apache/datafusion", branch = "main", package = "datafusion-common"} |
| 43 | +``` |
| 44 | + |
| 45 | +And with features: |
| 46 | + |
| 47 | +```toml |
| 48 | +datafusion = { git = "https://github.com/apache/datafusion", branch = "main", default-features = false, features = ["unicode_expressions"] } |
| 49 | +``` |
| 50 | + |
| 51 | +More on [Cargo dependencies](https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html#specifying-dependencies) |
| 52 | + |
| 53 | +## Optimized Configuration |
| 54 | + |
| 55 | +For an optimized build several steps are required. First, use the below in your `Cargo.toml`. It is |
| 56 | +worth noting that using the settings in the `[profile.release]` section will significantly increase the build time. |
| 57 | + |
| 58 | +```toml |
| 59 | +[dependencies] |
| 60 | +datafusion = { version = "22.0" } |
| 61 | +tokio = { version = "^1.0", features = ["rt-multi-thread"] } |
| 62 | +snmalloc-rs = "0.3" |
| 63 | + |
| 64 | +[profile.release] |
| 65 | +lto = true |
| 66 | +codegen-units = 1 |
| 67 | +``` |
| 68 | + |
| 69 | +Then, in `main.rs`, update the memory allocator with the below after your imports: |
| 70 | + |
| 71 | +```rust ,ignore |
| 72 | +use datafusion::prelude::*; |
| 73 | + |
| 74 | +#[global_allocator] |
| 75 | +static ALLOC: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc; |
| 76 | + |
| 77 | +#[tokio::main] |
| 78 | +async fn main() -> datafusion::error::Result<()> { |
| 79 | + Ok(()) |
| 80 | +} |
| 81 | +``` |
| 82 | + |
| 83 | +Based on the instruction set architecture you are building on you will want to configure the `target-cpu` as well, ideally |
| 84 | +with `native` or at least `avx2`. |
| 85 | + |
| 86 | +```shell |
| 87 | +RUSTFLAGS='-C target-cpu=native' cargo run --release |
| 88 | +``` |
| 89 | + |
| 90 | +## Enable backtraces |
| 91 | + |
| 92 | +By default DataFusion returns errors as a plain message. There is an option to enable more verbose details about the error, |
| 93 | +such as the error backtrace. To enable a backtrace you need to add the DataFusion `backtrace` feature to your `Cargo.toml` file: |
| 94 | + |
| 95 | +```toml |
| 96 | +datafusion = { version = "31.0.0", features = ["backtrace"]} |
| 97 | +``` |
| 98 | + |
| 99 | +Set environment [variables](https://doc.rust-lang.org/std/backtrace/index.html#environment-variables) |
| 100 | + |
| 101 | +```bash |
| 102 | +RUST_BACKTRACE=1 ./target/debug/datafusion-cli |
| 103 | +DataFusion CLI v31.0.0 |
| 104 | +> select row_numer() over (partition by a order by a) from (select 1 a); |
| 105 | +Error during planning: Invalid function 'row_numer'. |
| 106 | +Did you mean 'ROW_NUMBER'? |
| 107 | + |
| 108 | +backtrace: 0: std::backtrace_rs::backtrace::libunwind::trace |
| 109 | + at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library/std/src/../../backtrace/src/backtrace/libunwind.rs:93:5 |
| 110 | + 1: std::backtrace_rs::backtrace::trace_unsynchronized |
| 111 | + at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library/std/src/../../backtrace/src/backtrace/mod.rs:66:5 |
| 112 | + 2: std::backtrace::Backtrace::create |
| 113 | + at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library/std/src/backtrace.rs:332:13 |
| 114 | + 3: std::backtrace::Backtrace::capture |
| 115 | + at /rustc/5680fa18feaa87f3ff04063800aec256c3d4b4be/library/std/src/backtrace.rs:298:9 |
| 116 | + 4: datafusion_common::error::DataFusionError::get_back_trace |
| 117 | + at /datafusion/datafusion/common/src/error.rs:436:30 |
| 118 | + 5: datafusion_sql::expr::function::<impl datafusion_sql::planner::SqlToRel<S>>::sql_function_to_expr |
| 119 | + ............ |
| 120 | +``` |
| 121 | +
|
| 122 | +Backtraces are useful when debugging code. For example, given the following test in `datafusion/core/src/physical_planner.rs`: |
| 123 | +
|
| 124 | +``` |
| 125 | +#[tokio::test] |
| 126 | +async fn test_get_backtrace_for_failed_code() -> Result<()> { |
| 127 | + let ctx = SessionContext::new(); |
| 128 | + |
| 129 | + let sql = " |
| 130 | + select row_numer() over (partition by a order by a) from (select 1 a); |
| 131 | + "; |
| 132 | + |
| 133 | + let _ = ctx.sql(sql).await?.collect().await?; |
| 134 | + |
| 135 | + Ok(()) |
| 136 | +} |
| 137 | +``` |
| 138 | +
|
| 139 | +To obtain a backtrace: |
| 140 | +
|
| 141 | +```bash |
| 142 | +cargo build --features=backtrace |
| 143 | +RUST_BACKTRACE=1 cargo test --features=backtrace --package datafusion --lib -- physical_planner::tests::test_get_backtrace_for_failed_code --exact --nocapture |
| 144 | +``` |
| 145 | +
|
| 146 | +Note: The backtrace is wrapped in system calls, so some steps at the top of the backtrace can be ignored. |
0 commit comments