Skip to content

Commit be71464

Browse files
authored
Merge pull request #251 from pmcgleenon/datafusion-43
updated for datafusion release 43.0.0
2 parents e452fdf + 5abcf2a commit be71464

File tree

7 files changed

+97 -95 lines changed

datafusion/README.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ The benchmark should be completed in under an hour. On-demand # is $0.6 pe
2020
1. `cd ClickBench/datafusion`
2121
1. `vi benchmark.sh` and modify following line to target Datafusion version
2222
```
23-
git checkout 40.0.0
23+
git checkout 43.0.0
2424
```
2525
1. `bash benchmark.sh`
2626

datafusion/benchmark.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ sudo yum install gcc -y
1414
# Install DataFusion main branch
1515
git clone https://github.com/apache/arrow-datafusion.git
1616
cd arrow-datafusion/datafusion-cli
17-
git checkout 40.0.0
17+
git checkout 43.0.0
1818
CARGO_PROFILE_RELEASE_LTO=true RUSTFLAGS="-C codegen-units=1" cargo build --release
1919
export PATH="`pwd`/target/release:$PATH"
2020
cd ../..

datafusion/create_partitioned.sql

+2-1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
11
CREATE EXTERNAL TABLE hits
22
STORED AS PARQUET
3-
LOCATION 'partitioned';
3+
LOCATION 'partitioned'
4+
OPTIONS ('binary_as_string' 'true');

datafusion/create_single.sql

+2-1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
11
CREATE EXTERNAL TABLE hits
22
STORED AS PARQUET
3-
LOCATION 'hits.parquet';
3+
LOCATION 'hits.parquet'
4+
OPTIONS ('binary_as_string' 'true');

datafusion/results/partitioned.json

+45-45
Original file line number | Diff line number | Diff line change
@@ -1,58 +1,58 @@
11
{
22
"system": "DataFusion (Parquet, partitioned)",
3-
"date": "2024-07-27",
3+
"date": "2024-11-15",
44
"machine": "c6a.4xlarge, 500gb gp2",
55
"cluster_size": 1,
6-
"comment": "v40.0.0 (4cae813)",
6+
"comment": "v43.0.0 (88f58bf)",
77

88
"tags": ["Rust", "column-oriented", "embedded", "stateless"],
99

1010
"load_time": 0,
1111
"data_size": 14779976446,
1212

1313
"result": [
14-
[0.043, 0.018, 0.016],
15-
[0.087, 0.031, 0.028],
16-
[0.173, 0.072, 0.073],
17-
[0.356, 0.075, 0.081],
18-
[1.201, 0.784, 0.796],
19-
[0.960, 0.831, 0.837],
20-
[0.057, 0.026, 0.026],
21-
[0.062, 0.029, 0.031],
22-
[1.408, 1.314, 1.315],
23-
[1.302, 1.025, 1.038],
24-
[0.483, 0.280, 0.269],
25-
[0.705, 0.306, 0.296],
26-
[1.137, 0.931, 0.939],
27-
[3.183, 2.245, 2.252],
28-
[1.499, 1.415, 1.429],
29-
[1.011, 0.901, 0.897],
30-
[3.230, 2.670, 2.655],
31-
[3.136, 2.560, 2.539],
32-
[6.849, 5.608, 5.827],
33-
[0.299, 0.075, 0.068],
34-
[10.086, 1.544, 1.617],
35-
[11.238, 1.821, 1.835],
36-
[21.957, 4.104, 4.132],
37-
[55.510, 10.615, 10.548],
38-
[2.678, 0.503, 0.500],
39-
[0.765, 0.412, 0.413],
40-
[2.649, 0.574, 0.559],
41-
[9.652, 2.177, 2.203],
42-
[8.528, 5.051, 5.019],
43-
[0.499, 0.421, 0.439],
44-
[2.389, 1.018, 1.028],
45-
[6.060, 1.520, 1.513],
46-
[8.820, 8.081, 7.826],
47-
[10.604, 4.851, 5.088],
48-
[10.567, 4.971, 4.880],
49-
[1.737, 1.659, 1.649],
50-
[0.363, 0.247, 0.231],
51-
[0.156, 0.093, 0.092],
52-
[0.198, 0.125, 0.124],
53-
[0.902, 0.701, 0.683],
54-
[0.144, 0.042, 0.041],
55-
[0.130, 0.037, 0.040],
56-
[0.131, 0.055, 0.050]
14+
[0.051, 0.019, 0.019],
15+
[0.091, 0.035, 0.035],
16+
[0.189, 0.085, 0.088],
17+
[0.383, 0.081, 0.077],
18+
[1.071, 0.811, 0.803],
19+
[0.944, 0.801, 0.805],
20+
[0.078, 0.030, 0.030],
21+
[0.103, 0.037, 0.037],
22+
[1.313, 1.205, 1.201],
23+
[1.357, 1.034, 1.025],
24+
[0.511, 0.255, 0.253],
25+
[0.634, 0.295, 0.301],
26+
[1.016, 0.856, 0.879],
27+
[2.615, 1.421, 1.374],
28+
[1.131, 0.931, 0.918],
29+
[1.051, 0.952, 0.958],
30+
[2.672, 2.031, 2.066],
31+
[2.592, 1.879, 1.887],
32+
[5.549, 4.226, 4.335],
33+
[0.254, 0.078, 0.075],
34+
[9.967, 1.098, 1.092],
35+
[11.248, 1.329, 1.327],
36+
[21.868, 2.820, 2.818],
37+
[55.458, 10.286, 10.609],
38+
[2.678, 0.488, 0.486],
39+
[0.802, 0.352, 0.354],
40+
[2.672, 0.507, 0.498],
41+
[9.614, 1.513, 1.507],
42+
[8.368, 3.394, 3.521],
43+
[0.535, 0.418, 0.439],
44+
[2.362, 0.854, 0.861],
45+
[5.957, 0.910, 0.914],
46+
[4.780, 3.806, 3.871],
47+
[10.168, 3.654, 3.586],
48+
[10.090, 3.645, 3.546],
49+
[1.775, 1.644, 1.660],
50+
[0.364, 0.199, 0.183],
51+
[0.183, 0.078, 0.075],
52+
[0.290, 0.128, 0.123],
53+
[0.619, 0.376, 0.376],
54+
[0.148, 0.053, 0.044],
55+
[0.142, 0.042, 0.042],
56+
[0.155, 0.065, 0.053]
5757
]
5858
}

datafusion/results/single.json

+45-45
Original file line number | Diff line number | Diff line change
@@ -1,58 +1,58 @@
11
{
22
"system": "DataFusion (Parquet, single)",
3-
"date": "2024-07-27",
3+
"date": "2024-11-15",
44
"machine": "c6a.4xlarge, 500gb gp2",
55
"cluster_size": 1,
6-
"comment": "v40.0.0 (4cae813)",
6+
"comment": "v43.0.0 (88f58bf)",
77

88
"tags": ["Rust", "column-oriented", "embedded", "stateless"],
99

1010
"load_time": 0,
1111
"data_size": 14779976446,
1212

1313
"result": [
14-
[0.076, 0.051, 0.055],
15-
[0.113, 0.066, 0.066],
16-
[0.196, 0.115, 0.105],
17-
[0.340, 0.114, 0.115],
18-
[1.074, 0.862, 0.858],
19-
[0.995, 0.874, 0.909],
20-
[0.088, 0.076, 0.065],
21-
[0.102, 0.078, 0.068],
22-
[1.442, 1.349, 1.368],
23-
[1.260, 1.083, 1.064],
24-
[0.451, 0.306, 0.304],
25-
[0.597, 0.337, 0.335],
26-
[1.088, 0.986, 0.974],
27-
[3.085, 2.261, 2.268],
28-
[1.522, 1.428, 1.429],
29-
[1.068, 0.957, 0.960],
30-
[3.217, 2.702, 2.754],
31-
[3.149, 2.621, 2.564],
32-
[6.978, 5.679, 5.865],
33-
[0.338, 0.107, 0.113],
34-
[9.885, 1.466, 1.474],
35-
[11.225, 1.794, 1.791],
36-
[22.035, 3.906, 3.912],
37-
[55.923, 10.899, 10.975],
38-
[2.560, 0.579, 0.575],
39-
[0.754, 0.509, 0.506],
40-
[2.517, 0.674, 0.651],
41-
[9.574, 2.220, 2.216],
42-
[9.070, 4.926, 4.940],
43-
[0.536, 0.473, 0.481],
44-
[2.288, 1.090, 1.101],
45-
[5.823, 1.543, 1.528],
46-
[8.637, 8.328, 7.848],
47-
[10.477, 4.972, 5.022],
48-
[10.435, 4.910, 5.020],
49-
[1.827, 1.685, 1.724],
50-
[0.389, 0.275, 0.270],
51-
[0.201, 0.175, 0.160],
52-
[0.230, 0.173, 0.172],
53-
[0.887, 0.749, 0.755],
54-
[0.172, 0.085, 0.076],
55-
[0.165, 0.075, 0.073],
56-
[0.160, 0.090, 0.100]
14+
[0.093, 0.055, 0.056],
15+
[0.138, 0.070, 0.070],
16+
[0.206, 0.120, 0.117],
17+
[0.346, 0.118, 0.114],
18+
[0.979, 0.867, 0.871],
19+
[1.030, 0.902, 0.904],
20+
[0.125, 0.064, 0.077],
21+
[0.143, 0.083, 0.078],
22+
[1.304, 1.169, 1.240],
23+
[1.533, 1.104, 1.100],
24+
[0.475, 0.272, 0.278],
25+
[0.562, 0.309, 0.315],
26+
[1.165, 0.931, 0.965],
27+
[2.643, 1.402, 1.490],
28+
[1.143, 0.997, 0.983],
29+
[1.106, 0.991, 0.993],
30+
[2.727, 2.161, 2.098],
31+
[2.578, 1.954, 1.947],
32+
[5.530, 4.311, 4.253],
33+
[0.319, 0.105, 0.107],
34+
[9.732, 1.155, 1.149],
35+
[11.337, 1.468, 1.407],
36+
[22.055, 3.678, 3.663],
37+
[55.942, 10.017, 10.014],
38+
[2.561, 0.557, 0.577],
39+
[0.809, 0.510, 0.519],
40+
[2.579, 0.634, 0.620],
41+
[9.630, 1.618, 1.655],
42+
[8.645, 3.565, 3.699],
43+
[0.584, 0.493, 0.485],
44+
[2.285, 0.978, 0.991],
45+
[5.690, 1.046, 1.006],
46+
[4.468, 3.833, 3.885],
47+
[10.123, 3.663, 3.654],
48+
[10.114, 3.672, 3.685],
49+
[1.743, 1.597, 1.659],
50+
[0.389, 0.242, 0.230],
51+
[0.266, 0.155, 0.170],
52+
[0.369, 0.161, 0.180],
53+
[0.659, 0.446, 0.416],
54+
[0.190, 0.084, 0.085],
55+
[0.177, 0.078, 0.079],
56+
[0.164, 0.103, 0.088]
5757
]
5858
}

datafusion/run.sh

+1-1
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ cat queries.sql | while read query; do
3131
# 2. each query contains a "Query took xxx seconds", we just grep these 2 lines
3232
# 3. use sed to take the second line
3333
# 4. use awk to take the number we want
34-
RES=`datafusion-cli -f $CREATE_SQL_FILE /tmp/query.sql 2>&1 | grep "Elapsed" |sed -n 2p | awk '{ print $2 }'
34+
RES=`datafusion-cli -f $CREATE_SQL_FILE /tmp/query.sql 2>&1 | grep "Elapsed" |sed -n 2p | awk '{ print $2 }'`
3535
[[ $RES != "" ]] && \
3636
echo -n "$RES" || \
3737
echo -n "null"

0 commit comments

Comments
 (0)