Skip to content

Support all the codecs supported by Avro #7718

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ pyarrow = ["pyo3", "arrow/pyarrow"]

[dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
apache-avro = { version = "0.16", default-features = false, features = ["snappy"], optional = true }
apache-avro = { version = "0.16", default-features = false, features = ["bzip", "snappy", "xz", "zstandard"], optional = true }
arrow = { workspace = true }
arrow-array = { workspace = true }
arrow-buffer = { workspace = true }
Expand Down
124 changes: 124 additions & 0 deletions datafusion/sqllogictest/test_files/avro.slt
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,78 @@ STORED AS AVRO
WITH HEADER ROW
LOCATION '../../testing/data/avro/alltypes_plain.avro'

# Register the Avro fixture alltypes_plain.snappy.avro (snappy codec, per the
# file name) as an external table with an explicit, all-NOT-NULL schema.
statement ok
CREATE EXTERNAL TABLE alltypes_plain_snappy (
    id              INT       NOT NULL,
    bool_col        BOOLEAN   NOT NULL,
    tinyint_col     TINYINT   NOT NULL,
    smallint_col    SMALLINT  NOT NULL,
    int_col         INT       NOT NULL,
    bigint_col      BIGINT    NOT NULL,
    float_col       FLOAT     NOT NULL,
    double_col      DOUBLE    NOT NULL,
    date_string_col BYTEA     NOT NULL,
    string_col      VARCHAR   NOT NULL,
    timestamp_col   TIMESTAMP NOT NULL,
)
STORED AS AVRO
WITH HEADER ROW
LOCATION '../../testing/data/avro/alltypes_plain.snappy.avro'

# Register the Avro fixture alltypes_plain.bzip2.avro (bzip2 codec, per the
# file name) as an external table with an explicit, all-NOT-NULL schema.
statement ok
CREATE EXTERNAL TABLE alltypes_plain_bzip2 (
    id              INT       NOT NULL,
    bool_col        BOOLEAN   NOT NULL,
    tinyint_col     TINYINT   NOT NULL,
    smallint_col    SMALLINT  NOT NULL,
    int_col         INT       NOT NULL,
    bigint_col      BIGINT    NOT NULL,
    float_col       FLOAT     NOT NULL,
    double_col      DOUBLE    NOT NULL,
    date_string_col BYTEA     NOT NULL,
    string_col      VARCHAR   NOT NULL,
    timestamp_col   TIMESTAMP NOT NULL,
)
STORED AS AVRO
WITH HEADER ROW
LOCATION '../../testing/data/avro/alltypes_plain.bzip2.avro'

# Register the Avro fixture alltypes_plain.xz.avro (xz codec, per the file
# name) as an external table with an explicit, all-NOT-NULL schema.
statement ok
CREATE EXTERNAL TABLE alltypes_plain_xz (
    id              INT       NOT NULL,
    bool_col        BOOLEAN   NOT NULL,
    tinyint_col     TINYINT   NOT NULL,
    smallint_col    SMALLINT  NOT NULL,
    int_col         INT       NOT NULL,
    bigint_col      BIGINT    NOT NULL,
    float_col       FLOAT     NOT NULL,
    double_col      DOUBLE    NOT NULL,
    date_string_col BYTEA     NOT NULL,
    string_col      VARCHAR   NOT NULL,
    timestamp_col   TIMESTAMP NOT NULL,
)
STORED AS AVRO
WITH HEADER ROW
LOCATION '../../testing/data/avro/alltypes_plain.xz.avro'

# Register the Avro fixture alltypes_plain.zstandard.avro (zstandard codec,
# per the file name) as an external table with an explicit, all-NOT-NULL schema.
statement ok
CREATE EXTERNAL TABLE alltypes_plain_zstandard (
    id              INT       NOT NULL,
    bool_col        BOOLEAN   NOT NULL,
    tinyint_col     TINYINT   NOT NULL,
    smallint_col    SMALLINT  NOT NULL,
    int_col         INT       NOT NULL,
    bigint_col      BIGINT    NOT NULL,
    float_col       FLOAT     NOT NULL,
    double_col      DOUBLE    NOT NULL,
    date_string_col BYTEA     NOT NULL,
    string_col      VARCHAR   NOT NULL,
    timestamp_col   TIMESTAMP NOT NULL,
)
STORED AS AVRO
WITH HEADER ROW
LOCATION '../../testing/data/avro/alltypes_plain.zstandard.avro'

statement ok
CREATE EXTERNAL TABLE single_nan (
mycol FLOAT
Expand Down Expand Up @@ -73,6 +145,58 @@ SELECT id, CAST(string_col AS varchar) FROM alltypes_plain
0 0
1 1

# Snappy codec: read id and string_col (cast to VARCHAR) back from
# alltypes_plain_snappy and compare against the eight expected rows.
query IT
SELECT
    id,
    CAST(string_col AS VARCHAR)
FROM alltypes_plain_snappy
----
4 0
5 1
6 0
7 1
2 0
3 1
0 0
1 1

# Bzip2 codec: read id and string_col (cast to VARCHAR) back from
# alltypes_plain_bzip2 and compare against the eight expected rows.
query IT
SELECT
    id,
    CAST(string_col AS VARCHAR)
FROM alltypes_plain_bzip2
----
4 0
5 1
6 0
7 1
2 0
3 1
0 0
1 1

# Xz codec: read id and string_col (cast to VARCHAR) back from
# alltypes_plain_xz and compare against the eight expected rows.
query IT
SELECT
    id,
    CAST(string_col AS VARCHAR)
FROM alltypes_plain_xz
----
4 0
5 1
6 0
7 1
2 0
3 1
0 0
1 1

# Zstandard codec: read id and string_col (cast to VARCHAR) back from
# alltypes_plain_zstandard and compare against the eight expected rows.
query IT
SELECT
    id,
    CAST(string_col AS VARCHAR)
FROM alltypes_plain_zstandard
----
4 0
5 1
6 0
7 1
2 0
3 1
0 0
1 1

# test avro single nan schema
query R
SELECT mycol FROM single_nan
Expand Down