From 2309dae3ba041aa9eaf7b6011a62836a39446ea9 Mon Sep 17 00:00:00 2001 From: "Damon P. Cortesi" Date: Thu, 11 Jul 2024 10:48:38 -0700 Subject: [PATCH] Finalize Glue catalog support for iceberg --- README.md | 3 +- faker_cli/cli.py | 5 +- faker_cli/writer.py | 4 +- faker_cli/writers/iceberg.py | 67 ++++++++++++----------- faker_cli/writers/parquet.py | 2 +- poetry.lock | 100 +++++++++++++++++++---------------- pyproject.toml | 3 ++ 7 files changed, 98 insertions(+), 86 deletions(-) diff --git a/README.md b/README.md index ab6f98a..0080d6c 100644 --- a/README.md +++ b/README.md @@ -149,9 +149,8 @@ And, of course, Iceberg tables! Currently supported are writing to a Glue or generic SQL catalog. - ```bash -fake -n 10 pyint,user_name,date_this_year -f deltalake -o glue://default.iceberg_sample +fake -n 10 pyint,user_name,date_this_year -f iceberg -C glue://default.iceberg_sample -o s3://YOUR_BUCKET/iceberg-data/ ``` ## Templates diff --git a/faker_cli/cli.py b/faker_cli/cli.py index d7d39b7..2bf5801 100644 --- a/faker_cli/cli.py +++ b/faker_cli/cli.py @@ -64,7 +64,7 @@ def main(num_rows, format, output, columns, template, catalog, column_types, pro # Parquet output requires a filename if format in ["parquet", "deltalake", "iceberg"] and output is None: - raise click.BadArgumentUsage("parquet | deltalake formats requires --output/-o filename parameter.") + raise click.BadArgumentUsage(f"{format} format requires --output/-o filename parameter.") if output is not None and format not in ["parquet", "deltalake", "iceberg"]: raise click.BadArgumentUsage("output files not supported for csv/json yet.") if catalog and format not in ['iceberg']: @@ -122,7 +122,8 @@ def main(num_rows, format, output, columns, template, catalog, column_types, pro format_klas = KLAS_MAPPER.get(format) if format_klas is None: raise click.ClickException(f"Format {format} not supported.") - writer = format_klas(sys.stdout, headers, output) + # Fix in a better way - maybe passing **kwargs? + writer = format_klas(sys.stdout, headers, output, catalog) for i in range(num_rows): writer.write(fake.generate_row(col_types)) writer.close() diff --git a/faker_cli/writer.py b/faker_cli/writer.py index 3407e59..a286780 100644 --- a/faker_cli/writer.py +++ b/faker_cli/writer.py @@ -17,7 +17,7 @@ def close(self): class CSVWriter(Writer): - def __init__(self, output, headers, filename): + def __init__(self, output, headers, filename, catalog_uri): super().__init__(output, headers) self.writer = csv.writer(self.output) self.write(headers) @@ -27,7 +27,7 @@ def write(self, row): class JSONWriter(Writer): - def __init__(self, output, headers, filename): + def __init__(self, output, headers, filename, catalog_uri): super().__init__(output, headers) self.writer = self.output diff --git a/faker_cli/writers/iceberg.py b/faker_cli/writers/iceberg.py index 05d4843..60ac088 100644 --- a/faker_cli/writers/iceberg.py +++ b/faker_cli/writers/iceberg.py @@ -5,19 +5,18 @@ import pyarrow as pa from pyiceberg.catalog import Catalog from pyiceberg.catalog.sql import SqlCatalog -from pyiceberg.exceptions import NoSuchNamespaceError +from pyiceberg.exceptions import NoSuchNamespaceError, NoSuchTableError -from faker_cli.writer import Writer from faker_cli.writers.parquet import ParquetWriter class CatalogManager: def __init__(self, uri: str, location: str) -> None: + [self.database, self.table] = urlparse(uri).netloc.split(".") self.catalog = self._from_uri(uri, location) - def _from_uri(self, uri: str, location:str) -> Catalog: + def _from_uri(self, uri: str, location: str) -> Catalog: u = urlparse(uri) - namespace = u.netloc.split(".")[0] if u.scheme == "glue": try: from pyiceberg.catalog.glue import GlueCatalog @@ -26,54 +25,58 @@ def _from_uri(self, uri: str, location:str) -> Catalog: "Using Iceberg writer with Glue catalog, but the 'boto3' package is not installed. " "Make sure to install faker-cli using `pip install faker-cli[iceberg,glue]`." ) - glue = GlueCatalog(namespace) + glue = GlueCatalog(self.database) try: - glue.load_namespace_properties(namespace) + glue.load_namespace_properties(self.database) + glue.load_table(u.netloc) + raise Exception("Table already exists, please delete or choose another name.") except NoSuchNamespaceError: - glue.create_namespace(namespace) + glue.create_namespace(self.database) + except NoSuchTableError: + pass + return glue elif u.scheme == "sqlite": self.temp_path = TemporaryDirectory() sql = SqlCatalog( - namespace, uri=f"sqlite:////{self.temp_path.name}/pyiceberg_catalog.db", warehouse=location + self.database, uri=f"sqlite:////{self.temp_path.name}/pyiceberg_catalog.db", warehouse=location ) - sql.create_namespace(namespace) + sql.create_namespace(self.database) return sql else: - raise Exception("Unknown catalog type") + raise Exception("Unsupported catalog type, only glue or sqllite are supported.") + + def create_table(self, schema, warehouse_path) -> pa.Table: + if self.catalog is SqlCatalog: + table = self.catalog.create_table( + f"{self.database}.{self.table}", + schema=schema, + ) + else: + # location required for GlueCatalog + table = self.catalog.create_table( + f"{self.database}.{self.table}", + schema=schema, + location=warehouse_path.rstrip("/"), + ) + return table class IcebergWriter(ParquetWriter): def __init__(self, output, headers, filename, catalog_uri): - super().__init__(output, headers, filename) + super().__init__(output, headers, filename, catalog_uri) self.warehouse_path = filename self.temp_path = TemporaryDirectory() self.table: pa.Table = None self.catalog: CatalogManager = CatalogManager(catalog_uri, filename) - # def write(self, row): - # df = pa.Table.from_pylist([dict(zip(self.headers, row))]) - # if self.table is None: - # self.table = self._init_table("test", df.schema) - - # # Kind of works, but writes a file per row - # self.table.append(df) - - def _init_table(self, name, schema): - # self.catalog.create_namespace("default") - - table = self.catalog.catalog.create_table( - f"default.{name}", - schema=schema, - ) - - return table - def close(self): - iceberg_table = self._init_table("test", self.table.schema) + iceberg_table = self.catalog.create_table(self.table.schema, self.warehouse_path) iceberg_table.overwrite(self.table) - pa.fs.copy_files(f"{self.temp_path.name}/pyiceberg_catalog.db", f"{self.warehouse_path}pyiceberg_catalog.db") - return super().close() + if self.catalog is SqlCatalog: + pa.fs.copy_files( + f"{self.temp_path.name}/pyiceberg_catalog.db", f"{self.warehouse_path}pyiceberg_catalog.db" + ) return super().close() diff --git a/faker_cli/writers/parquet.py b/faker_cli/writers/parquet.py index bbead2c..e93a76e 100644 --- a/faker_cli/writers/parquet.py +++ b/faker_cli/writers/parquet.py @@ -5,7 +5,7 @@ from faker_cli.writer import Writer class ParquetWriter(Writer): - def __init__(self, output, headers, filename): + def __init__(self, output, headers, filename, catalog_uri): super().__init__(output, headers) self.filename = filename self.table: pa.Table = None diff --git a/poetry.lock b/poetry.lock index 0fc1dab..2cd09af 100644 --- a/poetry.lock +++ b/poetry.lock @@ -484,56 +484,47 @@ typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} [[package]] name = "numpy" -version = "2.0.0" +version = "1.26.4" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" files = [ - {file = "numpy-2.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:04494f6ec467ccb5369d1808570ae55f6ed9b5809d7f035059000a37b8d7e86f"}, - {file = "numpy-2.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2635dbd200c2d6faf2ef9a0d04f0ecc6b13b3cad54f7c67c61155138835515d2"}, - {file = "numpy-2.0.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:0a43f0974d501842866cc83471bdb0116ba0dffdbaac33ec05e6afed5b615238"}, - {file = "numpy-2.0.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:8d83bb187fb647643bd56e1ae43f273c7f4dbcdf94550d7938cfc32566756514"}, - {file = "numpy-2.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79e843d186c8fb1b102bef3e2bc35ef81160ffef3194646a7fdd6a73c6b97196"}, - {file = "numpy-2.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d7696c615765091cc5093f76fd1fa069870304beaccfd58b5dcc69e55ef49c1"}, - {file = "numpy-2.0.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b4c76e3d4c56f145d41b7b6751255feefae92edbc9a61e1758a98204200f30fc"}, - {file = "numpy-2.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:acd3a644e4807e73b4e1867b769fbf1ce8c5d80e7caaef0d90dcdc640dfc9787"}, - {file = "numpy-2.0.0-cp310-cp310-win32.whl", hash = "sha256:cee6cc0584f71adefe2c908856ccc98702baf95ff80092e4ca46061538a2ba98"}, - {file = "numpy-2.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:ed08d2703b5972ec736451b818c2eb9da80d66c3e84aed1deeb0c345fefe461b"}, - {file = "numpy-2.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad0c86f3455fbd0de6c31a3056eb822fc939f81b1618f10ff3406971893b62a5"}, - {file = "numpy-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e7f387600d424f91576af20518334df3d97bc76a300a755f9a8d6e4f5cadd289"}, - {file = "numpy-2.0.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:34f003cb88b1ba38cb9a9a4a3161c1604973d7f9d5552c38bc2f04f829536609"}, - {file = "numpy-2.0.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:b6f6a8f45d0313db07d6d1d37bd0b112f887e1369758a5419c0370ba915b3871"}, - {file = "numpy-2.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f64641b42b2429f56ee08b4f427a4d2daf916ec59686061de751a55aafa22e4"}, - {file = "numpy-2.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7039a136017eaa92c1848152827e1424701532ca8e8967fe480fe1569dae581"}, - {file = "numpy-2.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:46e161722e0f619749d1cd892167039015b2c2817296104487cd03ed4a955995"}, - {file = "numpy-2.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0e50842b2295ba8414c8c1d9d957083d5dfe9e16828b37de883f51fc53c4016f"}, - {file = "numpy-2.0.0-cp311-cp311-win32.whl", hash = "sha256:2ce46fd0b8a0c947ae047d222f7136fc4d55538741373107574271bc00e20e8f"}, - {file = "numpy-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbd6acc766814ea6443628f4e6751d0da6593dae29c08c0b2606164db026970c"}, - {file = "numpy-2.0.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:354f373279768fa5a584bac997de6a6c9bc535c482592d7a813bb0c09be6c76f"}, - {file = "numpy-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4d2f62e55a4cd9c58c1d9a1c9edaedcd857a73cb6fda875bf79093f9d9086f85"}, - {file = "numpy-2.0.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:1e72728e7501a450288fc8e1f9ebc73d90cfd4671ebbd631f3e7857c39bd16f2"}, - {file = "numpy-2.0.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:84554fc53daa8f6abf8e8a66e076aff6ece62de68523d9f665f32d2fc50fd66e"}, - {file = "numpy-2.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c73aafd1afca80afecb22718f8700b40ac7cab927b8abab3c3e337d70e10e5a2"}, - {file = "numpy-2.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49d9f7d256fbc804391a7f72d4a617302b1afac1112fac19b6c6cec63fe7fe8a"}, - {file = "numpy-2.0.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:0ec84b9ba0654f3b962802edc91424331f423dcf5d5f926676e0150789cb3d95"}, - {file = "numpy-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:feff59f27338135776f6d4e2ec7aeeac5d5f7a08a83e80869121ef8164b74af9"}, - {file = "numpy-2.0.0-cp312-cp312-win32.whl", hash = "sha256:c5a59996dc61835133b56a32ebe4ef3740ea5bc19b3983ac60cc32be5a665d54"}, - {file = "numpy-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:a356364941fb0593bb899a1076b92dfa2029f6f5b8ba88a14fd0984aaf76d0df"}, - {file = "numpy-2.0.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e61155fae27570692ad1d327e81c6cf27d535a5d7ef97648a17d922224b216de"}, - {file = "numpy-2.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4554eb96f0fd263041baf16cf0881b3f5dafae7a59b1049acb9540c4d57bc8cb"}, - {file = "numpy-2.0.0-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:903703372d46bce88b6920a0cd86c3ad82dae2dbef157b5fc01b70ea1cfc430f"}, - {file = "numpy-2.0.0-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:3e8e01233d57639b2e30966c63d36fcea099d17c53bf424d77f088b0f4babd86"}, - {file = "numpy-2.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cde1753efe513705a0c6d28f5884e22bdc30438bf0085c5c486cdaff40cd67a"}, - {file = "numpy-2.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:821eedb7165ead9eebdb569986968b541f9908979c2da8a4967ecac4439bae3d"}, - {file = "numpy-2.0.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9a1712c015831da583b21c5bfe15e8684137097969c6d22e8316ba66b5baabe4"}, - {file = "numpy-2.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9c27f0946a3536403efb0e1c28def1ae6730a72cd0d5878db38824855e3afc44"}, - {file = "numpy-2.0.0-cp39-cp39-win32.whl", hash = "sha256:63b92c512d9dbcc37f9d81b123dec99fdb318ba38c8059afc78086fe73820275"}, - {file = "numpy-2.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:3f6bed7f840d44c08ebdb73b1825282b801799e325bcbdfa6bc5c370e5aecc65"}, - {file = "numpy-2.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9416a5c2e92ace094e9f0082c5fd473502c91651fb896bc17690d6fc475128d6"}, - {file = "numpy-2.0.0-pp39-pypy39_pp73-macosx_14_0_x86_64.whl", hash = "sha256:17067d097ed036636fa79f6a869ac26df7db1ba22039d962422506640314933a"}, - {file = "numpy-2.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38ecb5b0582cd125f67a629072fed6f83562d9dd04d7e03256c9829bdec027ad"}, - {file = "numpy-2.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:cef04d068f5fb0518a77857953193b6bb94809a806bd0a14983a8f12ada060c9"}, - {file = "numpy-2.0.0.tar.gz", hash = "sha256:cf5d1c9e6837f8af9f92b6bd3e86d513cdc11f60fd62185cc49ec7d1aba34864"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, + {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"}, + {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"}, + {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"}, + {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"}, + {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"}, + {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"}, + {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"}, + {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"}, + {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"}, + {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"}, + {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"}, + {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"}, + {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"}, + {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"}, + {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"}, + {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"}, + {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"}, + {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"}, + {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"}, + {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"}, + {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"}, + {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, ] [[package]] @@ -941,6 +932,21 @@ botocore = ">=1.33.2,<2.0a.0" [package.extras] crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] +[[package]] +name = "setuptools" +version = "70.3.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-70.3.0-py3-none-any.whl", hash = "sha256:fe384da74336c398e0d956d1cae0669bc02eed936cdb1d49b57de1990dc11ffc"}, + {file = "setuptools-70.3.0.tar.gz", hash = "sha256:f171bab1dfbc86b132997f26a119f6056a57950d058587841a0082e8830f9dc5"}, +] + +[package.extras] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "mypy (==1.10.0)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.3.2)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] + [[package]] name = "six" version = "1.16.0" @@ -1110,4 +1116,4 @@ parquet = ["pyarrow"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "4a2a1cf1e028e9f6219957faae3b941b8ab7f9283ec37c687899155301f99ba4" +content-hash = "68981fb7715eb9245d1d473fe60d13535df977c28eff025fe386a8a625123f78" diff --git a/pyproject.toml b/pyproject.toml index 82430b5..6f0ff4b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,11 +12,14 @@ faker = "^18.9.0" click = "^8.1.3" # urllib3 <2 required for glue/boto3 of iceberg urllib3 = "<2" +# numpy <2 required for compat issues +numpy = "<2" pyarrow = { version = "~14.0.2", optional = true } deltalake = { version = "^0.9.0", optional = true } pyiceberg = {extras = ["pyarrow", "sql-sqlite", "glue"], version = "^0.6.0"} mimesis = "^17.0.0" +setuptools = "^70.3.0" [tool.poetry.extras] delta = ["deltalake", "pyarrow"] parquet = ["pyarrow"]