diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 0000000..c3d5100
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1 @@
+* @pall-j
diff --git a/.github/workflows/dynamic_workflow.yaml b/.github/workflows/dynamic_workflow.yaml
index 6337ba2..c2df01d 100644
--- a/.github/workflows/dynamic_workflow.yaml
+++ b/.github/workflows/dynamic_workflow.yaml
@@ -3,17 +3,20 @@ name: Dynamic Pipeline
on: [push]
env:
- PYTHON_VERSION: 3.10.12
CHANGELOG_FILE: CHANGELOG.md
FULL_CHANGELOG_FILE: FULL_CHANGELOG.md
- CONVCO_VERSION: v0.5.0
+ CONVCO_VERSION: v0.6.1
POETRY_VERSION: 1.8.2
- PACKAGE_NAME: "dtml-dbx-pyspark-testing"
+ PACKAGE_NAME: "pysparkdt"
+ PACKAGE_PATH: "pysparkdt"
jobs:
tests:
- runs-on: ubuntu-20.04
+ runs-on: ubuntu-22.04
+ strategy:
+ matrix:
+ python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v4
with:
@@ -22,7 +25,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v5
with:
- python-version: ${{ env.PYTHON_VERSION }}
+ python-version: ${{ matrix.python-version }}
- name: Bootstrap poetry
run: |
@@ -39,12 +42,12 @@ jobs:
id: venv-cache
with:
path: .venv/
- key: poetry-${{ hashFiles('poetry.lock') }}
+ key: poetry-${{ matrix.python-version }}-${{ hashFiles('poetry.lock') }}
- name: Install dependencies
if: steps.venv-cache.outputs.cache-hit != 'true'
run: |
- poetry env use ${{ env.PYTHON_VERSION }}
+ poetry env use ${{ matrix.python-version }}
poetry install
- name: Run checks
@@ -54,13 +57,8 @@ jobs:
poetry run pytest .
publish:
- runs-on: ubuntu-22.04 # convco needs GLIBC_2.32 which is not in 20.04
- # Do not use core-tools-python-runtime because:
- # - it doesn't have preinstalled GH CLI and GH CLI installation takes
- # longer than installation of poetry
- # - there are issues with using convco in python-runtime:
- # GitHub writes: Error: Could not open the git repository.
- # - image pull of python-runtime is slower than installation of poetry
+ runs-on: ubuntu-22.04
+ needs: tests
if: github.ref == 'refs/heads/main'
steps:
- uses: actions/checkout@v4
@@ -78,7 +76,7 @@ jobs:
convco
version
--prefix=${{ env.PACKAGE_NAME }}/
- --paths=dtml
+ --paths=${{ env.PACKAGE_PATH }}
--paths=pyproject.toml
--paths=poetry.lock
"
@@ -89,15 +87,12 @@ jobs:
new_version=$($base_version_command --bump)
fi
new_tag=${{ env.PACKAGE_NAME }}/"$new_version"
- package_path=.
echo "OLD_VERSION=$old_version" >> $GITHUB_ENV
echo "NEW_VERSION=$new_version" >> $GITHUB_ENV
echo "NEW_TAG=$new_tag" >> $GITHUB_ENV
- echo "PACKAGE_PATH=$package_path" >> $GITHUB_ENV
echo "old version: $old_version"
echo "new version: $new_version"
echo "new tag: $new_tag"
- echo "packages path: $package_path"
- name: Release and publish
if: ${{ env.OLD_VERSION != env.NEW_VERSION }}
env:
@@ -126,7 +121,6 @@ jobs:
########################################
# Publish to JFrog
########################################
- cd ${{ env.PACKAGE_PATH }}
poetry version ${{ env.NEW_VERSION }}
poetry config repositories.dtml "https://datamole.jfrog.io/artifactory/api/pypi/pypi-general-local"
poetry config http-basic.dtml "" ${{ secrets.JFROG_SERVICE_JWT }}
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 160db42..12d9332 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -6,7 +6,27 @@ on:
types: [opened, edited, reopened, synchronize]
jobs:
- pr-title-check:
- uses: datamole-ai/core-mle-git-workflow/.github/workflows/convco-pr-title-check.yaml@main
+ pr-convco-title-check:
+ runs-on: ubuntu-22.04
+ steps:
+ - uses: actions/checkout@v4
+ - name: Install convco
+ run: |
+ curl -sSfL "https://github.com/convco/convco/releases/download/${{ env.CONVCO_VERSION }}/convco-ubuntu.zip" | zcat > /usr/local/bin/convco
+ chmod +x /usr/local/bin/convco
+ - name: Check PR Title is Conventional
+ run: echo "${{ github.event.pull_request.title }}" | convco check --from-stdin
dismiss-stale-pr-reviews:
- uses: datamole-ai/core-mle-git-workflow/.github/workflows/dismiss-stale-pr-reviews.yaml@main
+ runs-on: ubuntu-22.04
+ if: github.event.action == 'edited'
+ steps:
+ - name: Auth. Github-CLI
+ run: echo ${{ secrets.GITHUB_TOKEN }} | gh auth login --with-token
+ shell: bash
+ - name: Dismiss approvals
+ run: |
+ gh api "repos/${{ github.repository }}/pulls/${{ github.event.number }}/reviews" \
+ --jq '.[] | select(.state == "APPROVED") | .id' \
+ | xargs -I '{}' gh api --method=PUT -f message="Dismissed due to PR edit." \
+ "repos/${{ github.repository }}/pulls/${{ github.event.number }}/reviews/{}/dismissals"
+ shell: bash
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..355c72f
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,68 @@
+# Contributing to pysparkdt
+
+## Suggesting changes
+1. Create an [issue](https://github.com/datamole-ai/pysparkdt/issues) describing the change you want to make.
+
+## General workflow
+
+### Environment setup
+pysparkdt uses [Poetry](https://python-poetry.org/) for managing dependencies.
+Follow the instructions on the Poetry website to install it.
+We recommend [pyenv](https://github.com/pyenv/pyenv)
+([installer](https://github.com/pyenv/pyenv-installer)) for managing Python versions.
+```bash
+# Install Python 3.12
+pyenv install 3.12
+
+# Use pyenv's Python 3.12 for the current folder
+pyenv local 3.12
+
+# Create virtual environment (install all optional dependencies)
+poetry install --extras all
+
+# Activate Poetry virtual environment in the current shell
+poetry shell
+```
+
+You can also use `poetry run` to run commands in the virtual environment without activating it in the current shell (via `poetry shell`).
+
+
+### Test the newly implemented changes
+Create unit tests by creating a Python script in the folder `tests` prefixed with `test_`.
+The script should contain functions also prefixed with `test_` that make assertions.
+See the `tests` folder for reference.
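+
+A minimal, hypothetical illustration of that layout (purely illustrative; see
+the existing tests in the `tests` folder for real examples):
+
+```python
+# tests/test_example.py
+def test_sorted_preserves_length():
+    data = [3, 1, 2]
+    assert len(sorted(data)) == len(data)
+```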
+
+## Pull Requests & Git
+
+* Split your work into separate and atomic pull requests. Put any
+ non-obvious reasoning behind any change to the pull request description.
+ Separate “preparatory” changes and modifications from new features &
+ improvements.
+* The pull requests are squashed when merged. The PR title is used as the commit title.
+ The PR description is used as the commit description.
+* Use conventional commit messages in the PR title and description.
+ See [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/).
+ Usage of conventional commit PR titles and descriptions is enforced by the CI pipeline.
+* Prefer adding new commits over amending existing ones during the review process.
+ The latter makes it harder to review changes and track down modifications.
+
+
+## Code style
+
+* The line length is limited to 79 characters in Python code,
+except if it would make the code less readable.
+* `ruff` is used for formatting and linting Python code.
+The following commands can be used to properly format the code and check
+for linting errors with automatic fixing:
+```bash
+poetry run ruff format .
+poetry run ruff check . --fix
+```
+The following commands can be used to check if the code is properly
+formatted and check for linting errors:
+```bash
+poetry run ruff format --check .
+poetry run ruff check .
+```
+
+All of the above code style requirements are enforced by the CI pipeline.
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..01f2235
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,7 @@
+Copyright 2024 Datamole, s.r.o.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
index 7a059a6..254b1ac 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,440 @@
-# core-dbx-pyspark-testing
+# pysparkdt (pyspark-delta-testing)
-Package for simplifying local testing of pyspark Databricks pipelines.
+**An open-source Python library for simplifying local testing of Databricks
+workflows using PySpark and Delta tables.**
-Refer to the testing guidelines located in Databricks Competence Team
-confluence space for information on how to utilize this package.
+This library enables seamless testing of PySpark processing logic outside
+Databricks by **emulating Unity Catalog** behavior. It dynamically generates a
+local metastore to mimic Unity Catalog and supports simplified handling of
+Delta tables for both batch and streaming workloads.
+
+# Guideline
+
+## Table of Contents
+
+- [Overview](#overview)
+  - [Scope](#scope)
+  - [Key Points](#key-points)
+- [Setup](#setup)
+ 1. [Installation](#1-installation)
+ 2. [Testable Code](#2-testable-code)
+ 3. [File Structure](#3-file-structure)
+ 4. [Tests](#4-tests)
+- [Advanced](#advanced)
+ - [Testing Stream Processing](#testing-stream-processing)
+ - [Mocking Inside RDD and UDF Operations](#mocking-inside-rdd-and-udf-operations)
+- [Limitations](#limitations)
+ - [Map Key Type Must Be String](#map-key-type-must-be-string)
+
+## Overview
+
+### Scope
+This guideline helps you test Databricks Python pipelines with a
+focus on PySpark code. While basic unit testing knowledge with pytest is
+helpful, it's not the central focus.
+
+### Key Points
+- **Standalone Testing:** The setup allows you to test code without Databricks
+access, enabling easy CI integration.
+
+- **Local Metastore:** Mimic the Databricks Unity Catalog using a dynamically
+generated local metastore with local Delta tables.
+
+- **Code Testability:** Move core processing logic from notebooks to Python
+modules. Notebooks then serve as entrypoints.
+
+## Setup
+In the following section we assume that you are creating tests for a job
+that reads one Delta table as input and produces one Delta table as output,
+using PySpark for its processing.
+
+### 1. Installation
+**Install pysparkdt**
+- Install the package from PyPI. It is only needed in your test environment.
+
+```bash
+pip install pysparkdt
+```
+
+### 2. Testable code
+- **Modularization:** Move processing logic from notebooks to modules.
+
+- **Notebook Role:** Notebooks primarily handle initialization and triggering
+processing. They should contain all the code specific to Databricks
+(e.g. `dbutils` usage).
+
+
+entrypoint.py (Databricks Notebook)
+
+
+```python
+# Databricks notebook source
+import sys
+from pathlib import Path
+
+MODULE_DIR = Path.cwd().parent
+sys.path.append(MODULE_DIR.as_posix())
+
+# COMMAND ----------
+
+import os
+from myjobpackage.processing import process_data
+
+# COMMAND ----------
+
+input_table = dbutils.widgets.get('input_table')
+output_table = dbutils.widgets.get('output_table')
+
+# COMMAND ----------
+
+process_data(
+ session=spark,
+ input_table=input_table,
+ output_table=output_table,
+)
+```
+**myjobpackage.processing**
+- Contains the core processing logic to test.
+- Our test focuses on the core function `myjobpackage.processing.process_data`
+  (a batch sketch is shown below).
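+
+For orientation, a batch version of `process_data` might look like the
+following minimal sketch. It is purely illustrative - the actual
+transformation depends on your job's logic.
+
+```python
+from pyspark.sql import SparkSession
+from pyspark.sql import functions as F
+
+
+def process_data(
+    session: SparkSession,
+    input_table: str,
+    output_table: str,
+) -> None:
+    # Read the input Delta table registered in the catalog
+    df = session.read.table(input_table)
+
+    # Illustrative transformation: keep only rows with a non-null feature
+    result = df.filter(F.col('feature').isNotNull())
+
+    # Save the result as a Delta table
+    result.write.mode('overwrite').format('delta').saveAsTable(output_table)
+```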
+
+### 3. File structure
+
+```
+myjobpackage
+├── __init__.py
+├── entrypoint.py # Databricks Notebook
+├── processing.py
+└── tests
+    ├── __init__.py
+    ├── test_processing.py
+    └── data
+        ├── tables
+        │   ├── example_input.ndjson
+        │   └── expected_output.ndjson
+        └── schema
+            ├── example_input.json
+            └── expected_output.json
+```
+
+**Data Format**
+
+- **Test Data:** Newline-delimited JSON (`.ndjson`)
+- **Optional Schema:** JSON
+ - If present, full schema must be provided (all columns included).
+ - The format of the schema file is defined by PySpark StructType JSON
+ representation.
+
+
+example_input.ndjson
+
+
+```json lines
+{"id": 0, "time_utc": "2024-01-08T11:00:00", "name": "Jorge", "feature": 0.5876}
+{"id": 1, "time_utc": "2024-01-11T14:28:00", "name": "Ricardo", "feature": 0.42}
+```
+
+
+example_input.json
+
+
+```json
+{
+ "type": "struct",
+ "fields":
+ [
+ {
+ "name": "id",
+ "type": "long",
+ "nullable": false,
+ "metadata": {}
+ },
+ {
+ "name": "time_utc",
+ "type": "timestamp",
+ "nullable": false,
+ "metadata": {}
+ },
+ {
+ "name": "name",
+ "type": "string",
+ "nullable": true,
+ "metadata": {}
+ },
+ {
+ "name": "feature",
+ "type": "double",
+ "nullable": true,
+ "metadata": {}
+ }
+ ]
+}
+```
+
+**Tip:** A schema file for a loaded PySpark DataFrame `df` can be created using:
+
+```python
+import json
+
+with open('example_input.json', 'w') as file:
+    file.write(json.dumps(df.schema.jsonValue(), indent=4))
+```
+
+Thus, you can first load a table without a schema, then create a schema file
+from it and adjust the types as needed.
+
+### 4. Tests
+
+**Constants:** Define paths for test data and the temporary metastore.
+
+```python
+import os
+
+DATA_DIR = f'{os.path.dirname(__file__)}/data'
+JSON_TABLES_DIR = f'{DATA_DIR}/tables'
+TMP_DIR = f'{DATA_DIR}/tmp'
+METASTORE_DIR = f'{TMP_DIR}/metastore'
+```
+
+**Spark Fixture:** Define a fixture for the local Spark session using the
+`spark_base` function from pysparkdt. Specify the temporary metastore
+location.
+
+```python
+from pytest import fixture
+from pysparkdt import spark_base
+
+@fixture(scope='module')
+def spark():
+ yield from spark_base(METASTORE_DIR)
+```
+
+**Metastore Initialization:** Use `reinit_local_metastore`
+
+At the beginning of your test function, call the `reinit_local_metastore`
+function from pysparkdt to initialize the metastore with the tables from
+your JSON folder (`JSON_TABLES_DIR`). If the function is called while the
+metastore already exists, it deletes all the existing tables before
+initializing the new ones.
+
+*Alternatively, you can call this method only once per testing module,
+but then individual testing methods might affect each other by modifying
+metastore tables.*
+
+```python
+from myjobpackage.processing import process_data
+from pysparkdt import reinit_local_metastore
+from pyspark.sql import SparkSession
+from pyspark.testing import assertDataFrameEqual
+
+def test_process_data(
+ spark: SparkSession,
+):
+ reinit_local_metastore(spark, JSON_TABLES_DIR)
+
+ process_data(
+ session=spark,
+ input_table='example_input',
+ output_table='output',
+ )
+
+ output = spark.read.format('delta').table('output')
+ expected = spark.read.format('delta').table('expected_output')
+
+ assertDataFrameEqual(
+ actual=output.select(sorted(output.columns)),
+ expected=expected.select(sorted(expected.columns)),
+ )
+```
+
+ In the example above, we use `assertDataFrameEqual` to compare PySpark
+ DataFrames. We ensure the columns are ordered so that the order of result
+ columns does not matter. By default, the order of rows does not matter in
+ `assertDataFrameEqual` (this can be adjusted using the `checkRowOrder`
+ parameter).
+
+**⚠️ Note on running tests in parallel**
+
+With the setup above, the metastore is shared on the module scope.
+Therefore, if tests defined in the same module are run in parallel,
+race conditions can occur if multiple test functions use the same tables.
+
+To mitigate this, make sure each test in the module uses its own set of tables.
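+
+One possible arrangement is sketched below (hypothetical; the `_a`/`_b` table
+names are illustrative): initialize the metastore once per module and let
+every test touch only its own tables.
+
+```python
+@fixture(scope='module', autouse=True)
+def metastore(spark):
+    # Initialize the shared metastore once for the whole module
+    reinit_local_metastore(spark, JSON_TABLES_DIR)
+
+
+def test_job_a(spark: SparkSession):
+    # Reads example_input_a and writes output_a only
+    process_data(
+        session=spark,
+        input_table='example_input_a',
+        output_table='output_a',
+    )
+    ...
+
+
+def test_job_b(spark: SparkSession):
+    # Reads example_input_b and writes output_b only
+    process_data(
+        session=spark,
+        input_table='example_input_b',
+        output_table='output_b',
+    )
+    ...
+```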
+
+## Advanced
+
+### Testing Stream Processing
+
+Let's now focus on a case where a job reads an input Delta table using
+PySpark streaming, performs some computation on the data, and saves the
+result to an output Delta table.
+
+In order to be able to test the processing we need to explicitly wait for
+its completion. The best way to do it is to **await the streaming function
+performing the processing**.
+
+To be able to await the streaming function, the **test function needs to have
+access to it**. Thus, we need to make sure the streaming function (query in
+Databricks terms) is accessible - for example by returning it from
+the processing function.
+
+
+myjobpackage/processing.py
+
+
+```python
+import pyspark
+from pyspark.sql import SparkSession
+from pyspark.sql.streaming import StreamingQuery
+
+
+def process_data(
+    session: SparkSession,
+    input_table: str,
+    output_table: str,
+    checkpoint_location: str,
+) -> StreamingQuery:
+    load_query = session.readStream.format('delta').table(input_table)
+
+    def process_batch(df: pyspark.sql.DataFrame, _) -> None:
+        ...  # process df
+        df.write.mode('append').format('delta').saveAsTable(output_table)
+
+ return (
+ load_query.writeStream.format('delta')
+ .foreachBatch(process_batch)
+ .trigger(availableNow=True)
+ .option('checkpointLocation', checkpoint_location)
+ .start()
+ )
+```
+
+
+myjobpackage/tests/test_processing.py
+
+
+```python
+def test_process_data(spark: SparkSession):
+    ...
+    spark_processing = process_data(
+        session=spark,
+        input_table='example_input',
+ output_table='output',
+ checkpoint_location=f'{TMP_DIR}/_checkpoint/output',
+ )
+ spark_processing.awaitTermination(60)
+
+ output = spark.read.format('delta').table('output')
+ expected = spark.read.format('delta').table('expected_output')
+ ...
+```
+
+### Mocking Inside RDD and UDF Operations
+
+If we are testing the whole job’s processing code and it contains functions
+executed through `rdd.mapPartitions`, `rdd.map`, or UDFs, we need to add
+special handling for mocking, as regular patching does not propagate to worker
+nodes.
+
+
+myjobpackage/processing.py
+
+
+```python
+from typing import Any, Iterable
+
+import pyspark
+from pyspark.sql import Row
+
+def call_api(
+ data_df: pyspark.sql.DataFrame,
+) -> pyspark.sql.DataFrame:
+ # Call API in parallel (session per partition)
+ result = data_df.rdd.mapPartitions(_partition_run).toDF()
+ return result
+
+def _partition_run(
+ iterable: Iterable[Row],
+) -> Iterable[dict[str, Any]]:
+ with ApiSessionClient() as client:
+ for row in iterable:
+ ...
+ output = client.post(prepared_data)
+ ...
+ yield output
+
+def process_data(
+ data_df: pyspark.sql.DataFrame,
+) -> pyspark.sql.DataFrame:
+ ...
+ ... = call_api(...)
+ ...
+```
+
+ In this example, `_partition_run` calls an external API. We do not want to
+ call the actual API in our test, so we want to mock `ApiSessionClient`.
+
+```python
+from pytest import fixture
+
+import myjobpackage.processing
+
+def _mocked_session_post(json_data: dict):
+ ...
+ return output
+
+
+@fixture
+def api_session_client(mocker):
+ api_session_client_mock = mocker.patch.object(
+ myjobpackage.processing,
+ 'ApiSessionClient',
+ )
+ api_session_client_mock.return_value = session_client = mocker.Mock()
+ session_client.__enter__ = mocker.Mock()
+ session_client.__enter__.return_value = session_client_ctx = mocker.Mock()
+ session_client.__exit__ = mocker.Mock()
+ session_client_ctx.post = mocker.Mock(side_effect=_mocked_session_post)
+ return session_client
+```
+
+As `ApiSessionClient` is created inside `rdd.mapPartitions`, the patch does
+not reach the worker processes, so we also need to mock `call_api` itself.
+
+```python
+import pandas as pd
+import pyspark
+from pyspark.sql import SparkSession
+
+from myjobpackage.processing import _partition_run
+
+
+def _mocked_call_api(
+ data_df: pyspark.sql.DataFrame,
+) -> pyspark.sql.DataFrame:
+ results = list(_partition_run(data_df.collect()))
+ spark = SparkSession.builder.getOrCreate()
+ pandas_df = pd.DataFrame(results)
+ return spark.createDataFrame(pandas_df)
+
+
+@fixture
+def call_api_mock(mocker, api_session_client):
+ mocker.patch.object(
+ myjobpackage.processing, 'call_api', _mocked_call_api
+ )
+```
+
+Then we can run the test with the mocked API.
+
+```python
+def test_process_data(
+ spark: SparkSession,
+ call_api_mock,
+):
+ ...
+```
+
+## Limitations
+
+### Map Key Type Must Be String
+
+Although Spark supports non-string key types in map fields, the JSON format
+itself does not support non-string keys. In JSON, all keys are inherently
+interpreted as strings, regardless of their declared type in the schema.
+This discrepancy becomes problematic when testing with `.ndjson` files.
+
+Specifically, if the schema defines a map key type as anything other than
+`string` (such as `long` or `integer`), the reinitialization of the metastore
+will result in `None` values for all fields in the Delta table when the data
+is loaded. This happens because the keys in the JSON data are read as strings,
+but the schema expects another type, leading to a silent failure where no
+exception or warning is raised. This makes the issue difficult to detect
+and debug.
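+
+For illustration, a schema field like the following (a hypothetical fragment
+of a schema file) triggers the issue; declaring `keyType` as `string` avoids
+it.
+
+```json
+{
+    "name": "scores",
+    "type": {
+        "type": "map",
+        "keyType": "long",
+        "valueType": "double",
+        "valueContainsNull": true
+    },
+    "nullable": true,
+    "metadata": {}
+}
+```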
+
+## License
+
+pysparkdt is licensed under the [MIT
+license](https://opensource.org/license/mit/). See the
+[LICENSE file](https://github.com/datamole-ai/pysparkdt/blob/main/LICENSE) for more details.
+
+## How to Contribute
+
+See [CONTRIBUTING.md](https://github.com/datamole-ai/pysparkdt/blob/main/CONTRIBUTING.md).
diff --git a/dtml/dbx/pysparktesting/__init__.py b/dtml/dbx/pysparktesting/__init__.py
deleted file mode 100644
index 2895249..0000000
--- a/dtml/dbx/pysparktesting/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from dtml.dbx.pysparktesting.metastore import reinit_local_metastore
-from dtml.dbx.pysparktesting.spark_base import spark_base
diff --git a/poetry.lock b/poetry.lock
index 325cf56..fff7032 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -13,18 +13,18 @@ files = [
[[package]]
name = "delta-spark"
-version = "3.2.0"
+version = "3.2.1"
description = "Python APIs for using Delta Lake with Apache Spark"
optional = false
python-versions = ">=3.6"
files = [
- {file = "delta-spark-3.2.0.tar.gz", hash = "sha256:641967828e47c64805f8c746513da80bea24b5f19b069cdcf64561cd3692e11d"},
- {file = "delta_spark-3.2.0-py3-none-any.whl", hash = "sha256:c4ff3fa7218e58a702cb71eb64384b0005c4d6f0bbdd0fe0b38a53564d946e09"},
+ {file = "delta_spark-3.2.1-py3-none-any.whl", hash = "sha256:662ff591acbe190d0d0a07e65cde77f9b81f58da940ae8ca85f620b562165fc3"},
+ {file = "delta_spark-3.2.1.tar.gz", hash = "sha256:05384ebfeee8e779435302a3e0f1e565636270a2404bedc3a2ee1fea7c980626"},
]
[package.dependencies]
importlib-metadata = ">=1.0.0"
-pyspark = ">=3.5.0,<3.6.0"
+pyspark = ">=3.5.3,<3.6.0"
[[package]]
name = "exceptiongroup"
@@ -42,22 +42,26 @@ test = ["pytest (>=6)"]
[[package]]
name = "importlib-metadata"
-version = "8.2.0"
+version = "8.5.0"
description = "Read metadata from Python packages"
optional = false
python-versions = ">=3.8"
files = [
- {file = "importlib_metadata-8.2.0-py3-none-any.whl", hash = "sha256:11901fa0c2f97919b288679932bb64febaeacf289d18ac84dd68cb2e74213369"},
- {file = "importlib_metadata-8.2.0.tar.gz", hash = "sha256:72e8d4399996132204f9a16dcc751af254a48f8d1b20b9ff0f98d4a8f901e73d"},
+ {file = "importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b"},
+ {file = "importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7"},
]
[package.dependencies]
-zipp = ">=0.5"
+zipp = ">=3.20"
[package.extras]
+check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"]
+cover = ["pytest-cov"]
doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+enabler = ["pytest-enabler (>=2.2)"]
perf = ["ipython"]
-test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"]
+test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"]
+type = ["pytest-mypy"]
[[package]]
name = "iniconfig"
@@ -70,16 +74,165 @@ files = [
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
]
+[[package]]
+name = "numpy"
+version = "1.24.4"
+description = "Fundamental package for array computing in Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"},
+ {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"},
+ {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"},
+ {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6"},
+ {file = "numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc"},
+ {file = "numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e"},
+ {file = "numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810"},
+ {file = "numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254"},
+ {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7"},
+ {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5"},
+ {file = "numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d"},
+ {file = "numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694"},
+ {file = "numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61"},
+ {file = "numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f"},
+ {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e"},
+ {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc"},
+ {file = "numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2"},
+ {file = "numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706"},
+ {file = "numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400"},
+ {file = "numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f"},
+ {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9"},
+ {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d"},
+ {file = "numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835"},
+ {file = "numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8"},
+ {file = "numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef"},
+ {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"},
+ {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"},
+ {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"},
+]
+
+[[package]]
+name = "numpy"
+version = "1.26.4"
+description = "Fundamental package for array computing in Python"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"},
+ {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"},
+ {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"},
+ {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"},
+ {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"},
+ {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"},
+ {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"},
+ {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"},
+ {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"},
+ {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"},
+ {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"},
+ {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"},
+ {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"},
+ {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"},
+ {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"},
+ {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"},
+ {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"},
+ {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"},
+ {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"},
+ {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"},
+ {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"},
+ {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"},
+ {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"},
+ {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"},
+ {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"},
+ {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"},
+ {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"},
+ {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"},
+ {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"},
+ {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"},
+ {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"},
+ {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"},
+ {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"},
+ {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"},
+ {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"},
+ {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"},
+]
+
[[package]]
name = "packaging"
-version = "24.1"
+version = "24.2"
description = "Core utilities for Python packages"
optional = false
python-versions = ">=3.8"
files = [
- {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"},
- {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"},
+ {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
+ {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
+]
+
+[[package]]
+name = "pandas"
+version = "2.0.3"
+description = "Powerful data structures for data analysis, time series, and statistics"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"},
+ {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"},
+ {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183"},
+ {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0"},
+ {file = "pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210"},
+ {file = "pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e"},
+ {file = "pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8"},
+ {file = "pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26"},
+ {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d"},
+ {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df"},
+ {file = "pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd"},
+ {file = "pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b"},
+ {file = "pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061"},
+ {file = "pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5"},
+ {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089"},
+ {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0"},
+ {file = "pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02"},
+ {file = "pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78"},
+ {file = "pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b"},
+ {file = "pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e"},
+ {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b"},
+ {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641"},
+ {file = "pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682"},
+ {file = "pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc"},
+ {file = "pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c"},
+]
+
+[package.dependencies]
+numpy = [
+ {version = ">=1.20.3", markers = "python_version < \"3.10\""},
+ {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""},
+ {version = ">=1.23.2", markers = "python_version >= \"3.11\""},
]
+python-dateutil = ">=2.8.2"
+pytz = ">=2020.1"
+tzdata = ">=2022.1"
+
+[package.extras]
+all = ["PyQt5 (>=5.15.1)", "SQLAlchemy (>=1.4.16)", "beautifulsoup4 (>=4.9.3)", "bottleneck (>=1.3.2)", "brotlipy (>=0.7.0)", "fastparquet (>=0.6.3)", "fsspec (>=2021.07.0)", "gcsfs (>=2021.07.0)", "html5lib (>=1.1)", "hypothesis (>=6.34.2)", "jinja2 (>=3.0.0)", "lxml (>=4.6.3)", "matplotlib (>=3.6.1)", "numba (>=0.53.1)", "numexpr (>=2.7.3)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pandas-gbq (>=0.15.0)", "psycopg2 (>=2.8.6)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "python-snappy (>=0.6.0)", "pyxlsb (>=1.0.8)", "qtpy (>=2.2.0)", "s3fs (>=2021.08.0)", "scipy (>=1.7.1)", "tables (>=3.6.1)", "tabulate (>=0.8.9)", "xarray (>=0.21.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)", "zstandard (>=0.15.2)"]
+aws = ["s3fs (>=2021.08.0)"]
+clipboard = ["PyQt5 (>=5.15.1)", "qtpy (>=2.2.0)"]
+compression = ["brotlipy (>=0.7.0)", "python-snappy (>=0.6.0)", "zstandard (>=0.15.2)"]
+computation = ["scipy (>=1.7.1)", "xarray (>=0.21.0)"]
+excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pyxlsb (>=1.0.8)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)"]
+feather = ["pyarrow (>=7.0.0)"]
+fss = ["fsspec (>=2021.07.0)"]
+gcp = ["gcsfs (>=2021.07.0)", "pandas-gbq (>=0.15.0)"]
+hdf5 = ["tables (>=3.6.1)"]
+html = ["beautifulsoup4 (>=4.9.3)", "html5lib (>=1.1)", "lxml (>=4.6.3)"]
+mysql = ["SQLAlchemy (>=1.4.16)", "pymysql (>=1.0.2)"]
+output-formatting = ["jinja2 (>=3.0.0)", "tabulate (>=0.8.9)"]
+parquet = ["pyarrow (>=7.0.0)"]
+performance = ["bottleneck (>=1.3.2)", "numba (>=0.53.1)", "numexpr (>=2.7.1)"]
+plot = ["matplotlib (>=3.6.1)"]
+postgresql = ["SQLAlchemy (>=1.4.16)", "psycopg2 (>=2.8.6)"]
+spss = ["pyreadstat (>=1.1.2)"]
+sql-other = ["SQLAlchemy (>=1.4.16)"]
+test = ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"]
+xml = ["lxml (>=4.6.3)"]
[[package]]
name = "pluggy"
@@ -107,35 +260,89 @@ files = [
{file = "py4j-0.10.9.7.tar.gz", hash = "sha256:0b6e5315bb3ada5cf62ac651d107bb2ebc02def3dee9d9548e3baac644ea8dbb"},
]
+[[package]]
+name = "pyarrow"
+version = "17.0.0"
+description = "Python library for Apache Arrow"
+optional = false
+python-versions = ">=3.8"
+files = [
+ {file = "pyarrow-17.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:a5c8b238d47e48812ee577ee20c9a2779e6a5904f1708ae240f53ecbee7c9f07"},
+ {file = "pyarrow-17.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db023dc4c6cae1015de9e198d41250688383c3f9af8f565370ab2b4cb5f62655"},
+ {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da1e060b3876faa11cee287839f9cc7cdc00649f475714b8680a05fd9071d545"},
+ {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75c06d4624c0ad6674364bb46ef38c3132768139ddec1c56582dbac54f2663e2"},
+ {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:fa3c246cc58cb5a4a5cb407a18f193354ea47dd0648194e6265bd24177982fe8"},
+ {file = "pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:f7ae2de664e0b158d1607699a16a488de3d008ba99b3a7aa5de1cbc13574d047"},
+ {file = "pyarrow-17.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:5984f416552eea15fd9cee03da53542bf4cddaef5afecefb9aa8d1010c335087"},
+ {file = "pyarrow-17.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:1c8856e2ef09eb87ecf937104aacfa0708f22dfeb039c363ec99735190ffb977"},
+ {file = "pyarrow-17.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e19f569567efcbbd42084e87f948778eb371d308e137a0f97afe19bb860ccb3"},
+ {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b244dc8e08a23b3e352899a006a26ae7b4d0da7bb636872fa8f5884e70acf15"},
+ {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b72e87fe3e1db343995562f7fff8aee354b55ee83d13afba65400c178ab2597"},
+ {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dc5c31c37409dfbc5d014047817cb4ccd8c1ea25d19576acf1a001fe07f5b420"},
+ {file = "pyarrow-17.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e3343cb1e88bc2ea605986d4b94948716edc7a8d14afd4e2c097232f729758b4"},
+ {file = "pyarrow-17.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:a27532c38f3de9eb3e90ecab63dfda948a8ca859a66e3a47f5f42d1e403c4d03"},
+ {file = "pyarrow-17.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:9b8a823cea605221e61f34859dcc03207e52e409ccf6354634143e23af7c8d22"},
+ {file = "pyarrow-17.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f1e70de6cb5790a50b01d2b686d54aaf73da01266850b05e3af2a1bc89e16053"},
+ {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0071ce35788c6f9077ff9ecba4858108eebe2ea5a3f7cf2cf55ebc1dbc6ee24a"},
+ {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:757074882f844411fcca735e39aae74248a1531367a7c80799b4266390ae51cc"},
+ {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:9ba11c4f16976e89146781a83833df7f82077cdab7dc6232c897789343f7891a"},
+ {file = "pyarrow-17.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b0c6ac301093b42d34410b187bba560b17c0330f64907bfa4f7f7f2444b0cf9b"},
+ {file = "pyarrow-17.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:392bc9feabc647338e6c89267635e111d71edad5fcffba204425a7c8d13610d7"},
+ {file = "pyarrow-17.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:af5ff82a04b2171415f1410cff7ebb79861afc5dae50be73ce06d6e870615204"},
+ {file = "pyarrow-17.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:edca18eaca89cd6382dfbcff3dd2d87633433043650c07375d095cd3517561d8"},
+ {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7c7916bff914ac5d4a8fe25b7a25e432ff921e72f6f2b7547d1e325c1ad9d155"},
+ {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f553ca691b9e94b202ff741bdd40f6ccb70cdd5fbf65c187af132f1317de6145"},
+ {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0cdb0e627c86c373205a2f94a510ac4376fdc523f8bb36beab2e7f204416163c"},
+ {file = "pyarrow-17.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d7d192305d9d8bc9082d10f361fc70a73590a4c65cf31c3e6926cd72b76bc35c"},
+ {file = "pyarrow-17.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:02dae06ce212d8b3244dd3e7d12d9c4d3046945a5933d28026598e9dbbda1fca"},
+ {file = "pyarrow-17.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:13d7a460b412f31e4c0efa1148e1d29bdf18ad1411eb6757d38f8fbdcc8645fb"},
+ {file = "pyarrow-17.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9b564a51fbccfab5a04a80453e5ac6c9954a9c5ef2890d1bcf63741909c3f8df"},
+ {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32503827abbc5aadedfa235f5ece8c4f8f8b0a3cf01066bc8d29de7539532687"},
+ {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a155acc7f154b9ffcc85497509bcd0d43efb80d6f733b0dc3bb14e281f131c8b"},
+ {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:dec8d129254d0188a49f8a1fc99e0560dc1b85f60af729f47de4046015f9b0a5"},
+ {file = "pyarrow-17.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a48ddf5c3c6a6c505904545c25a4ae13646ae1f8ba703c4df4a1bfe4f4006bda"},
+ {file = "pyarrow-17.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:42bf93249a083aca230ba7e2786c5f673507fa97bbd9725a1e2754715151a204"},
+ {file = "pyarrow-17.0.0.tar.gz", hash = "sha256:4beca9521ed2c0921c1023e68d097d0299b62c362639ea315572a58f3f50fd28"},
+]
+
+[package.dependencies]
+numpy = ">=1.16.6"
+
+[package.extras]
+test = ["cffi", "hypothesis", "pandas", "pytest", "pytz"]
+
[[package]]
name = "pyspark"
-version = "3.5.1"
+version = "3.5.3"
description = "Apache Spark Python API"
optional = false
python-versions = ">=3.8"
files = [
- {file = "pyspark-3.5.1.tar.gz", hash = "sha256:dd6569e547365eadc4f887bf57f153e4d582a68c4b490de475d55b9981664910"},
+ {file = "pyspark-3.5.3.tar.gz", hash = "sha256:68b7cc0c0c570a7d8644f49f40d2da8709b01d30c9126cc8cf93b4f84f3d9747"},
]
[package.dependencies]
+numpy = {version = ">=1.15,<2", optional = true, markers = "extra == \"sql\""}
+pandas = {version = ">=1.0.5", optional = true, markers = "extra == \"sql\""}
py4j = "0.10.9.7"
+pyarrow = {version = ">=4.0.0", optional = true, markers = "extra == \"sql\""}
[package.extras]
-connect = ["googleapis-common-protos (>=1.56.4)", "grpcio (>=1.56.0)", "grpcio-status (>=1.56.0)", "numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
-ml = ["numpy (>=1.15)"]
-mllib = ["numpy (>=1.15)"]
-pandas-on-spark = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
-sql = ["numpy (>=1.15)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
+connect = ["googleapis-common-protos (>=1.56.4)", "grpcio (>=1.56.0)", "grpcio-status (>=1.56.0)", "numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
+ml = ["numpy (>=1.15,<2)"]
+mllib = ["numpy (>=1.15,<2)"]
+pandas-on-spark = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
+sql = ["numpy (>=1.15,<2)", "pandas (>=1.0.5)", "pyarrow (>=4.0.0)"]
[[package]]
name = "pytest"
-version = "8.3.2"
+version = "8.3.4"
description = "pytest: simple powerful testing with Python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "pytest-8.3.2-py3-none-any.whl", hash = "sha256:4ba08f9ae7dcf84ded419494d229b48d0903ea6407b030eaec46df5e6a73bba5"},
- {file = "pytest-8.3.2.tar.gz", hash = "sha256:c132345d12ce551242c87269de812483f5bcc87cdbb4722e48487ba194f9fdce"},
+ {file = "pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6"},
+ {file = "pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"},
]
[package.dependencies]
@@ -166,59 +373,161 @@ pytest = ">=6.2.5"
[package.extras]
dev = ["pre-commit", "pytest-asyncio", "tox"]
+[[package]]
+name = "python-dateutil"
+version = "2.9.0.post0"
+description = "Extensions to the standard Python datetime module"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+files = [
+ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
+ {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
+]
+
+[package.dependencies]
+six = ">=1.5"
+
+[[package]]
+name = "pytz"
+version = "2024.2"
+description = "World timezone definitions, modern and historical"
+optional = false
+python-versions = "*"
+files = [
+ {file = "pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725"},
+ {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"},
+]
+
[[package]]
name = "ruff"
-version = "0.3.7"
+version = "0.8.3"
description = "An extremely fast Python linter and code formatter, written in Rust."
optional = false
python-versions = ">=3.7"
files = [
- {file = "ruff-0.3.7-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:0e8377cccb2f07abd25e84fc5b2cbe48eeb0fea9f1719cad7caedb061d70e5ce"},
- {file = "ruff-0.3.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:15a4d1cc1e64e556fa0d67bfd388fed416b7f3b26d5d1c3e7d192c897e39ba4b"},
- {file = "ruff-0.3.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d28bdf3d7dc71dd46929fafeec98ba89b7c3550c3f0978e36389b5631b793663"},
- {file = "ruff-0.3.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:379b67d4f49774ba679593b232dcd90d9e10f04d96e3c8ce4a28037ae473f7bb"},
- {file = "ruff-0.3.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c060aea8ad5ef21cdfbbe05475ab5104ce7827b639a78dd55383a6e9895b7c51"},
- {file = "ruff-0.3.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:ebf8f615dde968272d70502c083ebf963b6781aacd3079081e03b32adfe4d58a"},
- {file = "ruff-0.3.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d48098bd8f5c38897b03604f5428901b65e3c97d40b3952e38637b5404b739a2"},
- {file = "ruff-0.3.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da8a4fda219bf9024692b1bc68c9cff4b80507879ada8769dc7e985755d662ea"},
- {file = "ruff-0.3.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c44e0149f1d8b48c4d5c33d88c677a4aa22fd09b1683d6a7ff55b816b5d074f"},
- {file = "ruff-0.3.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3050ec0af72b709a62ecc2aca941b9cd479a7bf2b36cc4562f0033d688e44fa1"},
- {file = "ruff-0.3.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a29cc38e4c1ab00da18a3f6777f8b50099d73326981bb7d182e54a9a21bb4ff7"},
- {file = "ruff-0.3.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5b15cc59c19edca917f51b1956637db47e200b0fc5e6e1878233d3a938384b0b"},
- {file = "ruff-0.3.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:e491045781b1e38b72c91247cf4634f040f8d0cb3e6d3d64d38dcf43616650b4"},
- {file = "ruff-0.3.7-py3-none-win32.whl", hash = "sha256:bc931de87593d64fad3a22e201e55ad76271f1d5bfc44e1a1887edd0903c7d9f"},
- {file = "ruff-0.3.7-py3-none-win_amd64.whl", hash = "sha256:5ef0e501e1e39f35e03c2acb1d1238c595b8bb36cf7a170e7c1df1b73da00e74"},
- {file = "ruff-0.3.7-py3-none-win_arm64.whl", hash = "sha256:789e144f6dc7019d1f92a812891c645274ed08af6037d11fc65fcbc183b7d59f"},
- {file = "ruff-0.3.7.tar.gz", hash = "sha256:d5c1aebee5162c2226784800ae031f660c350e7a3402c4d1f8ea4e97e232e3ba"},
+ {file = "ruff-0.8.3-py3-none-linux_armv6l.whl", hash = "sha256:8d5d273ffffff0acd3db5bf626d4b131aa5a5ada1276126231c4174543ce20d6"},
+ {file = "ruff-0.8.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:e4d66a21de39f15c9757d00c50c8cdd20ac84f55684ca56def7891a025d7e939"},
+ {file = "ruff-0.8.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:c356e770811858bd20832af696ff6c7e884701115094f427b64b25093d6d932d"},
+ {file = "ruff-0.8.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c0a60a825e3e177116c84009d5ebaa90cf40dfab56e1358d1df4e29a9a14b13"},
+ {file = "ruff-0.8.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:75fb782f4db39501210ac093c79c3de581d306624575eddd7e4e13747e61ba18"},
+ {file = "ruff-0.8.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f26bc76a133ecb09a38b7868737eded6941b70a6d34ef53a4027e83913b6502"},
+ {file = "ruff-0.8.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:01b14b2f72a37390c1b13477c1c02d53184f728be2f3ffc3ace5b44e9e87b90d"},
+ {file = "ruff-0.8.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:53babd6e63e31f4e96ec95ea0d962298f9f0d9cc5990a1bbb023a6baf2503a82"},
+ {file = "ruff-0.8.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1ae441ce4cf925b7f363d33cd6570c51435972d697e3e58928973994e56e1452"},
+ {file = "ruff-0.8.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7c65bc0cadce32255e93c57d57ecc2cca23149edd52714c0c5d6fa11ec328cd"},
+ {file = "ruff-0.8.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:5be450bb18f23f0edc5a4e5585c17a56ba88920d598f04a06bd9fd76d324cb20"},
+ {file = "ruff-0.8.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8faeae3827eaa77f5721f09b9472a18c749139c891dbc17f45e72d8f2ca1f8fc"},
+ {file = "ruff-0.8.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:db503486e1cf074b9808403991663e4277f5c664d3fe237ee0d994d1305bb060"},
+ {file = "ruff-0.8.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:6567be9fb62fbd7a099209257fef4ad2c3153b60579818b31a23c886ed4147ea"},
+ {file = "ruff-0.8.3-py3-none-win32.whl", hash = "sha256:19048f2f878f3ee4583fc6cb23fb636e48c2635e30fb2022b3a1cd293402f964"},
+ {file = "ruff-0.8.3-py3-none-win_amd64.whl", hash = "sha256:f7df94f57d7418fa7c3ffb650757e0c2b96cf2501a0b192c18e4fb5571dfada9"},
+ {file = "ruff-0.8.3-py3-none-win_arm64.whl", hash = "sha256:fe2756edf68ea79707c8d68b78ca9a58ed9af22e430430491ee03e718b5e4936"},
+ {file = "ruff-0.8.3.tar.gz", hash = "sha256:5e7558304353b84279042fc584a4f4cb8a07ae79b2bf3da1a7551d960b5626d3"},
+]
+
+[[package]]
+name = "setuptools"
+version = "75.6.0"
+description = "Easily download, build, install, upgrade, and uninstall Python packages"
+optional = false
+python-versions = ">=3.9"
+files = [
+ {file = "setuptools-75.6.0-py3-none-any.whl", hash = "sha256:ce74b49e8f7110f9bf04883b730f4765b774ef3ef28f722cce7c273d253aaf7d"},
+ {file = "setuptools-75.6.0.tar.gz", hash = "sha256:8199222558df7c86216af4f84c30e9b34a61d8ba19366cc914424cdbd28252f6"},
+]
+
+[package.extras]
+check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.7.0)"]
+core = ["importlib_metadata (>=6)", "jaraco.collections", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"]
+cover = ["pytest-cov"]
+doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"]
+enabler = ["pytest-enabler (>=2.2)"]
+test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"]
+type = ["importlib_metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (>=1.12,<1.14)", "pytest-mypy"]
+
+[[package]]
+name = "six"
+version = "1.17.0"
+description = "Python 2 and 3 compatibility utilities"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
+files = [
+ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"},
+ {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"},
]

[[package]]
name = "tomli"
-version = "2.0.1"
+version = "2.2.1"
description = "A lil' TOML parser"
optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
+files = [
+ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
+ {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
+ {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"},
+ {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"},
+ {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"},
+ {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"},
+ {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"},
+ {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"},
+ {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"},
+ {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"},
+ {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"},
+ {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"},
+ {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"},
+ {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"},
+ {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"},
+ {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"},
+ {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"},
+ {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"},
+ {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"},
+ {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"},
+ {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"},
+ {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"},
+ {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"},
+ {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"},
+ {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"},
+ {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"},
+ {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"},
+ {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"},
+ {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"},
+ {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"},
+ {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"},
+ {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"},
+]
+
+[[package]]
+name = "tzdata"
+version = "2024.2"
+description = "Provider of IANA time zone data"
+optional = false
+python-versions = ">=2"
files = [
- {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
- {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
+ {file = "tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd"},
+ {file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"},
]

[[package]]
name = "zipp"
-version = "3.19.2"
+version = "3.20.2"
description = "Backport of pathlib-compatible object wrapper for zip files"
optional = false
python-versions = ">=3.8"
files = [
- {file = "zipp-3.19.2-py3-none-any.whl", hash = "sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c"},
- {file = "zipp-3.19.2.tar.gz", hash = "sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19"},
+ {file = "zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350"},
+ {file = "zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29"},
]

[package.extras]
+check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"]
+cover = ["pytest-cov"]
doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
-test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
+enabler = ["pytest-enabler (>=2.2)"]
+test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"]
+type = ["pytest-mypy"]

[metadata]
lock-version = "2.0"
-python-versions = "^3.10.12"
-content-hash = "59b58025966f843daa3fef0479e41f43891d6a881917635e5797f9b9468d5435"
+python-versions = ">=3.8, <3.13"
+content-hash = "90e4a5aa92a2db1ea17227f409ddb3f80b9bc379eed973abb675fc7b8056176a"
diff --git a/pyproject.toml b/pyproject.toml
index 3142af7..ba26333 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,42 +1,47 @@
[tool.poetry]
-name = "dtml-dbx-pyspark-testing"
+name = "pysparkdt"
version = "0" # DO NOT CHANGE - managed by Git tags and CI
-description = "Package for simplifying local testing of pyspark DBX pipelines."
-authors = ["MLE "]
-packages = [
- { include = "dtml" },
-]
+license = "MIT"
+repository = "https://github.com/datamole-ai/pysparkdt"
+description = "An open-source Python library for simplifying local testing of Databricks workflows that use PySpark and Delta tables."
+authors = ["Juraj Pall "]
+exclude = ["tests"]
+readme = "README.md"

[[tool.poetry.source]]
name = "PyPI"
priority = "primary"

[tool.poetry.dependencies]
-python = "^3.10.12"
-pyspark = "^3.5.0"
+python = ">=3.8, <3.13"
+pyspark = { version = "^3.5.0", extras = ["sql"] }
delta-spark = "^3.0.0"
+numpy = [
+ { version = ">=1.26.0", python = ">=3.9" },
+ { version = "<1.26.0", python = ">=3.8, <3.9" }
+]
+setuptools = { version = "^75.6.0", python = ">=3.12,<3.13" } # Necessary for Python 3.12 as distutils is missing without it

[tool.poetry.group.dev.dependencies]
-ruff = "^0.3.0"
-pytest = "^8.0.2"
-pytest-mock = "^3.12.0"
+ruff = "^0.8.3"
+pytest = "^8.3.4"
+pytest-mock = "^3.14.0"

[tool.ruff]
line-length = 79
-target-version = "py310"
-select = [
+target-version = "py38"
+lint.select = [
"E",
"F",
"N",
"W",
"I001",
]
-src = ["task"]

[tool.ruff.format]
quote-style = 'single'

-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402", "F401"]
"*/**" = ["E402"]
diff --git a/pysparkdt/__init__.py b/pysparkdt/__init__.py
new file mode 100644
index 0000000..669a1c7
--- /dev/null
+++ b/pysparkdt/__init__.py
@@ -0,0 +1,2 @@
+from pysparkdt.metastore import reinit_local_metastore
+from pysparkdt.spark_base import spark_base
diff --git a/dtml/dbx/pysparktesting/metastore.py b/pysparkdt/metastore.py
similarity index 100%
rename from dtml/dbx/pysparktesting/metastore.py
rename to pysparkdt/metastore.py
diff --git a/dtml/dbx/pysparktesting/spark_base.py b/pysparkdt/spark_base.py
similarity index 100%
rename from dtml/dbx/pysparktesting/spark_base.py
rename to pysparkdt/spark_base.py
diff --git a/tests/data/tables/example_input.ndjson b/tests/data/tables/example_input.ndjson
new file mode 100644
index 0000000..cb274f4
--- /dev/null
+++ b/tests/data/tables/example_input.ndjson
@@ -0,0 +1,2 @@
+{"id": 0, "time_utc": "2024-01-08T11:00:00", "name": "Jorge", "feature": 0.5876}
+{"id": 1, "time_utc": "2024-01-11T14:28:00", "name": "Ricardo", "feature": 0.42}
diff --git a/tests/data/tables/expected_output.ndjson b/tests/data/tables/expected_output.ndjson
new file mode 100644
index 0000000..8970947
--- /dev/null
+++ b/tests/data/tables/expected_output.ndjson
@@ -0,0 +1,2 @@
+{"id": 0, "time_utc": "2024-01-08T11:00:00", "name": "Jorge", "result": 58.76}
+{"id": 1, "time_utc": "2024-01-11T14:28:00", "name": "Ricardo", "result": 42}
diff --git a/tests/data/tables/schema/example_input.json b/tests/data/tables/schema/example_input.json
new file mode 100644
index 0000000..c65c393
--- /dev/null
+++ b/tests/data/tables/schema/example_input.json
@@ -0,0 +1,30 @@
+{
+ "type": "struct",
+ "fields":
+ [
+ {
+ "name": "id",
+ "type": "long",
+ "nullable": false,
+ "metadata": {}
+ },
+ {
+ "name": "time_utc",
+ "type": "timestamp",
+ "nullable": false,
+ "metadata": {}
+ },
+ {
+ "name": "name",
+ "type": "string",
+ "nullable": true,
+ "metadata": {}
+ },
+ {
+ "name": "feature",
+ "type": "double",
+ "nullable": true,
+ "metadata": {}
+ }
+ ]
+}
diff --git a/tests/data/tables/schema/expected_output.json b/tests/data/tables/schema/expected_output.json
new file mode 100644
index 0000000..66ae323
--- /dev/null
+++ b/tests/data/tables/schema/expected_output.json
@@ -0,0 +1,30 @@
+{
+ "type": "struct",
+ "fields":
+ [
+ {
+ "name": "id",
+ "type": "long",
+ "nullable": false,
+ "metadata": {}
+ },
+ {
+ "name": "time_utc",
+ "type": "timestamp",
+ "nullable": false,
+ "metadata": {}
+ },
+ {
+ "name": "name",
+ "type": "string",
+ "nullable": true,
+ "metadata": {}
+ },
+ {
+ "name": "result",
+ "type": "double",
+ "nullable": true,
+ "metadata": {}
+ }
+ ]
+}
diff --git a/tests/processing.py b/tests/processing.py
new file mode 100644
index 0000000..d0848be
--- /dev/null
+++ b/tests/processing.py
@@ -0,0 +1,11 @@
+from pyspark.sql import SparkSession
+from pyspark.sql.functions import col
+
+
+def process_data(
+ input_table: str, output_table: str, session: SparkSession
+) -> None:
+ df = session.read.table(input_table)
+ df = df.withColumn('result', col('feature') * 100)
+ df = df.drop('feature')
+ df.write.format('delta').mode('overwrite').saveAsTable(output_table)
diff --git a/tests/test_dummy.py b/tests/test_dummy.py
deleted file mode 100644
index 4f5dab1..0000000
--- a/tests/test_dummy.py
+++ /dev/null
@@ -1,2 +0,0 @@
-def test_dummy():
- import dtml.dbx.pysparktesting # noqa: F401
diff --git a/tests/test_processing.py b/tests/test_processing.py
new file mode 100644
index 0000000..ad6a0c7
--- /dev/null
+++ b/tests/test_processing.py
@@ -0,0 +1,36 @@
+import os
+
+from pyspark.sql import SparkSession
+from pyspark.testing import assertDataFrameEqual
+from pytest import fixture
+
+from pysparkdt import reinit_local_metastore, spark_base
+
+from .processing import process_data
+
+DATA_DIR = f'{os.path.dirname(__file__)}/data'
+JSON_TABLES_DIR = f'{DATA_DIR}/tables'
+TMP_DIR = f'{DATA_DIR}/tmp'
+METASTORE_DIR = f'{TMP_DIR}/metastore'
+
+
+@fixture(scope='module')
+def spark():
+ yield from spark_base(METASTORE_DIR)
+
+
+def test_process_data(
+ spark: SparkSession,
+):
+ reinit_local_metastore(spark, JSON_TABLES_DIR)
+ process_data(
+ session=spark,
+ input_table='example_input',
+ output_table='output',
+ )
+ output = spark.read.format('delta').table('output')
+ expected = spark.read.format('delta').table('expected_output')
+ assertDataFrameEqual(
+ actual=output.select(sorted(output.columns)),
+ expected=expected.select(sorted(expected.columns)),
+ )