Merge pull request #2 from amazon-science/adivekar
Updated dependencies: python>=3.11.11, pandas==2.*, dask==2024.10.0, ray==2.41.0, torch>=2.5.1, and made corresponding fixes.
adivekar-utexas authored Jan 23, 2025
2 parents c771283 + 4c49163 commit bae23c6
Showing 12 changed files with 30 additions and 36 deletions.
6 changes: 0 additions & 6 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,6 @@ jobs:
- name: Build Package
run: hatch build

- name: Publish to Test PyPI
env:
TWINE_USERNAME: "__token__"
TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
run: twine upload --repository testpypi dist/*

- name: Publish to PyPI
env:
TWINE_USERNAME: "__token__"
Expand Down
13 changes: 6 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ authors = [
]
description = ""
readme = "README.md"
requires-python = ">=3.11"
requires-python = ">=3.11.11"
classifiers = [
"Programming Language :: Python :: 3",
"Operating System :: OS Independent",
Expand All @@ -22,8 +22,8 @@ dependencies = [
"requests",
"pyyaml",
"urllib3",
"pandas==1.*",
"numpy==1.*",
"pandas==2.*",
"numpy",
"pydantic==1.10.15",
"xlrd",
"XlsxWriter",
Expand All @@ -38,13 +38,12 @@ dependencies = [

[project.optional-dependencies]
all = [
"pytest",
"orjson",
"ray==2.9.2",
"ray==2.41.0",
"ray[default]",
"dask==2024.10.0",
"dask[complete]",
"dask",
"torch==2.3.0",
"torch>=2.5.1",
"imageio",
]

Expand Down
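A quick way to confirm a local environment matches these new pins (an illustrative snippet, not part of the commit):

import sys

import dask
import pandas
import ray
import torch

# Checks mirror the pins above; torch only has a lower bound, so print it.
assert sys.version_info >= (3, 11)
assert pandas.__version__.startswith("2.")  # pandas==2.*
assert dask.__version__ == "2024.10.0"      # dask==2024.10.0
assert ray.__version__ == "2.41.0"          # ray==2.41.0
print("torch:", torch.__version__)          # torch>=2.5.1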
4 changes: 2 additions & 2 deletions src/bears/core/frame/DaskScalableDataFrame.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@

if _IS_DASK_INSTALLED:
import dask.dataframe as dd
from dask.dataframe.core import DataFrame as DaskDataFrame
from dask.dataframe import DataFrame as DaskDataFrame
from dask.dataframe import Series as DaskSeries
from dask.dataframe.core import Scalar as DaskScalar
from dask.dataframe.core import Series as DaskSeries

class DaskScalableDataFrame(ScalableDataFrame):
layout = DataLayout.DASK
Expand Down
4 changes: 2 additions & 2 deletions src/bears/core/frame/DaskScalableSeries.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@

if _IS_DASK_INSTALLED:
import dask.array as da
from dask.dataframe.core import DataFrame as DaskDataFrame
from dask.dataframe import DataFrame as DaskDataFrame
from dask.dataframe import Series as DaskSeries
from dask.dataframe.core import Scalar as DaskScalar
from dask.dataframe.core import Series as DaskSeries

class DaskScalableSeries(ScalableSeries):
layout = DataLayout.DASK
Expand Down
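The import change in DaskScalableDataFrame.py and DaskScalableSeries.py tracks dask itself: recent dask releases (including the 2024.10.0 pin above, where the query-planning backend is the default) expose DataFrame and Series through the public dask.dataframe namespace, while dask.dataframe.core is internal and no longer guarantees those names. A version-tolerant sketch of the same fix, assuming only that the public names exist:

# Sketch: import the public names first; fall back to the legacy internal
# module only on old dask releases.
try:
    from dask.dataframe import DataFrame as DaskDataFrame
    from dask.dataframe import Series as DaskSeries
except ImportError:
    from dask.dataframe.core import DataFrame as DaskDataFrame
    from dask.dataframe.core import Series as DaskSeries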
2 changes: 2 additions & 0 deletions src/bears/core/frame/ScalableDataFrame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1960,6 +1960,8 @@ def to_dask(self, **kwargs) -> pd.DataFrame:

def as_dask(self, **kwargs) -> DaskDataFrame:
_check_is_dask_installed()
import dask.dataframe as dd

if "npartitions" not in kwargs and "chunksize" not in kwargs:
kwargs["npartitions"] = 1 ## Create a dask dataframe with a single partition.
return dd.from_pandas(self.pandas(), **kwargs)
Expand Down
2 changes: 2 additions & 0 deletions src/bears/core/frame/ScalableSeries.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,6 +669,8 @@ def to_dask(self, **kwargs) -> DaskSeries:

def as_dask(self, **kwargs) -> DaskSeries:
_check_is_dask_installed()
import dask.dataframe as dd

if "npartitions" not in kwargs and "chunksize" not in kwargs:
kwargs["npartitions"] = 1 ## Create a dask series with a single partition.
return dd.from_pandas(self.pandas(), **kwargs)
Expand Down
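In both as_dask() methods, the added local import defers loading dask.dataframe to call time, so merely importing the library no longer requires dask. A standalone sketch of the same pattern (the function name here is hypothetical):

import pandas as pd

def as_single_partition_dask(df: pd.DataFrame, **kwargs):
    # Deferred import: pandas-only users never need dask installed.
    import dask.dataframe as dd

    # Default to one partition unless the caller sized the output.
    if "npartitions" not in kwargs and "chunksize" not in kwargs:
        kwargs["npartitions"] = 1
    return dd.from_pandas(df, **kwargs)

ddf = as_single_partition_dask(pd.DataFrame({"a": [1, 2, 3]}))
print(ddf.npartitions)  # 1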
2 changes: 1 addition & 1 deletion src/bears/util/language/_structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def not_impl(
## ======================== List utils ======================== ##
def is_list_like(l: Any) -> bool:
with optional_dependency("dask"):
from dask.dataframe.core import Series as DaskSeries
from dask.dataframe import Series as DaskSeries

if isinstance(l, (list, tuple, ValuesView, ItemsView, pd.Series, DaskSeries)):
return True
Expand Down
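Here the DaskSeries import sits inside an optional_dependency("dask") block, so the dask-specific isinstance check is skipped entirely when dask is absent. A minimal re-implementation of that helper (hypothetical; the real one lives in bears.util and may do more):

from contextlib import contextmanager

@contextmanager
def optional_dependency(*module_names: str):
    # Suppress ImportError so the guarded block is simply skipped when the
    # optional package is missing; other exceptions still propagate.
    try:
        yield
    except ImportError:
        pass

with optional_dependency("dask"):
    from dask.dataframe import Series as DaskSeries  # skipped if dask is absent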
8 changes: 4 additions & 4 deletions src/bears/writer/dataframe/CsvWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
from typing import *

import pandas as pd
from pydantic import constr, validator

from bears.constants import DataLayout, FileFormat, Storage
from bears.core.frame.DaskScalableDataFrame import DaskScalableDataFrame
from bears.core.frame.ScalableDataFrame import ScalableDataFrame
from bears.writer.dataframe.DataFrameWriter import DataFrameWriter
from bears.util import String
from pydantic import constr, validator
from bears.writer.dataframe.DataFrameWriter import DataFrameWriter


class CsvWriter(DataFrameWriter):
Expand Down Expand Up @@ -44,7 +44,7 @@ def _write_sdf(
def _write_dask_sdf(
self,
destination: Union[io.IOBase, str],
sdf: DaskScalableDataFrame,
sdf: ScalableDataFrame,
storage: Storage,
is_dir: bool,
name_function: Optional[Callable[[int], str]] = None,
Expand Down
13 changes: 6 additions & 7 deletions src/bears/writer/dataframe/DataFrameWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
from pydantic import conint, constr, root_validator

from bears.constants import DataLayout, FileContents, MLTypeSchema, Parallelize, Storage
from bears.core.frame.DaskScalableDataFrame import DaskScalableDataFrame
from bears.core.frame.ScalableDataFrame import ScalableDataFrame, ScalableDataFrameRawType
from bears.writer.Writer import Writer
from bears.util import (
FileSystemUtil,
Log,
Expand All @@ -26,6 +24,7 @@
)
from bears.util.aws import S3Util
from bears.util.language._import import _check_is_dask_installed
from bears.writer.Writer import Writer


class DataFrameWriter(Writer, ABC):
Expand Down Expand Up @@ -267,7 +266,7 @@ def _write_s3(
def _write_sdf_single(
self,
destination: Union[io.IOBase, str],
sdf: Union[ScalableDataFrame, DaskScalableDataFrame],
sdf: Union[ScalableDataFrame],
storage: Storage,
**kwargs,
) -> Optional[str]:
Expand All @@ -291,7 +290,7 @@ def _write_sdf_single(
def _write_sdf_multi(
self,
destination: str, ## Do not allow writing multiple files to stream.
sdf: Union[ScalableDataFrame, DaskScalableDataFrame],
sdf: ScalableDataFrame,
storage: Storage,
file_name: Optional[constr(min_length=1)] = None,
**kwargs,
Expand All @@ -309,7 +308,7 @@ def _write_sdf_multi(
def _write_sdf_multi_dask(
self,
destination_dir: str, ## Local/remote folder path. Do not allow writing multiple files to stream.
sdf: DaskScalableDataFrame,
sdf: ScalableDataFrame,
storage: Storage,
file_name: constr(min_length=1),
**kwargs,
Expand Down Expand Up @@ -470,7 +469,7 @@ def _write_sdf(
def _write_dask_sdf(
self,
destination: Union[io.IOBase, str],
sdf: DaskScalableDataFrame,
sdf: ScalableDataFrame,
storage: Storage,
is_dir: bool,
name_function: Optional[Callable[[int], str]] = None,
Expand All @@ -479,7 +478,7 @@ def _write_dask_sdf(
"""
Writes to a stream/file/folder using Dask-specific implementations of to_csv, to_parquet, etc.
:param destination: stream/file/folder.
:param sdf: DaskScalableDataFrame to write.
:param sdf: ScalableDataFrame to write.
:param storage: the storage medium.
:param is_dir: whether the destination is a directory or file. When is_dir=False, this method should write a
single file.
Expand Down
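With DaskScalableDataFrame no longer imported at module scope, the writer signatures above fall back to the ScalableDataFrame base class. Since the Dask-specific writers still assume a Dask-backed frame, a runtime guard of this shape could enforce that (hypothetical code, not part of this diff):

from bears.constants import DataLayout
from bears.core.frame.ScalableDataFrame import ScalableDataFrame

def _ensure_dask_layout(sdf: ScalableDataFrame) -> None:
    # _write_dask_sdf-style methods expect a Dask-backed frame; fail fast
    # with a clear error if another layout slips through.
    if sdf.layout is not DataLayout.DASK:
        raise TypeError(f"Expected DataLayout.DASK, found: {sdf.layout}")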
6 changes: 3 additions & 3 deletions src/bears/writer/dataframe/JsonLinesWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
from typing import *

import pandas as pd
from pydantic import constr

from bears.constants import DataLayout, FileFormat, Storage
from bears.core.frame.DaskScalableDataFrame import DaskScalableDataFrame
from bears.core.frame.ScalableDataFrame import ScalableDataFrame
from bears.writer.dataframe.DataFrameWriter import DataFrameWriter
from pydantic import constr


class JsonLinesWriter(DataFrameWriter):
Expand Down Expand Up @@ -34,7 +34,7 @@ def _write_sdf(
def _write_dask_sdf(
self,
destination: Union[io.IOBase, str],
sdf: DaskScalableDataFrame,
sdf: ScalableDataFrame,
storage: Storage,
is_dir: bool,
name_function: Optional[Callable[[int], str]] = None,
Expand Down
3 changes: 1 addition & 2 deletions src/bears/writer/dataframe/NpzWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from pydantic import Field

from bears.constants import DataLayout, FileFormat, Storage
from bears.core.frame.DaskScalableDataFrame import DaskScalableDataFrame
from bears.core.frame.ScalableDataFrame import DaskDataFrame, ScalableDataFrame
from bears.writer.dataframe.DataFrameWriter import DataFrameWriter

Expand Down Expand Up @@ -37,7 +36,7 @@ def _write_sdf(
def _write_dask_sdf(
self,
destination: Union[io.IOBase, str],
sdf: DaskScalableDataFrame,
sdf: ScalableDataFrame,
storage: Storage,
is_dir: bool,
name_function: Optional[Callable[[int], str]] = None,
Expand Down
3 changes: 1 addition & 2 deletions src/bears/writer/dataframe/ParquetWriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from pydantic import Field

from bears.constants import DataLayout, FileFormat, Storage
from bears.core.frame.DaskScalableDataFrame import DaskScalableDataFrame
from bears.core.frame.ScalableDataFrame import ScalableDataFrame
from bears.writer.dataframe.DataFrameWriter import DataFrameWriter

Expand Down Expand Up @@ -34,7 +33,7 @@ def _write_sdf(
def _write_dask_sdf(
self,
destination: Union[io.IOBase, str],
sdf: DaskScalableDataFrame,
sdf: ScalableDataFrame,
storage: Storage,
is_dir: bool,
name_function: Optional[Callable[[int], str]] = None,
Expand Down
