diff --git a/meerkat/columns/deferred/file.py b/meerkat/columns/deferred/file.py index f4062978d..e2862ce48 100644 --- a/meerkat/columns/deferred/file.py +++ b/meerkat/columns/deferred/file.py @@ -526,7 +526,7 @@ def load_audio(path: str) -> Audio: "loader": load_audio, "formatters": DeferredAudioFormatterGroup, "exts": [".wav", ".mp3"], - "defer": False + "defer": False, }, } diff --git a/meerkat/dataframe.py b/meerkat/dataframe.py index f399ece08..517e74353 100644 --- a/meerkat/dataframe.py +++ b/meerkat/dataframe.py @@ -676,6 +676,7 @@ def from_huggingface(cls, *args, **kwargs): >>> dict_of_dataframes = DataFrame.from_huggingface('boolq') """ import datasets + datasets.logging.set_verbosity_error() import pyarrow.compute as pc @@ -700,6 +701,7 @@ def _convert_columns(dataset: datasets.Dataset): ) elif (~path.isnull()).all(): from meerkat.columns.deferred.file import FileColumn + df[name] = FileColumn(path, type="audio") else: raise ValueError( @@ -711,10 +713,12 @@ def _convert_columns(dataset: datasets.Dataset): bytes = ArrowScalarColumn(pc.struct_field(column._data, "bytes")) path = ArrowScalarColumn(pc.struct_field(column._data, "path")) if (~ArrowScalarColumn(bytes).isnull()).all(): - from meerkat.interactive.formatter import ImageFormatterGroup import io + from PIL import Image + from meerkat.interactive.formatter import ImageFormatterGroup + df[name] = bytes.defer( lambda x: Image.open(io.BytesIO(x)) ).format(ImageFormatterGroup().defer()) @@ -741,7 +745,6 @@ def _convert_columns(dataset: datasets.Dataset): ) ) else: - df = cls.from_arrow(dataset._data) return _convert_columns(dataset) @classmethod diff --git a/meerkat/interactive/formatter/__init__.py b/meerkat/interactive/formatter/__init__.py index 1705b07ff..988a5fe49 100644 --- a/meerkat/interactive/formatter/__init__.py +++ b/meerkat/interactive/formatter/__init__.py @@ -2,7 +2,12 @@ from .base import Formatter, deferred_formatter_group from .boolean import BooleanFormatter, BooleanFormatterGroup from .code import CodeFormatter, CodeFormatterGroup -from .image import ImageFormatter, ImageFormatterGroup, DeferredImageFormatter, DeferredImageFormatterGroup +from .image import ( + DeferredImageFormatter, + DeferredImageFormatterGroup, + ImageFormatter, + ImageFormatterGroup, +) from .number import NumberFormatter, NumberFormatterGroup from .pdf import PDFFormatter, PDFFormatterGroup from .raw_html import HTMLFormatter, HTMLFormatterGroup