Skip to content

Commit

Permalink
Merge pull request #2133 from takacsd/fix/parquet_reading_from_zip_or_s3
Browse files: browse the repository at this point in the history
Fix parquet reading from zip or s3
  • Loading branch information
anjakefala authored Nov 23, 2023
2 parents 81af233 + a48c9d2 commit 3f9b77a
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 12 deletions.
3 changes: 2 additions & 1 deletion visidata/loaders/parquet.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -18,7 +18,8 @@ def iterload(self):
pq = vd.importExternal("pyarrow.parquet", "pyarrow")
from visidata.loaders.arrow import arrow_to_vdtype

self.tbl = pq.read_table(str(self.source))
with self.source.open('rb') as f:
self.tbl = pq.read_table(f)
self.columns = []
for colname, col in zip(self.tbl.column_names, self.tbl.columns):
c = ParquetColumn(colname, type=arrow_to_vdtype(col.type), source=col)
Expand Down
13 changes: 5 additions & 8 deletions visidata/loaders/s3.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -61,13 +61,10 @@ def fs(self):
def fs(self, val):
self._fs = val

def open(self, *args, **kwargs):
def open(self, mode='r', **kwargs):
"""Open the current S3 path, decompressing along the way if needed."""

# Default to text mode unless we have a compressed file
mode = "rb" if self.compression else "r"

fp = self.fs.open(self.given, mode=mode, version_id=self.version_id)
fp = self.fs.open(self.given, mode="rb" if self.compression else mode, version_id=self.version_id)

# Workaround for https://github.com/ajkerrigan/visidata-plugins/issues/12
if hasattr(fp, "cache") and fp.cache.size != fp.size:
Expand All @@ -79,17 +76,17 @@ def open(self, *args, **kwargs):
if self.compression == "gz":
import gzip

return gzip.open(fp, *args, **kwargs)
return gzip.open(fp, mode, **kwargs)

if self.compression == "bz2":
import bz2

return bz2.open(fp, *args, **kwargs)
return bz2.open(fp, mode, **kwargs)

if self.compression == "xz":
import lzma

return lzma.open(fp, *args, **kwargs)
return lzma.open(fp, mode, **kwargs)

return fp

Expand Down
7 changes: 4 additions & 3 deletions visidata/path.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -238,9 +238,10 @@ def open(self, mode='rt', encoding=None, encoding_errors=None, newline=None):
return self.rfile.reopen()

if self.fp:
self.fptext = codecs.iterdecode(self.fp,
encoding=encoding or vd.options.encoding,
errors=encoding_errors or vd.options.encoding_errors)
if 'b' not in mode:
self.fptext = codecs.iterdecode(self.fp,
encoding=encoding or vd.options.encoding,
errors=encoding_errors or vd.options.encoding_errors)

if self.fptext:
self.rfile = RepeatFile(self.fptext)
Expand Down

0 comments on commit 3f9b77a

Please sign in to comment.