diff --git a/visidata/loaders/parquet.py b/visidata/loaders/parquet.py index 9ee624c90..b9fcbbfc5 100644 --- a/visidata/loaders/parquet.py +++ b/visidata/loaders/parquet.py @@ -18,7 +18,8 @@ def iterload(self): pq = vd.importExternal("pyarrow.parquet", "pyarrow") from visidata.loaders.arrow import arrow_to_vdtype - self.tbl = pq.read_table(str(self.source)) + with self.source.open('rb') as f: + self.tbl = pq.read_table(f) self.columns = [] for colname, col in zip(self.tbl.column_names, self.tbl.columns): c = ParquetColumn(colname, type=arrow_to_vdtype(col.type), source=col) diff --git a/visidata/loaders/s3.py b/visidata/loaders/s3.py index 7f181cc40..3507e6ec0 100644 --- a/visidata/loaders/s3.py +++ b/visidata/loaders/s3.py @@ -61,13 +61,10 @@ def fs(self): def fs(self, val): self._fs = val - def open(self, *args, **kwargs): + def open(self, mode='r', **kwargs): """Open the current S3 path, decompressing along the way if needed.""" - # Default to text mode unless we have a compressed file - mode = "rb" if self.compression else "r" - - fp = self.fs.open(self.given, mode=mode, version_id=self.version_id) + fp = self.fs.open(self.given, mode="rb" if self.compression else mode, version_id=self.version_id) # Workaround for https://github.com/ajkerrigan/visidata-plugins/issues/12 if hasattr(fp, "cache") and fp.cache.size != fp.size: @@ -79,17 +76,17 @@ def open(self, *args, **kwargs): if self.compression == "gz": import gzip - return gzip.open(fp, *args, **kwargs) + return gzip.open(fp, mode, **kwargs) if self.compression == "bz2": import bz2 - return bz2.open(fp, *args, **kwargs) + return bz2.open(fp, mode, **kwargs) if self.compression == "xz": import lzma - return lzma.open(fp, *args, **kwargs) + return lzma.open(fp, mode, **kwargs) return fp diff --git a/visidata/path.py b/visidata/path.py index 74b18bb1c..66703d3bb 100644 --- a/visidata/path.py +++ b/visidata/path.py @@ -238,9 +238,10 @@ def open(self, mode='rt', encoding=None, encoding_errors=None, newline=None): return self.rfile.reopen() if self.fp: - self.fptext = codecs.iterdecode(self.fp, - encoding=encoding or vd.options.encoding, - errors=encoding_errors or vd.options.encoding_errors) + if 'b' not in mode: + self.fptext = codecs.iterdecode(self.fp, + encoding=encoding or vd.options.encoding, + errors=encoding_errors or vd.options.encoding_errors) if self.fptext: self.rfile = RepeatFile(self.fptext)