Skip to content

Commit

Permalink
DEPR: Remove bytes input for read_excel (pandas-dev#53830)
Browse files Browse the repository at this point in the history
* Adding deprecation logic and updating documentation.

* Adding PR number to new unit test

* Updating read_excel() documentation per reviewer recommendations.

* Updating read_excel() documentation per reviewer recommendations.

* Updating read_excel() documentation per reviewer recommendations.

* Updating implementation per reviewer recommendations.
  • Loading branch information
rmhowe425 authored and im-vinicius committed Jul 8, 2023
1 parent 0bf1e55 commit c642d9b
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 2 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ Deprecations
- Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
- Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
- Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`)
- Deprecated bytes input to :func:`read_excel`. To read a file path, use a string or path-like object. (:issue:`53767`)
- Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
- Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`)
- Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead. (:issue:`53409`)
Expand All @@ -305,6 +306,7 @@ Deprecations
- Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`)
- Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`)
- Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_210.performance:
Expand Down
13 changes: 13 additions & 0 deletions pandas/io/excel/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
cast,
overload,
)
import warnings
import zipfile

from pandas._config import config
Expand All @@ -36,6 +37,7 @@
Appender,
doc,
)
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -97,6 +99,10 @@
By file-like object, we refer to objects with a ``read()`` method,
such as a file handle (e.g. via builtin ``open`` function)
or ``StringIO``.
.. deprecated:: 2.1.0
Passing byte strings is deprecated. To read from a
byte string, wrap it in a ``BytesIO`` object.
sheet_name : str, int, list, or None, default 0
Strings are used for sheet names. Integers are used in zero-indexed
sheet positions (chart sheets do not count as a sheet position).
Expand Down Expand Up @@ -1504,6 +1510,13 @@ def __init__(
# First argument can also be bytes, so create a buffer
if isinstance(path_or_buffer, bytes):
path_or_buffer = BytesIO(path_or_buffer)
warnings.warn(
"Passing bytes to 'read_excel' is deprecated and "
"will be removed in a future version. To read from a "
"byte string, wrap it in a `BytesIO` object.",
FutureWarning,
stacklevel=find_stack_level(),
)

# Could be a str, ExcelFile, Book, etc.
self.io = path_or_buffer
Expand Down
17 changes: 15 additions & 2 deletions pandas/tests/io/excel/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
time,
)
from functools import partial
from io import BytesIO
import os
from pathlib import Path
import platform
Expand Down Expand Up @@ -873,7 +874,7 @@ def test_corrupt_bytes_raises(self, engine):
error = BadZipFile
msg = "File is not a zip file"
with pytest.raises(error, match=msg):
pd.read_excel(bad_stream)
pd.read_excel(BytesIO(bad_stream))

@pytest.mark.network
@pytest.mark.single_cpu
Expand Down Expand Up @@ -1438,6 +1439,18 @@ def test_euro_decimal_format(self, read_ext):


class TestExcelFileRead:
def test_deprecate_bytes_input(self, engine, read_ext):
# GH 53830
msg = (
"Passing bytes to 'read_excel' is deprecated and "
"will be removed in a future version. To read from a "
"byte string, wrap it in a `BytesIO` object."
)

with tm.assert_produces_warning(FutureWarning, match=msg):
with open("test1" + read_ext, "rb") as f:
pd.read_excel(f.read(), engine=engine)

@pytest.fixture(autouse=True)
def cd_and_set_engine(self, engine, datapath, monkeypatch):
"""
Expand Down Expand Up @@ -1621,7 +1634,7 @@ def test_excel_read_binary(self, engine, read_ext):
with open("test1" + read_ext, "rb") as f:
data = f.read()

actual = pd.read_excel(data, engine=engine)
actual = pd.read_excel(BytesIO(data), engine=engine)
tm.assert_frame_equal(expected, actual)

def test_excel_read_binary_via_read_excel(self, read_ext, engine):
Expand Down

0 comments on commit c642d9b

Please # to comment.