Skip to content

Commit

Permalink
DEPR: Default of observed=False in DataFrame.pivot_table (#56237)
Browse files (browse the repository at this point in the history)
* DEPR: Default of observed=False in DataFrame.pivot_table

* Finish up

* fixup

* Convert to code-block

* Kickoff builds
  • Loading branch information
rhshadrach authored Dec 4, 2023
1 parent 7b528c9 commit 4fd5a15
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 22 deletions.
2 changes: 1 addition & 1 deletion doc/source/user_guide/categorical.rst
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ Pivot tables:
raw_cat = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"])
df = pd.DataFrame({"A": raw_cat, "B": ["c", "d", "c", "d"], "values": [1, 2, 3, 4]})
pd.pivot_table(df, values="values", index=["A", "B"])
pd.pivot_table(df, values="values", index=["A", "B"], observed=False)
Data munging
------------
Expand Down
31 changes: 26 additions & 5 deletions doc/source/whatsnew/v0.23.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -286,12 +286,33 @@ For pivoting operations, this behavior is *already* controlled by the ``dropna``
df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
df
.. ipython:: python
pd.pivot_table(df, values='values', index=['A', 'B'],
dropna=True)
pd.pivot_table(df, values='values', index=['A', 'B'],
dropna=False)
.. code-block:: ipython
In [1]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=True)
Out[1]:
values
A B
a c 1.0
d 2.0
b c 3.0
d 4.0
In [2]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=False)
Out[2]:
values
A B
a c 1.0
d 2.0
y NaN
b c 3.0
d 4.0
y NaN
z c NaN
d NaN
y NaN
.. _whatsnew_0230.enhancements.window_raw:
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ Other Deprecations
- Deprecated the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`)
- Deprecated the ``unit`` keyword in :class:`TimedeltaIndex` construction, use :func:`to_timedelta` instead (:issue:`55499`)
- Deprecated the behavior of :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype; in a future version these will not perform dtype inference on the resulting :class:`Index`, do ``result.index = result.index.infer_objects()`` to retain the old behavior (:issue:`56161`)
- Deprecated the default of ``observed=False`` in :meth:`DataFrame.pivot_table`; the default will change to ``True`` in a future version (:issue:`56236`)
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
- Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`)
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9296,6 +9296,11 @@ def pivot(
If True: only show observed values for categorical groupers.
If False: show all values for categorical groupers.
.. deprecated:: 2.2.0
The default value of ``False`` is deprecated and will change to
``True`` in a future version of pandas.
sort : bool, default True
Specifies if the result should be sorted.
Expand Down Expand Up @@ -9406,7 +9411,7 @@ def pivot_table(
margins: bool = False,
dropna: bool = True,
margins_name: Level = "All",
observed: bool = False,
observed: bool | lib.NoDefault = lib.no_default,
sort: bool = True,
) -> DataFrame:
from pandas.core.reshape.pivot import pivot_table
Expand Down
20 changes: 17 additions & 3 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
Literal,
cast,
)
import warnings

import numpy as np

Expand All @@ -18,6 +19,7 @@
Appender,
Substitution,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import maybe_downcast_to_dtype
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -68,7 +70,7 @@ def pivot_table(
margins: bool = False,
dropna: bool = True,
margins_name: Hashable = "All",
observed: bool = False,
observed: bool | lib.NoDefault = lib.no_default,
sort: bool = True,
) -> DataFrame:
index = _convert_by(index)
Expand Down Expand Up @@ -123,7 +125,7 @@ def __internal_pivot_table(
margins: bool,
dropna: bool,
margins_name: Hashable,
observed: bool,
observed: bool | lib.NoDefault,
sort: bool,
) -> DataFrame:
"""
Expand Down Expand Up @@ -166,7 +168,18 @@ def __internal_pivot_table(
pass
values = list(values)

grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna)
observed_bool = False if observed is lib.no_default else observed
grouped = data.groupby(keys, observed=observed_bool, sort=sort, dropna=dropna)
if observed is lib.no_default and any(
ping._passed_categorical for ping in grouped.grouper.groupings
):
warnings.warn(
"The default value of observed=False is deprecated and will change "
"to observed=True in a future version of pandas. Specify "
"observed=False to silence this warning and retain the current behavior",
category=FutureWarning,
stacklevel=find_stack_level(),
)
agged = grouped.agg(aggfunc)

if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
Expand Down Expand Up @@ -719,6 +732,7 @@ def crosstab(
margins=margins,
margins_name=margins_name,
dropna=dropna,
observed=False,
**kwargs, # type: ignore[arg-type]
)

Expand Down
41 changes: 29 additions & 12 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,9 @@ def test_pivot_table_categorical(self):
["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True
)
df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
result = pivot_table(df, values="values", index=["A", "B"], dropna=True)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = pivot_table(df, values="values", index=["A", "B"], dropna=True)

exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
expected = DataFrame({"values": [1.0, 2.0, 3.0, 4.0]}, index=exp_index)
Expand All @@ -220,7 +222,9 @@ def test_pivot_table_dropna_categoricals(self, dropna):
)

df["A"] = df["A"].astype(CategoricalDtype(categories, ordered=False))
result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
expected_columns = Series(["a", "b", "c"], name="A")
expected_columns = expected_columns.astype(
CategoricalDtype(categories, ordered=False)
Expand Down Expand Up @@ -250,7 +254,9 @@ def test_pivot_with_non_observable_dropna(self, dropna):
}
)

result = df.pivot_table(index="A", values="B", dropna=dropna)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="A", values="B", dropna=dropna)
if dropna:
values = [2.0, 3.0]
codes = [0, 1]
Expand Down Expand Up @@ -283,7 +289,9 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
}
)

result = df.pivot_table(index="A", values="B", dropna=dropna)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="A", values="B", dropna=dropna)
expected = DataFrame(
{"B": [2.0, 3.0, 0.0]},
index=Index(
Expand All @@ -301,7 +309,10 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
def test_pivot_with_interval_index(self, interval_values, dropna):
# GH 25814
df = DataFrame({"A": interval_values, "B": 1})
result = df.pivot_table(index="A", values="B", dropna=dropna)

msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="A", values="B", dropna=dropna)
expected = DataFrame(
{"B": 1.0}, index=Index(interval_values.unique(), name="A")
)
Expand All @@ -322,9 +333,11 @@ def test_pivot_with_interval_index_margins(self):
}
)

pivot_tab = pivot_table(
df, index="C", columns="B", values="A", aggfunc="sum", margins=True
)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
pivot_tab = pivot_table(
df, index="C", columns="B", values="A", aggfunc="sum", margins=True
)

result = pivot_tab["All"]
expected = Series(
Expand Down Expand Up @@ -1827,7 +1840,9 @@ def test_categorical_margins_category(self, observed):

df.y = df.y.astype("category")
df.z = df.z.astype("category")
table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
tm.assert_frame_equal(table, expected)

def test_margins_casted_to_float(self):
Expand Down Expand Up @@ -1889,9 +1904,11 @@ def test_categorical_aggfunc(self, observed):
{"C1": ["A", "B", "C", "C"], "C2": ["a", "a", "b", "b"], "V": [1, 2, 3, 4]}
)
df["C1"] = df["C1"].astype("category")
result = df.pivot_table(
"V", index="C1", columns="C2", dropna=observed, aggfunc="count"
)
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(
"V", index="C1", columns="C2", dropna=observed, aggfunc="count"
)

expected_index = pd.CategoricalIndex(
["A", "B", "C"], categories=["A", "B", "C"], ordered=False, name="C1"
Expand Down

0 comments on commit 4fd5a15

Please sign in to comment.