From 4fd5a1562f122a0afd4cdd12339881831dd53972 Mon Sep 17 00:00:00 2001
From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com>
Date: Mon, 4 Dec 2023 13:27:35 -0500
Subject: [PATCH] DEPR: Default of observed=False in DataFrame.pivot_table
 (#56237)

* DEPR: Default of observed=False in DataFrame.pivot_table

* Finish up

* fixup

* Convert to code-block

* Kickoff builds
---
 doc/source/user_guide/categorical.rst |  2 +-
 doc/source/whatsnew/v0.23.0.rst       | 31 ++++++++++++++++----
 doc/source/whatsnew/v2.2.0.rst        |  1 +
 pandas/core/frame.py                  |  7 ++++-
 pandas/core/reshape/pivot.py          | 20 +++++++++++--
 pandas/tests/reshape/test_pivot.py    | 41 +++++++++++++++++++--------
 6 files changed, 80 insertions(+), 22 deletions(-)

diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
index 34d04745ccdb5..8fb991dca02db 100644
--- a/doc/source/user_guide/categorical.rst
+++ b/doc/source/user_guide/categorical.rst
@@ -647,7 +647,7 @@ Pivot tables:
 
     raw_cat = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"])
     df = pd.DataFrame({"A": raw_cat, "B": ["c", "d", "c", "d"], "values": [1, 2, 3, 4]})
-    pd.pivot_table(df, values="values", index=["A", "B"])
+    pd.pivot_table(df, values="values", index=["A", "B"], observed=False)
 
 Data munging
 ------------
diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst
index cdffc6968a170..808741ccf4475 100644
--- a/doc/source/whatsnew/v0.23.0.rst
+++ b/doc/source/whatsnew/v0.23.0.rst
@@ -286,12 +286,33 @@ For pivoting operations, this behavior is *already* controlled by the ``dropna``
    df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
    df
 
-.. ipython:: python
 
-   pd.pivot_table(df, values='values', index=['A', 'B'],
-                  dropna=True)
-   pd.pivot_table(df, values='values', index=['A', 'B'],
-                  dropna=False)
+.. code-block:: ipython
+
+    In [1]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=True)
+
+    Out[1]:
+         values
+    A B
+    a c     1.0
+      d     2.0
+    b c     3.0
+      d     4.0
+
+    In [2]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=False)
+
+    Out[2]:
+         values
+    A B
+    a c     1.0
+      d     2.0
+      y     NaN
+    b c     3.0
+      d     4.0
+      y     NaN
+    z c     NaN
+      d     NaN
+      y     NaN
 
 
 .. _whatsnew_0230.enhancements.window_raw:
diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 8bd9ac1aa366c..5ee2bb1778cb1 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -435,6 +435,7 @@ Other Deprecations
 - Deprecated the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`)
 - Deprecated the ``unit`` keyword in :class:`TimedeltaIndex` construction, use :func:`to_timedelta` instead (:issue:`55499`)
 - Deprecated the behavior of :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype; in a future version these will not perform dtype inference on the resulting :class:`Index`, do ``result.index = result.index.infer_objects()`` to retain the old behavior (:issue:`56161`)
+- Deprecated the default of ``observed=False`` in :meth:`DataFrame.pivot_table`; the default will change to ``True`` in a future version (:issue:`56236`)
 - Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
 - Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
 - Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index c9da273b99ce9..3edfea4480e47 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -9296,6 +9296,11 @@ def pivot(
             If True: only show observed values for categorical groupers.
             If False: show all values for categorical groupers.
 
+            .. deprecated:: 2.2.0
+
+                The default value of ``False`` is deprecated and will change to
+                ``True`` in a future version of pandas.
+
         sort : bool, default True
             Specifies if the result should be sorted.
 
@@ -9406,7 +9411,7 @@ def pivot_table(
         margins: bool = False,
         dropna: bool = True,
         margins_name: Level = "All",
-        observed: bool = False,
+        observed: bool | lib.NoDefault = lib.no_default,
         sort: bool = True,
     ) -> DataFrame:
         from pandas.core.reshape.pivot import pivot_table
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index eba4f72b5eb8f..82718d4c43a65 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -10,6 +10,7 @@
     Literal,
     cast,
 )
+import warnings
 
 import numpy as np
 
@@ -18,6 +19,7 @@
     Appender,
     Substitution,
 )
+from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.cast import maybe_downcast_to_dtype
 from pandas.core.dtypes.common import (
@@ -68,7 +70,7 @@ def pivot_table(
     margins: bool = False,
     dropna: bool = True,
     margins_name: Hashable = "All",
-    observed: bool = False,
+    observed: bool | lib.NoDefault = lib.no_default,
     sort: bool = True,
 ) -> DataFrame:
     index = _convert_by(index)
@@ -123,7 +125,7 @@ def __internal_pivot_table(
     margins: bool,
     dropna: bool,
     margins_name: Hashable,
-    observed: bool,
+    observed: bool | lib.NoDefault,
     sort: bool,
 ) -> DataFrame:
     """
@@ -166,7 +168,18 @@ def __internal_pivot_table(
                 pass
         values = list(values)
 
-    grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna)
+    observed_bool = False if observed is lib.no_default else observed
+    grouped = data.groupby(keys, observed=observed_bool, sort=sort, dropna=dropna)
+    if observed is lib.no_default and any(
+        ping._passed_categorical for ping in grouped.grouper.groupings
+    ):
+        warnings.warn(
+            "The default value of observed=False is deprecated and will change "
+            "to observed=True in a future version of pandas. Specify "
+            "observed=False to silence this warning and retain the current behavior",
+            category=FutureWarning,
+            stacklevel=find_stack_level(),
+        )
     agged = grouped.agg(aggfunc)
 
     if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
@@ -719,6 +732,7 @@ def crosstab(
         margins=margins,
         margins_name=margins_name,
         dropna=dropna,
+        observed=False,
         **kwargs,  # type: ignore[arg-type]
     )
 
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 4a852daaadf98..dab2b034d3fd4 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -201,7 +201,9 @@ def test_pivot_table_categorical(self):
             ["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True
         )
         df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
-        result = pivot_table(df, values="values", index=["A", "B"], dropna=True)
+        msg = "The default value of observed=False is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = pivot_table(df, values="values", index=["A", "B"], dropna=True)
 
         exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
         expected = DataFrame({"values": [1.0, 2.0, 3.0, 4.0]}, index=exp_index)
@@ -220,7 +222,9 @@ def test_pivot_table_dropna_categoricals(self, dropna):
         )
 
         df["A"] = df["A"].astype(CategoricalDtype(categories, ordered=False))
-        result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
+        msg = "The default value of observed=False is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
         expected_columns = Series(["a", "b", "c"], name="A")
         expected_columns = expected_columns.astype(
             CategoricalDtype(categories, ordered=False)
@@ -250,7 +254,9 @@ def test_pivot_with_non_observable_dropna(self, dropna):
             }
         )
 
-        result = df.pivot_table(index="A", values="B", dropna=dropna)
+        msg = "The default value of observed=False is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.pivot_table(index="A", values="B", dropna=dropna)
         if dropna:
             values = [2.0, 3.0]
             codes = [0, 1]
@@ -283,7 +289,9 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
             }
         )
 
-        result = df.pivot_table(index="A", values="B", dropna=dropna)
+        msg = "The default value of observed=False is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.pivot_table(index="A", values="B", dropna=dropna)
         expected = DataFrame(
             {"B": [2.0, 3.0, 0.0]},
             index=Index(
@@ -301,7 +309,10 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
     def test_pivot_with_interval_index(self, interval_values, dropna):
         # GH 25814
         df = DataFrame({"A": interval_values, "B": 1})
-        result = df.pivot_table(index="A", values="B", dropna=dropna)
+
+        msg = "The default value of observed=False is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.pivot_table(index="A", values="B", dropna=dropna)
         expected = DataFrame(
             {"B": 1.0}, index=Index(interval_values.unique(), name="A")
         )
@@ -322,9 +333,11 @@ def test_pivot_with_interval_index_margins(self):
             }
         )
 
-        pivot_tab = pivot_table(
-            df, index="C", columns="B", values="A", aggfunc="sum", margins=True
-        )
+        msg = "The default value of observed=False is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            pivot_tab = pivot_table(
+                df, index="C", columns="B", values="A", aggfunc="sum", margins=True
+            )
 
         result = pivot_tab["All"]
         expected = Series(
@@ -1827,7 +1840,9 @@ def test_categorical_margins_category(self, observed):
 
         df.y = df.y.astype("category")
         df.z = df.z.astype("category")
-        table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
+        msg = "The default value of observed=False is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
         tm.assert_frame_equal(table, expected)
 
     def test_margins_casted_to_float(self):
@@ -1889,9 +1904,11 @@ def test_categorical_aggfunc(self, observed):
             {"C1": ["A", "B", "C", "C"], "C2": ["a", "a", "b", "b"], "V": [1, 2, 3, 4]}
         )
         df["C1"] = df["C1"].astype("category")
-        result = df.pivot_table(
-            "V", index="C1", columns="C2", dropna=observed, aggfunc="count"
-        )
+        msg = "The default value of observed=False is deprecated"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.pivot_table(
+                "V", index="C1", columns="C2", dropna=observed, aggfunc="count"
+            )
 
         expected_index = pd.CategoricalIndex(
             ["A", "B", "C"], categories=["A", "B", "C"], ordered=False, name="C1"