diff --git a/doc/source/whatsnew/v0.15.1.rst b/doc/source/whatsnew/v0.15.1.rst index a1d4f9d14a905..07139ebad8737 100644 --- a/doc/source/whatsnew/v0.15.1.rst +++ b/doc/source/whatsnew/v0.15.1.rst @@ -70,9 +70,14 @@ API changes current behavior: - .. ipython:: python + .. code-block:: ipython - df.groupby(ts, as_index=False).max() + In [4]: df.groupby(ts, as_index=False).max() + Out[4]: + jim joe + 0 72 83 + 1 77 84 + 2 96 65 - ``groupby`` will not erroneously exclude columns if the column name conflicts with the grouper name (:issue:`8112`): diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index e860d59f2e5bd..89973e81bde1b 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -220,6 +220,7 @@ Deprecations - Deprecated the default of ``observed=False`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby`; this will default to ``True`` in a future version (:issue:`43999`) - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`) - Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`) +- Deprecated :class:`.DataFrameGroupBy` with ``as_index=False`` not including groupings in the result when they are not columns of the DataFrame (:issue:`49519`) - Deprecated :func:`is_categorical_dtype`, use ``isinstance(obj.dtype, pd.CategoricalDtype)`` instead (:issue:`52527`) - Deprecated :func:`is_datetime64tz_dtype`, check ``isinstance(dtype, pd.DatetimeTZDtype)`` instead (:issue:`52607`) - Deprecated :func:`is_int64_dtype`, check ``dtype == np.dtype(np.int64)`` instead (:issue:`52564`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a12d312d148b9..8f25eef4d3855 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1244,8 +1244,21 @@ def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame: ): # GH #28549 # When using .apply(-), name will be in columns already - if in_axis and name not in columns: - result.insert(0, name, lev) + if name not in columns: + if in_axis: + result.insert(0, name, lev) + else: + msg = ( + "A grouping was used that is not in the columns of the " + "DataFrame and so was excluded from the result. This grouping " + "will be included in a future version of pandas. Add the " + "grouping as a column of the DataFrame to silence this warning." + ) + warnings.warn( + message=msg, + category=FutureWarning, + stacklevel=find_stack_level(), + ) return result diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index fa8873c22b6b0..abce7dcfef444 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -771,7 +771,9 @@ def test_as_index(): # function grouper f = lambda r: df.loc[r, "A"] - result = df.groupby(["cat", f], as_index=False, observed=True).sum() + msg = "A grouping .* was excluded from the result" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(["cat", f], as_index=False, observed=True).sum() expected = DataFrame( { "cat": Categorical([1, 2], categories=df.cat.cat.categories), @@ -784,7 +786,9 @@ def test_as_index(): # another not in-axis grouper (conflicting names in index) s = Series(["a", "b", "b"], name="cat") - result = df.groupby(["cat", s], as_index=False, observed=True).sum() + msg = "A grouping .* was excluded from the result" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby(["cat", s], as_index=False, observed=True).sum() tm.assert_frame_equal(result, expected) # is original index dropped? diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index af5e35c67ffe3..b5b13d6b10511 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -244,18 +244,26 @@ def f(x, q=None, axis=0): # DataFrame for as_index in [True, False]: df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index) - agg_result = df_grouped.agg(np.percentile, 80, axis=0) - apply_result = df_grouped.apply(DataFrame.quantile, 0.8) - expected = df_grouped.quantile(0.8) + warn = None if as_index else FutureWarning + msg = "A grouping .* was excluded from the result" + with tm.assert_produces_warning(warn, match=msg): + agg_result = df_grouped.agg(np.percentile, 80, axis=0) + with tm.assert_produces_warning(warn, match=msg): + apply_result = df_grouped.apply(DataFrame.quantile, 0.8) + with tm.assert_produces_warning(warn, match=msg): + expected = df_grouped.quantile(0.8) tm.assert_frame_equal(apply_result, expected, check_names=False) tm.assert_frame_equal(agg_result, expected) apply_result = df_grouped.apply(DataFrame.quantile, [0.4, 0.8]) - expected_seq = df_grouped.quantile([0.4, 0.8]) + with tm.assert_produces_warning(warn, match=msg): + expected_seq = df_grouped.quantile([0.4, 0.8]) tm.assert_frame_equal(apply_result, expected_seq, check_names=False) - agg_result = df_grouped.agg(f, q=80) - apply_result = df_grouped.apply(DataFrame.quantile, q=0.8) + with tm.assert_produces_warning(warn, match=msg): + agg_result = df_grouped.agg(f, q=80) + with tm.assert_produces_warning(warn, match=msg): + apply_result = df_grouped.apply(DataFrame.quantile, q=0.8) tm.assert_frame_equal(agg_result, expected) tm.assert_frame_equal(apply_result, expected, check_names=False) @@ -266,7 +274,10 @@ def test_pass_args_kwargs_duplicate_columns(tsframe, as_index): tsframe.columns = ["A", "B", "A", "C"] gb = tsframe.groupby(lambda x: x.month, as_index=as_index) - res = gb.agg(np.percentile, 80, axis=0) + warn = None if as_index else FutureWarning + msg = "A grouping .* was excluded from the result" + with tm.assert_produces_warning(warn, match=msg): + res = gb.agg(np.percentile, 80, axis=0) ex_data = { 1: tsframe[tsframe.index.month == 1].quantile(0.8), diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index b132494dafd91..48cc1518937b3 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -576,7 +576,13 @@ def test_categorical_reducers( gb_keepna = df.groupby( keys, dropna=False, observed=observed, sort=sort, as_index=as_index ) - result = getattr(gb_keepna, reduction_func)(*args) + if as_index or index_kind == "range" or reduction_func == "size": + warn = None + else: + warn = FutureWarning + msg = "A grouping .* was excluded from the result" + with tm.assert_produces_warning(warn, match=msg): + result = getattr(gb_keepna, reduction_func)(*args) # size will return a Series, others are DataFrame tm.assert_equal(result, expected) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 84df18d46aa92..3d70a03d4b917 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -1085,7 +1085,9 @@ def test_grouping_by_key_is_in_axis(): # Currently only in-axis groupings are including in the result when as_index=False; # This is likely to change in the future. - result = gb.sum() + msg = "A grouping .* was excluded from the result" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = gb.sum() expected = DataFrame({"b": [1, 2], "c": [7, 5]}) tm.assert_frame_equal(result, expected)