diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 68159cd211a5e..40dfe0b33d66c 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -93,6 +93,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`) +- Deprecated pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 60c43b6cf0ecd..dca2d5676f71d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -23,6 +23,7 @@ Union, cast, ) +import warnings import numpy as np @@ -49,6 +50,7 @@ Substitution, doc, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( ensure_int64, @@ -270,6 +272,16 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) # pinned in _python_agg_general, only in _aggregate_named result = self._aggregate_named(func, *args, **kwargs) + warnings.warn( + "Pinning the groupby key to each group in " + f"{type(self).__name__}.agg is deprecated, and cases that " + "relied on it will raise in a future version. 
" + "If your operation requires utilizing the groupby keys, " + "iterate over the groupby object instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + # result is a dict whose keys are the elements of result_index result = Series(result, index=self.grouper.result_index) result = self._wrap_aggregated_output(result) @@ -407,6 +419,7 @@ def _aggregate_named(self, func, *args, **kwargs): for name, group in self.grouper.get_iterator( self._selected_obj, axis=self.axis ): + # needed for pandas/tests/groupby/test_groupby.py::test_basic_aggregations object.__setattr__(group, "name", name) output = func(group, *args, **kwargs) @@ -1537,6 +1550,7 @@ def _transform_general(self, func, *args, **kwargs): except StopIteration: pass else: + # 2023-02-27 No tests broken by disabling this pinning object.__setattr__(group, "name", name) try: path, res = self._choose_path(fast_path, slow_path, group) @@ -1552,6 +1566,7 @@ def _transform_general(self, func, *args, **kwargs): for name, group in gen: if group.size == 0: continue + # 2023-02-27 No tests broken by disabling this pinning object.__setattr__(group, "name", name) res = path(group) @@ -1721,6 +1736,8 @@ def filter(self, func, dropna: bool = True, *args, **kwargs): gen = self.grouper.get_iterator(obj, axis=self.axis) for name, group in gen: + # 2023-02-27 no tests are broken by disabling this pinning, but it + # is documented in the docstring above. 
object.__setattr__(group, "name", name) res = func(group, *args, **kwargs) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 726d75d705344..9a06a3da28e15 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -760,6 +760,11 @@ def apply_groupwise( zipped = zip(group_keys, splitter) for key, group in zipped: + # Pinning name is needed for + # test_group_apply_once_per_group, + # test_inconsistent_return_type, test_set_group_name, + # test_group_name_available_in_inference_pass, + # test_groupby_multi_timezone object.__setattr__(group, "name", key) # group might be modified diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e225ff5a0fa43..97e88a8545aa5 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -65,7 +65,7 @@ def test_groupby_std_datetimelike(): @pytest.mark.parametrize("dtype", ["int64", "int32", "float64", "float32"]) -def test_basic(dtype): +def test_basic_aggregations(dtype): data = Series(np.arange(9) // 3, index=np.arange(9), dtype=dtype) index = np.arange(9) @@ -102,7 +102,13 @@ def test_basic(dtype): grouped.aggregate({"one": np.mean, "two": np.std}) group_constants = {0: 10, 1: 20, 2: 30} - agged = grouped.agg(lambda x: group_constants[x.name] + x.mean()) + msg = ( + "Pinning the groupby key to each group in SeriesGroupBy.agg is deprecated, " + "and cases that relied on it will raise in a future version" + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#41090 + agged = grouped.agg(lambda x: group_constants[x.name] + x.mean()) assert agged[1] == 21 # corner cases