Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

DEPR: groupby with as_index=False not including out-of-axis groupings #52333

Merged
merged 7 commits into from
Apr 17, 2023
Merged
9 changes: 7 additions & 2 deletions doc/source/whatsnew/v0.15.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,14 @@ API changes

current behavior:

.. ipython:: python
.. code-block:: ipython

df.groupby(ts, as_index=False).max()
In [4]: df.groupby(ts, as_index=False).max()
Out[4]:
jim joe
0 72 83
1 77 84
2 96 65

- ``groupby`` will not erroneously exclude columns if the column name conflicts
with the grouper name (:issue:`8112`):
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ Deprecations
- Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`.GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`.DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`)
- Deprecated the "fastpath" keyword in :class:`Categorical` constructor, use :meth:`Categorical.from_codes` instead (:issue:`20110`)
- Deprecated behavior of :meth:`Series.dt.to_pydatetime`, in a future version this will return a :class:`Series` containing python ``datetime`` objects instead of an ``ndarray`` of datetimes; this matches the behavior of other :meth:`Series.dt` properties (:issue:`20306`)
- Deprecated :class:`.DataFrameGroupBy` with ``as_index=False`` not including groupings in the result when they are not columns of the DataFrame (:issue:`49519`)
- Deprecated passing a dictionary to :meth:`.SeriesGroupBy.agg`; pass a list of aggregations instead (:issue:`50684`)
- Deprecated logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``), wrap a sequence in a :class:`Series` or numpy array before operating instead (:issue:`51521`)
- Deprecated the methods :meth:`Series.bool` and :meth:`DataFrame.bool` (:issue:`51749`)
Expand Down
17 changes: 15 additions & 2 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1244,8 +1244,21 @@ def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame:
):
# GH #28549
# When using .apply(-), name will be in columns already
if in_axis and name not in columns:
result.insert(0, name, lev)
if name not in columns:
if in_axis:
result.insert(0, name, lev)
else:
msg = (
"A grouping was used that is not in the columns of the "
"DataFrame and so was excluded from the result. This grouping "
"will be included in a future version of pandas. Add the "
"grouping as a column of the DataFrame to silence this warning."
)
warnings.warn(
message=msg,
category=FutureWarning,
stacklevel=find_stack_level(),
)

return result

Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,7 +771,9 @@ def test_as_index():

# function grouper
f = lambda r: df.loc[r, "A"]
result = df.groupby(["cat", f], as_index=False, observed=True).sum()
msg = "A grouping .* was excluded from the result"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby(["cat", f], as_index=False, observed=True).sum()
expected = DataFrame(
{
"cat": Categorical([1, 2], categories=df.cat.cat.categories),
Expand All @@ -784,7 +786,9 @@ def test_as_index():

# another not in-axis grouper (conflicting names in index)
s = Series(["a", "b", "b"], name="cat")
result = df.groupby(["cat", s], as_index=False, observed=True).sum()
msg = "A grouping .* was excluded from the result"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.groupby(["cat", s], as_index=False, observed=True).sum()
tm.assert_frame_equal(result, expected)

# is original index dropped?
Expand Down
25 changes: 18 additions & 7 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,18 +244,26 @@ def f(x, q=None, axis=0):
# DataFrame
for as_index in [True, False]:
df_grouped = tsframe.groupby(lambda x: x.month, as_index=as_index)
agg_result = df_grouped.agg(np.percentile, 80, axis=0)
apply_result = df_grouped.apply(DataFrame.quantile, 0.8)
expected = df_grouped.quantile(0.8)
warn = None if as_index else FutureWarning
msg = "A grouping .* was excluded from the result"
with tm.assert_produces_warning(warn, match=msg):
agg_result = df_grouped.agg(np.percentile, 80, axis=0)
with tm.assert_produces_warning(warn, match=msg):
apply_result = df_grouped.apply(DataFrame.quantile, 0.8)
with tm.assert_produces_warning(warn, match=msg):
expected = df_grouped.quantile(0.8)
tm.assert_frame_equal(apply_result, expected, check_names=False)
tm.assert_frame_equal(agg_result, expected)

apply_result = df_grouped.apply(DataFrame.quantile, [0.4, 0.8])
expected_seq = df_grouped.quantile([0.4, 0.8])
with tm.assert_produces_warning(warn, match=msg):
expected_seq = df_grouped.quantile([0.4, 0.8])
tm.assert_frame_equal(apply_result, expected_seq, check_names=False)

agg_result = df_grouped.agg(f, q=80)
apply_result = df_grouped.apply(DataFrame.quantile, q=0.8)
with tm.assert_produces_warning(warn, match=msg):
agg_result = df_grouped.agg(f, q=80)
with tm.assert_produces_warning(warn, match=msg):
apply_result = df_grouped.apply(DataFrame.quantile, q=0.8)
tm.assert_frame_equal(agg_result, expected)
tm.assert_frame_equal(apply_result, expected, check_names=False)

Expand All @@ -266,7 +274,10 @@ def test_pass_args_kwargs_duplicate_columns(tsframe, as_index):
tsframe.columns = ["A", "B", "A", "C"]
gb = tsframe.groupby(lambda x: x.month, as_index=as_index)

res = gb.agg(np.percentile, 80, axis=0)
warn = None if as_index else FutureWarning
msg = "A grouping .* was excluded from the result"
with tm.assert_produces_warning(warn, match=msg):
res = gb.agg(np.percentile, 80, axis=0)

ex_data = {
1: tsframe[tsframe.index.month == 1].quantile(0.8),
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/groupby/test_groupby_dropna.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,13 @@ def test_categorical_reducers(
gb_keepna = df.groupby(
keys, dropna=False, observed=observed, sort=sort, as_index=as_index
)
result = getattr(gb_keepna, reduction_func)(*args)
if as_index or index_kind == "range" or reduction_func == "size":
warn = None
else:
warn = FutureWarning
msg = "A grouping .* was excluded from the result"
with tm.assert_produces_warning(warn, match=msg):
result = getattr(gb_keepna, reduction_func)(*args)

# size will return a Series, others are DataFrame
tm.assert_equal(result, expected)
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,9 @@ def test_grouping_by_key_is_in_axis():

# Currently only in-axis groupings are included in the result when as_index=False;
# This is likely to change in the future.
result = gb.sum()
msg = "A grouping .* was excluded from the result"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = gb.sum()
expected = DataFrame({"b": [1, 2], "c": [7, 5]})
tm.assert_frame_equal(result, expected)

Expand Down