From 0023e5ddc807bb7d12651f9403ba76939900b9dc Mon Sep 17 00:00:00 2001 From: Joel Jaeschke Date: Sat, 27 Jul 2024 00:43:36 +0200 Subject: [PATCH] Change .groupby fastpath to work for monotonic increasing and decreasing (#7427) * Fix GH6220 This fixes GH6220 which makes it possible to use the fast path for .groupby for monotonically increasing and decreasing values. * Implemented groupby tests as described by feedback * Implemented groupby tests as described by feedback * Minor test. * Test resampling error with monotonic decreasing data. * Fix test. * Added feature to whats-new.rst * Update whats-new.rst * main * flox test * Update xarray/tests/test_groupby.py Co-authored-by: Michael Niklas --------- Co-authored-by: dcherian Co-authored-by: Deepak Cherian Co-authored-by: Michael Niklas --- doc/whats-new.rst | 2 ++ xarray/core/groupers.py | 6 ++++-- xarray/tests/test_groupby.py | 23 ++++++++++++++++++++++- 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fe678e7b7ee..b47af12738b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,6 +22,8 @@ v2024.06.1 (unreleased) New Features ~~~~~~~~~~~~ +- Use fastpath when grouping both montonically increasing and decreasing variable + in :py:class:`GroupBy` (:issue:`6220`, :pull:`7427`). By `Joel Jaeschke `_. - Introduce new :py:class:`groupers.UniqueGrouper`, :py:class:`groupers.BinGrouper`, and :py:class:`groupers.TimeResampler` objects as a step towards supporting grouping by multiple variables. See the `docs ` and the diff --git a/xarray/core/groupers.py b/xarray/core/groupers.py index 4ee500cf960..1b96988b245 100644 --- a/xarray/core/groupers.py +++ b/xarray/core/groupers.py @@ -123,7 +123,9 @@ def is_unique_and_monotonic(self) -> bool: if isinstance(self.group, _DummyGroup): return True index = self.group_as_index - return index.is_unique and index.is_monotonic_increasing + return index.is_unique and ( + index.is_monotonic_increasing or index.is_monotonic_decreasing + ) @property def group_as_index(self) -> pd.Index: @@ -326,7 +328,7 @@ def _init_properties(self, group: T_Group) -> None: if not group_as_index.is_monotonic_increasing: # TODO: sort instead of raising an error - raise ValueError("index must be monotonic for resampling") + raise ValueError("Index must be monotonic for resampling") if isinstance(group_as_index, CFTimeIndex): from xarray.core.resample_cftime import CFTimeGrouper diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 2f169079ca0..9519a5559ae 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1792,6 +1792,23 @@ def test_groupby_fillna(self) -> None: actual = a.groupby("b").fillna(DataArray([0, 2], dims="b")) assert_identical(expected, actual) + @pytest.mark.parametrize("use_flox", [True, False]) + def test_groupby_fastpath_for_monotonic(self, use_flox: bool) -> None: + # Fixes https://github.com/pydata/xarray/issues/6220 + # Fixes https://github.com/pydata/xarray/issues/9279 + index = [1, 2, 3, 4, 7, 9, 10] + array = DataArray(np.arange(len(index)), [("idx", index)]) + array_rev = array.copy().assign_coords({"idx": index[::-1]}) + fwd = array.groupby("idx", squeeze=False) + rev = array_rev.groupby("idx", squeeze=False) + + for gb in [fwd, rev]: + assert all([isinstance(elem, slice) for elem in gb._group_indices]) + + with xr.set_options(use_flox=use_flox): + assert_identical(fwd.sum(), array) + assert_identical(rev.sum(), array_rev) + class TestDataArrayResample: @pytest.mark.parametrize("use_cftime", [True, False]) @@ -1828,9 +1845,13 @@ def resample_as_pandas(array, *args, **kwargs): expected = resample_as_pandas(array, "24h", closed="right") assert_identical(expected, actual) - with pytest.raises(ValueError, match=r"index must be monotonic"): + with pytest.raises(ValueError, match=r"Index must be monotonic"): array[[2, 0, 1]].resample(time="1D") + reverse = array.isel(time=slice(-1, None, -1)) + with pytest.raises(ValueError): + reverse.resample(time="1D").mean() + @pytest.mark.parametrize("use_cftime", [True, False]) def test_resample_doctest(self, use_cftime: bool) -> None: # run the doctest example here so we are not surprised