From 4e9d557d47bf6792937792426559747204fce5ed Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Tue, 7 May 2024 11:59:21 -0400 Subject: [PATCH] Add a benchmark to monitor performance for large dataset indexing (#9012) --- asv_bench/benchmarks/indexing.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 169c8af06e9..892a6cb3758 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -12,6 +12,7 @@ nt = 500 basic_indexes = { + "1scalar": {"x": 0}, "1slice": {"x": slice(0, 3)}, "1slice-1scalar": {"x": 0, "y": slice(None, None, 3)}, "2slicess-1scalar": {"x": slice(3, -3, 3), "y": 1, "t": slice(None, -3, 3)}, @@ -74,6 +75,10 @@ def setup(self, key): "x_coords": ("x", np.linspace(1.1, 2.1, nx)), }, ) + # Benchmark how indexing is slowed down by adding many scalar variable + # to the dataset + # https://github.com/pydata/xarray/pull/9003 + self.ds_large = self.ds.merge({f"extra_var{i}": i for i in range(400)}) class Indexing(Base): @@ -89,6 +94,11 @@ def time_indexing_outer(self, key): def time_indexing_vectorized(self, key): self.ds.isel(**vectorized_indexes[key]).load() + @parameterized(["key"], [list(basic_indexes.keys())]) + def time_indexing_basic_ds_large(self, key): + # https://github.com/pydata/xarray/pull/9003 + self.ds_large.isel(**basic_indexes[key]).load() + class Assignment(Base): @parameterized(["key"], [list(basic_indexes.keys())])