diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
index 66c62653139..bbf6cb4040d 100644
--- a/xarray/core/indexing.py
+++ b/xarray/core/indexing.py
@@ -664,6 +664,12 @@ def __setitem__(self, key, value):
         self._ensure_copied()
         self.array[key] = value
 
+    def __deepcopy__(self, memo):
+        # CopyOnWriteArray is used to wrap backend array objects, which might
+        # point to files on disk, so we can't rely on the default deepcopy
+        # implementation.
+        return type(self)(self.array)
+
 
 class MemoryCachedArray(ExplicitlyIndexedNDArrayMixin):
     __slots__ = ("array",)
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index fe93f5a9777..33ac26cfd39 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -1441,7 +1441,10 @@ def test_autoclose_future_warning(self):
             with self.open(tmp_file, autoclose=True) as actual:
                 assert_identical(data, actual)
 
-    def test_already_open_dataset(self):
+
+@requires_netCDF4
+class TestNetCDF4AlreadyOpen:
+    def test_base_case(self):
         with create_tmp_file() as tmp_file:
             with nc4.Dataset(tmp_file, mode="w") as nc:
                 v = nc.createVariable("x", "int")
@@ -1453,7 +1456,7 @@ def test_already_open_dataset(self):
             expected = Dataset({"x": ((), 42)})
             assert_identical(expected, ds)
 
-    def test_already_open_dataset_group(self):
+    def test_group(self):
         with create_tmp_file() as tmp_file:
             with nc4.Dataset(tmp_file, mode="w") as nc:
                 group = nc.createGroup("g")
@@ -1476,6 +1479,21 @@ def test_already_open_dataset_group(self):
             with pytest.raises(ValueError, match="must supply a root"):
                 backends.NetCDF4DataStore(nc.groups["g"], group="g")
 
+    def test_deepcopy(self):
+        # regression test for https://github.com/pydata/xarray/issues/4425
+        with create_tmp_file() as tmp_file:
+            with nc4.Dataset(tmp_file, mode="w") as nc:
+                nc.createDimension("x", 10)
+                v = nc.createVariable("y", np.int32, ("x",))
+                v[:] = np.arange(10)
+
+            h5 = nc4.Dataset(tmp_file, mode="r")
+            store = backends.NetCDF4DataStore(h5)
+            with open_dataset(store) as ds:
+                copied = ds.copy(deep=True)
+                expected = Dataset({"y": ("x", np.arange(10))})
+                assert_identical(expected, copied)
+
 
 @requires_netCDF4
 @requires_dask
@@ -2422,7 +2440,10 @@ def test_dump_encodings_h5py(self):
             assert actual.x.encoding["compression"] == "lzf"
             assert actual.x.encoding["compression_opts"] is None
 
-    def test_already_open_dataset_group(self):
+
+@requires_h5netcdf
+class TestH5NetCDFAlreadyOpen:
+    def test_open_dataset_group(self):
         import h5netcdf
 
         with create_tmp_file() as tmp_file:
@@ -2443,6 +2464,22 @@ def test_already_open_dataset_group(self):
             expected = Dataset({"x": ((), 42)})
             assert_identical(expected, ds)
 
+    def test_deepcopy(self):
+        import h5netcdf
+
+        with create_tmp_file() as tmp_file:
+            with nc4.Dataset(tmp_file, mode="w") as nc:
+                nc.createDimension("x", 10)
+                v = nc.createVariable("y", np.int32, ("x",))
+                v[:] = np.arange(10)
+
+            h5 = h5netcdf.File(tmp_file, mode="r")
+            store = backends.H5NetCDFStore(h5)
+            with open_dataset(store) as ds:
+                copied = ds.copy(deep=True)
+                expected = Dataset({"y": ("x", np.arange(10))})
+                assert_identical(expected, copied)
+
 
 @requires_h5netcdf
 class TestH5NetCDFFileObject(TestH5NetCDFData):