
Fix for h5py deepcopy issues #4426

Merged 4 commits on Sep 18, 2020
6 changes: 6 additions & 0 deletions xarray/core/indexing.py
@@ -664,6 +664,12 @@ def __setitem__(self, key, value):
self._ensure_copied()
self.array[key] = value

def __deepcopy__(self, memo):
# CopyOnWriteArray is used to wrap backend array objects, which might
# point to files on disk, so we can't rely on the default deepcopy
# implementation.
return type(self)(self.array)


class MemoryCachedArray(ExplicitlyIndexedNDArrayMixin):
__slots__ = ("array",)
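For context, here is a minimal, self-contained sketch (not the actual xarray class) of the copy-on-write pattern this override relies on: because the wrapper copies the wrapped array before the first write, __deepcopy__ can safely re-wrap the same backend array instead of attempting to deep-copy a file-backed object such as an h5py dataset. The name CopyOnWriteWrapper below is a hypothetical stand-in for xarray.core.indexing.CopyOnWriteArray.

import copy

import numpy as np


class CopyOnWriteWrapper:
    # Hypothetical stand-in for xarray.core.indexing.CopyOnWriteArray.
    def __init__(self, array):
        self.array = array
        self._copied = False

    def _ensure_copied(self):
        # Materialize a private in-memory copy before the first mutation.
        if not self._copied:
            self.array = np.array(self.array)
            self._copied = True

    def __getitem__(self, key):
        return self.array[key]

    def __setitem__(self, key, value):
        self._ensure_copied()
        self.array[key] = value

    def __deepcopy__(self, memo):
        # Re-wrap the same backend array; copy-on-write guarantees the
        # original is never mutated through the new wrapper.
        return type(self)(self.array)


backend = np.arange(10)                 # imagine an h5py/netCDF4 variable here
wrapped = CopyOnWriteWrapper(backend)
clone = copy.deepcopy(wrapped)          # cheap: does not touch the backend object
clone[0] = 99                           # first write triggers the in-memory copy
assert backend[0] == 0                  # original data stays untouched
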
43 changes: 40 additions & 3 deletions xarray/tests/test_backends.py
@@ -1441,7 +1441,10 @@ def test_autoclose_future_warning(self):
with self.open(tmp_file, autoclose=True) as actual:
assert_identical(data, actual)

def test_already_open_dataset(self):

@requires_netCDF4
class TestNetCDF4AlreadyOpen:
def test_base_case(self):
with create_tmp_file() as tmp_file:
with nc4.Dataset(tmp_file, mode="w") as nc:
v = nc.createVariable("x", "int")
@@ -1453,7 +1456,7 @@ def test_already_open_dataset(self):
expected = Dataset({"x": ((), 42)})
assert_identical(expected, ds)

def test_already_open_dataset_group(self):
def test_group(self):
with create_tmp_file() as tmp_file:
with nc4.Dataset(tmp_file, mode="w") as nc:
group = nc.createGroup("g")
@@ -1476,6 +1479,21 @@ def test_already_open_dataset_group(self):
with pytest.raises(ValueError, match="must supply a root"):
backends.NetCDF4DataStore(nc.groups["g"], group="g")

def test_deepcopy(self):
# regression test for https://github.com/pydata/xarray/issues/4425
with create_tmp_file() as tmp_file:
with nc4.Dataset(tmp_file, mode="w") as nc:
nc.createDimension("x", 10)
v = nc.createVariable("y", np.int32, ("x",))
v[:] = np.arange(10)

h5 = nc4.Dataset(tmp_file, mode="r")
store = backends.NetCDF4DataStore(h5)
with open_dataset(store) as ds:
copied = ds.copy(deep=True)
expected = Dataset({"y": ("x", np.arange(10))})
assert_identical(expected, copied)


@requires_netCDF4
@requires_dask
@@ -2422,7 +2440,10 @@ def test_dump_encodings_h5py(self):
assert actual.x.encoding["compression"] == "lzf"
assert actual.x.encoding["compression_opts"] is None

def test_already_open_dataset_group(self):

@requires_h5netcdf
class TestH5NetCDFAlreadyOpen:
def test_open_dataset_group(self):
import h5netcdf

with create_tmp_file() as tmp_file:
@@ -2443,6 +2464,22 @@ def test_already_open_dataset_group(self):
expected = Dataset({"x": ((), 42)})
assert_identical(expected, ds)

def test_deepcopy(self):
import h5netcdf

with create_tmp_file() as tmp_file:
with nc4.Dataset(tmp_file, mode="w") as nc:
nc.createDimension("x", 10)
v = nc.createVariable("y", np.int32, ("x",))
v[:] = np.arange(10)

h5 = h5netcdf.File(tmp_file, mode="r")
store = backends.H5NetCDFStore(h5)
with open_dataset(store) as ds:
copied = ds.copy(deep=True)
expected = Dataset({"y": ("x", np.arange(10))})
assert_identical(expected, copied)


@requires_h5netcdf
class TestH5NetCDFFileObject(TestH5NetCDFData):
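For reference, the user-facing behavior this PR restores (reported in #4425): deep-copying a Dataset opened through the h5netcdf backend previously failed because the underlying h5py objects cannot be deep-copied. A usage sketch, assuming a local netCDF file named example.nc exists and h5netcdf is installed:

import copy

import xarray as xr

ds = xr.open_dataset("example.nc", engine="h5netcdf")  # hypothetical local file
deep = copy.deepcopy(ds)        # previously raised from within h5py; now succeeds
also_deep = ds.copy(deep=True)  # same code path exercised by the regression tests
ds.close()
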