Skip to content

Commit 520bc1f

Browse files
d-v-b and TomAugspurger authored
fix/unbreak chunks initialized (#2862)
* unbreak chunks initialized * Update src/zarr/storage/_utils.py Co-authored-by: Tom Augspurger <tom.augspurger88@gmail.com> * update docstring * make relativize_paths kw-only, and add tests --------- Co-authored-by: Tom Augspurger <tom.augspurger88@gmail.com>
1 parent 5ff3fbe commit 520bc1f

File tree

6 files changed

+95
-6
lines changed

6 files changed

+95
-6
lines changed

changes/2862.bugfix.rst

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix a bug that prevented the number of initialized chunks from being counted properly.

docs/user-guide/groups.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ property. E.g.::
140140
No. bytes : 8000000 (7.6M)
141141
No. bytes stored : 1614
142142
Storage ratio : 4956.6
143-
Chunks Initialized : 0
143+
Chunks Initialized : 10
144144
>>> baz.info
145145
Type : Array
146146
Zarr format : 3

src/zarr/core/array.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@
117117
get_pipeline_class,
118118
)
119119
from zarr.storage._common import StorePath, ensure_no_existing_node, make_store_path
120+
from zarr.storage._utils import _relativize_path
120121

121122
if TYPE_CHECKING:
122123
from collections.abc import Iterator, Sequence
@@ -3737,7 +3738,12 @@ async def chunks_initialized(
37373738
store_contents = [
37383739
x async for x in array.store_path.store.list_prefix(prefix=array.store_path.path)
37393740
]
3740-
return tuple(chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents)
3741+
store_contents_relative = [
3742+
_relativize_path(path=key, prefix=array.store_path.path) for key in store_contents
3743+
]
3744+
return tuple(
3745+
chunk_key for chunk_key in array._iter_chunk_keys() if chunk_key in store_contents_relative
3746+
)
37413747

37423748

37433749
def _build_parents(

src/zarr/storage/_utils.py

+52-1
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,62 @@ def _join_paths(paths: Iterable[str]) -> str:
7474
"""
7575
Filter out instances of '' and join the remaining strings with '/'.
7676
77-
Because the root node of a zarr hierarchy is represented by an empty string,
77+
Parameters
78+
----------
79+
paths : Iterable[str]
80+
81+
Returns
82+
-------
83+
str
84+
85+
Examples
86+
--------
87+
>>> _join_paths(["", "a", "b"])
88+
'a/b'
89+
>>> _join_paths(["a", "b", "c"])
90+
'a/b/c'
7891
"""
7992
return "/".join(filter(lambda v: v != "", paths))
8093

8194

95+
def _relativize_path(*, path: str, prefix: str) -> str:
96+
"""
97+
Make a "/"-delimited path relative to some prefix. If the prefix is '', then the path is
98+
returned as-is. Otherwise, the prefix is removed from the path as well as the separator
99+
string "/".
100+
101+
If ``prefix`` is not the empty string and ``path`` does not start with ``prefix``
102+
followed by a "/" character, then an error is raised.
103+
104+
This function assumes that the prefix does not end with "/".
105+
106+
Parameters
107+
----------
108+
path : str
109+
The path to make relative to the prefix.
110+
prefix : str
111+
The prefix to make the path relative to.
112+
113+
Returns
114+
-------
115+
str
116+
117+
Examples
118+
--------
119+
>>> _relativize_path(path="", prefix="a/b")
120+
'a/b'
121+
>>> _relativize_path(path="a/b", prefix="a/b/c")
122+
'c'
123+
"""
124+
if prefix == "":
125+
return path
126+
else:
127+
_prefix = prefix + "/"
128+
if not path.startswith(_prefix):
129+
raise ValueError(f"The first component of {path} does not start with {prefix}.")
130+
return path.removeprefix(f"{prefix}/")
131+
132+
82133
def _normalize_paths(paths: Iterable[str]) -> tuple[str, ...]:
83134
"""
84135
Normalize the input paths according to the normalization scheme used for zarr node paths.

tests/test_array.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -387,12 +387,13 @@ async def test_nchunks_initialized(test_cls: type[Array] | type[AsyncArray[Any]]
387387
assert observed == expected
388388

389389

390-
async def test_chunks_initialized() -> None:
390+
@pytest.mark.parametrize("path", ["", "foo"])
391+
async def test_chunks_initialized(path: str) -> None:
391392
"""
392393
Test that chunks_initialized accurately returns the keys of stored chunks.
393394
"""
394395
store = MemoryStore()
395-
arr = zarr.create_array(store, shape=(100,), chunks=(10,), dtype="i4")
396+
arr = zarr.create_array(store, name=path, shape=(100,), chunks=(10,), dtype="i4")
396397

397398
chunks_accumulated = tuple(
398399
accumulate(tuple(tuple(v.split(" ")) for v in arr._iter_chunk_keys()))

tests/test_store/test_core.py

+31-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,13 @@
88
from zarr.core.common import AccessModeLiteral, ZarrFormat
99
from zarr.storage import FsspecStore, LocalStore, MemoryStore, StoreLike, StorePath
1010
from zarr.storage._common import contains_array, contains_group, make_store_path
11-
from zarr.storage._utils import _join_paths, _normalize_path_keys, _normalize_paths, normalize_path
11+
from zarr.storage._utils import (
12+
_join_paths,
13+
_normalize_path_keys,
14+
_normalize_paths,
15+
_relativize_path,
16+
normalize_path,
17+
)
1218

1319

1420
@pytest.mark.parametrize("path", ["foo", "foo/bar"])
@@ -221,3 +227,27 @@ def test_normalize_path_keys():
221227
"""
222228
data = {"a": 10, "//b": 10}
223229
assert _normalize_path_keys(data) == {normalize_path(k): v for k, v in data.items()}
230+
231+
232+
@pytest.mark.parametrize(
    ("path", "prefix", "expected"),
    [
        ("a", "", "a"),
        ("a/b/c", "a/b", "c"),
        ("a/b/c", "a", "b/c"),
    ],
)
def test_relativize_path_valid(path: str, prefix: str, expected: str) -> None:
    """
    Check that _relativize_path strips the given prefix (and the "/" after it) from the path,
    and returns the path unchanged when the prefix is the empty string.
    """
    observed = _relativize_path(path=path, prefix=prefix)
    assert observed == expected
246+
247+
248+
def test_relativize_path_invalid() -> None:
    """
    Test that _relativize_path raises a ValueError when the path does not start with the
    prefix followed by "/".
    """
    import re

    path = "a/b/c"
    prefix = "b"
    msg = f"The first component of {path} does not start with {prefix}."
    # ``match`` is applied as a regular expression, so escape the literal message to keep
    # characters such as "." from matching arbitrary text.
    with pytest.raises(ValueError, match=re.escape(msg)):
        _relativize_path(path=path, prefix=prefix)

0 commit comments

Comments
 (0)