From 87ca1508f8c362a91e29ad7aa0759ab26047c792 Mon Sep 17 00:00:00 2001 From: Hannes Spitz <44113112+brokkoli71@users.noreply.github.com> Date: Thu, 24 Oct 2024 22:44:30 +0200 Subject: [PATCH] support zero-sized chunks (#2434) * support zero-sized chunks * fix imports * add min_side=0 to testing strategies * fix property tests --------- Co-authored-by: Deepak Cherian <deepak@cherian.net> --- src/zarr/core/indexing.py | 4 +++- src/zarr/testing/strategies.py | 15 +++++++++++---- tests/test_indexing.py | 10 +++++++++- tests/test_properties.py | 6 ++++-- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index f1d5fd16d1..1873d5c83c 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -94,6 +94,8 @@ def __iter__(self) -> Iterator[ChunkProjection]: ... def ceildiv(a: float, b: float) -> int: + if a == 0: + return 0 return math.ceil(a / b) @@ -374,7 +376,7 @@ def __init__(self, dim_sel: slice, dim_len: int, dim_chunk_len: int) -> None: def __iter__(self) -> Iterator[ChunkDimProjection]: # figure out the range of chunks we need to visit - dim_chunk_ix_from = self.start // self.dim_chunk_len + dim_chunk_ix_from = 0 if self.start == 0 else self.start // self.dim_chunk_len dim_chunk_ix_to = ceildiv(self.stop, self.dim_chunk_len) # iterate over chunks in range diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 2c17fbf79d..c82e168cf1 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -65,7 +65,7 @@ def v2_dtypes() -> st.SearchStrategy[np.dtype]: stores = st.builds(MemoryStore, st.just({}), mode=st.just("w")) compressors = st.sampled_from([None, "default"]) zarr_formats: st.SearchStrategy[Literal[2, 3]] = st.sampled_from([2, 3]) -array_shapes = npst.array_shapes(max_dims=4) +array_shapes = npst.array_shapes(max_dims=4, min_side=0) @st.composite # type: ignore[misc] @@ -85,7 +85,7 @@ def numpy_arrays( @st.composite # type: ignore[misc] def np_array_and_chunks( draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = numpy_arrays -) -> tuple[np.ndarray, tuple[int]]: # type: ignore[type-arg] +) -> tuple[np.ndarray, tuple[int, ...]]: # type: ignore[type-arg] """A hypothesis strategy to generate small sized random arrays. Returns: a tuple of the array and a suitable random chunking for it. @@ -93,9 +93,16 @@ def np_array_and_chunks( array = draw(arrays) # We want this strategy to shrink towards arrays with smaller number of chunks # 1. st.integers() shrinks towards smaller values. So we use that to generate number of chunks - numchunks = draw(st.tuples(*[st.integers(min_value=1, max_value=size) for size in array.shape])) + numchunks = draw( + st.tuples( + *[st.integers(min_value=0 if size == 0 else 1, max_value=size) for size in array.shape] + ) + ) # 2. and now generate the chunks tuple - chunks = tuple(size // nchunks for size, nchunks in zip(array.shape, numchunks, strict=True)) + chunks = tuple( + size // nchunks if nchunks > 0 else 0 + for size, nchunks in zip(array.shape, numchunks, strict=True) + ) return (array, chunks) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index b3a1990686..2c51f3da3a 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -11,6 +11,7 @@ from numpy.testing import assert_array_equal import zarr +from zarr import Array from zarr.core.buffer import BufferPrototype, default_buffer_prototype from zarr.core.indexing import ( BasicSelection, @@ -31,7 +32,6 @@ if TYPE_CHECKING: from collections.abc import AsyncGenerator - from zarr.core.array import Array from zarr.core.buffer.core import Buffer from zarr.core.common import ChunkCoords @@ -1927,3 +1927,11 @@ def test_indexing_with_zarr_array(store: StorePath) -> None: assert_array_equal(a[ii], za[zii]) assert_array_equal(a[ii], za.oindex[zii]) + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +@pytest.mark.parametrize("shape", [(0, 2, 3), (0), (3, 0)]) +def test_zero_sized_chunks(store: StorePath, shape: list[int]) -> None: + z = Array.create(store=store, shape=shape, chunk_shape=shape, zarr_format=3, dtype="f8") + z[...] = 42 + assert_array_equal(z[...], np.zeros(shape, dtype="f8")) diff --git a/tests/test_properties.py b/tests/test_properties.py index 380a4d851e..f70753ceb5 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -6,7 +6,7 @@ import hypothesis.extra.numpy as npst # noqa: E402 import hypothesis.strategies as st # noqa: E402 -from hypothesis import given # noqa: E402 +from hypothesis import assume, given # noqa: E402 from zarr.testing.strategies import arrays, basic_indices, numpy_arrays, zarr_formats # noqa: E402 @@ -35,11 +35,13 @@ def test_basic_indexing(data: st.DataObject) -> None: @given(data=st.data()) def test_vindex(data: st.DataObject) -> None: zarray = data.draw(arrays()) + # integer_array_indices can't handle 0-size dimensions. + assume(all(s > 0 for s in zarray.shape)) nparray = zarray[:] indexer = data.draw( npst.integer_array_indices( - shape=nparray.shape, result_shape=npst.array_shapes(max_dims=None) + shape=nparray.shape, result_shape=npst.array_shapes(min_side=1, max_dims=None) ) ) actual = zarray.vindex[indexer]