From 6ce05265472771e922e69012105d2210e3405aa9 Mon Sep 17 00:00:00 2001
From: Joe Hamman
Date: Wed, 23 Oct 2024 08:37:55 -0700
Subject: [PATCH 1/5] [v3] Array.append (#2413)

* feature(array): implement Array.append

changes Array.resize to be an in-place operation

* better error message

* no more warn

* style: pre-commit fixes

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 src/zarr/core/array.py           | 130 ++++++++++++++++++---
 tests/test_array.py              | 188 +++++++++++++++++++++++++++++++
 tests/test_codecs/test_codecs.py |   3 +-
 3 files changed, 302 insertions(+), 19 deletions(-)

diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index bdafa33f67..8c4d797e9a 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -2,7 +2,7 @@

 import json
 from asyncio import gather
-from dataclasses import dataclass, field, replace
+from dataclasses import dataclass, field
 from itertools import starmap
 from logging import getLogger
 from typing import TYPE_CHECKING, Any, Generic, Literal, cast, overload
@@ -1104,15 +1104,15 @@ async def setitem(
         )
         return await self._set_selection(indexer, value, prototype=prototype)

-    async def resize(self, new_shape: ChunkCoords, delete_outside_chunks: bool = True) -> Self:
+    async def resize(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) -> None:
+        new_shape = parse_shapelike(new_shape)
         assert len(new_shape) == len(self.metadata.shape)
         new_metadata = self.metadata.update_shape(new_shape)

-        # Remove all chunks outside of the new shape
-        old_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(self.metadata.shape))
-        new_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(new_shape))
-
         if delete_outside_chunks:
+            # Remove all chunks outside of the new shape
+            old_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(self.metadata.shape))
+            new_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(new_shape))

             async def _delete_key(key: str) -> None:
                 await (self.store_path / key).delete()
@@ -1128,7 +1128,63 @@ async def _delete_key(key: str) -> None:

         # Write new metadata
         await self._save_metadata(new_metadata)
-        return replace(self, metadata=new_metadata)
+
+        # Update metadata (in place)
+        object.__setattr__(self, "metadata", new_metadata)
+
+    async def append(self, data: npt.ArrayLike, axis: int = 0) -> ChunkCoords:
+        """Append `data` to `axis`.
+
+        Parameters
+        ----------
+        data : array-like
+            Data to be appended.
+        axis : int
+            Axis along which to append.
+
+        Returns
+        -------
+        new_shape : tuple
+
+        Notes
+        -----
+        The size of all dimensions other than `axis` must match between this
+        array and `data`.
+        """
+        # ensure data is array-like
+        if not hasattr(data, "shape"):
+            data = np.asanyarray(data)
+
+        self_shape_preserved = tuple(s for i, s in enumerate(self.shape) if i != axis)
+        data_shape_preserved = tuple(s for i, s in enumerate(data.shape) if i != axis)
+        if self_shape_preserved != data_shape_preserved:
+            raise ValueError(
+                f"shape of data to append is not compatible with the array. "
+                f"The shape of the data is {data_shape_preserved} "
+                f"and the shape of the array is {self_shape_preserved}. "
+                "All dimensions must match except for the dimension being "
+                "appended."
+            )
+
+        # remember old shape
+        old_shape = self.shape
+
+        # determine new shape
+        new_shape = tuple(
+            self.shape[i] if i != axis else self.shape[i] + data.shape[i]
+            for i in range(len(self.shape))
+        )
+
+        # resize
+        await self.resize(new_shape)
+
+        # store data
+        append_selection = tuple(
+            slice(None) if i != axis else slice(old_shape[i], new_shape[i])
+            for i in range(len(self.shape))
+        )
+        await self.setitem(append_selection, data)
+
+        return new_shape

     async def update_attributes(self, new_attributes: dict[str, JSON]) -> Self:
         # metadata.attributes is "frozen" so we simply clear and update the dict
@@ -1147,7 +1203,8 @@ async def info(self) -> None:
         raise NotImplementedError


-@dataclass(frozen=True)
+# TODO: Array can be a frozen data class again once property setters (e.g. shape) are removed
+@dataclass(frozen=False)
 class Array:
     """Instantiate an array from an initialized store."""

@@ -1297,6 +1354,11 @@ def shape(self) -> ChunkCoords:
         """
         return self._async_array.shape

+    @shape.setter
+    def shape(self, value: ChunkCoords) -> None:
+        """Sets the shape of the array by calling resize."""
+        self.resize(value)
+
     @property
     def chunks(self) -> ChunkCoords:
         """Returns a tuple of integers describing the length of each dimension of a chunk of the array.
@@ -2754,18 +2816,18 @@ def blocks(self) -> BlockIndex:
         :func:`set_block_selection` for documentation and examples."""
         return BlockIndex(self)

-    def resize(self, new_shape: ChunkCoords) -> Array:
+    def resize(self, new_shape: ShapeLike) -> None:
         """
         Change the shape of the array by growing or shrinking one or more
         dimensions.

-        This method does not modify the original Array object. Instead, it returns a new Array
-        with the specified shape.
+        Parameters
+        ----------
+        new_shape : tuple
+            New shape of the array.

         Notes
         -----
-        When resizing an array, the data are not rearranged in any way.
-
         If one or more dimensions are shrunk, any chunks falling outside the
         new array shape will be deleted from the underlying store.
         However, it is noteworthy that the chunks partially falling inside the new array
         (i.e. boundary chunks) will remain intact, and therefore, the data falling
         outside the new array but inside the boundary chunks would be restored by a
         subsequent resize operation that grows the array size.

         Examples
         --------
         >>> import zarr
         >>> z = zarr.zeros(shape=(10000, 10000),
         >>>     chunk_shape=(1000, 1000),
-        >>>     store=StorePath(MemoryStore(mode="w")),
         >>>     dtype="i4",)
         >>> z.shape
         (10000, 10000)
         >>> z2.shape
         (50, 50)
         """
-        resized = sync(self._async_array.resize(new_shape))
-        # TODO: remove this cast when type inference improves
-        _resized = cast(AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], resized)
-        return type(self)(_resized)
+        sync(self._async_array.resize(new_shape))
+
+    def append(self, data: npt.ArrayLike, axis: int = 0) -> ChunkCoords:
+        """Append `data` to `axis`.
+
+        Parameters
+        ----------
+        data : array-like
+            Data to be appended.
+        axis : int
+            Axis along which to append.
+
+        Returns
+        -------
+        new_shape : tuple
+
+        Notes
+        -----
+        The size of all dimensions other than `axis` must match between this
+        array and `data`.
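+
+        The array is resized in place (grown along `axis`) and `data` is then
+        written into the newly added region.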
+ + Examples + -------- + >>> import numpy as np + >>> import zarr + >>> a = np.arange(10000000, dtype='i4').reshape(10000, 1000) + >>> z = zarr.array(a, chunks=(1000, 100)) + >>> z.shape + (10000, 1000) + >>> z.append(a) + (20000, 1000) + >>> z.append(np.vstack([a, a]), axis=1) + (20000, 2000) + >>> z.shape + (20000, 2000) + """ + return sync(self._async_array.append(data, axis=axis)) def update_attributes(self, new_attributes: dict[str, JSON]) -> Array: # TODO: remove this cast when type inference improves diff --git a/tests/test_array.py b/tests/test_array.py index f182cb1a14..ae8e7f99c2 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -419,6 +419,194 @@ def test_update_attrs(zarr_format: int) -> None: assert arr2.attrs["foo"] == "bar" +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_resize_1d(store: MemoryStore, zarr_format: int) -> None: + z = zarr.create( + shape=105, chunks=10, dtype="i4", fill_value=0, store=store, zarr_format=zarr_format + ) + a = np.arange(105, dtype="i4") + z[:] = a + assert (105,) == z.shape + assert (105,) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(a, z[:]) + + z.resize(205) + assert (205,) == z.shape + assert (205,) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(a, z[:105]) + np.testing.assert_array_equal(np.zeros(100, dtype="i4"), z[105:]) + + z.resize(55) + assert (55,) == z.shape + assert (55,) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(a[:55], z[:]) + + # via shape setter + new_shape = (105,) + z.shape = new_shape + assert new_shape == z.shape + assert new_shape == z[:].shape + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_resize_2d(store: MemoryStore, zarr_format: int) -> None: + z = zarr.create( + shape=(105, 105), + chunks=(10, 10), + dtype="i4", + fill_value=0, + store=store, + zarr_format=zarr_format, + ) + a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) + z[:] = a + assert (105, 105) == z.shape + assert (105, 105) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a, z[:]) + + z.resize((205, 205)) + assert (205, 205) == z.shape + assert (205, 205) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a, z[:105, :105]) + np.testing.assert_array_equal(np.zeros((100, 205), dtype="i4"), z[105:, :]) + np.testing.assert_array_equal(np.zeros((205, 100), dtype="i4"), z[:, 105:]) + + z.resize((55, 55)) + assert (55, 55) == z.shape + assert (55, 55) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a[:55, :55], z[:]) + + z.resize((55, 1)) + assert (55, 1) == z.shape + assert (55, 1) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a[:55, :1], z[:]) + + z.resize((1, 55)) + assert (1, 55) == z.shape + assert (1, 55) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert 
(10, 10) == z.chunks + np.testing.assert_array_equal(a[:1, :10], z[:, :10]) + np.testing.assert_array_equal(np.zeros((1, 55 - 10), dtype="i4"), z[:, 10:55]) + + # via shape setter + new_shape = (105, 105) + z.shape = new_shape + assert new_shape == z.shape + assert new_shape == z[:].shape + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_append_1d(store: MemoryStore, zarr_format: int) -> None: + a = np.arange(105) + z = zarr.create(shape=a.shape, chunks=10, dtype=a.dtype, store=store, zarr_format=zarr_format) + z[:] = a + assert a.shape == z.shape + assert a.dtype == z.dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(a, z[:]) + + b = np.arange(105, 205) + e = np.append(a, b) + assert z.shape == (105,) + z.append(b) + assert e.shape == z.shape + assert e.dtype == z.dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(e, z[:]) + + # check append handles array-like + c = [1, 2, 3] + f = np.append(e, c) + z.append(c) + assert f.shape == z.shape + assert f.dtype == z.dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(f, z[:]) + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_append_2d(store: MemoryStore, zarr_format: int) -> None: + a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) + z = zarr.create( + shape=a.shape, chunks=(10, 10), dtype=a.dtype, store=store, zarr_format=zarr_format + ) + z[:] = a + assert a.shape == z.shape + assert a.dtype == z.dtype + assert (10, 10) == z.chunks + actual = z[:] + np.testing.assert_array_equal(a, actual) + + b = np.arange(105 * 105, 2 * 105 * 105, dtype="i4").reshape((105, 105)) + e = np.append(a, b, axis=0) + z.append(b) + assert e.shape == z.shape + assert e.dtype == z.dtype + assert (10, 10) == z.chunks + actual = z[:] + np.testing.assert_array_equal(e, actual) + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_append_2d_axis(store: MemoryStore, zarr_format: int) -> None: + a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) + z = zarr.create( + shape=a.shape, chunks=(10, 10), dtype=a.dtype, store=store, zarr_format=zarr_format + ) + z[:] = a + assert a.shape == z.shape + assert a.dtype == z.dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a, z[:]) + + b = np.arange(105 * 105, 2 * 105 * 105, dtype="i4").reshape((105, 105)) + e = np.append(a, b, axis=1) + z.append(b, axis=1) + assert e.shape == z.shape + assert e.dtype == z.dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(e, z[:]) + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_append_bad_shape(store: MemoryStore, zarr_format: int) -> None: + a = np.arange(100) + z = zarr.create(shape=a.shape, chunks=10, dtype=a.dtype, store=store, zarr_format=zarr_format) + z[:] = a + b = a.reshape(10, 10) + with pytest.raises(ValueError): + z.append(b) + + @pytest.mark.parametrize("order", ["C", "F", None]) @pytest.mark.parametrize("zarr_format", [2, 3]) @pytest.mark.parametrize("store", ["memory"], indirect=True) diff --git a/tests/test_codecs/test_codecs.py b/tests/test_codecs/test_codecs.py index 7a5fb979a1..0f2f892915 100644 --- a/tests/test_codecs/test_codecs.py +++ b/tests/test_codecs/test_codecs.py @@ -371,8 +371,9 @@ async def test_resize(store: Store) -> None: assert await store.get(f"{path}/0.1", 
prototype=default_buffer_prototype()) is not None assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is not None - a = await a.resize((10, 12)) + await a.resize((10, 12)) assert a.metadata.shape == (10, 12) + assert a.shape == (10, 12) assert await store.get(f"{path}/0.0", prototype=default_buffer_prototype()) is not None assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is None From bc588a760a804f783c4242d4435863a43a5f3f9f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 23 Oct 2024 13:30:49 -0600 Subject: [PATCH 2/5] Fix JSON encoding of complex fill values (#2432) * Fix JSON encoding of complex fill values We were not replacing NaNs and Infs with the string versions. * Fix decoding of complex fill values * try excluding `math.inf` * Check complex numbers explicitly * Update src/zarr/core/metadata/v3.py --- src/zarr/core/metadata/v3.py | 26 ++++++++++++++++++++++---- tests/test_array.py | 23 +++++++++++++++++++++++ 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 6b6f28dd96..7a38e9fd70 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -43,6 +43,13 @@ DEFAULT_DTYPE = "float64" +# Keep in sync with _replace_special_floats +SPECIAL_FLOATS_ENCODED = { + "Infinity": np.inf, + "-Infinity": -np.inf, + "NaN": np.nan, +} + def parse_zarr_format(data: object) -> Literal[3]: if data == 3: @@ -149,7 +156,7 @@ def default(self, o: object) -> Any: if isinstance(out, complex): # python complex types are not JSON serializable, so we use the # serialization defined in the zarr v3 spec - return [out.real, out.imag] + return _replace_special_floats([out.real, out.imag]) elif np.isnan(out): return "NaN" elif np.isinf(out): @@ -447,8 +454,11 @@ def parse_fill_value( if isinstance(fill_value, Sequence) and not isinstance(fill_value, str): if data_type in (DataType.complex64, DataType.complex128): if len(fill_value) == 2: + decoded_fill_value = tuple( + SPECIAL_FLOATS_ENCODED.get(value, value) for value in fill_value + ) # complex datatypes serialize to JSON arrays with two elements - return np_dtype.type(complex(*fill_value)) + return np_dtype.type(complex(*decoded_fill_value)) else: msg = ( f"Got an invalid fill value for complex data type {data_type.value}." 
@@ -475,12 +485,20 @@ def parse_fill_value( pass elif fill_value in ["Infinity", "-Infinity"] and not np.isfinite(casted_value): pass - elif np_dtype.kind in "cf": + elif np_dtype.kind == "f": # float comparison is not exact, especially when dtype None: + store = MemoryStore({}, mode="w") + Array.create(store=store, shape=(1,), dtype=np.complex64, fill_value=fill_value) + content = await store.get("zarr.json", prototype=default_buffer_prototype()) + assert content is not None + actual = json.loads(content.to_bytes()) + assert actual["fill_value"] == expected From 87ca1508f8c362a91e29ad7aa0759ab26047c792 Mon Sep 17 00:00:00 2001 From: Hannes Spitz <44113112+brokkoli71@users.noreply.github.com> Date: Thu, 24 Oct 2024 22:44:30 +0200 Subject: [PATCH 3/5] support zero-sized chunks (#2434) * support zero-sized chunks * fix imports * add min_side=0 to testing strategies * fix property tests --------- Co-authored-by: Deepak Cherian --- src/zarr/core/indexing.py | 4 +++- src/zarr/testing/strategies.py | 15 +++++++++++---- tests/test_indexing.py | 10 +++++++++- tests/test_properties.py | 6 ++++-- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index f1d5fd16d1..1873d5c83c 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -94,6 +94,8 @@ def __iter__(self) -> Iterator[ChunkProjection]: ... def ceildiv(a: float, b: float) -> int: + if a == 0: + return 0 return math.ceil(a / b) @@ -374,7 +376,7 @@ def __init__(self, dim_sel: slice, dim_len: int, dim_chunk_len: int) -> None: def __iter__(self) -> Iterator[ChunkDimProjection]: # figure out the range of chunks we need to visit - dim_chunk_ix_from = self.start // self.dim_chunk_len + dim_chunk_ix_from = 0 if self.start == 0 else self.start // self.dim_chunk_len dim_chunk_ix_to = ceildiv(self.stop, self.dim_chunk_len) # iterate over chunks in range diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py index 2c17fbf79d..c82e168cf1 100644 --- a/src/zarr/testing/strategies.py +++ b/src/zarr/testing/strategies.py @@ -65,7 +65,7 @@ def v2_dtypes() -> st.SearchStrategy[np.dtype]: stores = st.builds(MemoryStore, st.just({}), mode=st.just("w")) compressors = st.sampled_from([None, "default"]) zarr_formats: st.SearchStrategy[Literal[2, 3]] = st.sampled_from([2, 3]) -array_shapes = npst.array_shapes(max_dims=4) +array_shapes = npst.array_shapes(max_dims=4, min_side=0) @st.composite # type: ignore[misc] @@ -85,7 +85,7 @@ def numpy_arrays( @st.composite # type: ignore[misc] def np_array_and_chunks( draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = numpy_arrays -) -> tuple[np.ndarray, tuple[int]]: # type: ignore[type-arg] +) -> tuple[np.ndarray, tuple[int, ...]]: # type: ignore[type-arg] """A hypothesis strategy to generate small sized random arrays. Returns: a tuple of the array and a suitable random chunking for it. @@ -93,9 +93,16 @@ def np_array_and_chunks( array = draw(arrays) # We want this strategy to shrink towards arrays with smaller number of chunks # 1. st.integers() shrinks towards smaller values. So we use that to generate number of chunks - numchunks = draw(st.tuples(*[st.integers(min_value=1, max_value=size) for size in array.shape])) + numchunks = draw( + st.tuples( + *[st.integers(min_value=0 if size == 0 else 1, max_value=size) for size in array.shape] + ) + ) # 2. 
and now generate the chunks tuple - chunks = tuple(size // nchunks for size, nchunks in zip(array.shape, numchunks, strict=True)) + chunks = tuple( + size // nchunks if nchunks > 0 else 0 + for size, nchunks in zip(array.shape, numchunks, strict=True) + ) return (array, chunks) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index b3a1990686..2c51f3da3a 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -11,6 +11,7 @@ from numpy.testing import assert_array_equal import zarr +from zarr import Array from zarr.core.buffer import BufferPrototype, default_buffer_prototype from zarr.core.indexing import ( BasicSelection, @@ -31,7 +32,6 @@ if TYPE_CHECKING: from collections.abc import AsyncGenerator - from zarr.core.array import Array from zarr.core.buffer.core import Buffer from zarr.core.common import ChunkCoords @@ -1927,3 +1927,11 @@ def test_indexing_with_zarr_array(store: StorePath) -> None: assert_array_equal(a[ii], za[zii]) assert_array_equal(a[ii], za.oindex[zii]) + + +@pytest.mark.parametrize("store", ["local", "memory"], indirect=["store"]) +@pytest.mark.parametrize("shape", [(0, 2, 3), (0), (3, 0)]) +def test_zero_sized_chunks(store: StorePath, shape: list[int]) -> None: + z = Array.create(store=store, shape=shape, chunk_shape=shape, zarr_format=3, dtype="f8") + z[...] = 42 + assert_array_equal(z[...], np.zeros(shape, dtype="f8")) diff --git a/tests/test_properties.py b/tests/test_properties.py index 380a4d851e..f70753ceb5 100644 --- a/tests/test_properties.py +++ b/tests/test_properties.py @@ -6,7 +6,7 @@ import hypothesis.extra.numpy as npst # noqa: E402 import hypothesis.strategies as st # noqa: E402 -from hypothesis import given # noqa: E402 +from hypothesis import assume, given # noqa: E402 from zarr.testing.strategies import arrays, basic_indices, numpy_arrays, zarr_formats # noqa: E402 @@ -35,11 +35,13 @@ def test_basic_indexing(data: st.DataObject) -> None: @given(data=st.data()) def test_vindex(data: st.DataObject) -> None: zarray = data.draw(arrays()) + # integer_array_indices can't handle 0-size dimensions. 
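+    # assume() tells hypothesis to discard any draw that fails this check and retry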
+ assume(all(s > 0 for s in zarray.shape)) nparray = zarray[:] indexer = data.draw( npst.integer_array_indices( - shape=nparray.shape, result_shape=npst.array_shapes(max_dims=None) + shape=nparray.shape, result_shape=npst.array_shapes(min_side=1, max_dims=None) ) ) actual = zarray.vindex[indexer] From 109f71f6ddec95e2eba034646c582885ad2bb44b Mon Sep 17 00:00:00 2001 From: Norman Rzepka Date: Fri, 25 Oct 2024 00:47:57 +0200 Subject: [PATCH 4/5] Refactors v2 codec handling (#2425) * refactors codec pipeline for v2 * async * rm ensure_bytes --- src/zarr/codecs/_v2.py | 115 +++++++++++++++++------------------------ src/zarr/core/array.py | 7 ++- tests/test_v2.py | 9 ++++ 3 files changed, 60 insertions(+), 71 deletions(-) diff --git a/src/zarr/codecs/_v2.py b/src/zarr/codecs/_v2.py index 0f50264be8..30504ad204 100644 --- a/src/zarr/codecs/_v2.py +++ b/src/zarr/codecs/_v2.py @@ -5,20 +5,21 @@ from typing import TYPE_CHECKING import numcodecs -from numcodecs.compat import ensure_bytes, ensure_ndarray +from numcodecs.compat import ensure_ndarray_like -from zarr.abc.codec import ArrayArrayCodec, ArrayBytesCodec -from zarr.core.buffer import Buffer, NDBuffer, default_buffer_prototype +from zarr.abc.codec import ArrayBytesCodec from zarr.registry import get_ndbuffer_class if TYPE_CHECKING: import numcodecs.abc from zarr.core.array_spec import ArraySpec + from zarr.core.buffer import Buffer, NDBuffer @dataclass(frozen=True) -class V2Compressor(ArrayBytesCodec): +class V2Codec(ArrayBytesCodec): + filters: tuple[numcodecs.abc.Codec, ...] | None compressor: numcodecs.abc.Codec | None is_fixed_size = False @@ -28,81 +29,61 @@ async def _decode_single( chunk_bytes: Buffer, chunk_spec: ArraySpec, ) -> NDBuffer: - if self.compressor is not None: - chunk_numpy_array = ensure_ndarray( - await asyncio.to_thread(self.compressor.decode, chunk_bytes.as_array_like()) - ) + cdata = chunk_bytes.as_array_like() + # decompress + if self.compressor: + chunk = await asyncio.to_thread(self.compressor.decode, cdata) else: - chunk_numpy_array = ensure_ndarray(chunk_bytes.as_array_like()) + chunk = cdata + + # apply filters + if self.filters: + for f in reversed(self.filters): + chunk = await asyncio.to_thread(f.decode, chunk) + + # view as numpy array with correct dtype + chunk = ensure_ndarray_like(chunk) + # special case object dtype, because incorrect handling can lead to + # segfaults and other bad things happening + if chunk_spec.dtype != object: + chunk = chunk.view(chunk_spec.dtype) + elif chunk.dtype != object: + # If we end up here, someone must have hacked around with the filters. + # We cannot deal with object arrays unless there is an object + # codec in the filter chain, i.e., a filter that converts from object + # array to something else during encoding, and converts back to object + # array during decoding. 
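+            # (numcodecs.VLenUTF8 is one such object codec: it decodes raw
+            # bytes back into an object array of strings)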
+ raise RuntimeError("cannot read object array without object codec") - # ensure correct dtype - if str(chunk_numpy_array.dtype) != chunk_spec.dtype and not chunk_spec.dtype.hasobject: - chunk_numpy_array = chunk_numpy_array.view(chunk_spec.dtype) + # ensure correct chunk shape + chunk = chunk.reshape(-1, order="A") + chunk = chunk.reshape(chunk_spec.shape, order=chunk_spec.order) - return get_ndbuffer_class().from_numpy_array(chunk_numpy_array) + return get_ndbuffer_class().from_ndarray_like(chunk) async def _encode_single( - self, - chunk_array: NDBuffer, - _chunk_spec: ArraySpec, - ) -> Buffer | None: - chunk_numpy_array = chunk_array.as_numpy_array() - if self.compressor is not None: - if ( - not chunk_numpy_array.flags.c_contiguous - and not chunk_numpy_array.flags.f_contiguous - ): - chunk_numpy_array = chunk_numpy_array.copy(order="A") - encoded_chunk_bytes = ensure_bytes( - await asyncio.to_thread(self.compressor.encode, chunk_numpy_array) - ) - else: - encoded_chunk_bytes = ensure_bytes(chunk_numpy_array) - - return default_buffer_prototype().buffer.from_bytes(encoded_chunk_bytes) - - def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: - raise NotImplementedError - - -@dataclass(frozen=True) -class V2Filters(ArrayArrayCodec): - filters: tuple[numcodecs.abc.Codec, ...] | None - - is_fixed_size = False - - async def _decode_single( self, chunk_array: NDBuffer, chunk_spec: ArraySpec, - ) -> NDBuffer: - chunk_ndarray = chunk_array.as_ndarray_like() - # apply filters in reverse order - if self.filters is not None: - for filter in self.filters[::-1]: - chunk_ndarray = await asyncio.to_thread(filter.decode, chunk_ndarray) - - # ensure correct chunk shape - if chunk_ndarray.shape != chunk_spec.shape: - chunk_ndarray = chunk_ndarray.reshape( - chunk_spec.shape, - order=chunk_spec.order, - ) + ) -> Buffer | None: + chunk = chunk_array.as_ndarray_like() - return get_ndbuffer_class().from_ndarray_like(chunk_ndarray) + # apply filters + if self.filters: + for f in self.filters: + chunk = await asyncio.to_thread(f.encode, chunk) - async def _encode_single( - self, - chunk_array: NDBuffer, - chunk_spec: ArraySpec, - ) -> NDBuffer | None: - chunk_ndarray = chunk_array.as_ndarray_like().ravel(order=chunk_spec.order) + # check object encoding + if ensure_ndarray_like(chunk).dtype == object: + raise RuntimeError("cannot write object array without object codec") - if self.filters is not None: - for filter in self.filters: - chunk_ndarray = await asyncio.to_thread(filter.encode, chunk_ndarray) + # compress + if self.compressor: + cdata = await asyncio.to_thread(self.compressor.encode, chunk) + else: + cdata = chunk - return get_ndbuffer_class().from_ndarray_like(chunk_ndarray) + return chunk_spec.prototype.buffer.from_bytes(cdata) def compute_encoded_size(self, _input_byte_length: int, _chunk_spec: ArraySpec) -> int: raise NotImplementedError diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 8c4d797e9a..a4b86b85ae 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -13,7 +13,7 @@ from zarr._compat import _deprecate_positional_args from zarr.abc.store import Store, set_or_delete from zarr.codecs import _get_default_array_bytes_codec -from zarr.codecs._v2 import V2Compressor, V2Filters +from zarr.codecs._v2 import V2Codec from zarr.core.attributes import Attributes from zarr.core.buffer import ( BufferPrototype, @@ -118,9 +118,8 @@ def create_codec_pipeline(metadata: ArrayMetadata) -> CodecPipeline: if isinstance(metadata, 
ArrayV3Metadata):
         return get_pipeline_class().from_codecs(metadata.codecs)
     elif isinstance(metadata, ArrayV2Metadata):
-        return get_pipeline_class().from_codecs(
-            [V2Filters(metadata.filters), V2Compressor(metadata.compressor)]
-        )
+        v2_codec = V2Codec(filters=metadata.filters, compressor=metadata.compressor)
+        return get_pipeline_class().from_codecs([v2_codec])
     else:
         raise TypeError

diff --git a/tests/test_v2.py b/tests/test_v2.py
index 439b15b64c..3dd17848fb 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -121,3 +121,12 @@ async def test_create_dtype_str(dtype: Any) -> None:
     arr[:] = ["a", "bb", "ccc"]
     result = arr[:]
     np.testing.assert_array_equal(result, np.array(["a", "bb", "ccc"], dtype="object"))
+
+
+@pytest.mark.parametrize("filters", [[], [numcodecs.Delta(dtype="<i4")], [numcodecs.Zlib(level=2)]])
+def test_v2_filters_codecs(filters: Any) -> None:
+    array_fixture = [42]
+    arr = zarr.create(shape=1, dtype="<i4", zarr_format=2, filters=filters)
+    arr[:] = array_fixture
+    result = arr[:]
+    np.testing.assert_array_equal(result, array_fixture)

From: Joe Hamman
Date: Thu, 24 Oct 2024 15:51:47 -0700
Subject: [PATCH 5/5] feature(group): add group setitem api (#2393)

* feature(group): add group setitem api

* arrays proxy

* rollback to simple version

* rollback deprecation

* rollback ...

* Update tests/test_group.py
---
 src/zarr/api/asynchronous.py |  8 ++++++--
 src/zarr/core/group.py       | 24 ++++++++++++++++++++++--
 src/zarr/storage/zip.py      |  5 ++++-
 tests/test_group.py          | 21 +++++++++++++++++++--
 4 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py
index 680433565e..cd8c3543ca 100644
--- a/src/zarr/api/asynchronous.py
+++ b/src/zarr/api/asynchronous.py
@@ -396,12 +396,16 @@ async def save_array(
     mode = kwargs.pop("mode", None)
     store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
+    if np.isscalar(arr):
+        arr = np.array(arr)
+    shape = arr.shape
+    chunks = getattr(arr, "chunks", None)  # for array-likes with chunks attribute
     new = await AsyncArray.create(
         store_path,
         zarr_format=zarr_format,
-        shape=arr.shape,
+        shape=shape,
         dtype=arr.dtype,
-        chunks=arr.shape,
+        chunks=chunks,
         **kwargs,
     )
     await new.setitem(slice(None), arr)

diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py
index 6e54b7ec9b..46f37700eb 100644
--- a/src/zarr/core/group.py
+++ b/src/zarr/core/group.py
@@ -600,6 +600,23 @@ def from_dict(
             store_path=store_path,
         )

+    async def setitem(self, key: str, value: Any) -> None:
+        """Fastpath for creating a new array.
+
+        New arrays will be created with default array settings for the array type.
+
+        Parameters
+        ----------
+        key : str
+            Array name
+        value : array-like
+            Array data
+        """
+        path = self.store_path / key
+        await async_api.save_array(
+            store=path, arr=value, zarr_format=self.metadata.zarr_format, exists_ok=True
+        )
+
     async def getitem(
         self,
         key: str,
@@ -1394,8 +1411,11 @@ def __len__(self) -> int:
         return self.nmembers()

     def __setitem__(self, key: str, value: Any) -> None:
-        """__setitem__ is not supported in v3"""
-        raise NotImplementedError
+        """Fastpath for creating a new array.
+
+        New arrays will be created using default settings for the array type.
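+
+        Examples
+        --------
+        >>> import numpy as np
+        >>> import zarr
+        >>> group = zarr.group()
+        >>> group["foo"] = np.zeros((10,))
+        >>> group["foo"].shape
+        (10,)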
+ """ + self._sync(self._async_group.setitem(key, value)) def __repr__(self) -> str: return f"" diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py index 204a381bdb..d9e1aa1300 100644 --- a/src/zarr/storage/zip.py +++ b/src/zarr/storage/zip.py @@ -219,7 +219,10 @@ async def set_if_not_exists(self, key: str, value: Buffer) -> None: async def delete(self, key: str) -> None: # docstring inherited - raise NotImplementedError + # we choose to only raise NotImplementedError here if the key exists + # this allows the array/group APIs to avoid the overhead of existence checks + if await self.exists(key): + raise NotImplementedError async def exists(self, key: str) -> bool: # docstring inherited diff --git a/tests/test_group.py b/tests/test_group.py index 21e4ef4e50..bcdc6ff0da 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -427,8 +427,25 @@ def test_group_setitem(store: Store, zarr_format: ZarrFormat) -> None: Test the `Group.__setitem__` method. """ group = Group.from_store(store, zarr_format=zarr_format) - with pytest.raises(NotImplementedError): - group["key"] = 10 + arr = np.ones((2, 4)) + group["key"] = arr + assert list(group.array_keys()) == ["key"] + assert group["key"].shape == (2, 4) + np.testing.assert_array_equal(group["key"][:], arr) + + if store.supports_deletes: + key = "key" + else: + # overwriting with another array requires deletes + # for stores that don't support this, we just use a new key + key = "key2" + + # overwrite with another array + arr = np.zeros((3, 5)) + group[key] = arr + assert key in list(group.array_keys()) + assert group[key].shape == (3, 5) + np.testing.assert_array_equal(group[key], arr) def test_group_contains(store: Store, zarr_format: ZarrFormat) -> None: