Skip to content

Commit

Permalink
Merge branch 'main' into fix/threadpool-exec
Browse files Browse the repository at this point in the history
  • Loading branch information
jhamman authored Jan 8, 2025
2 parents 89c5b46 + eb25424 commit 6034c5e
Showing 7 changed files with 55 additions and 23 deletions.
4 changes: 2 additions & 2 deletions docs/user-guide/arrays.rst
Original file line number Diff line number Diff line change
@@ -209,7 +209,7 @@ prints additional diagnostics, e.g.::
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (BloscCodec(typesize=4, cname=<BloscCname.zstd: 'zstd'>, clevel=3, shuffle=<BloscShuffle.bitshuffle: 'bitshuffle'>, blocksize=0),)
No. bytes : 400000000 (381.5M)
No. bytes stored : 9696302
No. bytes stored : 9696520
Storage ratio : 41.3
Chunks Initialized : 100

@@ -611,7 +611,7 @@ Sharded arrays can be created by providing the ``shards`` parameter to :func:`za
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (ZstdCodec(level=0, checksum=False),)
No. bytes : 100000000 (95.4M)
No. bytes stored : 3981060
No. bytes stored : 3981552
Storage ratio : 25.1
Shards Initialized : 100

4 changes: 2 additions & 2 deletions docs/user-guide/groups.rst
Original file line number Diff line number Diff line change
@@ -113,8 +113,8 @@ property. E.g.::
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (ZstdCodec(level=0, checksum=False),)
No. bytes : 8000000 (7.6M)
No. bytes stored : 1432
Storage ratio : 5586.6
No. bytes stored : 1614
Storage ratio : 4956.6
Chunks Initialized : 0
>>> baz.info
Type : Array
4 changes: 2 additions & 2 deletions docs/user-guide/performance.rst
Original file line number Diff line number Diff line change
@@ -131,7 +131,7 @@ ratios, depending on the correlation structure within the data. E.g.::
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (ZstdCodec(level=0, checksum=False),)
No. bytes : 400000000 (381.5M)
No. bytes stored : 342588717
No. bytes stored : 342588911
Storage ratio : 1.2
Chunks Initialized : 100
>>> with zarr.config.set({'array.order': 'F'}):
@@ -150,7 +150,7 @@ ratios, depending on the correlation structure within the data. E.g.::
Serializer : BytesCodec(endian=<Endian.little: 'little'>)
Compressors : (ZstdCodec(level=0, checksum=False),)
No. bytes : 400000000 (381.5M)
No. bytes stored : 342588717
No. bytes stored : 342588911
Storage ratio : 1.2
Chunks Initialized : 100

2 changes: 2 additions & 0 deletions docs/user-guide/v3_migration.rst
Original file line number Diff line number Diff line change
@@ -206,3 +206,5 @@ of Zarr-Python, please open (or comment on) a
* Object dtypes (:issue:`2617`)
* Ragged arrays (:issue:`2618`)
* Groups and Arrays do not implement ``__enter__`` and ``__exit__`` protocols (:issue:`2619`)
* Big Endian dtypes (:issue:`2324`)
* Default filters for object dtypes for Zarr format 2 arrays (:issue:`2627`)
28 changes: 25 additions & 3 deletions src/zarr/core/metadata/v3.py
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@
from zarr.core.buffer.core import default_buffer_prototype

if TYPE_CHECKING:
from collections.abc import Callable
from typing import Self

from zarr.core.buffer import Buffer, BufferPrototype
@@ -143,9 +144,30 @@ def parse_storage_transformers(data: object) -> tuple[dict[str, JSON], ...]:


class V3JsonEncoder(json.JSONEncoder):
def __init__(self, *args: Any, **kwargs: Any) -> None:
self.indent = kwargs.pop("indent", config.get("json_indent"))
super().__init__(*args, **kwargs)
def __init__(
self,
*,
skipkeys: bool = False,
ensure_ascii: bool = True,
check_circular: bool = True,
allow_nan: bool = True,
sort_keys: bool = False,
indent: int | None = None,
separators: tuple[str, str] | None = None,
default: Callable[[object], object] | None = None,
) -> None:
if indent is None:
indent = config.get("json_indent")
super().__init__(
skipkeys=skipkeys,
ensure_ascii=ensure_ascii,
check_circular=check_circular,
allow_nan=allow_nan,
sort_keys=sort_keys,
indent=indent,
separators=separators,
default=default,
)

def default(self, o: object) -> Any:
if isinstance(o, np.dtype):
25 changes: 12 additions & 13 deletions tests/test_array.py
Original file line number Diff line number Diff line change
@@ -399,27 +399,27 @@ async def test_chunks_initialized() -> None:
def test_nbytes_stored() -> None:
arr = zarr.create(shape=(100,), chunks=(10,), dtype="i4", codecs=[BytesCodec()])
result = arr.nbytes_stored()
assert result == 366 # the size of the metadata document. This is a fragile test.
assert result == 502 # the size of the metadata document. This is a fragile test.
arr[:50] = 1
result = arr.nbytes_stored()
assert result == 566 # the size with 5 chunks filled.
assert result == 702 # the size with 5 chunks filled.
arr[50:] = 2
result = arr.nbytes_stored()
assert result == 766 # the size with all chunks filled.
assert result == 902 # the size with all chunks filled.


async def test_nbytes_stored_async() -> None:
arr = await zarr.api.asynchronous.create(
shape=(100,), chunks=(10,), dtype="i4", codecs=[BytesCodec()]
)
result = await arr.nbytes_stored()
assert result == 366 # the size of the metadata document. This is a fragile test.
assert result == 502 # the size of the metadata document. This is a fragile test.
await arr.setitem(slice(50), 1)
result = await arr.nbytes_stored()
assert result == 566 # the size with 5 chunks filled.
assert result == 702 # the size with 5 chunks filled.
await arr.setitem(slice(50, 100), 2)
result = await arr.nbytes_stored()
assert result == 766 # the size with all chunks filled.
assert result == 902 # the size with all chunks filled.


def test_default_fill_values() -> None:
@@ -537,19 +537,19 @@ def test_info_complete(self, chunks: tuple[int, int], shards: tuple[int, int] |
_serializer=BytesCodec(),
_count_bytes=512,
_count_chunks_initialized=0,
_count_bytes_stored=373 if shards is None else 578, # the metadata?
_count_bytes_stored=521 if shards is None else 982, # the metadata?
)
assert result == expected

arr[:4, :4] = 10
result = arr.info_complete()
if shards is None:
expected = dataclasses.replace(
expected, _count_chunks_initialized=4, _count_bytes_stored=501
expected, _count_chunks_initialized=4, _count_bytes_stored=649
)
else:
expected = dataclasses.replace(
expected, _count_chunks_initialized=1, _count_bytes_stored=774
expected, _count_chunks_initialized=1, _count_bytes_stored=1178
)
assert result == expected

@@ -624,21 +624,20 @@ async def test_info_complete_async(
_serializer=BytesCodec(),
_count_bytes=512,
_count_chunks_initialized=0,
_count_bytes_stored=373 if shards is None else 578, # the metadata?
_count_bytes_stored=521 if shards is None else 982, # the metadata?
)
assert result == expected

await arr.setitem((slice(4), slice(4)), 10)
result = await arr.info_complete()
if shards is None:
expected = dataclasses.replace(
expected, _count_chunks_initialized=4, _count_bytes_stored=501
expected, _count_chunks_initialized=4, _count_bytes_stored=649
)
else:
expected = dataclasses.replace(
expected, _count_chunks_initialized=1, _count_bytes_stored=774
expected, _count_chunks_initialized=1, _count_bytes_stored=1178
)
assert result == expected


@pytest.mark.parametrize("store", ["memory"], indirect=True)
11 changes: 10 additions & 1 deletion tests/test_metadata/test_v3.py
Original file line number Diff line number Diff line change
@@ -10,7 +10,8 @@
from zarr.codecs.bytes import BytesCodec
from zarr.core.buffer import default_buffer_prototype
from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding
from zarr.core.group import parse_node_type
from zarr.core.config import config
from zarr.core.group import GroupMetadata, parse_node_type
from zarr.core.metadata.v3 import (
ArrayV3Metadata,
DataType,
@@ -304,6 +305,14 @@ def test_metadata_to_dict(
assert observed == expected


@pytest.mark.parametrize("indent", [2, 4, None])
def test_json_indent(indent: int | None) -> None:
    """Check that serialized group metadata follows the ``json_indent`` config value.

    The ``zarr.json`` document produced by ``GroupMetadata.to_buffer_dict`` is
    decoded and re-encoded with ``json.dumps`` using the same ``indent``; the
    bytes must match exactly. ``None`` is one of the parametrized values, so
    the parameter is annotated as ``int | None``.
    """
    with config.set({"json_indent": indent}):
        m = GroupMetadata()
        d = m.to_buffer_dict(default_buffer_prototype())["zarr.json"].to_bytes()
        # Round-trip through the stdlib encoder to get the reference layout.
        assert d == json.dumps(json.loads(d), indent=indent).encode()


# @pytest.mark.parametrize("fill_value", [-1, 0, 1, 2932897])
# @pytest.mark.parametrize("precision", ["ns", "D"])
# async def test_datetime_metadata(fill_value: int, precision: str) -> None:

0 comments on commit 6034c5e

Please sign in to comment.