From 15a9747f2b41681d6cec403ca12edb9cecd6b3bb Mon Sep 17 00:00:00 2001 From: "Daniel Jahn (dahn)" Date: Sat, 6 Apr 2024 10:48:13 +0200 Subject: [PATCH 01/31] Resolve Mypy erorrs in `v3` branch (#1692) * refactor(v3): Using appropriate types * fix(v3): Typing fixes + minor code fixes * fix(v3): _sync_iter works with coroutines * docs(v3/store/core.py): clearer comment * fix(metadata.py): Use Any outside TYPE_CHECKING for Pydantic * fix(zarr/v3): correct zarr format + remove unused method * fix(v3/store/core.py): Potential suggestion on handling str store_like * refactor(zarr/v3): Add more typing * ci(.pre-commit-config.yaml): zarr v3 mypy checks turned on in pre-commit --- .pre-commit-config.yaml | 1 - src/zarr/v3/abc/metadata.py | 3 +- src/zarr/v3/array.py | 2 +- src/zarr/v3/chunk_grids.py | 2 +- src/zarr/v3/chunk_key_encodings.py | 6 ++-- src/zarr/v3/codecs/transpose.py | 8 +++--- src/zarr/v3/group.py | 44 ++++++++++++++++++------------ src/zarr/v3/metadata.py | 6 ++-- src/zarr/v3/store/core.py | 15 ++-------- src/zarr/v3/store/local.py | 2 +- src/zarr/v3/sync.py | 8 ++---- 11 files changed, 47 insertions(+), 50 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 79344604a5..10aff8b4c6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -31,7 +31,6 @@ repos: hooks: - id: mypy files: src - exclude: ^src/zarr/v3 args: [] additional_dependencies: - types-redis diff --git a/src/zarr/v3/abc/metadata.py b/src/zarr/v3/abc/metadata.py index bdd2f86d59..4fcabf72a1 100644 --- a/src/zarr/v3/abc/metadata.py +++ b/src/zarr/v3/abc/metadata.py @@ -5,11 +5,12 @@ from typing import Dict from typing_extensions import Self -from dataclasses import fields +from dataclasses import fields, dataclass from zarr.v3.common import JSON +@dataclass(frozen=True) class Metadata: def to_dict(self) -> JSON: """ diff --git a/src/zarr/v3/array.py b/src/zarr/v3/array.py index 632f7d8ec7..c0a00a624e 100644 --- a/src/zarr/v3/array.py +++ 
b/src/zarr/v3/array.py @@ -182,7 +182,7 @@ def shape(self) -> ChunkCoords: @property def size(self) -> int: - return np.prod(self.metadata.shape) + return np.prod(self.metadata.shape).item() @property def dtype(self) -> np.dtype: diff --git a/src/zarr/v3/chunk_grids.py b/src/zarr/v3/chunk_grids.py index 6c48323798..b0a2a7bb36 100644 --- a/src/zarr/v3/chunk_grids.py +++ b/src/zarr/v3/chunk_grids.py @@ -20,7 +20,7 @@ class ChunkGrid(Metadata): @classmethod def from_dict(cls, data: Dict[str, JSON]) -> ChunkGrid: if isinstance(data, ChunkGrid): - return data # type: ignore + return data name_parsed, _ = parse_named_configuration(data) if name_parsed == "regular": diff --git a/src/zarr/v3/chunk_key_encodings.py b/src/zarr/v3/chunk_key_encodings.py index e4339240e3..9889a2f04a 100644 --- a/src/zarr/v3/chunk_key_encodings.py +++ b/src/zarr/v3/chunk_key_encodings.py @@ -1,6 +1,6 @@ from __future__ import annotations from abc import abstractmethod -from typing import TYPE_CHECKING, Dict, Literal +from typing import TYPE_CHECKING, Dict, Literal, cast from dataclasses import dataclass from zarr.v3.abc.metadata import Metadata @@ -19,7 +19,7 @@ def parse_separator(data: JSON) -> SeparatorLiteral: if data not in (".", "/"): raise ValueError(f"Expected an '.' or '/' separator. 
Got {data} instead.") - return data # type: ignore + return cast(SeparatorLiteral, data) @dataclass(frozen=True) @@ -35,7 +35,7 @@ def __init__(self, *, separator: SeparatorLiteral) -> None: @classmethod def from_dict(cls, data: Dict[str, JSON]) -> ChunkKeyEncoding: if isinstance(data, ChunkKeyEncoding): - return data # type: ignore + return data name_parsed, configuration_parsed = parse_named_configuration(data) if name_parsed == "default": diff --git a/src/zarr/v3/codecs/transpose.py b/src/zarr/v3/codecs/transpose.py index f214d1e7f1..b663230e35 100644 --- a/src/zarr/v3/codecs/transpose.py +++ b/src/zarr/v3/codecs/transpose.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Dict, Iterable +from typing import TYPE_CHECKING, Dict, Iterable, Union, cast from dataclasses import dataclass, replace @@ -16,12 +16,12 @@ from zarr.v3.codecs.registry import register_codec -def parse_transpose_order(data: JSON) -> Tuple[int]: +def parse_transpose_order(data: Union[JSON, Iterable[int]]) -> Tuple[int, ...]: if not isinstance(data, Iterable): raise TypeError(f"Expected an iterable. Got {data} instead.") if not all(isinstance(a, int) for a in data): raise TypeError(f"Expected an iterable of integers. Got {data} instead.") - return tuple(data) # type: ignore[return-value] + return tuple(cast(Iterable[int], data)) @dataclass(frozen=True) @@ -31,7 +31,7 @@ class TransposeCodec(ArrayArrayCodec): order: Tuple[int, ...] 
def __init__(self, *, order: ChunkCoordsLike) -> None: - order_parsed = parse_transpose_order(order) # type: ignore[arg-type] + order_parsed = parse_transpose_order(order) object.__setattr__(self, "order", order_parsed) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index acd5ca0d62..0012a77a81 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -4,7 +4,7 @@ import asyncio import json import logging -from typing import Any, Dict, Literal, Optional, Union, AsyncIterator, Iterator, List +from typing import Any, Dict, Literal, Optional, Union, AsyncIterator, List from zarr.v3.abc.metadata import Metadata from zarr.v3.array import AsyncArray, Array @@ -46,11 +46,11 @@ def to_bytes(self) -> Dict[str, bytes]: return {ZARR_JSON: json.dumps(self.to_dict()).encode()} else: return { - ZGROUP_JSON: self.zarr_format, + ZGROUP_JSON: json.dumps({"zarr_format": 2}).encode(), ZATTRS_JSON: json.dumps(self.attributes).encode(), } - def __init__(self, attributes: Dict[str, Any] = None, zarr_format: Literal[2, 3] = 3): + def __init__(self, attributes: Optional[Dict[str, Any]] = None, zarr_format: Literal[2, 3] = 3): attributes_parsed = parse_attributes(attributes) zarr_format_parsed = parse_zarr_format(zarr_format) @@ -104,7 +104,7 @@ async def open( zarr_format: Literal[2, 3] = 3, ) -> AsyncGroup: store_path = make_store_path(store) - zarr_json_bytes = await (store_path / ZARR_JSON).get_async() + zarr_json_bytes = await (store_path / ZARR_JSON).get() assert zarr_json_bytes is not None # TODO: consider trying to autodiscover the zarr-format here @@ -139,7 +139,7 @@ def from_dict( store_path: StorePath, data: Dict[str, Any], runtime_configuration: RuntimeConfiguration, - ) -> Group: + ) -> AsyncGroup: group = cls( metadata=GroupMetadata.from_dict(data), store_path=store_path, @@ -168,10 +168,12 @@ async def getitem( zarr_json = json.loads(zarr_json_bytes) if zarr_json["node_type"] == "group": return type(self).from_dict(store_path, zarr_json, 
self.runtime_configuration) - if zarr_json["node_type"] == "array": + elif zarr_json["node_type"] == "array": return AsyncArray.from_dict( store_path, zarr_json, runtime_configuration=self.runtime_configuration ) + else: + raise ValueError(f"unexpected node_type: {zarr_json['node_type']}") elif self.metadata.zarr_format == 2: # Q: how do we like optimistically fetching .zgroup, .zarray, and .zattrs? # This guarantees that we will always make at least one extra request to the store @@ -271,7 +273,7 @@ def __repr__(self): async def nchildren(self) -> int: raise NotImplementedError - async def children(self) -> AsyncIterator[AsyncArray, AsyncGroup]: + async def children(self) -> AsyncIterator[Union[AsyncArray, AsyncGroup]]: raise NotImplementedError async def contains(self, child: str) -> bool: @@ -381,8 +383,12 @@ async def update_attributes_async(self, new_attributes: Dict[str, Any]) -> Group new_metadata = replace(self.metadata, attributes=new_attributes) # Write new metadata - await (self.store_path / ZARR_JSON).set_async(new_metadata.to_bytes()) - return replace(self, metadata=new_metadata) + to_save = new_metadata.to_bytes() + awaitables = [(self.store_path / key).set(value) for key, value in to_save.items()] + await asyncio.gather(*awaitables) + + async_group = replace(self._async_group, metadata=new_metadata) + return replace(self, _async_group=async_group) @property def metadata(self) -> GroupMetadata: @@ -396,34 +402,38 @@ def attrs(self) -> Attributes: def info(self): return self._async_group.info + @property + def store_path(self) -> StorePath: + return self._async_group.store_path + def update_attributes(self, new_attributes: Dict[str, Any]): self._sync(self._async_group.update_attributes(new_attributes)) return self @property def nchildren(self) -> int: - return self._sync(self._async_group.nchildren) + return self._sync(self._async_group.nchildren()) @property - def children(self) -> List[Array, Group]: - _children = 
self._sync_iter(self._async_group.children) + def children(self) -> List[Union[Array, Group]]: + _children = self._sync_iter(self._async_group.children()) return [Array(obj) if isinstance(obj, AsyncArray) else Group(obj) for obj in _children] def __contains__(self, child) -> bool: return self._sync(self._async_group.contains(child)) - def group_keys(self) -> Iterator[str]: - return self._sync_iter(self._async_group.group_keys) + def group_keys(self) -> List[str]: + return self._sync_iter(self._async_group.group_keys()) def groups(self) -> List[Group]: # TODO: in v2 this was a generator that return key: Group - return [Group(obj) for obj in self._sync_iter(self._async_group.groups)] + return [Group(obj) for obj in self._sync_iter(self._async_group.groups())] def array_keys(self) -> List[str]: - return self._sync_iter(self._async_group.array_keys) + return self._sync_iter(self._async_group.array_keys()) def arrays(self) -> List[Array]: - return [Array(obj) for obj in self._sync_iter(self._async_group.arrays)] + return [Array(obj) for obj in self._sync_iter(self._async_group.arrays())] def tree(self, expand=False, level=None) -> Any: return self._sync(self._async_group.tree(expand=expand, level=level)) diff --git a/src/zarr/v3/metadata.py b/src/zarr/v3/metadata.py index de3055abdc..a5e8927311 100644 --- a/src/zarr/v3/metadata.py +++ b/src/zarr/v3/metadata.py @@ -1,6 +1,6 @@ from __future__ import annotations from enum import Enum -from typing import TYPE_CHECKING, cast, Dict, Iterable +from typing import TYPE_CHECKING, cast, Dict, Iterable, Any from dataclasses import dataclass, field import json import numpy as np @@ -10,7 +10,7 @@ if TYPE_CHECKING: - from typing import Any, Literal, Union, List, Optional, Tuple + from typing import Literal, Union, List, Optional, Tuple from zarr.v3.codecs.pipeline import CodecPipeline @@ -244,7 +244,7 @@ class ArrayV2Metadata(Metadata): filters: Optional[List[Dict[str, Any]]] = None dimension_separator: Literal[".", "/"] = "." 
compressor: Optional[Dict[str, Any]] = None - attributes: Optional[Dict[str, Any]] = field(default_factory=dict) + attributes: Optional[Dict[str, Any]] = cast(Dict[str, Any], field(default_factory=dict)) zarr_format: Literal[2] = field(init=False, default=2) def __init__( diff --git a/src/zarr/v3/store/core.py b/src/zarr/v3/store/core.py index 0ef1c8569e..16714d9e30 100644 --- a/src/zarr/v3/store/core.py +++ b/src/zarr/v3/store/core.py @@ -5,6 +5,7 @@ from zarr.v3.common import BytesLike from zarr.v3.abc.store import Store +from zarr.v3.store.local import LocalStore def _dereference_path(root: str, path: str) -> str: @@ -24,10 +25,6 @@ def __init__(self, store: Store, path: Optional[str] = None): self.store = store self.path = path or "" - @classmethod - def from_path(cls, pth: Path) -> StorePath: - return cls(Store.from_path(pth)) - async def get( self, byte_range: Optional[Tuple[int, Optional[int]]] = None ) -> Optional[BytesLike]: @@ -70,14 +67,6 @@ def make_store_path(store_like: StoreLike) -> StorePath: return store_like elif isinstance(store_like, Store): return StorePath(store_like) - # elif isinstance(store_like, Path): - # return StorePath(Store.from_path(store_like)) elif isinstance(store_like, str): - try: - from upath import UPath - - return StorePath(Store.from_path(UPath(store_like))) - except ImportError as e: - raise e - # return StorePath(LocalStore(Path(store_like))) + return StorePath(LocalStore(Path(store_like))) raise TypeError diff --git a/src/zarr/v3/store/local.py b/src/zarr/v3/store/local.py index a62eea20f7..c3da110450 100644 --- a/src/zarr/v3/store/local.py +++ b/src/zarr/v3/store/local.py @@ -146,7 +146,7 @@ async def list_prefix(self, prefix: str) -> List[str]: """ def _list_prefix(root: Path, prefix: str) -> List[str]: - files = [p for p in (root / prefix).rglob("*") if p.is_file()] + files = [str(p) for p in (root / prefix).rglob("*") if p.is_file()] return files return await to_thread(_list_prefix, self.root, prefix) diff --git 
a/src/zarr/v3/sync.py b/src/zarr/v3/sync.py index f0996c019e..fcc8e7b275 100644 --- a/src/zarr/v3/sync.py +++ b/src/zarr/v3/sync.py @@ -5,7 +5,6 @@ from typing import ( Any, AsyncIterator, - Callable, Coroutine, List, Optional, @@ -112,11 +111,10 @@ def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: # this should allow us to better type the sync wrapper return sync(coroutine, loop=self._sync_configuration.asyncio_loop) - def _sync_iter( - self, func: Callable[P, AsyncIterator[T]], *args: P.args, **kwargs: P.kwargs - ) -> List[T]: + def _sync_iter(self, coroutine: Coroutine[Any, Any, AsyncIterator[T]]) -> List[T]: async def iter_to_list() -> List[T]: # TODO: replace with generators so we don't materialize the entire iterator at once - return [item async for item in func(*args, **kwargs)] + async_iterator = await coroutine + return [item async for item in async_iterator] return self._sync(iter_to_list()) From 4e6cca2c08992b57326db5d519d3c380d0ea5e16 Mon Sep 17 00:00:00 2001 From: Max Jones <14077947+maxrjones@users.noreply.github.com> Date: Mon, 8 Apr 2024 12:29:26 -0400 Subject: [PATCH 02/31] Specify hatch envs using GitHub actions matrix for v3 tests (#1728) * Specify v3 hatch envs using GitHub actions matrix * Update .github/workflows/test-v3.yml Co-authored-by: Joe Hamman * Update .github/workflows/test-v3.yml Co-authored-by: Joe Hamman * test on 3.12 too * no 3.12 --------- Co-authored-by: Joe Hamman Co-authored-by: Joe Hamman --- .github/workflows/test-v3.yml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-v3.yml b/.github/workflows/test-v3.yml index bdc6e99299..e767541c75 100644 --- a/.github/workflows/test-v3.yml +++ b/.github/workflows/test-v3.yml @@ -10,15 +10,22 @@ on: branches: [ v3 ] jobs: - run-tests: + test: + name: py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }} runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.10', 
'3.11'] + numpy-version: ['1.24', '1.26'] + dependency-set: ["minimal", "optional"] + steps: - uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.11' + python-version: ${{ matrix.python-version }} cache: 'pip' - name: Install Hatch run: | @@ -29,8 +36,8 @@ jobs: hatch env create - name: Run Tests run: | - hatch run test:run + hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run - name: Run mypy continue-on-error: true run: | - hatch run test:run-mypy \ No newline at end of file + hatch run test:run-mypy From 77292b12d1e644f31f81a78973b09d4b6f2ed16c Mon Sep 17 00:00:00 2001 From: Saransh Chopra Date: Mon, 8 Apr 2024 19:42:20 +0200 Subject: [PATCH 03/31] black -> ruff format + cleanup (#1639) * black -> ruff + cleanup * format * Preserve git blame * pre-commit fix --- .flake8 | 2 -- .git-blame-ignore-revs | 2 ++ .pre-commit-config.yaml | 17 ++++------- bench/compress_normal.py | 1 - pyproject.toml | 18 ++--------- src/zarr/_storage/absstore.py | 3 +- src/zarr/_storage/store.py | 1 - src/zarr/_storage/v3.py | 1 - src/zarr/attrs.py | 6 ---- src/zarr/convenience.py | 20 +++--------- src/zarr/core.py | 2 +- src/zarr/creation.py | 2 -- src/zarr/hierarchy.py | 46 ++++++++++++++-------------- src/zarr/indexing.py | 35 ++++----------------- src/zarr/meta.py | 1 - src/zarr/n5.py | 57 ----------------------------------- src/zarr/storage.py | 9 +----- src/zarr/util.py | 16 ++++------ src/zarr/v3/config.py | 1 - src/zarr/v3/group.py | 1 - src/zarr/v3/store/local.py | 3 +- src/zarr/v3/sync.py | 1 - tests/test_attrs.py | 6 ---- tests/test_convenience.py | 7 ----- tests/test_creation.py | 9 ------ tests/test_dim_separator.py | 1 - tests/test_filters.py | 12 -------- tests/test_group_v3.py | 2 -- tests/test_hierarchy.py | 3 -- tests/test_indexing.py | 35 --------------------- tests/test_info.py | 1 - tests/test_meta.py | 27 ++--------------- tests/test_storage.py | 1 
- tests/test_sync.py | 2 -- tests/test_util.py | 2 -- tests/v3/test_metadata.py | 1 + 36 files changed, 58 insertions(+), 296 deletions(-) delete mode 100644 .flake8 diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 7da1f9608e..0000000000 --- a/.flake8 +++ /dev/null @@ -1,2 +0,0 @@ -[flake8] -max-line-length = 100 diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 53bf4633f0..9e0316032f 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,2 +1,4 @@ # lint codebase with black and ruff 4e348d6b80c96da461fd866576c971b8a659ba15 +# migrate from black to ruff format +22cea005629913208a85799372e045f353744add diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 10aff8b4c6..d4aee4ce86 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,27 +7,22 @@ default_language_version: python: python3 repos: - repo: https://github.com/charliermarsh/ruff-pre-commit - # Ruff version. - rev: 'v0.0.224' + rev: 'v0.2.1' hooks: - id: ruff - # Respect `exclude` and `extend-exclude` settings. 
- args: ["--force-exclude"] - - repo: https://github.com/psf/black - rev: 22.12.0 - hooks: - - id: black + args: ["--fix", "--show-fixes"] + - id: ruff-format - repo: https://github.com/codespell-project/codespell - rev: v2.2.5 + rev: v2.2.6 hooks: - id: codespell args: ["-L", "ba,ihs,kake,nd,noe,nwo,te,fo,zar", "-S", "fixture"] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-yaml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.7.1 + rev: v1.8.0 hooks: - id: mypy files: src diff --git a/bench/compress_normal.py b/bench/compress_normal.py index 9f1655541c..803d54b76b 100644 --- a/bench/compress_normal.py +++ b/bench/compress_normal.py @@ -8,7 +8,6 @@ from zarr import blosc if __name__ == "__main__": - sys.path.insert(0, "..") # setup diff --git a/pyproject.toml b/pyproject.toml index 3933376b12..9f21a84aee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -127,7 +127,8 @@ serve = "sphinx-autobuild docs docs/_build --ignore 'docs/_autoapi/**/*' --host [tool.ruff] line-length = 100 -exclude = [ +force-exclude = true +extend-exclude = [ ".bzr", ".direnv", ".eggs", @@ -146,21 +147,6 @@ exclude = [ "docs" ] -[tool.black] -line-length = 100 -exclude = ''' -/( - \.git - | \.mypy_cache - | \.venv - | _build - | buck-out - | build - | dist - | docs -)/ -''' - [tool.mypy] python_version = "3.8" ignore_missing_imports = true diff --git a/src/zarr/_storage/absstore.py b/src/zarr/_storage/absstore.py index f62529f096..c9a113148c 100644 --- a/src/zarr/_storage/absstore.py +++ b/src/zarr/_storage/absstore.py @@ -87,7 +87,7 @@ def __init__( "https://{}.blob.core.windows.net/".format(account_name), container, credential=account_key, - **blob_service_kwargs + **blob_service_kwargs, ) self.client = client @@ -240,7 +240,6 @@ def __setitem__(self, key, value): super().__setitem__(key, value) def rmdir(self, path=None): - if not path: # Currently allowing clear to delete everything as in v2 diff --git 
a/src/zarr/_storage/store.py b/src/zarr/_storage/store.py index 8daedae48f..80e4ad8f75 100644 --- a/src/zarr/_storage/store.py +++ b/src/zarr/_storage/store.py @@ -629,7 +629,6 @@ def _rmdir_from_keys(store: StoreLike, path: Optional[str] = None) -> None: def _rmdir_from_keys_v3(store: StoreV3, path: str = "") -> None: - meta_dir = meta_root + path meta_dir = meta_dir.rstrip("/") _rmdir_from_keys(store, meta_dir) diff --git a/src/zarr/_storage/v3.py b/src/zarr/_storage/v3.py index 8ab54984b7..d3cbc58235 100644 --- a/src/zarr/_storage/v3.py +++ b/src/zarr/_storage/v3.py @@ -118,7 +118,6 @@ def _get_files_and_dirs_from_path(store, path): class FSStoreV3(FSStore, StoreV3): - # FSStoreV3 doesn't use this (FSStore uses it within _normalize_key) _META_KEYS = () diff --git a/src/zarr/attrs.py b/src/zarr/attrs.py index 01fc617b3c..e967c5b853 100644 --- a/src/zarr/attrs.py +++ b/src/zarr/attrs.py @@ -26,7 +26,6 @@ class Attributes(MutableMapping): """ def __init__(self, store, key=".zattrs", read_only=False, cache=True, synchronizer=None): - self._version = getattr(store, "_store_version", 2) _Store = Store if self._version == 2 else StoreV3 self.store = _Store._ensure_store(store) @@ -73,7 +72,6 @@ def __getitem__(self, item): return self.asdict()[item] def _write_op(self, f, *args, **kwargs): - # guard condition if self.read_only: raise PermissionError("attributes are read-only") @@ -89,7 +87,6 @@ def __setitem__(self, item, value): self._write_op(self._setitem_nosync, item, value) def _setitem_nosync(self, item, value): - # load existing data d = self._get_nosync() @@ -106,7 +103,6 @@ def __delitem__(self, item): self._write_op(self._delitem_nosync, item) def _delitem_nosync(self, key): - # load existing data d = self._get_nosync() @@ -128,7 +124,6 @@ def put(self, d): self._write_op(self._put_nosync, dict(attributes=d)) def _put_nosync(self, d): - d_to_check = d if self._version == 2 else d["attributes"] if not all(isinstance(item, str) for item in d_to_check): # TODO: 
Raise an error for non-string keys @@ -178,7 +173,6 @@ def update(self, *args, **kwargs): self._write_op(self._update_nosync, *args, **kwargs) def _update_nosync(self, *args, **kwargs): - # load existing data d = self._get_nosync() diff --git a/src/zarr/convenience.py b/src/zarr/convenience.py index 0ee8a8d323..9c0deeea47 100644 --- a/src/zarr/convenience.py +++ b/src/zarr/convenience.py @@ -675,10 +675,8 @@ def copy_store( # setup logging with _LogWriter(log) as log: - # iterate over source keys for source_key in sorted(source.keys()): - # filter to keys under source path if source_store_version == 2: if not source_key.startswith(source_path): @@ -757,7 +755,7 @@ def copy( log=None, if_exists="raise", dry_run=False, - **create_kws + **create_kws, ): """Copy the `source` array or group into the `dest` group. @@ -878,7 +876,6 @@ def copy( # setup logging with _LogWriter(log) as log: - # do the copying n_copied, n_skipped, n_bytes_copied = _copy( log, @@ -890,7 +887,7 @@ def copy( without_attrs=without_attrs, if_exists=if_exists, dry_run=dry_run, - **create_kws + **create_kws, ) # log a final message with a summary of what happened @@ -948,12 +945,10 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ # take action if do_copy: - # log a message about what we're going to do log("copy {} {} {}".format(source.name, source.shape, source.dtype)) if not dry_run: - # clear the way if exists: del dest[name] @@ -1038,12 +1033,10 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ # take action if do_copy: - # log action log("copy {}".format(source.name)) if not dry_run: - # clear the way if exists_array: del dest[name] @@ -1056,7 +1049,6 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ grp.attrs.update(source.attrs) else: - # setup for dry run without creating any groups in the # destination if dest is not None: @@ -1076,7 +1068,7 @@ def _copy(log, source, dest, name, root, 
shallow, without_attrs, if_exists, dry_ without_attrs=without_attrs, if_exists=if_exists, dry_run=dry_run, - **create_kws + **create_kws, ) n_copied += c n_skipped += s @@ -1099,7 +1091,7 @@ def copy_all( log=None, if_exists="raise", dry_run=False, - **create_kws + **create_kws, ): """Copy all children of the `source` group into the `dest` group. @@ -1189,7 +1181,6 @@ def copy_all( # setup logging with _LogWriter(log) as log: - for k in source.keys(): c, s, b = _copy( log, @@ -1201,7 +1192,7 @@ def copy_all( without_attrs=without_attrs, if_exists=if_exists, dry_run=dry_run, - **create_kws + **create_kws, ) n_copied += c n_skipped += s @@ -1262,7 +1253,6 @@ def is_zarr_key(key): return key.endswith(".zarray") or key.endswith(".zgroup") or key.endswith(".zattrs") else: - assert_zarr_v3_api_available() sfx = _get_metadata_suffix(store) # type: ignore diff --git a/src/zarr/core.py b/src/zarr/core.py index c07a31e95f..d22a9d79c3 100644 --- a/src/zarr/core.py +++ b/src/zarr/core.py @@ -2536,7 +2536,7 @@ def hexdigest(self, hashname="sha1"): checksum = binascii.hexlify(self.digest(hashname=hashname)) # This is a bytes object on Python 3 and we want a str. 
- if type(checksum) is not str: + if not isinstance(checksum, str): checksum = checksum.decode("utf8") return checksum diff --git a/src/zarr/creation.py b/src/zarr/creation.py index 726d0b5932..6227f90b7b 100644 --- a/src/zarr/creation.py +++ b/src/zarr/creation.py @@ -234,7 +234,6 @@ def create( def _kwargs_compat(compressor, fill_value, kwargs): - # to be compatible with h5py, as well as backwards-compatible with Zarr # 1.x, accept 'compression' and 'compression_opts' keyword arguments @@ -697,7 +696,6 @@ def open_array( def _like_args(a, kwargs): - shape, chunks = _get_shape_chunks(a) if shape is not None: kwargs.setdefault("shape", shape) diff --git a/src/zarr/hierarchy.py b/src/zarr/hierarchy.py index 3361969f08..1c9848e647 100644 --- a/src/zarr/hierarchy.py +++ b/src/zarr/hierarchy.py @@ -145,7 +145,7 @@ def __init__( synchronizer=None, zarr_version=None, *, - meta_array=None + meta_array=None, ): store: BaseStore = _normalize_store_arg(store, zarr_version=zarr_version) if zarr_version is None: @@ -591,7 +591,25 @@ def groups(self): for key in sorted(listdir(self._store, self._path)): path = self._key_prefix + key if contains_group(self._store, path, explicit_only=False): - yield key, Group( + yield ( + key, + Group( + self._store, + path=path, + read_only=self._read_only, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + zarr_version=self._version, + ), + ) + + else: + for key in self.group_keys(): + path = self._key_prefix + key + yield ( + key, + Group( self._store, path=path, read_only=self._read_only, @@ -599,19 +617,7 @@ def groups(self): cache_attrs=self.attrs.cache, synchronizer=self._synchronizer, zarr_version=self._version, - ) - - else: - for key in self.group_keys(): - path = self._key_prefix + key - yield key, Group( - self._store, - path=path, - read_only=self._read_only, - chunk_store=self._chunk_store, - cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, - 
zarr_version=self._version, + ), ) def array_keys(self, recurse=False): @@ -919,7 +925,6 @@ def tree(self, expand=False, level=None): return TreeViewer(self, expand=expand, level=level) def _write_op(self, f, *args, **kwargs): - # guard condition if self._read_only: raise ReadOnlyError() @@ -1094,7 +1099,6 @@ def create_dataset(self, name, **kwargs): return self._write_op(self._create_dataset_nosync, name, **kwargs) def _create_dataset_nosync(self, name, data=None, **kwargs): - assert "mode" not in kwargs path = self._item_path(name) @@ -1138,11 +1142,9 @@ def require_dataset(self, name, shape, dtype=None, exact=False, **kwargs): ) def _require_dataset_nosync(self, name, shape, dtype=None, exact=False, **kwargs): - path = self._item_path(name) if contains_array(self._store, path): - # array already exists at path, validate that it is the right shape and type synchronizer = kwargs.get("synchronizer", self._synchronizer) @@ -1235,7 +1237,7 @@ def _full_nosync(self, name, fill_value, **kwargs): path=path, chunk_store=self._chunk_store, fill_value=fill_value, - **kwargs + **kwargs, ) def array(self, name, data, **kwargs): @@ -1361,7 +1363,7 @@ def group( path=None, *, zarr_version=None, - meta_array=None + meta_array=None, ): """Create a group. @@ -1452,7 +1454,7 @@ def open_group( storage_options=None, *, zarr_version=None, - meta_array=None + meta_array=None, ): """Open a group using file-mode-like semantics. 
diff --git a/src/zarr/indexing.py b/src/zarr/indexing.py index 487cc8b9d9..b72d5a255d 100644 --- a/src/zarr/indexing.py +++ b/src/zarr/indexing.py @@ -111,7 +111,6 @@ def is_pure_orthogonal_indexing(selection, ndim): def normalize_integer_selection(dim_sel, dim_len): - # normalize type to int dim_sel = int(dim_sel) @@ -145,7 +144,6 @@ def normalize_integer_selection(dim_sel, dim_len): class IntDimIndexer: def __init__(self, dim_sel, dim_len, dim_chunk_len): - # normalize dim_sel = normalize_integer_selection(dim_sel, dim_len) @@ -169,7 +167,6 @@ def ceildiv(a, b): class SliceDimIndexer: def __init__(self, dim_sel, dim_len, dim_chunk_len): - # normalize self.start, self.stop, self.step = dim_sel.indices(dim_len) if self.step < 1: @@ -182,14 +179,12 @@ def __init__(self, dim_sel, dim_len, dim_chunk_len): self.nchunks = ceildiv(self.dim_len, self.dim_chunk_len) def __iter__(self): - # figure out the range of chunks we need to visit dim_chunk_ix_from = self.start // self.dim_chunk_len dim_chunk_ix_to = ceildiv(self.stop, self.dim_chunk_len) # iterate over chunks in range for dim_chunk_ix in range(dim_chunk_ix_from, dim_chunk_ix_to): - # compute offsets for chunk within overall array dim_offset = dim_chunk_ix * self.dim_chunk_len dim_limit = min(self.dim_len, (dim_chunk_ix + 1) * self.dim_chunk_len) @@ -237,7 +232,6 @@ def check_selection_length(selection, shape): def replace_ellipsis(selection, shape): - selection = ensure_tuple(selection) # count number of ellipsis present @@ -330,14 +324,12 @@ def is_basic_selection(selection): # noinspection PyProtectedMember class BasicIndexer: def __init__(self, selection, array): - # handle ellipsis selection = replace_ellipsis(selection, array._shape) # setup per-dimension indexers dim_indexers = [] for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): - if is_integer(dim_sel): dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) @@ -358,7 +350,6 @@ def __init__(self, selection, array): def 
__iter__(self): for dim_projections in itertools.product(*self.dim_indexers): - chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) out_selection = tuple( @@ -370,7 +361,6 @@ def __iter__(self): class BoolArrayDimIndexer: def __init__(self, dim_sel, dim_len, dim_chunk_len): - # check number of dimensions if not is_bool_array(dim_sel, 1): raise IndexError( @@ -380,8 +370,9 @@ def __init__(self, dim_sel, dim_len, dim_chunk_len): # check shape if dim_sel.shape[0] != dim_len: raise IndexError( - "Boolean array has the wrong length for dimension; " - "expected {}, got {}".format(dim_len, dim_sel.shape[0]) + "Boolean array has the wrong length for dimension; " "expected {}, got {}".format( + dim_len, dim_sel.shape[0] + ) ) # store attributes @@ -402,10 +393,8 @@ def __init__(self, dim_sel, dim_len, dim_chunk_len): self.dim_chunk_ixs = np.nonzero(self.chunk_nitems)[0] def __iter__(self): - # iterate over chunks with at least one item for dim_chunk_ix in self.dim_chunk_ixs: - # find region in chunk dim_offset = dim_chunk_ix * self.dim_chunk_len dim_chunk_sel = self.dim_sel[dim_offset : dim_offset + self.dim_chunk_len] @@ -472,7 +461,6 @@ def __init__( boundscheck=True, order=Order.UNKNOWN, ): - # ensure 1d array dim_sel = np.asanyarray(dim_sel) if not is_integer_array(dim_sel, 1): @@ -526,9 +514,7 @@ def __init__( self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) def __iter__(self): - for dim_chunk_ix in self.dim_chunk_ixs: - # find region in output if dim_chunk_ix == 0: start = 0 @@ -602,7 +588,6 @@ def oindex_set(a, selection, value): # noinspection PyProtectedMember class OrthogonalIndexer: def __init__(self, selection, array): - # handle ellipsis selection = replace_ellipsis(selection, array._shape) @@ -612,7 +597,6 @@ def __init__(self, selection, array): # setup per-dimension indexers dim_indexers = [] for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): - 
if is_integer(dim_sel): dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) @@ -649,7 +633,6 @@ def __init__(self, selection, array): def __iter__(self): for dim_projections in itertools.product(*self.dim_indexers): - chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) out_selection = tuple( @@ -658,7 +641,6 @@ def __iter__(self): # handle advanced indexing arrays orthogonally if self.is_advanced: - # N.B., numpy doesn't support orthogonal indexing directly as yet, # so need to work around via np.ix_. Also np.ix_ does not support a # mixture of arrays and slices or integers, so need to convert slices @@ -692,7 +674,6 @@ def __setitem__(self, selection, value): # noinspection PyProtectedMember class BlockIndexer: def __init__(self, selection, array): - # handle ellipsis selection = replace_ellipsis(selection, array._shape) @@ -794,7 +775,6 @@ def is_mask_selection(selection, array): # noinspection PyProtectedMember class CoordinateIndexer: def __init__(self, selection, array): - # some initial normalization selection = ensure_tuple(selection) selection = tuple([i] if is_integer(i) else i for i in selection) @@ -810,7 +790,6 @@ def __init__(self, selection, array): # handle wraparound, boundscheck for dim_sel, dim_len in zip(selection, array.shape): - # handle wraparound wraparound_indices(dim_sel, dim_len) @@ -861,10 +840,8 @@ def __init__(self, selection, array): self.chunk_mixs = np.unravel_index(self.chunk_rixs, array._cdata_shape) def __iter__(self): - # iterate over chunks for i, chunk_rix in enumerate(self.chunk_rixs): - chunk_coords = tuple(m[i] for m in self.chunk_mixs) if chunk_rix == 0: start = 0 @@ -891,7 +868,6 @@ def __iter__(self): # noinspection PyProtectedMember class MaskIndexer(CoordinateIndexer): def __init__(self, selection, array): - # some initial normalization selection = ensure_tuple(selection) selection = replace_lists(selection) @@ -944,8 +920,9 @@ def 
check_fields(fields, dtype): # check type if not isinstance(fields, (str, list, tuple)): raise IndexError( - "'fields' argument must be a string or list of strings; found " - "{!r}".format(type(fields)) + "'fields' argument must be a string or list of strings; found " "{!r}".format( + type(fields) + ) ) if fields: if dtype.names is None: diff --git a/src/zarr/meta.py b/src/zarr/meta.py index bd1f4ee037..80f9017456 100644 --- a/src/zarr/meta.py +++ b/src/zarr/meta.py @@ -89,7 +89,6 @@ class Metadata2: @classmethod def parse_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: - # Here we allow that a store may return an already-parsed metadata object, # or a string of JSON that we will parse here. We allow for an already-parsed # object to accommodate a consolidated metadata store, where all the metadata for diff --git a/src/zarr/n5.py b/src/zarr/n5.py index 7e73905527..44b44e69e2 100644 --- a/src/zarr/n5.py +++ b/src/zarr/n5.py @@ -72,21 +72,18 @@ class N5Store(NestedDirectoryStore): def __getitem__(self, key: str) -> bytes: if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) value = group_metadata_to_zarr(self._load_n5_attrs(key_new)) return json_dumps(value) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) top_level = key == zarr_array_meta_key value = array_metadata_to_zarr(self._load_n5_attrs(key_new), top_level=top_level) return json_dumps(value) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) value = attrs_to_zarr(self._load_n5_attrs(key_new)) @@ -104,9 +101,7 @@ def __getitem__(self, key: str) -> bytes: return super().__getitem__(key_new) def __setitem__(self, key: str, value: Any): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) n5_attrs = self._load_n5_attrs(key_new) @@ -115,7 +110,6 @@ def __setitem__(self, key: str, value: Any): value = 
json_dumps(n5_attrs) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) top_level = key == zarr_array_meta_key n5_attrs = self._load_n5_attrs(key_new) @@ -123,7 +117,6 @@ def __setitem__(self, key: str, value: Any): value = json_dumps(n5_attrs) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) n5_attrs = self._load_n5_attrs(key_new) @@ -166,9 +159,7 @@ def __delitem__(self, key: str): super().__delitem__(key_new) def __contains__(self, key): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) if key_new not in self: return False @@ -176,18 +167,15 @@ def __contains__(self, key): return "dimensions" not in self._load_n5_attrs(key_new) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) # array if attributes contain 'dimensions' return "dimensions" in self._load_n5_attrs(key_new) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) return self._contains_attrs(key_new) elif is_chunk_key(key): - key_new = invert_chunk_coords(key) else: key_new = key @@ -198,7 +186,6 @@ def __eq__(self, other): return isinstance(other, N5Store) and self.path == other.path def listdir(self, path: Optional[str] = None): - if path is not None: path = invert_chunk_coords(path) path = cast(str, path) @@ -208,7 +195,6 @@ def listdir(self, path: Optional[str] = None): children = super().listdir(path=path) if self._is_array(path): - # replace n5 attribute file with respective zarr attribute files children.remove(n5_attrs_key) children.append(zarr_array_meta_key) @@ -234,7 +220,6 @@ def listdir(self, path: Optional[str] = None): return sorted(new_children) elif self._is_group(path): - # replace n5 attribute file with respective zarr attribute files children.remove(n5_attrs_key) children.append(zarr_group_meta_key) @@ -244,7 +229,6 @@ def listdir(self, path: Optional[str] = None): 
return sorted(children) else: - return children def _load_n5_attrs(self, path: str) -> Dict[str, Any]: @@ -255,7 +239,6 @@ def _load_n5_attrs(self, path: str) -> Dict[str, Any]: return {} def _is_group(self, path: str): - if path is None: attrs_key = n5_attrs_key else: @@ -265,7 +248,6 @@ def _is_group(self, path: str): return len(n5_attrs) > 0 and "dimensions" not in n5_attrs def _is_array(self, path: str): - if path is None: attrs_key = n5_attrs_key else: @@ -274,7 +256,6 @@ def _is_array(self, path: str): return "dimensions" in self._load_n5_attrs(attrs_key) def _contains_attrs(self, path: str): - if path is None: attrs_key = n5_attrs_key else: @@ -376,21 +357,18 @@ def _normalize_key(self, key: str): def __getitem__(self, key: str) -> bytes: if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) value = group_metadata_to_zarr(self._load_n5_attrs(key_new)) return json_dumps(value) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) top_level = key == zarr_array_meta_key value = array_metadata_to_zarr(self._load_n5_attrs(key_new), top_level=top_level) return json_dumps(value) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) value = attrs_to_zarr(self._load_n5_attrs(key_new)) @@ -409,7 +387,6 @@ def __getitem__(self, key: str) -> bytes: def __setitem__(self, key: str, value: Any): if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) n5_attrs = self._load_n5_attrs(key_new) @@ -418,7 +395,6 @@ def __setitem__(self, key: str, value: Any): value = json_dumps(n5_attrs) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) top_level = key == zarr_array_meta_key n5_attrs = self._load_n5_attrs(key_new) @@ -427,7 +403,6 @@ def __setitem__(self, key: str, value: Any): value = json_dumps(n5_attrs) elif 
key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) n5_attrs = self._load_n5_attrs(key_new) @@ -456,7 +431,6 @@ def __setitem__(self, key: str, value: Any): super().__setitem__(key_new, value) def __delitem__(self, key: str): - if key.endswith(zarr_group_meta_key): key_new = key.replace(zarr_group_meta_key, self._group_meta_key) elif key.endswith(zarr_array_meta_key): @@ -471,7 +445,6 @@ def __delitem__(self, key: str): def __contains__(self, key: Any): if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) if key_new not in self: return False @@ -479,13 +452,11 @@ def __contains__(self, key: Any): return "dimensions" not in self._load_n5_attrs(key_new) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) # array if attributes contain 'dimensions' return "dimensions" in self._load_n5_attrs(key_new) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) return self._contains_attrs(key_new) @@ -508,7 +479,6 @@ def listdir(self, path: Optional[str] = None): # doesn't provide. 
children = super().listdir(path=path) if self._is_array(path): - # replace n5 attribute file with respective zarr attribute files children.remove(self._array_meta_key) children.append(zarr_array_meta_key) @@ -532,7 +502,6 @@ def listdir(self, path: Optional[str] = None): return sorted(new_children) elif self._is_group(path): - # replace n5 attribute file with respective zarr attribute files children.remove(self._group_meta_key) children.append(zarr_group_meta_key) @@ -550,7 +519,6 @@ def _load_n5_attrs(self, path: str): return {} def _is_group(self, path: Optional[str]): - if path is None: attrs_key = self._attrs_key else: @@ -560,7 +528,6 @@ def _is_group(self, path: Optional[str]): return len(n5_attrs) > 0 and "dimensions" not in n5_attrs def _is_array(self, path: Optional[str]): - if path is None: attrs_key = self._attrs_key else: @@ -569,7 +536,6 @@ def _is_array(self, path: Optional[str]): return "dimensions" in self._load_n5_attrs(attrs_key) def _contains_attrs(self, path: Optional[str]): - if path is None: attrs_key = self._attrs_key else: @@ -712,7 +678,6 @@ def attrs_to_zarr(attrs: Dict[str, Any]) -> Dict[str, Any]: def compressor_config_to_n5(compressor_config: Optional[Dict[str, Any]]) -> Dict[str, Any]: - if compressor_config is None: return {"type": "raw"} else: @@ -726,19 +691,16 @@ def compressor_config_to_n5(compressor_config: Optional[Dict[str, Any]]) -> Dict n5_config = {"type": codec_id} if codec_id == "bz2": - n5_config["type"] = "bzip2" n5_config["blockSize"] = _compressor_config["level"] elif codec_id == "blosc": - n5_config["cname"] = _compressor_config["cname"] n5_config["clevel"] = _compressor_config["clevel"] n5_config["shuffle"] = _compressor_config["shuffle"] n5_config["blocksize"] = _compressor_config["blocksize"] elif codec_id == "lzma": - # Switch to XZ for N5 if we are using the default XZ format. # Note: 4 is the default, which is lzma.CHECK_CRC64. 
if _compressor_config["format"] == 1 and _compressor_config["check"] in [-1, 4]: @@ -760,50 +722,42 @@ def compressor_config_to_n5(compressor_config: Optional[Dict[str, Any]]) -> Dict n5_config["preset"] = 6 elif codec_id == "zlib": - n5_config["type"] = "gzip" n5_config["level"] = _compressor_config["level"] n5_config["useZlib"] = True elif codec_id == "gzip": - n5_config["type"] = "gzip" n5_config["level"] = _compressor_config["level"] n5_config["useZlib"] = False else: - n5_config.update({k: v for k, v in _compressor_config.items() if k != "type"}) return n5_config def compressor_config_to_zarr(compressor_config: Dict[str, Any]) -> Optional[Dict[str, Any]]: - codec_id = compressor_config["type"] zarr_config = {"id": codec_id} if codec_id == "bzip2": - zarr_config["id"] = "bz2" zarr_config["level"] = compressor_config["blockSize"] elif codec_id == "blosc": - zarr_config["cname"] = compressor_config["cname"] zarr_config["clevel"] = compressor_config["clevel"] zarr_config["shuffle"] = compressor_config["shuffle"] zarr_config["blocksize"] = compressor_config["blocksize"] elif codec_id == "lzma": - zarr_config["format"] = compressor_config["format"] zarr_config["check"] = compressor_config["check"] zarr_config["preset"] = compressor_config["preset"] zarr_config["filters"] = compressor_config["filters"] elif codec_id == "xz": - zarr_config["id"] = "lzma" zarr_config["format"] = 1 # lzma.FORMAT_XZ zarr_config["check"] = -1 @@ -811,7 +765,6 @@ def compressor_config_to_zarr(compressor_config: Dict[str, Any]) -> Optional[Dic zarr_config["filters"] = None elif codec_id == "gzip": - if "useZlib" in compressor_config and compressor_config["useZlib"]: zarr_config["id"] = "zlib" zarr_config["level"] = compressor_config["level"] @@ -820,22 +773,18 @@ def compressor_config_to_zarr(compressor_config: Dict[str, Any]) -> Optional[Dic zarr_config["level"] = compressor_config["level"] elif codec_id == "raw": - return None else: - zarr_config.update({k: v for k, v in 
compressor_config.items() if k != "type"}) return zarr_config class N5ChunkWrapper(Codec): - codec_id = "n5_wrapper" def __init__(self, dtype, chunk_shape, compressor_config=None, compressor=None): - self.dtype = np.dtype(dtype) self.chunk_shape = tuple(chunk_shape) # is the dtype a little endian format? @@ -860,7 +809,6 @@ def get_config(self): return config def encode(self, chunk): - assert chunk.flags.c_contiguous header = self._create_header(chunk) @@ -872,12 +820,10 @@ def encode(self, chunk): return header + chunk.tobytes(order="A") def decode(self, chunk, out=None) -> bytes: - len_header, chunk_shape = self._read_header(chunk) chunk = chunk[len_header:] if out is not None: - # out should only be used if we read a complete chunk assert chunk_shape == self.chunk_shape, "Expected chunk of shape {}, found {}".format( self.chunk_shape, chunk_shape @@ -895,7 +841,6 @@ def decode(self, chunk, out=None) -> bytes: return out else: - if self._compressor: chunk = self._compressor.decode(chunk) @@ -915,7 +860,6 @@ def decode(self, chunk, out=None) -> bytes: @staticmethod def _create_header(chunk): - mode = struct.pack(">H", 0) num_dims = struct.pack(">H", len(chunk.shape)) shape = b"".join(struct.pack(">I", d) for d in chunk.shape[::-1]) @@ -924,7 +868,6 @@ def _create_header(chunk): @staticmethod def _read_header(chunk): - num_dims = struct.unpack(">H", chunk[2:4])[0] shape = tuple( struct.unpack(">I", chunk[i : i + 4])[0] for i in range(4, num_dims * 4 + 4, 4) diff --git a/src/zarr/storage.py b/src/zarr/storage.py index b36f804ebd..e7bd0c4cf4 100644 --- a/src/zarr/storage.py +++ b/src/zarr/storage.py @@ -482,7 +482,6 @@ def _init_array_metadata( dimension_separator=None, storage_transformers=(), ): - store_version = getattr(store, "_store_version", 2) path = normalize_storage_path(path) @@ -687,7 +686,6 @@ def _init_group_metadata( path: Optional[str] = None, chunk_store: Optional[StoreLike] = None, ): - store_version = getattr(store, "_store_version", 2) path = 
normalize_storage_path(path) @@ -1055,7 +1053,6 @@ class DirectoryStore(Store): """ def __init__(self, path, normalize_keys=False, dimension_separator=None): - # guard conditions path = os.path.abspath(path) if os.path.exists(path) and not os.path.isdir(path): @@ -1415,7 +1412,6 @@ def _normalize_key(self, key): def getitems( self, keys: Sequence[str], *, contexts: Mapping[str, Context] ) -> Mapping[str, Any]: - keys_transformed = [self._normalize_key(key) for key in keys] results = self.map.getitems(keys_transformed, on_error="omit") # The function calling this method may not recognize the transformed keys @@ -1768,7 +1764,6 @@ def __init__( mode="a", dimension_separator=None, ): - # store properties path = os.path.abspath(path) self.path = path @@ -2707,9 +2702,7 @@ def listdir(self, path=None): SELECT LTRIM(SUBSTR(k, LENGTH(?) + 1), "/") || "/" AS m FROM zarr WHERE k LIKE (? || "{sep}%") ) ORDER BY l ASC - """.format( - sep=sep - ), + """.format(sep=sep), (path, path), ) keys = list(map(operator.itemgetter(0), keys)) diff --git a/src/zarr/util.py b/src/zarr/util.py index ea0dd9fcec..270a444524 100644 --- a/src/zarr/util.py +++ b/src/zarr/util.py @@ -180,7 +180,6 @@ def normalize_chunks(chunks: Any, shape: Tuple[int, ...], typesize: int) -> Tupl def normalize_dtype(dtype: Union[str, np.dtype], object_codec) -> Tuple[np.dtype, Any]: - # convenience API for object arrays if inspect.isclass(dtype): dtype = dtype.__name__ # type: ignore @@ -245,7 +244,6 @@ def is_total_slice(item, shape: Tuple[int]) -> bool: def normalize_resize_args(old_shape, *args): - # normalize new shape argument if len(args) == 1: new_shape = args[0] @@ -294,7 +292,6 @@ def normalize_dimension_separator(sep: Optional[str]) -> Optional[str]: def normalize_fill_value(fill_value, dtype: np.dtype): - if fill_value is None or dtype.hasobject: # no fill value pass @@ -309,8 +306,9 @@ def normalize_fill_value(fill_value, dtype: np.dtype): if not isinstance(fill_value, str): raise ValueError( - 
"fill_value {!r} is not valid for dtype {}; must be a " - "unicode string".format(fill_value, dtype) + "fill_value {!r} is not valid for dtype {}; must be a " "unicode string".format( + fill_value, dtype + ) ) else: @@ -324,15 +322,15 @@ def normalize_fill_value(fill_value, dtype: np.dtype): except Exception as e: # re-raise with our own error message to be helpful raise ValueError( - "fill_value {!r} is not valid for dtype {}; nested " - "exception: {}".format(fill_value, dtype, e) + "fill_value {!r} is not valid for dtype {}; nested " "exception: {}".format( + fill_value, dtype, e + ) ) return fill_value def normalize_storage_path(path: Union[str, bytes, None]) -> str: - # handle bytes if isinstance(path, bytes): path = str(path, "ascii") @@ -342,7 +340,6 @@ def normalize_storage_path(path: Union[str, bytes, None]) -> str: path = str(path) if path: - # convert backslash to forward slash path = path.replace("\\", "/") @@ -506,7 +503,6 @@ def tree_widget(group, expand, level): class TreeViewer: def __init__(self, group, expand=False, level=None): - self.group = group self.expand = expand self.level = level diff --git a/src/zarr/v3/config.py b/src/zarr/v3/config.py index 98a25994c4..cebe5c1b09 100644 --- a/src/zarr/v3/config.py +++ b/src/zarr/v3/config.py @@ -43,7 +43,6 @@ def __init__( concurrency: Optional[int] = None, asyncio_loop: Optional[AbstractEventLoop] = None, ): - order_parsed = parse_indexing_order(order) concurrency_parsed = parse_concurrency(concurrency) asyncio_loop_parsed = parse_asyncio_loop(asyncio_loop) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 0012a77a81..fcd2fea215 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -151,7 +151,6 @@ async def getitem( self, key: str, ) -> Union[AsyncArray, AsyncGroup]: - store_path = self.store_path / key if self.metadata.zarr_format == 3: diff --git a/src/zarr/v3/store/local.py b/src/zarr/v3/store/local.py index c3da110450..5d22b30e9a 100644 --- a/src/zarr/v3/store/local.py +++ 
b/src/zarr/v3/store/local.py @@ -46,7 +46,6 @@ def _put( class LocalStore(Store): - supports_writes: bool = True supports_partial_writes: bool = True supports_listing: bool = True @@ -126,6 +125,7 @@ async def list(self) -> List[str]: ------- list[str] """ + # Q: do we want to return strings or Paths? def _list(root: Path) -> List[str]: files = [str(p) for p in root.rglob("") if p.is_file()] @@ -166,7 +166,6 @@ async def list_dir(self, prefix: str) -> List[str]: """ def _list_dir(root: Path, prefix: str) -> List[str]: - base = root / prefix to_strip = str(base) + "/" try: diff --git a/src/zarr/v3/sync.py b/src/zarr/v3/sync.py index fcc8e7b275..2e94a815cc 100644 --- a/src/zarr/v3/sync.py +++ b/src/zarr/v3/sync.py @@ -103,7 +103,6 @@ def _get_loop(): class SyncMixin: - _sync_configuration: SyncConfiguration def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: diff --git a/tests/test_attrs.py b/tests/test_attrs.py index a5ce4bac89..7e3377f664 100644 --- a/tests/test_attrs.py +++ b/tests/test_attrs.py @@ -30,7 +30,6 @@ def init_attributes(self, store, read_only=False, cache=True, zarr_version=2): return Attributes(store, key=root + "attrs", read_only=read_only, cache=cache) def test_storage(self, zarr_version): - store = _init_store(zarr_version) root = ".z" if zarr_version == 2 else meta_root attrs_key = root + "attrs" @@ -50,7 +49,6 @@ def test_storage(self, zarr_version): assert dict(foo="bar", baz=42) == d def test_utf8_encoding(self, zarr_version): - project_root = pathlib.Path(zarr.__file__).resolve().parent.parent fixdir = project_root / "fixture" testdir = fixdir / "utf8attrs" @@ -67,7 +65,6 @@ def test_utf8_encoding(self, zarr_version): assert fixture["utf8attrs"].attrs.asdict() == dict(foo="た") def test_get_set_del_contains(self, zarr_version): - store = _init_store(zarr_version) a = self.init_attributes(store, zarr_version=zarr_version) assert "foo" not in a @@ -84,7 +81,6 @@ def test_get_set_del_contains(self, zarr_version): a["foo"] def 
test_update_put(self, zarr_version): - store = _init_store(zarr_version) a = self.init_attributes(store, zarr_version=zarr_version) assert "foo" not in a @@ -102,7 +98,6 @@ def test_update_put(self, zarr_version): assert "baz" not in a def test_iterators(self, zarr_version): - store = _init_store(zarr_version) a = self.init_attributes(store, zarr_version=zarr_version) assert 0 == len(a) @@ -232,7 +227,6 @@ def test_caching_on(self, zarr_version): assert get_cnt == store.counter["__getitem__", attrs_key] def test_caching_off(self, zarr_version): - # setup store store = CountingDict() if zarr_version == 2 else CountingDictV3() attrs_key = ".zattrs" if zarr_version == 2 else "meta/root/attrs" diff --git a/tests/test_convenience.py b/tests/test_convenience.py index 0970a9e1aa..7cb4db7a35 100644 --- a/tests/test_convenience.py +++ b/tests/test_convenience.py @@ -57,7 +57,6 @@ def _init_creation_kwargs(zarr_version): @pytest.mark.parametrize("zarr_version", _VERSIONS) def test_open_array(path_type, zarr_version): - store = tempfile.mkdtemp() atexit.register(atexit_rmtree, store) store = path_type(store) @@ -86,7 +85,6 @@ def test_open_array(path_type, zarr_version): @pytest.mark.parametrize("zarr_version", _VERSIONS) def test_open_group(path_type, zarr_version): - store = tempfile.mkdtemp() atexit.register(atexit_rmtree, store) store = path_type(store) @@ -210,7 +208,6 @@ def test_tree(zarr_version): def test_consolidate_metadata( with_chunk_store, zarr_version, listable, monkeypatch, stores_from_path ): - # setup initial data if stores_from_path: store = tempfile.mkdtemp() @@ -399,7 +396,6 @@ def test_save_array_separator(tmpdir, options): class TestCopyStore(unittest.TestCase): - _version = 2 def setUp(self): @@ -536,7 +532,6 @@ def test_if_exists(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestCopyStoreV3(TestCopyStore): - _version = 3 def setUp(self): @@ -557,7 +552,6 @@ def test_mismatched_store_versions(self): def 
check_copied_array(original, copied, without_attrs=False, expect_props=None): - # setup source_h5py = original.__module__.startswith("h5py.") dest_h5py = copied.__module__.startswith("h5py.") @@ -621,7 +615,6 @@ def check_copied_array(original, copied, without_attrs=False, expect_props=None) def check_copied_group(original, copied, without_attrs=False, expect_props=None, shallow=False): - # setup if expect_props is None: expect_props = dict() diff --git a/tests/test_creation.py b/tests/test_creation.py index 9307b81b52..27ce00bc8a 100644 --- a/tests/test_creation.py +++ b/tests/test_creation.py @@ -74,7 +74,6 @@ def _init_creation_kwargs(zarr_version, at_root=True): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_array(zarr_version, at_root): - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version kwargs = _init_creation_kwargs(zarr_version, at_root) @@ -213,7 +212,6 @@ def test_full_additional_dtypes(zarr_version): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_open_array(zarr_version, at_root, dimension_separator): - store = "data/array.zarr" kwargs = _init_creation_kwargs(zarr_version, at_root) @@ -329,7 +327,6 @@ def test_open_array(zarr_version, at_root, dimension_separator): def test_open_array_none(): - # open with both store and zarr_version = None z = open_array(mode="w", shape=100, chunks=10) assert isinstance(z, Array) @@ -339,7 +336,6 @@ def test_open_array_none(): @pytest.mark.parametrize("dimension_separator", [".", "/", None]) @pytest.mark.parametrize("zarr_version", _VERSIONS2) def test_open_array_infer_separator_from_store(zarr_version, dimension_separator): - if zarr_version == 3: StoreClass = DirectoryStoreV3 path = "data" @@ -370,7 +366,6 @@ def test_open_array_infer_separator_from_store(zarr_version, dimension_separator # TODO: N5 support for v3 @pytest.mark.parametrize("zarr_version", 
[None, 2]) def test_open_array_n5(zarr_version): - store = "data/array.zarr" kwargs = _init_creation_kwargs(zarr_version) @@ -409,7 +404,6 @@ def test_open_array_n5(zarr_version): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_open_array_dict_store(zarr_version, at_root): - # dict will become a KVStore store = dict() kwargs = _init_creation_kwargs(zarr_version, at_root) @@ -503,7 +497,6 @@ def test_empty_like(zarr_version, at_root): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_zeros_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version @@ -529,7 +522,6 @@ def test_zeros_like(zarr_version, at_root): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_ones_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version @@ -556,7 +548,6 @@ def test_ones_like(zarr_version, at_root): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_full_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version diff --git a/tests/test_dim_separator.py b/tests/test_dim_separator.py index 83f4d3b5b9..4276d1829d 100644 --- a/tests/test_dim_separator.py +++ b/tests/test_dim_separator.py @@ -46,7 +46,6 @@ def dataset(tmpdir, request): static = project_root / "fixture" / suffix if not static.exists(): # pragma: no cover - if "nested" in which: # No way to reproduce the nested_legacy file via code generator = NestedDirectoryStore diff --git a/tests/test_filters.py b/tests/test_filters.py index 
d55be9145f..fc63cdca8d 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -30,7 +30,6 @@ def test_array_with_delta_filter(): - # setup astype = "u1" dtype = "i8" @@ -38,7 +37,6 @@ def test_array_with_delta_filter(): data = np.arange(100, dtype=dtype) for compressor in compressors: - a = array(data, chunks=10, compressor=compressor, filters=filters) # check round-trip @@ -57,7 +55,6 @@ def test_array_with_delta_filter(): def test_array_with_astype_filter(): - # setup encode_dtype = "i1" decode_dtype = "i8" @@ -68,7 +65,6 @@ def test_array_with_astype_filter(): data = np.arange(shape, dtype=decode_dtype) for compressor in compressors: - a = array(data, chunks=chunks, compressor=compressor, filters=filters) # check round-trip @@ -88,7 +84,6 @@ def test_array_with_astype_filter(): def test_array_with_scaleoffset_filter(): - # setup astype = "u1" dtype = "f8" @@ -97,7 +92,6 @@ def test_array_with_scaleoffset_filter(): data = np.linspace(1000, 1001, 34, dtype="f8") for compressor in compressors: - a = array(data, chunks=5, compressor=compressor, filters=filters) # check round-trip @@ -116,7 +110,6 @@ def test_array_with_scaleoffset_filter(): def test_array_with_quantize_filter(): - # setup dtype = "f8" digits = 3 @@ -125,7 +118,6 @@ def test_array_with_quantize_filter(): data = np.linspace(0, 1, 34, dtype=dtype) for compressor in compressors: - a = array(data, chunks=5, compressor=compressor, filters=filters) # check round-trip @@ -144,14 +136,12 @@ def test_array_with_quantize_filter(): def test_array_with_packbits_filter(): - # setup flt = PackBits() filters = [flt] data = np.random.randint(0, 2, size=100, dtype=bool) for compressor in compressors: - a = array(data, chunks=5, compressor=compressor, filters=filters) # check round-trip @@ -170,14 +160,12 @@ def test_array_with_packbits_filter(): def test_array_with_categorize_filter(): - # setup data = np.random.choice(["foo", "bar", "baz"], size=100) flt = Categorize(dtype=data.dtype, labels=["foo", 
"bar", "baz"]) filters = [flt] for compressor in compressors: - a = array(data, chunks=5, compressor=compressor, filters=filters) # check round-trip diff --git a/tests/test_group_v3.py b/tests/test_group_v3.py index 1498d6779b..f5b5dde86d 100644 --- a/tests/test_group_v3.py +++ b/tests/test_group_v3.py @@ -14,7 +14,6 @@ def store_path(tmpdir): def test_group(store_path) -> None: - agroup = AsyncGroup( metadata=GroupMetadata(), store_path=store_path, @@ -57,7 +56,6 @@ def test_group(store_path) -> None: def test_group_sync_constructor(store_path) -> None: - group = Group.create( store=store_path, attributes={"title": "test 123"}, diff --git a/tests/test_hierarchy.py b/tests/test_hierarchy.py index 3eaa4743dd..6d4b1ff54c 100644 --- a/tests/test_hierarchy.py +++ b/tests/test_hierarchy.py @@ -1085,7 +1085,6 @@ def test_paths(self): g1.store.close() def test_pickle(self): - # setup group g = self.create_group() d = g.create_dataset("foo/bar", shape=100, chunks=10) @@ -1113,7 +1112,6 @@ def test_pickle(self): g2.store.close() def test_context_manager(self): - with self.create_group() as g: d = g.create_dataset("foo/bar", shape=100, chunks=10) d[:] = np.arange(100) @@ -1375,7 +1373,6 @@ def create_store(): return store, None def test_context_manager(self): - with self.create_group() as g: store = g.store d = g.create_dataset("foo/bar", shape=100, chunks=10) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 1835206819..d441f3b8fa 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -17,7 +17,6 @@ def test_normalize_integer_selection(): - assert 1 == normalize_integer_selection(1, 100) assert 99 == normalize_integer_selection(-1, 100) with pytest.raises(IndexError): @@ -29,7 +28,6 @@ def test_normalize_integer_selection(): def test_replace_ellipsis(): - # 1D, single item assert (0,) == replace_ellipsis(0, (100,)) @@ -68,7 +66,6 @@ def test_replace_ellipsis(): def test_get_basic_selection_0d(): - # setup a = np.array(42) z = 
zarr.create(shape=a.shape, dtype=a.dtype, fill_value=None) @@ -191,7 +188,6 @@ def _test_get_basic_selection(a, z, selection): # noinspection PyStatementEffect def test_get_basic_selection_1d(): - # setup a = np.arange(1050, dtype=int) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) @@ -264,7 +260,6 @@ def test_get_basic_selection_1d(): # noinspection PyStatementEffect def test_get_basic_selection_2d(): - # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) @@ -423,7 +418,6 @@ def test_fancy_indexing_doesnt_mix_with_implicit_slicing(): def test_set_basic_selection_0d(): - # setup v = np.array(42) a = np.zeros_like(v) @@ -479,7 +473,6 @@ def _test_get_orthogonal_selection(a, z, selection): # noinspection PyStatementEffect def test_get_orthogonal_selection_1d_bool(): - # setup a = np.arange(1050, dtype=int) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) @@ -502,7 +495,6 @@ def test_get_orthogonal_selection_1d_bool(): # noinspection PyStatementEffect def test_get_orthogonal_selection_1d_int(): - # setup a = np.arange(1050, dtype=int) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) @@ -561,7 +553,6 @@ def _test_get_orthogonal_selection_2d(a, z, ix0, ix1): # noinspection PyStatementEffect def test_get_orthogonal_selection_2d(): - # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) @@ -570,7 +561,6 @@ def test_get_orthogonal_selection_2d(): np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: - # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) @@ -641,7 +631,6 @@ def _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2): def test_get_orthogonal_selection_3d(): - # setup a = np.arange(100000, dtype=int).reshape(200, 50, 10) z = zarr.create(shape=a.shape, chunks=(60, 20, 3), 
dtype=a.dtype) @@ -650,7 +639,6 @@ def test_get_orthogonal_selection_3d(): np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: - # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) @@ -673,7 +661,6 @@ def test_get_orthogonal_selection_3d(): def test_orthogonal_indexing_edge_cases(): - a = np.arange(6).reshape(1, 2, 3) z = zarr.create(shape=a.shape, chunks=(1, 2, 3), dtype=a.dtype) z[:] = a @@ -706,7 +693,6 @@ def _test_set_orthogonal_selection(v, a, z, selection): def test_set_orthogonal_selection_1d(): - # setup v = np.arange(1050, dtype=int) a = np.empty(v.shape, dtype=int) @@ -715,7 +701,6 @@ def test_set_orthogonal_selection_1d(): # test with different degrees of sparseness np.random.seed(42) for p in 0.5, 0.1, 0.01: - # boolean arrays ix = np.random.binomial(1, p, size=a.shape[0]).astype(bool) _test_set_orthogonal_selection(v, a, z, ix) @@ -734,7 +719,6 @@ def test_set_orthogonal_selection_1d(): def _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1): - selections = [ # index both axes with array (ix0, ix1), @@ -749,7 +733,6 @@ def _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1): def test_set_orthogonal_selection_2d(): - # setup v = np.arange(10000, dtype=int).reshape(1000, 10) a = np.empty_like(v) @@ -758,7 +741,6 @@ def test_set_orthogonal_selection_2d(): np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: - # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) @@ -780,7 +762,6 @@ def test_set_orthogonal_selection_2d(): def _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2): - selections = ( # single value (84, 42, 4), @@ -807,7 +788,6 @@ def _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2): def test_set_orthogonal_selection_3d(): - # setup v = np.arange(100000, dtype=int).reshape(200, 50, 10) 
a = np.empty_like(v) @@ -816,7 +796,6 @@ def test_set_orthogonal_selection_3d(): np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: - # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) @@ -888,7 +867,6 @@ def _test_get_coordinate_selection(a, z, selection): # noinspection PyStatementEffect def test_get_coordinate_selection_1d(): - # setup a = np.arange(1050, dtype=int) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) @@ -932,7 +910,6 @@ def test_get_coordinate_selection_1d(): def test_get_coordinate_selection_2d(): - # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) @@ -1027,7 +1004,6 @@ def test_set_coordinate_selection_1d(): def test_set_coordinate_selection_2d(): - # setup v = np.arange(10000, dtype=int).reshape(1000, 10) a = np.empty_like(v) @@ -1258,7 +1234,6 @@ def _test_get_mask_selection(a, z, selection): # noinspection PyStatementEffect def test_get_mask_selection_1d(): - # setup a = np.arange(1050, dtype=int) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) @@ -1285,7 +1260,6 @@ def test_get_mask_selection_1d(): # noinspection PyStatementEffect def test_get_mask_selection_2d(): - # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) @@ -1318,7 +1292,6 @@ def _test_set_mask_selection(v, a, z, selection): def test_set_mask_selection_1d(): - # setup v = np.arange(1050, dtype=int) a = np.empty_like(v) @@ -1338,7 +1311,6 @@ def test_set_mask_selection_1d(): def test_set_mask_selection_2d(): - # setup v = np.arange(10000, dtype=int).reshape(1000, 10) a = np.empty_like(v) @@ -1352,7 +1324,6 @@ def test_set_mask_selection_2d(): def test_get_selection_out(): - # basic selections a = np.arange(1050) z = zarr.create(shape=1050, chunks=100, dtype=a.dtype) @@ -1426,7 +1397,6 @@ def 
test_get_selection_out(): def test_get_selections_with_fields(): - a = [("aaa", 1, 4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)] a = np.array(a, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) z = zarr.create(shape=a.shape, chunks=2, dtype=a.dtype, fill_value=None) @@ -1444,7 +1414,6 @@ def test_get_selections_with_fields(): ] for fields in fields_fixture: - # total selection expect = a[fields] actual = z.get_basic_selection(Ellipsis, fields=fields) @@ -1534,7 +1503,6 @@ def test_get_selections_with_fields(): def test_set_selections_with_fields(): - v = [("aaa", 1, 4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)] v = np.array(v, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) a = np.empty_like(v) @@ -1553,7 +1521,6 @@ def test_set_selections_with_fields(): ] for fields in fields_fixture: - # currently multi-field assignment is not supported in numpy, so we won't support # it either if isinstance(fields, list) and len(fields) > 1: @@ -1567,7 +1534,6 @@ def test_set_selections_with_fields(): z.set_mask_selection([True, False, True], v, fields=fields) else: - if isinstance(fields, list) and len(fields) == 1: # work around numpy does not support multi-field assignment even if there # is only one field @@ -1752,7 +1718,6 @@ def test_accessed_chunks(shape, chunks, ops): z = zarr.create(shape=shape, chunks=chunks, store=store) for ii, (optype, slices) in enumerate(ops): - # Resolve the slices into the accessed chunks for each dimension chunks_per_dim = [] for N, C, sl in zip(shape, chunks, slices): diff --git a/tests/test_info.py b/tests/test_info.py index 7fb6feb11b..96eae999f4 100644 --- a/tests/test_info.py +++ b/tests/test_info.py @@ -7,7 +7,6 @@ @pytest.mark.parametrize("array_size", [10, 15000]) def test_info(array_size): - # setup g = zarr.group(store=dict(), chunk_store=dict(), synchronizer=zarr.ThreadSynchronizer()) g.create_group("foo") diff --git a/tests/test_meta.py b/tests/test_meta.py index db50560c8e..50f51929ef 100644 --- a/tests/test_meta.py +++ 
b/tests/test_meta.py @@ -34,7 +34,6 @@ def assert_json_equal(expect, actual): def test_encode_decode_array_1(): - meta = dict( shape=(100,), chunks=(10,), @@ -76,7 +75,6 @@ def test_encode_decode_array_1(): def test_encode_decode_array_2(): - # some variations df = Delta(astype=" Date: Mon, 8 Apr 2024 13:43:18 -0400 Subject: [PATCH 04/31] Remove outdated dev install docs from installation.rst and link to contributing.rst (#1643) Co-authored-by: Joe Hamman --- docs/installation.rst | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 8553d451cb..3d4ac41072 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -19,13 +19,4 @@ latest GitHub main:: $ pip install git+https://github.com/zarr-developers/zarr-python.git -To work with Zarr source code in development, install from GitHub:: - - $ git clone --recursive https://github.com/zarr-developers/zarr-python.git - $ cd zarr-python - $ python -m pip install -e . - -To verify that Zarr has been fully installed, run the test suite:: - - $ pip install pytest - $ python -m pytest -v --pyargs zarr +To work with Zarr source code in development, see `Contributing `_. \ No newline at end of file From b762fa47da38d3a1664e3b74dc8f53c72c2e52fa Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 11 Apr 2024 11:15:29 +0200 Subject: [PATCH 05/31] fix: Rename children to members; AsyncGroup.members yields tuples of (name, AsyncArray / AsyncGroup) pairs; Group.members repackages these into a dict. 
--- src/zarr/v3/group.py | 39 ++++++++++++++++++++++++++------------- tests/test_group_v3.py | 26 +++++++++++--------------- 2 files changed, 37 insertions(+), 28 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 5bce87376c..a30d7d1702 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -287,13 +287,15 @@ async def update_attributes(self, new_attributes: dict[str, Any]): def __repr__(self): return f"" - async def nchildren(self) -> int: + async def nmembers(self) -> int: raise NotImplementedError - async def children(self) -> AsyncGenerator[AsyncArray, AsyncGroup]: + async def members(self) -> AsyncGenerator[tuple[str, AsyncArray | AsyncGroup], None]: """ Returns an AsyncGenerator over the arrays and groups contained in this group. This method requires that `store_path.store` supports directory listing. + + The results are not guaranteed to be ordered. """ if not self.store_path.store.supports_listing: msg = ( @@ -311,13 +313,17 @@ async def children(self) -> AsyncGenerator[AsyncArray, AsyncGroup]: # is there a better way to schedule this? for subkey in subkeys_filtered: try: - yield await self.getitem(subkey) + yield (subkey, await self.getitem(subkey)) except KeyError: - # keyerror is raised when `subkey``names an object in the store + # keyerror is raised when `subkey` names an object (in the object storage sense), + # as opposed to a prefix, in the store under the prefix associated with this group # in which case `subkey` cannot be the name of a sub-array or sub-group. 
+ logger.warning( + "Object at %s is not recognized as a component of a Zarr hierarchy.", subkey + ) pass - async def contains(self, child: str) -> bool: + async def contains(self, member: str) -> bool: raise NotImplementedError async def group_keys(self) -> AsyncIterator[str]: @@ -444,16 +450,23 @@ def update_attributes(self, new_attributes: dict[str, Any]): return self @property - def nchildren(self) -> int: - return self._sync(self._async_group.nchildren) + def nmembers(self) -> int: + return self._sync(self._async_group.nmembers) @property - def children(self) -> list[Array | Group]: - _children = self._sync_iter(self._async_group.children) - return [Array(obj) if isinstance(obj, AsyncArray) else Group(obj) for obj in _children] - - def __contains__(self, child) -> bool: - return self._sync(self._async_group.contains(child)) + def members(self) -> dict[str, Array | Group]: + """ + Return the sub-arrays and sub-groups of this group as a `dict` of (name, array | group) + pairs + """ + _members = self._sync_iter(self._async_group.members) + return { + key: Array(value) if isinstance(value, AsyncArray) else Group(value) + for key, value in _members + } + + def __contains__(self, member) -> bool: + return self._sync(self._async_group.contains(member)) def group_keys(self) -> Iterator[str]: return self._sync_iter(self._async_group.group_keys) diff --git a/tests/test_group_v3.py b/tests/test_group_v3.py index 555374f5b3..204c255064 100644 --- a/tests/test_group_v3.py +++ b/tests/test_group_v3.py @@ -13,9 +13,9 @@ # todo: put RemoteStore in here @pytest.mark.parametrize("store_type", ("local_store", "memory_store")) -def test_group_children(store_type, request): +def test_group_members(store_type, request): """ - Test that `Group.children` returns correct values, i.e. the arrays and groups + Test that `Group.members` returns correct values, i.e. the arrays and groups (explicit and implicit) contained in that group. 
""" @@ -26,12 +26,14 @@ def test_group_children(store_type, request): store_path=StorePath(store=store, path=path), ) group = Group(agroup) + members_expected = {} - subgroup = group.create_group("subgroup") + members_expected["subgroup"] = group.create_group("subgroup") # make a sub-sub-subgroup, to ensure that the children calculation doesn't go # too deep in the hierarchy - _ = subgroup.create_group("subsubgroup") - subarray = group.create_array( + _ = members_expected["subgroup"].create_group("subsubgroup") + + members_expected["subarray"] = group.create_array( "subarray", shape=(100,), dtype="uint8", chunk_shape=(10,), exists_ok=True ) @@ -42,21 +44,15 @@ def test_group_children(store_type, request): # this creates an implicit group called implicit_subgroup sync(store.set(f"{path}/implicit_subgroup/extra_object", b"000000")) # make the implicit subgroup - implicit_subgroup = Group( + members_expected["implicit_subgroup"] = Group( AsyncGroup( metadata=GroupMetadata(), store_path=StorePath(store=store, path=f"{path}/implicit_subgroup"), ) ) - # note: these assertions are order-independent, because it is not clear - # if group.children guarantees a particular order for the children. - # If order is not guaranteed, then the better version of this test is - # to compare two sets, but presently neither the group nor array classes are hashable. 
- observed = group.children - assert len(observed) == 3 - assert subarray in observed - assert implicit_subgroup in observed - assert subgroup in observed + members_observed = group.members + # members are not guaranteed to be ordered, so sort before comparing + assert sorted(members_observed) == sorted(members_expected) @pytest.mark.parametrize("store_type", (("local_store",))) From 55742269a77f20f19114a7e077b675edf7332d77 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 11 Apr 2024 13:20:32 +0200 Subject: [PATCH 06/31] fix: make Group.members return a tuple of str, Array | Group pairs --- src/zarr/v3/group.py | 10 +++++----- tests/test_group_v3.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index a30d7d1702..f2158d3b28 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -454,16 +454,16 @@ def nmembers(self) -> int: return self._sync(self._async_group.nmembers) @property - def members(self) -> dict[str, Array | Group]: + def members(self) -> tuple[tuple[str, Array | Group], ...]: """ - Return the sub-arrays and sub-groups of this group as a `dict` of (name, array | group) + Return the sub-arrays and sub-groups of this group as a `tuple` of (name, array | group) pairs """ _members = self._sync_iter(self._async_group.members) - return { - key: Array(value) if isinstance(value, AsyncArray) else Group(value) + return tuple( + (key, Array(value)) if isinstance(value, AsyncArray) else (key, Group(value)) for key, value in _members - } + ) def __contains__(self, member) -> bool: return self._sync(self._async_group.contains(member)) diff --git a/tests/test_group_v3.py b/tests/test_group_v3.py index 204c255064..6b1f78df60 100644 --- a/tests/test_group_v3.py +++ b/tests/test_group_v3.py @@ -52,7 +52,7 @@ def test_group_members(store_type, request): ) members_observed = group.members # members are not guaranteed to be ordered, so sort before comparing - assert sorted(members_observed) == 
sorted(members_expected) + assert sorted(dict(members_observed)) == sorted(members_expected) @pytest.mark.parametrize("store_type", (("local_store",))) From d634cbf211f24290f8d282b2479df1566e317f85 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 11 Apr 2024 20:47:28 +0200 Subject: [PATCH 07/31] fix: revert changes to synchronization code; this is churn that we need to deal with --- src/zarr/v3/group.py | 6 +++++- src/zarr/v3/sync.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index ce5a3a3e58..a93f8404e9 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -437,6 +437,10 @@ async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group async_group = replace(self._async_group, metadata=new_metadata) return replace(self, _async_group=async_group) + @property + def store_path(self) -> StorePath: + return self._async_group.store_path + @property def metadata(self) -> GroupMetadata: return self._async_group.metadata @@ -463,7 +467,7 @@ def members(self) -> tuple[tuple[str, Array | Group], ...]: Return the sub-arrays and sub-groups of this group as a `tuple` of (name, array | group) pairs """ - _members = self._sync_iter(self._async_group.members) + _members: list[AsyncArray | AsyncGroup] = self._sync_iter(self._async_group.members) return tuple( (key, Array(value)) if isinstance(value, AsyncArray) else (key, Group(value)) for key, value in _members diff --git a/src/zarr/v3/sync.py b/src/zarr/v3/sync.py index 2e94a815cc..592ce8b75b 100644 --- a/src/zarr/v3/sync.py +++ b/src/zarr/v3/sync.py @@ -113,7 +113,7 @@ def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: def _sync_iter(self, coroutine: Coroutine[Any, Any, AsyncIterator[T]]) -> List[T]: async def iter_to_list() -> List[T]: # TODO: replace with generators so we don't materialize the entire iterator at once - async_iterator = await coroutine - return [item async for item in async_iterator] + # 
async_iterator = await coroutine + return [item async for item in coroutine()] return self._sync(iter_to_list()) From 3a73950160e9c26bdad2e141c42711b762823b82 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Thu, 11 Apr 2024 19:48:57 +0100 Subject: [PATCH 08/31] Allow dmypy to be run (#1780) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9f21a84aee..77b7dcd66f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -150,7 +150,7 @@ extend-exclude = [ [tool.mypy] python_version = "3.8" ignore_missing_imports = true -follow_imports = "silent" +namespace_packages = false [tool.pytest.ini_options] doctest_optionflags = [ From 3a9d968dd856efdb3d56cd73ad527baac6ffeded Mon Sep 17 00:00:00 2001 From: David Stansby Date: Thu, 11 Apr 2024 19:50:31 +0100 Subject: [PATCH 09/31] Remove unused typing ignore comments (#1781) Co-authored-by: Davis Bennett --- pyproject.toml | 5 +++++ src/zarr/_storage/store.py | 4 ++-- src/zarr/_storage/v3_storage_transformers.py | 2 +- src/zarr/meta.py | 4 ++-- src/zarr/storage.py | 12 ++++++------ src/zarr/util.py | 2 +- 6 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 77b7dcd66f..966065655f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -152,6 +152,11 @@ python_version = "3.8" ignore_missing_imports = true namespace_packages = false +warn_unused_configs = true +warn_redundant_casts = true +warn_unused_ignores = true + + [tool.pytest.ini_options] doctest_optionflags = [ "NORMALIZE_WHITESPACE", diff --git a/src/zarr/_storage/store.py b/src/zarr/_storage/store.py index 80e4ad8f75..9911cfa12d 100644 --- a/src/zarr/_storage/store.py +++ b/src/zarr/_storage/store.py @@ -642,10 +642,10 @@ def _rmdir_from_keys_v3(store: StoreV3, path: str = "") -> None: sfx = _get_metadata_suffix(store) array_meta_file = meta_dir + ".array" + sfx if array_meta_file in store: - store.erase(array_meta_file) # type: ignore + 
store.erase(array_meta_file) group_meta_file = meta_dir + ".group" + sfx if group_meta_file in store: - store.erase(group_meta_file) # type: ignore + store.erase(group_meta_file) def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str]: diff --git a/src/zarr/_storage/v3_storage_transformers.py b/src/zarr/_storage/v3_storage_transformers.py index 3090aea28c..cb11cea52e 100644 --- a/src/zarr/_storage/v3_storage_transformers.py +++ b/src/zarr/_storage/v3_storage_transformers.py @@ -367,7 +367,7 @@ def erase_prefix(self, prefix): def rmdir(self, path=None): path = normalize_storage_path(path) - _rmdir_from_keys_v3(self, path) # type: ignore + _rmdir_from_keys_v3(self, path) def __contains__(self, key): if self._is_data_key(key): diff --git a/src/zarr/meta.py b/src/zarr/meta.py index 80f9017456..3a5435a174 100644 --- a/src/zarr/meta.py +++ b/src/zarr/meta.py @@ -234,8 +234,8 @@ def decode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> return np.array(v, dtype=dtype)[()] elif dtype.kind in "c": v = ( - cls.decode_fill_value(v[0], dtype.type().real.dtype), # type: ignore - cls.decode_fill_value(v[1], dtype.type().imag.dtype), # type: ignore + cls.decode_fill_value(v[0], dtype.type().real.dtype), + cls.decode_fill_value(v[1], dtype.type().imag.dtype), ) v = v[0] + 1j * v[1] return np.array(v, dtype=dtype)[()] diff --git a/src/zarr/storage.py b/src/zarr/storage.py index e7bd0c4cf4..e3a43d26c8 100644 --- a/src/zarr/storage.py +++ b/src/zarr/storage.py @@ -205,7 +205,7 @@ def rmdir(store: StoreLike, path: Path = None): store_version = getattr(store, "_store_version", 2) if hasattr(store, "rmdir") and store.is_erasable(): # type: ignore # pass through - store.rmdir(path) # type: ignore + store.rmdir(path) else: # slow version, delete one key at a time if store_version == 2: @@ -235,7 +235,7 @@ def listdir(store: BaseStore, path: Path = None): path = normalize_storage_path(path) if hasattr(store, "listdir"): # pass through - 
return store.listdir(path) # type: ignore + return store.listdir(path) else: # slow version, iterate through all keys warnings.warn( @@ -288,7 +288,7 @@ def getsize(store: BaseStore, path: Path = None) -> int: if hasattr(store, "getsize"): # pass through path = normalize_storage_path(path) - return store.getsize(path) # type: ignore + return store.getsize(path) elif isinstance(store, MutableMapping): return _getsize(store, path) else: @@ -626,7 +626,7 @@ def _init_array_metadata( key = _prefix_to_array_key(store, _path_to_prefix(path)) if hasattr(store, "_metadata_class"): - store[key] = store._metadata_class.encode_array_metadata(meta) # type: ignore + store[key] = store._metadata_class.encode_array_metadata(meta) else: store[key] = encode_array_metadata(meta) @@ -729,10 +729,10 @@ def _init_group_metadata( if store_version == 3: meta = {"attributes": {}} # type: ignore else: - meta = {} # type: ignore + meta = {} key = _prefix_to_group_key(store, _path_to_prefix(path)) if hasattr(store, "_metadata_class"): - store[key] = store._metadata_class.encode_group_metadata(meta) # type: ignore + store[key] = store._metadata_class.encode_group_metadata(meta) else: store[key] = encode_group_metadata(meta) diff --git a/src/zarr/util.py b/src/zarr/util.py index 270a444524..35ecc64bba 100644 --- a/src/zarr/util.py +++ b/src/zarr/util.py @@ -182,7 +182,7 @@ def normalize_chunks(chunks: Any, shape: Tuple[int, ...], typesize: int) -> Tupl def normalize_dtype(dtype: Union[str, np.dtype], object_codec) -> Tuple[np.dtype, Any]: # convenience API for object arrays if inspect.isclass(dtype): - dtype = dtype.__name__ # type: ignore + dtype = dtype.__name__ if isinstance(dtype, str): # allow ':' to delimit class from codec arguments tokens = dtype.split(":") From ce6fcbbee24f4429a740963cec8b239a0b76cd34 Mon Sep 17 00:00:00 2001 From: David Stansby Date: Fri, 12 Apr 2024 10:42:09 +0100 Subject: [PATCH 10/31] Check untyped defs (#1784) Co-authored-by: Davis Bennett --- pyproject.toml | 20 
+++++++++++++++++++- src/zarr/attrs.py | 3 ++- src/zarr/n5.py | 5 ++--- src/zarr/v3/sync.py | 5 +++-- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 966065655f..b67f5fec94 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -148,7 +148,7 @@ extend-exclude = [ ] [tool.mypy] -python_version = "3.8" +python_version = "3.10" ignore_missing_imports = true namespace_packages = false @@ -157,6 +157,24 @@ warn_redundant_casts = true warn_unused_ignores = true +check_untyped_defs = true + +[[tool.mypy.overrides]] +module = [ + "zarr._storage.store", + "zarr._storage.v3_storage_transformers", + "zarr.v3.group", + "zarr.core", + "zarr.hierarchy", + "zarr.indexing", + "zarr.storage", + "zarr.sync", + "zarr.util", + "tests.*", +] +check_untyped_defs = false + + [tool.pytest.ini_options] doctest_optionflags = [ "NORMALIZE_WHITESPACE", diff --git a/src/zarr/attrs.py b/src/zarr/attrs.py index e967c5b853..e589bc9022 100644 --- a/src/zarr/attrs.py +++ b/src/zarr/attrs.py @@ -1,3 +1,4 @@ +from typing import Any import warnings from collections.abc import MutableMapping @@ -39,7 +40,7 @@ def _get_nosync(self): try: data = self.store[self.key] except KeyError: - d = dict() + d: dict[str, Any] = dict() if self._version > 2: d["attributes"] = {} else: diff --git a/src/zarr/n5.py b/src/zarr/n5.py index 44b44e69e2..79bab20576 100644 --- a/src/zarr/n5.py +++ b/src/zarr/n5.py @@ -325,10 +325,9 @@ class N5FSStore(FSStore): def __init__(self, *args, **kwargs): if "dimension_separator" in kwargs: - kwargs.pop("dimension_separator") warnings.warn("Keyword argument `dimension_separator` will be ignored") - dimension_separator = "." - super().__init__(*args, dimension_separator=dimension_separator, **kwargs) + kwargs["dimension_separator"] = "." 
+ super().__init__(*args, **kwargs) @staticmethod def _swap_separator(key: str): diff --git a/src/zarr/v3/sync.py b/src/zarr/v3/sync.py index 2e94a815cc..41dfeadba9 100644 --- a/src/zarr/v3/sync.py +++ b/src/zarr/v3/sync.py @@ -90,8 +90,9 @@ def _get_loop(): # repeat the check just in case the loop got filled between the # previous two calls from another thread if loop[0] is None: - loop[0] = asyncio.new_event_loop() - th = threading.Thread(target=loop[0].run_forever, name="zarrIO") + new_loop = asyncio.new_event_loop() + loop[0] = new_loop + th = threading.Thread(target=new_loop.run_forever, name="zarrIO") th.daemon = True th.start() iothread[0] = th From d264f71a5a5f49f47daef36e8074a468eae3010e Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 13:30:01 +0200 Subject: [PATCH 11/31] chore: move v3 tests into v3 folder --- tests/{ => v3}/test_codecs_v3.py | 0 tests/{ => v3}/test_group_v3.py | 0 tests/{ => v3}/test_storage_v3.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename tests/{ => v3}/test_codecs_v3.py (100%) rename tests/{ => v3}/test_group_v3.py (100%) rename tests/{ => v3}/test_storage_v3.py (100%) diff --git a/tests/test_codecs_v3.py b/tests/v3/test_codecs_v3.py similarity index 100% rename from tests/test_codecs_v3.py rename to tests/v3/test_codecs_v3.py diff --git a/tests/test_group_v3.py b/tests/v3/test_group_v3.py similarity index 100% rename from tests/test_group_v3.py rename to tests/v3/test_group_v3.py diff --git a/tests/test_storage_v3.py b/tests/v3/test_storage_v3.py similarity index 100% rename from tests/test_storage_v3.py rename to tests/v3/test_storage_v3.py From 0741bed728fe724d733250e84c006698cb9899f8 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 12 Apr 2024 14:36:18 +0200 Subject: [PATCH 12/31] chore: type hints --- tests/{ => v3}/conftest.py | 13 +++++++++++++ tests/v3/test_group_v3.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 8 deletions(-) rename tests/{ => v3}/conftest.py (66%) diff 
--git a/tests/conftest.py b/tests/v3/conftest.py similarity index 66% rename from tests/conftest.py rename to tests/v3/conftest.py index 40275ba62c..0e166e224e 100644 --- a/tests/conftest.py +++ b/tests/v3/conftest.py @@ -31,3 +31,16 @@ def remote_store(): @pytest.fixture(scope="function") def memory_store(): return MemoryStore() + + +@pytest.fixture(scope="function") +def store(request: str, tmpdir): + param = request.param + if param == "local_store": + return LocalStore(str(tmpdir)) + elif param == "memory_store": + return MemoryStore() + elif param == "remote_store": + return RemoteStore() + else: + assert False diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index 79bbd4af45..87d59eb31a 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -3,6 +3,7 @@ if TYPE_CHECKING: from zarr.v3.store.remote import MemoryStore, LocalStore + import pytest import numpy as np @@ -13,14 +14,13 @@ # todo: put RemoteStore in here -@pytest.mark.parametrize("store_type", ("local_store", "memory_store")) -def test_group_members(store_type, request): +@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +def test_group_children(store: MemoryStore | LocalStore): """ Test that `Group.members` returns correct values, i.e. the arrays and groups (explicit and implicit) contained in that group. 
""" - store: LocalStore | MemoryStore = request.getfixturevalue(store_type) path = "group" agroup = AsyncGroup( metadata=GroupMetadata(), @@ -56,9 +56,8 @@ def test_group_members(store_type, request): assert sorted(dict(members_observed)) == sorted(members_expected) -@pytest.mark.parametrize("store_type", (("local_store",))) -def test_group(store_type, request) -> None: - store = request.getfixturevalue(store_type) +@pytest.mark.parametrize("store", (("local_store", "memory_store")), indirect=["store"]) +def test_group(store: MemoryStore | LocalStore) -> None: store_path = StorePath(store) agroup = AsyncGroup( metadata=GroupMetadata(), @@ -100,9 +99,10 @@ def test_group(store_type, request) -> None: assert dict(bar3.attrs) == {"baz": "qux", "name": "bar"} -def test_group_sync_constructor(store_path) -> None: +@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +def test_group_sync_constructor(store: MemoryStore | LocalStore) -> None: group = Group.create( - store=store_path, + store=store, attributes={"title": "test 123"}, runtime_configuration=RuntimeConfiguration(), ) From eb8a535368970391fc6378c9c03875fae6d19ff4 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 14:59:51 +0200 Subject: [PATCH 13/31] test: add schema for group method tests --- tests/v3/test_group_v3.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index 87d59eb31a..b3fedf6c8e 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -3,6 +3,7 @@ if TYPE_CHECKING: from zarr.v3.store.remote import MemoryStore, LocalStore + from typing import Literal import pytest import numpy as np @@ -108,3 +109,27 @@ def test_group_sync_constructor(store: MemoryStore | LocalStore) -> None: ) assert group._async_group.metadata.attributes["title"] == "test 123" + + +@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) 
+@pytest.mark.parametrize("zarr_format", ("2", "3")) +@pytest.mark.parametrize("exists_ok", (True, False)) +@pytest.mark.parametrize("runtime_configuration", (None,)) +def test_create( + store: MemoryStore | LocalStore, + exists_ok: bool, + zarr_format: Literal["2", "3"], + runtime_configuration: None, +): + ... + + +@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +def test_from_dict(store: MemoryStore | LocalStore): + ... + + +@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", ("2", "3")) +def test_getitem(store: MemoryStore | LocalStore, zarr_format: Literal["2", "3"]): + ... From ee2e233e44fc368181989de004239ea2a66b2eb3 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 21:43:05 +0200 Subject: [PATCH 14/31] chore: add type for zarr_formats --- src/zarr/v3/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/zarr/v3/common.py b/src/zarr/v3/common.py index 1caf83a764..e92e84ef7b 100644 --- a/src/zarr/v3/common.py +++ b/src/zarr/v3/common.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Union, Tuple, Iterable, Dict, List, TypeVar, overload +from typing import TYPE_CHECKING, Literal, Union, Tuple, Iterable, Dict, List, TypeVar, overload import asyncio import contextvars from dataclasses import dataclass @@ -21,6 +21,7 @@ ChunkCoordsLike = Iterable[int] SliceSelection = Tuple[slice, ...] 
Selection = Union[slice, SliceSelection] +ZarrFormat = Literal[2, 3] JSON = Union[str, None, int, float, Enum, Dict[str, "JSON"], List["JSON"], Tuple["JSON", ...]] From 01eec6f269c3ff01d71a20a1c70752775540bd59 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 22:19:03 +0200 Subject: [PATCH 15/31] chore: remove localstore for now --- tests/conftest.py | 7 +++ tests/v3/conftest.py | 54 ++++++++++++++++++---- tests/v3/test_group_v3.py | 96 +++++++++++++++++++++++++++++---------- 3 files changed, 125 insertions(+), 32 deletions(-) create mode 100644 tests/conftest.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000000..6680e4066b --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,7 @@ +import pytest +import pathlib + + +@pytest.fixture(params=[str, pathlib.Path]) +def path_type(request): + return request.param diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index 0e166e224e..ceb0b558eb 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -1,10 +1,33 @@ +from __future__ import annotations +from typing import TYPE_CHECKING + +from zarr.v3.common import ZarrFormat +from zarr.v3.group import AsyncGroup, Group + +if TYPE_CHECKING: + from typing import Any, Literal +from dataclasses import dataclass, field import pathlib + import pytest +from zarr.v3.config import RuntimeConfiguration from zarr.v3.store import LocalStore, StorePath, MemoryStore from zarr.v3.store.remote import RemoteStore +def parse_store( + store: Literal["local", "memory", "remote"], path: str +) -> LocalStore | MemoryStore | RemoteStore: + if store == "local": + return LocalStore(path) + if store == "memory": + return MemoryStore() + if store == "remote": + return RemoteStore() + assert False + + @pytest.fixture(params=[str, pathlib.Path]) def path_type(request): return request.param @@ -36,11 +59,26 @@ def memory_store(): @pytest.fixture(scope="function") def store(request: str, tmpdir): param = request.param - if param == 
"local_store": - return LocalStore(str(tmpdir)) - elif param == "memory_store": - return MemoryStore() - elif param == "remote_store": - return RemoteStore() - else: - assert False + return parse_store(param, str(tmpdir)) + + +@dataclass +class AsyncGroupRequest: + zarr_format: ZarrFormat + store: str + attributes: dict[str, Any] = field(default_factory=dict) + runtime_configuration: RuntimeConfiguration = RuntimeConfiguration() + + +@pytest.fixture(scope="function") +async def async_group(request: AsyncGroupRequest, tmpdir) -> Group: + param: AsyncGroupRequest = request.param + + store = parse_store(param.store, str(tmpdir)) + return await AsyncGroup.create( + store, + attributes=param.attributes, + zarr_format=param.zarr_format, + runtime_configuration=param.runtime_configuration, + exists_ok=False, + ) diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index b3fedf6c8e..ceef30a467 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -1,9 +1,12 @@ from __future__ import annotations from typing import TYPE_CHECKING +from zarr.v3.store.core import make_store_path + if TYPE_CHECKING: - from zarr.v3.store.remote import MemoryStore, LocalStore + from zarr.v3.store import MemoryStore, LocalStore from typing import Literal + from zarr.v3.common import ZarrFormat import pytest import numpy as np @@ -15,7 +18,7 @@ # todo: put RemoteStore in here -@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) def test_group_children(store: MemoryStore | LocalStore): """ Test that `Group.members` returns correct values, i.e. 
the arrays and groups @@ -57,14 +60,10 @@ def test_group_children(store: MemoryStore | LocalStore): assert sorted(dict(members_observed)) == sorted(members_expected) -@pytest.mark.parametrize("store", (("local_store", "memory_store")), indirect=["store"]) +@pytest.mark.parametrize("store", (("local", "memory")), indirect=["store"]) def test_group(store: MemoryStore | LocalStore) -> None: store_path = StorePath(store) - agroup = AsyncGroup( - metadata=GroupMetadata(), - store_path=store_path, - runtime_configuration=RuntimeConfiguration(), - ) + agroup = AsyncGroup(metadata=GroupMetadata(), store_path=store_path) group = Group(agroup) assert agroup.metadata is group.metadata @@ -100,36 +99,85 @@ def test_group(store: MemoryStore | LocalStore) -> None: assert dict(bar3.attrs) == {"baz": "qux", "name": "bar"} -@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) -def test_group_sync_constructor(store: MemoryStore | LocalStore) -> None: +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("exists_ok", (True, False)) +@pytest.mark.parametrize( + "runtime_configuration", (RuntimeConfiguration(order="C"), RuntimeConfiguration(order="F")) +) +def test_group_create( + store: MemoryStore | LocalStore, exists_ok: bool, runtime_configuration: RuntimeConfiguration +): + + attributes = {"foo": 100} group = Group.create( - store=store, - attributes={"title": "test 123"}, - runtime_configuration=RuntimeConfiguration(), + store, + attributes=attributes, + exists_ok=exists_ok, + runtime_configuration=runtime_configuration, ) - assert group._async_group.metadata.attributes["title"] == "test 123" + assert group.attrs == attributes + assert group._async_group.runtime_configuration == runtime_configuration + if not exists_ok: + with pytest.raises(AssertionError): + group = Group.create( + store, + attributes=attributes, + exists_ok=exists_ok, + runtime_configuration=runtime_configuration, + ) 
-@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", ("2", "3")) + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) @pytest.mark.parametrize("exists_ok", (True, False)) -@pytest.mark.parametrize("runtime_configuration", (None,)) -def test_create( +@pytest.mark.parametrize( + "runtime_configuration", (RuntimeConfiguration(order="C"), RuntimeConfiguration(order="F")) +) +async def test_asyncgroup_create( store: MemoryStore | LocalStore, exists_ok: bool, - zarr_format: Literal["2", "3"], - runtime_configuration: None, + zarr_format: ZarrFormat, + runtime_configuration: RuntimeConfiguration, ): - ... + """ + Test that `AsyncGroup.create` works as expected. + """ + attributes = {"foo": 100} + group = await AsyncGroup.create( + store, + attributes=attributes, + exists_ok=exists_ok, + zarr_format=zarr_format, + runtime_configuration=runtime_configuration, + ) + assert group.metadata == GroupMetadata(zarr_format=zarr_format, attributes=attributes) + assert group.store_path == make_store_path(store) + assert group.runtime_configuration == runtime_configuration -@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) -def test_from_dict(store: MemoryStore | LocalStore): + if not exists_ok: + with pytest.raises(AssertionError): + group = await AsyncGroup.create( + store, + attributes=attributes, + exists_ok=exists_ok, + zarr_format=zarr_format, + runtime_configuration=runtime_configuration, + ) + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +async def test_asyncgroup_open(store: MemoryStore | LocalStore): + """ + Test that + """ ... 
-@pytest.mark.parametrize("store", ("local_store", "memory_store"), indirect=["store"]) +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) @pytest.mark.parametrize("zarr_format", ("2", "3")) def test_getitem(store: MemoryStore | LocalStore, zarr_format: Literal["2", "3"]): ... From acae77a857f91232ad28a19f36af2c0ced79fcf3 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 22:49:06 +0200 Subject: [PATCH 16/31] test: add __init__.py to support imports from top-level conftest.py, and add some docstrings, and remove redundant def --- tests/v3/__init__.py | 0 tests/v3/test_group_v3.py | 30 ++++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 tests/v3/__init__.py diff --git a/tests/v3/__init__.py b/tests/v3/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index ceef30a467..62b7dff178 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -107,7 +107,9 @@ def test_group(store: MemoryStore | LocalStore) -> None: def test_group_create( store: MemoryStore | LocalStore, exists_ok: bool, runtime_configuration: RuntimeConfiguration ): - + """ + Test that `Group.create` works as expected. + """ attributes = {"foo": 100} group = Group.create( store, @@ -169,12 +171,32 @@ async def test_asyncgroup_create( ) +@pytest.mark.asyncio @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) -async def test_asyncgroup_open(store: MemoryStore | LocalStore): +@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize("runtime_configuration", (RuntimeConfiguration(),)) +async def test_asyncgroup_open( + store: LocalStore | MemoryStore, + zarr_format: ZarrFormat, + runtime_configuration: RuntimeConfiguration, +) -> None: """ - Test that + Create an `AsyncGroup`, then ensure that we can open it using `AsyncGroup.open` """ - ... 
+ attributes = {"foo": 100} + group_w = await AsyncGroup.create( + store=store, + attributes=attributes, + exists_ok=False, + zarr_format=ZarrFormat, + runtime_configuration=runtime_configuration, + ) + + group_r = AsyncGroup.open( + store=store, zarr_format=zarr_format, runtime_configuration=runtime_configuration + ) + + assert group_r == group_w @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) From ebe15483abced98144a7f3ede81bba3c745ddaa3 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 22:56:13 +0200 Subject: [PATCH 17/31] fix: return valid JSON from GroupMetadata.to_bytes for v2 metadata --- src/zarr/v3/group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index a93f8404e9..470cf0b0cf 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -54,7 +54,7 @@ def to_bytes(self) -> dict[str, bytes]: return {ZARR_JSON: json.dumps(self.to_dict()).encode()} else: return { - ZGROUP_JSON: json.dumps({"zarr_format": 2}).encode(), + ZGROUP_JSON: json.dumps({"zarr_format": self.zarr_format}).encode(), ZATTRS_JSON: json.dumps(self.attributes).encode(), } From 3dce5e37cf9fe9ee4c6b20a011c9d78fbad67a6a Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 22:56:34 +0200 Subject: [PATCH 18/31] fix: don't use a type as a value --- tests/v3/test_group_v3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index 62b7dff178..04a9479e9b 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -188,7 +188,7 @@ async def test_asyncgroup_open( store=store, attributes=attributes, exists_ok=False, - zarr_format=ZarrFormat, + zarr_format=zarr_format, runtime_configuration=runtime_configuration, ) From 7f82fdfd4ab102e9d9e66ad86d3884f7795cb2ad Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 4 Apr 2024 23:57:29 +0200 Subject: [PATCH 19/31] test: add getitem test --- 
tests/v3/conftest.py | 7 ++-- tests/v3/test_group_v3.py | 70 +++++++++++++++++++++++++++++++++++---- 2 files changed, 67 insertions(+), 10 deletions(-) diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index ceb0b558eb..05ab0550e7 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING from zarr.v3.common import ZarrFormat -from zarr.v3.group import AsyncGroup, Group +from zarr.v3.group import AsyncGroup if TYPE_CHECKING: from typing import Any, Literal @@ -71,14 +71,15 @@ class AsyncGroupRequest: @pytest.fixture(scope="function") -async def async_group(request: AsyncGroupRequest, tmpdir) -> Group: +async def async_group(request: AsyncGroupRequest, tmpdir) -> AsyncGroup: param: AsyncGroupRequest = request.param store = parse_store(param.store, str(tmpdir)) - return await AsyncGroup.create( + agroup = await AsyncGroup.create( store, attributes=param.attributes, zarr_format=param.zarr_format, runtime_configuration=param.runtime_configuration, exists_ok=False, ) + return agroup diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group_v3.py index 04a9479e9b..3e2779fa79 100644 --- a/tests/v3/test_group_v3.py +++ b/tests/v3/test_group_v3.py @@ -1,11 +1,10 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any from zarr.v3.store.core import make_store_path if TYPE_CHECKING: from zarr.v3.store import MemoryStore, LocalStore - from typing import Literal from zarr.v3.common import ZarrFormat import pytest @@ -192,14 +191,71 @@ async def test_asyncgroup_open( runtime_configuration=runtime_configuration, ) - group_r = AsyncGroup.open( + group_r = await AsyncGroup.open( store=store, zarr_format=zarr_format, runtime_configuration=runtime_configuration ) - assert group_r == group_w + assert group_w.attrs == group_w.attrs == attributes + assert group_w == group_r + # try opening with the wrong zarr format + if zarr_format == 3: + zarr_format_wrong = 2 + elif 
zarr_format == 2: + zarr_format_wrong = 3 + else: + assert False + # todo: get more specific than this + with pytest.raises(ValueError): + await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong) + + +# todo: replace the dict[str, Any] type with something a bit more specific +# should this be async? @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", ("2", "3")) -def test_getitem(store: MemoryStore | LocalStore, zarr_format: Literal["2", "3"]): - ... +@pytest.mark.parametrize( + "data", + ( + {"zarr_format": 3, "node_type": "group", "attributes": {"foo": 100}}, + {"zarr_format": 2, "attributes": {"foo": 100}}, + ), +) +def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, Any]): + """ + Test that we can create an AsyncGroup from a dict + """ + path = "test" + store_path = StorePath(store=store, path=path) + group = AsyncGroup.from_dict( + store_path, data=data, runtime_configuration=RuntimeConfiguration() + ) + + assert group.metadata.zarr_format == data["zarr_format"] + assert group.metadata.attributes == data["attributes"] + + +# todo: replace this with a declarative API where we model a full hierarchy +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): + """ + Create an `AsyncGroup`, then create members of that group, and ensure that we can access those + members via the `AsyncGroup.getitem` method. 
+ """ + agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format) + + sub_array_path = "sub_array" + sub_array = await agroup.create_array( + path=sub_array_path, shape=(10,), dtype="uint8", chunk_shape=(2,) + ) + assert await agroup.getitem(sub_array_path) == sub_array + + sub_group_path = "sub_group" + sub_group = await agroup.create_group(sub_group_path, attributes={"foo": 100}) + assert await agroup.getitem(sub_group_path) == sub_group + + # check that asking for a nonexistent key raises KeyError + with pytest.raises(KeyError): + agroup.getitem("foo") From 1655ff88b017b5ac2cd0619cd77280c6406d09cc Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Apr 2024 10:47:23 +0200 Subject: [PATCH 20/31] fix: replace reference to nonexistent method in with , which does exist --- src/zarr/v3/group.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 470cf0b0cf..ff8ef53eba 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -123,7 +123,6 @@ async def open( zarr_json = ( json.loads(zarr_json_bytes) if zarr_json_bytes is not None else {"zarr_format": 3} ) - elif zarr_format == 2: # V2 groups are comprised of a .zgroup and .zattrs objects # (both are optional in the case of implicit groups) From e8514b110576d7433a28a5e970f9b592aaedb2df Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Apr 2024 10:48:07 +0200 Subject: [PATCH 21/31] test: declare v3ness via directory structure, not test file name --- tests/v3/{test_codecs_v3.py => test_codecs.py} | 0 tests/v3/{test_group_v3.py => test_group.py} | 0 tests/v3/{test_storage_v3.py => test_storage.py} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename tests/v3/{test_codecs_v3.py => test_codecs.py} (100%) rename tests/v3/{test_group_v3.py => test_group.py} (100%) rename tests/v3/{test_storage_v3.py => test_storage.py} (100%) diff --git a/tests/v3/test_codecs_v3.py b/tests/v3/test_codecs.py similarity index 100% rename from 
tests/v3/test_codecs_v3.py rename to tests/v3/test_codecs.py diff --git a/tests/v3/test_group_v3.py b/tests/v3/test_group.py similarity index 100% rename from tests/v3/test_group_v3.py rename to tests/v3/test_group.py diff --git a/tests/v3/test_storage_v3.py b/tests/v3/test_storage.py similarity index 100% rename from tests/v3/test_storage_v3.py rename to tests/v3/test_storage.py From dacacc8e08d19887b49dcc2f5c00f6273fcd5550 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Apr 2024 12:48:08 +0200 Subject: [PATCH 22/31] add a docstring to _get, and pass auto_mkdir to _put --- src/zarr/v3/store/local.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/zarr/v3/store/local.py b/src/zarr/v3/store/local.py index 5d22b30e9a..3d9595b28c 100644 --- a/src/zarr/v3/store/local.py +++ b/src/zarr/v3/store/local.py @@ -10,6 +10,20 @@ def _get(path: Path, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> bytes: + """ + Fetch a contiguous region of bytes from a file. + + Parameters + ---------- + + path: Path + The file to read bytes from. + byte_range: Optional[Tuple[int, Optional[int]]] = None + The range of bytes to read. If `byte_range` is `None`, then the entire file will be read. + If `byte_range` is a tuple, the first value specifies the index of the first byte to read, + and the second value specifies the total number of bytes to read. If the total value is + `None`, then the entire file after the first byte will be read. 
+ """ if byte_range is not None: start = byte_range[0] end = (start + byte_range[1]) if byte_range[1] is not None else None @@ -94,7 +108,7 @@ async def get_partial_values( async def set(self, key: str, value: BytesLike) -> None: assert isinstance(key, str) path = self.root / key - await to_thread(_put, path, value) + await to_thread(_put, path, value, auto_mkdir=self.auto_mkdir) async def set_partial_values(self, key_start_values: List[Tuple[str, int, bytes]]) -> None: args = [] From 5d2a532619d0bead92625f692608f150781be6cd Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Apr 2024 13:42:55 +0200 Subject: [PATCH 23/31] fix: add docstring to LocalStore.get_partial_values; adjust body of LocalStore.get_partial_values to properly handle the byte_range parameter of LocalStore.get. --- src/zarr/v3/store/local.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/zarr/v3/store/local.py b/src/zarr/v3/store/local.py index 3d9595b28c..1a789141d9 100644 --- a/src/zarr/v3/store/local.py +++ b/src/zarr/v3/store/local.py @@ -95,14 +95,23 @@ async def get( async def get_partial_values( self, key_ranges: List[Tuple[str, Tuple[int, int]]] ) -> List[bytes]: + """ + Read byte ranges from multiple keys. + + Parameters + ---------- + + key_ranges: List[Tuple[str, Tuple[int, int]]] + A list of (key, (start, length)) tuples. The first element of the tuple is the name of + the key in storage to fetch bytes from. The second element the tuple defines the byte + range to retrieve. These values are arguments to `get`, as this method wraps + concurrent invocation of `get`. 
+ """ args = [] for key, byte_range in key_ranges: assert isinstance(key, str) path = self.root / key - if byte_range is not None: - args.append((_get, path, byte_range[0], byte_range[1])) - else: - args.append((_get, path)) + args.append((_get, path, byte_range)) return await concurrent_map(args, to_thread, limit=None) # TODO: fix limit async def set(self, key: str, value: BytesLike) -> None: From 06d8b04963bbe9d2a569c75f05a22fe3edac818b Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Fri, 5 Apr 2024 15:49:49 +0200 Subject: [PATCH 24/31] test: add tests for localstore init, set, get, get_partial --- tests/v3/test_group.py | 2 +- tests/v3/test_storage.py | 101 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 2 deletions(-) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 3e2779fa79..b9be73fcc8 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -258,4 +258,4 @@ async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: # check that asking for a nonexistent key raises KeyError with pytest.raises(KeyError): - agroup.getitem("foo") + await agroup.getitem("foo") diff --git a/tests/v3/test_storage.py b/tests/v3/test_storage.py index 3d8024de70..88d83c8e85 100644 --- a/tests/v3/test_storage.py +++ b/tests/v3/test_storage.py @@ -6,9 +6,108 @@ # import tempfile # import numpy as np +from __future__ import annotations +from zarr.v3.store.local import LocalStore +from pathlib import Path import pytest -pytest.skip("old v3 tests are disabled", allow_module_level=True) + +@pytest.mark.parametrize("auto_mkdir", (True, False)) +def test_local_store_init(tmpdir, auto_mkdir: bool) -> None: + tmpdir_str = str(tmpdir) + tmpdir_path = Path(tmpdir_str) + store = LocalStore(root=tmpdir_str, auto_mkdir=auto_mkdir) + + assert store.root == tmpdir_path + assert store.auto_mkdir == auto_mkdir + + # ensure that str and pathlib.Path get normalized to the same output + # a stronger test is to ensure that 
these two store instances are identical + # but LocalStore.__eq__ is not defined at this time. + assert store.root == LocalStore(root=tmpdir_path, auto_mkdir=auto_mkdir).root + + store_str = f"file://{tmpdir_str}" + assert str(store) == store_str + assert repr(store) == f"LocalStore({repr(store_str)})" + + +@pytest.mark.asyncio +@pytest.mark.parametrize("byte_range", (None, (0, None), (1, None), (1, 2), (None, 1))) +async def test_local_store_get( + local_store, byte_range: None | tuple[int | None, int | None] +) -> None: + payload = b"\x01\x02\x03\x04" + object_name = "foo" + (local_store.root / object_name).write_bytes(payload) + observed = await local_store.get(object_name, byte_range=byte_range) + + if byte_range is None: + start = 0 + length = len(payload) + else: + maybe_start, maybe_len = byte_range + if maybe_start is None: + start = 0 + else: + start = maybe_start + + if maybe_len is None: + length = len(payload) - start + else: + length = maybe_len + + expected = payload[start : start + length] + assert observed == expected + + # test that it's an error to get bytes from a file that doesn't exist + with pytest.raises(FileNotFoundError): + await local_store.get(object_name + "_absent", byte_range=byte_range) + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "key_ranges", + ( + [], + [("key_0", (0, 1))], + [("dir/key_0", (0, 1)), ("key_1", (0, 2))], + [("key_0", (0, 1)), ("key_1", (0, 2)), ("key_1", (0, 2))], + ), +) +async def test_local_store_get_partial( + tmpdir, key_ranges: tuple[list[tuple[str, tuple[int, int]]]] +) -> None: + store = LocalStore(str(tmpdir), auto_mkdir=True) + # use the utf-8 encoding of the key as the bytes + for key, _ in key_ranges: + payload = bytes(key, encoding="utf-8") + (store.root / key).write_bytes(payload) + + results = await store.get_partial_values(key_ranges) + for idx, observed in enumerate(results): + key, byte_range = key_ranges[idx] + expected = await store.get(key, byte_range=byte_range) + assert observed == 
expected + + +@pytest.mark.asyncio +@pytest.mark.parametrize("path", ("foo", "foo/bar")) +@pytest.mark.parametrize("auto_mkdir", (True, False)) +async def test_local_store_set(tmpdir, path: str, auto_mkdir: bool) -> None: + store = LocalStore(str(tmpdir), auto_mkdir=auto_mkdir) + payload = b"\x01\x02\x03\x04" + + if "/" in path and not auto_mkdir: + with pytest.raises(FileNotFoundError): + await store.set(path, payload) + else: + x = await store.set(path, payload) + + # this method should not return anything + assert x is None + + assert (store.root / path).read_bytes() == payload + # import zarr # from zarr._storage.store import _get_hierarchy_metadata, v3_api_available, StorageTransformer From d8749dea7bd4758970eb80455154177162aa6730 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 16 Apr 2024 10:51:34 +0200 Subject: [PATCH 25/31] fix: remove pre-emptive fetching from group.open --- src/zarr/v3/group.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index ff8ef53eba..b80348aff8 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -112,8 +112,8 @@ async def open( zarr_format: Literal[2, 3] = 3, ) -> AsyncGroup: store_path = make_store_path(store) - zarr_json_bytes = await (store_path / ZARR_JSON).get() - assert zarr_json_bytes is not None + + zarr_json_bytes: bytes | None # TODO: consider trying to autodiscover the zarr-format here if zarr_format == 3: @@ -129,12 +129,8 @@ async def open( zgroup_bytes, zattrs_bytes = await asyncio.gather( (store_path / ZGROUP_JSON).get(), (store_path / ZATTRS_JSON).get() ) - zgroup = ( - json.loads(json.loads(zgroup_bytes)) - if zgroup_bytes is not None - else {"zarr_format": 2} - ) - zattrs = json.loads(json.loads(zattrs_bytes)) if zattrs_bytes is not None else {} + zgroup = json.loads(zgroup_bytes) if zgroup_bytes is not None else {"zarr_format": 2} + zattrs = json.loads(zattrs_bytes) if zattrs_bytes is not None else {} zarr_json 
= {**zgroup, "attributes": zattrs} else: raise ValueError(f"unexpected zarr_format: {zarr_format}") @@ -160,12 +156,6 @@ async def getitem( ) -> AsyncArray | AsyncGroup: store_path = self.store_path / key - # Note: - # in zarr-python v2, we first check if `key` references an Array, else if `key` references - # a group,using standalone `contains_array` and `contains_group` functions. These functions - # are reusable, but for v3 they would perform redundant I/O operations. - # Not clear how much of that strategy we want to keep here. - # if `key` names an object in storage, it cannot be an array or group if await store_path.exists(): raise KeyError(key) From eed03c8c4e3b9d12264b385571d42590237b0844 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 16 Apr 2024 15:06:12 +0200 Subject: [PATCH 26/31] fix: use removeprefix (removes a substring) instead of strip (removes any member of a set); comment out / avoid tests that cannot pass right now; don't consider implicit groups for v2; check if prefix is present in storage before opening for Group.getitem --- src/zarr/v3/group.py | 14 +++++++++----- src/zarr/v3/store/memory.py | 2 +- tests/v3/test_group.py | 8 ++++---- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index ac81686e59..1764a7e039 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -126,11 +126,12 @@ async def open( ) elif zarr_format == 2: # V2 groups are comprised of a .zgroup and .zattrs objects - # (both are optional in the case of implicit groups) zgroup_bytes, zattrs_bytes = await asyncio.gather( (store_path / ZGROUP_JSON).get(), (store_path / ZATTRS_JSON).get() ) - zgroup = json.loads(zgroup_bytes) if zgroup_bytes is not None else {"zarr_format": 2} + if zgroup_bytes is None: + raise FileNotFoundError(f"No Zarr v2 group metadata found at {store_path}") + zgroup = json.loads(zgroup_bytes) zattrs = json.loads(zattrs_bytes) if zattrs_bytes is not None else {} zarr_json = 
{**zgroup, "attributes": zattrs} else: @@ -161,6 +162,12 @@ async def getitem( if await store_path.exists(): raise KeyError(key) + # calling list_dir here is a big performance loss. We should try to find a way around + # this. + # see https://github.com/zarr-developers/zarr-python/pull/1743#issuecomment-2058681807 + if key not in await store_path.store.list_dir(self.store_path.path): + raise KeyError(key) + if self.metadata.zarr_format == 3: zarr_json_bytes = await (store_path / ZARR_JSON).get() if zarr_json_bytes is None: @@ -202,9 +209,6 @@ async def getitem( store_path, zarray, runtime_configuration=self.runtime_configuration ) else: - if zgroup_bytes is None: - # implicit group? - logger.warning("group at %s is an implicit group", store_path) zgroup = ( json.loads(zgroup_bytes) if zgroup_bytes is not None diff --git a/src/zarr/v3/store/memory.py b/src/zarr/v3/store/memory.py index afacfa4321..e430c79f91 100644 --- a/src/zarr/v3/store/memory.py +++ b/src/zarr/v3/store/memory.py @@ -79,7 +79,7 @@ async def list_dir(self, prefix: str) -> List[str]: else: return list( { - key.strip(prefix + "/").split("/")[0] + key.removeprefix(prefix + "/").split("/")[0] for key in self._store_dict if (key.startswith(prefix + "/") and key != prefix) } diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index b9be73fcc8..6e5db356c2 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -206,9 +206,9 @@ async def test_asyncgroup_open( else: assert False - # todo: get more specific than this - with pytest.raises(ValueError): - await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong) + # todo: uncomment this test when we get rid of implicit groups + # with pytest.raises(FileNotFoundError): + # await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong) # todo: replace the dict[str, Any] type with something a bit more specific @@ -238,7 +238,7 @@ def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, A # todo: replace this 
with a declarative API where we model a full hierarchy @pytest.mark.asyncio @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize("zarr_format", (3,)) # todo: add testing for v2 when we support v2 arrays async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): """ Create an `AsyncGroup`, then create members of that group, and ensure that we can access those From 75f75b1807c1531ab01cbf977a7d36e8a6cd2236 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 16 Apr 2024 16:24:24 +0200 Subject: [PATCH 27/31] xfail v2 tests that are sure to fail; add delitem tests; partition xfailing tests into subtests --- tests/v3/test_group.py | 111 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 102 insertions(+), 9 deletions(-) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 6e5db356c2..08735d68ed 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -147,7 +147,7 @@ async def test_asyncgroup_create( Test that `AsyncGroup.create` works as expected. 
""" attributes = {"foo": 100} - group = await AsyncGroup.create( + agroup = await AsyncGroup.create( store, attributes=attributes, exists_ok=exists_ok, @@ -155,13 +155,13 @@ async def test_asyncgroup_create( runtime_configuration=runtime_configuration, ) - assert group.metadata == GroupMetadata(zarr_format=zarr_format, attributes=attributes) - assert group.store_path == make_store_path(store) - assert group.runtime_configuration == runtime_configuration + assert agroup.metadata == GroupMetadata(zarr_format=zarr_format, attributes=attributes) + assert agroup.store_path == make_store_path(store) + assert agroup.runtime_configuration == runtime_configuration if not exists_ok: with pytest.raises(AssertionError): - group = await AsyncGroup.create( + agroup = await AsyncGroup.create( store, attributes=attributes, exists_ok=exists_ok, @@ -170,6 +170,28 @@ async def test_asyncgroup_create( ) +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_attrs(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: + attributes = {"foo": 100} + agroup = await AsyncGroup.create(store, zarr_format=zarr_format, attributes=attributes) + + assert agroup.attrs == agroup.metadata.attributes == attributes + + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_info(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: + agroup = await AsyncGroup.create( # noqa + store, + zarr_format=zarr_format, + ) + pytest.xfail("Info is not implemented for metadata yet") + # assert agroup.info == agroup.metadata.info + + @pytest.mark.asyncio @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) @pytest.mark.parametrize("zarr_format", (2, 3)) @@ -198,6 +220,16 @@ async def test_asyncgroup_open( assert group_w.attrs == 
group_w.attrs == attributes assert group_w == group_r + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (pytest.param(2, marks=pytest.mark.xfail), 3)) +async def test_asyncgroup_open_wrong_format( + store: LocalStore | MemoryStore, + zarr_format: ZarrFormat, +) -> None: + _ = await AsyncGroup.create(store=store, exists_ok=False, zarr_format=zarr_format) + # try opening with the wrong zarr format if zarr_format == 3: zarr_format_wrong = 2 @@ -206,9 +238,8 @@ async def test_asyncgroup_open( else: assert False - # todo: uncomment this test when we get rid of implicit groups - # with pytest.raises(FileNotFoundError): - # await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong) + with pytest.raises(FileNotFoundError): + await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong) # todo: replace the dict[str, Any] type with something a bit more specific @@ -238,7 +269,10 @@ def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, A # todo: replace this with a declarative API where we model a full hierarchy @pytest.mark.asyncio @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", (3,)) # todo: add testing for v2 when we support v2 arrays +@pytest.mark.parametrize( + "zarr_format", + (pytest.param(2, marks=pytest.mark.xfail), 3), +) async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): """ Create an `AsyncGroup`, then create members of that group, and ensure that we can access those @@ -259,3 +293,62 @@ async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: # check that asking for a nonexistent key raises KeyError with pytest.raises(KeyError): await agroup.getitem("foo") + + +# todo: replace this with a declarative API where we model a full hierarchy +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", 
"memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_delitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): + agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format) + sub_array_path = "sub_array" + _ = await agroup.create_array( + path=sub_array_path, shape=(10,), dtype="uint8", chunk_shape=(2,), attributes={"foo": 100} + ) + await agroup.delitem(sub_array_path) + + # todo: clean up the code duplication here + if zarr_format == 2: + assert not await agroup.store_path.store.exists(sub_array_path + "/" + ".zarray") + assert not await agroup.store_path.store.exists(sub_array_path + "/" + ".zattrs") + elif zarr_format == 3: + assert not await agroup.store_path.store.exists(sub_array_path + "/" + "zarr.json") + else: + assert False + + sub_group_path = "sub_group" + _ = await agroup.create_group(sub_group_path, attributes={"foo": 100}) + await agroup.delitem(sub_group_path) + if zarr_format == 2: + assert not await agroup.store_path.store.exists(sub_array_path + "/" + ".zgroup") + assert not await agroup.store_path.store.exists(sub_array_path + "/" + ".zattrs") + elif zarr_format == 3: + assert not await agroup.store_path.store.exists(sub_array_path + "/" + "zarr.json") + else: + assert False + + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_create_group(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): + agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format) + + shape = (10,) + dtype = "uint8" + chunk_shape = (4,) + attributes = {"foo": 100} + + sub_array_path = "sub_array" + array = await agroup.create_array( + path=sub_array_path, + shape=shape, + dtype=dtype, + chunk_shape=chunk_shape, + attributes=attributes, + ) + + assert array.shape == shape + assert array.dtype == dtype + # todo: fix this + assert 
array.metadata.chunk_grid.chunk_shape == chunk_shape From 8a14e3b879f803bcaccbaa27697f08b55fa40f1b Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 16 Apr 2024 17:30:00 +0200 Subject: [PATCH 28/31] fix: handle byte_range[0] being None --- src/zarr/v3/store/local.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/zarr/v3/store/local.py b/src/zarr/v3/store/local.py index 1a789141d9..0e8eed52aa 100644 --- a/src/zarr/v3/store/local.py +++ b/src/zarr/v3/store/local.py @@ -25,7 +25,11 @@ def _get(path: Path, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> `None`, then the entire file after the first byte will be read. """ if byte_range is not None: - start = byte_range[0] + if byte_range[0] is None: + start = 0 + else: + start = byte_range[0] + end = (start + byte_range[1]) if byte_range[1] is not None else None else: return path.read_bytes() From 459bb42824e83145feb905f7e5a03277271efe62 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 16 Apr 2024 17:45:23 +0200 Subject: [PATCH 29/31] fix: adjust test for localstore.get to check that get on nonexistent keys returns None; correctly create intermediate directories when preparing test data in test_local_store_get_partial --- tests/v3/test_storage.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/v3/test_storage.py b/tests/v3/test_storage.py index 88d83c8e85..fd2af079c0 100644 --- a/tests/v3/test_storage.py +++ b/tests/v3/test_storage.py @@ -59,9 +59,8 @@ async def test_local_store_get( expected = payload[start : start + length] assert observed == expected - # test that it's an error to get bytes from a file that doesn't exist - with pytest.raises(FileNotFoundError): - await local_store.get(object_name + "_absent", byte_range=byte_range) + # test that getting from a file that doesn't exist returns None + assert await local_store.get(object_name + "_absent", byte_range=byte_range) is None @pytest.mark.asyncio @@ -81,7 +80,11 @@ async def 
test_local_store_get_partial( # use the utf-8 encoding of the key as the bytes for key, _ in key_ranges: payload = bytes(key, encoding="utf-8") - (store.root / key).write_bytes(payload) + target_path: Path = store.root / key + # create the parent directories + target_path.parent.mkdir(parents=True, exist_ok=True) + # write bytes + target_path.write_bytes(payload) results = await store.get_partial_values(key_ranges) for idx, observed in enumerate(results): From 8ef3fec909a68013396bebfaa8dbc9404beb9da9 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 17 Apr 2024 16:23:53 +0200 Subject: [PATCH 30/31] fix: add zarr_format parameter to array creation routines (which raises if zarr_format is not 3), and xfail the tests that will hit this condition. add tests for create_group, create_array, and update_attributes methods of asyncgroup. --- src/zarr/v3/array.py | 50 ++++++++++--------- src/zarr/v3/group.py | 2 + tests/v3/test_group.py | 109 ++++++++++++++++++++++++++++++++++------- 3 files changed, 121 insertions(+), 40 deletions(-) diff --git a/src/zarr/v3/array.py b/src/zarr/v3/array.py index c0a00a624e..64a73e64dc 100644 --- a/src/zarr/v3/array.py +++ b/src/zarr/v3/array.py @@ -27,6 +27,7 @@ ChunkCoords, Selection, SliceSelection, + ZarrFormat, concurrent_map, ) from zarr.v3.config import RuntimeConfiguration @@ -88,6 +89,7 @@ async def create( attributes: Optional[Dict[str, Any]] = None, runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), exists_ok: bool = False, + zarr_format: ZarrFormat = 3, ) -> AsyncArray: store_path = make_store_path(store) if not exists_ok: @@ -100,31 +102,33 @@ async def create( fill_value = False else: fill_value = 0 + if zarr_format == 3: + metadata = ArrayMetadata( + shape=shape, + data_type=dtype, + chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), + chunk_key_encoding=( + V2ChunkKeyEncoding(separator=chunk_key_encoding[1]) + if chunk_key_encoding[0] == "v2" + else 
DefaultChunkKeyEncoding(separator=chunk_key_encoding[1]) + ), + fill_value=fill_value, + codecs=codecs, + dimension_names=tuple(dimension_names) if dimension_names else None, + attributes=attributes or {}, + ) + runtime_configuration = runtime_configuration or RuntimeConfiguration() - metadata = ArrayMetadata( - shape=shape, - data_type=dtype, - chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), - chunk_key_encoding=( - V2ChunkKeyEncoding(separator=chunk_key_encoding[1]) - if chunk_key_encoding[0] == "v2" - else DefaultChunkKeyEncoding(separator=chunk_key_encoding[1]) - ), - fill_value=fill_value, - codecs=codecs, - dimension_names=tuple(dimension_names) if dimension_names else None, - attributes=attributes or {}, - ) - runtime_configuration = runtime_configuration or RuntimeConfiguration() - - array = cls( - metadata=metadata, - store_path=store_path, - runtime_configuration=runtime_configuration, - ) + array = cls( + metadata=metadata, + store_path=store_path, + runtime_configuration=runtime_configuration, + ) - await array._save_metadata() - return array + await array._save_metadata() + return array + else: + raise NotImplementedError("Zarr version 2 arrays cannot be created yet.") @classmethod def from_dict( diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 1764a7e039..09e466fa99 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -249,6 +249,7 @@ async def create_group(self, path: str, **kwargs) -> AsyncGroup: return await type(self).create( self.store_path / path, runtime_configuration=runtime_configuration, + zarr_format=self.metadata.zarr_format, **kwargs, ) @@ -257,6 +258,7 @@ async def create_array(self, path: str, **kwargs) -> AsyncArray: return await AsyncArray.create( self.store_path / path, runtime_configuration=runtime_configuration, + zarr_format=self.metadata.zarr_format, **kwargs, ) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 08735d68ed..45df2bd276 100644 --- a/tests/v3/test_group.py +++ 
b/tests/v3/test_group.py @@ -1,6 +1,7 @@ from __future__ import annotations from typing import TYPE_CHECKING, Any +from zarr.v3.array import AsyncArray from zarr.v3.store.core import make_store_path if TYPE_CHECKING: @@ -18,7 +19,7 @@ # todo: put RemoteStore in here @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) -def test_group_children(store: MemoryStore | LocalStore): +def test_group_children(store: MemoryStore | LocalStore) -> None: """ Test that `Group.members` returns correct values, i.e. the arrays and groups (explicit and implicit) contained in that group. @@ -105,7 +106,7 @@ def test_group(store: MemoryStore | LocalStore) -> None: ) def test_group_create( store: MemoryStore | LocalStore, exists_ok: bool, runtime_configuration: RuntimeConfiguration -): +) -> None: """ Test that `Group.create` works as expected. """ @@ -142,7 +143,7 @@ async def test_asyncgroup_create( exists_ok: bool, zarr_format: ZarrFormat, runtime_configuration: RuntimeConfiguration, -): +) -> None: """ Test that `AsyncGroup.create` works as expected. 
""" @@ -252,7 +253,7 @@ async def test_asyncgroup_open_wrong_format( {"zarr_format": 2, "attributes": {"foo": 100}}, ), ) -def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, Any]): +def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, Any]) -> None: """ Test that we can create an AsyncGroup from a dict """ @@ -271,9 +272,9 @@ def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, A @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) @pytest.mark.parametrize( "zarr_format", - (pytest.param(2, marks=pytest.mark.xfail), 3), + (pytest.param(2, marks=pytest.mark.xfail(reason="V2 arrays cannot be created yet.")), 3), ) -async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): +async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: """ Create an `AsyncGroup`, then create members of that group, and ensure that we can access those members via the `AsyncGroup.getitem` method. 
@@ -298,8 +299,11 @@ async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format:
 # todo: replace this with a declarative API where we model a full hierarchy
 @pytest.mark.asyncio
 @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"])
-@pytest.mark.parametrize("zarr_format", (2, 3))
-async def test_asyncgroup_delitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat):
+@pytest.mark.parametrize(
+    "zarr_format",
+    (pytest.param(2, marks=pytest.mark.xfail(reason="V2 arrays cannot be created yet.")), 3),
+)
+async def test_asyncgroup_delitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None:
     agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format)
     sub_array_path = "sub_array"
     _ = await agroup.create_array(
@@ -330,25 +334,96 @@ async def test_asyncgroup_delitem(store: LocalStore | MemoryStore, zarr_format:
 
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"])
+@pytest.mark.parametrize(
+    "runtime_configuration", (RuntimeConfiguration(), RuntimeConfiguration(order="F"))
+)
 @pytest.mark.parametrize("zarr_format", (2, 3))
-async def test_asyncgroup_create_group(store: LocalStore | MemoryStore, zarr_format: ZarrFormat):
-    agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format)
+async def test_asyncgroup_create_group(
+    store: LocalStore | MemoryStore,
+    zarr_format: ZarrFormat,
+    runtime_configuration: RuntimeConfiguration,
+) -> None:
+    agroup = await AsyncGroup.create(
+        store=store, zarr_format=zarr_format, runtime_configuration=runtime_configuration
+    )
+    sub_node_path = "sub_group"
+    attributes = {"foo": 999}
+    subnode = await agroup.create_group(
+        path=sub_node_path, attributes=attributes, runtime_configuration=runtime_configuration
+    )
+
+    assert isinstance(subnode, AsyncGroup)
+    assert subnode.runtime_configuration == runtime_configuration
+    assert subnode.attrs == attributes
+    assert subnode.store_path.path == sub_node_path
+    assert 
subnode.store_path.store == store + assert subnode.metadata.zarr_format == zarr_format + + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize( + "runtime_configuration", (RuntimeConfiguration(), RuntimeConfiguration(order="F")) +) +@pytest.mark.parametrize( + "zarr_format", + (pytest.param(2, marks=pytest.mark.xfail(reason="V2 arrays cannot be created yet")), 3), +) +async def test_asyncgroup_create_array( + store: LocalStore | MemoryStore, + runtime_configuration: RuntimeConfiguration, + zarr_format: ZarrFormat, +) -> None: + """ + Test that the AsyncGroup.create_array method works correctly. We ensure that array properties + specified in create_array are present on the resulting array. + """ + + agroup = await AsyncGroup.create( + store=store, zarr_format=zarr_format, runtime_configuration=runtime_configuration + ) shape = (10,) dtype = "uint8" chunk_shape = (4,) attributes = {"foo": 100} - sub_array_path = "sub_array" - array = await agroup.create_array( - path=sub_array_path, + sub_node_path = "sub_array" + subnode = await agroup.create_array( + path=sub_node_path, shape=shape, dtype=dtype, chunk_shape=chunk_shape, attributes=attributes, + runtime_configuration=runtime_configuration, + ) + assert isinstance(subnode, AsyncArray) + assert subnode.runtime_configuration == runtime_configuration + assert subnode.attrs == attributes + assert subnode.store_path.path == sub_node_path + assert subnode.store_path.store == store + assert subnode.shape == shape + assert subnode.dtype == dtype + # todo: fix the type annotation of array.metadata.chunk_grid so that we get some autocomplete + # here. 
+ assert subnode.metadata.chunk_grid.chunk_shape == chunk_shape + assert subnode.metadata.zarr_format == zarr_format + + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_update_attributes( + store: LocalStore | MemoryStore, zarr_format: ZarrFormat +) -> None: + """ + Test that the AsyncGroup.update_attributes method works correctly. + """ + attributes_old = {"foo": 10} + attributes_new = {"baz": "new"} + agroup = await AsyncGroup.create( + store=store, zarr_format=zarr_format, attributes=attributes_old ) - assert array.shape == shape - assert array.dtype == dtype - # todo: fix this - assert array.metadata.chunk_grid.chunk_shape == chunk_shape + agroup_new_attributes = await agroup.update_attributes(attributes_new) + assert agroup_new_attributes.attrs == attributes_new From b5a76981c823b93dee4710eed2951ee4cd67f736 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 17 Apr 2024 16:56:19 +0200 Subject: [PATCH 31/31] test: add group init test --- tests/v3/test_group.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/v3/test_group.py b/tests/v3/test_group.py index 45df2bd276..a7e7a10be8 100644 --- a/tests/v3/test_group.py +++ b/tests/v3/test_group.py @@ -13,7 +13,7 @@ from zarr.v3.group import AsyncGroup, Group, GroupMetadata from zarr.v3.store import StorePath -from zarr.v3.config import RuntimeConfiguration +from zarr.v3.config import RuntimeConfiguration, SyncConfiguration from zarr.v3.sync import sync @@ -427,3 +427,17 @@ async def test_asyncgroup_update_attributes( agroup_new_attributes = await agroup.update_attributes(attributes_new) assert agroup_new_attributes.attrs == attributes_new + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize( + "sync_configuration", (SyncConfiguration(), 
SyncConfiguration(concurrency=2))
+)
+def test_group_init(
+    store: LocalStore | MemoryStore, zarr_format: ZarrFormat, sync_configuration: SyncConfiguration
+) -> None:
+    agroup = sync(AsyncGroup.create(store=store, zarr_format=zarr_format))
+    group = Group(_async_group=agroup, _sync_configuration=sync_configuration)
+    assert group._async_group == agroup
+    assert group._sync_configuration == sync_configuration