diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 7da1f9608e..0000000000 --- a/.flake8 +++ /dev/null @@ -1,2 +0,0 @@ -[flake8] -max-line-length = 100 diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 53bf4633f0..9e0316032f 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,2 +1,4 @@ # lint codebase with black and ruff 4e348d6b80c96da461fd866576c971b8a659ba15 +# migrate from black to ruff format +22cea005629913208a85799372e045f353744add diff --git a/.github/workflows/test-v3.yml b/.github/workflows/test-v3.yml index bdc6e99299..e767541c75 100644 --- a/.github/workflows/test-v3.yml +++ b/.github/workflows/test-v3.yml @@ -10,15 +10,22 @@ on: branches: [ v3 ] jobs: - run-tests: + test: + name: py=${{ matrix.python-version }}, np=${{ matrix.numpy-version }}, deps=${{ matrix.dependency-set }} runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.10', '3.11'] + numpy-version: ['1.24', '1.26'] + dependency-set: ["minimal", "optional"] + steps: - uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 with: - python-version: '3.11' + python-version: ${{ matrix.python-version }} cache: 'pip' - name: Install Hatch run: | @@ -29,8 +36,8 @@ jobs: hatch env create - name: Run Tests run: | - hatch run test:run + hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run - name: Run mypy continue-on-error: true run: | - hatch run test:run-mypy \ No newline at end of file + hatch run test:run-mypy diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 79344604a5..d4aee4ce86 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,31 +7,25 @@ default_language_version: python: python3 repos: - repo: https://github.com/charliermarsh/ruff-pre-commit - # Ruff version. - rev: 'v0.0.224' + rev: 'v0.2.1' hooks: - id: ruff - # Respect `exclude` and `extend-exclude` settings. - args: ["--force-exclude"] - - repo: https://github.com/psf/black - rev: 22.12.0 - hooks: - - id: black + args: ["--fix", "--show-fixes"] + - id: ruff-format - repo: https://github.com/codespell-project/codespell - rev: v2.2.5 + rev: v2.2.6 hooks: - id: codespell args: ["-L", "ba,ihs,kake,nd,noe,nwo,te,fo,zar", "-S", "fixture"] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-yaml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.7.1 + rev: v1.8.0 hooks: - id: mypy files: src - exclude: ^src/zarr/v3 args: [] additional_dependencies: - types-redis diff --git a/bench/compress_normal.py b/bench/compress_normal.py index 9f1655541c..803d54b76b 100644 --- a/bench/compress_normal.py +++ b/bench/compress_normal.py @@ -8,7 +8,6 @@ from zarr import blosc if __name__ == "__main__": - sys.path.insert(0, "..") # setup diff --git a/docs/installation.rst b/docs/installation.rst index 8553d451cb..3d4ac41072 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -19,13 +19,4 @@ latest GitHub main:: $ pip install git+https://github.com/zarr-developers/zarr-python.git -To work with Zarr source code in development, install from GitHub:: - - $ git clone --recursive https://github.com/zarr-developers/zarr-python.git - $ cd zarr-python - $ python -m pip install -e . - -To verify that Zarr has been fully installed, run the test suite:: - - $ pip install pytest - $ python -m pytest -v --pyargs zarr +To work with Zarr source code in development, see `Contributing `_. 
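Note: many of the purely mechanical hunks in this patch — trailing commas added after `**kwargs`, blank lines dropped after `def` lines, implicit string concatenations re-wrapped — fall out of the black → ruff-format migration recorded in `.git-blame-ignore-revs` above. A minimal sketch of the trailing-comma behavior (the function is a hypothetical stand-in; formatter defaults with this repo's `line-length = 100` are assumed):

```python
def copy(source, dest, **create_kws):
    """Hypothetical stand-in for zarr.convenience.copy."""


# black 22.x accepted a multi-line call whose last argument is **create_kws
# with no trailing comma; ruff format adds one, which produces the many
# `**create_kws,` / `**kwargs,` hunks seen throughout this diff.
copy(
    "source.zarr",
    "dest.zarr",
    **{"if_exists": "raise"},
)
```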
\ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 3933376b12..b67f5fec94 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -127,7 +127,8 @@ serve = "sphinx-autobuild docs docs/_build --ignore 'docs/_autoapi/**/*' --host [tool.ruff] line-length = 100 -exclude = [ +force-exclude = true +extend-exclude = [ ".bzr", ".direnv", ".eggs", @@ -146,25 +147,33 @@ exclude = [ "docs" ] -[tool.black] -line-length = 100 -exclude = ''' -/( - \.git - | \.mypy_cache - | \.venv - | _build - | buck-out - | build - | dist - | docs -)/ -''' - [tool.mypy] -python_version = "3.8" +python_version = "3.10" ignore_missing_imports = true -follow_imports = "silent" +namespace_packages = false + +warn_unused_configs = true +warn_redundant_casts = true +warn_unused_ignores = true + + +check_untyped_defs = true + +[[tool.mypy.overrides]] +module = [ + "zarr._storage.store", + "zarr._storage.v3_storage_transformers", + "zarr.v3.group", + "zarr.core", + "zarr.hierarchy", + "zarr.indexing", + "zarr.storage", + "zarr.sync", + "zarr.util", + "tests.*", +] +check_untyped_defs = false + [tool.pytest.ini_options] doctest_optionflags = [ diff --git a/src/zarr/_storage/absstore.py b/src/zarr/_storage/absstore.py index f62529f096..c9a113148c 100644 --- a/src/zarr/_storage/absstore.py +++ b/src/zarr/_storage/absstore.py @@ -87,7 +87,7 @@ def __init__( "https://{}.blob.core.windows.net/".format(account_name), container, credential=account_key, - **blob_service_kwargs + **blob_service_kwargs, ) self.client = client @@ -240,7 +240,6 @@ def __setitem__(self, key, value): super().__setitem__(key, value) def rmdir(self, path=None): - if not path: # Currently allowing clear to delete everything as in v2 diff --git a/src/zarr/_storage/store.py b/src/zarr/_storage/store.py index 8daedae48f..9911cfa12d 100644 --- a/src/zarr/_storage/store.py +++ b/src/zarr/_storage/store.py @@ -629,7 +629,6 @@ def _rmdir_from_keys(store: StoreLike, path: Optional[str] = None) -> None: def _rmdir_from_keys_v3(store: StoreV3, path: str = "") -> None: - meta_dir = meta_root + path meta_dir = meta_dir.rstrip("/") _rmdir_from_keys(store, meta_dir) @@ -643,10 +642,10 @@ def _rmdir_from_keys_v3(store: StoreV3, path: str = "") -> None: sfx = _get_metadata_suffix(store) array_meta_file = meta_dir + ".array" + sfx if array_meta_file in store: - store.erase(array_meta_file) # type: ignore + store.erase(array_meta_file) group_meta_file = meta_dir + ".group" + sfx if group_meta_file in store: - store.erase(group_meta_file) # type: ignore + store.erase(group_meta_file) def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str]: diff --git a/src/zarr/_storage/v3.py b/src/zarr/_storage/v3.py index 8ab54984b7..d3cbc58235 100644 --- a/src/zarr/_storage/v3.py +++ b/src/zarr/_storage/v3.py @@ -118,7 +118,6 @@ def _get_files_and_dirs_from_path(store, path): class FSStoreV3(FSStore, StoreV3): - # FSStoreV3 doesn't use this (FSStore uses it within _normalize_key) _META_KEYS = () diff --git a/src/zarr/_storage/v3_storage_transformers.py b/src/zarr/_storage/v3_storage_transformers.py index 3090aea28c..cb11cea52e 100644 --- a/src/zarr/_storage/v3_storage_transformers.py +++ b/src/zarr/_storage/v3_storage_transformers.py @@ -367,7 +367,7 @@ def erase_prefix(self, prefix): def rmdir(self, path=None): path = normalize_storage_path(path) - _rmdir_from_keys_v3(self, path) # type: ignore + _rmdir_from_keys_v3(self, path) def __contains__(self, key): if self._is_data_key(key): diff --git a/src/zarr/attrs.py b/src/zarr/attrs.py index 
01fc617b3c..e589bc9022 100644 --- a/src/zarr/attrs.py +++ b/src/zarr/attrs.py @@ -1,3 +1,4 @@ +from typing import Any import warnings from collections.abc import MutableMapping @@ -26,7 +27,6 @@ class Attributes(MutableMapping): """ def __init__(self, store, key=".zattrs", read_only=False, cache=True, synchronizer=None): - self._version = getattr(store, "_store_version", 2) _Store = Store if self._version == 2 else StoreV3 self.store = _Store._ensure_store(store) @@ -40,7 +40,7 @@ def _get_nosync(self): try: data = self.store[self.key] except KeyError: - d = dict() + d: dict[str, Any] = dict() if self._version > 2: d["attributes"] = {} else: @@ -73,7 +73,6 @@ def __getitem__(self, item): return self.asdict()[item] def _write_op(self, f, *args, **kwargs): - # guard condition if self.read_only: raise PermissionError("attributes are read-only") @@ -89,7 +88,6 @@ def __setitem__(self, item, value): self._write_op(self._setitem_nosync, item, value) def _setitem_nosync(self, item, value): - # load existing data d = self._get_nosync() @@ -106,7 +104,6 @@ def __delitem__(self, item): self._write_op(self._delitem_nosync, item) def _delitem_nosync(self, key): - # load existing data d = self._get_nosync() @@ -128,7 +125,6 @@ def put(self, d): self._write_op(self._put_nosync, dict(attributes=d)) def _put_nosync(self, d): - d_to_check = d if self._version == 2 else d["attributes"] if not all(isinstance(item, str) for item in d_to_check): # TODO: Raise an error for non-string keys @@ -178,7 +174,6 @@ def update(self, *args, **kwargs): self._write_op(self._update_nosync, *args, **kwargs) def _update_nosync(self, *args, **kwargs): - # load existing data d = self._get_nosync() diff --git a/src/zarr/convenience.py b/src/zarr/convenience.py index 0ee8a8d323..9c0deeea47 100644 --- a/src/zarr/convenience.py +++ b/src/zarr/convenience.py @@ -675,10 +675,8 @@ def copy_store( # setup logging with _LogWriter(log) as log: - # iterate over source keys for source_key in sorted(source.keys()): - # filter to keys under source path if source_store_version == 2: if not source_key.startswith(source_path): @@ -757,7 +755,7 @@ def copy( log=None, if_exists="raise", dry_run=False, - **create_kws + **create_kws, ): """Copy the `source` array or group into the `dest` group. 
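The `copy_store` hunks above are formatting-only; behavior is unchanged. For orientation, `copy_store` copies raw store keys and values without decoding chunks and returns a `(n_copied, n_skipped, n_bytes_copied)` triple, which is what the summary logging below reports. A usage sketch against plain-dict stores (valid zarr v2 stores), assuming the v2 API:

```python
import sys

import zarr

# Plain dicts are valid zarr v2 stores; populate the source through the API.
source: dict = {}
zarr.group(store=source).create_dataset("foo", shape=(10,), chunks=(5,), dtype="i4")

dest: dict = {}
# copy_store copies keys/values verbatim; log=sys.stdout prints one line per key.
n_copied, n_skipped, n_bytes_copied = zarr.copy_store(source, dest, log=sys.stdout)
print(n_copied, n_skipped, n_bytes_copied)
```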
@@ -878,7 +876,6 @@ def copy( # setup logging with _LogWriter(log) as log: - # do the copying n_copied, n_skipped, n_bytes_copied = _copy( log, @@ -890,7 +887,7 @@ def copy( without_attrs=without_attrs, if_exists=if_exists, dry_run=dry_run, - **create_kws + **create_kws, ) # log a final message with a summary of what happened @@ -948,12 +945,10 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ # take action if do_copy: - # log a message about what we're going to do log("copy {} {} {}".format(source.name, source.shape, source.dtype)) if not dry_run: - # clear the way if exists: del dest[name] @@ -1038,12 +1033,10 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ # take action if do_copy: - # log action log("copy {}".format(source.name)) if not dry_run: - # clear the way if exists_array: del dest[name] @@ -1056,7 +1049,6 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ grp.attrs.update(source.attrs) else: - # setup for dry run without creating any groups in the # destination if dest is not None: @@ -1076,7 +1068,7 @@ def _copy(log, source, dest, name, root, shallow, without_attrs, if_exists, dry_ without_attrs=without_attrs, if_exists=if_exists, dry_run=dry_run, - **create_kws + **create_kws, ) n_copied += c n_skipped += s @@ -1099,7 +1091,7 @@ def copy_all( log=None, if_exists="raise", dry_run=False, - **create_kws + **create_kws, ): """Copy all children of the `source` group into the `dest` group. @@ -1189,7 +1181,6 @@ def copy_all( # setup logging with _LogWriter(log) as log: - for k in source.keys(): c, s, b = _copy( log, @@ -1201,7 +1192,7 @@ def copy_all( without_attrs=without_attrs, if_exists=if_exists, dry_run=dry_run, - **create_kws + **create_kws, ) n_copied += c n_skipped += s @@ -1262,7 +1253,6 @@ def is_zarr_key(key): return key.endswith(".zarray") or key.endswith(".zgroup") or key.endswith(".zattrs") else: - assert_zarr_v3_api_available() sfx = _get_metadata_suffix(store) # type: ignore diff --git a/src/zarr/core.py b/src/zarr/core.py index c07a31e95f..d22a9d79c3 100644 --- a/src/zarr/core.py +++ b/src/zarr/core.py @@ -2536,7 +2536,7 @@ def hexdigest(self, hashname="sha1"): checksum = binascii.hexlify(self.digest(hashname=hashname)) # This is a bytes object on Python 3 and we want a str. 
- if type(checksum) is not str: + if not isinstance(checksum, str): checksum = checksum.decode("utf8") return checksum diff --git a/src/zarr/creation.py b/src/zarr/creation.py index 726d0b5932..6227f90b7b 100644 --- a/src/zarr/creation.py +++ b/src/zarr/creation.py @@ -234,7 +234,6 @@ def create( def _kwargs_compat(compressor, fill_value, kwargs): - # to be compatible with h5py, as well as backwards-compatible with Zarr # 1.x, accept 'compression' and 'compression_opts' keyword arguments @@ -697,7 +696,6 @@ def open_array( def _like_args(a, kwargs): - shape, chunks = _get_shape_chunks(a) if shape is not None: kwargs.setdefault("shape", shape) diff --git a/src/zarr/hierarchy.py b/src/zarr/hierarchy.py index 3361969f08..1c9848e647 100644 --- a/src/zarr/hierarchy.py +++ b/src/zarr/hierarchy.py @@ -145,7 +145,7 @@ def __init__( synchronizer=None, zarr_version=None, *, - meta_array=None + meta_array=None, ): store: BaseStore = _normalize_store_arg(store, zarr_version=zarr_version) if zarr_version is None: @@ -591,7 +591,25 @@ def groups(self): for key in sorted(listdir(self._store, self._path)): path = self._key_prefix + key if contains_group(self._store, path, explicit_only=False): - yield key, Group( + yield ( + key, + Group( + self._store, + path=path, + read_only=self._read_only, + chunk_store=self._chunk_store, + cache_attrs=self.attrs.cache, + synchronizer=self._synchronizer, + zarr_version=self._version, + ), + ) + + else: + for key in self.group_keys(): + path = self._key_prefix + key + yield ( + key, + Group( self._store, path=path, read_only=self._read_only, @@ -599,19 +617,7 @@ def groups(self): cache_attrs=self.attrs.cache, synchronizer=self._synchronizer, zarr_version=self._version, - ) - - else: - for key in self.group_keys(): - path = self._key_prefix + key - yield key, Group( - self._store, - path=path, - read_only=self._read_only, - chunk_store=self._chunk_store, - cache_attrs=self.attrs.cache, - synchronizer=self._synchronizer, - zarr_version=self._version, + ), ) def array_keys(self, recurse=False): @@ -919,7 +925,6 @@ def tree(self, expand=False, level=None): return TreeViewer(self, expand=expand, level=level) def _write_op(self, f, *args, **kwargs): - # guard condition if self._read_only: raise ReadOnlyError() @@ -1094,7 +1099,6 @@ def create_dataset(self, name, **kwargs): return self._write_op(self._create_dataset_nosync, name, **kwargs) def _create_dataset_nosync(self, name, data=None, **kwargs): - assert "mode" not in kwargs path = self._item_path(name) @@ -1138,11 +1142,9 @@ def require_dataset(self, name, shape, dtype=None, exact=False, **kwargs): ) def _require_dataset_nosync(self, name, shape, dtype=None, exact=False, **kwargs): - path = self._item_path(name) if contains_array(self._store, path): - # array already exists at path, validate that it is the right shape and type synchronizer = kwargs.get("synchronizer", self._synchronizer) @@ -1235,7 +1237,7 @@ def _full_nosync(self, name, fill_value, **kwargs): path=path, chunk_store=self._chunk_store, fill_value=fill_value, - **kwargs + **kwargs, ) def array(self, name, data, **kwargs): @@ -1361,7 +1363,7 @@ def group( path=None, *, zarr_version=None, - meta_array=None + meta_array=None, ): """Create a group. @@ -1452,7 +1454,7 @@ def open_group( storage_options=None, *, zarr_version=None, - meta_array=None + meta_array=None, ): """Open a group using file-mode-like semantics. 
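The `Group.groups()` hunk above is formatting-only: the method already yielded `(name, Group)` pairs, and ruff format merely parenthesizes the multi-line `yield`. The expected iteration pattern, sketched with an in-memory group:

```python
import zarr

root = zarr.group()  # in-memory root group
root.create_group("a")
root.create_group("b")

# Group.groups() yields (name, Group) pairs, matching the parenthesized
# `yield (key, Group(...))` form produced by the reformat.
for name, subgroup in root.groups():
    print(name, subgroup)
```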
diff --git a/src/zarr/indexing.py b/src/zarr/indexing.py index 487cc8b9d9..b72d5a255d 100644 --- a/src/zarr/indexing.py +++ b/src/zarr/indexing.py @@ -111,7 +111,6 @@ def is_pure_orthogonal_indexing(selection, ndim): def normalize_integer_selection(dim_sel, dim_len): - # normalize type to int dim_sel = int(dim_sel) @@ -145,7 +144,6 @@ def normalize_integer_selection(dim_sel, dim_len): class IntDimIndexer: def __init__(self, dim_sel, dim_len, dim_chunk_len): - # normalize dim_sel = normalize_integer_selection(dim_sel, dim_len) @@ -169,7 +167,6 @@ def ceildiv(a, b): class SliceDimIndexer: def __init__(self, dim_sel, dim_len, dim_chunk_len): - # normalize self.start, self.stop, self.step = dim_sel.indices(dim_len) if self.step < 1: @@ -182,14 +179,12 @@ def __init__(self, dim_sel, dim_len, dim_chunk_len): self.nchunks = ceildiv(self.dim_len, self.dim_chunk_len) def __iter__(self): - # figure out the range of chunks we need to visit dim_chunk_ix_from = self.start // self.dim_chunk_len dim_chunk_ix_to = ceildiv(self.stop, self.dim_chunk_len) # iterate over chunks in range for dim_chunk_ix in range(dim_chunk_ix_from, dim_chunk_ix_to): - # compute offsets for chunk within overall array dim_offset = dim_chunk_ix * self.dim_chunk_len dim_limit = min(self.dim_len, (dim_chunk_ix + 1) * self.dim_chunk_len) @@ -237,7 +232,6 @@ def check_selection_length(selection, shape): def replace_ellipsis(selection, shape): - selection = ensure_tuple(selection) # count number of ellipsis present @@ -330,14 +324,12 @@ def is_basic_selection(selection): # noinspection PyProtectedMember class BasicIndexer: def __init__(self, selection, array): - # handle ellipsis selection = replace_ellipsis(selection, array._shape) # setup per-dimension indexers dim_indexers = [] for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): - if is_integer(dim_sel): dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) @@ -358,7 +350,6 @@ def __init__(self, selection, array): def __iter__(self): for dim_projections in itertools.product(*self.dim_indexers): - chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) out_selection = tuple( @@ -370,7 +361,6 @@ def __iter__(self): class BoolArrayDimIndexer: def __init__(self, dim_sel, dim_len, dim_chunk_len): - # check number of dimensions if not is_bool_array(dim_sel, 1): raise IndexError( @@ -380,8 +370,9 @@ def __init__(self, dim_sel, dim_len, dim_chunk_len): # check shape if dim_sel.shape[0] != dim_len: raise IndexError( - "Boolean array has the wrong length for dimension; " - "expected {}, got {}".format(dim_len, dim_sel.shape[0]) + "Boolean array has the wrong length for dimension; " "expected {}, got {}".format( + dim_len, dim_sel.shape[0] + ) ) # store attributes @@ -402,10 +393,8 @@ def __init__(self, dim_sel, dim_len, dim_chunk_len): self.dim_chunk_ixs = np.nonzero(self.chunk_nitems)[0] def __iter__(self): - # iterate over chunks with at least one item for dim_chunk_ix in self.dim_chunk_ixs: - # find region in chunk dim_offset = dim_chunk_ix * self.dim_chunk_len dim_chunk_sel = self.dim_sel[dim_offset : dim_offset + self.dim_chunk_len] @@ -472,7 +461,6 @@ def __init__( boundscheck=True, order=Order.UNKNOWN, ): - # ensure 1d array dim_sel = np.asanyarray(dim_sel) if not is_integer_array(dim_sel, 1): @@ -526,9 +514,7 @@ def __init__( self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) def __iter__(self): - for dim_chunk_ix in self.dim_chunk_ixs: - # find region in output if 
dim_chunk_ix == 0: start = 0 @@ -602,7 +588,6 @@ def oindex_set(a, selection, value): # noinspection PyProtectedMember class OrthogonalIndexer: def __init__(self, selection, array): - # handle ellipsis selection = replace_ellipsis(selection, array._shape) @@ -612,7 +597,6 @@ def __init__(self, selection, array): # setup per-dimension indexers dim_indexers = [] for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): - if is_integer(dim_sel): dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) @@ -649,7 +633,6 @@ def __init__(self, selection, array): def __iter__(self): for dim_projections in itertools.product(*self.dim_indexers): - chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) out_selection = tuple( @@ -658,7 +641,6 @@ def __iter__(self): # handle advanced indexing arrays orthogonally if self.is_advanced: - # N.B., numpy doesn't support orthogonal indexing directly as yet, # so need to work around via np.ix_. Also np.ix_ does not support a # mixture of arrays and slices or integers, so need to convert slices @@ -692,7 +674,6 @@ def __setitem__(self, selection, value): # noinspection PyProtectedMember class BlockIndexer: def __init__(self, selection, array): - # handle ellipsis selection = replace_ellipsis(selection, array._shape) @@ -794,7 +775,6 @@ def is_mask_selection(selection, array): # noinspection PyProtectedMember class CoordinateIndexer: def __init__(self, selection, array): - # some initial normalization selection = ensure_tuple(selection) selection = tuple([i] if is_integer(i) else i for i in selection) @@ -810,7 +790,6 @@ def __init__(self, selection, array): # handle wraparound, boundscheck for dim_sel, dim_len in zip(selection, array.shape): - # handle wraparound wraparound_indices(dim_sel, dim_len) @@ -861,10 +840,8 @@ def __init__(self, selection, array): self.chunk_mixs = np.unravel_index(self.chunk_rixs, array._cdata_shape) def __iter__(self): - # iterate over chunks for i, chunk_rix in enumerate(self.chunk_rixs): - chunk_coords = tuple(m[i] for m in self.chunk_mixs) if chunk_rix == 0: start = 0 @@ -891,7 +868,6 @@ def __iter__(self): # noinspection PyProtectedMember class MaskIndexer(CoordinateIndexer): def __init__(self, selection, array): - # some initial normalization selection = ensure_tuple(selection) selection = replace_lists(selection) @@ -944,8 +920,9 @@ def check_fields(fields, dtype): # check type if not isinstance(fields, (str, list, tuple)): raise IndexError( - "'fields' argument must be a string or list of strings; found " - "{!r}".format(type(fields)) + "'fields' argument must be a string or list of strings; found " "{!r}".format( + type(fields) + ) ) if fields: if dtype.names is None: diff --git a/src/zarr/meta.py b/src/zarr/meta.py index bd1f4ee037..3a5435a174 100644 --- a/src/zarr/meta.py +++ b/src/zarr/meta.py @@ -89,7 +89,6 @@ class Metadata2: @classmethod def parse_metadata(cls, s: Union[MappingType, bytes, str]) -> MappingType[str, Any]: - # Here we allow that a store may return an already-parsed metadata object, # or a string of JSON that we will parse here. 
We allow for an already-parsed # object to accommodate a consolidated metadata store, where all the metadata for @@ -235,8 +234,8 @@ def decode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> return np.array(v, dtype=dtype)[()] elif dtype.kind in "c": v = ( - cls.decode_fill_value(v[0], dtype.type().real.dtype), # type: ignore - cls.decode_fill_value(v[1], dtype.type().imag.dtype), # type: ignore + cls.decode_fill_value(v[0], dtype.type().real.dtype), + cls.decode_fill_value(v[1], dtype.type().imag.dtype), ) v = v[0] + 1j * v[1] return np.array(v, dtype=dtype)[()] diff --git a/src/zarr/n5.py b/src/zarr/n5.py index 7e73905527..79bab20576 100644 --- a/src/zarr/n5.py +++ b/src/zarr/n5.py @@ -72,21 +72,18 @@ class N5Store(NestedDirectoryStore): def __getitem__(self, key: str) -> bytes: if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) value = group_metadata_to_zarr(self._load_n5_attrs(key_new)) return json_dumps(value) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) top_level = key == zarr_array_meta_key value = array_metadata_to_zarr(self._load_n5_attrs(key_new), top_level=top_level) return json_dumps(value) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) value = attrs_to_zarr(self._load_n5_attrs(key_new)) @@ -104,9 +101,7 @@ def __getitem__(self, key: str) -> bytes: return super().__getitem__(key_new) def __setitem__(self, key: str, value: Any): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) n5_attrs = self._load_n5_attrs(key_new) @@ -115,7 +110,6 @@ def __setitem__(self, key: str, value: Any): value = json_dumps(n5_attrs) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) top_level = key == zarr_array_meta_key n5_attrs = self._load_n5_attrs(key_new) @@ -123,7 +117,6 @@ def __setitem__(self, key: str, value: Any): value = json_dumps(n5_attrs) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) n5_attrs = self._load_n5_attrs(key_new) @@ -166,9 +159,7 @@ def __delitem__(self, key: str): super().__delitem__(key_new) def __contains__(self, key): - if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, n5_attrs_key) if key_new not in self: return False @@ -176,18 +167,15 @@ def __contains__(self, key): return "dimensions" not in self._load_n5_attrs(key_new) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, n5_attrs_key) # array if attributes contain 'dimensions' return "dimensions" in self._load_n5_attrs(key_new) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, n5_attrs_key) return self._contains_attrs(key_new) elif is_chunk_key(key): - key_new = invert_chunk_coords(key) else: key_new = key @@ -198,7 +186,6 @@ def __eq__(self, other): return isinstance(other, N5Store) and self.path == other.path def listdir(self, path: Optional[str] = None): - if path is not None: path = invert_chunk_coords(path) path = cast(str, path) @@ -208,7 +195,6 @@ def listdir(self, path: Optional[str] = None): children = super().listdir(path=path) if self._is_array(path): - # replace n5 attribute file with respective zarr attribute files children.remove(n5_attrs_key) children.append(zarr_array_meta_key) @@ -234,7 +220,6 @@ def listdir(self, path: Optional[str] = None): return sorted(new_children) elif self._is_group(path): - # replace n5 
attribute file with respective zarr attribute files children.remove(n5_attrs_key) children.append(zarr_group_meta_key) @@ -244,7 +229,6 @@ def listdir(self, path: Optional[str] = None): return sorted(children) else: - return children def _load_n5_attrs(self, path: str) -> Dict[str, Any]: @@ -255,7 +239,6 @@ def _load_n5_attrs(self, path: str) -> Dict[str, Any]: return {} def _is_group(self, path: str): - if path is None: attrs_key = n5_attrs_key else: @@ -265,7 +248,6 @@ def _is_group(self, path: str): return len(n5_attrs) > 0 and "dimensions" not in n5_attrs def _is_array(self, path: str): - if path is None: attrs_key = n5_attrs_key else: @@ -274,7 +256,6 @@ def _is_array(self, path: str): return "dimensions" in self._load_n5_attrs(attrs_key) def _contains_attrs(self, path: str): - if path is None: attrs_key = n5_attrs_key else: @@ -344,10 +325,9 @@ class N5FSStore(FSStore): def __init__(self, *args, **kwargs): if "dimension_separator" in kwargs: - kwargs.pop("dimension_separator") warnings.warn("Keyword argument `dimension_separator` will be ignored") - dimension_separator = "." - super().__init__(*args, dimension_separator=dimension_separator, **kwargs) + kwargs["dimension_separator"] = "." + super().__init__(*args, **kwargs) @staticmethod def _swap_separator(key: str): @@ -376,21 +356,18 @@ def _normalize_key(self, key: str): def __getitem__(self, key: str) -> bytes: if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) value = group_metadata_to_zarr(self._load_n5_attrs(key_new)) return json_dumps(value) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) top_level = key == zarr_array_meta_key value = array_metadata_to_zarr(self._load_n5_attrs(key_new), top_level=top_level) return json_dumps(value) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) value = attrs_to_zarr(self._load_n5_attrs(key_new)) @@ -409,7 +386,6 @@ def __getitem__(self, key: str) -> bytes: def __setitem__(self, key: str, value: Any): if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) n5_attrs = self._load_n5_attrs(key_new) @@ -418,7 +394,6 @@ def __setitem__(self, key: str, value: Any): value = json_dumps(n5_attrs) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) top_level = key == zarr_array_meta_key n5_attrs = self._load_n5_attrs(key_new) @@ -427,7 +402,6 @@ def __setitem__(self, key: str, value: Any): value = json_dumps(n5_attrs) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) n5_attrs = self._load_n5_attrs(key_new) @@ -456,7 +430,6 @@ def __setitem__(self, key: str, value: Any): super().__setitem__(key_new, value) def __delitem__(self, key: str): - if key.endswith(zarr_group_meta_key): key_new = key.replace(zarr_group_meta_key, self._group_meta_key) elif key.endswith(zarr_array_meta_key): @@ -471,7 +444,6 @@ def __delitem__(self, key: str): def __contains__(self, key: Any): if key.endswith(zarr_group_meta_key): - key_new = key.replace(zarr_group_meta_key, self._group_meta_key) if key_new not in self: return False @@ -479,13 +451,11 @@ def __contains__(self, key: Any): return "dimensions" not in self._load_n5_attrs(key_new) elif key.endswith(zarr_array_meta_key): - key_new = key.replace(zarr_array_meta_key, self._array_meta_key) # array if attributes contain 'dimensions' return "dimensions" in 
self._load_n5_attrs(key_new) elif key.endswith(zarr_attrs_key): - key_new = key.replace(zarr_attrs_key, self._attrs_key) return self._contains_attrs(key_new) @@ -508,7 +478,6 @@ def listdir(self, path: Optional[str] = None): # doesn't provide. children = super().listdir(path=path) if self._is_array(path): - # replace n5 attribute file with respective zarr attribute files children.remove(self._array_meta_key) children.append(zarr_array_meta_key) @@ -532,7 +501,6 @@ def listdir(self, path: Optional[str] = None): return sorted(new_children) elif self._is_group(path): - # replace n5 attribute file with respective zarr attribute files children.remove(self._group_meta_key) children.append(zarr_group_meta_key) @@ -550,7 +518,6 @@ def _load_n5_attrs(self, path: str): return {} def _is_group(self, path: Optional[str]): - if path is None: attrs_key = self._attrs_key else: @@ -560,7 +527,6 @@ def _is_group(self, path: Optional[str]): return len(n5_attrs) > 0 and "dimensions" not in n5_attrs def _is_array(self, path: Optional[str]): - if path is None: attrs_key = self._attrs_key else: @@ -569,7 +535,6 @@ def _is_array(self, path: Optional[str]): return "dimensions" in self._load_n5_attrs(attrs_key) def _contains_attrs(self, path: Optional[str]): - if path is None: attrs_key = self._attrs_key else: @@ -712,7 +677,6 @@ def attrs_to_zarr(attrs: Dict[str, Any]) -> Dict[str, Any]: def compressor_config_to_n5(compressor_config: Optional[Dict[str, Any]]) -> Dict[str, Any]: - if compressor_config is None: return {"type": "raw"} else: @@ -726,19 +690,16 @@ def compressor_config_to_n5(compressor_config: Optional[Dict[str, Any]]) -> Dict n5_config = {"type": codec_id} if codec_id == "bz2": - n5_config["type"] = "bzip2" n5_config["blockSize"] = _compressor_config["level"] elif codec_id == "blosc": - n5_config["cname"] = _compressor_config["cname"] n5_config["clevel"] = _compressor_config["clevel"] n5_config["shuffle"] = _compressor_config["shuffle"] n5_config["blocksize"] = _compressor_config["blocksize"] elif codec_id == "lzma": - # Switch to XZ for N5 if we are using the default XZ format. # Note: 4 is the default, which is lzma.CHECK_CRC64. 
if _compressor_config["format"] == 1 and _compressor_config["check"] in [-1, 4]: @@ -760,50 +721,42 @@ def compressor_config_to_n5(compressor_config: Optional[Dict[str, Any]]) -> Dict n5_config["preset"] = 6 elif codec_id == "zlib": - n5_config["type"] = "gzip" n5_config["level"] = _compressor_config["level"] n5_config["useZlib"] = True elif codec_id == "gzip": - n5_config["type"] = "gzip" n5_config["level"] = _compressor_config["level"] n5_config["useZlib"] = False else: - n5_config.update({k: v for k, v in _compressor_config.items() if k != "type"}) return n5_config def compressor_config_to_zarr(compressor_config: Dict[str, Any]) -> Optional[Dict[str, Any]]: - codec_id = compressor_config["type"] zarr_config = {"id": codec_id} if codec_id == "bzip2": - zarr_config["id"] = "bz2" zarr_config["level"] = compressor_config["blockSize"] elif codec_id == "blosc": - zarr_config["cname"] = compressor_config["cname"] zarr_config["clevel"] = compressor_config["clevel"] zarr_config["shuffle"] = compressor_config["shuffle"] zarr_config["blocksize"] = compressor_config["blocksize"] elif codec_id == "lzma": - zarr_config["format"] = compressor_config["format"] zarr_config["check"] = compressor_config["check"] zarr_config["preset"] = compressor_config["preset"] zarr_config["filters"] = compressor_config["filters"] elif codec_id == "xz": - zarr_config["id"] = "lzma" zarr_config["format"] = 1 # lzma.FORMAT_XZ zarr_config["check"] = -1 @@ -811,7 +764,6 @@ def compressor_config_to_zarr(compressor_config: Dict[str, Any]) -> Optional[Dic zarr_config["filters"] = None elif codec_id == "gzip": - if "useZlib" in compressor_config and compressor_config["useZlib"]: zarr_config["id"] = "zlib" zarr_config["level"] = compressor_config["level"] @@ -820,22 +772,18 @@ def compressor_config_to_zarr(compressor_config: Dict[str, Any]) -> Optional[Dic zarr_config["level"] = compressor_config["level"] elif codec_id == "raw": - return None else: - zarr_config.update({k: v for k, v in compressor_config.items() if k != "type"}) return zarr_config class N5ChunkWrapper(Codec): - codec_id = "n5_wrapper" def __init__(self, dtype, chunk_shape, compressor_config=None, compressor=None): - self.dtype = np.dtype(dtype) self.chunk_shape = tuple(chunk_shape) # is the dtype a little endian format? 
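The compressor-config hunks above only delete blank lines; the zarr ↔ N5 codec mapping is unchanged. A round-trip sketch using the module's own converters (these are private helpers in `zarr.n5`, shown purely for illustration):

```python
from zarr.n5 import compressor_config_to_n5, compressor_config_to_zarr

# zarr's zlib codec is expressed as N5's gzip type with useZlib=True ...
n5_cfg = compressor_config_to_n5({"id": "zlib", "level": 5})
assert n5_cfg == {"type": "gzip", "level": 5, "useZlib": True}

# ... and converts back to the zlib codec id on the way in.
assert compressor_config_to_zarr(n5_cfg) == {"id": "zlib", "level": 5}

# "raw" means no compressor on the zarr side.
assert compressor_config_to_zarr({"type": "raw"}) is None
```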
@@ -860,7 +808,6 @@ def get_config(self): return config def encode(self, chunk): - assert chunk.flags.c_contiguous header = self._create_header(chunk) @@ -872,12 +819,10 @@ def encode(self, chunk): return header + chunk.tobytes(order="A") def decode(self, chunk, out=None) -> bytes: - len_header, chunk_shape = self._read_header(chunk) chunk = chunk[len_header:] if out is not None: - # out should only be used if we read a complete chunk assert chunk_shape == self.chunk_shape, "Expected chunk of shape {}, found {}".format( self.chunk_shape, chunk_shape @@ -895,7 +840,6 @@ def decode(self, chunk, out=None) -> bytes: return out else: - if self._compressor: chunk = self._compressor.decode(chunk) @@ -915,7 +859,6 @@ def decode(self, chunk, out=None) -> bytes: @staticmethod def _create_header(chunk): - mode = struct.pack(">H", 0) num_dims = struct.pack(">H", len(chunk.shape)) shape = b"".join(struct.pack(">I", d) for d in chunk.shape[::-1]) @@ -924,7 +867,6 @@ def _create_header(chunk): @staticmethod def _read_header(chunk): - num_dims = struct.unpack(">H", chunk[2:4])[0] shape = tuple( struct.unpack(">I", chunk[i : i + 4])[0] for i in range(4, num_dims * 4 + 4, 4) diff --git a/src/zarr/storage.py b/src/zarr/storage.py index b36f804ebd..e3a43d26c8 100644 --- a/src/zarr/storage.py +++ b/src/zarr/storage.py @@ -205,7 +205,7 @@ def rmdir(store: StoreLike, path: Path = None): store_version = getattr(store, "_store_version", 2) if hasattr(store, "rmdir") and store.is_erasable(): # type: ignore # pass through - store.rmdir(path) # type: ignore + store.rmdir(path) else: # slow version, delete one key at a time if store_version == 2: @@ -235,7 +235,7 @@ def listdir(store: BaseStore, path: Path = None): path = normalize_storage_path(path) if hasattr(store, "listdir"): # pass through - return store.listdir(path) # type: ignore + return store.listdir(path) else: # slow version, iterate through all keys warnings.warn( @@ -288,7 +288,7 @@ def getsize(store: BaseStore, path: Path = None) -> int: if hasattr(store, "getsize"): # pass through path = normalize_storage_path(path) - return store.getsize(path) # type: ignore + return store.getsize(path) elif isinstance(store, MutableMapping): return _getsize(store, path) else: @@ -482,7 +482,6 @@ def _init_array_metadata( dimension_separator=None, storage_transformers=(), ): - store_version = getattr(store, "_store_version", 2) path = normalize_storage_path(path) @@ -627,7 +626,7 @@ def _init_array_metadata( key = _prefix_to_array_key(store, _path_to_prefix(path)) if hasattr(store, "_metadata_class"): - store[key] = store._metadata_class.encode_array_metadata(meta) # type: ignore + store[key] = store._metadata_class.encode_array_metadata(meta) else: store[key] = encode_array_metadata(meta) @@ -687,7 +686,6 @@ def _init_group_metadata( path: Optional[str] = None, chunk_store: Optional[StoreLike] = None, ): - store_version = getattr(store, "_store_version", 2) path = normalize_storage_path(path) @@ -731,10 +729,10 @@ def _init_group_metadata( if store_version == 3: meta = {"attributes": {}} # type: ignore else: - meta = {} # type: ignore + meta = {} key = _prefix_to_group_key(store, _path_to_prefix(path)) if hasattr(store, "_metadata_class"): - store[key] = store._metadata_class.encode_group_metadata(meta) # type: ignore + store[key] = store._metadata_class.encode_group_metadata(meta) else: store[key] = encode_group_metadata(meta) @@ -1055,7 +1053,6 @@ class DirectoryStore(Store): """ def __init__(self, path, normalize_keys=False, dimension_separator=None): - # guard 
conditions path = os.path.abspath(path) if os.path.exists(path) and not os.path.isdir(path): @@ -1415,7 +1412,6 @@ def _normalize_key(self, key): def getitems( self, keys: Sequence[str], *, contexts: Mapping[str, Context] ) -> Mapping[str, Any]: - keys_transformed = [self._normalize_key(key) for key in keys] results = self.map.getitems(keys_transformed, on_error="omit") # The function calling this method may not recognize the transformed keys @@ -1768,7 +1764,6 @@ def __init__( mode="a", dimension_separator=None, ): - # store properties path = os.path.abspath(path) self.path = path @@ -2707,9 +2702,7 @@ def listdir(self, path=None): SELECT LTRIM(SUBSTR(k, LENGTH(?) + 1), "/") || "/" AS m FROM zarr WHERE k LIKE (? || "{sep}%") ) ORDER BY l ASC - """.format( - sep=sep - ), + """.format(sep=sep), (path, path), ) keys = list(map(operator.itemgetter(0), keys)) diff --git a/src/zarr/util.py b/src/zarr/util.py index ea0dd9fcec..35ecc64bba 100644 --- a/src/zarr/util.py +++ b/src/zarr/util.py @@ -180,10 +180,9 @@ def normalize_chunks(chunks: Any, shape: Tuple[int, ...], typesize: int) -> Tupl def normalize_dtype(dtype: Union[str, np.dtype], object_codec) -> Tuple[np.dtype, Any]: - # convenience API for object arrays if inspect.isclass(dtype): - dtype = dtype.__name__ # type: ignore + dtype = dtype.__name__ if isinstance(dtype, str): # allow ':' to delimit class from codec arguments tokens = dtype.split(":") @@ -245,7 +244,6 @@ def is_total_slice(item, shape: Tuple[int]) -> bool: def normalize_resize_args(old_shape, *args): - # normalize new shape argument if len(args) == 1: new_shape = args[0] @@ -294,7 +292,6 @@ def normalize_dimension_separator(sep: Optional[str]) -> Optional[str]: def normalize_fill_value(fill_value, dtype: np.dtype): - if fill_value is None or dtype.hasobject: # no fill value pass @@ -309,8 +306,9 @@ def normalize_fill_value(fill_value, dtype: np.dtype): if not isinstance(fill_value, str): raise ValueError( - "fill_value {!r} is not valid for dtype {}; must be a " - "unicode string".format(fill_value, dtype) + "fill_value {!r} is not valid for dtype {}; must be a " "unicode string".format( + fill_value, dtype + ) ) else: @@ -324,15 +322,15 @@ def normalize_fill_value(fill_value, dtype: np.dtype): except Exception as e: # re-raise with our own error message to be helpful raise ValueError( - "fill_value {!r} is not valid for dtype {}; nested " - "exception: {}".format(fill_value, dtype, e) + "fill_value {!r} is not valid for dtype {}; nested " "exception: {}".format( + fill_value, dtype, e + ) ) return fill_value def normalize_storage_path(path: Union[str, bytes, None]) -> str: - # handle bytes if isinstance(path, bytes): path = str(path, "ascii") @@ -342,7 +340,6 @@ def normalize_storage_path(path: Union[str, bytes, None]) -> str: path = str(path) if path: - # convert backslash to forward slash path = path.replace("\\", "/") @@ -506,7 +503,6 @@ def tree_widget(group, expand, level): class TreeViewer: def __init__(self, group, expand=False, level=None): - self.group = group self.expand = expand self.level = level diff --git a/src/zarr/v3/abc/metadata.py b/src/zarr/v3/abc/metadata.py index bdd2f86d59..4fcabf72a1 100644 --- a/src/zarr/v3/abc/metadata.py +++ b/src/zarr/v3/abc/metadata.py @@ -5,11 +5,12 @@ from typing import Dict from typing_extensions import Self -from dataclasses import fields +from dataclasses import fields, dataclass from zarr.v3.common import JSON +@dataclass(frozen=True) class Metadata: def to_dict(self) -> JSON: """ diff --git a/src/zarr/v3/array.py 
b/src/zarr/v3/array.py index 632f7d8ec7..64a73e64dc 100644 --- a/src/zarr/v3/array.py +++ b/src/zarr/v3/array.py @@ -27,6 +27,7 @@ ChunkCoords, Selection, SliceSelection, + ZarrFormat, concurrent_map, ) from zarr.v3.config import RuntimeConfiguration @@ -88,6 +89,7 @@ async def create( attributes: Optional[Dict[str, Any]] = None, runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), exists_ok: bool = False, + zarr_format: ZarrFormat = 3, ) -> AsyncArray: store_path = make_store_path(store) if not exists_ok: @@ -100,31 +102,33 @@ async def create( fill_value = False else: fill_value = 0 + if zarr_format == 3: + metadata = ArrayMetadata( + shape=shape, + data_type=dtype, + chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), + chunk_key_encoding=( + V2ChunkKeyEncoding(separator=chunk_key_encoding[1]) + if chunk_key_encoding[0] == "v2" + else DefaultChunkKeyEncoding(separator=chunk_key_encoding[1]) + ), + fill_value=fill_value, + codecs=codecs, + dimension_names=tuple(dimension_names) if dimension_names else None, + attributes=attributes or {}, + ) + runtime_configuration = runtime_configuration or RuntimeConfiguration() - metadata = ArrayMetadata( - shape=shape, - data_type=dtype, - chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape), - chunk_key_encoding=( - V2ChunkKeyEncoding(separator=chunk_key_encoding[1]) - if chunk_key_encoding[0] == "v2" - else DefaultChunkKeyEncoding(separator=chunk_key_encoding[1]) - ), - fill_value=fill_value, - codecs=codecs, - dimension_names=tuple(dimension_names) if dimension_names else None, - attributes=attributes or {}, - ) - runtime_configuration = runtime_configuration or RuntimeConfiguration() - - array = cls( - metadata=metadata, - store_path=store_path, - runtime_configuration=runtime_configuration, - ) + array = cls( + metadata=metadata, + store_path=store_path, + runtime_configuration=runtime_configuration, + ) - await array._save_metadata() - return array + await array._save_metadata() + return array + else: + raise NotImplementedError("Zarr version 2 arrays cannot be created yet.") @classmethod def from_dict( @@ -182,7 +186,7 @@ def shape(self) -> ChunkCoords: @property def size(self) -> int: - return np.prod(self.metadata.shape) + return np.prod(self.metadata.shape).item() @property def dtype(self) -> np.dtype: diff --git a/src/zarr/v3/chunk_grids.py b/src/zarr/v3/chunk_grids.py index 6c48323798..b0a2a7bb36 100644 --- a/src/zarr/v3/chunk_grids.py +++ b/src/zarr/v3/chunk_grids.py @@ -20,7 +20,7 @@ class ChunkGrid(Metadata): @classmethod def from_dict(cls, data: Dict[str, JSON]) -> ChunkGrid: if isinstance(data, ChunkGrid): - return data # type: ignore + return data name_parsed, _ = parse_named_configuration(data) if name_parsed == "regular": diff --git a/src/zarr/v3/chunk_key_encodings.py b/src/zarr/v3/chunk_key_encodings.py index e4339240e3..9889a2f04a 100644 --- a/src/zarr/v3/chunk_key_encodings.py +++ b/src/zarr/v3/chunk_key_encodings.py @@ -1,6 +1,6 @@ from __future__ import annotations from abc import abstractmethod -from typing import TYPE_CHECKING, Dict, Literal +from typing import TYPE_CHECKING, Dict, Literal, cast from dataclasses import dataclass from zarr.v3.abc.metadata import Metadata @@ -19,7 +19,7 @@ def parse_separator(data: JSON) -> SeparatorLiteral: if data not in (".", "/"): raise ValueError(f"Expected an '.' or '/' separator. 
Got {data} instead.") - return data # type: ignore + return cast(SeparatorLiteral, data) @dataclass(frozen=True) @@ -35,7 +35,7 @@ def __init__(self, *, separator: SeparatorLiteral) -> None: @classmethod def from_dict(cls, data: Dict[str, JSON]) -> ChunkKeyEncoding: if isinstance(data, ChunkKeyEncoding): - return data # type: ignore + return data name_parsed, configuration_parsed = parse_named_configuration(data) if name_parsed == "default": diff --git a/src/zarr/v3/codecs/transpose.py b/src/zarr/v3/codecs/transpose.py index f214d1e7f1..b663230e35 100644 --- a/src/zarr/v3/codecs/transpose.py +++ b/src/zarr/v3/codecs/transpose.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Dict, Iterable +from typing import TYPE_CHECKING, Dict, Iterable, Union, cast from dataclasses import dataclass, replace @@ -16,12 +16,12 @@ from zarr.v3.codecs.registry import register_codec -def parse_transpose_order(data: JSON) -> Tuple[int]: +def parse_transpose_order(data: Union[JSON, Iterable[int]]) -> Tuple[int, ...]: if not isinstance(data, Iterable): raise TypeError(f"Expected an iterable. Got {data} instead.") if not all(isinstance(a, int) for a in data): raise TypeError(f"Expected an iterable of integers. Got {data} instead.") - return tuple(data) # type: ignore[return-value] + return tuple(cast(Iterable[int], data)) @dataclass(frozen=True) @@ -31,7 +31,7 @@ class TransposeCodec(ArrayArrayCodec): order: Tuple[int, ...] def __init__(self, *, order: ChunkCoordsLike) -> None: - order_parsed = parse_transpose_order(order) # type: ignore[arg-type] + order_parsed = parse_transpose_order(order) object.__setattr__(self, "order", order_parsed) diff --git a/src/zarr/v3/config.py b/src/zarr/v3/config.py index 98a25994c4..cebe5c1b09 100644 --- a/src/zarr/v3/config.py +++ b/src/zarr/v3/config.py @@ -43,7 +43,6 @@ def __init__( concurrency: Optional[int] = None, asyncio_loop: Optional[AbstractEventLoop] = None, ): - order_parsed = parse_indexing_order(order) concurrency_parsed = parse_concurrency(concurrency) asyncio_loop_parsed = parse_asyncio_loop(asyncio_loop) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 031d9a0ad9..dbf69682a4 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -12,7 +12,6 @@ Any, Literal, AsyncIterator, - Iterator, ) from zarr.v3.abc.metadata import Metadata @@ -55,11 +54,12 @@ def to_bytes(self) -> dict[str, bytes]: return {ZARR_JSON: json.dumps(self.to_dict()).encode()} else: return { + ZGROUP_JSON: json.dumps({"zarr_format": self.zarr_format}).encode(), ZGROUP_JSON: json.dumps({"zarr_format": self.zarr_format}).encode(), ZATTRS_JSON: json.dumps(self.attributes).encode(), } - def __init__(self, attributes: dict[str, Any] = {}, zarr_format: Literal[2, 3] = 3): + def __init__(self, attributes: dict[str, Any] | None = None, zarr_format: Literal[2, 3] = 3): attributes_parsed = parse_attributes(attributes) zarr_format_parsed = parse_zarr_format(zarr_format) @@ -113,8 +113,8 @@ async def open( zarr_format: Literal[2, 3] = 3, ) -> AsyncGroup: store_path = make_store_path(store) - zarr_json_bytes = await (store_path / ZARR_JSON).get() - assert zarr_json_bytes is not None + + zarr_json_bytes: bytes | None # TODO: consider trying to autodiscover the zarr-format here if zarr_format == 3: @@ -126,16 +126,13 @@ async def open( ) elif zarr_format == 2: # V2 groups are comprised of a .zgroup and .zattrs objects - # (both are optional in the case of implicit groups) zgroup_bytes, zattrs_bytes = await asyncio.gather( (store_path / 
ZGROUP_JSON).get(), (store_path / ZATTRS_JSON).get() ) - zgroup = ( - json.loads(json.loads(zgroup_bytes)) - if zgroup_bytes is not None - else {"zarr_format": 2} - ) - zattrs = json.loads(json.loads(zattrs_bytes)) if zattrs_bytes is not None else {} + if zgroup_bytes is None: + raise FileNotFoundError(f"No Zarr v2 group metadata found at {store_path}") + zgroup = json.loads(zgroup_bytes) + zattrs = json.loads(zattrs_bytes) if zattrs_bytes is not None else {} zarr_json = {**zgroup, "attributes": zattrs} else: raise ValueError(f"unexpected zarr_format: {zarr_format}") @@ -147,7 +144,7 @@ def from_dict( store_path: StorePath, data: dict[str, Any], runtime_configuration: RuntimeConfiguration, - ) -> Group: + ) -> AsyncGroup: group = cls( metadata=GroupMetadata.from_dict(data), store_path=store_path, @@ -159,20 +156,19 @@ async def getitem( self, key: str, ) -> AsyncArray | AsyncGroup: - store_path = self.store_path / key logger.warning("key=%s, store_path=%s", key, store_path) - # Note: - # in zarr-python v2, we first check if `key` references an Array, else if `key` references - # a group,using standalone `contains_array` and `contains_group` functions. These functions - # are reusable, but for v3 they would perform redundant I/O operations. - # Not clear how much of that strategy we want to keep here. - # if `key` names an object in storage, it cannot be an array or group if await store_path.exists(): raise KeyError(key) + # calling list_dir here is a big performance loss. We should try to find a way around + # this. + # see https://github.com/zarr-developers/zarr-python/pull/1743#issuecomment-2058681807 + if key not in await store_path.store.list_dir(self.store_path.path): + raise KeyError(key) + if self.metadata.zarr_format == 3: zarr_json_bytes = await (store_path / ZARR_JSON).get() if zarr_json_bytes is None: @@ -187,10 +183,12 @@ async def getitem( zarr_json = json.loads(zarr_json_bytes) if zarr_json["node_type"] == "group": return type(self).from_dict(store_path, zarr_json, self.runtime_configuration) - if zarr_json["node_type"] == "array": + elif zarr_json["node_type"] == "array": return AsyncArray.from_dict( store_path, zarr_json, runtime_configuration=self.runtime_configuration ) + else: + raise ValueError(f"unexpected node_type: {zarr_json['node_type']}") elif self.metadata.zarr_format == 2: # Q: how do we like optimistically fetching .zgroup, .zarray, and .zattrs? # This guarantees that we will always make at least one extra request to the store @@ -212,9 +210,6 @@ async def getitem( store_path, zarray, runtime_configuration=self.runtime_configuration ) else: - if zgroup_bytes is None: - # implicit group? 
- logger.warning("group at %s is an implicit group", store_path) zgroup = ( json.loads(zgroup_bytes) if zgroup_bytes is not None @@ -255,6 +250,7 @@ async def create_group(self, path: str, **kwargs) -> AsyncGroup: return await type(self).create( self.store_path / path, runtime_configuration=runtime_configuration, + zarr_format=self.metadata.zarr_format, **kwargs, ) @@ -263,6 +259,7 @@ async def create_array(self, path: str, **kwargs) -> AsyncArray: return await AsyncArray.create( self.store_path / path, runtime_configuration=runtime_configuration, + zarr_format=self.metadata.zarr_format, **kwargs, ) @@ -287,13 +284,15 @@ async def update_attributes(self, new_attributes: dict[str, Any]): def __repr__(self): return f"" - async def nchildren(self) -> int: + async def nmembers(self) -> int: raise NotImplementedError - async def children(self) -> AsyncGenerator[AsyncArray | AsyncGroup, None]: + async def members(self) -> AsyncGenerator[tuple[str, AsyncArray | AsyncGroup], None]: """ Returns an AsyncGenerator over the arrays and groups contained in this group. This method requires that `store_path.store` supports directory listing. + + The results are not guaranteed to be ordered. """ if not self.store_path.store.supports_listing: msg = ( @@ -311,16 +310,21 @@ async def children(self) -> AsyncGenerator[AsyncArray | AsyncGroup, None]: # and scoped to specific zarr versions if key not in ("zarr.json", ".zgroup", ".zattrs"): try: - # TODO: performance optimization -- batch - print(key) + # TODO: performance optimization -- load children concurrently child = await self.getitem(key) - # keyerror is raised when `subkey``names an object in the store + # keyerror is raised when `key``names an object in the store # in which case `subkey` cannot be the name of a sub-array or sub-group. - yield child + yield key, child except KeyError: + # keyerror is raised when `subkey` names an object (in the object storage sense), + # as opposed to a prefix, in the store under the prefix associated with this group + # in which case `subkey` cannot be the name of a sub-array or sub-group. 
+ logger.warning( + "Object at %s is not recognized as a component of a Zarr hierarchy.", key + ) pass - async def contains(self, child: str) -> bool: + async def contains(self, member: str) -> bool: raise NotImplementedError async def group_keys(self) -> AsyncIterator[str]: @@ -427,8 +431,16 @@ async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group new_metadata = replace(self.metadata, attributes=new_attributes) # Write new metadata - await (self.store_path / ZARR_JSON).set_async(new_metadata.to_bytes()) - return replace(self, metadata=new_metadata) + to_save = new_metadata.to_bytes() + awaitables = [(self.store_path / key).set(value) for key, value in to_save.items()] + await asyncio.gather(*awaitables) + + async_group = replace(self._async_group, metadata=new_metadata) + return replace(self, _async_group=async_group) + + @property + def store_path(self) -> StorePath: + return self._async_group.store_path @property def metadata(self) -> GroupMetadata: @@ -447,23 +459,30 @@ def update_attributes(self, new_attributes: dict[str, Any]): return self @property - def nchildren(self) -> int: - return self._sync(self._async_group.nchildren) + def nmembers(self) -> int: + return self._sync(self._async_group.nmembers) @property - def children(self) -> list[Array | Group]: - _children = self._sync_iter(self._async_group.children) - return [Array(obj) if isinstance(obj, AsyncArray) else Group(obj) for obj in _children] + def members(self) -> tuple[tuple[str, Array | Group], ...]: + """ + Return the sub-arrays and sub-groups of this group as a `tuple` of (name, array | group) + pairs + """ + _members: list[AsyncArray | AsyncGroup] = self._sync_iter(self._async_group.members) + return tuple( + (key, Array(value)) if isinstance(value, AsyncArray) else (key, Group(value)) + for key, value in _members + ) - def __contains__(self, child) -> bool: - return self._sync(self._async_group.contains(child)) + def __contains__(self, member) -> bool: + return self._sync(self._async_group.contains(member)) - def group_keys(self) -> Iterator[str]: - return self._sync_iter(self._async_group.group_keys) + def group_keys(self) -> list[str]: + return self._sync_iter(self._async_group.group_keys()) def groups(self) -> list[Group]: # TODO: in v2 this was a generator that return key: Group - return [Group(obj) for obj in self._sync_iter(self._async_group.groups)] + return [Group(obj) for obj in self._sync_iter(self._async_group.groups())] def array_keys(self) -> list[str]: return self._sync_iter(self._async_group.array_keys) diff --git a/src/zarr/v3/metadata.py b/src/zarr/v3/metadata.py index de3055abdc..a5e8927311 100644 --- a/src/zarr/v3/metadata.py +++ b/src/zarr/v3/metadata.py @@ -1,6 +1,6 @@ from __future__ import annotations from enum import Enum -from typing import TYPE_CHECKING, cast, Dict, Iterable +from typing import TYPE_CHECKING, cast, Dict, Iterable, Any from dataclasses import dataclass, field import json import numpy as np @@ -10,7 +10,7 @@ if TYPE_CHECKING: - from typing import Any, Literal, Union, List, Optional, Tuple + from typing import Literal, Union, List, Optional, Tuple from zarr.v3.codecs.pipeline import CodecPipeline @@ -244,7 +244,7 @@ class ArrayV2Metadata(Metadata): filters: Optional[List[Dict[str, Any]]] = None dimension_separator: Literal[".", "/"] = "." 
compressor: Optional[Dict[str, Any]] = None - attributes: Optional[Dict[str, Any]] = field(default_factory=dict) + attributes: Optional[Dict[str, Any]] = cast(Dict[str, Any], field(default_factory=dict)) zarr_format: Literal[2] = field(init=False, default=2) def __init__( diff --git a/src/zarr/v3/store/core.py b/src/zarr/v3/store/core.py index 0ef1c8569e..16714d9e30 100644 --- a/src/zarr/v3/store/core.py +++ b/src/zarr/v3/store/core.py @@ -5,6 +5,7 @@ from zarr.v3.common import BytesLike from zarr.v3.abc.store import Store +from zarr.v3.store.local import LocalStore def _dereference_path(root: str, path: str) -> str: @@ -24,10 +25,6 @@ def __init__(self, store: Store, path: Optional[str] = None): self.store = store self.path = path or "" - @classmethod - def from_path(cls, pth: Path) -> StorePath: - return cls(Store.from_path(pth)) - async def get( self, byte_range: Optional[Tuple[int, Optional[int]]] = None ) -> Optional[BytesLike]: @@ -70,14 +67,6 @@ def make_store_path(store_like: StoreLike) -> StorePath: return store_like elif isinstance(store_like, Store): return StorePath(store_like) - # elif isinstance(store_like, Path): - # return StorePath(Store.from_path(store_like)) elif isinstance(store_like, str): - try: - from upath import UPath - - return StorePath(Store.from_path(UPath(store_like))) - except ImportError as e: - raise e - # return StorePath(LocalStore(Path(store_like))) + return StorePath(LocalStore(Path(store_like))) raise TypeError diff --git a/src/zarr/v3/store/local.py b/src/zarr/v3/store/local.py index 1a87c450a0..ac883c35e6 100644 --- a/src/zarr/v3/store/local.py +++ b/src/zarr/v3/store/local.py @@ -26,7 +26,11 @@ def _get(path: Path, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> `None`, then the entire file after the first byte will be read. 
""" if byte_range is not None: - start = byte_range[0] + if byte_range[0] is None: + start = 0 + else: + start = byte_range[0] + end = (start + byte_range[1]) if byte_range[1] is not None else None else: return path.read_bytes() @@ -61,7 +65,6 @@ def _put( class LocalStore(Store): - supports_writes: bool = True supports_partial_writes: bool = True supports_listing: bool = True diff --git a/src/zarr/v3/sync.py b/src/zarr/v3/sync.py index f0996c019e..d8374feaa5 100644 --- a/src/zarr/v3/sync.py +++ b/src/zarr/v3/sync.py @@ -5,7 +5,6 @@ from typing import ( Any, AsyncIterator, - Callable, Coroutine, List, Optional, @@ -91,8 +90,9 @@ def _get_loop(): # repeat the check just in case the loop got filled between the # previous two calls from another thread if loop[0] is None: - loop[0] = asyncio.new_event_loop() - th = threading.Thread(target=loop[0].run_forever, name="zarrIO") + new_loop = asyncio.new_event_loop() + loop[0] = new_loop + th = threading.Thread(target=new_loop.run_forever, name="zarrIO") th.daemon = True th.start() iothread[0] = th @@ -104,7 +104,6 @@ def _get_loop(): class SyncMixin: - _sync_configuration: SyncConfiguration def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: @@ -112,11 +111,10 @@ def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: # this should allow us to better type the sync wrapper return sync(coroutine, loop=self._sync_configuration.asyncio_loop) - def _sync_iter( - self, func: Callable[P, AsyncIterator[T]], *args: P.args, **kwargs: P.kwargs - ) -> List[T]: + def _sync_iter(self, coroutine: Coroutine[Any, Any, AsyncIterator[T]]) -> List[T]: async def iter_to_list() -> List[T]: # TODO: replace with generators so we don't materialize the entire iterator at once - return [item async for item in func(*args, **kwargs)] + # async_iterator = await coroutine + return [item async for item in coroutine()] return self._sync(iter_to_list()) diff --git a/tests/test_attrs.py b/tests/test_attrs.py index a5ce4bac89..7e3377f664 100644 --- a/tests/test_attrs.py +++ b/tests/test_attrs.py @@ -30,7 +30,6 @@ def init_attributes(self, store, read_only=False, cache=True, zarr_version=2): return Attributes(store, key=root + "attrs", read_only=read_only, cache=cache) def test_storage(self, zarr_version): - store = _init_store(zarr_version) root = ".z" if zarr_version == 2 else meta_root attrs_key = root + "attrs" @@ -50,7 +49,6 @@ def test_storage(self, zarr_version): assert dict(foo="bar", baz=42) == d def test_utf8_encoding(self, zarr_version): - project_root = pathlib.Path(zarr.__file__).resolve().parent.parent fixdir = project_root / "fixture" testdir = fixdir / "utf8attrs" @@ -67,7 +65,6 @@ def test_utf8_encoding(self, zarr_version): assert fixture["utf8attrs"].attrs.asdict() == dict(foo="た") def test_get_set_del_contains(self, zarr_version): - store = _init_store(zarr_version) a = self.init_attributes(store, zarr_version=zarr_version) assert "foo" not in a @@ -84,7 +81,6 @@ def test_get_set_del_contains(self, zarr_version): a["foo"] def test_update_put(self, zarr_version): - store = _init_store(zarr_version) a = self.init_attributes(store, zarr_version=zarr_version) assert "foo" not in a @@ -102,7 +98,6 @@ def test_update_put(self, zarr_version): assert "baz" not in a def test_iterators(self, zarr_version): - store = _init_store(zarr_version) a = self.init_attributes(store, zarr_version=zarr_version) assert 0 == len(a) @@ -232,7 +227,6 @@ def test_caching_on(self, zarr_version): assert get_cnt == store.counter["__getitem__", attrs_key] def test_caching_off(self, 
zarr_version): - # setup store store = CountingDict() if zarr_version == 2 else CountingDictV3() attrs_key = ".zattrs" if zarr_version == 2 else "meta/root/attrs" diff --git a/tests/test_convenience.py b/tests/test_convenience.py index 0970a9e1aa..7cb4db7a35 100644 --- a/tests/test_convenience.py +++ b/tests/test_convenience.py @@ -57,7 +57,6 @@ def _init_creation_kwargs(zarr_version): @pytest.mark.parametrize("zarr_version", _VERSIONS) def test_open_array(path_type, zarr_version): - store = tempfile.mkdtemp() atexit.register(atexit_rmtree, store) store = path_type(store) @@ -86,7 +85,6 @@ def test_open_array(path_type, zarr_version): @pytest.mark.parametrize("zarr_version", _VERSIONS) def test_open_group(path_type, zarr_version): - store = tempfile.mkdtemp() atexit.register(atexit_rmtree, store) store = path_type(store) @@ -210,7 +208,6 @@ def test_tree(zarr_version): def test_consolidate_metadata( with_chunk_store, zarr_version, listable, monkeypatch, stores_from_path ): - # setup initial data if stores_from_path: store = tempfile.mkdtemp() @@ -399,7 +396,6 @@ def test_save_array_separator(tmpdir, options): class TestCopyStore(unittest.TestCase): - _version = 2 def setUp(self): @@ -536,7 +532,6 @@ def test_if_exists(self): @pytest.mark.skipif(not v3_api_available, reason="V3 is disabled") class TestCopyStoreV3(TestCopyStore): - _version = 3 def setUp(self): @@ -557,7 +552,6 @@ def test_mismatched_store_versions(self): def check_copied_array(original, copied, without_attrs=False, expect_props=None): - # setup source_h5py = original.__module__.startswith("h5py.") dest_h5py = copied.__module__.startswith("h5py.") @@ -621,7 +615,6 @@ def check_copied_array(original, copied, without_attrs=False, expect_props=None) def check_copied_group(original, copied, without_attrs=False, expect_props=None, shallow=False): - # setup if expect_props is None: expect_props = dict() diff --git a/tests/test_creation.py b/tests/test_creation.py index 9307b81b52..27ce00bc8a 100644 --- a/tests/test_creation.py +++ b/tests/test_creation.py @@ -74,7 +74,6 @@ def _init_creation_kwargs(zarr_version, at_root=True): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_array(zarr_version, at_root): - expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version kwargs = _init_creation_kwargs(zarr_version, at_root) @@ -213,7 +212,6 @@ def test_full_additional_dtypes(zarr_version): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_open_array(zarr_version, at_root, dimension_separator): - store = "data/array.zarr" kwargs = _init_creation_kwargs(zarr_version, at_root) @@ -329,7 +327,6 @@ def test_open_array(zarr_version, at_root, dimension_separator): def test_open_array_none(): - # open with both store and zarr_version = None z = open_array(mode="w", shape=100, chunks=10) assert isinstance(z, Array) @@ -339,7 +336,6 @@ def test_open_array_none(): @pytest.mark.parametrize("dimension_separator", [".", "/", None]) @pytest.mark.parametrize("zarr_version", _VERSIONS2) def test_open_array_infer_separator_from_store(zarr_version, dimension_separator): - if zarr_version == 3: StoreClass = DirectoryStoreV3 path = "data" @@ -370,7 +366,6 @@ def test_open_array_infer_separator_from_store(zarr_version, dimension_separator # TODO: N5 support for v3 @pytest.mark.parametrize("zarr_version", [None, 2]) def test_open_array_n5(zarr_version): - store = "data/array.zarr" kwargs = 
_init_creation_kwargs(zarr_version) @@ -409,7 +404,6 @@ def test_open_array_n5(zarr_version): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_open_array_dict_store(zarr_version, at_root): - # dict will become a KVStore store = dict() kwargs = _init_creation_kwargs(zarr_version, at_root) @@ -503,7 +497,6 @@ def test_empty_like(zarr_version, at_root): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_zeros_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version @@ -529,7 +522,6 @@ def test_zeros_like(zarr_version, at_root): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_ones_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version @@ -556,7 +548,6 @@ def test_ones_like(zarr_version, at_root): @pytest.mark.parametrize("zarr_version", _VERSIONS) @pytest.mark.parametrize("at_root", [False, True]) def test_full_like(zarr_version, at_root): - kwargs = _init_creation_kwargs(zarr_version, at_root) expected_zarr_version = DEFAULT_ZARR_VERSION if zarr_version is None else zarr_version diff --git a/tests/test_dim_separator.py b/tests/test_dim_separator.py index 83f4d3b5b9..4276d1829d 100644 --- a/tests/test_dim_separator.py +++ b/tests/test_dim_separator.py @@ -46,7 +46,6 @@ def dataset(tmpdir, request): static = project_root / "fixture" / suffix if not static.exists(): # pragma: no cover - if "nested" in which: # No way to reproduce the nested_legacy file via code generator = NestedDirectoryStore diff --git a/tests/test_filters.py b/tests/test_filters.py index d55be9145f..fc63cdca8d 100644 --- a/tests/test_filters.py +++ b/tests/test_filters.py @@ -30,7 +30,6 @@ def test_array_with_delta_filter(): - # setup astype = "u1" dtype = "i8" @@ -38,7 +37,6 @@ def test_array_with_delta_filter(): data = np.arange(100, dtype=dtype) for compressor in compressors: - a = array(data, chunks=10, compressor=compressor, filters=filters) # check round-trip @@ -57,7 +55,6 @@ def test_array_with_delta_filter(): def test_array_with_astype_filter(): - # setup encode_dtype = "i1" decode_dtype = "i8" @@ -68,7 +65,6 @@ def test_array_with_astype_filter(): data = np.arange(shape, dtype=decode_dtype) for compressor in compressors: - a = array(data, chunks=chunks, compressor=compressor, filters=filters) # check round-trip @@ -88,7 +84,6 @@ def test_array_with_astype_filter(): def test_array_with_scaleoffset_filter(): - # setup astype = "u1" dtype = "f8" @@ -97,7 +92,6 @@ def test_array_with_scaleoffset_filter(): data = np.linspace(1000, 1001, 34, dtype="f8") for compressor in compressors: - a = array(data, chunks=5, compressor=compressor, filters=filters) # check round-trip @@ -116,7 +110,6 @@ def test_array_with_scaleoffset_filter(): def test_array_with_quantize_filter(): - # setup dtype = "f8" digits = 3 @@ -125,7 +118,6 @@ def test_array_with_quantize_filter(): data = np.linspace(0, 1, 34, dtype=dtype) for compressor in compressors: - a = array(data, chunks=5, compressor=compressor, filters=filters) # check round-trip @@ -144,14 +136,12 @@ def test_array_with_quantize_filter(): def test_array_with_packbits_filter(): - # setup flt = PackBits() filters = [flt] data = np.random.randint(0, 2, size=100, dtype=bool) 
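# PackBits stores each boolean as a single bit (roughly 8 values per byte), so the round-trip checks below exercise both the bit-packing and unpacking paths.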
for compressor in compressors: - a = array(data, chunks=5, compressor=compressor, filters=filters) # check round-trip @@ -170,14 +160,12 @@ def test_array_with_packbits_filter(): def test_array_with_categorize_filter(): - # setup data = np.random.choice(["foo", "bar", "baz"], size=100) flt = Categorize(dtype=data.dtype, labels=["foo", "bar", "baz"]) filters = [flt] for compressor in compressors: - a = array(data, chunks=5, compressor=compressor, filters=filters) # check round-trip diff --git a/tests/test_hierarchy.py b/tests/test_hierarchy.py index 3eaa4743dd..6d4b1ff54c 100644 --- a/tests/test_hierarchy.py +++ b/tests/test_hierarchy.py @@ -1085,7 +1085,6 @@ def test_paths(self): g1.store.close() def test_pickle(self): - # setup group g = self.create_group() d = g.create_dataset("foo/bar", shape=100, chunks=10) @@ -1113,7 +1112,6 @@ def test_pickle(self): g2.store.close() def test_context_manager(self): - with self.create_group() as g: d = g.create_dataset("foo/bar", shape=100, chunks=10) d[:] = np.arange(100) @@ -1375,7 +1373,6 @@ def create_store(): return store, None def test_context_manager(self): - with self.create_group() as g: store = g.store d = g.create_dataset("foo/bar", shape=100, chunks=10) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 1835206819..d441f3b8fa 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -17,7 +17,6 @@ def test_normalize_integer_selection(): - assert 1 == normalize_integer_selection(1, 100) assert 99 == normalize_integer_selection(-1, 100) with pytest.raises(IndexError): @@ -29,7 +28,6 @@ def test_normalize_integer_selection(): def test_replace_ellipsis(): - # 1D, single item assert (0,) == replace_ellipsis(0, (100,)) @@ -68,7 +66,6 @@ def test_replace_ellipsis(): def test_get_basic_selection_0d(): - # setup a = np.array(42) z = zarr.create(shape=a.shape, dtype=a.dtype, fill_value=None) @@ -191,7 +188,6 @@ def _test_get_basic_selection(a, z, selection): # noinspection PyStatementEffect def test_get_basic_selection_1d(): - # setup a = np.arange(1050, dtype=int) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) @@ -264,7 +260,6 @@ def test_get_basic_selection_1d(): # noinspection PyStatementEffect def test_get_basic_selection_2d(): - # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) @@ -423,7 +418,6 @@ def test_fancy_indexing_doesnt_mix_with_implicit_slicing(): def test_set_basic_selection_0d(): - # setup v = np.array(42) a = np.zeros_like(v) @@ -479,7 +473,6 @@ def _test_get_orthogonal_selection(a, z, selection): # noinspection PyStatementEffect def test_get_orthogonal_selection_1d_bool(): - # setup a = np.arange(1050, dtype=int) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) @@ -502,7 +495,6 @@ def test_get_orthogonal_selection_1d_bool(): # noinspection PyStatementEffect def test_get_orthogonal_selection_1d_int(): - # setup a = np.arange(1050, dtype=int) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) @@ -561,7 +553,6 @@ def _test_get_orthogonal_selection_2d(a, z, ix0, ix1): # noinspection PyStatementEffect def test_get_orthogonal_selection_2d(): - # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) @@ -570,7 +561,6 @@ def test_get_orthogonal_selection_2d(): np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: - # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 
0.5, size=a.shape[1]).astype(bool) @@ -641,7 +631,6 @@ def _test_get_orthogonal_selection_3d(a, z, ix0, ix1, ix2): def test_get_orthogonal_selection_3d(): - # setup a = np.arange(100000, dtype=int).reshape(200, 50, 10) z = zarr.create(shape=a.shape, chunks=(60, 20, 3), dtype=a.dtype) @@ -650,7 +639,6 @@ def test_get_orthogonal_selection_3d(): np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: - # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) @@ -673,7 +661,6 @@ def test_get_orthogonal_selection_3d(): def test_orthogonal_indexing_edge_cases(): - a = np.arange(6).reshape(1, 2, 3) z = zarr.create(shape=a.shape, chunks=(1, 2, 3), dtype=a.dtype) z[:] = a @@ -706,7 +693,6 @@ def _test_set_orthogonal_selection(v, a, z, selection): def test_set_orthogonal_selection_1d(): - # setup v = np.arange(1050, dtype=int) a = np.empty(v.shape, dtype=int) @@ -715,7 +701,6 @@ def test_set_orthogonal_selection_1d(): # test with different degrees of sparseness np.random.seed(42) for p in 0.5, 0.1, 0.01: - # boolean arrays ix = np.random.binomial(1, p, size=a.shape[0]).astype(bool) _test_set_orthogonal_selection(v, a, z, ix) @@ -734,7 +719,6 @@ def test_set_orthogonal_selection_1d(): def _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1): - selections = [ # index both axes with array (ix0, ix1), @@ -749,7 +733,6 @@ def _test_set_orthogonal_selection_2d(v, a, z, ix0, ix1): def test_set_orthogonal_selection_2d(): - # setup v = np.arange(10000, dtype=int).reshape(1000, 10) a = np.empty_like(v) @@ -758,7 +741,6 @@ def test_set_orthogonal_selection_2d(): np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: - # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) @@ -780,7 +762,6 @@ def test_set_orthogonal_selection_2d(): def _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2): - selections = ( # single value (84, 42, 4), @@ -807,7 +788,6 @@ def _test_set_orthogonal_selection_3d(v, a, z, ix0, ix1, ix2): def test_set_orthogonal_selection_3d(): - # setup v = np.arange(100000, dtype=int).reshape(200, 50, 10) a = np.empty_like(v) @@ -816,7 +796,6 @@ def test_set_orthogonal_selection_3d(): np.random.seed(42) # test with different degrees of sparseness for p in 0.5, 0.1, 0.01: - # boolean arrays ix0 = np.random.binomial(1, p, size=a.shape[0]).astype(bool) ix1 = np.random.binomial(1, 0.5, size=a.shape[1]).astype(bool) @@ -888,7 +867,6 @@ def _test_get_coordinate_selection(a, z, selection): # noinspection PyStatementEffect def test_get_coordinate_selection_1d(): - # setup a = np.arange(1050, dtype=int) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) @@ -932,7 +910,6 @@ def test_get_coordinate_selection_1d(): def test_get_coordinate_selection_2d(): - # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) @@ -1027,7 +1004,6 @@ def test_set_coordinate_selection_1d(): def test_set_coordinate_selection_2d(): - # setup v = np.arange(10000, dtype=int).reshape(1000, 10) a = np.empty_like(v) @@ -1258,7 +1234,6 @@ def _test_get_mask_selection(a, z, selection): # noinspection PyStatementEffect def test_get_mask_selection_1d(): - # setup a = np.arange(1050, dtype=int) z = zarr.create(shape=a.shape, chunks=100, dtype=a.dtype) @@ -1285,7 +1260,6 @@ def test_get_mask_selection_1d(): # noinspection 
PyStatementEffect def test_get_mask_selection_2d(): - # setup a = np.arange(10000, dtype=int).reshape(1000, 10) z = zarr.create(shape=a.shape, chunks=(300, 3), dtype=a.dtype) @@ -1318,7 +1292,6 @@ def _test_set_mask_selection(v, a, z, selection): def test_set_mask_selection_1d(): - # setup v = np.arange(1050, dtype=int) a = np.empty_like(v) @@ -1338,7 +1311,6 @@ def test_set_mask_selection_1d(): def test_set_mask_selection_2d(): - # setup v = np.arange(10000, dtype=int).reshape(1000, 10) a = np.empty_like(v) @@ -1352,7 +1324,6 @@ def test_set_mask_selection_2d(): def test_get_selection_out(): - # basic selections a = np.arange(1050) z = zarr.create(shape=1050, chunks=100, dtype=a.dtype) @@ -1426,7 +1397,6 @@ def test_get_selection_out(): def test_get_selections_with_fields(): - a = [("aaa", 1, 4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)] a = np.array(a, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) z = zarr.create(shape=a.shape, chunks=2, dtype=a.dtype, fill_value=None) @@ -1444,7 +1414,6 @@ def test_get_selections_with_fields(): ] for fields in fields_fixture: - # total selection expect = a[fields] actual = z.get_basic_selection(Ellipsis, fields=fields) @@ -1534,7 +1503,6 @@ def test_get_selections_with_fields(): def test_set_selections_with_fields(): - v = [("aaa", 1, 4.2), ("bbb", 2, 8.4), ("ccc", 3, 12.6)] v = np.array(v, dtype=[("foo", "S3"), ("bar", "i4"), ("baz", "f8")]) a = np.empty_like(v) @@ -1553,7 +1521,6 @@ def test_set_selections_with_fields(): ] for fields in fields_fixture: - # currently multi-field assignment is not supported in numpy, so we won't support # it either if isinstance(fields, list) and len(fields) > 1: @@ -1567,7 +1534,6 @@ def test_set_selections_with_fields(): z.set_mask_selection([True, False, True], v, fields=fields) else: - if isinstance(fields, list) and len(fields) == 1: # work around numpy does not support multi-field assignment even if there # is only one field @@ -1752,7 +1718,6 @@ def test_accessed_chunks(shape, chunks, ops): z = zarr.create(shape=shape, chunks=chunks, store=store) for ii, (optype, slices) in enumerate(ops): - # Resolve the slices into the accessed chunks for each dimension chunks_per_dim = [] for N, C, sl in zip(shape, chunks, slices): diff --git a/tests/test_info.py b/tests/test_info.py index 7fb6feb11b..96eae999f4 100644 --- a/tests/test_info.py +++ b/tests/test_info.py @@ -7,7 +7,6 @@ @pytest.mark.parametrize("array_size", [10, 15000]) def test_info(array_size): - # setup g = zarr.group(store=dict(), chunk_store=dict(), synchronizer=zarr.ThreadSynchronizer()) g.create_group("foo") diff --git a/tests/test_meta.py b/tests/test_meta.py index db50560c8e..50f51929ef 100644 --- a/tests/test_meta.py +++ b/tests/test_meta.py @@ -34,7 +34,6 @@ def assert_json_equal(expect, actual): def test_encode_decode_array_1(): - meta = dict( shape=(100,), chunks=(10,), @@ -76,7 +75,6 @@ def test_encode_decode_array_1(): def test_encode_decode_array_2(): - # some variations df = Delta(astype=" None: """ - Test that `Group.children` returns correct values, i.e. the arrays and groups + Test that `Group.members` returns correct values, i.e. the arrays and groups (explicit and implicit) contained in that group. 
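Members are returned as (name, node) pairs; for the hierarchy built below, the expected names are "subgroup", "subarray", and "implicit_subgroup".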
""" @@ -30,12 +31,14 @@ def test_group_children(store: MemoryStore | LocalStore): store_path=StorePath(store=store, path=path), ) group = Group(agroup) + members_expected = {} - subgroup = group.create_group("subgroup") + members_expected["subgroup"] = group.create_group("subgroup") # make a sub-sub-subgroup, to ensure that the children calculation doesn't go # too deep in the hierarchy - _ = subgroup.create_group("subsubgroup") - subarray = group.create_array( + _ = members_expected["subgroup"].create_group("subsubgroup") + + members_expected["subarray"] = group.create_array( "subarray", shape=(100,), dtype="uint8", chunk_shape=(10,), exists_ok=True ) @@ -46,26 +49,15 @@ def test_group_children(store: MemoryStore | LocalStore): # this creates an implicit group called implicit_subgroup sync(store.set(f"{path}/implicit_subgroup/extra_object", b"000000")) # make the implicit subgroup - implicit_subgroup = Group( + members_expected["implicit_subgroup"] = Group( AsyncGroup( metadata=GroupMetadata(), store_path=StorePath(store=store, path=f"{path}/implicit_subgroup"), ) ) - # note: these assertions are order-independent, because it is not clear - # if group.children guarantees a particular order for the children. - # If order is not guaranteed, then the better version of this test is - # to compare two sets, but presently neither the group nor array classes are hashable. - print('getting children') - observed = group.children - print(observed) - print(list([subgroup, subarray, implicit_subgroup])) - assert len(observed) == 3 - assert subarray in observed - assert implicit_subgroup in observed - assert subgroup in observed - - + members_observed = group.members + # members are not guaranteed to be ordered, so sort before comparing + assert sorted(dict(members_observed)) == sorted(members_expected) @pytest.mark.parametrize("store", (("local", "memory")), indirect=["store"]) @@ -114,7 +106,7 @@ def test_group(store: MemoryStore | LocalStore) -> None: ) def test_group_create( store: MemoryStore | LocalStore, exists_ok: bool, runtime_configuration: RuntimeConfiguration -): +) -> None: """ Test that `Group.create` works as expected. """ @@ -151,12 +143,12 @@ async def test_asyncgroup_create( exists_ok: bool, zarr_format: ZarrFormat, runtime_configuration: RuntimeConfiguration, -): +) -> None: """ Test that `AsyncGroup.create` works as expected. 
""" attributes = {"foo": 100} - group = await AsyncGroup.create( + agroup = await AsyncGroup.create( store, attributes=attributes, exists_ok=exists_ok, @@ -164,13 +156,13 @@ async def test_asyncgroup_create( runtime_configuration=runtime_configuration, ) - assert group.metadata == GroupMetadata(zarr_format=zarr_format, attributes=attributes) - assert group.store_path == make_store_path(store) - assert group.runtime_configuration == runtime_configuration + assert agroup.metadata == GroupMetadata(zarr_format=zarr_format, attributes=attributes) + assert agroup.store_path == make_store_path(store) + assert agroup.runtime_configuration == runtime_configuration if not exists_ok: with pytest.raises(AssertionError): - group = await AsyncGroup.create( + agroup = await AsyncGroup.create( store, attributes=attributes, exists_ok=exists_ok, @@ -179,6 +171,28 @@ async def test_asyncgroup_create( ) +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_attrs(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: + attributes = {"foo": 100} + agroup = await AsyncGroup.create(store, zarr_format=zarr_format, attributes=attributes) + + assert agroup.attrs == agroup.metadata.attributes == attributes + + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_info(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: + agroup = await AsyncGroup.create( # noqa + store, + zarr_format=zarr_format, + ) + pytest.xfail("Info is not implemented for metadata yet") + # assert agroup.info == agroup.metadata.info + + @pytest.mark.asyncio @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) @pytest.mark.parametrize("zarr_format", (2, 3)) @@ -207,6 +221,16 @@ async def test_asyncgroup_open( assert group_w.attrs == group_w.attrs == attributes assert group_w == group_r + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (pytest.param(2, marks=pytest.mark.xfail), 3)) +async def test_asyncgroup_open_wrong_format( + store: LocalStore | MemoryStore, + zarr_format: ZarrFormat, +) -> None: + _ = await AsyncGroup.create(store=store, exists_ok=False, zarr_format=zarr_format) + # try opening with the wrong zarr format if zarr_format == 3: zarr_format_wrong = 2 @@ -215,8 +239,7 @@ async def test_asyncgroup_open( else: assert False - # todo: get more specific than this - with pytest.raises(ValueError): + with pytest.raises(FileNotFoundError): await AsyncGroup.open(store=store, zarr_format=zarr_format_wrong) @@ -230,7 +253,7 @@ async def test_asyncgroup_open( {"zarr_format": 2, "attributes": {"foo": 100}}, ), ) -def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, Any]): +def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, Any]) -> None: """ Test that we can create an AsyncGroup from a dict """ @@ -247,8 +270,11 @@ def test_asyncgroup_from_dict(store: MemoryStore | LocalStore, data: dict[str, A # todo: replace this with a declarative API where we model a full hierarchy @pytest.mark.asyncio @pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) -@pytest.mark.parametrize("zarr_format", (2, 3)) -async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat): 
+@pytest.mark.parametrize( + "zarr_format", + (pytest.param(2, marks=pytest.mark.xfail(reason="V2 arrays cannot be created yet.")), 3), +) +async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: """ Create an `AsyncGroup`, then create members of that group, and ensure that we can access those members via the `AsyncGroup.getitem` method. @@ -268,3 +294,150 @@ async def test_asyncgroup_getitem(store: LocalStore | MemoryStore, zarr_format: # check that asking for a nonexistent key raises KeyError with pytest.raises(KeyError): await agroup.getitem("foo") + + +# todo: replace this with a declarative API where we model a full hierarchy +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize( + "zarr_format", + (pytest.param(2, marks=pytest.mark.xfail(reason="V2 arrays cannot be created yet.")), 3), +) +async def test_asyncgroup_delitem(store: LocalStore | MemoryStore, zarr_format: ZarrFormat) -> None: + agroup = await AsyncGroup.create(store=store, zarr_format=zarr_format) + sub_array_path = "sub_array" + _ = await agroup.create_array( + path=sub_array_path, shape=(10,), dtype="uint8", chunk_shape=(2,), attributes={"foo": 100} + ) + await agroup.delitem(sub_array_path) + + # todo: clean up the code duplication here + if zarr_format == 2: + assert not await agroup.store_path.store.exists(sub_array_path + "/" + ".zarray") + assert not await agroup.store_path.store.exists(sub_array_path + "/" + ".zattrs") + elif zarr_format == 3: + assert not await agroup.store_path.store.exists(sub_array_path + "/" + "zarr.json") + else: + assert False + + sub_group_path = "sub_group" + _ = await agroup.create_group(sub_group_path, attributes={"foo": 100}) + await agroup.delitem(sub_group_path) + if zarr_format == 2: + assert not await agroup.store_path.store.exists(sub_group_path + "/" + ".zgroup") + assert not await agroup.store_path.store.exists(sub_group_path + "/" + ".zattrs") + elif zarr_format == 3: + assert not await agroup.store_path.store.exists(sub_group_path + "/" + "zarr.json") + else: + assert False + + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize( + "runtime_configuration", (RuntimeConfiguration(), RuntimeConfiguration(order="F")) +) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_create_group( + store: LocalStore | MemoryStore, + zarr_format: ZarrFormat, + runtime_configuration: RuntimeConfiguration, +) -> None: + agroup = await AsyncGroup.create( + store=store, zarr_format=zarr_format, runtime_configuration=RuntimeConfiguration() + ) + sub_node_path = "sub_group" + attributes = {"foo": 999} + subnode = await agroup.create_group( + path=sub_node_path, attributes=attributes, runtime_configuration=runtime_configuration + ) + + assert isinstance(subnode, AsyncGroup) + assert subnode.runtime_configuration == runtime_configuration + assert subnode.attrs == attributes + assert subnode.store_path.path == sub_node_path + assert subnode.store_path.store == store + assert subnode.metadata.zarr_format == zarr_format + + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize( + "runtime_configuration", (RuntimeConfiguration(), RuntimeConfiguration(order="F")) +) +@pytest.mark.parametrize( + "zarr_format", + (pytest.param(2, marks=pytest.mark.xfail(reason="V2 arrays cannot be created yet")), 3), +) +async def
test_asyncgroup_create_array( + store: LocalStore | MemoryStore, + runtime_configuration: RuntimeConfiguration, + zarr_format: ZarrFormat, +) -> None: + """ + Test that the AsyncGroup.create_array method works correctly. We ensure that array properties + specified in create_array are present on the resulting array. + """ + + agroup = await AsyncGroup.create( + store=store, zarr_format=zarr_format, runtime_configuration=runtime_configuration + ) + + shape = (10,) + dtype = "uint8" + chunk_shape = (4,) + attributes = {"foo": 100} + + sub_node_path = "sub_array" + subnode = await agroup.create_array( + path=sub_node_path, + shape=shape, + dtype=dtype, + chunk_shape=chunk_shape, + attributes=attributes, + runtime_configuration=runtime_configuration, + ) + assert isinstance(subnode, AsyncArray) + assert subnode.runtime_configuration == runtime_configuration + assert subnode.attrs == attributes + assert subnode.store_path.path == sub_node_path + assert subnode.store_path.store == store + assert subnode.shape == shape + assert subnode.dtype == dtype + # todo: fix the type annotation of array.metadata.chunk_grid so that we get some autocomplete + # here. + assert subnode.metadata.chunk_grid.chunk_shape == chunk_shape + assert subnode.metadata.zarr_format == zarr_format + + +@pytest.mark.asyncio +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +async def test_asyncgroup_update_attributes( + store: LocalStore | MemoryStore, zarr_format: ZarrFormat +) -> None: + """ + Test that the AsyncGroup.update_attributes method works correctly. + """ + attributes_old = {"foo": 10} + attributes_new = {"baz": "new"} + agroup = await AsyncGroup.create( + store=store, zarr_format=zarr_format, attributes=attributes_old + ) + + agroup_new_attributes = await agroup.update_attributes(attributes_new) + assert agroup_new_attributes.attrs == attributes_new + + +@pytest.mark.parametrize("store", ("local", "memory"), indirect=["store"]) +@pytest.mark.parametrize("zarr_format", (2, 3)) +@pytest.mark.parametrize( + "sync_configuration", (SyncConfiguration(), SyncConfiguration(concurrency=2)) +) +def test_group_init( + store: LocalStore | MemoryStore, zarr_format: ZarrFormat, sync_configuration: SyncConfiguration +) -> None: + agroup = sync(AsyncGroup.create(store=store, zarr_format=zarr_format)) + group = Group(_async_group=agroup, _sync_configuration=sync_configuration) + assert group._async_group == agroup + assert group._sync_configuration == sync_configuration diff --git a/tests/v3/test_metadata.py b/tests/v3/test_metadata.py index c7ca0f2e1a..e477842259 100644 --- a/tests/v3/test_metadata.py +++ b/tests/v3/test_metadata.py @@ -7,6 +7,7 @@ from zarr.v3.metadata import parse_dimension_names, parse_zarr_format_v2, parse_zarr_format_v3 + # todo: test def test_datatype_enum(): ...
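The test_datatype_enum stub above is still a todo; a minimal sketch of a test for the parse helpers imported at the top of this file might look like the following, assuming parse_zarr_format_v3 returns the literal 3 unchanged and raises ValueError on anything else (an assumption inferred from the function name, not confirmed by this diff):

import pytest

from zarr.v3.metadata import parse_zarr_format_v3


def test_parse_zarr_format_v3() -> None:
    # assumed behavior: a valid format value passes through unchanged
    assert parse_zarr_format_v3(3) == 3
    # assumed behavior: any other value is rejected
    with pytest.raises(ValueError):
        parse_zarr_format_v3(2)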
diff --git a/tests/v3/test_storage.py b/tests/v3/test_storage.py index 88d83c8e85..fd2af079c0 100644 --- a/tests/v3/test_storage.py +++ b/tests/v3/test_storage.py @@ -59,9 +59,8 @@ async def test_local_store_get( expected = payload[start : start + length] assert observed == expected - # test that it's an error to get bytes from a file that doesn't exist - with pytest.raises(FileNotFoundError): - await local_store.get(object_name + "_absent", byte_range=byte_range) + # test that getting from a file that doesn't exist returns None + assert await local_store.get(object_name + "_absent", byte_range=byte_range) is None @pytest.mark.asyncio @@ -81,7 +80,11 @@ async def test_local_store_get_partial( # use the utf-8 encoding of the key as the bytes for key, _ in key_ranges: payload = bytes(key, encoding="utf-8") - (store.root / key).write_bytes(payload) + target_path: Path = store.root / key + # create the parent directories + target_path.parent.mkdir(parents=True, exist_ok=True) + # write bytes + target_path.write_bytes(payload) results = await store.get_partial_values(key_ranges) for idx, observed in enumerate(results):
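As a usage sketch of the None-on-missing contract asserted above (LocalStore construction from a root Path mirrors make_store_path in src/zarr/v3/store/core.py; the demo path and the default byte_range are assumptions):

import asyncio
from pathlib import Path

from zarr.v3.store.local import LocalStore


async def main() -> None:
    store = LocalStore(Path("/tmp/zarr-demo"))  # hypothetical root directory
    # per the test above, get() returns None for a missing key instead of
    # raising FileNotFoundError, so callers can branch on the result
    assert await store.get("absent/key") is None


asyncio.run(main())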