fix(memh5): block axis downselection in shared datasets
ljgray committed May 10, 2024
1 parent 25e8916 commit 098847e
Showing 1 changed file with 22 additions and 9 deletions.
31 changes: 22 additions & 9 deletions caput/memh5.py
@@ -2541,6 +2541,8 @@ def deep_group_copy(
     >>> list(g2["foo"]["bar"])
     [0, 1]
+    Axis downselections cannot be applied to shared datasets.
     Parameters
     ----------
     g1 : h5py.Group or zarr.Group
@@ -2550,7 +2552,7 @@
     selections : dict
         If this is not None, it should have a subset of the same hierarchical structure
         as g1, but ultimately describe axis selections for group entries as valid
-        numpy indexes.
+        numpy indexes. Selections cannot be applied to shared datasets.
     convert_attribute_strings : bool, optional
         Convert string attributes (or lists/arrays of them) to ensure that they are
         unicode.
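
As a rough sketch of the selections mapping described above (the file names and dataset paths here are purely illustrative, and behaviour may vary between caput versions):

import numpy as np
import h5py

from caput import memh5

# Hypothetical files and dataset paths, for illustration only.
with h5py.File("src.h5", "w") as g1, h5py.File("dst.h5", "w") as g2:
    g1.create_dataset("foo/bar", data=np.arange(10))

    # Keys are dataset paths within g1 (with or without the leading "/");
    # values are anything numpy accepts as an index along the dataset axes.
    selections = {"foo/bar": slice(0, 2)}

    memh5.deep_group_copy(g1, g2, selections=selections)
    # dst.h5 now contains foo/bar restricted to its first two elements.
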
@@ -2567,12 +2569,11 @@
         entries, and can modify either.
     deep_copy_dsets : bool, optional
         Explicitly deep copy all datasets. This will only alter behaviour when copying
-        from memory to memory. XXX: enabling this in places where it is not currently
-        enabled could break legacy code, so be very careful
+        from memory to memory.
     shared : list, optional
         List of datasets to share, if `deep_copy_dsets` is True. Otherwise, no effect.
-        Shared datasets just point to the existing object in g1 storage, and override
-        any other behaviour
+        Shared datasets just point to the existing object in g1 storage. Axis selections
+        cannot be applied to shared datasets.
 
     Returns
     -------
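
A minimal sketch of a memory-to-memory copy using these two options. The dataset names are hypothetical, and the exact key format expected in shared (with or without the leading "/") is an assumption here, chosen to match the entry.name comparison in the shared-dataset branch further down:

import numpy as np

from caput import memh5

g1 = memh5.MemGroup()
g1.create_dataset("vis", data=np.arange(6.0))
g1.create_dataset("weight", data=np.ones(6))

g2 = memh5.MemGroup()

# Deep copy everything except "vis", which should just point at the array
# already held in g1's storage.
# NOTE: the absolute-path key "/vis" is an assumption, not confirmed by the diff.
memh5.deep_group_copy(g1, g2, deep_copy_dsets=True, shared=["/vis"])
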
@@ -2588,10 +2589,9 @@

     to_file = isinstance(g2, file_format.module.Group)

-    # Prepare a dataset for writing out, applying selections and transforming any
-    # datatypes
-    # Returns: dict(dtype, shape, data_to_write)
-    def _prepare_dataset(dset):
+    # Get the selection associated with this dataset
+    # Returns: slice
+    def _get_selection(dset):
         # Look for a selection for this dataset (also try without the leading "/")
         try:
             selection = selections.get(
@@ -2600,6 +2600,14 @@ def _prepare_dataset(dset):
         except AttributeError:
             selection = slice(None)

+        return selection
+
+    # Prepare a dataset for writing out, applying selections and transforming any
+    # datatypes
+    # Returns: dict(dtype, shape, data_to_write)
+    def _prepare_dataset(dset):
+        selection = _get_selection(dset)
+
         # Check if this is a distributed dataset and figure out if we can make this work
         # out
         if to_file and isinstance(dset, MemDatasetDistributed):
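
The lookup in _get_selection can be reproduced in isolation; a standalone sketch using a plain dict and a dataset name in place of the real MemDataset object:

def lookup_selection(selections, name):
    # Try the absolute name first, then the same name without the leading "/",
    # falling back to a full slice if neither key is present.
    try:
        return selections.get(name, selections.get(name[1:], slice(None)))
    except AttributeError:
        # selections is None: no downselection anywhere
        return slice(None)


print(lookup_selection({"foo/bar": slice(0, 2)}, "/foo/bar"))  # slice(0, 2, None)
print(lookup_selection(None, "/foo/bar"))  # slice(None, None, None)
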
@@ -2706,6 +2714,11 @@ def _prepare_compression_args(dset):
             stack += [entry[k] for k in sorted(entry, reverse=True)]

         elif key in shared:
+            # Make sure that we aren't trying to apply a selection to this dataset
+            if _get_selection(entry) != slice(None):
+                raise ValueError(
+                    f"Cannot apply a selection to a shared dataset ({entry.name})"
+                )
             # Just point to the existing dataset
             parent_name, name = posixpath.split(posixpath.join(g2.name, key))
             parent_name = format_abs_path(parent_name)
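
Taken together, the new guard means that requesting both a shared dataset and a selection on it now fails loudly instead of silently ignoring the selection. A hedged sketch of what that looks like, where the dataset name and the absolute-path keys for selections and shared are assumptions for illustration:

import numpy as np

from caput import memh5

g1 = memh5.MemGroup()
g1.create_dataset("vis", data=np.arange(10))
g2 = memh5.MemGroup()

try:
    memh5.deep_group_copy(
        g1,
        g2,
        selections={"/vis": slice(0, 5)},  # downselect "vis"...
        deep_copy_dsets=True,
        shared=["/vis"],  # ...while also asking for it to be shared
    )
except ValueError as err:
    print(err)  # Cannot apply a selection to a shared dataset (/vis)
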