From 714b167200ac1a8a95f21bfddc4515a10a3634d0 Mon Sep 17 00:00:00 2001
From: ljgray
Date: Tue, 25 Apr 2023 11:45:23 -0700
Subject: [PATCH] fix(memh5): block axis downselection in shared datasets

---
 caput/memh5.py | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/caput/memh5.py b/caput/memh5.py
index 39e73040..83b3a857 100644
--- a/caput/memh5.py
+++ b/caput/memh5.py
@@ -2543,6 +2543,9 @@ def deep_group_copy(
     >>> list(g2["foo"]["bar"])
     [0, 1]
 
+    It is important to note that axis downselections cannot be applied to
+    shared datasets.
+
     Parameters
     ----------
     g1 : h5py.Group or zarr.Group
@@ -2590,10 +2593,9 @@ def deep_group_copy(
 
     to_file = isinstance(g2, file_format.module.Group)
 
-    # Prepare a dataset for writing out, applying selections and transforming any
-    # datatypes
-    # Returns: dict(dtype, shape, data_to_write)
-    def _prepare_dataset(dset):
+    # Get the selection associated with this dataset
+    # Returns: slice
+    def _get_selection(dset):
         # Look for a selection for this dataset (also try without the leading "/")
         try:
             selection = selections.get(
@@ -2602,6 +2604,14 @@ def _prepare_dataset(dset):
         except AttributeError:
             selection = slice(None)
 
+        return selection
+
+    # Prepare a dataset for writing out, applying selections and transforming any
+    # datatypes
+    # Returns: dict(dtype, shape, data_to_write)
+    def _prepare_dataset(dset):
+        selection = _get_selection(dset)
+
         # Check if this is a distributed dataset and figure out if we can make this work
         # out
         if to_file and isinstance(dset, MemDatasetDistributed):
@@ -2708,6 +2718,11 @@ def _prepare_compression_args(dset):
             stack += [entry[k] for k in sorted(entry, reverse=True)]
 
         elif key in shared:
+            # Make sure that we aren't trying to apply a selection to this dataset
+            if _get_selection(entry) != slice(None):
+                raise ValueError(
+                    f"Cannot apply a selection to a shared dataset ({entry.name})"
+                )
             # Just point to the existing dataset
             parent_name, name = posixpath.split(posixpath.join(g2.name, key))
             parent_name = format_abs_path(parent_name)