From 098847e15c1242b76056a37766f3315a60b1dc0c Mon Sep 17 00:00:00 2001
From: ljgray
Date: Tue, 25 Apr 2023 11:45:23 -0700
Subject: [PATCH] fix(memh5): block axis downselection in shared datasets

---
 caput/memh5.py | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/caput/memh5.py b/caput/memh5.py
index d3ecf2c6..73889df9 100644
--- a/caput/memh5.py
+++ b/caput/memh5.py
@@ -2541,6 +2541,8 @@ def deep_group_copy(
     >>> list(g2["foo"]["bar"])
     [0, 1]
 
+    Axis downselections cannot be applied to shared datasets.
+
     Parameters
     ----------
     g1 : h5py.Group or zarr.Group
@@ -2550,7 +2552,7 @@ def deep_group_copy(
     selections : dict
         If this is not None, it should have a subset of the same hierarchical
         structure as g1, but ultimately describe axis selections for group entries as valid
-        numpy indexes.
+        numpy indexes. Selections cannot be applied to shared datasets.
     convert_attribute_strings : bool, optional
         Convert string attributes (or lists/arrays of them) to ensure that they are
         unicode.
@@ -2567,12 +2569,11 @@ def deep_group_copy(
         entries, and can modify either.
     deep_copy_dsets : bool, optional
         Explicitly deep copy all datasets. This will only alter behaviour when copying
-        from memory to memory. XXX: enabling this in places where it is not currently
-        enabled could break legacy code, so be very careful
+        from memory to memory.
     shared : list, optional
         List of datasets to share, if `deep_copy_dsets` is True. Otherwise, no effect.
-        Shared datasets just point to the existing object in g1 storage, and override
-        any other behaviour
+        Shared datasets just point to the existing object in g1 storage. Axis selections
+        cannot be applied to shared datasets.
 
     Returns
     -------
@@ -2588,10 +2589,9 @@ def deep_group_copy(
 
     to_file = isinstance(g2, file_format.module.Group)
 
-    # Prepare a dataset for writing out, applying selections and transforming any
-    # datatypes
-    # Returns: dict(dtype, shape, data_to_write)
-    def _prepare_dataset(dset):
+    # Get the selection associated with this dataset
+    # Returns: slice
+    def _get_selection(dset):
         # Look for a selection for this dataset (also try without the leading "/")
         try:
             selection = selections.get(
@@ -2600,6 +2600,14 @@ def _prepare_dataset(dset):
         except AttributeError:
             selection = slice(None)
 
+        return selection
+
+    # Prepare a dataset for writing out, applying selections and transforming any
+    # datatypes
+    # Returns: dict(dtype, shape, data_to_write)
+    def _prepare_dataset(dset):
+        selection = _get_selection(dset)
+
         # Check if this is a distributed dataset and figure out if we can make this work
         # out
         if to_file and isinstance(dset, MemDatasetDistributed):
@@ -2706,6 +2714,11 @@ def _prepare_compression_args(dset):
             stack += [entry[k] for k in sorted(entry, reverse=True)]
 
         elif key in shared:
+            # Make sure that we aren't trying to apply a selection to this dataset
+            if _get_selection(entry) != slice(None):
+                raise ValueError(
+                    f"Cannot apply a selection to a shared dataset ({entry.name})"
+                )
             # Just point to the existing dataset
             parent_name, name = posixpath.split(posixpath.join(g2.name, key))
             parent_name = format_abs_path(parent_name)
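
Usage sketch of the behaviour this change enforces, assuming the MemGroup API
and the deep_group_copy keyword arguments documented above (`selections`,
`deep_copy_dsets`, `shared`); the "/dset" path form used for the selection and
shared keys below is an assumption, not taken from the patch.

    import numpy as np
    from caput import memh5

    # Build a small in-memory group with one dataset
    g1 = memh5.MemGroup()
    g1.create_dataset("dset", data=np.arange(10))

    # Sharing without a selection is fine: the copy points at g1's storage
    g2 = memh5.MemGroup()
    memh5.deep_group_copy(g1, g2, deep_copy_dsets=True, shared=["/dset"])

    # A selection on a shared dataset is now rejected rather than silently ignored
    g3 = memh5.MemGroup()
    try:
        memh5.deep_group_copy(
            g1,
            g3,
            selections={"/dset": np.s_[:5]},
            deep_copy_dsets=True,
            shared=["/dset"],
        )
    except ValueError as err:
        print(err)  # e.g. "Cannot apply a selection to a shared dataset (/dset)"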