feat(memh5): optionally skip datasets when copying a group

radiocosmology · May 21, 2024 · 8c4ff16 · 8c4ff16
1 parent e284a44
commit 8c4ff16
Showing 1 changed file with 20 additions and 3 deletions.
diff --git a/caput/memh5.py b/caput/memh5.py
@@ -2034,13 +2034,17 @@ def flush(self):
         if self.ondisk:
             self._data.flush()
 
-    def copy(self, shared: list = [], shallow: bool = False) -> MemDiskGroup:
+    def copy(
+        self, *, shared: list = [], skipped: list = [], shallow: bool = False
+    ) -> MemDiskGroup:
         """Return a deep copy of this class or subclass.
 
         Parameters
         ----------
         shared
             dataset names to share (i.e. don't deep copy)
+        skipped
+            dataset names to skip (do not add to new group)
         shallow
             True if this should be a shallow copy
 
@@ -2052,7 +2056,11 @@ def copy(self, shared: list = [], shallow: bool = False) -> MemDiskGroup:
         cls = self.__class__.__new__(self.__class__)
         MemDiskGroup.__init__(cls, distributed=self.distributed, comm=self.comm)
         deep_group_copy(
-            self._data, cls._data, deep_copy_dsets=not shallow, shared=shared
+            self._data,
+            cls._data,
+            deep_copy_dsets=not shallow,
+            shared=shared,
+            skipped=skipped,
         )
 
         return cls
@@ -2521,6 +2529,7 @@ def deep_group_copy(
     postprocess=None,
     deep_copy_dsets=False,
     shared=[],
+    skipped=[],
 ):
     """Copy full data tree from one group to another.
 
@@ -2574,6 +2583,9 @@ def deep_group_copy(
         List of datasets to share, if `deep_copy_dsets` is True. Otherwise, no effect.
         Shared datasets just point to the existing object in g1 storage. Axis selections
         cannot be applied to shared datasets.
+    skipped : list, optional
+        List of datasets to skip. These datasets will not be initialized in the new
+        container. Use with caution.
 
     Returns
     -------
@@ -2695,8 +2707,9 @@ def _prepare_compression_args(dset):
 
         return compression_kwargs
 
-    # Make sure shared dataset names are properly formatted
+    # Make sure shared and skipped dataset names are properly formatted
     shared = {"/" + k if k[0] != "/" else k for k in shared}
+    skipped = {"/" + k if k[0] != "/" else k for k in skipped}
 
     # Do a non-recursive traversal of the tree, recreating the structure and attributes,
     # and copying over any non-distributed datasets
@@ -2713,6 +2726,10 @@ def _prepare_compression_args(dset):
 
             stack += [entry[k] for k in sorted(entry, reverse=True)]
 
+        elif key in skipped:
+            # Ignore this dataset
+            continue
+
         elif key in shared:
             # Make sure that we aren't trying to apply a selection to this dataset
             if _get_selection(entry) != slice(None):