Skip to content

Commit

Permalink
feat(memh5): optionally skip datasets when copying a group
Browse files (browse the repository at this point in the history)
  • Loading branch information
ljgray committed May 21, 2024
1 parent e284a44 commit 8c4ff16
Showing 1 changed file with 20 additions and 3 deletions.
23 changes: 20 additions & 3 deletions caput/memh5.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2034,13 +2034,17 @@ def flush(self):
if self.ondisk:
self._data.flush()

def copy(self, shared: list = [], shallow: bool = False) -> MemDiskGroup:
def copy(
self, *, shared: list = [], skipped: list = [], shallow: bool = False
) -> MemDiskGroup:
"""Return a deep copy of this class or subclass.
Parameters
----------
shared
dataset names to share (i.e. don't deep copy)
skipped
dataset names to skip (do not add to new group)
shallow
True if this should be a shallow copy
Expand All @@ -2052,7 +2056,11 @@ def copy(self, shared: list = [], shallow: bool = False) -> MemDiskGroup:
cls = self.__class__.__new__(self.__class__)
MemDiskGroup.__init__(cls, distributed=self.distributed, comm=self.comm)
deep_group_copy(
self._data, cls._data, deep_copy_dsets=not shallow, shared=shared
self._data,
cls._data,
deep_copy_dsets=not shallow,
shared=shared,
skipped=skipped,
)

return cls
Expand Down Expand Up @@ -2521,6 +2529,7 @@ def deep_group_copy(
postprocess=None,
deep_copy_dsets=False,
shared=[],
skipped=[],
):
"""Copy full data tree from one group to another.
Expand Down Expand Up @@ -2574,6 +2583,9 @@ def deep_group_copy(
List of datasets to share, if `deep_copy_dsets` is True. Otherwise, no effect.
Shared datasets just point to the existing object in g1 storage. Axis selections
cannot be applied to shared datasets.
skipped : list, optional
List of datasets to skip. These datasets will not be initialized in the new
container. Use with caution.
Returns
-------
Expand Down Expand Up @@ -2695,8 +2707,9 @@ def _prepare_compression_args(dset):

return compression_kwargs

# Make sure shared dataset names are properly formatted
# Make sure shared and skipped dataset names are properly formatted
shared = {"/" + k if k[0] != "/" else k for k in shared}
skipped = {"/" + k if k[0] != "/" else k for k in skipped}

# Do a non-recursive traversal of the tree, recreating the structure and attributes,
# and copying over any non-distributed datasets
Expand All @@ -2713,6 +2726,10 @@ def _prepare_compression_args(dset):

stack += [entry[k] for k in sorted(entry, reverse=True)]

elif key in skipped:
# Ignore this dataset
continue

elif key in shared:
# Make sure that we aren't trying to apply a selection to this dataset
if _get_selection(entry) != slice(None):
Expand Down

0 comments on commit 8c4ff16

Please sign in to comment.