feat(memh5): add option to only include specific datasets when copying
ljgray committed Jul 21, 2023
1 parent fcd3cc1 commit fe5dfdb
Showing 1 changed file with 23 additions and 3 deletions.
caput/memh5.py
@@ -2038,7 +2038,9 @@ def flush(self):
         if self.ondisk:
             self._data.flush()
 
-    def copy(self, shared: list = [], shallow: bool = False) -> MemDiskGroup:
+    def copy(
+        self, shared: list = [], shallow: bool = False, include: list = []
+    ) -> MemDiskGroup:
         """Return a deep copy of this class or subclass.
 
         Parameters
@@ -2047,6 +2049,9 @@ def copy(self, shared: list = [], shallow: bool = False) -> MemDiskGroup:
             dataset names to share (i.e. don't deep copy)
         shallow
             True if this should be a shallow copy
+        include
+            If provided, only the datasets in this list will be included. Otherwise, all
+            datasets are copied. This does not change the behaviour of `shared`.
 
         Returns
         -------
@@ -2056,7 +2061,11 @@ def copy(self, shared: list = [], shallow: bool = False) -> MemDiskGroup:
         cls = self.__class__.__new__(self.__class__)
         MemDiskGroup.__init__(cls, distributed=self.distributed, comm=self.comm)
         deep_group_copy(
-            self._data, cls._data, deep_copy_dsets=not shallow, shared=shared
+            self._data,
+            cls._data,
+            deep_copy_dsets=not shallow,
+            shared=shared,
+            include=include,
         )
 
         return cls
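
A minimal usage sketch of the new parameter (the container `data` and the dataset names "vis", "weight" and "gain" are hypothetical, not part of this commit):

    # Hypothetical container holding datasets "vis", "weight" and "gain".
    # Keep only "vis" and "weight" in the copy; "vis" points at the original
    # storage rather than being deep-copied, and "gain" is dropped entirely.
    new_data = data.copy(shared=["vis"], include=["vis", "weight"])

As the docstring notes, `include` does not change the behaviour of `shared`: a dataset listed in both is still shared, while a shared dataset omitted from `include` is skipped like any other.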
@@ -2523,6 +2532,7 @@ def deep_group_copy(
     postprocess=None,
     deep_copy_dsets=False,
     shared=[],
+    include=[],
 ):
     """Copy full data tree from one group to another.
 
@@ -2576,6 +2586,9 @@
         List of datasets to share, if `deep_copy_dsets` is True. Otherwise, no effect.
         Shared datasets just point to the existing object in g1 storage. Axis selections
         cannot be applied to shared datasets.
+    include : list, optional
+        If provided, only the datasets in this list will be included. Otherwise, all
+        datasets are copied. This does not change the behaviour of `shared`.
 
     Returns
     -------
@@ -2699,6 +2712,7 @@ def _prepare_compression_args(dset):
 
     # Make sure shared dataset names are properly formatted
     shared = {"/" + k if k[0] != "/" else k for k in shared}
+    include = {"/" + k if k[0] != "/" else k for k in include}
 
     # Do a non-recursive traversal of the tree, recreating the structure and attributes,
    # and copying over any non-distributed datasets
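
Both `shared` and `include` are normalised to absolute dataset paths before the traversal, so callers can pass names with or without the leading slash. A standalone illustration of the set comprehension above (not library code):

    names = ["vis", "/gain"]
    normalised = {"/" + k if k[0] != "/" else k for k in names}
    print(normalised)  # {'/vis', '/gain'}

Since the traversal compares absolute names such as "/vis", `include=["vis"]` and `include=["/vis"]` behave identically.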
@@ -2716,18 +2730,24 @@ def _prepare_compression_args(dset):
             stack += [entry[k] for k in sorted(entry, reverse=True)]
 
         elif key in shared:
+            if include and key not in include:
+                continue
+
             # Make sure that we aren't trying to apply a selection to this dataset
             if _get_selection(entry) != slice(None):
                 raise ValueError(
                     f"Cannot apply a selection to a shared dataset ({entry.name})"
                 )
-            # Just point to the existing dataset
+            # Point to the existing dataset
             parent_name, name = posixpath.split(posixpath.join(g2.name, key))
             parent_name = format_abs_path(parent_name)
             # Get the proper storage location for this dataset
             g2[parent_name]._get_storage()[name] = g1._get_storage()[key]
 
         else:
+            if include and key not in include:
+                continue
+
             # Copy over this dataset
             dset_args = _prepare_dataset(entry)
             compression_kwargs = _prepare_compression_args(entry)
