Skip to content

Commit

Permalink
Merge branch 'dev' into internal-link-checking
Browse files Browse the repository at this point in the history
  • Loading branch information
rly authored Feb 8, 2024
2 parents b9ab2f8 + 047c37d commit dc74e36
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 53 deletions.
11 changes: 6 additions & 5 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
### Bug fixes
- Fix bug where namespaces were loaded in "w-" mode. @h-mayorquin [#1795](https://github.com/NeurodataWithoutBorders/pynwb/pull/1795)
- Fix bug where pynwb version was reported as "unknown" to readthedocs @stephprince [#1810](https://github.com/NeurodataWithoutBorders/pynwb/pull/1810)
- Fixed bug to allow linking of `TimeSeries.data` by setting the `data` constructor argument to another `TimeSeries`. @oruebel [#1766](https://github.com/NeurodataWithoutBorders/pynwb/pull/1766)

### Documentation and tutorial enhancements
- Add RemFile to streaming tutorial. @bendichter [#1761](https://github.com/NeurodataWithoutBorders/pynwb/pull/1761)
Expand All @@ -27,8 +28,8 @@
## PyNWB 2.5.0 (August 18, 2023)

### Enhancements and minor changes
- Add `TimeSeries.get_timestamps()`. @bendichter [#1741](https://github.com/NeurodataWithoutBorders/pynwb/pull/1741)
- Add `TimeSeries.get_data_in_units()`. @bendichter [#1745](https://github.com/NeurodataWithoutBorders/pynwb/pull/1745)
- Added `TimeSeries.get_timestamps()`. @bendichter [#1741](https://github.com/NeurodataWithoutBorders/pynwb/pull/1741)
- Added `TimeSeries.get_data_in_units()`. @bendichter [#1745](https://github.com/NeurodataWithoutBorders/pynwb/pull/1745)
- Updated `ExternalResources` name change to `HERD`, along with HDMF 3.9.0 being the new minimum. @mavaylon1 [#1754](https://github.com/NeurodataWithoutBorders/pynwb/pull/1754)

### Documentation and tutorial enhancements
Expand All @@ -41,15 +42,15 @@
## PyNWB 2.4.0 (July 23, 2023)

### Enhancements and minor changes
- Add support for `ExternalResources`. @mavaylon1 [#1684](https://github.com/NeurodataWithoutBorders/pynwb/pull/1684)
- Update links for making a release. @mavaylon1 [#1720](https://github.com/NeurodataWithoutBorders/pynwb/pull/1720)
- Added support for `ExternalResources`. @mavaylon1 [#1684](https://github.com/NeurodataWithoutBorders/pynwb/pull/1684)
- Updated links for making a release. @mavaylon1 [#1720](https://github.com/NeurodataWithoutBorders/pynwb/pull/1720)

### Bug fixes
- Fixed sphinx-gallery setting to correctly display index in the docs with sphinx-gallery>=0.11. @oruebel [#1733](https://github.com/NeurodataWithoutBorders/pynwb/pull/1733)

### Documentation and tutorial enhancements
- Added thumbnail for Optogentics tutorial. @oruebel [#1729](https://github.com/NeurodataWithoutBorders/pynwb/pull/1729)
- Update and fix errors in tutorials. @bendichter @oruebel
- Updated and fixed errors in tutorials. @bendichter @oruebel

## PyNWB 2.3.3 (June 26, 2023)

Expand Down
18 changes: 17 additions & 1 deletion src/pynwb/io/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,19 @@ def timestamps_carg(self, builder, manager):
else:
return tstamps_builder.data

@NWBContainerMapper.object_attr("data")
def data_attr(self, container, manager):
ret = container.fields.get('data')
if isinstance(ret, TimeSeries):
owner = ret
curr = owner.fields.get('data')
while isinstance(curr, TimeSeries):
owner = curr
curr = owner.fields.get('data')
data_builder = manager.build(owner)
ret = LinkBuilder(data_builder['data'], 'data')
return ret

@NWBContainerMapper.constructor_arg("data")
def data_carg(self, builder, manager):
# handle case where a TimeSeries is read and missing data
Expand Down Expand Up @@ -105,7 +118,10 @@ def unit_carg(self, builder, manager):
data_builder = manager.construct(target.parent)
else:
data_builder = target
unit_value = data_builder.attributes.get('unit')
if isinstance(data_builder, TimeSeries): # Data linked in another timeseries
unit_value = data_builder.unit
else: # DatasetBuilder owned by this timeseries
unit_value = data_builder.attributes.get('unit')
if unit_value is None:
return timeseries_cls.DEFAULT_UNIT
return unit_value
21 changes: 21 additions & 0 deletions tests/integration/hdf5/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,27 @@ def test_timestamps_linking(self):
tsb = nwbfile.acquisition['b']
self.assertIs(tsa.timestamps, tsb.timestamps)

def test_data_linking(self):
''' Test that data get linked to in TimeSeries '''
tsa = TimeSeries(name='a', data=np.linspace(0, 1, 1000), timestamps=np.arange(1000.), unit='m')
tsb = TimeSeries(name='b', data=tsa, timestamps=np.arange(1000.), unit='m')
tsc = TimeSeries(name='c', data=tsb, timestamps=np.arange(1000.), unit='m')
nwbfile = NWBFile(identifier='foo',
session_start_time=datetime(2017, 5, 1, 12, 0, 0, tzinfo=tzlocal()),
session_description='bar')
nwbfile.add_acquisition(tsa)
nwbfile.add_acquisition(tsb)
nwbfile.add_acquisition(tsc)
with NWBHDF5IO(self.path, 'w') as io:
io.write(nwbfile)
with NWBHDF5IO(self.path, 'r') as io:
nwbfile = io.read()
tsa = nwbfile.acquisition['a']
tsb = nwbfile.acquisition['b']
tsc = nwbfile.acquisition['c']
self.assertIs(tsa.data, tsb.data)
self.assertIs(tsa.data, tsc.data)


class TestImagesIO(AcquisitionH5IOMixin, TestCase):

Expand Down
186 changes: 139 additions & 47 deletions tests/integration/hdf5/test_modular_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,29 +14,35 @@
class TestTimeSeriesModular(TestCase):

def setUp(self):
self.start_time = datetime(1971, 1, 1, 12, tzinfo=tzutc())
# File paths
self.data_filename = os.path.join(os.getcwd(), 'test_time_series_modular_data.nwb')
self.link_filename = os.path.join(os.getcwd(), 'test_time_series_modular_link.nwb')

# Make the data container file write
self.start_time = datetime(1971, 1, 1, 12, tzinfo=tzutc())
self.data = np.arange(2000).reshape((1000, 2))
self.timestamps = np.linspace(0, 1, 1000)

# The container before roundtrip
self.container = TimeSeries(
name='data_ts',
unit='V',
data=self.data,
timestamps=self.timestamps
)
self.data_read_io = None # HDF5IO used for reading the main data file
self.read_data_nwbfile = None # The NWBFile read after roundtrip
self.read_data_container = None # self.container after roundtrip

self.data_filename = os.path.join(os.getcwd(), 'test_time_series_modular_data.nwb')
self.link_filename = os.path.join(os.getcwd(), 'test_time_series_modular_link.nwb')

self.read_container = None
self.link_read_io = None
self.data_read_io = None
# Variables for the second file which links the main data file
self.link_container = None # The container with the links before write
self.read_link_container = None # The container with the links after roundtrip
self.read_link_nwbfile = None # The NWBFile container containing link_container after roundtrip
self.link_read_io = None # HDF5IO use for reading the read_link_nwbfile

def tearDown(self):
if self.read_container:
self.read_container.data.file.close()
self.read_container.timestamps.file.close()
if self.read_link_container:
self.read_link_container.data.file.close()
self.read_link_container.timestamps.file.close()
if self.link_read_io:
self.link_read_io.close()
if self.data_read_io:
Expand Down Expand Up @@ -64,49 +70,83 @@ def roundtripContainer(self):
data_write_io.write(data_file)

# read data file
with HDF5IO(self.data_filename, 'r', manager=get_manager()) as self.data_read_io:
data_file_obt = self.data_read_io.read()

# write "link file" with timeseries.data that is an external link to the timeseries in "data file"
# also link timeseries.timestamps.data to the timeseries.timestamps in "data file"
with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io:
link_file = NWBFile(
session_description='a test file',
identifier='link_file',
session_start_time=self.start_time
)
self.link_container = TimeSeries(
name='test_mod_ts',
unit='V',
data=data_file_obt.get_acquisition('data_ts'), # test direct link
timestamps=H5DataIO(
data=data_file_obt.get_acquisition('data_ts').timestamps,
link_data=True # test with setting link data
)
)
link_file.add_acquisition(self.link_container)
link_write_io.write(link_file)
self.data_read_io = HDF5IO(self.data_filename, 'r', manager=get_manager())
self.read_data_nwbfile = self.data_read_io.read()
self.read_data_container = self.read_data_nwbfile.get_acquisition('data_ts')

# note that self.link_container contains a link to a dataset that is now closed
# write "link file" with timeseries.data that is an external link to the timeseries in "data file"
# also link timeseries.timestamps.data to the timeseries.timestamps in "data file"
with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io:
link_file = NWBFile(
session_description='a test file',
identifier='link_file',
session_start_time=self.start_time
)
self.link_container = TimeSeries(
name='test_mod_ts',
unit='V',
data=H5DataIO(
data=self.read_data_container.data,
link_data=True # test with setting link data
),
timestamps=H5DataIO(
data=self.read_data_container.timestamps,
link_data=True # test with setting link data
)
)
link_file.add_acquisition(self.link_container)
link_write_io.write(link_file)

# read the link file
self.link_read_io = HDF5IO(self.link_filename, 'r', manager=get_manager())
self.read_nwbfile = self.link_read_io.read()
return self.getContainer(self.read_nwbfile)
self.read_link_nwbfile = self.link_read_io.read()
return self.getContainer(self.read_link_nwbfile)

def test_roundtrip(self):
self.read_container = self.roundtripContainer()

# make sure we get a completely new object
self.assertIsNotNone(str(self.container)) # added as a test to make sure printing works
# Roundtrip the container
self.read_link_container = self.roundtripContainer()

# 1. Make sure our containers are set correctly for the test
# 1.1: Make sure the container we read is not identical to the container we used for writing
self.assertNotEqual(id(self.link_container), id(self.read_link_container))
self.assertNotEqual(id(self.container), id(self.read_data_container))
# 1.2: Make sure the container we read is indeed the correct container we should use for testing
self.assertIs(self.read_link_nwbfile.objects[self.link_container.object_id], self.read_link_container)
self.assertIs(self.read_data_nwbfile.objects[self.container.object_id], self.read_data_container)
# 1.3: Make sure the object_ids of the container we wrote and read are the same
self.assertEqual(self.read_link_container.object_id, self.link_container.object_id)
self.assertEqual(self.read_data_container.object_id, self.container.object_id)
# 1.4: Make sure the object_ids between the source data and link data container are different
self.assertNotEqual(self.read_link_container.object_id, self.read_data_container.object_id)

# Test that printing works for the source data and linked data container
self.assertIsNotNone(str(self.container))
self.assertIsNotNone(str(self.read_data_container))
self.assertIsNotNone(str(self.link_container))
self.assertIsNotNone(str(self.read_container))
self.assertFalse(self.link_container.timestamps.valid)
self.assertTrue(self.read_container.timestamps.id.valid)
self.assertNotEqual(id(self.link_container), id(self.read_container))
self.assertIs(self.read_nwbfile.objects[self.link_container.object_id], self.read_container)
self.assertContainerEqual(self.read_container, self.container, ignore_name=True, ignore_hdmf_attrs=True)
self.assertEqual(self.read_container.object_id, self.link_container.object_id)
self.assertIsNotNone(str(self.read_link_container))

# Test that timestamps and data are valid after write
self.assertTrue(self.read_link_container.timestamps.id.valid)
self.assertTrue(self.read_link_container.data.id.valid)
self.assertTrue(self.read_data_container.timestamps.id.valid)
self.assertTrue(self.read_data_container.data.id.valid)

# Make sure the data in the read data container and linked data container match the original container
self.assertContainerEqual(self.read_link_container, self.container, ignore_name=True, ignore_hdmf_attrs=True)
self.assertContainerEqual(self.read_data_container, self.container, ignore_name=True, ignore_hdmf_attrs=True)

# Make sure the timestamps and data are linked correctly. I.e., the filename of the h5py dataset should
# match between the data file and the file with links even-though they have been read from different files
self.assertEqual(
self.read_data_container.data.file.filename, # Path where the source data is stored
self.read_link_container.data.file.filename # Path where the linked h5py dataset points to
)
self.assertEqual(
self.read_data_container.timestamps.file.filename, # Path where the source data is stored
self.read_link_container.timestamps.file.filename # Path where the linked h5py dataset points to
)

# validate both the source data and linked data file via the pynwb validator
self.validate()

def test_link_root(self):
Expand Down Expand Up @@ -159,3 +199,55 @@ def validate(self):

    def getContainer(self, nwbfile):
        """Return the container under test: the acquisition named 'test_mod_ts' from *nwbfile*."""
        return nwbfile.get_acquisition('test_mod_ts')


class TestTimeSeriesModularLinkViaTimeSeries(TestTimeSeriesModular):
    """
    Variant of TestTimeSeriesModular that creates the links by passing the
    source TimeSeries itself as the ``data`` and ``timestamps`` constructor
    arguments, instead of wrapping the datasets with H5DataIO.
    """
    def setUp(self):
        super().setUp()
        # NOTE(review): the issue number is missing from this message — confirm
        # and fill in the tracker reference.
        self.skipTest("This behavior is currently broken. See issue .")

    def roundtripContainer(self):
        # Write the file that holds the source data.
        source_nwbfile = NWBFile(
            session_description='a test file',
            identifier='data_file',
            session_start_time=self.start_time
        )
        source_nwbfile.add_acquisition(self.container)

        with HDF5IO(self.data_filename, 'w', manager=get_manager()) as write_io:
            write_io.write(source_nwbfile)

        # Re-open the data file; keep the IO handle so linked datasets stay usable.
        self.data_read_io = HDF5IO(self.data_filename, 'r', manager=get_manager())
        self.read_data_nwbfile = self.data_read_io.read()
        self.read_data_container = self.read_data_nwbfile.get_acquisition('data_ts')

        # Write the "link file" whose timeseries.data is an external link to the
        # timeseries in the "data file", and whose timeseries.timestamps links to
        # the timeseries.timestamps in the "data file".
        with HDF5IO(self.link_filename, 'w', manager=get_manager()) as write_io:
            linking_nwbfile = NWBFile(
                session_description='a test file',
                identifier='link_file',
                session_start_time=self.start_time
            )
            self.link_container = TimeSeries(
                name='test_mod_ts',
                unit='V',
                data=self.read_data_container,  # <--- This is the main difference to TestTimeSeriesModular
                timestamps=self.read_data_container  # <--- This is the main difference to TestTimeSeriesModular
            )
            linking_nwbfile.add_acquisition(self.link_container)
            write_io.write(linking_nwbfile)

        # note that self.link_container contains a link to a dataset that is now closed

        # Read the link file back and return the linked container.
        self.link_read_io = HDF5IO(self.link_filename, 'r', manager=get_manager())
        self.read_link_nwbfile = self.link_read_io.read()
        return self.getContainer(self.read_link_nwbfile)

0 comments on commit dc74e36

Please sign in to comment.