From 97ea60309541360cd6a89eb821728a86ca4f6554 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 3 Oct 2023 19:18:36 -0700 Subject: [PATCH 01/21] Fix warnings from tests (#1778) --- .github/workflows/run_all_tests.yml | 12 +- .github/workflows/run_dandi_read_tests.yml | 2 +- .github/workflows/run_tests.yml | 4 +- .../convert-crcns-ret-1-meisterlab.ipynb | 2 +- ...tom-extensions-and-external-stimulus.ipynb | 2 +- ...-1-meisterlab-with-custom-extensions.ipynb | 2 +- ...meisterlab-without-custom-extensions.ipynb | 2 +- setup.cfg | 1 + src/pynwb/testing/mock/ophys.py | 30 ++- test.py | 5 +- tests/integration/hdf5/test_ecephys.py | 245 ++++++++++-------- tests/read_dandi/test_read_dandi.py | 90 ++++--- tests/unit/test_ecephys.py | 36 ++- tests/unit/test_file.py | 5 +- tests/unit/test_ophys.py | 26 +- tests/unit/test_resources.py | 12 +- tests/validation/test_validate.py | 45 ++-- 17 files changed, 328 insertions(+), 193 deletions(-) diff --git a/.github/workflows/run_all_tests.yml b/.github/workflows/run_all_tests.yml index 0e9d9b131..c47941c21 100644 --- a/.github/workflows/run_all_tests.yml +++ b/.github/workflows/run_all_tests.yml @@ -196,9 +196,9 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.11-ros3 , python-ver: "3.11", os: ubuntu-latest } - - { name: windows-python3.11-ros3, python-ver: "3.11", os: windows-latest } - - { name: macos-python3.11-ros3 , python-ver: "3.11", os: macos-latest } + - { name: conda-linux-python3.11-ros3 , python-ver: "3.11", os: ubuntu-latest } + - { name: conda-windows-python3.11-ros3, python-ver: "3.11", os: windows-latest } + - { name: conda-macos-python3.11-ros3 , python-ver: "3.11", os: macos-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -243,9 +243,9 @@ jobs: fail-fast: false matrix: include: - - { name: linux-gallery-python3.11-ros3 , python-ver: "3.11", os: ubuntu-latest } - - { name: windows-gallery-python3.11-ros3, python-ver: "3.11", os: windows-latest } - - { name: macos-gallery-python3.11-ros3 , python-ver: "3.11", os: macos-latest } + - { name: conda-linux-gallery-python3.11-ros3 , python-ver: "3.11", os: ubuntu-latest } + - { name: conda-windows-gallery-python3.11-ros3, python-ver: "3.11", os: windows-latest } + - { name: conda-macos-gallery-python3.11-ros3 , python-ver: "3.11", os: macos-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 diff --git a/.github/workflows/run_dandi_read_tests.yml b/.github/workflows/run_dandi_read_tests.yml index ec8cc2e84..857b32c9a 100644 --- a/.github/workflows/run_dandi_read_tests.yml +++ b/.github/workflows/run_dandi_read_tests.yml @@ -47,4 +47,4 @@ jobs: - name: Run DANDI read tests run: | - pytest -rP tests/read_dandi/ + python tests/read_dandi/test_read_dandi.py diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 89793901d..e4479a554 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -174,7 +174,7 @@ jobs: fail-fast: false matrix: include: - - { name: linux-python3.11-ros3 , python-ver: "3.11", os: ubuntu-latest } + - { name: conda-linux-python3.11-ros3 , python-ver: "3.11", os: ubuntu-latest } steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -219,7 +219,7 @@ jobs: fail-fast: false matrix: include: - - { name: linux-gallery-python3.11-ros3 , python-ver: "3.11", os: ubuntu-latest } + - { name: conda-linux-gallery-python3.11-ros3 , python-ver: "3.11", os: ubuntu-latest } steps: - name: Cancel non-latest 
runs uses: styfle/cancel-workflow-action@0.11.0 diff --git a/docs/notebooks/convert-crcns-ret-1-meisterlab.ipynb b/docs/notebooks/convert-crcns-ret-1-meisterlab.ipynb index be1883f96..0107d42aa 100644 --- a/docs/notebooks/convert-crcns-ret-1-meisterlab.ipynb +++ b/docs/notebooks/convert-crcns-ret-1-meisterlab.ipynb @@ -1004,7 +1004,7 @@ "source": [ "## Step 5.2: Convert all files\n", "\n", - "Convert all the files by iteating over the files and calling `convert_single_file` function for each of the file" + "Convert all the files by iterating over the files and calling `convert_single_file` function for each of the file" ] }, { diff --git a/docs/notebooks/convert-crcns-ret-1-old/convert-crcns-ret-1-meisterlab-with-custom-extensions-and-external-stimulus.ipynb b/docs/notebooks/convert-crcns-ret-1-old/convert-crcns-ret-1-meisterlab-with-custom-extensions-and-external-stimulus.ipynb index 685744050..4d081f7ab 100644 --- a/docs/notebooks/convert-crcns-ret-1-old/convert-crcns-ret-1-meisterlab-with-custom-extensions-and-external-stimulus.ipynb +++ b/docs/notebooks/convert-crcns-ret-1-old/convert-crcns-ret-1-meisterlab-with-custom-extensions-and-external-stimulus.ipynb @@ -1001,7 +1001,7 @@ "source": [ "## Step 5.3: Convert all files\n", "\n", - "Convert all the files by iteating over the files and calling `convert_single_file` function for each of the file" + "Convert all the files by iterating over the files and calling `convert_single_file` function for each of the file" ] }, { diff --git a/docs/notebooks/convert-crcns-ret-1-old/convert-crcns-ret-1-meisterlab-with-custom-extensions.ipynb b/docs/notebooks/convert-crcns-ret-1-old/convert-crcns-ret-1-meisterlab-with-custom-extensions.ipynb index a32c1869e..d612c11c9 100644 --- a/docs/notebooks/convert-crcns-ret-1-old/convert-crcns-ret-1-meisterlab-with-custom-extensions.ipynb +++ b/docs/notebooks/convert-crcns-ret-1-old/convert-crcns-ret-1-meisterlab-with-custom-extensions.ipynb @@ -974,7 +974,7 @@ "source": [ "## Step 5.3: Convert all files\n", "\n", - "Convert all the files by iteating over the files and calling `convert_single_file` function for each of the file" + "Convert all the files by iterating over the files and calling `convert_single_file` function for each of the file" ] }, { diff --git a/docs/notebooks/convert-crcns-ret-1-old/convert-crcns-ret-1-meisterlab-without-custom-extensions.ipynb b/docs/notebooks/convert-crcns-ret-1-old/convert-crcns-ret-1-meisterlab-without-custom-extensions.ipynb index c599ba37d..a03f26ff0 100644 --- a/docs/notebooks/convert-crcns-ret-1-old/convert-crcns-ret-1-meisterlab-without-custom-extensions.ipynb +++ b/docs/notebooks/convert-crcns-ret-1-old/convert-crcns-ret-1-meisterlab-without-custom-extensions.ipynb @@ -649,7 +649,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Convert all the files by iteating over the files and calling `convert_single_file` function for each of the file" + "Convert all the files by iterating over the files and calling `convert_single_file` function for each of the file" ] }, { diff --git a/setup.cfg b/setup.cfg index 3c492df25..cccacf048 100644 --- a/setup.cfg +++ b/setup.cfg @@ -28,6 +28,7 @@ per-file-ignores = tests/integration/__init__.py:F401 src/pynwb/testing/__init__.py:F401 src/pynwb/validate.py:T201 + tests/read_dandi/test_read_dandi.py:T201 setup.py:T201 test.py:T201 scripts/*:T201 diff --git a/src/pynwb/testing/mock/ophys.py b/src/pynwb/testing/mock/ophys.py index 5b43828fa..d9ba02572 100644 --- a/src/pynwb/testing/mock/ophys.py +++ 
b/src/pynwb/testing/mock/ophys.py @@ -4,7 +4,7 @@ from hdmf.common.table import DynamicTableRegion -from ... import NWBFile +from ... import NWBFile, ProcessingModule from ...device import Device from ...ophys import ( RoiResponseSeries, @@ -272,6 +272,8 @@ def mock_RoiResponseSeries( else: n_rois = 5 + plane_seg = plane_segmentation or mock_PlaneSegmentation(n_rois=n_rois, nwbfile=nwbfile) + roi_response_series = RoiResponseSeries( name=name if name is not None else name_generator("RoiResponseSeries"), data=data if data is not None else np.ones((30, n_rois)), @@ -280,7 +282,7 @@ def mock_RoiResponseSeries( or DynamicTableRegion( name="rois", description="rois", - table=plane_segmentation or mock_PlaneSegmentation(n_rois=n_rois, nwbfile=nwbfile), + table=plane_seg, data=list(range(n_rois)), ), resolution=resolution, @@ -298,6 +300,9 @@ def mock_RoiResponseSeries( if "ophys" not in nwbfile.processing: nwbfile.create_processing_module("ophys", "ophys") + if plane_seg.name not in nwbfile.processing["ophys"].data_interfaces: + nwbfile.processing["ophys"].add(plane_seg) + nwbfile.processing["ophys"].add(roi_response_series) return roi_response_series @@ -309,9 +314,9 @@ def mock_DfOverF( nwbfile: Optional[NWBFile] = None ) -> DfOverF: df_over_f = DfOverF( - roi_response_series=roi_response_series or [mock_RoiResponseSeries(nwbfile=nwbfile)], name=name if name is not None else name_generator("DfOverF"), ) + plane_seg = mock_PlaneSegmentation(nwbfile=nwbfile) if nwbfile is not None: if "ophys" not in nwbfile.processing: @@ -319,6 +324,14 @@ def mock_DfOverF( nwbfile.processing["ophys"].add(df_over_f) + else: + pm = ProcessingModule(name="ophys", description="ophys") + pm.add(plane_seg) + pm.add(df_over_f) + + df_over_f.add_roi_response_series( + roi_response_series or mock_RoiResponseSeries(nwbfile=nwbfile, plane_segmentation=plane_seg) + ) return df_over_f @@ -328,13 +341,22 @@ def mock_Fluorescence( nwbfile: Optional[NWBFile] = None, ) -> Fluorescence: fluorescence = Fluorescence( - roi_response_series=roi_response_series or [mock_RoiResponseSeries(nwbfile=nwbfile)], name=name if name is not None else name_generator("Fluorescence"), ) + plane_seg = mock_PlaneSegmentation(nwbfile=nwbfile) if nwbfile is not None: if "ophys" not in nwbfile.processing: nwbfile.create_processing_module("ophys", "ophys") + nwbfile.processing["ophys"].add(fluorescence) + else: + pm = ProcessingModule(name="ophys", description="ophys") + pm.add(plane_seg) + pm.add(fluorescence) + + fluorescence.add_roi_response_series( + roi_response_series or mock_RoiResponseSeries(nwbfile=nwbfile, plane_segmentation=plane_seg) + ) return fluorescence diff --git a/test.py b/test.py index 96b33445c..16191ae3f 100755 --- a/test.py +++ b/test.py @@ -163,7 +163,7 @@ def validate_nwbs(): def get_namespaces(nwbfile): comp = run(["python", "-m", "pynwb.validate", - "--list-namespaces", "--cached-namespace", nwb], + "--list-namespaces", nwbfile], stdout=PIPE, stderr=STDOUT, universal_newlines=True, timeout=30) if comp.returncode != 0: @@ -179,14 +179,13 @@ def get_namespaces(nwbfile): cmds = [] cmds += [["python", "-m", "pynwb.validate", nwb]] - cmds += [["python", "-m", "pynwb.validate", "--cached-namespace", nwb]] cmds += [["python", "-m", "pynwb.validate", "--no-cached-namespace", nwb]] for ns in namespaces: # for some reason, this logging command is necessary to correctly printing the namespace in the # next logging command logging.info("Namespace found: %s" % ns) - cmds += [["python", "-m", "pynwb.validate", "--cached-namespace", "--ns", 
ns, nwb]] + cmds += [["python", "-m", "pynwb.validate", "--ns", ns, nwb]] for cmd in cmds: logging.info("Validating with \"%s\"." % (" ".join(cmd[:-1]))) diff --git a/tests/integration/hdf5/test_ecephys.py b/tests/integration/hdf5/test_ecephys.py index df6e81dfa..ff67d27c9 100644 --- a/tests/integration/hdf5/test_ecephys.py +++ b/tests/integration/hdf5/test_ecephys.py @@ -1,4 +1,5 @@ from hdmf.common import DynamicTableRegion +from pynwb import NWBFile from pynwb.ecephys import ( ElectrodeGroup, @@ -14,7 +15,7 @@ ) from pynwb.device import Device from pynwb.file import ElectrodeTable as get_electrode_table -from pynwb.testing import NWBH5IOMixin, AcquisitionH5IOMixin, TestCase +from pynwb.testing import NWBH5IOMixin, AcquisitionH5IOMixin, NWBH5IOFlexMixin, TestCase class TestElectrodeGroupIO(NWBH5IOMixin, TestCase): @@ -38,27 +39,36 @@ def getContainer(self, nwbfile): return nwbfile.get_electrode_group(self.container.name) -class TestElectricalSeriesIO(AcquisitionH5IOMixin, TestCase): +def setup_electrode_table(): + table = get_electrode_table() + dev1 = Device(name='dev1') + group = ElectrodeGroup( + name='tetrode1', + description='tetrode description', + location='tetrode location', + device=dev1 + ) + for i in range(4): + table.add_row(location='CA1', group=group, group_name='tetrode1') + return table, group, dev1 - @staticmethod - def make_electrode_table(self): - """ Make an electrode table, electrode group, and device """ - self.table = get_electrode_table() - self.dev1 = Device(name='dev1') - self.group = ElectrodeGroup(name='tetrode1', - description='tetrode description', - location='tetrode location', - device=self.dev1) - for i in range(4): - self.table.add_row(location='CA1', group=self.group, group_name='tetrode1') - def setUpContainer(self): - """ Return the test ElectricalSeries to read/write """ - self.make_electrode_table(self) +class TestElectricalSeriesIO(NWBH5IOFlexMixin, TestCase): + + def getContainerType(self): + return "ElectricalSeries" + + def addContainer(self): + """ Add the test ElectricalSeries and related objects to the given NWBFile """ + table, group, dev1 = setup_electrode_table() + self.nwbfile.add_device(dev1) + self.nwbfile.add_electrode_group(group) + self.nwbfile.set_electrode_table(table) + region = DynamicTableRegion(name='electrodes', data=[0, 2], description='the first and third electrodes', - table=self.table) + table=table) data = list(zip(range(10), range(10, 20))) timestamps = list(map(lambda x: x/10., range(10))) channel_conversion = [1., 2., 3., 4.] @@ -71,14 +81,11 @@ def setUpContainer(self): filtering=filtering, timestamps=timestamps ) - return es - def addContainer(self, nwbfile): - """ Add the test ElectricalSeries and related objects to the given NWBFile """ - nwbfile.add_device(self.dev1) - nwbfile.add_electrode_group(self.group) - nwbfile.set_electrode_table(self.table) - nwbfile.add_acquisition(self.container) + self.nwbfile.add_acquisition(es) + + def getContainer(self, nwbfile: NWBFile): + return nwbfile.acquisition['test_eS'] def test_eg_ref(self): """ @@ -92,58 +99,70 @@ def test_eg_ref(self): self.assertIsInstance(row2.iloc[0]['group'], ElectrodeGroup) -class MultiElectricalSeriesIOMixin(AcquisitionH5IOMixin): - """ - Mixin class for methods to run a roundtrip test writing an NWB file with multiple ElectricalSeries. +class TestLFPIO(NWBH5IOFlexMixin, TestCase): + + def getContainerType(self): + return "LFP" - The abstract method setUpContainer needs to be implemented by classes that include this mixin. 
- def setUpContainer(self): - # return a test Container to read/write - """ + def addContainer(self): + table, group, dev1 = setup_electrode_table() + self.nwbfile.add_device(dev1) + self.nwbfile.add_electrode_group(group) + self.nwbfile.set_electrode_table(table) - def setUpTwoElectricalSeries(self): - """ Return two test ElectricalSeries to read/write """ - TestElectricalSeriesIO.make_electrode_table(self) region1 = DynamicTableRegion(name='electrodes', data=[0, 2], description='the first and third electrodes', - table=self.table) + table=table) region2 = DynamicTableRegion(name='electrodes', data=[1, 3], description='the second and fourth electrodes', - table=self.table) + table=table) data1 = list(zip(range(10), range(10, 20))) data2 = list(zip(reversed(range(10)), reversed(range(10, 20)))) timestamps = list(map(lambda x: x/10., range(10))) es1 = ElectricalSeries(name='test_eS1', data=data1, electrodes=region1, timestamps=timestamps) es2 = ElectricalSeries(name='test_eS2', data=data2, electrodes=region2, channel_conversion=[4., .4], timestamps=timestamps) - return es1, es2 + lfp = LFP() + self.nwbfile.add_acquisition(lfp) + lfp.add_electrical_series([es1, es2]) - def addContainer(self, nwbfile): - """ Add the test ElectricalSeries and related objects to the given NWBFile """ - nwbfile.add_device(self.dev1) - nwbfile.add_electrode_group(self.group) - nwbfile.set_electrode_table(self.table) - nwbfile.add_acquisition(self.container) + def getContainer(self, nwbfile: NWBFile): + return nwbfile.acquisition['LFP'] -class TestLFPIO(MultiElectricalSeriesIOMixin, TestCase): +class TestFilteredEphysIO(NWBH5IOFlexMixin, TestCase): - def setUpContainer(self): - """ Return a test LFP to read/write """ - es = self.setUpTwoElectricalSeries() - lfp = LFP(es) - return lfp + def getContainerType(self): + return "FilteredEphys" + def addContainer(self): + table, group, dev1 = setup_electrode_table() + self.nwbfile.add_device(dev1) + self.nwbfile.add_electrode_group(group) + self.nwbfile.set_electrode_table(table) -class TestFilteredEphysIO(MultiElectricalSeriesIOMixin, TestCase): + region1 = DynamicTableRegion(name='electrodes', + data=[0, 2], + description='the first and third electrodes', + table=table) + region2 = DynamicTableRegion(name='electrodes', + data=[1, 3], + description='the second and fourth electrodes', + table=table) + data1 = list(zip(range(10), range(10, 20))) + data2 = list(zip(reversed(range(10)), reversed(range(10, 20)))) + timestamps = list(map(lambda x: x/10., range(10))) + es1 = ElectricalSeries(name='test_eS1', data=data1, electrodes=region1, timestamps=timestamps) + es2 = ElectricalSeries(name='test_eS2', data=data2, electrodes=region2, channel_conversion=[4., .4], + timestamps=timestamps) + fe = FilteredEphys() + self.nwbfile.add_acquisition(fe) + fe.add_electrical_series([es1, es2]) - def setUpContainer(self): - """ Return a test FilteredEphys to read/write """ - es = self.setUpTwoElectricalSeries() - fe = FilteredEphys(es) - return fe + def getContainer(self, nwbfile: NWBFile): + return nwbfile.acquisition['FilteredEphys'] class TestClusteringIO(AcquisitionH5IOMixin, TestCase): @@ -165,28 +184,35 @@ def roundtripExportContainer(self, cache_spec=False): return super().roundtripExportContainer(cache_spec) -class EventWaveformConstructor(AcquisitionH5IOMixin, TestCase): +class EventWaveformConstructor(NWBH5IOFlexMixin, TestCase): + + def getContainerType(self): + return "SpikeEventSeries" + + def addContainer(self): + """ Add the test SpikeEventSeries and related objects to the 
given NWBFile """ + table, group, dev1 = setup_electrode_table() + self.nwbfile.add_device(dev1) + self.nwbfile.add_electrode_group(group) + self.nwbfile.set_electrode_table(table) - def setUpContainer(self): - """ Return a test EventWaveform to read/write """ - TestElectricalSeriesIO.make_electrode_table(self) region = DynamicTableRegion(name='electrodes', data=[0, 2], description='the first and third electrodes', - table=self.table) - sES = SpikeEventSeries(name='test_sES', - data=((1, 1), (2, 2), (3, 3)), - timestamps=[0., 1., 2.], - electrodes=region) - ew = EventWaveform(sES) - return ew + table=table) + ses = SpikeEventSeries( + name='test_sES', + data=((1, 1), (2, 2), (3, 3)), + timestamps=[0., 1., 2.], + electrodes=region + ) - def addContainer(self, nwbfile): - """ Add the test EventWaveform and related objects to the given NWBFile """ - nwbfile.add_device(self.dev1) - nwbfile.add_electrode_group(self.group) - nwbfile.set_electrode_table(self.table) - nwbfile.add_acquisition(self.container) + ew = EventWaveform() + self.nwbfile.add_acquisition(ew) + ew.add_spike_event_series(ses) + + def getContainer(self, nwbfile: NWBFile): + return nwbfile.acquisition['EventWaveform'] class ClusterWaveformsConstructor(AcquisitionH5IOMixin, TestCase): @@ -220,51 +246,66 @@ def roundtripExportContainer(self, cache_spec=False): return super().roundtripExportContainer(cache_spec) -class FeatureExtractionConstructor(AcquisitionH5IOMixin, TestCase): +class FeatureExtractionConstructor(NWBH5IOFlexMixin, TestCase): + + def getContainerType(self): + return "FeatureExtraction" + + def addContainer(self): + """ Add the test FeatureExtraction and related objects to the given NWBFile """ + table, group, dev1 = setup_electrode_table() + self.nwbfile.add_device(dev1) + self.nwbfile.add_electrode_group(group) + self.nwbfile.set_electrode_table(table) - def setUpContainer(self): - """ Return a test FeatureExtraction to read/write """ event_times = [1.9, 3.5] - TestElectricalSeriesIO.make_electrode_table(self) region = DynamicTableRegion(name='electrodes', data=[0, 2], description='the first and third electrodes', - table=self.table) + table=table) description = ['desc1', 'desc2', 'desc3'] features = [[[0., 1., 2.], [3., 4., 5.]], [[6., 7., 8.], [9., 10., 11.]]] fe = FeatureExtraction(electrodes=region, description=description, times=event_times, features=features) - return fe - def addContainer(self, nwbfile): - """ Add the test FeatureExtraction and related objects to the given NWBFile """ - nwbfile.add_device(self.dev1) - nwbfile.add_electrode_group(self.group) - nwbfile.set_electrode_table(self.table) - nwbfile.add_acquisition(self.container) + self.nwbfile.add_acquisition(fe) + def getContainer(self, nwbfile: NWBFile): + return nwbfile.acquisition['FeatureExtraction'] -class EventDetectionConstructor(AcquisitionH5IOMixin, TestCase): - def setUpContainer(self): - """ Return a test EventDetection to read/write """ - TestElectricalSeriesIO.make_electrode_table(self) +class EventDetectionConstructor(NWBH5IOFlexMixin, TestCase): + + def getContainerType(self): + return "EventDetection" + + def addContainer(self): + """ Add the test EventDetection and related objects to the given NWBFile """ + table, group, dev1 = setup_electrode_table() + self.nwbfile.add_device(dev1) + self.nwbfile.add_electrode_group(group) + self.nwbfile.set_electrode_table(table) + region = DynamicTableRegion(name='electrodes', data=[0, 2], description='the first and third electrodes', - table=self.table) + table=table) data = list(range(10)) 
ts = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] - self.eS = ElectricalSeries(name='test_eS', data=data, electrodes=region, timestamps=ts) - eD = EventDetection(detection_method='detection_method', - source_electricalseries=self.eS, - source_idx=(1, 2, 3), - times=(0.1, 0.2, 0.3)) - return eD + eS = ElectricalSeries( + name='test_eS', + data=data, + electrodes=region, + timestamps=ts + ) + eD = EventDetection( + detection_method='detection_method', + source_electricalseries=eS, + source_idx=(1, 2, 3), + times=(0.1, 0.2, 0.3) + ) - def addContainer(self, nwbfile): - """ Add the test EventDetection and related objects to the given NWBFile """ - nwbfile.add_device(self.dev1) - nwbfile.add_electrode_group(self.group) - nwbfile.set_electrode_table(self.table) - nwbfile.add_acquisition(self.eS) - nwbfile.add_acquisition(self.container) + self.nwbfile.add_acquisition(eS) + self.nwbfile.add_acquisition(eD) + + def getContainer(self, nwbfile: NWBFile): + return nwbfile.acquisition['EventDetection'] diff --git a/tests/read_dandi/test_read_dandi.py b/tests/read_dandi/test_read_dandi.py index 84e9f3f62..0e0698d77 100644 --- a/tests/read_dandi/test_read_dandi.py +++ b/tests/read_dandi/test_read_dandi.py @@ -1,52 +1,62 @@ +"""Test reading NWB files from the DANDI Archive using ROS3.""" from dandi.dandiapi import DandiAPIClient +import random import sys import traceback from pynwb import NWBHDF5IO -from pynwb.testing import TestCase -class TestReadNWBDandisets(TestCase): - """Test reading NWB files from the DANDI Archive using ROS3.""" +# NOTE: do not name the function with "test_" prefix, otherwise pytest +# will try to run it as a test + +def read_first_nwb_asset(): + """Test reading the first NWB asset from a random selection of 50 dandisets that uses NWB.""" + num_dandisets_to_read = 50 + client = DandiAPIClient() + dandisets = list(client.get_dandisets()) + random.shuffle(dandisets) + dandisets_to_read = dandisets[:num_dandisets_to_read] + print("Reading NWB files from the following dandisets:") + print([d.get_raw_metadata()["identifier"] for d in dandisets_to_read]) + + failed_reads = dict() + for i, dandiset in enumerate(dandisets_to_read): + dandiset_metadata = dandiset.get_raw_metadata() + + # skip any dandisets that do not use NWB + if not any( + data_standard["identifier"] == "RRID:SCR_015242" # this is the RRID for NWB + for data_standard in dandiset_metadata["assetsSummary"].get("dataStandard", []) + ): + continue + + dandiset_identifier = dandiset_metadata["identifier"] + print("--------------") + print(f"{i}: {dandiset_identifier}") + + # iterate through assets until we get an NWB file (it could be MP4) + assets = dandiset.get_assets() + first_asset = next(assets) + while first_asset.path.split(".")[-1] != "nwb": + first_asset = next(assets) + if first_asset.path.split(".")[-1] != "nwb": + print("No NWB files?!") + continue - def test_read_first_nwb_asset(self): - """Test reading the first NWB asset from each dandiset that uses NWB.""" - client = DandiAPIClient() - dandisets = client.get_dandisets() + s3_url = first_asset.get_content_url(follow_redirects=1, strip_query=True) - failed_reads = dict() - for i, dandiset in enumerate(dandisets): - dandiset_metadata = dandiset.get_raw_metadata() + try: + with NWBHDF5IO(path=s3_url, load_namespaces=True, driver="ros3") as io: + io.read() + except Exception as e: + print(traceback.format_exc()) + failed_reads[dandiset] = e - # skip any dandisets that do not use NWB - if not any( - data_standard["identifier"] == "RRID:SCR_015242" # this is 
the RRID for NWB - for data_standard in dandiset_metadata["assetsSummary"].get("dataStandard", []) - ): - continue + if failed_reads: + print(failed_reads) + sys.exit(1) - dandiset_identifier = dandiset_metadata["identifier"] - print("--------------") - print(f"{i}: {dandiset_identifier}") - # iterate through assets until we get an NWB file (it could be MP4) - assets = dandiset.get_assets() - first_asset = next(assets) - while first_asset.path.split(".")[-1] != "nwb": - first_asset = next(assets) - if first_asset.path.split(".")[-1] != "nwb": - print("No NWB files?!") - continue - - s3_url = first_asset.get_content_url(follow_redirects=1, strip_query=True) - - try: - with NWBHDF5IO(path=s3_url, load_namespaces=True, driver="ros3") as io: - io.read() - except Exception as e: - print(traceback.format_exc()) - failed_reads[dandiset] = e - - if failed_reads: - print(failed_reads) - sys.exit(1) +if __name__ == "__main__": + read_first_nwb_asset() diff --git a/tests/unit/test_ecephys.py b/tests/unit/test_ecephys.py index 26320394b..6f76a5e8c 100644 --- a/tests/unit/test_ecephys.py +++ b/tests/unit/test_ecephys.py @@ -2,6 +2,7 @@ import numpy as np +from pynwb.base import ProcessingModule from pynwb.ecephys import ( ElectricalSeries, SpikeEventSeries, @@ -217,7 +218,11 @@ def test_init(self): table, region = self._create_table_and_region() sES = SpikeEventSeries('test_sES', list(range(10)), list(range(10)), region) - ew = EventWaveform(sES) + pm = ProcessingModule(name='test_module', description='a test module') + ew = EventWaveform() + pm.add(table) + pm.add(ew) + ew.add_spike_event_series(sES) self.assertEqual(ew.spike_event_series['test_sES'], sES) self.assertEqual(ew['test_sES'], ew.spike_event_series['test_sES']) @@ -274,10 +279,25 @@ def _create_table_and_region(self): ) return table, region + def test_init(self): + _, region = self._create_table_and_region() + eS = ElectricalSeries('test_eS', [0, 1, 2, 3], region, timestamps=[0.1, 0.2, 0.3, 0.4]) + msg = ( + "The linked table for DynamicTableRegion 'electrodes' does not share " + "an ancestor with the DynamicTableRegion." + ) + with self.assertWarnsRegex(UserWarning, msg): + lfp = LFP(eS) + self.assertEqual(lfp.electrical_series.get('test_eS'), eS) + self.assertEqual(lfp['test_eS'], lfp.electrical_series.get('test_eS')) + def test_add_electrical_series(self): lfp = LFP() table, region = self._create_table_and_region() eS = ElectricalSeries('test_eS', [0, 1, 2, 3], region, timestamps=[0.1, 0.2, 0.3, 0.4]) + pm = ProcessingModule(name='test_module', description='a test module') + pm.add(table) + pm.add(lfp) lfp.add_electrical_series(eS) self.assertEqual(lfp.electrical_series.get('test_eS'), eS) @@ -295,16 +315,24 @@ def _create_table_and_region(self): return table, region def test_init(self): - table, region = self._create_table_and_region() + _, region = self._create_table_and_region() eS = ElectricalSeries('test_eS', [0, 1, 2, 3], region, timestamps=[0.1, 0.2, 0.3, 0.4]) - fe = FilteredEphys(eS) + msg = ( + "The linked table for DynamicTableRegion 'electrodes' does not share " + "an ancestor with the DynamicTableRegion." 
+ ) + with self.assertWarnsRegex(UserWarning, msg): + fe = FilteredEphys(eS) self.assertEqual(fe.electrical_series.get('test_eS'), eS) self.assertEqual(fe['test_eS'], fe.electrical_series.get('test_eS')) def test_add_electrical_series(self): - fe = FilteredEphys() table, region = self._create_table_and_region() eS = ElectricalSeries('test_eS', [0, 1, 2, 3], region, timestamps=[0.1, 0.2, 0.3, 0.4]) + pm = ProcessingModule(name='test_module', description='a test module') + fe = FilteredEphys() + pm.add(table) + pm.add(fe) fe.add_electrical_series(eS) self.assertEqual(fe.electrical_series.get('test_eS'), eS) self.assertEqual(fe['test_eS'], fe.electrical_series.get('test_eS')) diff --git a/tests/unit/test_file.py b/tests/unit/test_file.py index bb5c9c1e1..c9bd98ad0 100644 --- a/tests/unit/test_file.py +++ b/tests/unit/test_file.py @@ -563,9 +563,8 @@ def test_simple(self): with NWBHDF5IO(self.path, 'w') as io: io.write(nwbfile, cache_spec=False) - with self.assertWarnsWith(UserWarning, "No cached namespaces found in %s" % self.path): - with NWBHDF5IO(self.path, 'r', load_namespaces=True) as reader: - nwbfile = reader.read() + with NWBHDF5IO(self.path, 'r', load_namespaces=True) as reader: + nwbfile = reader.read() class TestTimestampsRefDefault(TestCase): diff --git a/tests/unit/test_ophys.py b/tests/unit/test_ophys.py index 1ebb7c640..88bd24535 100644 --- a/tests/unit/test_ophys.py +++ b/tests/unit/test_ophys.py @@ -2,7 +2,7 @@ import numpy as np -from pynwb.base import TimeSeries +from pynwb.base import TimeSeries, ProcessingModule from pynwb.device import Device from pynwb.image import ImageSeries from pynwb.ophys import ( @@ -398,9 +398,15 @@ def test_warnings(self): class DfOverFConstructor(TestCase): def test_init(self): + pm = ProcessingModule(name='ophys', description="Optical physiology") + ps = create_plane_segmentation() - rt_region = ps.create_roi_table_region(description='the second ROI', region=[1]) + pm.add(ps) + + dof = DfOverF() + pm.add(dof) + rt_region = ps.create_roi_table_region(description='the second ROI', region=[1]) rrs = RoiResponseSeries( name='test_ts', data=[1, 2, 3], @@ -408,26 +414,32 @@ def test_init(self): unit='unit', timestamps=[0.1, 0.2, 0.3] ) + dof.add_roi_response_series(rrs) - dof = DfOverF(rrs) self.assertEqual(dof.roi_response_series['test_ts'], rrs) class FluorescenceConstructor(TestCase): def test_init(self): + pm = ProcessingModule(name='ophys', description="Optical physiology") + ps = create_plane_segmentation() - rt_region = ps.create_roi_table_region(description='the second ROI', region=[1]) + pm.add(ps) - ts = RoiResponseSeries( + ff = Fluorescence() + pm.add(ff) + + rt_region = ps.create_roi_table_region(description='the second ROI', region=[1]) + rrs = RoiResponseSeries( name='test_ts', data=[1, 2, 3], rois=rt_region, unit='unit', timestamps=[0.1, 0.2, 0.3] ) + ff.add_roi_response_series(rrs) - ff = Fluorescence(ts) - self.assertEqual(ff.roi_response_series['test_ts'], ts) + self.assertEqual(ff.roi_response_series['test_ts'], rrs) class ImageSegmentationConstructor(TestCase): diff --git a/tests/unit/test_resources.py b/tests/unit/test_resources.py index e04f5c653..108a7fd84 100644 --- a/tests/unit/test_resources.py +++ b/tests/unit/test_resources.py @@ -1,3 +1,5 @@ +import warnings + from pynwb.resources import HERD from pynwb.testing import TestCase @@ -7,5 +9,11 @@ def test_constructor(self): """ Test constructor """ - er = HERD() - self.assertIsInstance(er, HERD) + with warnings.catch_warnings(record=True): + warnings.filterwarnings( + 
"ignore", + message=r"HERD is experimental .*", + category=UserWarning, + ) + er = HERD() + self.assertIsInstance(er, HERD) diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index 813f8d4e3..74ce0992c 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -2,6 +2,7 @@ import re from unittest.mock import patch from io import StringIO +import warnings from pynwb.testing import TestCase from pynwb import validate, NWBHDF5IO @@ -29,8 +30,6 @@ def test_validate_file_no_cache(self): "tests/back_compat/1.0.2_nwbfile.nwb"], capture_output=True) stderr_regex = re.compile( - r".*UserWarning: No cached namespaces found in tests/back_compat/1\.0\.2_nwbfile\.nwb\s*" - r"warnings.warn\(msg\)\s*" r"The file tests/back_compat/1\.0\.2_nwbfile\.nwb has no cached namespace information\. " r"Falling back to PyNWB namespace information\.\s*" ) @@ -47,8 +46,6 @@ def test_validate_file_no_cache_bad_ns(self): "--ns", "notfound"], capture_output=True) stderr_regex = re.compile( - r".*UserWarning: No cached namespaces found in tests/back_compat/1\.0\.2_nwbfile\.nwb\s*" - r"warnings.warn\(msg\)\s*" r"The file tests/back_compat/1\.0\.2_nwbfile\.nwb has no cached namespace information\. " r"Falling back to PyNWB namespace information\.\s*" r"The namespace 'notfound' could not be found in PyNWB namespace information as only " @@ -222,26 +219,44 @@ def test_validate_io_cached(self): def test_validate_io_cached_extension(self): """Test that validating a file with cached spec against its cached namespaces succeeds.""" - with NWBHDF5IO('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'r', load_namespaces=True) as io: - errors = validate(io) - self.assertEqual(errors, []) + with warnings.catch_warnings(record=True): + warnings.filterwarnings( + "ignore", + message=r"Ignoring cached namespace .*", + category=UserWarning, + ) + with NWBHDF5IO('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'r', load_namespaces=True) as io: + errors = validate(io) + self.assertEqual(errors, []) def test_validate_io_cached_extension_pass_ns(self): """Test that validating a file with cached extension spec against the extension namespace succeeds.""" - with NWBHDF5IO('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'r', load_namespaces=True) as io: - errors = validate(io, 'ndx-testextension') - self.assertEqual(errors, []) + with warnings.catch_warnings(record=True): + warnings.filterwarnings( + "ignore", + message=r"Ignoring cached namespace .*", + category=UserWarning, + ) + with NWBHDF5IO('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'r', load_namespaces=True) as io: + errors = validate(io, 'ndx-testextension') + self.assertEqual(errors, []) def test_validate_io_cached_core_with_io(self): """ For back-compatability, test that validating a file with cached extension spec against the core namespace succeeds when using the `io` + `namespace` keywords. 
""" - with NWBHDF5IO( - path='tests/back_compat/2.1.0_nwbfile_with_extension.nwb', mode='r', load_namespaces=True - ) as io: - results = validate(io=io, namespace="core") - self.assertEqual(results, []) + with warnings.catch_warnings(record=True): + warnings.filterwarnings( + "ignore", + message=r"Ignoring cached namespace .*", + category=UserWarning, + ) + with NWBHDF5IO( + path='tests/back_compat/2.1.0_nwbfile_with_extension.nwb', mode='r', load_namespaces=True + ) as io: + results = validate(io=io, namespace="core") + self.assertEqual(results, []) def test_validate_file_cached_extension(self): """ From 734a6c4c3bd66912f5438e7423574c31c348460e Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Wed, 4 Oct 2023 07:20:06 +0200 Subject: [PATCH 02/21] Update plot_file.py, typo (#1769) Co-authored-by: Ryan Ly --- docs/gallery/general/plot_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/gallery/general/plot_file.py b/docs/gallery/general/plot_file.py index 39fc1ad10..beead22f6 100644 --- a/docs/gallery/general/plot_file.py +++ b/docs/gallery/general/plot_file.py @@ -33,7 +33,7 @@ ^^^^^^^^^^ :py:class:`~pynwb.base.TimeSeries` objects store time series data and correspond to the *TimeSeries* specifications -provided by the `NWB Format`_ . Like the NWB specification, :py:class:`~pynwb.base.TimeSeries` Python objects +provided by the `NWB Format`_. Like the NWB specification, :py:class:`~pynwb.base.TimeSeries` Python objects follow an object-oriented inheritance pattern, i.e., the class :py:class:`~pynwb.base.TimeSeries` serves as the base class for all other :py:class:`~pynwb.base.TimeSeries` types, such as, :py:class:`~pynwb.ecephys.ElectricalSeries`, which itself may have further subtypes, e.g., From 2aceed0e6998d2a32f53ff154ec560ff54f3342a Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Thu, 5 Oct 2023 01:47:39 +0200 Subject: [PATCH 03/21] can_read method for NWBHDF5IO respects NWB version (#1703) Co-authored-by: Ryan Ly --- CHANGELOG.md | 6 +++++ src/pynwb/__init__.py | 56 ++++++++++++++++++++++++++++++------------- 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9016dbb2e..6a3a79232 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # PyNWB Changelog +## PyNWB 2.6.0 (Upcoming) + +### Enhancements and minor changes +- Add `NWBHDF5IO.can_read()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703) +- Add `pynwb.get_nwbfile_version()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703) + ## PyNWB 2.5.0 (August 18, 2023) ### Enhancements and minor changes diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index a6195e779..710f55ee8 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -148,6 +148,33 @@ def _dec(cls): _dec(container_cls) +def get_nwbfile_version(h5py_file: h5py.File): + """ + Get the NWB version of the file if it is an NWB file. + :returns: Tuple consisting of: 1) the original version string as stored in the file and + 2) a tuple with the parsed components of the version string, consisting of integers + and strings, e.g., (2, 5, 1, beta). (None, None) will be returned if the file is not a valid NWB file + or the nwb_version is missing, e.g., in the case when no data has been written to the file yet. 
+ """ + # Get the version string for the NWB file + try: + nwb_version_string = h5py_file.attrs['nwb_version'] + # KeyError occurs when the file is empty (e.g., when creating a new file nothing has been written) + # or when the HDF5 file is not a valid NWB file + except KeyError: + return None, None + # Other system may have written nwb_version as a fixed-length string, resulting in a numpy.bytes_ object + # on read, rather than a variable-length string. To address this, decode the bytes if necessary. + if not isinstance(nwb_version_string, str): + nwb_version_string = nwb_version_string.decode() + + # Parse the version string + nwb_version_parts = nwb_version_string.replace("-", ".").replace("_", ".").split(".") + nwb_version = tuple([int(i) if i.isnumeric() else i + for i in nwb_version_parts]) + return nwb_version_string, nwb_version + + # a function to register an object mapper for a container class @docval({"name": "container_cls", "type": type, "doc": "the Container class for which the given ObjectMapper class gets used"}, @@ -201,6 +228,17 @@ def get_sum(self, a, b): class NWBHDF5IO(_HDF5IO): + @staticmethod + def can_read(path: str): + """Determine whether a given path is readable by this class""" + if not os.path.isfile(path): # path is file that exists + return False + try: + with h5py.File(path, "r") as file: # path is HDF5 file + return get_nwbfile_version(file)[1][0] >= 2 # Major version of NWB >= 2 + except IOError: + return False + @docval({'name': 'path', 'type': (str, Path), 'doc': 'the path to the HDF5 file', 'default': None}, {'name': 'mode', 'type': str, 'doc': 'the mode to open the HDF5 file with, one of ("w", "r", "r+", "a", "w-", "x")', @@ -263,23 +301,7 @@ def nwb_version(self): and strings, e.g., (2, 5, 1, beta). (None, None) will be returned if the nwb_version is missing, e.g., in the case when no data has been written to the file yet. """ - # Get the version string for the NWB file - try: - nwb_version_string = self._file.attrs['nwb_version'] - # KeyError occurs when the file is empty (e.g., when creating a new file nothing has been written) - # or when the HDF5 file is not a valid NWB file - except KeyError: - return None, None - # Other system may have written nwb_version as a fixed-length string, resulting in a numpy.bytes_ object - # on read, rather than a variable-length string. To address this, decode the bytes if necessary. - if not isinstance(nwb_version_string, str): - nwb_version_string = nwb_version_string.decode() - - # Parse the version string - nwb_version_parts = nwb_version_string.replace("-", ".").replace("_", ".").split(".") - nwb_version = tuple([int(i) if i.isnumeric() else i - for i in nwb_version_parts]) - return nwb_version_string, nwb_version + return get_nwbfile_version(self._file) @docval(*get_docval(_HDF5IO.read), {'name': 'skip_version_check', 'type': bool, 'doc': 'skip checking of NWB version', 'default': False}) From eb585063b3e035d68f206c49ff19200f9f25fa43 Mon Sep 17 00:00:00 2001 From: Zach McKenzie <92116279+zm711@users.noreply.github.com> Date: Wed, 1 Nov 2023 13:38:30 -0400 Subject: [PATCH 04/21] typo fixes (#1785) --- docs/source/export.rst | 2 +- docs/source/validation.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/export.rst b/docs/source/export.rst index 44a7a3a4b..490cd346e 100644 --- a/docs/source/export.rst +++ b/docs/source/export.rst @@ -109,7 +109,7 @@ How do I write a newly instantiated ``NWBFile`` to two different file paths? 
----------------------------------------------------------------------------------------------------------------------- PyNWB does not support writing an :py:class:`~pynwb.file.NWBFile` that was not read from a file to two different files. For example, if you instantiate :py:class:`~pynwb.file.NWBFile` A and write it to file path 1, you cannot also write it -to file path 2. However, you can first write the :py:class:`~pynwb.file.NWBFile`` to file path 1, read the +to file path 2. However, you can first write the :py:class:`~pynwb.file.NWBFile` to file path 1, read the :py:class:`~pynwb.file.NWBFile` from file path 1, and then export it to file path 2. .. code-block:: python diff --git a/docs/source/validation.rst b/docs/source/validation.rst index 8cc32a3f7..73c138127 100644 --- a/docs/source/validation.rst +++ b/docs/source/validation.rst @@ -11,7 +11,7 @@ The validator can be invoked like so: python -m pynwb.validate test.nwb If the file contains no NWB extensions, then this command will validate the file ``test.nwb`` against the -*core* NWB specification. On success, the output will is: +*core* NWB specification. On success, the output will be: .. code-block:: text From 9fafde27ff3577835adbc026c7fcb3260830b393 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Mon, 27 Nov 2023 19:25:01 +0100 Subject: [PATCH 05/21] default load_namespaces=True (#1748) * default load_namespaces=True * Update CHANGELOG.md * change logic around to ignore load_namespaces when it cannot be used * flake8 * Update __init__.py * fix back-compatibility test * flake8 * flake8 * remove warning from test * rmv useless test * Update tests, validate script for default load ns * Update CHANGELOG.md * Update docs/gallery/advanced_io/linking_data.py Co-authored-by: Ben Dichter * Update docs/gallery/advanced_io/linking_data.py Co-authored-by: Ben Dichter * Update test_file.py --------- Co-authored-by: Ryan Ly Co-authored-by: Oliver Ruebel --- CHANGELOG.md | 1 + docs/gallery/advanced_io/linking_data.py | 81 ++++++++++------------ docs/gallery/general/extensions.py | 2 +- src/pynwb/__init__.py | 17 ++--- src/pynwb/validate.py | 1 + tests/back_compat/test_import_structure.py | 1 - tests/back_compat/test_read.py | 26 ++++--- tests/read_dandi/test_read_dandi.py | 2 +- tests/unit/test_file.py | 12 ++-- tests/validation/test_validate.py | 58 +++++++--------- 10 files changed, 97 insertions(+), 104 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a3a79232..aaef47607 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## PyNWB 2.6.0 (Upcoming) ### Enhancements and minor changes +- For `NWBHDF5IO()`, change the default of arg `load_namespaces` from `False` to `True`. @bendichter [#1748](https://github.com/NeurodataWithoutBorders/pynwb/pull/1748) - Add `NWBHDF5IO.can_read()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703) - Add `pynwb.get_nwbfile_version()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703) diff --git a/docs/gallery/advanced_io/linking_data.py b/docs/gallery/advanced_io/linking_data.py index 082aa3c51..82824f6cd 100644 --- a/docs/gallery/advanced_io/linking_data.py +++ b/docs/gallery/advanced_io/linking_data.py @@ -6,57 +6,50 @@ PyNWB supports linking between files using external links. 
-""" +Example Use Case: Integrating data from multiple files +--------------------------------------------------------- -#################### -# Example Use Case: Integrating data from multiple files -# --------------------------------------------------------- -# -# NBWContainer classes (e.g., :py:class:`~pynwb.base.TimeSeries`) support the integration of data stored in external -# HDF5 files with NWB data files via external links. To make things more concrete, let's look at the following use -# case. We want to simultaneously record multiple data streams during data acquisition. Using the concept of external -# links allows us to save each data stream to an external HDF5 files during data acquisition and to -# afterwards link the data into a single NWB:N file. In this case, each recording becomes represented by a -# separate file-system object that can be set as read-only once the experiment is done. In the following -# we are using :py:meth:`~pynwb.base.TimeSeries` as an example, but the same approach works for other -# NWBContainers as well. -# +NBWContainer classes (e.g., :py:class:`~pynwb.base.TimeSeries`) support the integration of data stored in external +HDF5 files with NWB data files via external links. To make things more concrete, let's look at the following use +case. We want to simultaneously record multiple data streams during data acquisition. Using the concept of external +links allows us to save each data stream to an external HDF5 files during data acquisition and to +afterwards link the data into a single NWB file. In this case, each recording becomes represented by a +separate file-system object that can be set as read-only once the experiment is done. In the following +we are using :py:meth:`~pynwb.base.TimeSeries` as an example, but the same approach works for other +NWBContainers as well. -#################### -# .. tip:: -# -# The same strategies we use here for creating External Links also apply to Soft Links. -# The main difference between soft and external links is that soft links point to other -# objects within the same file while external links point to objects in external files. -# +.. tip:: -#################### -# .. tip:: -# -# In the case of :py:meth:`~pynwb.base.TimeSeries`, the uncorrected timestamps generated by the acquisition -# system can be stored (or linked) in the *sync* group. In the NWB:N format, hardware-recorded time data -# must then be corrected to a common time base (e.g., timestamps from all hardware sources aligned) before -# it can be included in the *timestamps* of the *TimeSeries*. This means, in the case -# of :py:meth:`~pynwb.base.TimeSeries` we need to be careful that we are not including data with incompatible -# timestamps in the same file when using external links. -# + The same strategies we use here for creating External Links also apply to Soft Links. + The main difference between soft and external links is that soft links point to other + objects within the same file while external links point to objects in external files. -#################### -# .. warning:: -# -# External links can become stale/break. Since external links are pointing to data in other files -# external links may become invalid any time files are modified on the file system, e.g., renamed, -# moved or access permissions are changed. -# + .. tip:: -#################### -# Creating test data -# --------------------------- -# -# In the following we are creating two :py:meth:`~pynwb.base.TimeSeries` each written to a separate file. 
-# We then show how we can integrate these files into a single NWBFile. + In the case of :py:meth:`~pynwb.base.TimeSeries`, the uncorrected timestamps generated by the acquisition + system can be stored (or linked) in the *sync* group. In the NWB format, hardware-recorded time data + must then be corrected to a common time base (e.g., timestamps from all hardware sources aligned) before + it can be included in the *timestamps* of the *TimeSeries*. This means, in the case + of :py:meth:`~pynwb.base.TimeSeries` we need to be careful that we are not including data with incompatible + timestamps in the same file when using external links. + + +.. warning:: + + External links can become stale/break. Since external links are pointing to data in other files + external links may become invalid any time files are modified on the file system, e.g., renamed, + moved or access permissions are changed. + + +Creating test data +--------------------------- + +In the following we are creating two :py:meth:`~pynwb.base.TimeSeries` each written to a separate file. +We then show how we can integrate these files into a single NWBFile. +""" # sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnails_linking_data.png' + from datetime import datetime from uuid import uuid4 diff --git a/docs/gallery/general/extensions.py b/docs/gallery/general/extensions.py index fa4f4cbb7..5140c531b 100644 --- a/docs/gallery/general/extensions.py +++ b/docs/gallery/general/extensions.py @@ -254,7 +254,7 @@ def __init__(self, **kwargs): # explicitly specify this. This behavior is enabled by the *load_namespaces* # argument to the :py:class:`~pynwb.NWBHDF5IO` constructor. -with NWBHDF5IO("cache_spec_example.nwb", mode="r", load_namespaces=True) as io: +with NWBHDF5IO("cache_spec_example.nwb", mode="r") as io: nwbfile = io.read() #################### diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 710f55ee8..7cf32e074 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -4,7 +4,6 @@ import os.path from pathlib import Path from copy import deepcopy -from warnings import warn import h5py from hdmf.spec import NamespaceCatalog @@ -244,8 +243,9 @@ def can_read(path: str): 'doc': 'the mode to open the HDF5 file with, one of ("w", "r", "r+", "a", "w-", "x")', 'default': 'r'}, {'name': 'load_namespaces', 'type': bool, - 'doc': 'whether or not to load cached namespaces from given path - not applicable in write mode', - 'default': False}, + 'doc': ('whether or not to load cached namespaces from given path - not applicable in write mode ' + 'or when `manager` is not None or when `extensions` is not None'), + 'default': True}, {'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager to use for I/O', 'default': None}, {'name': 'extensions', 'type': (str, TypeMap, list), 'doc': 'a path to a namespace, a TypeMap, or a list consisting paths to namespaces and TypeMaps', @@ -261,15 +261,10 @@ def __init__(self, **kwargs): popargs('path', 'mode', 'manager', 'extensions', 'load_namespaces', 'file', 'comm', 'driver', 'herd_path', kwargs) # Define the BuildManager to use - if load_namespaces: - if manager is not None: - warn("loading namespaces from file - ignoring 'manager'") - if extensions is not None: - warn("loading namespaces from file - ignoring 'extensions' argument") - # namespaces are not loaded when creating an NWBHDF5IO object in write mode - if 'w' in mode or mode == 'x': - raise ValueError("cannot load namespaces from file when writing to it") + if mode in 'wx' or manager is not None or extensions 
is not None: + load_namespaces = False + if load_namespaces: tm = get_type_map() super().load_namespaces(tm, path, file=file_obj, driver=driver) manager = BuildManager(tm) diff --git a/src/pynwb/validate.py b/src/pynwb/validate.py index 23b3aee6f..62aa41426 100644 --- a/src/pynwb/validate.py +++ b/src/pynwb/validate.py @@ -156,6 +156,7 @@ def validate(**kwargs): file=sys.stderr, ) else: + io_kwargs.update(load_namespaces=False) namespaces_to_validate = [CORE_NAMESPACE] if namespace is not None: diff --git a/tests/back_compat/test_import_structure.py b/tests/back_compat/test_import_structure.py index e5f931f5d..79d4f6ad0 100644 --- a/tests/back_compat/test_import_structure.py +++ b/tests/back_compat/test_import_structure.py @@ -82,7 +82,6 @@ def test_outer_import_structure(self): "spec", "testing", "validate", - "warn", ] for member in expected_structure: self.assertIn(member=member, container=current_structure) diff --git a/tests/back_compat/test_read.py b/tests/back_compat/test_read.py index 919ae6bde..792d26e7a 100644 --- a/tests/back_compat/test_read.py +++ b/tests/back_compat/test_read.py @@ -31,6 +31,16 @@ class TestReadOldVersions(TestCase): "- expected an array of shape '[None]', got non-array data 'one publication'")], } + def get_io(self, path): + """Get an NWBHDF5IO object for the given path.""" + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + message=r"Ignoring cached namespace .*", + category=UserWarning, + ) + return NWBHDF5IO(str(path), 'r') + def test_read(self): """Test reading and validating all NWB files in the same folder as this file. @@ -43,7 +53,7 @@ def test_read(self): with self.subTest(file=f.name): with warnings.catch_warnings(record=True) as warnings_on_read: warnings.simplefilter("always") - with NWBHDF5IO(str(f), 'r', load_namespaces=True) as io: + with self.get_io(f) as io: errors = validate(io) io.read() for w in warnings_on_read: @@ -69,28 +79,28 @@ def test_read(self): def test_read_timeseries_no_data(self): """Test that a TimeSeries written without data is read with data set to the default value.""" f = Path(__file__).parent / '1.5.1_timeseries_no_data.nwb' - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() np.testing.assert_array_equal(read_nwbfile.acquisition['test_timeseries'].data, TimeSeries.DEFAULT_DATA) def test_read_timeseries_no_unit(self): """Test that an ImageSeries written without unit is read with unit set to the default value.""" f = Path(__file__).parent / '1.5.1_timeseries_no_unit.nwb' - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() self.assertEqual(read_nwbfile.acquisition['test_timeseries'].unit, TimeSeries.DEFAULT_UNIT) def test_read_imageseries_no_data(self): """Test that an ImageSeries written without data is read with data set to the default value.""" f = Path(__file__).parent / '1.5.1_imageseries_no_data.nwb' - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() np.testing.assert_array_equal(read_nwbfile.acquisition['test_imageseries'].data, ImageSeries.DEFAULT_DATA) def test_read_imageseries_no_unit(self): """Test that an ImageSeries written without unit is read with unit set to the default value.""" f = Path(__file__).parent / '1.5.1_imageseries_no_unit.nwb' - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() self.assertEqual(read_nwbfile.acquisition['test_imageseries'].unit, ImageSeries.DEFAULT_UNIT) @@ -100,7 +110,7 @@ def 
test_read_imageseries_non_external_format(self): f = Path(__file__).parent / fbase expected_warning = self.expected_warnings[fbase][0] with self.assertWarnsWith(UserWarning, expected_warning): - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() self.assertEqual(read_nwbfile.acquisition['test_imageseries'].format, "tiff") @@ -110,13 +120,13 @@ def test_read_imageseries_nonmatch_starting_frame(self): f = Path(__file__).parent / fbase expected_warning = self.expected_warnings[fbase][0] with self.assertWarnsWith(UserWarning, expected_warning): - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() np.testing.assert_array_equal(read_nwbfile.acquisition['test_imageseries'].starting_frame, [1, 2, 3]) def test_read_subject_no_age__reference(self): """Test that reading a Subject without an age__reference set with NWB schema 2.5.0 sets the value to None""" f = Path(__file__).parent / '2.2.0_subject_no_age__reference.nwb' - with NWBHDF5IO(str(f), 'r') as io: + with self.get_io(f) as io: read_nwbfile = io.read() self.assertIsNone(read_nwbfile.subject.age__reference) diff --git a/tests/read_dandi/test_read_dandi.py b/tests/read_dandi/test_read_dandi.py index 0e0698d77..f9dafd938 100644 --- a/tests/read_dandi/test_read_dandi.py +++ b/tests/read_dandi/test_read_dandi.py @@ -47,7 +47,7 @@ def read_first_nwb_asset(): s3_url = first_asset.get_content_url(follow_redirects=1, strip_query=True) try: - with NWBHDF5IO(path=s3_url, load_namespaces=True, driver="ros3") as io: + with NWBHDF5IO(path=s3_url, driver="ros3") as io: io.read() except Exception as e: print(traceback.format_exc()) diff --git a/tests/unit/test_file.py b/tests/unit/test_file.py index c9bd98ad0..756009ff3 100644 --- a/tests/unit/test_file.py +++ b/tests/unit/test_file.py @@ -527,6 +527,7 @@ def test_subject_age_duration(self): class TestCacheSpec(TestCase): + """Test whether the file can be written and read when caching the spec.""" def setUp(self): self.path = 'unittest_cached_spec.nwb' @@ -535,18 +536,20 @@ def tearDown(self): remove_test_file(self.path) def test_simple(self): - nwbfile = NWBFile(' ', ' ', + nwbfile = NWBFile('sess_desc', 'identifier', datetime.now(tzlocal()), file_create_date=datetime.now(tzlocal()), institution='University of California, San Francisco', lab='Chang Lab') with NWBHDF5IO(self.path, 'w') as io: io.write(nwbfile) - with NWBHDF5IO(self.path, 'r', load_namespaces=True) as reader: + with NWBHDF5IO(self.path, 'r') as reader: nwbfile = reader.read() + assert nwbfile.session_description == "sess_desc" class TestNoCacheSpec(TestCase): + """Test whether the file can be written and read when not caching the spec.""" def setUp(self): self.path = 'unittest_cached_spec.nwb' @@ -555,7 +558,7 @@ def tearDown(self): remove_test_file(self.path) def test_simple(self): - nwbfile = NWBFile(' ', ' ', + nwbfile = NWBFile('sess_desc', 'identifier', datetime.now(tzlocal()), file_create_date=datetime.now(tzlocal()), institution='University of California, San Francisco', @@ -563,8 +566,9 @@ def test_simple(self): with NWBHDF5IO(self.path, 'w') as io: io.write(nwbfile, cache_spec=False) - with NWBHDF5IO(self.path, 'r', load_namespaces=True) as reader: + with NWBHDF5IO(self.path, 'r') as reader: nwbfile = reader.read() + assert nwbfile.session_description == "sess_desc" class TestTimestampsRefDefault(TestCase): diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index 74ce0992c..6aa2ee25e 100644 --- 
a/tests/validation/test_validate.py
+++ b/tests/validation/test_validate.py
@@ -199,64 +199,54 @@ class TestValidateFunction(TestCase):
     # 1.0.3_nwbfile.nwb has cached "core" specification
     # 1.1.2_nwbfile.nwb has cached "core" and "hdmf-common" specifications
 
+    def get_io(self, path):
+        """Get an NWBHDF5IO object for the given path, ignoring the warning about ignoring cached namespaces."""
+        with warnings.catch_warnings():
+            warnings.filterwarnings(
+                "ignore",
+                message=r"Ignoring cached namespace .*",
+                category=UserWarning,
+            )
+            return NWBHDF5IO(str(path), 'r')
+
     def test_validate_io_no_cache(self):
         """Test that validating a file with no cached spec against the core namespace succeeds."""
-        with NWBHDF5IO('tests/back_compat/1.0.2_nwbfile.nwb', 'r') as io:
+        with self.get_io('tests/back_compat/1.0.2_nwbfile.nwb') as io:
             errors = validate(io)
             self.assertEqual(errors, [])
 
     def test_validate_io_no_cache_bad_ns(self):
         """Test that validating a file with no cached spec against a specified, unknown namespace fails."""
-        with NWBHDF5IO('tests/back_compat/1.0.2_nwbfile.nwb', 'r') as io:
+        with self.get_io('tests/back_compat/1.0.2_nwbfile.nwb') as io:
             with self.assertRaisesWith(KeyError, "\"'notfound' not a namespace\""):
                 validate(io, 'notfound')
 
     def test_validate_io_cached(self):
         """Test that validating a file with cached spec against its cached namespace succeeds."""
-        with NWBHDF5IO('tests/back_compat/1.1.2_nwbfile.nwb', 'r') as io:
+        with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io:
             errors = validate(io)
             self.assertEqual(errors, [])
 
     def test_validate_io_cached_extension(self):
         """Test that validating a file with cached spec against its cached namespaces succeeds."""
-        with warnings.catch_warnings(record=True):
-            warnings.filterwarnings(
-                "ignore",
-                message=r"Ignoring cached namespace .*",
-                category=UserWarning,
-            )
-            with NWBHDF5IO('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'r', load_namespaces=True) as io:
-                errors = validate(io)
-                self.assertEqual(errors, [])
+        with self.get_io('tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io:
+            errors = validate(io)
+            self.assertEqual(errors, [])
 
     def test_validate_io_cached_extension_pass_ns(self):
         """Test that validating a file with cached extension spec against the extension namespace succeeds."""
-        with warnings.catch_warnings(record=True):
-            warnings.filterwarnings(
-                "ignore",
-                message=r"Ignoring cached namespace .*",
-                category=UserWarning,
-            )
-            with NWBHDF5IO('tests/back_compat/2.1.0_nwbfile_with_extension.nwb', 'r', load_namespaces=True) as io:
-                errors = validate(io, 'ndx-testextension')
-                self.assertEqual(errors, [])
+        with self.get_io('tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io:
+            errors = validate(io, 'ndx-testextension')
+            self.assertEqual(errors, [])
 
     def test_validate_io_cached_core_with_io(self):
         """
         For back-compatibility, test that validating a file with cached extension spec against the core namespace
         succeeds when using the `io` + `namespace` keywords.
""" - with warnings.catch_warnings(record=True): - warnings.filterwarnings( - "ignore", - message=r"Ignoring cached namespace .*", - category=UserWarning, - ) - with NWBHDF5IO( - path='tests/back_compat/2.1.0_nwbfile_with_extension.nwb', mode='r', load_namespaces=True - ) as io: - results = validate(io=io, namespace="core") - self.assertEqual(results, []) + with self.get_io(path='tests/back_compat/2.1.0_nwbfile_with_extension.nwb') as io: + results = validate(io=io, namespace="core") + self.assertEqual(results, []) def test_validate_file_cached_extension(self): """ @@ -310,13 +300,13 @@ def test_validate_file_cached_no_cache_bad_ns(self): def test_validate_io_cached_bad_ns(self): """Test that validating a file with cached spec against a specified, unknown namespace fails.""" - with NWBHDF5IO('tests/back_compat/1.1.2_nwbfile.nwb', 'r') as io: + with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io: with self.assertRaisesWith(KeyError, "\"'notfound' not a namespace\""): validate(io, 'notfound') def test_validate_io_cached_hdmf_common(self): """Test that validating a file with cached spec against the hdmf-common namespace fails.""" - with NWBHDF5IO('tests/back_compat/1.1.2_nwbfile.nwb', 'r') as io: + with self.get_io('tests/back_compat/1.1.2_nwbfile.nwb') as io: # TODO this error should not be different from the error when using the validate script above msg = "builder must have data type defined with attribute 'data_type'" with self.assertRaisesWith(ValueError, msg): From 5e8ddc289d93b710939ade6768d65b57540a9348 Mon Sep 17 00:00:00 2001 From: Ben Dichter Date: Mon, 27 Nov 2023 21:10:51 +0100 Subject: [PATCH 06/21] Docs rmv load namespaces (#1792) * rmv unnecessary load_namespaces from gallery * fix extensions.py to reflect new default --- docs/gallery/advanced_io/streaming.py | 4 ++-- docs/gallery/general/extensions.py | 15 +++++---------- docs/gallery/general/plot_read_basics.py | 4 ++-- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/docs/gallery/advanced_io/streaming.py b/docs/gallery/advanced_io/streaming.py index bb5c2e1d8..fb34b498b 100644 --- a/docs/gallery/advanced_io/streaming.py +++ b/docs/gallery/advanced_io/streaming.py @@ -69,7 +69,7 @@ # next, open the file with fs.open(s3_url, "rb") as f: with h5py.File(f) as file: - with pynwb.NWBHDF5IO(file=file, load_namespaces=True) as io: + with pynwb.NWBHDF5IO(file=file) as io: nwbfile = io.read() print(nwbfile.acquisition['lick_times'].time_series['lick_left_times'].data[:]) @@ -99,7 +99,7 @@ from pynwb import NWBHDF5IO -with NWBHDF5IO(s3_url, mode='r', load_namespaces=True, driver='ros3') as io: +with NWBHDF5IO(s3_url, mode='r', driver='ros3') as io: nwbfile = io.read() print(nwbfile) print(nwbfile.acquisition['lick_times'].time_series['lick_left_times'].data[:]) diff --git a/docs/gallery/general/extensions.py b/docs/gallery/general/extensions.py index 5140c531b..66645a660 100644 --- a/docs/gallery/general/extensions.py +++ b/docs/gallery/general/extensions.py @@ -248,16 +248,11 @@ def __init__(self, **kwargs): # .. note:: # # For more information on writing NWB files, see :ref:`basic_writing`. - -#################### -# By default, PyNWB does not use the namespaces cached in a file--you must -# explicitly specify this. This behavior is enabled by the *load_namespaces* -# argument to the :py:class:`~pynwb.NWBHDF5IO` constructor. 
- -with NWBHDF5IO("cache_spec_example.nwb", mode="r") as io: - nwbfile = io.read() - -#################### +# +# By default, if a namespace is not already loaded, PyNWB loads the namespace cached in +# the file. To disable this, set ``load_namespaces=False`` in the +# :py:class:`~pynwb.NWBHDF5IO` constructor. +# # .. _MultiContainerInterface: # # Creating and using a custom MultiContainerInterface diff --git a/docs/gallery/general/plot_read_basics.py b/docs/gallery/general/plot_read_basics.py index bba380092..c4a829d75 100644 --- a/docs/gallery/general/plot_read_basics.py +++ b/docs/gallery/general/plot_read_basics.py @@ -104,14 +104,14 @@ filepath = "sub-P11HMH_ses-20061101_ecephys+image.nwb" # Open the file in read mode "r", -io = NWBHDF5IO(filepath, mode="r", load_namespaces=True) +io = NWBHDF5IO(filepath, mode="r") nwbfile = io.read() nwbfile ####################################### # :py:class:`~pynwb.NWBHDF5IO` can also be used as a context manager: -with NWBHDF5IO(filepath, mode="r", load_namespaces=True) as io2: +with NWBHDF5IO(filepath, mode="r") as io2: nwbfile2 = io2.read() # data accessible here From 87a7c578e806f3679fc7b8c5e9777f382406db6a Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Thu, 30 Nov 2023 09:25:12 -0800 Subject: [PATCH 07/21] Change timeseries data checks to warn instead of error on read (#1793) * change timeseries rate errors to warn on read (#1786, #1721) * add test for starting time and update CHANGELOG --------- Co-authored-by: Steph Prince --- CHANGELOG.md | 1 + src/pynwb/base.py | 12 +++++++-- tests/unit/test_base.py | 57 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aaef47607..62122888a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - For `NWBHDF5IO()`, change the default of arg `load_namespaces` from `False` to `True`. @bendichter [#1748](https://github.com/NeurodataWithoutBorders/pynwb/pull/1748) - Add `NWBHDF5IO.can_read()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703) - Add `pynwb.get_nwbfile_version()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703) +- Updated timeseries data checks to warn instead of error when reading invalid files. @stephprince [#1793](https://github.com/NeurodataWithoutBorders/pynwb/pull/1793) ## PyNWB 2.5.0 (August 18, 2023) diff --git a/src/pynwb/base.py b/src/pynwb/base.py index bec8903d5..5514288e8 100644 --- a/src/pynwb/base.py +++ b/src/pynwb/base.py @@ -174,15 +174,23 @@ def __init__(self, **kwargs): timestamps = args_to_process['timestamps'] if timestamps is not None: if self.rate is not None: - raise ValueError('Specifying rate and timestamps is not supported.') + self._error_on_new_warn_on_construct( + error_msg='Specifying rate and timestamps is not supported.' + ) if self.starting_time is not None: - raise ValueError('Specifying starting_time and timestamps is not supported.') + self._error_on_new_warn_on_construct( + error_msg='Specifying starting_time and timestamps is not supported.' + ) self.fields['timestamps'] = timestamps self.timestamps_unit = self.__time_unit self.interval = 1 if isinstance(timestamps, TimeSeries): timestamps.__add_link('timestamp_link', self) elif self.rate is not None: + if self.rate <= 0: + self._error_on_new_warn_on_construct( + error_msg='Rate must be a positive value.' 
+                )
             if self.starting_time is None:  # override default if rate is provided but not starting time
                 self.starting_time = 0.0
             self.starting_time_unit = self.__time_unit
diff --git a/tests/unit/test_base.py b/tests/unit/test_base.py
index 805f946ec..899628190 100644
--- a/tests/unit/test_base.py
+++ b/tests/unit/test_base.py
@@ -405,6 +405,63 @@ def test_get_data_in_units(self):
         ts = mock_TimeSeries(data=[1., 2., 3.])
         assert_array_equal(ts.get_data_in_units(), [1., 2., 3.])
 
+    def test_non_positive_rate(self):
+        with self.assertRaisesWith(ValueError, 'Rate must be a positive value.'):
+            TimeSeries(name='test_ts', data=list(), unit='volts', rate=-1.0)
+        with self.assertRaisesWith(ValueError, 'Rate must be a positive value.'):
+            TimeSeries(name='test_ts1', data=list(), unit='volts', rate=0.0)
+
+    def test_file_with_non_positive_rate_in_construct_mode(self):
+        """Test that UserWarning is raised when rate is 0 or negative
+        while being in construct mode (i.e., on data read)."""
+        obj = TimeSeries.__new__(TimeSeries,
+                                 container_source=None,
+                                 parent=None,
+                                 object_id="test",
+                                 in_construct_mode=True)
+        with self.assertWarnsWith(warn_type=UserWarning, exc_msg='Rate must be a positive value.'):
+            obj.__init__(
+                name="test_ts",
+                data=list(),
+                unit="volts",
+                rate=-1.0
+            )
+
+    def test_file_with_rate_and_timestamps_in_construct_mode(self):
+        """Test that UserWarning is raised when rate and timestamps are both specified
+        while being in construct mode (i.e., on data read)."""
+        obj = TimeSeries.__new__(TimeSeries,
+                                 container_source=None,
+                                 parent=None,
+                                 object_id="test",
+                                 in_construct_mode=True)
+        with self.assertWarnsWith(warn_type=UserWarning, exc_msg='Specifying rate and timestamps is not supported.'):
+            obj.__init__(
+                name="test_ts",
+                data=[11, 12, 13, 14, 15],
+                unit="volts",
+                rate=1.0,
+                timestamps=[1, 2, 3, 4, 5]
+            )
+
+    def test_file_with_starting_time_and_timestamps_in_construct_mode(self):
+        """Test that UserWarning is raised when starting_time and timestamps are both specified
+        while being in construct mode (i.e., on data read)."""
+        obj = TimeSeries.__new__(TimeSeries,
+                                 container_source=None,
+                                 parent=None,
+                                 object_id="test",
+                                 in_construct_mode=True)
+        with self.assertWarnsWith(warn_type=UserWarning,
+                                  exc_msg='Specifying starting_time and timestamps is not supported.'):
+            obj.__init__(
+                name="test_ts",
+                data=[11, 12, 13, 14, 15],
+                unit="volts",
+                starting_time=1.0,
+                timestamps=[1, 2, 3, 4, 5]
+            )
+
 
 class TestImage(TestCase):
 
     def test_init(self):
From 0e45cd927a0734428358eab10a75672e8dd75344 Mon Sep 17 00:00:00 2001
From: Heberto Mayorquin
Date: Thu, 14 Dec 2023 00:17:27 +0100
Subject: [PATCH 08/21] Enable offset, conversion and channel conversion in
 `mock_ElectricalSeries` (#1796)

Co-authored-by: Ryan Ly
---
 CHANGELOG.md                      | 1 +
 src/pynwb/testing/mock/ecephys.py | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 62122888a..bd4dbfd05 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@
 - Add `NWBHDF5IO.can_read()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703)
 - Add `pynwb.get_nwbfile_version()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703)
 - Updated timeseries data checks to warn instead of error when reading invalid files. @stephprince [#1793](https://github.com/NeurodataWithoutBorders/pynwb/pull/1793)
+- Expose the offset, conversion and channel conversion parameters in `mock_ElectricalSeries`.
@h-mayorquin [#1796](https://github.com/NeurodataWithoutBorders/pynwb/pull/1796) ## PyNWB 2.5.0 (August 18, 2023) diff --git a/src/pynwb/testing/mock/ecephys.py b/src/pynwb/testing/mock/ecephys.py index 888f19962..8afa9ebc3 100644 --- a/src/pynwb/testing/mock/ecephys.py +++ b/src/pynwb/testing/mock/ecephys.py @@ -73,7 +73,10 @@ def mock_ElectricalSeries( timestamps=None, electrodes: Optional[DynamicTableRegion] = None, filtering: str = "filtering", - nwbfile: Optional[NWBFile] = None + nwbfile: Optional[NWBFile] = None, + channel_conversion: Optional[np.ndarray] = None, + conversion: float = 1.0, + offset: float = 0., ) -> ElectricalSeries: electrical_series = ElectricalSeries( name=name or name_generator("ElectricalSeries"), @@ -83,6 +86,9 @@ def mock_ElectricalSeries( timestamps=timestamps, electrodes=electrodes or mock_electrodes(nwbfile=nwbfile), filtering=filtering, + conversion=conversion, + offset=offset, + channel_conversion=channel_conversion, ) if nwbfile is not None: From dd2e848152e3a50ddde1591eae268481db08a015 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 14 Dec 2023 20:04:38 +0100 Subject: [PATCH 09/21] fix bug with w- io mode (#1795) Co-authored-by: Ryan Ly --- CHANGELOG.md | 3 +++ src/pynwb/__init__.py | 3 ++- tests/integration/hdf5/test_io.py | 15 +++++++++++++++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd4dbfd05..09bc731e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ - Updated timeseries data checks to warn instead of error when reading invalid files. @stephprince [#1793](https://github.com/NeurodataWithoutBorders/pynwb/pull/1793) - Expose the offset, conversion and channel conversion parameters in `mock_ElectricalSeries`. @h-mayorquin [#1796](https://github.com/NeurodataWithoutBorders/pynwb/pull/1796) +### Bug fixes +- Fix bug where namespaces were loaded in "w-" mode. 
@h-mayorquin [#1795](https://github.com/NeurodataWithoutBorders/pynwb/pull/1795) + ## PyNWB 2.5.0 (August 18, 2023) ### Enhancements and minor changes diff --git a/src/pynwb/__init__.py b/src/pynwb/__init__.py index 7cf32e074..6e3b3104f 100644 --- a/src/pynwb/__init__.py +++ b/src/pynwb/__init__.py @@ -261,7 +261,8 @@ def __init__(self, **kwargs): popargs('path', 'mode', 'manager', 'extensions', 'load_namespaces', 'file', 'comm', 'driver', 'herd_path', kwargs) # Define the BuildManager to use - if mode in 'wx' or manager is not None or extensions is not None: + io_modes_that_create_file = ['w', 'w-', 'x'] + if mode in io_modes_that_create_file or manager is not None or extensions is not None: load_namespaces = False if load_namespaces: diff --git a/tests/integration/hdf5/test_io.py b/tests/integration/hdf5/test_io.py index 0fd790073..d68334c89 100644 --- a/tests/integration/hdf5/test_io.py +++ b/tests/integration/hdf5/test_io.py @@ -3,6 +3,7 @@ import numpy as np from h5py import File from pathlib import Path +import tempfile from pynwb import NWBFile, TimeSeries, get_manager, NWBHDF5IO, validate @@ -14,6 +15,7 @@ from pynwb.spec import NWBGroupSpec, NWBDatasetSpec, NWBNamespace from pynwb.ecephys import ElectricalSeries, LFP from pynwb.testing import remove_test_file, TestCase +from pynwb.testing.mock.file import mock_NWBFile class TestHDF5Writer(TestCase): @@ -122,6 +124,19 @@ def test_write_no_cache_spec(self): with File(self.path, 'r') as f: self.assertNotIn('specifications', f) + def test_file_creation_io_modes(self): + io_modes_that_create_file = ["w", "w-", "x"] + + with tempfile.TemporaryDirectory() as temp_dir: + temp_dir = Path(temp_dir) + for io_mode in io_modes_that_create_file: + file_path = temp_dir / f"test_io_mode={io_mode}.nwb" + + # Test file creation + nwbfile = mock_NWBFile() + with NWBHDF5IO(str(file_path), io_mode) as io: + io.write(nwbfile) + class TestHDF5WriterWithInjectedFile(TestCase): From d9a409d1456c0477e2cb393f0323b82993f6556e Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Tue, 19 Dec 2023 17:41:44 -0500 Subject: [PATCH 10/21] change from error to warn when timeseries has a zero rate (#1809) * only warn when writing zero rates instead of error * update changelog --- CHANGELOG.md | 2 +- src/pynwb/base.py | 6 ++++-- tests/unit/test_base.py | 10 ++++++---- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09bc731e9..b096f6bdc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ - For `NWBHDF5IO()`, change the default of arg `load_namespaces` from `False` to `True`. @bendichter [#1748](https://github.com/NeurodataWithoutBorders/pynwb/pull/1748) - Add `NWBHDF5IO.can_read()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703) - Add `pynwb.get_nwbfile_version()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703) -- Updated timeseries data checks to warn instead of error when reading invalid files. @stephprince [#1793](https://github.com/NeurodataWithoutBorders/pynwb/pull/1793) +- Updated timeseries data checks to warn instead of error when reading invalid files. @stephprince [#1793](https://github.com/NeurodataWithoutBorders/pynwb/pull/1793) and [#1809](https://github.com/NeurodataWithoutBorders/pynwb/pull/1809) - Expose the offset, conversion and channel conversion parameters in `mock_ElectricalSeries`. 
@h-mayorquin [#1796](https://github.com/NeurodataWithoutBorders/pynwb/pull/1796) ### Bug fixes diff --git a/src/pynwb/base.py b/src/pynwb/base.py index 5514288e8..846266853 100644 --- a/src/pynwb/base.py +++ b/src/pynwb/base.py @@ -187,10 +187,12 @@ def __init__(self, **kwargs): if isinstance(timestamps, TimeSeries): timestamps.__add_link('timestamp_link', self) elif self.rate is not None: - if self.rate <= 0: + if self.rate < 0: self._error_on_new_warn_on_construct( - error_msg='Rate must be a positive value.' + error_msg='Rate must not be a negative value.' ) + elif self.rate == 0.0 and get_data_shape(data)[0] > 1: + warn('Timeseries has a rate of 0.0 Hz, but the length of the data is greater than 1.') if self.starting_time is None: # override default if rate is provided but not starting time self.starting_time = 0.0 self.starting_time_unit = self.__time_unit diff --git a/tests/unit/test_base.py b/tests/unit/test_base.py index 899628190..ad4ce6739 100644 --- a/tests/unit/test_base.py +++ b/tests/unit/test_base.py @@ -406,10 +406,12 @@ def test_get_data_in_units(self): assert_array_equal(ts.get_data_in_units(), [1., 2., 3.]) def test_non_positive_rate(self): - with self.assertRaisesWith(ValueError, 'Rate must be a positive value.'): + with self.assertRaisesWith(ValueError, 'Rate must not be a negative value.'): TimeSeries(name='test_ts', data=list(), unit='volts', rate=-1.0) - with self.assertRaisesWith(ValueError, 'Rate must be a positive value.'): - TimeSeries(name='test_ts1', data=list(), unit='volts', rate=0.0) + + with self.assertWarnsWith(UserWarning, + 'Timeseries has a rate of 0.0 Hz, but the length of the data is greater than 1.'): + TimeSeries(name='test_ts1', data=[1, 2, 3], unit='volts', rate=0.0) def test_file_with_non_positive_rate_in_construct_mode(self): """Test that UserWarning is raised when rate is 0 or negative @@ -419,7 +421,7 @@ def test_file_with_non_positive_rate_in_construct_mode(self): parent=None, object_id="test", in_construct_mode=True) - with self.assertWarnsWith(warn_type=UserWarning, exc_msg='Rate must be a positive value.'): + with self.assertWarnsWith(warn_type=UserWarning, exc_msg='Rate must not be a negative value.'): obj.__init__( name="test_ts", data=list(), From ac17d17799b1d17a09e259ed187db9fc1f7d0808 Mon Sep 17 00:00:00 2001 From: Steph Prince <40640337+stephprince@users.noreply.github.com> Date: Wed, 20 Dec 2023 15:00:18 -0500 Subject: [PATCH 11/21] update versioneer configuration to fix reported readthedocs version (#1810) * update _version.py configuration * update changelog --------- Co-authored-by: Ryan Ly --- CHANGELOG.md | 1 + setup.cfg | 1 + src/pynwb/_version.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b096f6bdc..60f0e42fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ ### Bug fixes - Fix bug where namespaces were loaded in "w-" mode. 
@h-mayorquin [#1795](https://github.com/NeurodataWithoutBorders/pynwb/pull/1795) +- Fix bug where pynwb version was reported as "unknown" to readthedocs @stephprince [#1810](https://github.com/NeurodataWithoutBorders/pynwb/pull/1810) ## PyNWB 2.5.0 (August 18, 2023) diff --git a/setup.cfg b/setup.cfg index cccacf048..0b7c5ee50 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,6 +3,7 @@ VCS = git versionfile_source = src/pynwb/_version.py versionfile_build = pynwb/_version.py tag_prefix = '' +style = pep440-pre [flake8] max-line-length = 120 diff --git a/src/pynwb/_version.py b/src/pynwb/_version.py index 57dfeb9fc..bf16355e1 100644 --- a/src/pynwb/_version.py +++ b/src/pynwb/_version.py @@ -44,7 +44,7 @@ def get_config(): cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "pep440-pre" - cfg.tag_prefix = "*.*.*" + cfg.tag_prefix = "" cfg.parentdir_prefix = "None" cfg.versionfile_source = "src/pynwb/_version.py" cfg.verbose = False From 17b5621eb52e5158b8950930dbb3e69e834fd230 Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Thu, 21 Dec 2023 03:17:18 +0100 Subject: [PATCH 12/21] Make `get_data_in_units` work with `channel_conversion` (#1806) Co-authored-by: Ryan Ly --- CHANGELOG.md | 1 + src/pynwb/base.py | 6 +++++- tests/unit/test_ecephys.py | 19 +++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60f0e42fa..078eb5ce3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - Add `pynwb.get_nwbfile_version()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703) - Updated timeseries data checks to warn instead of error when reading invalid files. @stephprince [#1793](https://github.com/NeurodataWithoutBorders/pynwb/pull/1793) and [#1809](https://github.com/NeurodataWithoutBorders/pynwb/pull/1809) - Expose the offset, conversion and channel conversion parameters in `mock_ElectricalSeries`. @h-mayorquin [#1796](https://github.com/NeurodataWithoutBorders/pynwb/pull/1796) +- Enhance `get_data_in_units()` to work with objects that have a `channel_conversion` attribute like the `ElectricalSeries` @h-mayorquin [#1806](https://github.com/NeurodataWithoutBorders/pynwb/pull/1806) ### Bug fixes - Fix bug where namespaces were loaded in "w-" mode. @h-mayorquin [#1795](https://github.com/NeurodataWithoutBorders/pynwb/pull/1795) diff --git a/src/pynwb/base.py b/src/pynwb/base.py index 846266853..42f7b7ff3 100644 --- a/src/pynwb/base.py +++ b/src/pynwb/base.py @@ -298,7 +298,11 @@ def get_timestamps(self): return np.arange(len(self.data)) / self.rate + self.starting_time def get_data_in_units(self): - return np.asarray(self.data) * self.conversion + self.offset + if "channel_conversion" in self.fields: + scale_factor = self.conversion * self.channel_conversion[:, np.newaxis] + else: + scale_factor = self.conversion + return np.asarray(self.data) * scale_factor + self.offset @register_class('Image', CORE_NAMESPACE) diff --git a/tests/unit/test_ecephys.py b/tests/unit/test_ecephys.py index 6f76a5e8c..f81b61f84 100644 --- a/tests/unit/test_ecephys.py +++ b/tests/unit/test_ecephys.py @@ -18,6 +18,7 @@ from pynwb.device import Device from pynwb.file import ElectrodeTable from pynwb.testing import TestCase +from pynwb.testing.mock.ecephys import mock_ElectricalSeries from hdmf.common import DynamicTableRegion @@ -115,6 +116,24 @@ def test_dimensions_warning(self): "but instead the first does. Data is oriented incorrectly and should be transposed." 
) in str(w[-1].message) + def test_get_data_in_units(self): + + data = np.asarray([[1, 1, 1, 1, 1], [1, 1, 1, 1, 1]]) + conversion = 1.0 + offset = 3.0 + channel_conversion = np.asarray([2.0, 2.0]) + electrical_series = mock_ElectricalSeries( + data=data, + conversion=conversion, + offset=offset, + channel_conversion=channel_conversion, + ) + + data_in_units = electrical_series.get_data_in_units() + expected_data = data * conversion * channel_conversion[:, np.newaxis] + offset + + np.testing.assert_almost_equal(data_in_units, expected_data) + class SpikeEventSeriesConstructor(TestCase): From 649800088ffb4bffb95bf0f55302d82537a8d78f Mon Sep 17 00:00:00 2001 From: Heberto Mayorquin Date: Fri, 22 Dec 2023 01:43:41 +0100 Subject: [PATCH 13/21] Expose starting time in `mock_ElectricalSeries` (#1805) Co-authored-by: Ryan Ly --- CHANGELOG.md | 3 ++- src/pynwb/testing/mock/ecephys.py | 2 ++ src/pynwb/testing/mock/ophys.py | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 078eb5ce3..f38f6001d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,8 @@ - Add `pynwb.get_nwbfile_version()`. @bendichter [#1703](https://github.com/NeurodataWithoutBorders/pynwb/pull/1703) - Updated timeseries data checks to warn instead of error when reading invalid files. @stephprince [#1793](https://github.com/NeurodataWithoutBorders/pynwb/pull/1793) and [#1809](https://github.com/NeurodataWithoutBorders/pynwb/pull/1809) - Expose the offset, conversion and channel conversion parameters in `mock_ElectricalSeries`. @h-mayorquin [#1796](https://github.com/NeurodataWithoutBorders/pynwb/pull/1796) -- Enhance `get_data_in_units()` to work with objects that have a `channel_conversion` attribute like the `ElectricalSeries` @h-mayorquin [#1806](https://github.com/NeurodataWithoutBorders/pynwb/pull/1806) +- Expose `starting_time` in `mock_ElectricalSeries`. @h-mayorquin [#1805](https://github.com/NeurodataWithoutBorders/pynwb/pull/1805) +- Enhance `get_data_in_units()` to work with objects that have a `channel_conversion` attribute like the `ElectricalSeries`. @h-mayorquin [#1806](https://github.com/NeurodataWithoutBorders/pynwb/pull/1806) ### Bug fixes - Fix bug where namespaces were loaded in "w-" mode. 
@h-mayorquin [#1795](https://github.com/NeurodataWithoutBorders/pynwb/pull/1795) diff --git a/src/pynwb/testing/mock/ecephys.py b/src/pynwb/testing/mock/ecephys.py index 8afa9ebc3..54edf7680 100644 --- a/src/pynwb/testing/mock/ecephys.py +++ b/src/pynwb/testing/mock/ecephys.py @@ -71,6 +71,7 @@ def mock_ElectricalSeries( data=None, rate: float = 30000.0, timestamps=None, + starting_time: Optional[float] = None, electrodes: Optional[DynamicTableRegion] = None, filtering: str = "filtering", nwbfile: Optional[NWBFile] = None, @@ -83,6 +84,7 @@ def mock_ElectricalSeries( description=description, data=data if data is not None else np.ones((10, 5)), rate=rate, + starting_time=starting_time, timestamps=timestamps, electrodes=electrodes or mock_electrodes(nwbfile=nwbfile), filtering=filtering, diff --git a/src/pynwb/testing/mock/ophys.py b/src/pynwb/testing/mock/ophys.py index d9ba02572..cd99d5957 100644 --- a/src/pynwb/testing/mock/ophys.py +++ b/src/pynwb/testing/mock/ophys.py @@ -132,6 +132,7 @@ def mock_OnePhotonSeries( conversion=conversion, timestamps=timestamps, starting_time=starting_time, + offset=offset, rate=rate, comments=comments, description=description, @@ -162,6 +163,7 @@ def mock_TwoPhotonSeries( dimension=None, resolution=-1.0, conversion=1.0, + offset=0.0, timestamps=None, starting_time=None, comments="no comments", @@ -194,6 +196,7 @@ def mock_TwoPhotonSeries( control=control, control_description=control_description, device=device, + offset=offset, ) if nwbfile is not None: From 883b1b15810b4a040698c9e40d717f143b3a3248 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 22 Dec 2023 14:55:13 -0500 Subject: [PATCH 14/21] RF: centralize invocation of coverage within run_coverage and use sys.executable -m coverage (#1811) Co-authored-by: Ryan Ly --- CHANGELOG.md | 1 + tests/validation/test_validate.py | 84 ++++++++++++------------------- 2 files changed, 34 insertions(+), 51 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f38f6001d..c5cfa5a9e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - Expose the offset, conversion and channel conversion parameters in `mock_ElectricalSeries`. @h-mayorquin [#1796](https://github.com/NeurodataWithoutBorders/pynwb/pull/1796) - Expose `starting_time` in `mock_ElectricalSeries`. @h-mayorquin [#1805](https://github.com/NeurodataWithoutBorders/pynwb/pull/1805) - Enhance `get_data_in_units()` to work with objects that have a `channel_conversion` attribute like the `ElectricalSeries`. @h-mayorquin [#1806](https://github.com/NeurodataWithoutBorders/pynwb/pull/1806) +- Refactor validation CLI tests to use `{sys.executable} -m coverage` to use the same Python version and run correctly on Debian systems. @yarikoptic [#1811](https://github.com/NeurodataWithoutBorders/pynwb/pull/1811) ### Bug fixes - Fix bug where namespaces were loaded in "w-" mode. @h-mayorquin [#1795](https://github.com/NeurodataWithoutBorders/pynwb/pull/1795) diff --git a/tests/validation/test_validate.py b/tests/validation/test_validate.py index 6aa2ee25e..c2829ee1f 100644 --- a/tests/validation/test_validate.py +++ b/tests/validation/test_validate.py @@ -1,5 +1,6 @@ import subprocess import re +import sys from unittest.mock import patch from io import StringIO import warnings @@ -8,26 +9,35 @@ from pynwb import validate, NWBHDF5IO +# NOTE we use "coverage run -m pynwb.validate" instead of "python -m pynwb.validate" +# so that we can both test pynwb.validate and compute code coverage from that test. 
+# NOTE we also use "coverage run -p" which will generate a .coverage file with the +# machine name, process id, and a random number appended to the filename to +# simplify collecting and merging coverage data from multiple subprocesses. if "-p" +# is not used, then each "coverage run" will overwrite the .coverage file from a +# previous "coverage run". +# NOTE we run "coverage" as "{sys.executable} -m coverage" to 1. make sure to use +# the same python version, and on Debian systems executable is "python3-coverage", not +# just "coverage". +# NOTE the run_coverage.yml GitHub Action runs "python -m coverage combine" to +# combine the individual coverage reports into one .coverage file. +def run_coverage(extra_args: list[str]): + return subprocess.run( + [sys.executable, "-m", "coverage", "run", "-p", "-m", "pynwb.validate"] + + extra_args, + capture_output=True + ) + + class TestValidateCLI(TestCase): # 1.0.2_nwbfile.nwb has no cached specifications # 1.0.3_nwbfile.nwb has cached "core" specification # 1.1.2_nwbfile.nwb has cached "core" and "hdmf-common" specifications - # NOTE we use "coverage run -m pynwb.validate" instead of "python -m pynwb.validate" - # so that we can both test pynwb.validate and compute code coverage from that test. - # NOTE we also use "coverage run -p" which will generate a .coverage file with the - # machine name, process id, and a random number appended to the filename to - # simplify collecting and merging coverage data from multiple subprocesses. if "-p" - # is not used, then each "coverage run" will overwrite the .coverage file from a - # previous "coverage run". - # NOTE the run_coverage.yml GitHub Action runs "python -m coverage combine" to - # combine the individual coverage reports into one .coverage file. - def test_validate_file_no_cache(self): """Test that validating a file with no cached spec against the core namespace succeeds.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", - "tests/back_compat/1.0.2_nwbfile.nwb"], capture_output=True) + result = run_coverage(["tests/back_compat/1.0.2_nwbfile.nwb"]) stderr_regex = re.compile( r"The file tests/back_compat/1\.0\.2_nwbfile\.nwb has no cached namespace information\. " @@ -42,8 +52,7 @@ def test_validate_file_no_cache(self): def test_validate_file_no_cache_bad_ns(self): """Test that validating a file with no cached spec against a specified, unknown namespace fails.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", "tests/back_compat/1.0.2_nwbfile.nwb", - "--ns", "notfound"], capture_output=True) + result = run_coverage(["tests/back_compat/1.0.2_nwbfile.nwb", "--ns", "notfound"]) stderr_regex = re.compile( r"The file tests/back_compat/1\.0\.2_nwbfile\.nwb has no cached namespace information\. 
" @@ -57,8 +66,7 @@ def test_validate_file_no_cache_bad_ns(self): def test_validate_file_cached(self): """Test that validating a file with cached spec against its cached namespace succeeds.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", - "tests/back_compat/1.1.2_nwbfile.nwb"], capture_output=True) + result = run_coverage(["tests/back_compat/1.1.2_nwbfile.nwb"]) self.assertEqual(result.stderr.decode('utf-8'), '') @@ -69,8 +77,7 @@ def test_validate_file_cached(self): def test_validate_file_cached_bad_ns(self): """Test that validating a file with cached spec against a specified, unknown namespace fails.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", - "tests/back_compat/1.1.2_nwbfile.nwb", "--ns", "notfound"], capture_output=True) + result = run_coverage(["tests/back_compat/1.1.2_nwbfile.nwb", "--ns", "notfound"]) stderr_regex = re.compile( r"The namespace 'notfound' could not be found in cached namespace information as only " @@ -82,8 +89,7 @@ def test_validate_file_cached_bad_ns(self): def test_validate_file_cached_extension(self): """Test that validating a file with cached spec against the cached namespaces succeeds.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", - "tests/back_compat/2.1.0_nwbfile_with_extension.nwb"], capture_output=True) + result = run_coverage(["tests/back_compat/2.1.0_nwbfile_with_extension.nwb"]) self.assertEqual(result.stderr.decode('utf-8'), '') @@ -94,9 +100,7 @@ def test_validate_file_cached_extension(self): def test_validate_file_cached_extension_pass_ns(self): """Test that validating a file with cached spec against the extension namespace succeeds.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", - "tests/back_compat/2.1.0_nwbfile_with_extension.nwb", - "--ns", "ndx-testextension"], capture_output=True) + result = run_coverage(["tests/back_compat/2.1.0_nwbfile_with_extension.nwb", "--ns", "ndx-testextension"]) self.assertEqual(result.stderr.decode('utf-8'), '') @@ -107,9 +111,7 @@ def test_validate_file_cached_extension_pass_ns(self): def test_validate_file_cached_core(self): """Test that validating a file with cached spec against the core namespace succeeds.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", - "tests/back_compat/2.1.0_nwbfile_with_extension.nwb", - "--ns", "core"], capture_output=True) + result = run_coverage(["tests/back_compat/2.1.0_nwbfile_with_extension.nwb", "--ns", "core"]) stdout_regex = re.compile( r"The namespace 'core' is included by the namespace 'ndx-testextension'. " @@ -119,8 +121,7 @@ def test_validate_file_cached_core(self): def test_validate_file_cached_hdmf_common(self): """Test that validating a file with cached spec against the hdmf-common namespace fails.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", "tests/back_compat/1.1.2_nwbfile.nwb", - "--ns", "hdmf-common"], capture_output=True) + result = run_coverage(["tests/back_compat/1.1.2_nwbfile.nwb", "--ns", "hdmf-common"]) stderr_regex = re.compile( r"The namespace 'hdmf-common' is included by the namespace 'core'\. 
Please validate against that " @@ -130,8 +131,7 @@ def test_validate_file_cached_hdmf_common(self): def test_validate_file_cached_ignore(self): """Test that validating a file with cached spec against the core namespace succeeds.""" - result = subprocess.run(["coverage", "run", "-p", "-m", "pynwb.validate", "tests/back_compat/1.1.2_nwbfile.nwb", - "--no-cached-namespace"], capture_output=True) + result = run_coverage(["tests/back_compat/1.1.2_nwbfile.nwb", "--no-cached-namespace"]) self.assertEqual(result.stderr.decode('utf-8'), '') @@ -142,13 +142,7 @@ def test_validate_file_cached_ignore(self): def test_validate_file_invalid(self): """Test that validating an invalid file outputs errors.""" - result = subprocess.run( - [ - "coverage", "run", "-p", "-m", "pynwb.validate", "tests/back_compat/1.0.2_str_experimenter.nwb", - "--no-cached-namespace" - ], - capture_output=True - ) + result = run_coverage(["tests/back_compat/1.0.2_str_experimenter.nwb", "--no-cached-namespace"]) stderr_regex = re.compile( r" - found the following errors:\s*" @@ -164,13 +158,7 @@ def test_validate_file_invalid(self): def test_validate_file_list_namespaces_core(self): """Test listing namespaces from a file""" - result = subprocess.run( - [ - "coverage", "run", "-p", "-m", "pynwb.validate", "tests/back_compat/1.1.2_nwbfile.nwb", - "--list-namespaces" - ], - capture_output=True - ) + result = run_coverage(["tests/back_compat/1.1.2_nwbfile.nwb", "--list-namespaces"]) self.assertEqual(result.stderr.decode('utf-8'), '') @@ -179,13 +167,7 @@ def test_validate_file_list_namespaces_core(self): def test_validate_file_list_namespaces_extension(self): """Test listing namespaces from a file with an extension""" - result = subprocess.run( - [ - "coverage", "run", "-p", "-m", "pynwb.validate", "tests/back_compat/2.1.0_nwbfile_with_extension.nwb", - "--list-namespaces" - ], - capture_output=True - ) + result = run_coverage(["tests/back_compat/2.1.0_nwbfile_with_extension.nwb", "--list-namespaces"]) self.assertEqual(result.stderr.decode('utf-8'), '') From 306048faa5153b31d48cf0d2526ee68939a381d8 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 10 Jan 2024 10:59:56 -0800 Subject: [PATCH 15/21] Fix broken links in docs (#1816) --- docs/source/install_users.rst | 2 +- docs/source/software_process.rst | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/install_users.rst b/docs/source/install_users.rst index 6e33c2035..368ab7bd0 100644 --- a/docs/source/install_users.rst +++ b/docs/source/install_users.rst @@ -34,7 +34,7 @@ This will automatically install the following required dependencies: Install release from Conda-forge -------------------------------- -`Conda-forge `_ is a community led collection of recipes, build infrastructure +`Conda-forge `_ is a community led collection of recipes, build infrastructure and distributions for the `conda `_ package manager. To install or update PyNWB distribution from conda-forge using conda simply run: diff --git a/docs/source/software_process.rst b/docs/source/software_process.rst index f2ccb335d..07fd97246 100644 --- a/docs/source/software_process.rst +++ b/docs/source/software_process.rst @@ -17,7 +17,7 @@ tested on all supported operating systems and python distributions. That way, as a contributor, you know if you introduced regressions or coding style inconsistencies. -There are badges in the :pynwb:`README <#readme>` file which shows +There are badges in the :pynwb:`README ` file which shows the current condition of the dev branch. 
-------- @@ -25,7 +25,7 @@ Coverage -------- Code coverage is computed and reported using the coverage_ tool. There are two coverage-related -badges in the :pynwb:`README <#readme>` file. One shows the status of the :pynwb:`GitHub Action workflow ` which runs the coverage_ tool and uploads the report to +badges in the :pynwb:`README ` file. One shows the status of the :pynwb:`GitHub Action workflow ` which runs the coverage_ tool and uploads the report to codecov_, and the other badge shows the percentage coverage reported from codecov_. A detailed report can be found on codecov_, which shows line by line which lines are covered by the tests. From 60ec8fe436c2eea1ec60b762648b5914165d707b Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 10 Jan 2024 16:13:11 -0800 Subject: [PATCH 16/21] Update dandi req in environment-ros3.yml (#1817) --- environment-ros3.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment-ros3.yml b/environment-ros3.yml index ae15e985c..2bf2678d2 100644 --- a/environment-ros3.yml +++ b/environment-ros3.yml @@ -12,7 +12,7 @@ dependencies: - pandas==2.0.0 - python-dateutil==2.8.2 - setuptools - - dandi==0.55.1 # NOTE: dandi does not support osx-arm64 + - dandi==0.59.0 # NOTE: dandi does not support osx-arm64 - fsspec==2023.6.0 - requests==2.28.1 - aiohttp==3.8.3 From 15290289f6304a3a83c6fcf8bf4110c15dc1683c Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Thu, 11 Jan 2024 16:38:29 -0800 Subject: [PATCH 17/21] Disable nightly dandi read workflow for now (#1794) --- .github/workflows/run_dandi_read_tests.yml | 25 ++++++++----------- setup.cfg | 2 +- ..._read_dandi.py => read_first_nwb_asset.py} | 19 ++++++++++---- 3 files changed, 25 insertions(+), 21 deletions(-) rename tests/read_dandi/{test_read_dandi.py => read_first_nwb_asset.py} (79%) diff --git a/.github/workflows/run_dandi_read_tests.yml b/.github/workflows/run_dandi_read_tests.yml index 857b32c9a..7148d209e 100644 --- a/.github/workflows/run_dandi_read_tests.yml +++ b/.github/workflows/run_dandi_read_tests.yml @@ -1,15 +1,15 @@ name: Run DANDI read tests on: - schedule: - - cron: '0 6 * * *' # once per day at 1am ET + # NOTE this is disabled until we can run this systematically instead of randomly + # so we don't get constant error notifications and waste compute cycles + # See https://github.com/NeurodataWithoutBorders/pynwb/issues/1804 + # schedule: + # - cron: '0 6 * * *' # once per day at 1am ET workflow_dispatch: jobs: run-tests: runs-on: ubuntu-latest - defaults: - run: - shell: bash -l {0} # necessary for conda steps: - name: Cancel non-latest runs uses: styfle/cancel-workflow-action@0.11.0 @@ -22,19 +22,14 @@ jobs: submodules: 'recursive' fetch-depth: 0 # tags are required for versioneer to determine the version - - name: Set up Conda - uses: conda-incubator/setup-miniconda@v2 + - name: Set up Python + uses: actions/setup-python@v4 with: - auto-update-conda: true - activate-environment: ros3 - environment-file: environment-ros3.yml - python-version: "3.11" - channels: conda-forge - auto-activate-base: false + python-version: '3.11' - name: Install run dependencies run: | - python -m pip install dandi pytest + python -m pip install dandi fsspec requests aiohttp pytest python -m pip uninstall -y pynwb # uninstall pynwb python -m pip install -e . 
          python -m pip list

      - name: Run DANDI read tests
        run: |
-          python tests/read_dandi/test_read_dandi.py
+          python tests/read_dandi/read_first_nwb_asset.py
diff --git a/setup.cfg b/setup.cfg
index 0b7c5ee50..d44fcc2b1 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -29,7 +29,7 @@ per-file-ignores =
     tests/integration/__init__.py:F401
     src/pynwb/testing/__init__.py:F401
     src/pynwb/validate.py:T201
-    tests/read_dandi/test_read_dandi.py:T201
+    tests/read_dandi/read_first_nwb_asset.py:T201
     setup.py:T201
     test.py:T201
     scripts/*:T201
diff --git a/tests/read_dandi/test_read_dandi.py b/tests/read_dandi/read_first_nwb_asset.py
similarity index 79%
rename from tests/read_dandi/test_read_dandi.py
rename to tests/read_dandi/read_first_nwb_asset.py
index f9dafd938..895dbb1c2 100644
--- a/tests/read_dandi/test_read_dandi.py
+++ b/tests/read_dandi/read_first_nwb_asset.py
@@ -1,5 +1,7 @@
-"""Test reading NWB files from the DANDI Archive using ROS3."""
+"""Test reading NWB files from the DANDI Archive using fsspec."""
 from dandi.dandiapi import DandiAPIClient
+import fsspec
+import h5py
 import random
 import sys
 import traceback
@@ -10,9 +12,12 @@
 # NOTE: do not name the function with "test_" prefix, otherwise pytest
 # will try to run it as a test
+# TODO read dandisets systematically, not randomly
+# see https://github.com/NeurodataWithoutBorders/pynwb/issues/1804
+
 def read_first_nwb_asset():
-    """Test reading the first NWB asset from a random selection of 50 dandisets that uses NWB."""
-    num_dandisets_to_read = 50
+    """Test reading the first NWB asset from a random selection of 2 dandisets that uses NWB."""
+    num_dandisets_to_read = 2
     client = DandiAPIClient()
     dandisets = list(client.get_dandisets())
     random.shuffle(dandisets)
@@ -20,6 +25,8 @@ def read_first_nwb_asset():
     print("Reading NWB files from the following dandisets:")
     print([d.get_raw_metadata()["identifier"] for d in dandisets_to_read])
 
+    fs = fsspec.filesystem("http")
+
    failed_reads = dict()
    for i, dandiset in enumerate(dandisets_to_read):
        dandiset_metadata = dandiset.get_raw_metadata()
@@ -47,8 +54,10 @@
        s3_url = first_asset.get_content_url(follow_redirects=1, strip_query=True)

        try:
-            with NWBHDF5IO(path=s3_url, driver="ros3") as io:
-                io.read()
+            with fs.open(s3_url, "rb") as f:
+                with h5py.File(f) as file:
+                    with NWBHDF5IO(file=file) as io:
+                        io.read()
        except Exception as e:
            print(traceback.format_exc())
            failed_reads[dandiset] = e
From 51a113f094be5e3c8b8d002f1a644c60a8fc098c Mon Sep 17 00:00:00 2001
From: Ben Dichter
Date: Sat, 13 Jan 2024 03:30:17 -0500
Subject: [PATCH 18/21] Streaming add remfile (#1761)

Co-authored-by: Ryan Ly
---
 CHANGELOG.md                          |  3 +++
 docs/gallery/advanced_io/streaming.py | 35 +++++++++++++++++++++++++--
 environment-ros3.yml                  |  3 +++
 3 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c5cfa5a9e..e2161a28b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,9 @@
 - Fix bug where namespaces were loaded in "w-" mode.
@h-mayorquin [#1795](https://github.com/NeurodataWithoutBorders/pynwb/pull/1795) - Fix bug where pynwb version was reported as "unknown" to readthedocs @stephprince [#1810](https://github.com/NeurodataWithoutBorders/pynwb/pull/1810) +### Documentation and tutorial enhancements +- Add RemFile to streaming tutorial @bendichter [#1761](https://github.com/NeurodataWithoutBorders/pynwb/pull/1761) + ## PyNWB 2.5.0 (August 18, 2023) ### Enhancements and minor changes diff --git a/docs/gallery/advanced_io/streaming.py b/docs/gallery/advanced_io/streaming.py index fb34b498b..101400c2d 100644 --- a/docs/gallery/advanced_io/streaming.py +++ b/docs/gallery/advanced_io/streaming.py @@ -90,6 +90,9 @@ # `fsspec documentation on known implementations `_ # for a full updated list of supported store formats. # +# One downside of this fsspec method is that fsspec is not optimized for reading HDF5 files, and so streaming data +# using this method can be slow. A faster alternative is ``remfile`` described below. +# # Streaming Method 2: ROS3 # ------------------------ # ROS3 stands for "read only S3" and is a driver created by the HDF5 Group that allows HDF5 to read HDF5 files stored @@ -120,15 +123,43 @@ # # pip uninstall h5py # conda install -c conda-forge "h5py>=3.2" +# +# Besides the extra burden of installing h5py from a non-PyPI source, one downside of this ROS3 method is that +# this method does not support automatic retries in case the connection fails. + + +################################################## +# Method 3: remfile +# ----------------- +# ``remfile`` is another library that enables indexing and streaming of files in s3. remfile is simple, fast, and +# allows for caching of data in the local filesystem. The caveats of ``remfile`` are that it is a very new project +# that has not been tested in a variety of use-cases and caching options are limited compared to ``fsspec``. +# You can install ``remfile`` with pip: +# +# .. code-block:: bash +# +# pip install remfile +# + +import h5py +from pynwb import NWBHDF5IO +import remfile + +rem_file = remfile.File(s3_url) +with h5py.File(rem_file, "r") as h5py_file: + with NWBHDF5IO(file=h5py_file, load_namespaces=True) as io: + nwbfile = io.read() + print(nwbfile.acquisition["lick_times"].time_series["lick_left_times"].data[:]) ################################################## # Which streaming method to choose? # --------------------------------- # # From a user perspective, once opened, the :py:class:`~pynwb.file.NWBFile` works the same with -# both fsspec and ros3. However, in general, we currently recommend using fsspec for streaming -# NWB files because it is more performant and reliable than ros3. In particular fsspec: +# fsspec, ros3, or remfile. However, in general, we currently recommend using fsspec for streaming +# NWB files because it is more performant and reliable than ros3 and more widely tested than remfile. +# In particular, fsspec: # # 1. 
supports caching, which will dramatically speed up repeated requests for the # same region of data, diff --git a/environment-ros3.yml b/environment-ros3.yml index 2bf2678d2..c84b4c090 100644 --- a/environment-ros3.yml +++ b/environment-ros3.yml @@ -16,3 +16,6 @@ dependencies: - fsspec==2023.6.0 - requests==2.28.1 - aiohttp==3.8.3 + - pip + - pip: + - remfile==0.1.9 From 82c4f2bdeec46acfaeddee14b849068d29d5dff7 Mon Sep 17 00:00:00 2001 From: Jeremy Magland Date: Sun, 14 Jan 2024 14:06:49 -0500 Subject: [PATCH 19/21] update remfile verbiage in streaming.py (#1823) --- docs/gallery/advanced_io/streaming.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/gallery/advanced_io/streaming.py b/docs/gallery/advanced_io/streaming.py index 101400c2d..31ce6793c 100644 --- a/docs/gallery/advanced_io/streaming.py +++ b/docs/gallery/advanced_io/streaming.py @@ -131,9 +131,11 @@ ################################################## # Method 3: remfile # ----------------- -# ``remfile`` is another library that enables indexing and streaming of files in s3. remfile is simple, fast, and -# allows for caching of data in the local filesystem. The caveats of ``remfile`` are that it is a very new project -# that has not been tested in a variety of use-cases and caching options are limited compared to ``fsspec``. +# ``remfile`` is another library that enables indexing and streaming of files in s3. remfile is simple and fast, +# especially for the initial load of the nwb file and for accessing small pieces of data. The caveats of ``remfile`` +# are that it is a very new project that has not been tested in a variety of use-cases and caching options are +# limited compared to ``fsspec``. `remfile` is a simple, lightweight dependency with a very small codebase. +# # You can install ``remfile`` with pip: # # .. code-block:: bash @@ -159,11 +161,14 @@ # From a user perspective, once opened, the :py:class:`~pynwb.file.NWBFile` works the same with # fsspec, ros3, or remfile. However, in general, we currently recommend using fsspec for streaming # NWB files because it is more performant and reliable than ros3 and more widely tested than remfile. -# In particular, fsspec: +# However, if you are experiencing long wait times for the initial file load on your network, you +# may want to try remfile. +# +# Advantages of fsspec include: # # 1. supports caching, which will dramatically speed up repeated requests for the # same region of data, # 2. automatically retries when s3 fails to return, which helps avoid errors when accessing data due to -# intermittent errors in connections with S3, +# intermittent errors in connections with S3 (remfile does this as well), # 3. works also with other storage backends (e.g., GoogleDrive or Dropbox, not just S3) and file formats, and # 4. in our experience appears to provide faster out-of-the-box performance than the ros3 driver. 
From dfc19bd8a7576d454013ca74132babd4a42d32f4 Mon Sep 17 00:00:00 2001 From: Zach McKenzie <92116279+zm711@users.noreply.github.com> Date: Tue, 16 Jan 2024 15:03:39 -0500 Subject: [PATCH 20/21] Fix some typos in the gallery examples (#1825) Co-authored-by: Ryan Ly --- CHANGELOG.md | 3 +- docs/gallery/advanced_io/linking_data.py | 2 +- .../advanced_io/plot_iterative_write.py | 40 +++++++++---------- docs/gallery/general/add_remove_containers.py | 2 +- docs/gallery/general/extensions.py | 14 +++---- docs/gallery/general/object_id.py | 2 +- 6 files changed, 32 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2161a28b..2ddcdda7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,8 @@ - Fix bug where pynwb version was reported as "unknown" to readthedocs @stephprince [#1810](https://github.com/NeurodataWithoutBorders/pynwb/pull/1810) ### Documentation and tutorial enhancements -- Add RemFile to streaming tutorial @bendichter [#1761](https://github.com/NeurodataWithoutBorders/pynwb/pull/1761) +- Add RemFile to streaming tutorial. @bendichter [#1761](https://github.com/NeurodataWithoutBorders/pynwb/pull/1761) +- Fix typos and improve clarify throughout tutorials. @zm711 [#1825](https://github.com/NeurodataWithoutBorders/pynwb/pull/1825) ## PyNWB 2.5.0 (August 18, 2023) diff --git a/docs/gallery/advanced_io/linking_data.py b/docs/gallery/advanced_io/linking_data.py index 82824f6cd..2f79d1488 100644 --- a/docs/gallery/advanced_io/linking_data.py +++ b/docs/gallery/advanced_io/linking_data.py @@ -221,7 +221,7 @@ # Step 2: Add the container to another NWBFile # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # To integrate both :py:meth:`~pynwb.base.TimeSeries` into a single file we simply create a new -# :py:meth:`~pynwb.file.NWBFile` and our existing :py:meth:`~pynwb.base.TimeSeries` to it. PyNWB's +# :py:meth:`~pynwb.file.NWBFile` and add our existing :py:meth:`~pynwb.base.TimeSeries` to it. PyNWB's # :py:class:`~pynwb.NWBHDF5IO` backend then automatically detects that the TimeSeries have already # been written to another file and will create external links for us. # diff --git a/docs/gallery/advanced_io/plot_iterative_write.py b/docs/gallery/advanced_io/plot_iterative_write.py index c461cddf8..958981a0b 100644 --- a/docs/gallery/advanced_io/plot_iterative_write.py +++ b/docs/gallery/advanced_io/plot_iterative_write.py @@ -17,7 +17,7 @@ # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # # In the typical write process, datasets are created and written as a whole. In contrast, -# iterative data write refers to the writing of the content of a dataset in an incremental, +# iterative data write refers to the writing of the contents of a dataset in an incremental, # iterative fashion. #################### @@ -32,10 +32,10 @@ # to avoid this problem by writing the data one-subblock-at-a-time, so that we only need to hold # a small subset of the array in memory at any given time. # * **Data streaming** In the context of streaming data we are faced with several issues: -# **1)** data is not available in memory but arrives in subblocks as the stream progresses +# **1)** data is not available in-memory but arrives in subblocks as the stream progresses # **2)** caching the data of a stream in-memory is often prohibitively expensive and volatile # **3)** the total size of the data is often unknown ahead of time. 
-# Iterative data write allows us to address issues 1) and 2) by enabling us to save data to +# Iterative data write allows us to address issues 1) and 2) by enabling us to save data to a # file incrementally as it arrives from the data stream. Issue 3) is addressed in the HDF5 # storage backend via support for chunking, enabling the creation of resizable arrays. # @@ -44,7 +44,7 @@ # data source. # # * **Sparse data arrays** In order to reduce storage size of sparse arrays a challenge is that while -# the data array (e.g., a matrix) may be large, only few values are set. To avoid storage overhead +# the data array (e.g., a matrix) may be large, only a few values are set. To avoid storage overhead # for storing the full array we can employ (in HDF5) a combination of chunking, compression, and # and iterative data write to significantly reduce storage cost for sparse data. # @@ -161,7 +161,7 @@ def write_test_file(filename, data, close_io=True): # # Here we use a simple data generator but PyNWB does not make any assumptions about what happens # inside the generator. Instead of creating data programmatically, you may hence, e.g., receive -# data from an acquisition system (or other source). We can, hence, use the same approach to write streaming data. +# data from an acquisition system (or other source). We can use the same approach to write streaming data. #################### # Step 1: Define the data generator @@ -208,7 +208,7 @@ def iter_sin(chunk_length=10, max_chunks=100): #################### # Discussion # ^^^^^^^^^^ -# Note, we here actually do not know how long our timeseries will be. +# Note, here we don't actually know how long our timeseries will be. print( "maxshape=%s, recommended_data_shape=%s, dtype=%s" @@ -218,7 +218,7 @@ def iter_sin(chunk_length=10, max_chunks=100): #################### # As we can see :py:class:`~hdmf.data_utils.DataChunkIterator` automatically recommends # in its ``maxshape`` that the first dimensions of our array should be unlimited (``None``) and the second -# dimension be ``10`` (i.e., the length of our chunk. Since :py:class:`~hdmf.data_utils.DataChunkIterator` +# dimension should be ``10`` (i.e., the length of our chunk. Since :py:class:`~hdmf.data_utils.DataChunkIterator` # has no way of knowing the minimum size of the array it automatically recommends the size of the first # chunk as the minimum size (i.e, ``(1, 10)``) and also infers the data type automatically from the first chunk. # To further customize this behavior we may also define the ``maxshape``, ``dtype``, and ``buffer_size`` when @@ -227,8 +227,8 @@ def iter_sin(chunk_length=10, max_chunks=100): # .. tip:: # # We here used :py:class:`~hdmf.data_utils.DataChunkIterator` to conveniently wrap our data stream. -# :py:class:`~hdmf.data_utils.DataChunkIterator` assumes that our generators yields in **consecutive order** -# **single** complete element along the **first dimension** of our a array (i.e., iterate over the first +# :py:class:`~hdmf.data_utils.DataChunkIterator` assumes that our generator yields in **consecutive order** +# a **single** complete element along the **first dimension** of our array (i.e., iterate over the first # axis and yield one-element-at-a-time). This behavior is useful in many practical cases. 
However, if
# this strategy does not match our needs, then using :py:class:`~hdmf.data_utils.GenericDataChunkIterator`
# or implementing your own derived :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` may be more
@@ -266,7 +266,7 @@ def __next__(self):
         """
         Return in each iteration a fully occupied data chunk of self.chunk_shape values at a random
         location within the matrix. Chunks are non-overlapping. REMEMBER: h5py does not support all
-        fancy indexing that numpy does so we need to make sure our selection can be
+        the fancy indexing that numpy does, so we need to make sure our selection can be
         handled by the backend.
         """
         if self.__chunks_created < self.num_chunks:
@@ -289,7 +289,7 @@ def __next__(self):
     next = __next__

     def recommended_chunk_shape(self):
-        # Here we can optionally recommend what a good chunking should be.
+        # Here we can optionally recommend what a good chunking could be.
         return self.chunk_shape

     def recommended_data_shape(self):
@@ -379,7 +379,7 @@ def maxshape(self):
# Now let's check out the size of our data file and compare it against the expected full size of our matrix
import os

-expected_size = xsize * ysize * 8  # This is the full size of our matrix in byte
+expected_size = xsize * ysize * 8  # This is the full size of our matrix in bytes
occupied_size = num_values * 8  # Number of non-zero values in our matrix
file_size = os.stat(
    "basic_sparse_iterwrite_example.nwb"
@@ -420,14 +420,14 @@ def maxshape(self):
# A slight overhead (here 0.08MB) is expected because our file also contains the additional objects from
# the NWBFile, plus some overhead for managing all the HDF5 metadata for all objects.
# * **3) vs 2):** Adding compression does not yield any improvement here. This is expected, because, again, we
-# selected the chunking here in a way that we already allocated the minimum amount of storage to represent our data
+# selected the chunking here in a way that we already allocated the minimum amount of storage to represent our data
# and lossless compression of random data is not efficient.
# * **4) vs 2):** When we increase our chunk size to ``(100,100)`` (i.e., ``100x`` larger than the chunks produced by
-# our matrix generator) we observe an according roughly ``100x`` increase in file size. This is expected
+# our matrix generator) we observe a corresponding roughly ``100x`` increase in file size. This is expected
# since our chunks now do not align perfectly with the occupied data and each occupied chunk is allocated fully.
# * **5) vs 4):** When using compression for the larger chunks we see a significant reduction
# in file size (``1.14MB`` vs. ``80MB``). This is because the allocated chunks now contain in addition to the random
-# values large areas of constant fillvalues, which compress easily.
+# values large areas of constant fill values, which compress easily.
#
# **Advantages:**
#
# * Only the data chunks in the HDF5 file that contain non-default values are ever being allocated
# * The overall size of our file is reduced significantly
# * Reduced I/O load
-# * On read users can use the array as usual
+# * On read, users can use the array as usual
#
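
####################
# As a rough sketch of how this comes together (this relies on the
# ``SparseMatrixIterator`` class and the ``xsize``, ``ysize``, ``num_chunks``,
# and ``chunk_shape`` variables defined in the tutorial above; the chunk shape
# and compression settings are illustrative, not prescriptive):

import numpy as np
from pynwb import H5DataIO

matrix = SparseMatrixIterator(shape=(xsize, ysize),
                              num_chunks=num_chunks,
                              chunk_shape=chunk_shape)
data = H5DataIO(data=matrix,
                chunks=(100, 100),    # set the HDF5 chunk shape explicitly
                fillvalue=np.nan,     # value reported for never-written chunks
                compression='gzip',   # lossless compression of allocated chunks
                compression_opts=4)   # gzip level; higher is smaller but slower
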
# .. tip::
#
-#    With great power comes great responsibility **!** I/O and storage cost will depend among others on the chunk size,
-#    compression options, and the write pattern, i.e., the number and structure of the
+#    With great power comes great responsibility **!** I/O and storage cost will depend, among other factors,
+#    on the chunk size, compression options, and the write pattern, i.e., the number and structure of the
#    :py:class:`~hdmf.data_utils.DataChunk` objects written. For example, using ``(1,1)`` chunks and writing them
#    one value at a time would result in poor I/O performance in most practical cases, because of the large number of
#    chunks and large number of small I/O operations required.
@@ -471,7 +471,7 @@
#
# When converting large data files, a typical problem is that it is often too expensive to load all the data
# into memory. This example is very similar to the data generator example, except that instead of generating
-# data on-the-fly in memory we are loading data from a file one-chunk-at-a-time in our generator.
+# data on-the-fly in-memory, we are loading data from a file one-chunk-at-a-time in our generator.
#

####################
@@ -568,7 +568,7 @@ def iter_largearray(filename, shape, dtype="float64"):
# In practice, data from recording devices may be distributed across many files, e.g., one file per time range
# or one file per recording channel. Using iterative data write provides an elegant solution to this problem
# as it allows us to process large arrays one-subarray-at-a-time. To make things more interesting we'll show
-# this for the case where each recording channel (i.e, the second dimension of our ``TimeSeries``) is broken up
+# this for the case where each recording channel (i.e., the second dimension of our ``TimeSeries``) is broken up
# across files.

####################
diff --git a/docs/gallery/general/add_remove_containers.py b/docs/gallery/general/add_remove_containers.py
index 26708f639..80c5cb032 100644
--- a/docs/gallery/general/add_remove_containers.py
+++ b/docs/gallery/general/add_remove_containers.py
@@ -77,7 +77,7 @@
 # modifies the data on disk
 # (the :py:meth:`NWBHDF5IO.write ` method does not need to be called and the
 # :py:class:`~pynwb.NWBHDF5IO` instance does not need to be closed). Directly modifying datasets in this way
-# can lead to files that do not validate or cannot be opened, so take caution when using this method.
+# can lead to files that do not validate or cannot be opened, so exercise caution when using this method.
 # Note: only chunked datasets or datasets with ``maxshape`` set can be resized.
 # See the `h5py chunked storage documentation `_
 # for more details.
diff --git a/docs/gallery/general/extensions.py b/docs/gallery/general/extensions.py
index 66645a660..4ec8f4749 100644
--- a/docs/gallery/general/extensions.py
+++ b/docs/gallery/general/extensions.py
@@ -100,7 +100,7 @@
 # Using extensions
 # -----------------------------------------------------
 #
-# After an extension has been created, it can be used by downstream codes for reading and writing data.
+# After an extension has been created, it can be used by downstream code for reading and writing data.
 # There are two main mechanisms for reading and writing extension data with PyNWB.
 # The first involves defining new :py:class:`~pynwb.core.NWBContainer` classes that are then mapped
 # to the neurodata types in the extension.
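
####################
# As a rough sketch of that first mechanism (this assumes an extension namespace
# named ``potato`` defining a ``Potato`` type has already been built and loaded
# with :py:func:`~pynwb.load_namespaces`; the fields shown are illustrative):

from hdmf.utils import docval, popargs
from pynwb import register_class
from pynwb.core import NWBContainer

@register_class('Potato', 'potato')
class Potato(NWBContainer):
    __nwbfields__ = ('weight', 'age')

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this potato'},
            {'name': 'weight', 'type': float, 'doc': 'weight of the potato, in grams'},
            {'name': 'age', 'type': float, 'doc': 'age of the potato, in days'})
    def __init__(self, **kwargs):
        # Pull our extension-specific fields out of kwargs before delegating
        weight, age = popargs('weight', 'age', kwargs)
        super().__init__(**kwargs)
        self.weight = weight
        self.age = age
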
@@ -167,7 +167,7 @@ def __init__(self, **kwargs): # By default, extensions are cached to file so that your NWB file will carry the extensions needed to read the file # with it. # -# To demonstrate this, first we will make some fake data using our extensions. +# To demonstrate this, first we will make some simulated data using our extensions. from datetime import datetime @@ -370,17 +370,17 @@ class PotatoSack(MultiContainerInterface): nwb = io.read() print(nwb.get_processing_module()["potato_sack"].get_potato("big_potato").weight) # note: you can call get_processing_module() with or without the module name as -# an argument. however, if there is more than one module, the name is required. -# here, there is more than one potato, so the name of the potato is required as -# an argument to get get_potato +# an argument. However, if there is more than one module, the name is required. +# Here, there is more than one potato, so the name of the potato is required as +# an argument to get_potato #################### # Example: Cortical Surface Mesh # ----------------------------------------------------- # # Here we show how to create extensions by creating a data class for a -# cortical surface mesh. This data type is particularly important for ECoG data, we need to know where each electrode is -# with respect to the gyri and sulci. Surface mesh objects contain two types of data: +# cortical surface mesh. This data type is particularly important for ECoG data, since we need to know where +# each electrode is with respect to the gyri and sulci. Surface mesh objects contain two types of data: # # 1. `vertices`, which is an (n, 3) matrix of floats that represents points in 3D space # diff --git a/docs/gallery/general/object_id.py b/docs/gallery/general/object_id.py index 481cbb36a..206142715 100644 --- a/docs/gallery/general/object_id.py +++ b/docs/gallery/general/object_id.py @@ -32,7 +32,7 @@ session_start_time=start_time, ) -# make some fake data +# make some simulated data timestamps = np.linspace(0, 100, 1024) data = ( np.sin(0.333 * timestamps) From 7c6868b77088c846d81c4a055d4d84ea43ff9a78 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Sat, 20 Jan 2024 10:38:14 -0800 Subject: [PATCH 21/21] Fix formatting error in streaming.py (#1821) --- docs/gallery/advanced_io/streaming.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/gallery/advanced_io/streaming.py b/docs/gallery/advanced_io/streaming.py index 31ce6793c..760e2da71 100644 --- a/docs/gallery/advanced_io/streaming.py +++ b/docs/gallery/advanced_io/streaming.py @@ -169,6 +169,6 @@ # 1. supports caching, which will dramatically speed up repeated requests for the # same region of data, # 2. automatically retries when s3 fails to return, which helps avoid errors when accessing data due to -# intermittent errors in connections with S3 (remfile does this as well), +# intermittent errors in connections with S3 (remfile does this as well), # 3. works also with other storage backends (e.g., GoogleDrive or Dropbox, not just S3) and file formats, and # 4. in our experience appears to provide faster out-of-the-box performance than the ros3 driver.
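
####################
# For reference, a minimal sketch of the fsspec-based read pattern described in
# the list above (the S3 URL is a placeholder; substitute the asset URL of a
# real NWB file, e.g., one obtained from the DANDI Archive):

import fsspec
import h5py
from fsspec.implementations.cached import CachingFileSystem
from pynwb import NWBHDF5IO

# Wrap the HTTP filesystem in a local cache so that repeated requests for the
# same region of data are served from disk instead of going back to S3.
fs = CachingFileSystem(
    fs=fsspec.filesystem("http"),
    cache_storage="nwb-cache",  # local directory for cached byte ranges
)

s3_url = "https://dandiarchive.s3.amazonaws.com/path/to/file.nwb"  # placeholder

with fs.open(s3_url, "rb") as f:
    with h5py.File(f) as file:
        with NWBHDF5IO(file=file, load_namespaces=True) as io:
            nwbfile = io.read()
            print(nwbfile.acquisition)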