
Commit
Merge branch 'dev' into tmc
mavaylon1 authored Jan 31, 2024
2 parents 5bd2a25 + c40df49 commit 9fdd1c3
Showing 6 changed files with 104 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -19,6 +19,7 @@
### Documentation and tutorial enhancements
- Add RemFile to streaming tutorial. @bendichter [#1761](https://github.com/NeurodataWithoutBorders/pynwb/pull/1761)
- Fix typos and improve clarity throughout tutorials. @zm711 [#1825](https://github.com/NeurodataWithoutBorders/pynwb/pull/1825)
- Add Zarr IO tutorial. @bendichter [#1834](https://github.com/NeurodataWithoutBorders/pynwb/pull/1834)

## PyNWB 2.5.0 (August 18, 2023)

98 changes: 98 additions & 0 deletions docs/gallery/advanced_io/plot_zarr_io.py
@@ -0,0 +1,98 @@
"""
Zarr IO
=======
Zarr is an alternative backend option for NWB files. It is a Python package that
provides an implementation of chunked, compressed, N-dimensional arrays. Zarr is a good
option for large datasets because, like HDF5, it is designed to store data on disk and
only load the data into memory when needed. Zarr is also a good option for parallel
computing because it supports concurrent reads and writes.

Note that Zarr's native storage format is optimized for cloud object storage
(e.g., S3). For very large files, Zarr will create many files, which can lead to
issues on traditional file systems (that are not cloud object stores) due to limitations
on the number of files per directory (this affects local disk, GDrive, Dropbox, etc.).

Zarr read and write is provided by the :hdmf-zarr:`hdmf-zarr<>` package. First, create
an NWBFile using PyNWB.
"""

# sphinx_gallery_thumbnail_path = 'figures/gallery_thumbnail_plot_nwbzarrio.png'


from datetime import datetime
from dateutil.tz import tzlocal

import numpy as np
from pynwb import NWBFile, TimeSeries

# Create the NWBFile. Substitute your NWBFile generation here.
nwbfile = NWBFile(
    session_description="my first synthetic recording",
    identifier="EXAMPLE_ID",
    session_start_time=datetime.now(tzlocal()),
    session_id="LONELYMTN",
)

#######################################################################################
# Dataset Configuration
# ---------------------
# Like HDF5, Zarr provides options to chunk and compress datasets. To leverage these
# features, replace all :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO` with the
# analogous :py:class:`~hdmf_zarr.utils.ZarrDataIO`, which takes compressors specified
# by the :py:mod:`numcodecs` library. For example, here is a :py:class:`.TimeSeries`
# where the ``data`` Dataset is compressed with a Blosc-zstd compressor:

from numcodecs import Blosc
from hdmf_zarr import ZarrDataIO

data_with_zarr_data_io = ZarrDataIO(
    data=np.random.randn(100, 100),
    chunks=(10, 10),  # store the data in 10 x 10 chunks
    fillvalue=0,
    compressor=Blosc(cname='zstd', clevel=3, shuffle=Blosc.SHUFFLE)
)

#######################################################################################
# Now add it to the :py:class:`.NWBFile`.

nwbfile.add_acquisition(
    TimeSeries(
        name="synthetic_timeseries",
        data=data_with_zarr_data_io,
        unit="m",
        rate=10e3,
    )
)

#######################################################################################
# Writing to Zarr
# ---------------
# To write NWB files to Zarr, replace the :py:class:`~pynwb.NWBHDF5IO` with
# :py:class:`hdmf_zarr.nwb.NWBZarrIO`.

from hdmf_zarr.nwb import NWBZarrIO
import os

path = "zarr_tutorial.nwb.zarr"
absolute_path = os.path.abspath(path)
with NWBZarrIO(path=path, mode="w") as io:
    io.write(nwbfile)
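
#######################################################################################
# Zarr writes the file as a directory store rather than a single file. As a quick
# sanity check (a sketch, assuming the default directory layout), we can list a few of
# the entries that were just created:

print(sorted(os.listdir(absolute_path))[:5])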

#######################################################################################
# .. note::
#    The ``absolute_path`` is used here for testing purposes, to ensure that links and
#    references work as expected. Otherwise, using the relative path here is fine.
#
# Reading from Zarr
# -----------------
# To read NWB files from Zarr, replace the :py:class:`~pynwb.NWBHDF5IO` with the analogous
# :py:class:`hdmf_zarr.nwb.NWBZarrIO`.

with NWBZarrIO(path=absolute_path, mode="r") as io:
    read_nwbfile = io.read()
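
#######################################################################################
# As a minimal sketch of working with the read file (assuming the names used above),
# the acquisition can be retrieved and its data sliced lazily while the file is open:

with NWBZarrIO(path=absolute_path, mode="r") as io:
    read_nwbfile = io.read()
    read_timeseries = read_nwbfile.acquisition["synthetic_timeseries"]
    print(read_timeseries.data[:5, :5])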

#######################################################################################
# .. note::
#    For more information, see the :hdmf-zarr:`hdmf-zarr documentation<>`.
2 changes: 1 addition & 1 deletion docs/gallery/advanced_io/streaming.py
@@ -169,6 +169,6 @@
# 1. supports caching, which will dramatically speed up repeated requests for the
#    same region of data,
# 2. automatically retries when s3 fails to return, which helps avoid errors when accessing data due to
-#    intermittent errors in connections with S3 (remfile does this as well),
+#    intermittent errors in connections with S3 (remfile does this as well),
# 3. works also with other storage backends (e.g., GoogleDrive or Dropbox, not just S3) and file formats, and
# 4. in our experience appears to provide faster out-of-the-box performance than the ros3 driver.
3 changes: 3 additions & 0 deletions docs/source/conf.py
@@ -148,6 +148,8 @@ def __call__(self, filename):
    'fsspec': ("https://filesystem-spec.readthedocs.io/en/latest/", None),
    'nwbwidgets': ("https://nwb-widgets.readthedocs.io/en/latest/", None),
    'nwb-overview': ("https://nwb-overview.readthedocs.io/en/latest/", None),
    'hdmf-zarr': ("https://hdmf-zarr.readthedocs.io/en/latest/", None),
    'numcodecs': ("https://numcodecs.readthedocs.io/en/latest/", None),
}

extlinks = {
@@ -159,6 +161,7 @@
    'hdmf-docs': ('https://hdmf.readthedocs.io/en/stable/%s', '%s'),
    'dandi': ('https://www.dandiarchive.org/%s', '%s'),
    "nwbinspector": ("https://nwbinspector.readthedocs.io/en/dev/%s", "%s"),
    'hdmf-zarr': ('https://hdmf-zarr.readthedocs.io/en/latest/%s', '%s'),
}

# Add any paths that contain templates here, relative to this directory.
Binary file not shown.
1 change: 1 addition & 0 deletions requirements-doc.txt
@@ -12,3 +12,4 @@ dataframe_image # used to render large dataframe as image in the sphinx galler
lxml # used by dataframe_image when using the matplotlib backend
hdf5plugin
dandi>=0.46.6
hdmf-zarr
