Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Short-term fix for errors in reading of metadata-only CDL #129

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ version NEXT
* Fix for zero-sized unlimited dimensions when read from a grouped
netCDF file (https://github.com/NCAS-CMS/cfdm/issues/113)
* Performance enhancements (https://github.com/NCAS-CMS/cfdm/issues/124)
* Better error message when a `numpy.ma.core.MaskError` occurs upon
reading CDL files that contain only header or coordinate information
(https://github.com/NCAS-CMS/cfdm/issues/128)
* Changed dependency: ``cftime>=1.4.1``
* Changed dependency: ``netCDF4>=1.5.4``

Expand Down
40 changes: 30 additions & 10 deletions cfdm/read_write/read.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import os

from numpy.ma.core import MaskError

from ..cfdmimplementation import implementation

from .netcdf import NetCDFRead
Expand Down Expand Up @@ -299,16 +301,34 @@ def read(
filename = netcdf.cdl_to_netcdf(filename)

if netcdf.is_netcdf_file(filename):
fields = netcdf.read(
filename,
external=external,
extra=extra,
verbose=verbose,
warnings=warnings,
warn_valid=warn_valid,
mask=mask,
extra_read_vars=None,
)
# See https://github.com/NCAS-CMS/cfdm/issues/128 for context on the
# try/except here, which acts as a temporary fix pending decisions on
# the best way to handle CDL with only header or coordinate info.
try:
fields = netcdf.read(
filename,
external=external,
extra=extra,
verbose=verbose,
warnings=warnings,
warn_valid=warn_valid,
mask=mask,
extra_read_vars=None,
)
except MaskError:
# Some data required for field interpretation is missing,
# manifesting downstream as a NumPy MaskError.
if cdl:
raise ValueError(
"Unable to convert CDL without data to field construct(s) "
"because there is insufficient information provided by "
"the header and/or coordinates alone in this case."
)
else:
raise ValueError(
"Unable to convert netCDF to field construct(s) because "
"there is missing data."
)
elif cdl:
raise IOError(
"Can't determine format of file {} "
Expand Down
37 changes: 34 additions & 3 deletions cfdm/test/test_read_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
warnings = False

# Set up temporary files
n_tmpfiles = 6
n_tmpfiles = 8
tmpfiles = [
tempfile.mkstemp("_test_read_write.nc", dir=os.getcwd())[1]
for i in range(n_tmpfiles)
Expand All @@ -27,7 +27,9 @@
tmpfile,
tmpfileh,
tmpfileh2,
tmpfileh3,
tmpfilec,
tmpfilec2,
tmpfile0,
tmpfile1,
) = tmpfiles
Expand Down Expand Up @@ -336,6 +338,11 @@ def test_read_CDL(self):
shell=True,
check=True,
)

# For the cases of '-h' and '-c', i.e. only header info or coordinates,
# notably no data, take two cases each: one where there is sufficient
# info from the metadata to map to fields, and one where there isn't:
# 1. Sufficient metadata, so should be read-in successfully
subprocess.run(
" ".join(["ncdump", "-h", self.filename, ">", tmpfileh]),
shell=True,
Expand All @@ -347,11 +354,35 @@ def test_read_CDL(self):
check=True,
)

# 2. Insufficient metadata, so the read should fail with an error saying so
geometry_1_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "geometry_1.nc"
)
subprocess.run(
" ".join(["ncdump", "-h", geometry_1_file, ">", tmpfileh2]),
shell=True,
check=True,
)
subprocess.run(
" ".join(["ncdump", "-c", geometry_1_file, ">", tmpfilec2]),
shell=True,
check=True,
)

f0 = cfdm.read(self.filename)[0]

# Case (1) as above, so read in and check the fields are as expected
f = cfdm.read(tmpfile)[0]
cfdm.read(tmpfileh)[0]
c = cfdm.read(tmpfilec)[0]

# Case (2) as above, so the right error should be raised on read
with self.assertRaises(ValueError):
cfdm.read(tmpfileh2)[0]

with self.assertRaises(ValueError):
cfdm.read(tmpfilec2)[0]

self.assertTrue(f0.equals(f, verbose=3))

self.assertTrue(
Expand Down Expand Up @@ -391,10 +422,10 @@ def test_read_CDL(self):
"-e",
regex,
tmpfileh,
">" + tmpfileh2,
">" + tmpfileh3,
"&&",
"mv",
tmpfileh2,
tmpfileh3,
tmpfileh,
]
),
Expand Down
Loading