diff --git a/docs/api.rst b/docs/api.rst index 24fe9a9a..73f7c04b 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -195,6 +195,8 @@ UGRID1D Topology Ugrid1d.sizes Ugrid1d.attrs Ugrid1d.coords + Ugrid1d.fill_value + Ugrid1d.start_index Ugrid1d.n_node Ugrid1d.node_dimension @@ -258,6 +260,8 @@ UGRID2D Topology Ugrid2d.sizes Ugrid2d.attrs Ugrid2d.coords + Ugrid2d.fill_value + Ugrid2d.start_index Ugrid2d.n_node Ugrid2d.node_dimension diff --git a/docs/changelog.rst b/docs/changelog.rst index 62c793d7..9cc1f95d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -26,6 +26,29 @@ Fixed value; the fill value is also set when calling :meth:`xugrid.UgridDataArrayAccessor.to_netcdf` or :meth:`xugrid.UgridDatasetAccessor.to_netcdf`. + +Added +~~~~~ + +- :class:`xugrid.Ugrid1d` and :class:`xugrid.Ugrid2d` now take an optional + ``start_index`` which controls the start index for the UGRID connectivity + arrays. +- :attr:`xugrid.Ugrid1d.fill_value`, :attr:`xugrid.Ugrid1d.start_index`, + :attr:`xugrid.Ugrid2d.fill_value`, and :attr:`xugrid.Ugrid2d.start_index`, + have been added to get and set the fill value and start index for the UGRID + connectivity arrays. (Internally, every array is 0-based, and has a fill + value of -1.) + +Changed +~~~~~~~ + +- :class:`xugrid.Ugrid1d` and :class:`xugrid.Ugrid2d` will generally preserve + the fill value and start index of grids when roundtripping from and to xarray + Dataset. An exception is when the start index or fill value varies per + connectivity: ``xugrid`` will enforce a single start index and a single fill + value per grid. In case of inconsistent values across connectivity arrays, + the values associated with the core connectivity are used: for Ugrid2d, this + is the face node connectivity. [0.12.0] 2024-09-03 ------------------- diff --git a/tests/test_ugrid1d.py b/tests/test_ugrid1d.py index 56fe04ba..dd121589 100644 --- a/tests/test_ugrid1d.py +++ b/tests/test_ugrid1d.py @@ -110,6 +110,11 @@ def test_ugrid1d_properties(): assert np.array_equal(coords[grid.node_dimension], grid.node_coordinates) assert np.array_equal(coords[grid.edge_dimension], grid.edge_coordinates) + with pytest.raises(ValueError, match="start_index must be 0 or 1, received: 2"): + grid.start_index = 2 + grid.start_index = 1 + assert grid._start_index == 1 + def test_ugrid1d_egde_bounds(): grid = grid1d() diff --git a/tests/test_ugrid2d.py b/tests/test_ugrid2d.py index 944816a8..adf3fda8 100644 --- a/tests/test_ugrid2d.py +++ b/tests/test_ugrid2d.py @@ -167,6 +167,11 @@ def test_ugrid2d_properties(): assert np.array_equal(coords[grid.edge_dimension], grid.edge_coordinates) assert np.array_equal(coords[grid.face_dimension], grid.face_coordinates) + with pytest.raises(ValueError, match="start_index must be 0 or 1, received: 2"): + grid.start_index = 2 + grid.start_index = 1 + assert grid._start_index == 1 + def test_validate_edge_node_connectivity(): # Full test at test_connectivity @@ -332,6 +337,24 @@ def test_ugrid2d_dataset_no_mutation(): assert ds.identical(reference) +@pytest.mark.parametrize("edge_start_index", [0, 1]) +@pytest.mark.parametrize("face_start_index", [0, 1]) +def test_ugrid2d_from_dataset__different_start_index( + face_start_index, edge_start_index +): + grid = grid2d() + ds = grid.to_dataset(optional_attributes=True) # include edge_nodes + faces = ds["mesh2d_face_nodes"].to_numpy() + faces[faces != -1] += face_start_index + ds["mesh2d_face_nodes"].attrs["start_index"] = face_start_index + ds["mesh2d_edge_nodes"] += edge_start_index + ds["mesh2d_edge_nodes"].attrs["start_index"] = edge_start_index + new = xugrid.Ugrid2d.from_dataset(ds) + assert new.start_index == face_start_index + assert np.array_equal(new.face_node_connectivity, grid.face_node_connectivity) + assert np.array_equal(new.edge_node_connectivity, grid.edge_node_connectivity) + + def test_ugrid2d_from_meshkernel(): # Setup a meshkernel Mesh2d mimick class Mesh2d(NamedTuple): diff --git a/tests/test_ugrid_dataset.py b/tests/test_ugrid_dataset.py index 48e65ced..c0db9e52 100644 --- a/tests/test_ugrid_dataset.py +++ b/tests/test_ugrid_dataset.py @@ -1191,19 +1191,48 @@ def test_fm_fillvalue_startindex_isel(): # xugrid 0.6.0 raises "ValueError: Invalid edge_node_connectivity" uds.isel({uds.grid.face_dimension: [1]}) + +def test_alternative_fill_value_start_index(): + uds = get_ugrid_fillvaluem999_startindex1_uds() # Check internal fill value. Should be FILL_VALUE grid = uds.ugrid.grid + assert grid.face_node_connectivity.dtype == "int64" + assert grid.start_index == 1 + assert grid.fill_value == -999 assert (grid.face_node_connectivity != -999).all() gridds = grid.to_dataset() # Should be set back to the origina fill value. - assert (gridds["mesh2d_face_nodes"] != xugrid.constants.FILL_VALUE).all() + faces = gridds["mesh2d_face_nodes"] + assert faces.attrs["start_index"] == 1 + uniq = np.unique(faces) + assert uniq[0] == -999 + assert uniq[1] == 1 # And similarly for the UgridAccessors. ds = uds.ugrid.to_dataset() - assert (ds["mesh2d_face_nodes"] != xugrid.constants.FILL_VALUE).all() + faces = ds["mesh2d_face_nodes"] + assert faces.attrs["start_index"] == 1 + uniq = np.unique(faces) + assert uniq[0] == -999 + assert uniq[1] == 1 ds_uda = uds["mesh2d_facevar"].ugrid.to_dataset() - assert (ds_uda["mesh2d_face_nodes"] != xugrid.constants.FILL_VALUE).all() + faces = ds_uda["mesh2d_face_nodes"] + assert faces.attrs["start_index"] == 1 + uniq = np.unique(faces) + assert uniq[0] == -999 + assert uniq[1] == 1 + + # Alternative value + grid.start_index = 0 + grid.fill_value = -2 + gridds = grid.to_dataset() + # Should be set back to the origina fill value. + faces = gridds["mesh2d_face_nodes"] + assert faces.attrs["start_index"] == 0 + uniq = np.unique(faces) + assert uniq[0] == -2 + assert uniq[1] == 0 def test_fm_facenodeconnectivity_fillvalue(): diff --git a/xugrid/ugrid/ugrid1d.py b/xugrid/ugrid/ugrid1d.py index 4d8f74bf..7f82268c 100644 --- a/xugrid/ugrid/ugrid1d.py +++ b/xugrid/ugrid/ugrid1d.py @@ -51,6 +51,9 @@ class Ugrid1d(AbstractUgrid): UGRID topology attributes. Should not be provided together with dataset: if other names are required, update the dataset instead. A name entry is ignored, as name is given explicitly. + start_index: int, 0 or 1, default is 0. + Start index of the connectivity arrays. Must match the start index + of the provided face_node_connectivity and edge_node_connectivity. """ def __init__( @@ -65,11 +68,13 @@ def __init__( projected: bool = True, crs: Any = None, attrs: Dict[str, str] = None, + start_index: int = 0, ): self.node_x = np.ascontiguousarray(node_x) self.node_y = np.ascontiguousarray(node_y) self.fill_value = fill_value - self.edge_node_connectivity = edge_node_connectivity + self.start_index = start_index + self.edge_node_connectivity = edge_node_connectivity - self.start_index self.name = name self.projected = projected @@ -143,8 +148,10 @@ def from_dataset(cls, dataset: xr.Dataset, topology: str = None): node_y_coordinates = ds[y_index].astype(FloatDType).to_numpy() edge_nodes = connectivity["edge_node_connectivity"] + fill_value = ds[edge_nodes].encoding.get("_FillValue", -1) + start_index = ds[edge_nodes].attrs.get("start_index", 0) edge_node_connectivity = cls._prepare_connectivity( - ds[edge_nodes], FILL_VALUE, dtype=IntDType + ds[edge_nodes], fill_value, dtype=IntDType ).to_numpy() indexes["node_x"] = x_index @@ -154,13 +161,14 @@ def from_dataset(cls, dataset: xr.Dataset, topology: str = None): return cls( node_x_coordinates, node_y_coordinates, - FILL_VALUE, + fill_value, edge_node_connectivity, name=topology, dataset=dataset[ugrid_vars], indexes=indexes, projected=projected, crs=None, + start_index=start_index, ) def _clear_geometry_properties(self): @@ -229,7 +237,7 @@ def to_dataset( data_vars = { self.name: 0, edge_nodes: xr.DataArray( - data=self.edge_node_connectivity, + data=self._adjust_connectivity(self.edge_node_connectivity), attrs=edge_nodes_attrs, dims=(self.edge_dimension, "two"), ), @@ -241,7 +249,7 @@ def to_dataset( dataset = xr.Dataset(data_vars, attrs=attrs) if self._dataset: - dataset.update(self._dataset) + dataset = dataset.merge(self._dataset, compat="override") if other is not None: dataset = dataset.merge(other) if node_x not in dataset or node_y not in dataset: @@ -569,10 +577,10 @@ def topology_subset( new_edges = connectivity.renumber(edge_subset) node_x = self.node_x[node_index] node_y = self.node_y[node_index] - grid = self.__class__( + grid = Ugrid1d( node_x, node_y, - self.fill_value, + FILL_VALUE, new_edges, name=self.name, indexes=self._indexes, @@ -580,6 +588,7 @@ def topology_subset( crs=self.crs, attrs=self._attrs, ) + self._propagate_properties(grid) if return_index: indexes = { self.node_dimension: pd.Index(node_index), diff --git a/xugrid/ugrid/ugrid2d.py b/xugrid/ugrid/ugrid2d.py index 377c1b3e..7bcee102 100644 --- a/xugrid/ugrid/ugrid2d.py +++ b/xugrid/ugrid/ugrid2d.py @@ -82,6 +82,9 @@ class Ugrid2d(AbstractUgrid): UGRID topology attributes. Should not be provided together with dataset: if other names are required, update the dataset instead. A name entry is ignored, as name is given explicitly. + start_index: int, 0 or 1, default is 0. + Start index of the connectivity arrays. Must match the start index + of the provided face_node_connectivity and edge_node_connectivity. """ def __init__( @@ -97,10 +100,12 @@ def __init__( projected: bool = True, crs: Any = None, attrs: Dict[str, str] = None, + start_index: int = 0, ): self.node_x = np.ascontiguousarray(node_x) self.node_y = np.ascontiguousarray(node_y) self.fill_value = fill_value + self.start_index = start_index self.name = name self.projected = projected @@ -113,10 +118,13 @@ def __init__( "face_node_connectivity should be an array of integers or a sparse matrix" ) - # Ensure the fill value is FILL_VALUE (-1) - self.face_node_connectivity[ - self.face_node_connectivity == self.fill_value - ] = FILL_VALUE + # Ensure the fill value is FILL_VALUE (-1) and the array is 0-based. + if self.fill_value != -1 or self.start_index != 0: + is_fill = self.face_node_connectivity == self.fill_value + if self.start_index != 0: + self.face_node_connectivity[~is_fill] -= self.start_index + if self.fill_value != FILL_VALUE: + self.face_node_connectivity[is_fill] = FILL_VALUE # TODO: do this in validation instead. While UGRID conventions demand it, # where does it go wrong? @@ -149,7 +157,9 @@ def __init__( self._edge_x = None self._edge_y = None # Connectivity - self.edge_node_connectivity = edge_node_connectivity + self._edge_node_connectivity = edge_node_connectivity + if self._edge_node_connectivity is not None: + self._edge_node_connectivity -= self.start_index self._edge_face_connectivity = None self._node_node_connectivity = None self._node_edge_connectivity = None @@ -283,6 +293,7 @@ def from_dataset(cls, dataset: xr.Dataset, topology: str = None): face_nodes = connectivity["face_node_connectivity"] fill_value = ds[face_nodes].encoding.get("_FillValue", -1) + start_index = ds[face_nodes].attrs.get("start_index", 0) face_node_connectivity = cls._prepare_connectivity( ds[face_nodes], fill_value, dtype=IntDType ).to_numpy() @@ -292,6 +303,13 @@ def from_dataset(cls, dataset: xr.Dataset, topology: str = None): edge_node_connectivity = cls._prepare_connectivity( ds[edge_nodes], fill_value, dtype=IntDType ).to_numpy() + # Make sure the single passed start index is valid for both + # connectivity arrays. + edge_start_index = ds[edge_nodes].attrs.get("start_index", 0) + if edge_start_index != start_index: + # start_index = 1, edge_start_index = 0, then add one + # start_index = 0, edge_start_index = 1, then subtract one + edge_node_connectivity += start_index - edge_start_index else: edge_node_connectivity = None @@ -310,11 +328,14 @@ def from_dataset(cls, dataset: xr.Dataset, topology: str = None): indexes=indexes, projected=projected, crs=None, + start_index=start_index, ) def _get_name_and_attrs(self, name: str): key = f"{name}_connectivity" attrs = conventions.DEFAULT_ATTRS[key] + if "start_index" in attrs: + attrs["start_index"] = self.start_index if "_FillValue" in attrs: attrs["_FillValue"] = self.fill_value return self._attrs[key], attrs @@ -331,14 +352,14 @@ def to_dataset( data_vars = { self.name: 0, face_nodes: xr.DataArray( - data=self._set_fillvalue(self.face_node_connectivity), + data=self._adjust_connectivity(self.face_node_connectivity), attrs=face_nodes_attrs, dims=(self.face_dimension, nmax_node_dim), ), } if self.edge_node_connectivity is not None or optional_attributes: data_vars[edge_nodes] = xr.DataArray( - data=self.edge_node_connectivity, # has no fill values + data=self._adjust_connectivity(self.edge_node_connectivity), attrs=edge_nodes_attrs, dims=(self.edge_dimension, "two"), ) @@ -350,12 +371,12 @@ def to_dataset( boundary_edge_dim = self._attrs["boundary_edge_dimension"] data_vars[face_edges] = xr.DataArray( - data=self._set_fillvalue(self.face_edge_connectivity), + data=self._adjust_connectivity(self.face_edge_connectivity), attrs=face_edges_attrs, dims=(self.face_dimension, nmax_node_dim), ) data_vars[face_faces] = xr.DataArray( - data=self._set_fillvalue( + data=self._adjust_connectivity( connectivity.to_dense( self.face_face_connectivity, self.n_max_node_per_face ) @@ -364,12 +385,12 @@ def to_dataset( dims=(self.face_dimension, nmax_node_dim), ) data_vars[edge_faces] = xr.DataArray( - data=self._set_fillvalue(self.edge_face_connectivity), + data=self._adjust_connectivity(self.edge_face_connectivity), attrs=edge_faces_attrs, dims=(self.edge_dimension, "two"), ) data_vars[bound_nodes] = xr.DataArray( - data=self._set_fillvalue(self.boundary_node_connectivity), + data=self._adjust_connectivity(self.boundary_node_connectivity), attrs=bound_nodes_attrs, dims=(boundary_edge_dim, "two"), ) @@ -380,7 +401,7 @@ def to_dataset( dataset = xr.Dataset(data_vars, attrs=attrs) if self._dataset: - dataset.update(self._dataset) + dataset = dataset.merge(self._dataset, compat="override") if other is not None: dataset = dataset.merge(other) if node_x not in dataset or node_y not in dataset: @@ -1128,10 +1149,10 @@ def topology_subset( edge_subset = self.edge_node_connectivity[edge_index] new_edges = connectivity.renumber(edge_subset) - grid = self.__class__( + grid = Ugrid2d( node_x, node_y, - self.fill_value, + FILL_VALUE, new_faces, name=self.name, edge_node_connectivity=new_edges, @@ -1140,6 +1161,7 @@ def topology_subset( crs=self.crs, attrs=self._attrs, ) + self._propagate_properties(grid) if return_index: indexes = { self.node_dimension: pd.Index(node_index), @@ -1622,6 +1644,8 @@ def merge_partitions( crs=grid.crs, attrs=grid._attrs, ) + # Maintain fill_value, start_index + grid._propagate_properties(merged_grid) return merged_grid, indexes def to_periodic(self, obj=None): @@ -1674,7 +1698,7 @@ def to_periodic(self, obj=None): node_x=new_xy[:, 0], node_y=new_xy[:, 1], face_node_connectivity=new_faces, - fill_value=self.fill_value, + fill_value=FILL_VALUE, name=self.name, edge_node_connectivity=new_edges, indexes=self._indexes, @@ -1682,6 +1706,7 @@ def to_periodic(self, obj=None): crs=self.crs, attrs=self.attrs, ) + self._propagate_properties(new) if obj is not None: indexes = { @@ -1734,7 +1759,7 @@ def to_nonperiodic(self, xmax: float, obj=None): node_x=np.concatenate((self.node_x, new_x)), node_y=np.concatenate((self.node_y, new_y)), face_node_connectivity=new_faces, - fill_value=self.fill_value, + fill_value=FILL_VALUE, name=self.name, edge_node_connectivity=None, indexes=self._indexes, @@ -1742,6 +1767,7 @@ def to_nonperiodic(self, xmax: float, obj=None): crs=self.crs, attrs=self.attrs, ) + self._propagate_properties(new) edge_index = None if self._edge_node_connectivity is not None: @@ -1854,7 +1880,9 @@ def triangulate(self): triangles: Ugrid2d """ triangles, _ = connectivity.triangulate(self.face_node_connectivity) - return Ugrid2d(self.node_x, self.node_y, FILL_VALUE, triangles) + grid = Ugrid2d(self.node_x, self.node_y, FILL_VALUE, triangles) + self._propagate_properties(grid) + return grid def _tesselate_voronoi(self, centroids, add_exterior, add_vertices, skip_concave): if add_exterior: @@ -1874,7 +1902,9 @@ def _tesselate_voronoi(self, centroids, add_exterior, add_vertices, skip_concave add_vertices, skip_concave, ) - return Ugrid2d(vertices[:, 0], vertices[:, 1], FILL_VALUE, faces) + grid = Ugrid2d(vertices[:, 0], vertices[:, 1], FILL_VALUE, faces) + self._propagate_properties(grid) + return grid def tesselate_centroidal_voronoi( self, add_exterior=True, add_vertices=True, skip_concave=False @@ -1942,9 +1972,10 @@ def reverse_cuthill_mckee(self, dimension=None): reordered_grid = Ugrid2d( self.node_x, self.node_y, - self.fill_value, + FILL_VALUE, self.face_node_connectivity[reordering], ) + self._propagate_properties(reordered_grid) return reordered_grid, reordering def refine_polygon( diff --git a/xugrid/ugrid/ugridbase.py b/xugrid/ugrid/ugridbase.py index 77c940f8..f94f164b 100644 --- a/xugrid/ugrid/ugridbase.py +++ b/xugrid/ugrid/ugridbase.py @@ -2,7 +2,7 @@ import copy import warnings from itertools import chain -from typing import Dict, Set, Tuple, Type, Union, cast +from typing import Dict, Literal, Set, Tuple, Type, Union, cast import numpy as np import pandas as pd @@ -288,6 +288,10 @@ def rename(self, name: str, return_name_dict: bool = False): else: return new + def _propagate_properties(self, other) -> None: + other.start_index = self.start_index + other.fill_value = self.fill_value + @staticmethod def _single_topology(dataset: xr.Dataset): topologies = dataset.ugrid_roles.topology @@ -347,6 +351,26 @@ def copy(self): """Create a deepcopy.""" return copy.deepcopy(self) + @property + def fill_value(self) -> int: + """Fill value for UGRID connectivity arrays.""" + return self._fill_value + + @fill_value.setter + def fill_value(self, value: int): + self._fill_value = value + + @property + def start_index(self) -> int: + """Start index for UGRID connectivity arrays.""" + return self._start_index + + @start_index.setter + def start_index(self, value: Literal[0, 1]): + if value not in (0, 1): + raise ValueError(f"start_index must be 0 or 1, received: {value}") + self._start_index = value + @property def attrs(self): return copy.deepcopy(self._attrs) @@ -458,12 +482,13 @@ def edge_bounds(self) -> FloatArray: @staticmethod def _prepare_connectivity( - da: xr.DataArray, fill_value: Union[float, int], dtype: type + da: xr.DataArray, fill_value: Union[int, float], dtype: type ) -> xr.DataArray: - start_index = da.attrs.get("start_index", 0) - if start_index not in (0, 1): - raise ValueError(f"start_index should be 0 or 1, received: {start_index}") - + """ + Undo the work xarray does when it encounters a _FillValue for UGRID + connectivity arrays. Set an external unified value back (across all + connectivities!), and cast back to the desired dtype. + """ data = da.to_numpy().copy() # If xarray detects a _FillValue, it converts the array to floats and # replaces the fill value by NaN, and moves the _FillValue to @@ -475,18 +500,21 @@ def _prepare_connectivity( # Set the fill_value before casting: otherwise the cast may fail. data[is_fill] = fill_value cast = data.astype(dtype, copy=False) - not_fill = ~is_fill - if start_index: - cast[not_fill] -= start_index if (cast[not_fill] < 0).any(): raise ValueError("connectivity contains negative values") return da.copy(data=cast) - def _set_fillvalue(self, connectivity: IntArray) -> IntArray: + def _adjust_connectivity(self, connectivity: IntArray) -> IntArray: + """Adjust connectivity for desired fill_value and start_index.""" c = connectivity.copy() + if self.start_index == 0 and self.fill_value == FILL_VALUE: + return c + is_fill = c == FILL_VALUE + if self.start_index: + c[~is_fill] += self.start_index if self.fill_value != FILL_VALUE: - c[c == FILL_VALUE] = self.fill_value + c[is_fill] = self.fill_value return c def _precheck(self, multi_index):