Skip to content

Commit

Permalink
Merge branch 'main' into remove-deprecated-dependencies
Browse files Browse the repository at this point in the history
  • Loading branch information
savente93 committed Aug 3, 2023
2 parents 21015f4 + 7a78837 commit 90edca3
Show file tree
Hide file tree
Showing 11 changed files with 81 additions and 22 deletions.
1 change: 1 addition & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Added
- docs now include a dropdown for selecting older versions of the docs. (#457)
- Support for loading the same data source but from different places (e.g. local & aws)
- Add support for reading and writing tabular data in ``parquet`` format. (PR #445)
- Add support for reading model configs in ``TOML`` format. (PR #444)

Changed
-------
Expand Down
4 changes: 2 additions & 2 deletions hydromt/cli/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def get_datasets(data_libs: Union[List, str]) -> Dict:
Parameters
----------
data_libs: (list of) str, Path, optional
One or more paths to data catalog yaml files or names of predefined
One or more paths to data catalog configuration files or names of predefined
data catalogs. By default the data catalog is initiated without data entries.
See :py:func:`~hydromt.data_adapter.DataCatalog.from_yml`
for accepted yaml format.
Expand Down Expand Up @@ -139,7 +139,7 @@ def get_region(
region : dict
dictionary containing region definition
data_libs : (list of) str, Path, optional
One or more paths to data catalog yaml files or names of predefined
One or more paths to data catalog configuration files or names of predefined
data catalogs. By default the data catalog is initiated without data entries.
See :py:func:`~hydromt.data_adapter.DataCatalog.from_yml`
for accepted yaml format.
Expand Down
47 changes: 45 additions & 2 deletions hydromt/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,43 @@
from typing import Dict, List, Union

import yaml
from tomli import load as load_toml
from tomli_w import dump as dump_toml

__all__ = [
"configread",
"configwrite",
]


def _process_config_out(d):
ret = {}
if isinstance(d, dict):
for k, v in d.items():
if v is None:
ret[k] = "NONE"
else:
ret[k] = _process_config_out(v)
else:
ret = d

return ret


def _process_config_in(d):
ret = {}
if isinstance(d, dict):
for k, v in d.items():
if v == "NONE":
ret[k] = None
else:
ret[k] = _process_config_in(v)
else:
ret = d

return ret


def configread(
config_fn: Union[Path, str],
defaults: Dict = dict(),
Expand Down Expand Up @@ -48,15 +78,22 @@ def configread(
Configuration dictionary.
"""
# read
if splitext(config_fn)[-1] in [".yaml", ".yml"]:
ext = splitext(config_fn)[-1].strip()
if ext in [".yaml", ".yml"]:
with open(config_fn, "rb") as f:
cfdict = yaml.safe_load(f)
cfdict = _process_config_in(cfdict)
elif ext == ".toml": # user defined
with open(config_fn, "rb") as f:
cfdict = load_toml(f)
cfdict = _process_config_in(cfdict)
else:
cfdict = read_ini_config(config_fn, **kwargs)
# parse absolute paths
if abs_path:
root = Path(dirname(config_fn))
cfdict = parse_abspath(cfdict, root, skip_abspath_sections)

# update defaults
if defaults:
_cfdict = defaults.copy()
Expand Down Expand Up @@ -89,9 +126,15 @@ def configwrite(config_fn: Union[str, Path], cfdict: dict, **kwargs) -> None:
"""
root = Path(dirname(config_fn))
_cfdict = parse_relpath(cfdict.copy(), root)
if splitext(config_fn)[-1] in [".yaml", ".yml"]:
ext = splitext(config_fn)[-1].strip()
if ext in [".yaml", ".yml"]:
_cfdict = _process_config_out(_cfdict) # should not be done for ini
with open(config_fn, "w") as f:
yaml.dump(_cfdict, f, sort_keys=False)
elif ext == ".toml": # user defined
_cfdict = _process_config_out(_cfdict)
with open(config_fn, "wb") as f:
dump_toml(_cfdict, f)
else:
write_ini_config(config_fn, _cfdict, **kwargs)

Expand Down
8 changes: 6 additions & 2 deletions hydromt/data_adapter/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ def to_file(
driver : str
Name of the driver used to read the data.
See :py:func:`~hydromt.data_catalog.DataCatalog.get_geodataset`.
kwargs: dict
The additional keyword arguments that were passed in.
"""
Expand All @@ -162,13 +164,15 @@ def to_file(
)
except IndexError as err: # out of bounds for time
logger.warning(str(err))
return None, None
return None, None, None

read_kwargs = dict()
if driver is None or driver == "csv":
# always write as CSV
driver = "csv"
fn_out = join(data_root, f"{data_name}.csv")
obj.to_csv(fn_out, **kwargs)
read_kwargs["index_col"] = 0
elif driver == "parquet":
fn_out = join(data_root, f"{data_name}.parquet")
obj.to_parquet(fn_out, **kwargs)
Expand All @@ -178,7 +182,7 @@ def to_file(
else:
raise ValueError(f"DataFrame: Driver {driver} is unknown.")

return fn_out, driver
return fn_out, driver, read_kwargs

def get_data(
self,
Expand Down
6 changes: 4 additions & 2 deletions hydromt/data_adapter/geodataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,9 @@ def to_file(
kwargs.pop("time_tuple", None)
gdf = self.get_data(bbox=bbox, variables=variables, logger=logger)
if gdf.index.size == 0:
return None, None
return None, None, None

read_kwargs = {}
if driver is None:
_lst = ["csv", "parquet", "xls", "xlsx", "xy", "vector_table"]
driver = "csv" if self.driver in _lst else "GPKG"
Expand All @@ -182,6 +183,7 @@ def to_file(
)
gdf["x"], gdf["y"] = gdf.geometry.x, gdf.geometry.y
gdf.drop(columns="geometry").to_csv(fn_out, **kwargs)
read_kwargs["index_col"] = 0
elif driver == "parquet":
fn_out = join(data_root, f"{data_name}.parquet")
if not np.all(gdf.geometry.type == "Point"):
Expand All @@ -200,7 +202,7 @@ def to_file(
gdf.to_file(fn_out, driver=driver, **kwargs)
driver = "vector"

return fn_out, driver
return fn_out, driver, read_kwargs

def get_data(
self,
Expand Down
6 changes: 4 additions & 2 deletions hydromt/data_adapter/geodataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,9 @@ def to_file(
single_var_as_array=variables is None,
)
if obj.vector.index.size == 0 or ("time" in obj.coords and obj.time.size == 0):
return None, None
return None, None, None

read_kwargs = {}

# much better for mem/storage/processing if dtypes are set correctly
for name, coord in obj.coords.items():
Expand Down Expand Up @@ -218,7 +220,7 @@ def to_file(
else:
raise ValueError(f"GeoDataset: Driver {driver} unknown.")

return fn_out, driver
return fn_out, driver, read_kwargs

def get_data(
self,
Expand Down
7 changes: 5 additions & 2 deletions hydromt/data_adapter/rasterdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,8 @@ def to_file(
driver: str
Name of driver to read data with, see
:py:func:`~hydromt.data_catalog.DataCatalog.get_rasterdataset`
kwargs: dict
The additional keyword arguments that were passed to `to_netcdf`.
"""
try:
obj = self.get_data(
Expand All @@ -190,8 +192,9 @@ def to_file(
)
except IndexError as err: # out of bounds
logger.warning(str(err))
return None, None
return None, None, None

read_kwargs = {}
if driver is None:
# by default write 2D raster data to GeoTiff and 3D raster data to netcdf
driver = "netcdf" if len(obj.dims) == 3 else "GTiff"
Expand Down Expand Up @@ -228,7 +231,7 @@ def to_file(
)
driver = "raster"

return fn_out, driver
return fn_out, driver, read_kwargs

def get_data(
self,
Expand Down
14 changes: 8 additions & 6 deletions hydromt/data_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ def __init__(
Arguments
---------
data_libs: (list of) str, Path, optional
One or more paths to data catalog yaml files or names of predefined data
catalogs. By default the data catalog is initiated without data entries.
See :py:func:`~hydromt.data_adapter.DataCatalog.from_yml` for
One or more paths to data catalog configuration files or names of predefined
data catalogs. By default the data catalog is initiated without data
entries. See :py:func:`~hydromt.data_adapter.DataCatalog.from_yml` for
accepted yaml format.
fallback_lib:
Name of pre-defined data catalog to read if no data_libs are provided,
Expand Down Expand Up @@ -499,7 +499,7 @@ def from_yml(
{'RasterDataset', 'GeoDataset', 'GeoDataFrame'}. See the specific data adapters
for more information about the required and optional arguments.
.. code-block:: console
.. code-block:: yaml
meta:
root: <path>
Expand Down Expand Up @@ -703,7 +703,7 @@ def to_dict(
List of source names to export, by default None in which case all sources
are exported.
root : str, Path, optional
Global root for all relative paths in yml file.
Global root for all relative paths in the file.
meta: dict, optional
key-value pairs to add to the data catalog meta section, such as 'version',
by default empty.
Expand Down Expand Up @@ -867,7 +867,7 @@ def export_data(
unit_add = source.unit_add
source.unit_mult = {}
source.unit_add = {}
fn_out, driver = source.to_file(
fn_out, driver, driver_kwargs = source.to_file(
data_root=data_root,
data_name=key,
variables=source_vars.get(key, None),
Expand All @@ -892,6 +892,8 @@ def export_data(
source.driver = driver
source.filesystem = "local"
source.driver_kwargs = {}
if driver_kwargs is not None:
source.driver_kwargs.update(driver_kwargs)
source.rename = {}
if key in sources_out:
self.logger.warning(
Expand Down
6 changes: 3 additions & 3 deletions hydromt/models/model_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def __init__(
Model simulation configuration file, by default None.
Note that this is not the HydroMT model setup configuration file!
data_libs : List[str], optional
List of data catalog yaml files, by default None
List of data catalog configuration files, by default None
**artifact_keys:
Additional keyword arguments to be passed down.
logger:
Expand Down Expand Up @@ -548,7 +548,7 @@ def write_data_catalog(
Parameters
----------
root: str, Path, optional
Global root for all relative paths in yaml file.
Global root for all relative paths in configuration file.
If "auto" the data source paths are relative to the yaml output ``path``.
data_lib_fn: str, Path, optional
Path of output yml file, absolute or relative to the model root,
Expand All @@ -560,7 +560,7 @@ def write_data_catalog(
"""
path = data_lib_fn if isabs(data_lib_fn) else join(self.root, data_lib_fn)
cat = DataCatalog(logger=self.logger, fallback_lib=None)
# read hydromt_data yaml file and add to data catalog
# read hydromt_data configuration file and add to data catalog
if self._read and isfile(path) and append:
cat.from_yml(path)
# update data catalog with new used sources
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ dependencies = [
"rasterio", # raster wrapper around gdal
"shapely>=2.0.0", # geometry transforms
"scipy", # scientific utilities
"tomli", # parsing toml files
"tomli-w", # writing toml files
"xarray", # ndim data
"universal_pathlib", # provides path compatibility between different filesystems
"xmltodict", # xml parser also used to read VRT
Expand Down
2 changes: 1 addition & 1 deletion tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from hydromt import config


@pytest.mark.parametrize("ext", ["ini", "yaml"])
@pytest.mark.parametrize("ext", ["ini", "yaml", "toml"])
def test_config(tmpdir, ext):
cfdict = {
"section1": {
Expand Down

0 comments on commit 90edca3

Please sign in to comment.