From c1f0f6f13d79b3efd07f31be6c07c7641ff6fb1a Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Thu, 3 Aug 2023 18:24:53 +0200 Subject: [PATCH 1/2] return kwargs from adapter.to_file funtions (#456) --- hydromt/data_adapter/dataframe.py | 8 ++++++-- hydromt/data_adapter/geodataframe.py | 6 ++++-- hydromt/data_adapter/geodataset.py | 6 ++++-- hydromt/data_adapter/rasterdataset.py | 7 +++++-- hydromt/data_catalog.py | 4 +++- 5 files changed, 22 insertions(+), 9 deletions(-) diff --git a/hydromt/data_adapter/dataframe.py b/hydromt/data_adapter/dataframe.py index be4093baf..f10bd1527 100644 --- a/hydromt/data_adapter/dataframe.py +++ b/hydromt/data_adapter/dataframe.py @@ -152,6 +152,8 @@ def to_file( driver : str Name of the driver used to read the data. See :py:func:`~hydromt.data_catalog.DataCatalog.get_geodataset`. + kwargs: dict + The keyword arguments needed to read the written file (e.g. ``index_col``). """ @@ -162,13 +164,15 @@ def to_file( ) except IndexError as err: # out of bounds for time logger.warning(str(err)) - return None, None + return None, None, None + read_kwargs = dict() if driver is None or driver == "csv": # always write as CSV driver = "csv" fn_out = join(data_root, f"{data_name}.csv") obj.to_csv(fn_out, **kwargs) + read_kwargs["index_col"] = 0 elif driver == "parquet": fn_out = join(data_root, f"{data_name}.parquet") obj.to_parquet(fn_out, **kwargs) @@ -178,7 +182,7 @@ def to_file( else: raise ValueError(f"DataFrame: Driver {driver} is unknown.") - return fn_out, driver + return fn_out, driver, read_kwargs def get_data( self, diff --git a/hydromt/data_adapter/geodataframe.py b/hydromt/data_adapter/geodataframe.py index 3b47ba518..d692d12a6 100644 --- a/hydromt/data_adapter/geodataframe.py +++ b/hydromt/data_adapter/geodataframe.py @@ -167,8 +167,9 @@ def to_file( kwargs.pop("time_tuple", None) gdf = self.get_data(bbox=bbox, variables=variables, logger=logger) if gdf.index.size == 0: - return None, None + return None, None, None + read_kwargs = {} if driver is None: 
_lst = ["csv", "parquet", "xls", "xlsx", "xy", "vector_table"] driver = "csv" if self.driver in _lst else "GPKG" @@ -182,6 +183,7 @@ def to_file( ) gdf["x"], gdf["y"] = gdf.geometry.x, gdf.geometry.y gdf.drop(columns="geometry").to_csv(fn_out, **kwargs) + read_kwargs["index_col"] = 0 elif driver == "parquet": fn_out = join(data_root, f"{data_name}.parquet") if not np.all(gdf.geometry.type == "Point"): @@ -200,7 +202,7 @@ def to_file( gdf.to_file(fn_out, driver=driver, **kwargs) driver = "vector" - return fn_out, driver + return fn_out, driver, read_kwargs def get_data( self, diff --git a/hydromt/data_adapter/geodataset.py b/hydromt/data_adapter/geodataset.py index 635c0da13..56554e909 100644 --- a/hydromt/data_adapter/geodataset.py +++ b/hydromt/data_adapter/geodataset.py @@ -182,7 +182,9 @@ def to_file( single_var_as_array=variables is None, ) if obj.vector.index.size == 0 or ("time" in obj.coords and obj.time.size == 0): - return None, None + return None, None, None + + read_kwargs = {} # much better for mem/storage/processing if dtypes are set correctly for name, coord in obj.coords.items(): @@ -218,7 +220,7 @@ def to_file( else: raise ValueError(f"GeoDataset: Driver {driver} unknown.") - return fn_out, driver + return fn_out, driver, read_kwargs def get_data( self, diff --git a/hydromt/data_adapter/rasterdataset.py b/hydromt/data_adapter/rasterdataset.py index ee46055b2..9834d5ef0 100644 --- a/hydromt/data_adapter/rasterdataset.py +++ b/hydromt/data_adapter/rasterdataset.py @@ -179,6 +179,8 @@ def to_file( driver: str Name of driver to read data with, see :py:func:`~hydromt.data_catalog.DataCatalog.get_rasterdataset` + kwargs: dict + the additional keyword arguments that were passed to `to_netcdf` """ try: obj = self.get_data( @@ -190,8 +192,9 @@ def to_file( ) except IndexError as err: # out of bounds logger.warning(str(err)) - return None, None + return None, None, None + read_kwargs = {} if driver is None: # by default write 2D raster data to GeoTiff and 3D 
raster data to netcdf driver = "netcdf" if len(obj.dims) == 3 else "GTiff" @@ -228,7 +231,7 @@ def to_file( ) driver = "raster" - return fn_out, driver + return fn_out, driver, read_kwargs def get_data( self, diff --git a/hydromt/data_catalog.py b/hydromt/data_catalog.py index 66ac96de6..f619289af 100644 --- a/hydromt/data_catalog.py +++ b/hydromt/data_catalog.py @@ -867,7 +867,7 @@ def export_data( unit_add = source.unit_add source.unit_mult = {} source.unit_add = {} - fn_out, driver = source.to_file( + fn_out, driver, driver_kwargs = source.to_file( data_root=data_root, data_name=key, variables=source_vars.get(key, None), @@ -892,6 +892,8 @@ def export_data( source.driver = driver source.filesystem = "local" source.driver_kwargs = {} + if driver_kwargs is not None: + source.driver_kwargs.update(driver_kwargs) source.rename = {} if key in sources_out: self.logger.warning( From 7a7883770099ce9b61595dd22e82d39308246eb5 Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Thu, 3 Aug 2023 18:37:16 +0200 Subject: [PATCH 2/2] Add support for configure using TOML files (#444) --- docs/changelog.rst | 1 + hydromt/cli/api.py | 4 ++-- hydromt/config.py | 47 +++++++++++++++++++++++++++++++++++-- hydromt/data_catalog.py | 10 ++++---- hydromt/models/model_api.py | 6 ++--- pyproject.toml | 2 ++ tests/test_config.py | 2 +- 7 files changed, 59 insertions(+), 13 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 33df66276..821edda6b 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -14,6 +14,7 @@ Added - docs now include a dropdown for selecting older versions of the docs. (#457) - Support for loading the same data source but from different places (e.g. local & aws) - Add support for reading and writing tabular data in ``parquet`` format. (PR #445) +- Add support for reading model configs in ``TOML`` format. 
(PR #444) Changed ------- diff --git a/hydromt/cli/api.py b/hydromt/cli/api.py index 76b52434a..4c1c148ec 100644 --- a/hydromt/cli/api.py +++ b/hydromt/cli/api.py @@ -94,7 +94,7 @@ def get_datasets(data_libs: Union[List, str]) -> Dict: Parameters ---------- data_libs: (list of) str, Path, optional - One or more paths to data catalog yaml files or names of predefined + One or more paths to data catalog configuration files or names of predefined data catalogs. By default the data catalog is initiated without data entries. See :py:func:`~hydromt.data_adapter.DataCatalog.from_yml` for accepted yaml format. @@ -139,7 +139,7 @@ def get_region( region : dict dictionary containing region definition data_libs : (list of) str, Path, optional - One or more paths to data catalog yaml files or names of predefined + One or more paths to data catalog configuration files or names of predefined data catalogs. By default the data catalog is initiated without data entries. See :py:func:`~hydromt.data_adapter.DataCatalog.from_yml` for accepted yaml format. diff --git a/hydromt/config.py b/hydromt/config.py index 316da724b..1d75a6982 100644 --- a/hydromt/config.py +++ b/hydromt/config.py @@ -9,6 +9,8 @@ from typing import Dict, List, Union import yaml +from tomli import load as load_toml +from tomli_w import dump as dump_toml __all__ = [ "configread", @@ -16,6 +18,34 @@ ] +def _process_config_out(d): + ret = {} + if isinstance(d, dict): + for k, v in d.items(): + if v is None: + ret[k] = "NONE" + else: + ret[k] = _process_config_out(v) + else: + ret = d + + return ret + + +def _process_config_in(d): + ret = {} + if isinstance(d, dict): + for k, v in d.items(): + if v == "NONE": + ret[k] = None + else: + ret[k] = _process_config_in(v) + else: + ret = d + + return ret + + def configread( config_fn: Union[Path, str], defaults: Dict = dict(), @@ -48,15 +78,22 @@ def configread( Configuration dictionary. 
""" # read - if splitext(config_fn)[-1] in [".yaml", ".yml"]: + ext = splitext(config_fn)[-1].strip() + if ext in [".yaml", ".yml"]: with open(config_fn, "rb") as f: cfdict = yaml.safe_load(f) + cfdict = _process_config_in(cfdict) + elif ext == ".toml": # user defined + with open(config_fn, "rb") as f: + cfdict = load_toml(f) + cfdict = _process_config_in(cfdict) else: cfdict = read_ini_config(config_fn, **kwargs) # parse absolute paths if abs_path: root = Path(dirname(config_fn)) cfdict = parse_abspath(cfdict, root, skip_abspath_sections) + # update defaults if defaults: _cfdict = defaults.copy() @@ -89,9 +126,15 @@ def configwrite(config_fn: Union[str, Path], cfdict: dict, **kwargs) -> None: """ root = Path(dirname(config_fn)) _cfdict = parse_relpath(cfdict.copy(), root) - if splitext(config_fn)[-1] in [".yaml", ".yml"]: + ext = splitext(config_fn)[-1].strip() + if ext in [".yaml", ".yml"]: + _cfdict = _process_config_out(_cfdict) # should not be done for ini with open(config_fn, "w") as f: yaml.dump(_cfdict, f, sort_keys=False) + elif ext == ".toml": # user defined + _cfdict = _process_config_out(_cfdict) + with open(config_fn, "wb") as f: + dump_toml(_cfdict, f) else: write_ini_config(config_fn, _cfdict, **kwargs) diff --git a/hydromt/data_catalog.py b/hydromt/data_catalog.py index f619289af..52f6c8ef6 100644 --- a/hydromt/data_catalog.py +++ b/hydromt/data_catalog.py @@ -71,9 +71,9 @@ def __init__( Arguments --------- data_libs: (list of) str, Path, optional - One or more paths to data catalog yaml files or names of predefined data - catalogs. By default the data catalog is initiated without data entries. - See :py:func:`~hydromt.data_adapter.DataCatalog.from_yml` for + One or more paths to data catalog configuration files or names of predefined + data catalogs. By default the data catalog is initiated without data + entries. See :py:func:`~hydromt.data_adapter.DataCatalog.from_yml` for accepted yaml format. 
fallback_lib: Name of pre-defined data catalog to read if no data_libs are provided, @@ -499,7 +499,7 @@ def from_yml( {'RasterDataset', 'GeoDataset', 'GeoDataFrame'}. See the specific data adapters for more information about the required and optional arguments. - .. code-block:: console + .. code-block:: yaml meta: root: @@ -703,7 +703,7 @@ def to_dict( List of source names to export, by default None in which case all sources are exported. root : str, Path, optional - Global root for all relative paths in yml file. + Global root for all relative paths in the file. meta: dict, optional key-value pairs to add to the data catalog meta section, such as 'version', by default empty. diff --git a/hydromt/models/model_api.py b/hydromt/models/model_api.py index 99d29a14f..2b746c0aa 100644 --- a/hydromt/models/model_api.py +++ b/hydromt/models/model_api.py @@ -77,7 +77,7 @@ def __init__( Model simulation configuration file, by default None. Note that this is not the HydroMT model setup configuration file! data_libs : List[str], optional - List of data catalog yaml files, by default None + List of data catalog configuration files, by default None **artifact_keys: Additional keyword arguments to be passed down. logger: @@ -548,7 +548,7 @@ def write_data_catalog( Parameters ---------- root: str, Path, optional - Global root for all relative paths in yaml file. + Global root for all relative paths in configuration file. If "auto" the data source paths are relative to the yaml output ``path``. 
data_lib_fn: str, Path, optional Path of output yml file, absolute or relative to the model root, @@ -560,7 +560,7 @@ def write_data_catalog( """ path = data_lib_fn if isabs(data_lib_fn) else join(self.root, data_lib_fn) cat = DataCatalog(logger=self.logger, fallback_lib=None) - # read hydromt_data yaml file and add to data catalog + # read hydromt_data configuration file and add to data catalog if self._read and isfile(path) and append: cat.from_yml(path) # update data catalog with new used sources diff --git a/pyproject.toml b/pyproject.toml index a91be6509..d46a9f6c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,8 @@ dependencies = [ "rasterio", # raster wrapper around gdal "shapely>=2.0.0", # geometry transforms "scipy", # scientific utilities + "tomli", # parsing toml files + "tomli-w", # writing toml files "xarray", # ndim data "universal_pathlib", # provides path compatability between different filesystems "xmltodict", # xml parser also used to read VRT diff --git a/tests/test_config.py b/tests/test_config.py index 7d233fe8b..8757e5020 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -5,7 +5,7 @@ from hydromt import config -@pytest.mark.parametrize("ext", ["ini", "yaml"]) +@pytest.mark.parametrize("ext", ["ini", "yaml", "toml"]) def test_config(tmpdir, ext): cfdict = { "section1": {