From c1f0f6f13d79b3efd07f31be6c07c7641ff6fb1a Mon Sep 17 00:00:00 2001
From: Sam Vente <savente93@gmail.com>
Date: Thu, 3 Aug 2023 18:24:53 +0200
Subject: [PATCH 1/2] return kwargs from adapter.to_file functions (#456)

---
 hydromt/data_adapter/dataframe.py     | 8 ++++++--
 hydromt/data_adapter/geodataframe.py  | 6 ++++--
 hydromt/data_adapter/geodataset.py    | 6 ++++--
 hydromt/data_adapter/rasterdataset.py | 7 +++++--
 hydromt/data_catalog.py               | 4 +++-
 5 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/hydromt/data_adapter/dataframe.py b/hydromt/data_adapter/dataframe.py
index be4093baf..f10bd1527 100644
--- a/hydromt/data_adapter/dataframe.py
+++ b/hydromt/data_adapter/dataframe.py
@@ -152,6 +152,8 @@ def to_file(
         driver : str
             Name of the driver used to read the data.
             See :py:func:`~hydromt.data_catalog.DataCatalog.get_geodataset`.
+        read_kwargs: dict
+            Keyword arguments needed to read the written file, e.g. ``index_col``.
 
 
         """
@@ -162,13 +164,15 @@ def to_file(
             )
         except IndexError as err:  # out of bounds for time
             logger.warning(str(err))
-            return None, None
+            return None, None, None
 
+        read_kwargs = dict()
         if driver is None or driver == "csv":
             # always write as CSV
             driver = "csv"
             fn_out = join(data_root, f"{data_name}.csv")
             obj.to_csv(fn_out, **kwargs)
+            read_kwargs["index_col"] = 0
         elif driver == "parquet":
             fn_out = join(data_root, f"{data_name}.parquet")
             obj.to_parquet(fn_out, **kwargs)
@@ -178,7 +182,7 @@ def to_file(
         else:
             raise ValueError(f"DataFrame: Driver {driver} is unknown.")
 
-        return fn_out, driver
+        return fn_out, driver, read_kwargs
 
     def get_data(
         self,
diff --git a/hydromt/data_adapter/geodataframe.py b/hydromt/data_adapter/geodataframe.py
index 3b47ba518..d692d12a6 100644
--- a/hydromt/data_adapter/geodataframe.py
+++ b/hydromt/data_adapter/geodataframe.py
@@ -167,8 +167,9 @@ def to_file(
         kwargs.pop("time_tuple", None)
         gdf = self.get_data(bbox=bbox, variables=variables, logger=logger)
         if gdf.index.size == 0:
-            return None, None
+            return None, None, None
 
+        read_kwargs = {}
         if driver is None:
             _lst = ["csv", "parquet", "xls", "xlsx", "xy", "vector_table"]
             driver = "csv" if self.driver in _lst else "GPKG"
@@ -182,6 +183,7 @@ def to_file(
                 )
             gdf["x"], gdf["y"] = gdf.geometry.x, gdf.geometry.y
             gdf.drop(columns="geometry").to_csv(fn_out, **kwargs)
+            read_kwargs["index_col"] = 0
         elif driver == "parquet":
             fn_out = join(data_root, f"{data_name}.parquet")
             if not np.all(gdf.geometry.type == "Point"):
@@ -200,7 +202,7 @@ def to_file(
             gdf.to_file(fn_out, driver=driver, **kwargs)
             driver = "vector"
 
-        return fn_out, driver
+        return fn_out, driver, read_kwargs
 
     def get_data(
         self,
diff --git a/hydromt/data_adapter/geodataset.py b/hydromt/data_adapter/geodataset.py
index 635c0da13..56554e909 100644
--- a/hydromt/data_adapter/geodataset.py
+++ b/hydromt/data_adapter/geodataset.py
@@ -182,7 +182,9 @@ def to_file(
             single_var_as_array=variables is None,
         )
         if obj.vector.index.size == 0 or ("time" in obj.coords and obj.time.size == 0):
-            return None, None
+            return None, None, None
+
+        read_kwargs = {}
 
         # much better for mem/storage/processing if dtypes are set correctly
         for name, coord in obj.coords.items():
@@ -218,7 +220,7 @@ def to_file(
         else:
             raise ValueError(f"GeoDataset: Driver {driver} unknown.")
 
-        return fn_out, driver
+        return fn_out, driver, read_kwargs
 
     def get_data(
         self,
diff --git a/hydromt/data_adapter/rasterdataset.py b/hydromt/data_adapter/rasterdataset.py
index ee46055b2..9834d5ef0 100644
--- a/hydromt/data_adapter/rasterdataset.py
+++ b/hydromt/data_adapter/rasterdataset.py
@@ -179,6 +179,8 @@ def to_file(
         driver: str
             Name of driver to read data with, see
             :py:func:`~hydromt.data_catalog.DataCatalog.get_rasterdataset`
+        read_kwargs: dict
+            Keyword arguments needed to read the written file back with `driver`.
         """
         try:
             obj = self.get_data(
@@ -190,8 +192,9 @@ def to_file(
             )
         except IndexError as err:  # out of bounds
             logger.warning(str(err))
-            return None, None
+            return None, None, None
 
+        read_kwargs = {}
         if driver is None:
             # by default write 2D raster data to GeoTiff and 3D raster data to netcdf
             driver = "netcdf" if len(obj.dims) == 3 else "GTiff"
@@ -228,7 +231,7 @@ def to_file(
                 )
             driver = "raster"
 
-        return fn_out, driver
+        return fn_out, driver, read_kwargs
 
     def get_data(
         self,
diff --git a/hydromt/data_catalog.py b/hydromt/data_catalog.py
index 66ac96de6..f619289af 100644
--- a/hydromt/data_catalog.py
+++ b/hydromt/data_catalog.py
@@ -867,7 +867,7 @@ def export_data(
                             unit_add = source.unit_add
                             source.unit_mult = {}
                             source.unit_add = {}
-                        fn_out, driver = source.to_file(
+                        fn_out, driver, driver_kwargs = source.to_file(
                             data_root=data_root,
                             data_name=key,
                             variables=source_vars.get(key, None),
@@ -892,6 +892,8 @@ def export_data(
                         source.driver = driver
                         source.filesystem = "local"
                         source.driver_kwargs = {}
+                        if driver_kwargs is not None:
+                            source.driver_kwargs.update(driver_kwargs)
                         source.rename = {}
                         if key in sources_out:
                             self.logger.warning(

From 7a7883770099ce9b61595dd22e82d39308246eb5 Mon Sep 17 00:00:00 2001
From: Sam Vente <savente93@gmail.com>
Date: Thu, 3 Aug 2023 18:37:16 +0200
Subject: [PATCH 2/2] Add support for configure using TOML files (#444)

---
 docs/changelog.rst          |  1 +
 hydromt/cli/api.py          |  4 ++--
 hydromt/config.py           | 47 +++++++++++++++++++++++++++++++++++--
 hydromt/data_catalog.py     | 10 ++++----
 hydromt/models/model_api.py |  6 ++---
 pyproject.toml              |  2 ++
 tests/test_config.py        |  2 +-
 7 files changed, 59 insertions(+), 13 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 33df66276..821edda6b 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -14,6 +14,7 @@ Added
 - docs now include a dropdown for selecting older versions of the docs. (#457)
 - Support for loading the same data source but from different places (e.g. local & aws)
 - Add support for reading and writing tabular data in ``parquet`` format. (PR #445)
+- Add support for reading model configs in ``TOML`` format. (PR #444)
 
 Changed
 -------
diff --git a/hydromt/cli/api.py b/hydromt/cli/api.py
index 76b52434a..4c1c148ec 100644
--- a/hydromt/cli/api.py
+++ b/hydromt/cli/api.py
@@ -94,7 +94,7 @@ def get_datasets(data_libs: Union[List, str]) -> Dict:
     Parameters
     ----------
     data_libs: (list of) str, Path, optional
-        One or more paths to data catalog yaml files or names of predefined
+        One or more paths to data catalog configuration files or names of predefined
         data catalogs. By default the data catalog is initiated without data entries.
         See :py:func:`~hydromt.data_adapter.DataCatalog.from_yml`
         for accepted yaml format.
@@ -139,7 +139,7 @@ def get_region(
     region : dict
         dictionary containing region definition
     data_libs : (list of) str, Path, optional
-        One or more paths to data catalog yaml files or names of predefined
+        One or more paths to data catalog configuration files or names of predefined
         data catalogs. By default the data catalog is initiated without data entries.
         See :py:func:`~hydromt.data_adapter.DataCatalog.from_yml`
         for accepted yaml format.
diff --git a/hydromt/config.py b/hydromt/config.py
index 316da724b..1d75a6982 100644
--- a/hydromt/config.py
+++ b/hydromt/config.py
@@ -9,6 +9,8 @@
 from typing import Dict, List, Union
 
 import yaml
+from tomli import load as load_toml
+from tomli_w import dump as dump_toml
 
 __all__ = [
     "configread",
@@ -16,6 +18,34 @@
 ]
 
 
+def _process_config_out(d):  # make a config writable: None -> "NONE", recursively
+    ret = {}
+    if isinstance(d, dict):
+        for k, v in d.items():
+            if v is None:
+                ret[k] = "NONE"  # placeholder string stands in for None on disk
+            else:
+                ret[k] = _process_config_out(v)  # recurse into nested dicts
+    else:
+        ret = d  # non-dict values (lists included) are returned unchanged
+
+    return ret
+
+
+def _process_config_in(d):  # counterpart of _process_config_out: "NONE" -> None
+    ret = {}
+    if isinstance(d, dict):
+        for k, v in d.items():
+            if v == "NONE":  # exact, case-sensitive match on the placeholder
+                ret[k] = None
+            else:
+                ret[k] = _process_config_in(v)  # recurse into nested dicts
+    else:
+        ret = d  # non-dict values (lists included) are returned unchanged
+
+    return ret
+
+
 def configread(
     config_fn: Union[Path, str],
     defaults: Dict = dict(),
@@ -48,15 +78,22 @@ def configread(
         Configuration dictionary.
     """
     # read
-    if splitext(config_fn)[-1] in [".yaml", ".yml"]:
+    ext = splitext(config_fn)[-1].strip()
+    if ext in [".yaml", ".yml"]:
         with open(config_fn, "rb") as f:
             cfdict = yaml.safe_load(f)
+        cfdict = _process_config_in(cfdict)
+    elif ext == ".toml":  # user defined
+        with open(config_fn, "rb") as f:
+            cfdict = load_toml(f)
+        cfdict = _process_config_in(cfdict)
     else:
         cfdict = read_ini_config(config_fn, **kwargs)
     # parse absolute paths
     if abs_path:
         root = Path(dirname(config_fn))
         cfdict = parse_abspath(cfdict, root, skip_abspath_sections)
+
     # update defaults
     if defaults:
         _cfdict = defaults.copy()
@@ -89,9 +126,15 @@ def configwrite(config_fn: Union[str, Path], cfdict: dict, **kwargs) -> None:
     """
     root = Path(dirname(config_fn))
     _cfdict = parse_relpath(cfdict.copy(), root)
-    if splitext(config_fn)[-1] in [".yaml", ".yml"]:
+    ext = splitext(config_fn)[-1].strip()
+    if ext in [".yaml", ".yml"]:
+        _cfdict = _process_config_out(_cfdict)  # should not be done for ini
         with open(config_fn, "w") as f:
             yaml.dump(_cfdict, f, sort_keys=False)
+    elif ext == ".toml":  # user defined
+        _cfdict = _process_config_out(_cfdict)
+        with open(config_fn, "wb") as f:
+            dump_toml(_cfdict, f)
     else:
         write_ini_config(config_fn, _cfdict, **kwargs)
 
diff --git a/hydromt/data_catalog.py b/hydromt/data_catalog.py
index f619289af..52f6c8ef6 100644
--- a/hydromt/data_catalog.py
+++ b/hydromt/data_catalog.py
@@ -71,9 +71,9 @@ def __init__(
         Arguments
         ---------
         data_libs: (list of) str, Path, optional
-            One or more paths to data catalog yaml files or names of predefined data
-            catalogs. By default the data catalog is initiated without data entries.
-            See :py:func:`~hydromt.data_adapter.DataCatalog.from_yml` for
+            One or more paths to data catalog configuration files or names of predefined
+            data catalogs. By default the data catalog is initiated without data
+            entries. See :py:func:`~hydromt.data_adapter.DataCatalog.from_yml` for
             accepted yaml format.
         fallback_lib:
             Name of pre-defined data catalog to read if no data_libs are provided,
@@ -499,7 +499,7 @@ def from_yml(
         {'RasterDataset', 'GeoDataset', 'GeoDataFrame'}. See the specific data adapters
         for more information about the required and optional arguments.
 
-        .. code-block:: console
+        .. code-block:: yaml
 
             meta:
               root: <path>
@@ -703,7 +703,7 @@ def to_dict(
             List of source names to export, by default None in which case all sources
             are exported.
         root : str, Path, optional
-            Global root for all relative paths in yml file.
+            Global root for all relative paths in the file.
         meta: dict, optional
             key-value pairs to add to the data catalog meta section, such as 'version',
             by default empty.
diff --git a/hydromt/models/model_api.py b/hydromt/models/model_api.py
index 99d29a14f..2b746c0aa 100644
--- a/hydromt/models/model_api.py
+++ b/hydromt/models/model_api.py
@@ -77,7 +77,7 @@ def __init__(
             Model simulation configuration file, by default None.
             Note that this is not the HydroMT model setup configuration file!
         data_libs : List[str], optional
-            List of data catalog yaml files, by default None
+            List of data catalog configuration files, by default None
         **artifact_keys:
             Additional keyword arguments to be passed down.
         logger:
@@ -548,7 +548,7 @@ def write_data_catalog(
         Parameters
         ----------
         root: str, Path, optional
-            Global root for all relative paths in yaml file.
+            Global root for all relative paths in configuration file.
             If "auto" the data source paths are relative to the yaml output ``path``.
         data_lib_fn: str, Path, optional
             Path of output yml file, absolute or relative to the model root,
@@ -560,7 +560,7 @@ def write_data_catalog(
         """
         path = data_lib_fn if isabs(data_lib_fn) else join(self.root, data_lib_fn)
         cat = DataCatalog(logger=self.logger, fallback_lib=None)
-        # read hydromt_data yaml file and add to data catalog
+        # read hydromt_data configuration file and add to data catalog
         if self._read and isfile(path) and append:
             cat.from_yml(path)
         # update data catalog with new used sources
diff --git a/pyproject.toml b/pyproject.toml
index a91be6509..d46a9f6c2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,6 +28,8 @@ dependencies = [
     "rasterio",          # raster wrapper around gdal
     "shapely>=2.0.0",    # geometry transforms
     "scipy",             # scientific utilities
+    "tomli",             # parsing toml files
+    "tomli-w",           # writing toml files
     "xarray",            # ndim data
     "universal_pathlib", # provides path compatability between different filesystems
     "xmltodict",         # xml parser also used to read VRT
diff --git a/tests/test_config.py b/tests/test_config.py
index 7d233fe8b..8757e5020 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -5,7 +5,7 @@
 from hydromt import config
 
 
-@pytest.mark.parametrize("ext", ["ini", "yaml"])
+@pytest.mark.parametrize("ext", ["ini", "yaml", "toml"])
 def test_config(tmpdir, ext):
     cfdict = {
         "section1": {