From 17077bef6bf4101b138f14c36c2a935adb8a1c9b Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Tue, 25 Jul 2023 10:24:10 +0200 Subject: [PATCH 1/7] return kwargs from adapter.to_file functions --- hydromt/data_adapter/dataframe.py | 6 ++++-- hydromt/data_adapter/geodataframe.py | 4 ++-- hydromt/data_adapter/geodataset.py | 4 ++-- hydromt/data_adapter/rasterdataset.py | 6 ++++-- hydromt/data_catalog.py | 2 +- hydromt/stats/extremes.py | 2 +- 6 files changed, 14 insertions(+), 10 deletions(-) diff --git a/hydromt/data_adapter/dataframe.py b/hydromt/data_adapter/dataframe.py index d100ebe54..ab1b8a8d7 100644 --- a/hydromt/data_adapter/dataframe.py +++ b/hydromt/data_adapter/dataframe.py @@ -148,6 +148,8 @@ def to_file( driver : str Name of the driver used to read the data. See :py:func:`~hydromt.data_catalog.DataCatalog.get_geodataset`. + kwargs: dict + The additional keyword arguments that were passed in. """ @@ -158,7 +160,7 @@ def to_file( ) except IndexError as err: # out of bounds for time logger.warning(str(err)) - return None, None + return None, None, None if driver is None or driver == "csv": # always write as CSV @@ -171,7 +173,7 @@ def to_file( else: raise ValueError(f"DataFrame: Driver {driver} is unknown.") - return fn_out, driver + return fn_out, driver, kwargs def get_data( self, diff --git a/hydromt/data_adapter/geodataframe.py b/hydromt/data_adapter/geodataframe.py index ed911ae10..eddfac0de 100644 --- a/hydromt/data_adapter/geodataframe.py +++ b/hydromt/data_adapter/geodataframe.py @@ -162,7 +162,7 @@ def to_file( kwargs.pop("time_tuple", None) gdf = self.get_data(bbox=bbox, variables=variables, logger=logger) if gdf.index.size == 0: - return None, None + return None, None, None if driver is None: _lst = ["csv", "xls", "xlsx", "xy", "vector_table"] @@ -186,7 +186,7 @@ def to_file( gdf.to_file(fn_out, driver=driver, **kwargs) driver = "vector" - return fn_out, driver + return fn_out, driver, kwargs def get_data( self, diff --git 
a/hydromt/data_adapter/geodataset.py b/hydromt/data_adapter/geodataset.py index 057609588..12e84e14c 100644 --- a/hydromt/data_adapter/geodataset.py +++ b/hydromt/data_adapter/geodataset.py @@ -178,7 +178,7 @@ def to_file( single_var_as_array=variables is None, ) if obj.vector.index.size == 0 or ("time" in obj.coords and obj.time.size == 0): - return None, None + return None, None, None # much better for mem/storage/processing if dtypes are set correctly for name, coord in obj.coords.items(): @@ -214,7 +214,7 @@ def to_file( else: raise ValueError(f"GeoDataset: Driver {driver} unknown.") - return fn_out, driver + return fn_out, driver, kwargs def get_data( self, diff --git a/hydromt/data_adapter/rasterdataset.py b/hydromt/data_adapter/rasterdataset.py index d3213beba..5c0e8a6b7 100644 --- a/hydromt/data_adapter/rasterdataset.py +++ b/hydromt/data_adapter/rasterdataset.py @@ -175,6 +175,8 @@ def to_file( driver: str Name of driver to read data with, see :py:func:`~hydromt.data_catalog.DataCatalog.get_rasterdataset` + kwargs: dict + the additional keyword arguments that were passed to `to_netcdf` """ try: obj = self.get_data( @@ -186,7 +188,7 @@ def to_file( ) except IndexError as err: # out of bounds logger.warning(str(err)) - return None, None + return None, None, None if driver is None: # by default write 2D raster data to GeoTiff and 3D raster data to netcdf @@ -224,7 +226,7 @@ def to_file( ) driver = "raster" - return fn_out, driver + return fn_out, driver, kwargs def get_data( self, diff --git a/hydromt/data_catalog.py b/hydromt/data_catalog.py index 7c57e156b..e59d2fe93 100644 --- a/hydromt/data_catalog.py +++ b/hydromt/data_catalog.py @@ -585,7 +585,7 @@ def export_data( unit_add = source.unit_add source.unit_mult = {} source.unit_add = {} - fn_out, driver = source.to_file( + fn_out, driver, source_kwargs = source.to_file( data_root=data_root, data_name=key, variables=source_vars.get(key, None), diff --git a/hydromt/stats/extremes.py 
b/hydromt/stats/extremes.py index 4b3d9cf00..165fdcc06 100644 --- a/hydromt/stats/extremes.py +++ b/hydromt/stats/extremes.py @@ -898,7 +898,7 @@ def get_lmom(x, nmom=4): vector of (nmom) L-moments """ n = len(x) - xs = np.msort(x) + xs = np.sort(x, axis=0) bb = np.zeros(nmom - 1) ll = np.zeros(nmom - 1) b0 = xs.mean(axis=0) From 815a33e40cf4c534666da592bbafed8db131554f Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Thu, 27 Jul 2023 17:24:23 +0200 Subject: [PATCH 2/7] add index_col to df kwargs return --- hydromt/data_adapter/dataframe.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hydromt/data_adapter/dataframe.py b/hydromt/data_adapter/dataframe.py index ab1b8a8d7..67e5691cc 100644 --- a/hydromt/data_adapter/dataframe.py +++ b/hydromt/data_adapter/dataframe.py @@ -167,6 +167,7 @@ def to_file( driver = "csv" fn_out = join(data_root, f"{data_name}.csv") obj.to_csv(fn_out, **kwargs) + kwargs["index_col"] = obj.index.name elif driver == "excel": fn_out = join(data_root, f"{data_name}.xlsx") obj.to_excel(fn_out, **kwargs) From 7f0a534e8c9cdd55db2029a4a6a551e2d4d65e1c Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Mon, 31 Jul 2023 14:27:44 +0000 Subject: [PATCH 3/7] Update hydromt/data_adapter/dataframe.py Co-authored-by: DirkEilander --- hydromt/data_adapter/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hydromt/data_adapter/dataframe.py b/hydromt/data_adapter/dataframe.py index 747ded2ce..368e96380 100644 --- a/hydromt/data_adapter/dataframe.py +++ b/hydromt/data_adapter/dataframe.py @@ -178,7 +178,7 @@ def to_file( elif driver == "excel": fn_out = join(data_root, f"{data_name}.xlsx") obj.to_excel(fn_out, **kwargs) - else: + return fn_out, driver, read_kwargs raise ValueError(f"DataFrame: Driver {driver} is unknown.") return fn_out, driver, kwargs From f194905ecdbaef78a1dd506284e4575ed9a4dc8d Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Mon, 31 Jul 2023 14:37:16 +0000 Subject: [PATCH 4/7] Update 
hydromt/data_adapter/dataframe.py Co-authored-by: DirkEilander --- hydromt/data_adapter/dataframe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hydromt/data_adapter/dataframe.py b/hydromt/data_adapter/dataframe.py index 368e96380..f4f6db5de 100644 --- a/hydromt/data_adapter/dataframe.py +++ b/hydromt/data_adapter/dataframe.py @@ -166,12 +166,13 @@ def to_file( logger.warning(str(err)) return None, None, None + read_kwargs = dict() if driver is None or driver == "csv": # always write as CSV driver = "csv" fn_out = join(data_root, f"{data_name}.csv") obj.to_csv(fn_out, **kwargs) - kwargs["index_col"] = obj.index.name + read_kwargs["index_col"] = obj.index.name elif driver == "parquet": fn_out = join(data_root, f"{data_name}.parquet") obj.to_parquet(fn_out, **kwargs) From ea5a999068daec7e9d32a249dce58c26041185b5 Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Mon, 31 Jul 2023 16:38:48 +0200 Subject: [PATCH 5/7] cleanup --- hydromt/data_adapter/dataframe.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hydromt/data_adapter/dataframe.py b/hydromt/data_adapter/dataframe.py index f4f6db5de..1584a78dc 100644 --- a/hydromt/data_adapter/dataframe.py +++ b/hydromt/data_adapter/dataframe.py @@ -166,7 +166,7 @@ def to_file( logger.warning(str(err)) return None, None, None - read_kwargs = dict() + read_kwargs = dict() if driver is None or driver == "csv": # always write as CSV driver = "csv" @@ -179,10 +179,10 @@ def to_file( elif driver == "excel": fn_out = join(data_root, f"{data_name}.xlsx") obj.to_excel(fn_out, **kwargs) - return fn_out, driver, read_kwargs + else: raise ValueError(f"DataFrame: Driver {driver} is unknown.") - return fn_out, driver, kwargs + return fn_out, driver, read_kwargs def get_data( self, From b593f8640c43a731e9e3cc2f08f30b5425f6895d Mon Sep 17 00:00:00 2001 From: Sam Vente Date: Tue, 1 Aug 2023 17:45:01 +0200 Subject: [PATCH 6/7] implement requested changes --- hydromt/data_adapter/dataframe.py | 2 +- 
hydromt/data_adapter/geodataframe.py | 4 +++- hydromt/data_adapter/geodataset.py | 4 +++- hydromt/data_adapter/rasterdataset.py | 3 ++- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/hydromt/data_adapter/dataframe.py b/hydromt/data_adapter/dataframe.py index 1584a78dc..f10bd1527 100644 --- a/hydromt/data_adapter/dataframe.py +++ b/hydromt/data_adapter/dataframe.py @@ -172,7 +172,7 @@ def to_file( driver = "csv" fn_out = join(data_root, f"{data_name}.csv") obj.to_csv(fn_out, **kwargs) - read_kwargs["index_col"] = obj.index.name + read_kwargs["index_col"] = 0 elif driver == "parquet": fn_out = join(data_root, f"{data_name}.parquet") obj.to_parquet(fn_out, **kwargs) diff --git a/hydromt/data_adapter/geodataframe.py b/hydromt/data_adapter/geodataframe.py index dba203a4c..d692d12a6 100644 --- a/hydromt/data_adapter/geodataframe.py +++ b/hydromt/data_adapter/geodataframe.py @@ -169,6 +169,7 @@ def to_file( if gdf.index.size == 0: return None, None, None + read_kwargs = {} if driver is None: _lst = ["csv", "parquet", "xls", "xlsx", "xy", "vector_table"] driver = "csv" if self.driver in _lst else "GPKG" @@ -182,6 +183,7 @@ def to_file( ) gdf["x"], gdf["y"] = gdf.geometry.x, gdf.geometry.y gdf.drop(columns="geometry").to_csv(fn_out, **kwargs) + read_kwargs["index_col"] = 0 elif driver == "parquet": fn_out = join(data_root, f"{data_name}.parquet") if not np.all(gdf.geometry.type == "Point"): @@ -200,7 +202,7 @@ def to_file( gdf.to_file(fn_out, driver=driver, **kwargs) driver = "vector" - return fn_out, driver, kwargs + return fn_out, driver, read_kwargs def get_data( self, diff --git a/hydromt/data_adapter/geodataset.py b/hydromt/data_adapter/geodataset.py index c7b6d39db..56554e909 100644 --- a/hydromt/data_adapter/geodataset.py +++ b/hydromt/data_adapter/geodataset.py @@ -184,6 +184,8 @@ def to_file( if obj.vector.index.size == 0 or ("time" in obj.coords and obj.time.size == 0): return None, None, None + read_kwargs = {} + # much better for 
mem/storage/processing if dtypes are set correctly for name, coord in obj.coords.items(): if coord.values.dtype != object: @@ -218,7 +220,7 @@ def to_file( else: raise ValueError(f"GeoDataset: Driver {driver} unknown.") - return fn_out, driver, kwargs + return fn_out, driver, read_kwargs def get_data( self, diff --git a/hydromt/data_adapter/rasterdataset.py b/hydromt/data_adapter/rasterdataset.py index 97b40f47e..9834d5ef0 100644 --- a/hydromt/data_adapter/rasterdataset.py +++ b/hydromt/data_adapter/rasterdataset.py @@ -194,6 +194,7 @@ def to_file( logger.warning(str(err)) return None, None, None + read_kwargs = {} if driver is None: # by default write 2D raster data to GeoTiff and 3D raster data to netcdf driver = "netcdf" if len(obj.dims) == 3 else "GTiff" @@ -230,7 +231,7 @@ def to_file( ) driver = "raster" - return fn_out, driver, kwargs + return fn_out, driver, read_kwargs def get_data( self, From 7be4922e924f3f18ce731a702b4e9b1e05f18ea0 Mon Sep 17 00:00:00 2001 From: Dirk Eilander Date: Thu, 3 Aug 2023 18:01:23 +0200 Subject: [PATCH 7/7] add driver_kwargs to source --- hydromt/data_catalog.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hydromt/data_catalog.py b/hydromt/data_catalog.py index e20889ebb..f619289af 100644 --- a/hydromt/data_catalog.py +++ b/hydromt/data_catalog.py @@ -867,7 +867,7 @@ def export_data( unit_add = source.unit_add source.unit_mult = {} source.unit_add = {} - fn_out, driver, source_kwargs = source.to_file( + fn_out, driver, driver_kwargs = source.to_file( data_root=data_root, data_name=key, variables=source_vars.get(key, None), @@ -892,6 +892,8 @@ def export_data( source.driver = driver source.filesystem = "local" source.driver_kwargs = {} + if driver_kwargs is not None: + source.driver_kwargs.update(driver_kwargs) source.rename = {} if key in sources_out: self.logger.warning(