From f0864b51faebc6d61c60c6e6c41aa66dc8cb8e5a Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Thu, 29 Feb 2024 16:46:23 +0100 Subject: [PATCH] unique station names in gdf and fix timezone --- dfm_tools/observations.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/dfm_tools/observations.py b/dfm_tools/observations.py index 006254805..86706509f 100644 --- a/dfm_tools/observations.py +++ b/dfm_tools/observations.py @@ -572,11 +572,17 @@ def ddl_ssh_read_catalog(meta_dict=None, old=False): # import re # stat_naam_alphanumeric = df["Naam"].str.replace('[^0-9a-zA-Z]+', '-', regex=True) + # generate unique names, we need suffixes since not all station codes are unique + # TODO: make this more generic station_name_unique = ddl_slev_gdf["Code"] station_name_unique2 = [name + "-1" if duplicated else name for duplicated, name in zip(station_name_unique.duplicated(), station_name_unique)] - ddl_slev_gdf["station_name_unique"] = station_name_unique2 - if ddl_slev_gdf["station_name_unique"].duplicated().sum(): - raise Exception("still duplicated station codes") + station_name_unique2 = pd.Series(station_name_unique2) + station_name_unique3 = [name.replace("-1","-2") if duplicated else name for duplicated, name in zip(station_name_unique2.duplicated(), station_name_unique2)] + ddl_slev_gdf["station_name_unique"] = station_name_unique3 + dupl_bool = ddl_slev_gdf["station_name_unique"].duplicated(keep=False) + dupl_df = ddl_slev_gdf.loc[dupl_bool, ["Code", "Naam", "station_name_unique"]].sort_values("Code") + if dupl_bool.sum(): + raise Exception(f"still duplicated station codes\n{dupl_df}") return ddl_slev_gdf @@ -951,7 +957,7 @@ def ddl_ssh_retrieve_data(ssh_catalog_gpd, dir_output, time_min, time_max, meta_ # data = data.rename("Meetwaarde.Waarde_Numeriek", "values") # dropping timezone is required to get proper encoding in time variable (in file) - data.index = data.index.tz_localize(None) + data.index = data.index.tz_convert(None) # set name so xarray recognizes the time coordinate/index data.index.name = "time"