From a9eb4b49fe895c7246058684fb545b1a745efb8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirko=20M=C3=A4licke?= Date: Tue, 9 Jul 2024 08:19:17 +0200 Subject: [PATCH 1/2] remove the unsafe usage of db connection URIs --- README.md | 1 - src/run.py | 17 ++++------------- src/tool.yml | 11 +---------- 3 files changed, 5 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 93b71ec..510ed41 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,6 @@ tables are listed accordingly and license information is extracted and presented | keep_data_files | If set to `false`, the data files clipped to the spatial and temporal scale will not be kept. | | precision | The precision for aggregations along the temporal scale of the datasets. | | resolution | The resolution of the output data. This parameter is only relevant for areal datasets. | -| connection | The connection URI to the MetaCatalog instance. | ## Development and local run diff --git a/src/run.py b/src/run.py index b1833f3..68d7200 100644 --- a/src/run.py +++ b/src/run.py @@ -20,6 +20,9 @@ from clip import reference_area_to_file from version import __version__ +# always load .env files +load_dotenv() + # parse parameters kwargs = get_parameter() @@ -30,20 +33,8 @@ if toolname != 'vforwater_loader': raise AttributeError(f"[{dt.now().isocalendar()}] Either no TOOL_RUN environment variable available, or '{toolname}' is not valid.\n") - # use the pydantic model to handle the input parameters -params = load_params(**kwargs) - -# check if a connection was given and if it is a valid path -# this is handled extra to keep the pydantic model simpler -if 'connection' in kwargs: - if Path(kwargs['connection']).exists(): - load_dotenv(dotenv_path=kwargs['connection']) - else: - # it is interpreted as a connection uri - connection = kwargs['connection'] -else: - load_dotenv() +params = load_params(**kwargs) # check if a connection evironment variable is given if 'VFW_POSTGRES_URI' in os.environ: diff --git 
a/src/tool.yml b/src/tool.yml index d552089..567abf2 100644 --- a/src/tool.yml +++ b/src/tool.yml @@ -1,6 +1,6 @@ tools: vforwater_loader: - title: Loader for datasets stored in a metacatalog instance + title: Dataset Loader description: | This tool will use `metacatalog` to load datasets stored in a metacatalog instance, like V-FOR-WaTer. The requested datasources will be made available in the output directory of the tool. Areal datasets @@ -76,12 +76,3 @@ tools: If the dataset is areal and the parameter is set, the dataset will be resampled to the given resolution. Note: Right now, the aggregations can only write parquet files. For larger (espeically spatio-temporal) datasets, these aggreations can be large. A future version will write netCDF or Zarr for these cases. - connection: - type: string - optional: true - description: | - THIS OPTION IS DEPRECATED. PLEASE SET THE ENVIRONMENT VARIABLE METACATALOG_URI INSTEAD. - To connect to the V-FOR-WaTer database, you need to provide a connection URI. These URIs should be saved to an evnironment variable - called `VFW_POSTGRES_URI`. Alternatively, you can pass in a file path to an `.env` file that contains the connection URI. - Passing the string directly as this parameter is technically also possible, but not recommended. - From f1b54755de1560ee020b0f855f135a4c741fef81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirko=20M=C3=A4licke?= Date: Tue, 9 Jul 2024 08:21:28 +0200 Subject: [PATCH 2/2] pre-load duckdb spatial extension --- Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile b/Dockerfile index ec0ca1a..8b09bd6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -54,6 +54,9 @@ RUN arch=$(uname -m | sed s/x86_64/amd64/) && \ chmod +x ./duckdb && \ mv ./duckdb /duck/duckdb +# pre-install the spatial extension into duckdb as it will be used +RUN /duck/duckdb -c "INSTALL spatial;" + # go to the source directory of this tool WORKDIR /src CMD ["python", "run.py"]