diff --git a/CITATION.cff b/CITATION.cff index 74fa47b..254ecf8 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,10 +2,13 @@ # Visit https://bit.ly/cffinit to generate yours today! cff-version: 1.2.0 -title: Tool Spec Python template +title: Metacatalog GeoCube aggregator message: >- - Please replace this citation information with appropriate - metadata for your tool + This tool is designed to be used together with the V-FOR-WaTer Metacatalog data loader. + It uses a number of data source files along with either a metacatalog entry or JSON dumps + of the metadata. + The data is aggregated to a target precision (temporal) and spatial resolution and then + ingested into a geocube that is stored as a netCDF file. type: software authors: - given-names: Mirko @@ -15,14 +18,7 @@ authors: Institute for Water and Environment, Hydrology, Karlsruhe Institute for Technology (KIT) orcid: 'https://orcid.org/0000-0002-0424-2651' - - given-names: Alexander - family-names: Dolich - email: alexander.dolich@kit.edu - affiliation: >- - nstitute for Water and Environment, Hydrology, - Karlsruhe Institute for Technology (KIT) - orcid: 'https://orcid.org/0000-0003-4160-6765' -repository-code: 'https://github.com/VForWaTer/tool_template_python' +repository-code: 'https://github.com/hydrocode-de/metacatalog_aggregator' url: 'https://vforwater.github.io/tool-specs/' abstract: >- This is a Github repository template for scientific data @@ -34,6 +30,10 @@ keywords: - docker - tool-spec - V-For-WaTer + - MetaCatalog + - netCDF + - DataCube + - open data cube license: CC-BY-4.0 -version: '0.5' -date-released: '2024-07-30' +version: '0.1' +date-released: '2024-08-20' diff --git a/Dockerfile b/Dockerfile index 98d1fa7..3016688 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,16 +1,16 @@ # Pull any base image that includes python3 -FROM python:3.12 +FROM python:3.12.2 # install the toolbox runner tools -RUN pip install "json2args[data]>=0.6.2" - -# if you do not need data-preloading as your tool 
does that on its own -# you can use this instread of the line above to use a json2args version -# with less dependencies -# RUN pip install json2args>=0.6.2 - -# Do anything you need to install tool dependencies here -RUN echo "Replace this line with a tool" +RUN pip install "json2args>=0.6.2" \ + metacatalog==0.9.2 \ + ipython==8.26.0 \ + pandas==2.2.2 \ + geopandas==1.0.1 \ + xarray[complete]==2024.7.0 \ + rioxarray==0.17.0 \ + polars-lts-cpu==1.1.0 \ + geocube==0.6.0 # create the tool input structure RUN mkdir /in diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 0e259d4..0000000 --- a/LICENSE +++ /dev/null @@ -1,121 +0,0 @@ -Creative Commons Legal Code - -CC0 1.0 Universal - - CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE - LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN - ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS - INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES - REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS - PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM - THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED - HEREUNDER. - -Statement of Purpose - -The laws of most jurisdictions throughout the world automatically confer -exclusive Copyright and Related Rights (defined below) upon the creator -and subsequent owner(s) (each and all, an "owner") of an original work of -authorship and/or a database (each, a "Work"). - -Certain owners wish to permanently relinquish those rights to a Work for -the purpose of contributing to a commons of creative, cultural and -scientific works ("Commons") that the public can reliably and without fear -of later claims of infringement build upon, modify, incorporate in other -works, reuse and redistribute as freely as possible in any form whatsoever -and for any purposes, including without limitation commercial purposes. 
-These owners may contribute to the Commons to promote the ideal of a free -culture and the further production of creative, cultural and scientific -works, or to gain reputation or greater distribution for their Work in -part through the use and efforts of others. - -For these and/or other purposes and motivations, and without any -expectation of additional consideration or compensation, the person -associating CC0 with a Work (the "Affirmer"), to the extent that he or she -is an owner of Copyright and Related Rights in the Work, voluntarily -elects to apply CC0 to the Work and publicly distribute the Work under its -terms, with knowledge of his or her Copyright and Related Rights in the -Work and the meaning and intended legal effect of CC0 on those rights. - -1. Copyright and Related Rights. A Work made available under CC0 may be -protected by copyright and related or neighboring rights ("Copyright and -Related Rights"). Copyright and Related Rights include, but are not -limited to, the following: - - i. the right to reproduce, adapt, distribute, perform, display, - communicate, and translate a Work; - ii. moral rights retained by the original author(s) and/or performer(s); -iii. publicity and privacy rights pertaining to a person's image or - likeness depicted in a Work; - iv. rights protecting against unfair competition in regards to a Work, - subject to the limitations in paragraph 4(a), below; - v. rights protecting the extraction, dissemination, use and reuse of data - in a Work; - vi. database rights (such as those arising under Directive 96/9/EC of the - European Parliament and of the Council of 11 March 1996 on the legal - protection of databases, and under any national implementation - thereof, including any amended or successor version of such - directive); and -vii. other similar, equivalent or corresponding rights throughout the - world based on applicable law or treaty, and any national - implementations thereof. - -2. Waiver. 
To the greatest extent permitted by, but not in contravention -of, applicable law, Affirmer hereby overtly, fully, permanently, -irrevocably and unconditionally waives, abandons, and surrenders all of -Affirmer's Copyright and Related Rights and associated claims and causes -of action, whether now known or unknown (including existing as well as -future claims and causes of action), in the Work (i) in all territories -worldwide, (ii) for the maximum duration provided by applicable law or -treaty (including future time extensions), (iii) in any current or future -medium and for any number of copies, and (iv) for any purpose whatsoever, -including without limitation commercial, advertising or promotional -purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each -member of the public at large and to the detriment of Affirmer's heirs and -successors, fully intending that such Waiver shall not be subject to -revocation, rescission, cancellation, termination, or any other legal or -equitable action to disrupt the quiet enjoyment of the Work by the public -as contemplated by Affirmer's express Statement of Purpose. - -3. Public License Fallback. Should any part of the Waiver for any reason -be judged legally invalid or ineffective under applicable law, then the -Waiver shall be preserved to the maximum extent permitted taking into -account Affirmer's express Statement of Purpose. 
In addition, to the -extent the Waiver is so judged Affirmer hereby grants to each affected -person a royalty-free, non transferable, non sublicensable, non exclusive, -irrevocable and unconditional license to exercise Affirmer's Copyright and -Related Rights in the Work (i) in all territories worldwide, (ii) for the -maximum duration provided by applicable law or treaty (including future -time extensions), (iii) in any current or future medium and for any number -of copies, and (iv) for any purpose whatsoever, including without -limitation commercial, advertising or promotional purposes (the -"License"). The License shall be deemed effective as of the date CC0 was -applied by Affirmer to the Work. Should any part of the License for any -reason be judged legally invalid or ineffective under applicable law, such -partial invalidity or ineffectiveness shall not invalidate the remainder -of the License, and in such case Affirmer hereby affirms that he or she -will not (i) exercise any of his or her remaining Copyright and Related -Rights in the Work or (ii) assert any associated claims and causes of -action with respect to the Work, in either case contrary to Affirmer's -express Statement of Purpose. - -4. Limitations and Disclaimers. - - a. No trademark or patent rights held by Affirmer are waived, abandoned, - surrendered, licensed or otherwise affected by this document. - b. Affirmer offers the Work as-is and makes no representations or - warranties of any kind concerning the Work, express, implied, - statutory or otherwise, including without limitation warranties of - title, merchantability, fitness for a particular purpose, non - infringement, or the absence of latent or other defects, accuracy, or - the present or absence of errors, whether or not discoverable, all to - the greatest extent permissible under applicable law. - c. 
Affirmer disclaims responsibility for clearing rights of other persons - that may apply to the Work or any use thereof, including without - limitation any person's Copyright and Related Rights in the Work. - Further, Affirmer disclaims responsibility for obtaining any necessary - consents, permissions or other rights required for any use of the - Work. - d. Affirmer understands and acknowledges that Creative Commons is not a - party to this document and has no duty or obligation with respect to - this CC0 or use of the Work. diff --git a/README.md b/README.md index 98b0652..0d04e29 100644 --- a/README.md +++ b/README.md @@ -1,86 +1,50 @@ -# tool_template_python +# Metacatalog aggregator -[![Docker Image CI](https://github.com/VForWaTer/tool_template_python/actions/workflows/docker-image.yml/badge.svg)](https://github.com/VForWaTer/tool_template_python/actions/workflows/docker-image.yml) -[![DOI](https://zenodo.org/badge/558416591.svg)](https://zenodo.org/badge/latestdoi/558416591) -This is the template for a generic containerized Python tool following the [Tool Specification](https://vforwater.github.io/tool-specs/) for reusable research software using Docker. +This tool is designed to be used together with the V-FOR-WaTer [Metacatalog data loader](https://github.com/VForWaTer/tool_vforwater_loader). +It uses a number of data source files along with either a metacatalog entry or JSON dumps of the metadata. The data is aggregated to a target precision (temporal) and spatial resolution and then ingested into a geocube that is stored as a netCDF file. -This template can be used to generate new Github repositories from it. +This tool is based on the [Python template](https://github.com/vforwater/tool_template_python) for a generic containerized Python tool following the [Tool Specification](https://vforwater.github.io/tool-specs/) for reusable research software using Docker. -## How generic? 
- -Tools using this template can be run by the [toolbox-runner](https://github.com/hydrocode-de/tool-runner). -That is only convenience, the tools implemented using this template are independent of any framework. - -The main idea is to implement a common file structure inside container to load inputs and outputs of the -tool. The template shares this structures with the [R template](https://github.com/vforwater/tool_template_r), -[NodeJS template](https://github.com/vforwater/tool_template_node) and [Octave template](https://github.com/vforwater/tool_template_octave), -but can be mimiced in any container. - -Each container needs at least the following structure: +## Structure ``` / |- in/ -| |- parameters.json +| |- input.json |- out/ | |- ... |- src/ | |- tool.yml | |- run.py +| |- CITATION.cff ``` -* `parameters.json` are parameters. Whichever framework runs the container, this is how parameters are passed. +* `input.json` are parameters. Whichever framework runs the container, this is how parameters are passed. * `tool.yml` is the tool specification. It contains metadata about the scope of the tool, the number of endpoints (functions) and their parameters -* `run.py` is the tool itself, or a Python script that handles the execution. It has to capture all outputs and either `print` them to console or create files in `/out` +* `run.py` is the tool itself +* `CITATION.cff` is a citation file that describes the tool and its authors. ## How to build the image? You can build the image from within the root of this repo by ``` -docker build -t tbr_python_tempate . +docker build -t metacatalog_geocube . ``` -Use any tag you like. If you want to run and manage the container with [toolbox-runner](https://github.com/hydrocode-de/tool-runner) -they should be prefixed by `tbr_` to be recognized. - -Alternatively, the contained `.github/workflows/docker-image.yml` will build the image for you -on new releases on Github.
You need to change the target repository in the aforementioned yaml. - ## How to run? -This template installs the json2args python package to parse the parameters in the `/in/parameters.json`. This assumes that -the files are not renamed and not moved and there is actually only one tool in the container. For any other case, the environment variables -`PARAM_FILE` can be used to specify a new location for the `parameters.json` and `TOOL_RUN` can be used to specify the tool to be executed. +This template installs the json2args python package to parse the parameters in the `/in/input.json`. This assumes that +the files are not renamed and not moved and there is actually only one tool in the container. For any other case, the environment variables `PARAM_FILE` can be used to specify a new location for the `input.json` and `TOOL_RUN` can be used to specify the tool to be executed. The `run.py` has to take care of that. To invoke the docker container directly run something similar to: ``` -docker run --rm -it -v /path/to/local/in:/in -v /path/to/local/out:/out -e TOOL_RUN=foobar tbr_python_template +docker run --rm -it -v /path/to/local/in:/in -v /path/to/local/out:/out -e TOOL_RUN=geocube metacatalog_geocube ``` Then, the output will be in your local out and based on your local input folder. Stdout and Stderr are also connected to the host. -With the [toolbox runner](https://github.com/hydrocode-de/tool-runner), this is simplyfied: - -```python -from toolbox_runner import list_tools -tools = list_tools() # dict with tool names as keys - -foobar = tools.get('foobar') # it has to be present there... -foobar.run(result_path='./', foo_int=1337, foo_string="Please change me") -``` -The example above will create a temporary file structure to be mounted into the container and then create a `.tar.gz` on termination of all -inputs, outputs, specifications and some metadata, including the image sha256 used to create the output in the current working directory.
- -## What about real tools, no foobar? - -Yeah. -1. change the `tool.yml` to describe your actual tool -2. add any `pip install` or `apt-get install` needed to the dockerfile -3. add additional source code to `/src` -4. change the `run.py` to consume parameters and data from `/in` and useful output in `out` -5. build, run, rock! diff --git a/RELEASE.md b/RELEASE.md index 7eb71b2..8abc014 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,5 +1,7 @@ -# tool_template_python +# Metacatalog aggregator -This is the template for a generic containerized Python tool following the [Tool Specification](https://vforwater.github.io/tool-specs/) for reusable research software using Docker. -This template can be used to generate new Github repositories from it. +This tool is designed to be used together with the V-FOR-WaTer [Metacatalog data loader](https://github.com/VForWaTer/tool_vforwater_loader). +It uses a number of data source files along with either a metacatalog entry or JSON dumps of the metadata. The data is aggregated to a target precision (temporal) and spatial resolution and then ingested into a geocube that is stored as a netCDF file. + +This tool is based on the [Python template](https://github.com/vforwater/tool_template_python) for a generic containerized Python tool following the [Tool Specification](https://vforwater.github.io/tool-specs/) for reusable research software using Docker. diff --git a/src/tool.yml b/src/tool.yml index 39f0bb1..23ff90c 100644 --- a/src/tool.yml +++ b/src/tool.yml @@ -1,8 +1,12 @@ tools: - foobar: - title: Foo Bar - description: A dummy tool to exemplify the YAML file - version: 0.1 + geocube: + title: Metacatalog GeoCube + description: | + This tool is designed to be used together with the V-FOR-WaTer Metacatalog data loader. + It uses a number of data source files along with either a metacatalog entry or JSON dumps + of the metadata. 
+ The data is aggregated to a target precision (temporal) and spatial resolution and then + ingested into a geocube that is stored as a netCDF file. parameters: foo_int: type: integer