diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
new file mode 100644
index 0000000..dc5f855
--- /dev/null
+++ b/.github/workflows/docker-image.yml
@@ -0,0 +1,67 @@
+# On every pushed v* tag: create a Github release and publish the Docker image.
+name: Docker Image CI
+
+on: push
+
+jobs:
+  release:
+    name: Create Github release
+    runs-on: ubuntu-latest
+    if: startsWith(github.event.ref, 'refs/tags/v')
+    # action-gh-release creates the release with GITHUB_TOKEN, which needs
+    # write access to the repository contents
+    permissions:
+      contents: write
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Release
+        uses: softprops/action-gh-release@v1
+        with:
+          body_path: RELEASE.md
+          generate_release_notes: true
+
+  build:
+    name: Build docker images
+    if: startsWith(github.event.ref, 'refs/tags/v')
+
+    runs-on: ubuntu-latest
+    # declaring any permission sets all unlisted ones to none, so the read
+    # access actions/checkout needs on private repositories is granted explicitly
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v3
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+
+      - name: Login to ghcr.io
+        uses: docker/login-action@v2
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@v4
+        with:
+          # change this to the target repository of your own tool
+          images: ghcr.io/vforwater/tbr_template_python
+
+      - name: Build and push
+        uses: docker/build-push-action@v3
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b6e4761
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,129 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..74fa47b --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,39 @@ +# This CITATION.cff file was generated with cffinit. +# Visit https://bit.ly/cffinit to generate yours today! + +cff-version: 1.2.0 +title: Tool Spec Python template +message: >- + Please replace this citation information with appropriate + metadata for your tool +type: software +authors: + - given-names: Mirko + family-names: Mälicke + email: mirko.maelicke@KIT.edu + affiliation: >- + Institute for Water and Environment, Hydrology, + Karlsruhe Institute of Technology (KIT) + orcid: 'https://orcid.org/0000-0002-0424-2651' + - given-names: Alexander + family-names: Dolich + email: alexander.dolich@kit.edu + affiliation: >- + Institute for Water and Environment, Hydrology, + Karlsruhe Institute of Technology (KIT) + orcid: 'https://orcid.org/0000-0003-4160-6765' +repository-code: 'https://github.com/VForWaTer/tool_template_python' +url: 'https://vforwater.github.io/tool-specs/' +abstract: >- + This is a Github repository template for scientific data + (pre-)processing tools following the tool specs + (https://vforwater.github.io/tool-specs/) for + containerized scientific tools. You can use this + repository as a starting point for scientific tools. 
+keywords:
+  - docker
+  - tool-spec
+  - V-For-WaTer
+license: CC-BY-4.0
+version: '0.5'
+date-released: '2024-07-30'
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..98d1fa7
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,26 @@
+# Pull any base image that includes python3
+FROM python:3.12
+
+# install the toolbox runner tools (no pip cache, to keep the image layer small)
+RUN pip install --no-cache-dir "json2args[data]>=0.6.2"
+
+# if you do not need data-preloading as your tool does that on its own
+# you can use this instead of the line above to use a json2args version
+# with fewer dependencies (keep the quotes: an unquoted >= is a shell redirect)
+# RUN pip install --no-cache-dir "json2args>=0.6.2"
+
+# Do anything you need to install tool dependencies here
+RUN echo "Replace this line with a tool"
+
+# create the tool input structure
+RUN mkdir /in
+COPY ./in /in
+RUN mkdir /out
+RUN mkdir /src
+COPY ./src /src
+
+# copy the citation file - looks funny to make COPY not fail if the file is not there
+COPY ./CITATION.cf[f] /src/CITATION.cff
+
+WORKDIR /src
+CMD ["python", "run.py"]
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..0e259d4
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,121 @@
+Creative Commons Legal Code
+
+CC0 1.0 Universal
+
+ CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
+ LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
+ ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
+ INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
+ REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
+ PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
+ THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
+ HEREUNDER.
+
+Statement of Purpose
+
+The laws of most jurisdictions throughout the world automatically confer
+exclusive Copyright and Related Rights (defined below) upon the creator
+and subsequent owner(s) (each and all, an "owner") of an original work of
+authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. 
In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. diff --git a/README.md b/README.md new file mode 100644 index 0000000..98b0652 --- /dev/null +++ b/README.md @@ -0,0 +1,86 @@ +# tool_template_python + +[![Docker Image CI](https://github.com/VForWaTer/tool_template_python/actions/workflows/docker-image.yml/badge.svg)](https://github.com/VForWaTer/tool_template_python/actions/workflows/docker-image.yml) +[![DOI](https://zenodo.org/badge/558416591.svg)](https://zenodo.org/badge/latestdoi/558416591) + +This is the template for a generic containerized Python tool following the [Tool Specification](https://vforwater.github.io/tool-specs/) for reusable research software using Docker. + +This template can be used to generate new Github repositories from it. + + +## How generic? + +Tools using this template can be run by the [toolbox-runner](https://github.com/hydrocode-de/tool-runner). +That is only convenience, the tools implemented using this template are independent of any framework. + +The main idea is to implement a common file structure inside the container to load inputs and outputs of the +tool. The template shares this structure with the [R template](https://github.com/vforwater/tool_template_r), +[NodeJS template](https://github.com/vforwater/tool_template_node) and [Octave template](https://github.com/vforwater/tool_template_octave), +but can be mimicked in any container. + +Each container needs at least the following structure: + +``` +/ +|- in/ +| |- parameters.json +|- out/ +| |- ... 
+|- src/ +| |- tool.yml +| |- run.py +``` + +* `parameters.json` are parameters. Whichever framework runs the container, this is how parameters are passed. +* `tool.yml` is the tool specification. It contains metadata about the scope of the tool, the number of endpoints (functions) and their parameters +* `run.py` is the tool itself, or a Python script that handles the execution. It has to capture all outputs and either `print` them to console or create files in `/out` + +## How to build the image? + +You can build the image from within the root of this repo by +``` +docker build -t tbr_python_template . +``` + +Use any tag you like. If you want to run and manage the container with [toolbox-runner](https://github.com/hydrocode-de/tool-runner) +it should be prefixed by `tbr_` to be recognized. + +Alternatively, the contained `.github/workflows/docker-image.yml` will build the image for you +on new releases on Github. You need to change the target repository in the aforementioned yaml. + +## How to run? + +This template installs the json2args python package to parse the parameters in the `/in/parameters.json`. This assumes that +the files are not renamed and not moved and there is actually only one tool in the container. For any other case, the environment variables +`PARAM_FILE` can be used to specify a new location for the `parameters.json` and `TOOL_RUN` can be used to specify the tool to be executed. +The `run.py` has to take care of that. + +To invoke the docker container directly run something similar to: +``` +docker run --rm -it -v /path/to/local/in:/in -v /path/to/local/out:/out -e TOOL_RUN=foobar tbr_python_template +``` + +Then, the output will be in your local out and based on your local input folder. Stdout and Stderr are also connected to the host. 
+ +With the [toolbox runner](https://github.com/hydrocode-de/tool-runner), this is simplified: + +```python +from toolbox_runner import list_tools +tools = list_tools() # dict with tool names as keys + +foobar = tools.get('foobar') # it has to be present there... +foobar.run(result_path='./', foo_int=1337, foo_string="Please change me") +``` +The example above will create a temporary file structure to be mounted into the container and then create a `.tar.gz` on termination of all +inputs, outputs, specifications and some metadata, including the image sha256 used to create the output in the current working directory. + +## What about real tools, no foobar? + +Yeah. + +1. change the `tool.yml` to describe your actual tool +2. add any `pip install` or `apt-get install` needed to the dockerfile +3. add additional source code to `/src` +4. change the `run.py` to consume parameters and data from `/in` and write useful output to `/out` +5. build, run, rock! + diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 0000000..7eb71b2 --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,5 @@ +# tool_template_python + +This is the template for a generic containerized Python tool following the [Tool Specification](https://vforwater.github.io/tool-specs/) for reusable research software using Docker. + +This template can be used to generate new Github repositories from it. 
diff --git a/in/foo_csv.csv b/in/foo_csv.csv
new file mode 100644
index 0000000..fb32ee6
--- /dev/null
+++ b/in/foo_csv.csv
@@ -0,0 +1,11 @@
+A,B,C,D
+3,14,10,17
+2,0,2,24
+2,17,19,24
+3,13,1,18
+13,13,7,10
+10,7,13,14
+12,12,9,5
+16,24,3,16
+11,3,2,14
+19,4,5,20
diff --git a/in/foo_matrix.dat b/in/foo_matrix.dat
new file mode 100644
index 0000000..948f60b
--- /dev/null
+++ b/in/foo_matrix.dat
@@ -0,0 +1,10 @@
+# Created by Octave 6.4.0, Thu Oct 27 08:57:13 2022 UTC
+# name: foo_matrix
+# type: matrix
+# rows: 3
+# columns: 2
+ 1 2
+ 3 4
+ 5 6.0999999999999996
+
+
diff --git a/in/input.json b/in/input.json
new file mode 100644
index 0000000..07ec2e4
--- /dev/null
+++ b/in/input.json
@@ -0,0 +1,15 @@
+{
+    "foobar": {
+        "parameters": {
+            "foo_int": 42,
+            "foo_float": 13.37,
+            "foo_string": "Never eat yellow snow",
+            "foo_enum": "bar",
+            "foo_array": [34, 55, 23, 43, 23]
+        },
+        "data": {
+            "foo_matrix": "/in/foo_matrix.dat",
+            "foo_csv": "/in/foo_csv.csv"
+        }
+    }
+}
diff --git a/src/run.py b/src/run.py
new file mode 100644
index 0000000..abf4230
--- /dev/null
+++ b/src/run.py
@@ -0,0 +1,33 @@
+# Entry point of the tool container: loads the parameters and data through
+# json2args and runs the tool selected by the TOOL_RUN environment variable.
+import os
+from datetime import datetime as dt
+from pprint import pprint
+
+from json2args import get_parameter
+from json2args.data import get_data
+
+# parse parameters
+kwargs = get_parameter()
+data = get_data(as_dict=True)
+
+# check if a toolname was set in env (defaults to 'foobar', compared in lower case)
+toolname = os.environ.get('TOOL_RUN', 'foobar').lower()
+
+# switch the tool
+if toolname == 'foobar':
+    # RUN the tool here and create the output in /out
+    print('This toolbox does not include any tool. Did you run the template?\n')
+
+    # write parameters to STDOUT.log
+    pprint(kwargs)
+
+    for name, ds in data.items():
+        print(f"\n### {name}")
+        print(ds)
+
+
+# In any other case, it was not clear which tool to run
+else:
+    # isoformat() gives a readable timestamp; isocalendar() would print a (year, week, weekday) tuple
+    raise AttributeError(f"[{dt.now().isoformat()}] Either no TOOL_RUN environment variable available, or '{toolname}' is not valid.\n")
diff --git a/src/tool.yml b/src/tool.yml
new file mode 100644
index 0000000..39f0bb1
--- /dev/null
+++ b/src/tool.yml
@@ -0,0 +1,28 @@
+tools:
+  foobar:
+    title: Foo Bar
+    description: A dummy tool to exemplify the YAML file
+    version: '0.1'  # quoted so the version stays a string (0.10 would load as 0.1)
+    parameters:
+      foo_int:
+        type: integer
+      foo_float:
+        type: float
+      foo_string:
+        type: string
+      foo_enum:
+        type: enum
+        values:
+          - foo
+          - bar
+          - baz
+      foo_array:
+        type: integer
+        array: true
+    data:
+      foo_matrix:
+        extension: dat
+        description: A matrix file that can be read by numpy
+      foo_csv:
+        extension: csv
+        description: A standard formatted CSV file, for autoloading using pandas