diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 00000000..aeb26d82 --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,38 @@ +name: Build and Deploy Sphinx Documentation + +# on: +# push: +# branches: +# - docs_dev +# tags: +# - 'v*' + +jobs: + build-and-deploy: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + with: + persist-credentials: false + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: '3.9' + + - name: Install dependencies + run: | + pip install -r docs/requirements.txt + pip install sphinx sphinx-rtd-theme + + - name: Build documentation + run: | + cd docs + make html + + - name: Deploy to GitHub Pages + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./docs/_build/html diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..790af042 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,32 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.10" + # You can also specify other tool versions: + # nodejs: "19" + # rust: "1.64" + # golang: "1.19" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally build your docs in additional formats such as PDF and ePub +# formats: +# - pdf +# - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: docs/requirements.txt diff --git a/docs/.gitignore b/docs/.gitignore deleted file mode 100644 index f55f6395..00000000 --- a/docs/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -# Copied from https://github.com/github/gitignore/blob/main/Jekyll.gitignore -# Ignore metadata generated by Jekyll -_site/ -.sass-cache/ -.jekyll-cache/ -.jekyll-metadata - -# Ignore folders generated by Bundler -.bundle/ -vendor/ diff --git a/docs/3dgan_doc.rst b/docs/3dgan_doc.rst new file mode 100644 index 00000000..1b8added --- /dev/null +++ b/docs/3dgan_doc.rst @@ -0,0 +1,124 @@ +3DGAN +===== + +This section covers the CERN use case that utilizes the `torch-lightning` framework for training and evaluation. The following files are integral to this use case: + +itwinai x 3DGAN +--------------- + + +.. toctree:: + :maxdepth: 5 + + +model.py +++++++++ + +.. literalinclude:: ../use-cases/3dgan/model.py + :language: python + + +trainer.py +++++++++++ +.. literalinclude:: ../use-cases/3dgan/trainer.py + :language: python + + +saver.py +++++++++ + +.. literalinclude:: ../use-cases/3dgan/saver.py + :language: python + + +dataloader.py ++++++++++++++ + +.. literalinclude:: ../use-cases/3dgan/dataloader.py + :language: python + + +cern-pipeline.yaml +++++++++++++++++++ + +This YAML file defines the pipeline configuration for the CERN use case. + +.. literalinclude:: ../use-cases/3dgan/cern-pipeline.yaml + :language: yaml + + +inference-pipeline.yaml ++++++++++++++++++++++++ + +This YAML file defines the pipeline configuration for the CERN use case inference. + +.. literalinclude:: ../use-cases/3dgan/inference-pipeline.yaml + :language: yaml + + +Dockerfile +++++++++++ + +.. 
literalinclude:: ../use-cases/3dgan/Dockerfile + :language: bash + + +pipeline.yaml ++++++++++++++ + +This YAML file defines the pipeline configuration for the CERN use case. It includes settings for the model, training, and evaluation. + +.. literalinclude:: ../use-cases/3dgan/pipeline.yaml + :language: yaml + + + +This section covers the CERN use case integration with `interLink `_ using ``itwinai``. The following files are integral to this use case: + +interLink x 3DGAN +----------------- + +.. toctree:: + :maxdepth: 5 + + +3dgan-inference-cpu.yaml +++++++++++++++++++++++++ + +.. literalinclude:: ../use-cases/3dgan/interLink/3dgan-inference-cpu.yaml + :language: yaml + + +3dgan-inference.yaml +++++++++++++++++++++++++ + +.. literalinclude:: ../use-cases/3dgan/interLink/3dgan-inference.yaml + :language: yaml + + + + +.. .. automodule:: 3dgan.model +.. :members: +.. :undoc-members: +.. :show-inheritance: + +.. .. automodule:: 3dgan.train +.. :members: +.. :undoc-members: +.. :show-inheritance: + +.. .. automodule:: 3dgan.trainer +.. :members: +.. :undoc-members: +.. :show-inheritance: + +.. .. automodule:: 3dgan.saver +.. :members: +.. :undoc-members: +.. :show-inheritance: + +.. .. automodule:: 3dgan.dataloader +.. :members: +.. :undoc-members: +.. :show-inheritance: diff --git a/docs/404.html b/docs/404.html deleted file mode 100644 index b8547546..00000000 --- a/docs/404.html +++ /dev/null @@ -1,12 +0,0 @@ ---- -layout: default -title: 404 -permalink: /404 -nav_exclude: true -search_exclude: true ---- - -

Page not found

- -

The page you requested could not be found. Try using the navigation {% if site.search_enabled != false %}or search {% - endif %}to find what you're looking for or go to this site's home page.

\ No newline at end of file diff --git a/docs/Gemfile b/docs/Gemfile deleted file mode 100644 index 387154f8..00000000 --- a/docs/Gemfile +++ /dev/null @@ -1,7 +0,0 @@ -source 'https://rubygems.org' - -gem "jekyll", "~> 4.3" # installed by `gem jekyll` -# gem "webrick" # required when using Ruby >= 3 and Jekyll <= 4.2.2 - -gem "just-the-docs", "0.5.0" # pinned to the current release -# gem "just-the-docs" # always download the latest release diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock deleted file mode 100644 index 81efc419..00000000 --- a/docs/Gemfile.lock +++ /dev/null @@ -1,79 +0,0 @@ -GEM - remote: https://rubygems.org/ - specs: - addressable (2.8.1) - public_suffix (>= 2.0.2, < 6.0) - colorator (1.1.0) - concurrent-ruby (1.1.10) - em-websocket (0.5.3) - eventmachine (>= 0.12.9) - http_parser.rb (~> 0) - eventmachine (1.2.7) - ffi (1.15.5) - forwardable-extended (2.6.0) - http_parser.rb (0.8.0) - i18n (1.12.0) - concurrent-ruby (~> 1.0) - jekyll (4.3.0) - addressable (~> 2.4) - colorator (~> 1.0) - em-websocket (~> 0.5) - i18n (~> 1.0) - jekyll-sass-converter (>= 2.0, < 4.0) - jekyll-watch (~> 2.0) - kramdown (~> 2.3, >= 2.3.1) - kramdown-parser-gfm (~> 1.0) - liquid (~> 4.0) - mercenary (>= 0.3.6, < 0.5) - pathutil (~> 0.9) - rouge (>= 3.0, < 5.0) - safe_yaml (~> 1.0) - terminal-table (>= 1.8, < 4.0) - webrick (~> 1.7) - jekyll-sass-converter (2.2.0) - sassc (> 2.0.1, < 3.0) - jekyll-seo-tag (2.8.0) - jekyll (>= 3.8, < 5.0) - jekyll-watch (2.2.1) - listen (~> 3.0) - just-the-docs (0.5.0) - jekyll (>= 3.8.5) - jekyll-seo-tag (>= 2.0) - rake (>= 12.3.1) - kramdown (2.4.0) - rexml - kramdown-parser-gfm (1.1.0) - kramdown (~> 2.0) - liquid (4.0.3) - listen (3.7.1) - rb-fsevent (~> 0.10, >= 0.10.3) - rb-inotify (~> 0.9, >= 0.9.10) - mercenary (0.4.0) - pathutil (0.16.2) - forwardable-extended (~> 2.6) - public_suffix (5.0.0) - rake (13.0.6) - rb-fsevent (0.11.2) - rb-inotify (0.10.1) - ffi (~> 1.0) - rexml (3.2.5) - rouge (4.0.0) - safe_yaml (1.0.5) - sassc (2.4.0) - ffi (~> 1.9) - terminal-table (3.0.2) - unicode-display_width (>= 1.1.1, < 3) - unicode-display_width (2.3.0) - webrick (1.7.0) - -PLATFORMS - arm64-darwin-21 - x86_64-darwin-19 - x86_64-linux - -DEPENDENCIES - jekyll (~> 4.3) - just-the-docs (= 0.5.0) - -BUNDLED WITH - 2.3.9 diff --git a/docs/LICENSE b/docs/LICENSE deleted file mode 100644 index 7d510d02..00000000 --- a/docs/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2022 just-the-docs - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..d4bb2cbb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 4a405acd..00000000 --- a/docs/README.md +++ /dev/null @@ -1,181 +0,0 @@ -# just-the-docs-template - -This is a *bare-minimum* template to create a [Jekyll] site that: - -- uses the [Just the Docs] theme; -- can be built and published on [GitHub Pages]; -- can be built and previewed locally, and published on other platforms. - -More specifically, the created site: - -- uses a gem-based approach, i.e. uses a `Gemfile` and loads the `just-the-docs` gem; -- uses the [GitHub Pages / Actions workflow] to build and publish the site on GitHub Pages. - -To get started with creating a site, just click "[use this template]"! - -If you want to maintain your docs in the `docs` directory of an existing project repository, see -[Hosting your docs from an existing project repository](#hosting-your-docs-from-an-existing-project-repository). - -After completing the creation of your new site on GitHub, update it as needed: - -## Replace the content of the template pages - -Update the following files to your own content: - -- `index.md` (your new home page) -- `README.md` (information for those who access your site repository on GitHub) - -## Changing the version of the theme and/or Jekyll - -Simply edit the relevant line(s) in the `Gemfile`. - -## Adding a plugin - -The Just the Docs theme automatically includes the [`jekyll-seo-tag`] plugin. - -To add an extra plugin, you need to add it in the `Gemfile` *and* in `_config.yml`. For example, to add [`jekyll-default-layout`]: - -- Add the following to your site's `Gemfile`: - - ```ruby - gem "jekyll-default-layout" - ``` - -- And add the following to your site's `_config.yml`: - - ```yaml - plugins: - - jekyll-default-layout - ``` - -Note: If you are using a Jekyll version less than 3.5.0, use the `gems` key instead of `plugins`. - -## Publishing your site on GitHub Pages - -1. If your created site is `YOUR-USERNAME/YOUR-SITE-NAME`, update `_config.yml` to: - - ```yaml - title: YOUR TITLE - description: YOUR DESCRIPTION - theme: just-the-docs - - url: https://YOUR-USERNAME.github.io/YOUR-SITE-NAME - - aux_links: # remove if you don't want this link to appear on your pages - Template Repository: https://github.com/YOUR-USERNAME/YOUR-SITE-NAME - ``` - -2. Push your updated `_config.yml` to your site on GitHub. - -3. In your newly created repository on GitHub: - - go to the `Settings` tab -> `Pages` -> `Build and deployment`, then select `Source`: `GitHub Actions`. - - if there were any failed Actions, go to the `Actions` tab and click on `Re-run jobs`. - -## Building and previewing your site locally - -Assuming [Jekyll] and [Bundler] are installed on your computer: - -1. 
Change your working directory to the root directory of your site. - -2. Run `bundle install`. - -3. Run `bundle exec jekyll serve` to build your site and preview it at `localhost:4000`. - - The built site is stored in the directory `_site`. - -## Publishing your built site on a different platform - -Just upload all the files in the directory `_site`. - -## Customization - -You're free to customize sites that you create with this template, however you like! - -[Browse our documentation][Just the Docs] to learn more about how to use this theme. - -## Hosting your docs from an existing project repository - -You might want to maintain your docs in an existing project repository. Instead of creating a new repository using -the [just-the-docs template](https://github.com/just-the-docs/just-the-docs-template), you can copy the template -files into your existing repository and configure the template's GitHub Actions workflow to -build from a `docs` directory. You can clone the template to your local machine or download the `.zip` file -to access the files. - -### Copy the template files - -1. Create a `.github/workflows` directory at your project root if your repository doesn't already have one. -Copy the `pages.yml` file into this directory. GitHub Actions searches this directory for workflow files. - -2. Create a `docs` directory at your project root and copy all remaining template files into this directory. - -### Modify the GitHub Actions workflow - -The GitHub Actions workflow that builds and deploys your site to GitHub Pages is defined by the `pages.yml` file. -You'll need to edit this file to that so that your build and deploy steps look to your `docs` directory, -rather than the project root. - -1. Set the default `working-directory` param for the build job. - - ```yaml - build: - runs-on: ubuntu-latest - defaults: - run: - working-directory: docs - ``` - -2. Set the `working-directory` param for the Setup Ruby step. - - ```yaml - - name: Setup Ruby - uses: ruby/setup-ruby@v1 - with: - ruby-version: '3.1' - bundler-cache: true - cache-version: 0 - working-directory: '${{ github.workspace }}/docs' - ``` - -3. Set the path param for the Upload artifact step: - - ```yaml - - name: Upload artifact - uses: actions/upload-pages-artifact@v1 - with: - path: "docs/_site/" - ``` - -4. Modify the trigger so that only changes within the `docs` directory start the workflow. -Otherwise, every change to your project (even those that don't affect the docs) would trigger a new site build and deploy. - - ```yaml - on: - push: - branches: - - "main" - paths: - - "docs/**" - ``` - -## Licensing and Attribution - -This repository is licensed under the [MIT License]. You are generally free to reuse or extend upon this code as you -see fit; just include the original copy of the license (which is preserved when you "make a template"). -While it's not necessary, we'd love to hear from you if you do use this template, and how we can improve it for future use! - -The deployment GitHub Actions workflow is heavily based on GitHub's mixed-party [starter workflows]. -A copy of their MIT License is available in [actions/starter-workflows]. 
- ----- - -[Jekyll]: https://jekyllrb.com -[Just the Docs]: https://just-the-docs.github.io/just-the-docs/ -[GitHub Pages]: https://docs.github.com/en/pages -[GitHub Pages / Actions workflow]: https://github.blog/changelog/2022-07-27-github-pages-custom-github-actions-workflows-beta/ -[Bundler]: https://bundler.io -[use this template]: https://github.com/just-the-docs/just-the-docs-template -[`jekyll-default-layout`]: https://github.com/benbalter/jekyll-default-layout -[`jekyll-seo-tag`]: https://jekyll.github.io/jekyll-seo-tag -[MIT License]: https://en.wikipedia.org/wiki/MIT_License -[starter workflows]: https://github.com/actions/starter-workflows/blob/main/pages/jekyll.yml -[actions/starter-workflows]: https://github.com/actions/starter-workflows/blob/main/LICENSE diff --git a/docs/_config.yml b/docs/_config.yml deleted file mode 100644 index ba7498ac..00000000 --- a/docs/_config.yml +++ /dev/null @@ -1,26 +0,0 @@ -title: itwinai -description: Docs for task T6.5 prototype of interTwin project -theme: just-the-docs - -url: https://interTwin-eu.github.io/T6.5-AI-and-ML - -aux_links: - Template Repository: https://github.com/interTwin-eu/T6.5-AI-and-ML - -favicon_ico: "favicon.ico" - -nav_external_links: - - title: itwinai on GitHub - url: https://github.com/interTwin-eu/T6.5-AI-and-ML - hide_icon: false # set to true to hide the external link icon - defaults to false - - title: interTwin on GitHub - url: https://github.com/interTwin-eu/ - hide_icon: false # set to true to hide the external link icon - defaults to false - - title: interTwin project - url: https://www.intertwin.eu/ - hide_icon: false # set to true to hide the external link icon - defaults to false - -mermaid: - # Version of mermaid library - # Pick an available version from https://cdn.jsdelivr.net/npm/mermaid/ - version: "10.1.0" diff --git a/docs/advanced_workflow.rst b/docs/advanced_workflow.rst new file mode 100644 index 00000000..121d3acc --- /dev/null +++ b/docs/advanced_workflow.rst @@ -0,0 +1,19 @@ +Advanced workflow +================= + +tutorial_2_advanced_workflow.py ++++++++++++++++++++++++++++++++ + +The `tutorial_2_advanced_workflow.py` script is ... + +.. .. literalinclude:: ../use-cases/mnist/torch-lightning/dataloader.py +.. :language: python + +.. automodule:: tutorial_2_advanced_workflow + :members: + :undoc-members: + :show-inheritance: + + +.. literalinclude:: ../tutorials/ml-workflows/tutorial_2_advanced_workflow.py + :language: python diff --git a/docs/basic_comp.rst b/docs/basic_comp.rst new file mode 100644 index 00000000..5acfe66d --- /dev/null +++ b/docs/basic_comp.rst @@ -0,0 +1,16 @@ +Basic components +================ + +basic_components.py ++++++++++++++++++++ + +The `basic_components.py` script is ... + +.. .. literalinclude:: ../use-cases/mnist/torch-lightning/dataloader.py +.. :language: python + +.. automodule:: basic_components + :members: + :undoc-members: + :show-inheritance: + diff --git a/docs/basic_workflow.rst b/docs/basic_workflow.rst new file mode 100644 index 00000000..cd77d328 --- /dev/null +++ b/docs/basic_workflow.rst @@ -0,0 +1,19 @@ +Basic workflow +============== + +tutorial_0_basic_workflow.py +++++++++++++++++++++++++++++ + +The `tutorial_0_basic_workflow.py` script is ... + +.. .. literalinclude:: ../use-cases/mnist/torch-lightning/dataloader.py +.. :language: python + +.. .. automodule:: tutorial_0_basic_workflow +.. :members: +.. :undoc-members: +.. :show-inheritance: + +.. 
literalinclude:: ../tutorials/ml-workflows/tutorial_0_basic_workflow.py + :language: python + diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 00000000..f4c9b297 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,73 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +import os +import sys +import subprocess + +sys.path.insert(0, os.path.abspath('../use-cases/')) +sys.path.insert(0, os.path.abspath('../use-cases/3dgan/')) +sys.path.insert(0, os.path.abspath('../use-cases/mnist/torch-lightning/')) +sys.path.insert(0, os.path.abspath('../use-cases/mnist/torch/')) +sys.path.insert(0, os.path.abspath('../tutorials/ml-workflows/')) +sys.path.insert(0, os.path.abspath('../src/itwinai')) +sys.path.insert(0, os.path.abspath('../src/itwinai/tensorflow')) +sys.path.insert(0, os.path.abspath('../src/itwinai/torch')) +sys.path.insert(0, os.path.abspath('../')) + +project = 'itwinai' +copyright = '2024, Matteo Bunino, Alexander Zoechbauer, Kalliopi Tsolaki, Rakesh Sarma on behalf of CERN & JSC' +author = 'Matteo Bunino, Alexander Zoechbauer, Kalliopi Tsolaki' +# version = '0.0' # short version +# release = '0.0.2' # full version + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest', + 'sphinx.ext.viewcode'] # 'myst_parser' + +templates_path = ['_templates'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +autodoc_mock_imports = ["mlflow"] + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + + +def get_git_tag(): + try: + return subprocess.check_output(['git', 'describe', '--tags', '--abbrev=0']).decode('utf-8').strip() + except subprocess.CalledProcessError: + return 'unknown' + + +# Set the version to the latest tag +version = get_git_tag() +release = version + +html_theme = 'sphinx_rtd_theme' # 'alabaster' +html_static_path = ['_static'] + +html_context = { + 'display_version': True, + 'release': release +} + +html_footer = """ + +""" + +html_sidebars = { + '**': [ + html_footer # Adds the custom footer with version information + ] +} diff --git a/docs/docs/CLI.md b/docs/docs/CLI.md deleted file mode 100644 index a2383b46..00000000 --- a/docs/docs/CLI.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -layout: default -title: CLI -nav_order: 3 ---- - -# Command-line interface (CLI) - - -The `itwinai` package provides a custom CLI, which can be accessed, for instance -from the development environment: - -```bash -# Activate development environment -micromamba activate ./.venv-dev - -# Access itwinai CLI -itwinai --help -``` - -## Visualization - -Some visualization functionalities offered by `itwinai` CLI. 
- -```bash -# Datasets registry -itwinai datasets --help - -# Workflows (any file '*-workflow.yml') -itwinai workflows --help -``` - -## Machine learning - -```bash -# Training -itwinai train --help - -# Launch MLFlow UI to visualize ML logs -itwinai mlflow-ui --help - -# Inference -itwinai predict --help -``` diff --git a/docs/docs/Concepts.md b/docs/docs/Concepts.md deleted file mode 100644 index 5830fcfe..00000000 --- a/docs/docs/Concepts.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -layout: default -title: Concepts -nav_order: 4 ---- - -# Concepts - - -Here we presents the key concepts on which `itwinai` is based. - -## Workflow - -We define a workflow as a directed acyclic graph (DAG) of data processing -operations, in which each step can have multiple inputs and outputs, and -each input or output is a dataset. - -![image](img/Workflow%20DAG%20concept.png) - -In the picture above, the yellow boxes with numbers represent the steps -in the example workflow, whereas the blue cylinders represent data -(e.g., dataset, configuration file). - -Each step runs in *isolation* from the others, and data is the *interface*. -Isolation can be guaranteed by executing each step in a Docker container or -in a separate Python virtual environment. diff --git a/docs/docs/How-to-use-this-software.md b/docs/docs/How-to-use-this-software.md deleted file mode 100644 index 15a36b44..00000000 --- a/docs/docs/How-to-use-this-software.md +++ /dev/null @@ -1,540 +0,0 @@ ---- -layout: default -title: How to use this software -nav_order: 2 ---- - -# How to use this software -{: .no_toc } - -## Table of contents -{: .no_toc .text-delta } - -1. TOC -{:toc} - ---- - -This guide provides a detailed explanation on how to use the AI/ML workflow -tool, developed in the context of [interTwin](https://github.com/interTwin-eu/). - -**Target audience**: anyone aiming to simplify MLOps for their digital twin (DT) -use case/project. Use cases from interTwin project. - -## Clone this repo - -```bash -git clone git@github.com:interTwin-eu/T6.5-AI-and-ML.git -``` - -A new use case/project can be added under `./use-cases` folder. - -Build the workflow runner environment and development environment -following the instructions on the README file. - -## Define a DT workflow - -Before delving into workflow definition rules, make sure to have -understood *what is* a [workflow](./Concepts#workflow) in this context. - -You can define one or more workflows for your DT use case (e.g., ML training, -ML inference, other). A workflow is -defined through configuration files in the use case subfolder. -For the same use case, a DT developer can define multiple workflows, -in which multiple datasets are involved. - -Currently, each step is executed in an isolated Python virtual environment, -built according to [conda](https://docs.conda.io/en/latest/) standards. -In practice, it is built with -[Micromamba](https://mamba.readthedocs.io/en/latest/user_guide/micromamba.html), -which is faster. - -To begin with, you can start by looking at an example of the -[MNIST toy use case](use-cases/mnist), located at `use-cases/mnist` -in the code repository. - -### Use case metadata - -The main configuration file of an use case is `meta.yml`, which stores -the metadata of it. When creating a new use case, you need to update the -`root` field with the path to the use case folder, with respect to the -repo root. - -The datasets registry is a field in this configuration file, -which stores the metadata -for all datasets involved in a use case. 
This configuration provides a -unified place where datasets can be maintained, making it easy to access -them from other configuration files. - -The dataset registry has the format: - -```yaml -datasets: - some-dataset-name: - doc: Documentation string for this dataset - location: path/to/dataset/disk/location -``` - -Example of `meta.yml` from [MNIST use case](use-cases/mnist): - -```yaml -# Use case root location. End without path '/' char! -root: ./use-cases/mnist - -# AI folder location. End without path '/' char! -ai-root: ./ai - -# Datasets registry -datasets: - preproc-images: - doc: Preprocessed MNIST images - location: ${root}/data/preproc-images - ml-logs: - doc: MLflow tracking URI for local logging - location: ${root}/data/ml-logs - ml-predictions: - doc: predictions on unseen data - location: ${root}/data/ml-predictions -``` - -Datasets are imported from the datasets registry to other files by means -of [OmegaConf](https://omegaconf.readthedocs.io/)'s -[variable interpolation](https://omegaconf.readthedocs.io/en/2.3_branch/usage.html#variable-interpolation). -This way, you can easily import datasets metadata (e.g., location on -file system) from datasets registry. - -Dataset registry of an use case can be visualized using [itwinai CLI](./CLI#visualization): - -```bash -USE_CASE_ROOT='use-cases/mnist/' -micromamba activate ./.venv-dev && \ - itwinai datasets --use-case $USE_CASE_ROOT -``` - -### Workflow configuration - -Use case workflows are defined with `*-workflow.yml` files in the use case root, -and there are two ways to define a workflow: - -- "Custom" format for workflow definition is an intuitive standard we created -for this prototype, for easy prototyping. -- [Common Workflow Language](https://www.commonwl.org/) (CWL), which is -currently under development, and not ready to be used. - -Which of the two is used is defined by setting the `--cwl` flag (explained -[below](#run-the-workflow)). - -#### Custom workflow definition - -To define a workflow with the custom format, the DT developer must follow -the structure provided below. - -The `steps` section defines the steps of the workflow, in the order in which -they have to be executed: - -```yaml -steps: - - some-step-name: - doc: Documentation string for this step - env: # micromamba environment metadata - file: some-conda-env.yml - prefix: path/to/conda/env/ - command: Command to execute inside micromamba env - args: # Command arguments. - # Note interpolation with datasets registry here! - some-arg: ${datasets.my-dataset.location} - some-other-arg: 42 - - next-step-name: - ... -``` - -Example workflow from [MNIST use case](use-cases/mnist), defined in -`training-workflow.yml`: - -```yaml -steps: - - preprocessing: - doc: Download and split MNIST dataset into train and test sets - command: python ${root}/mnist-preproc.py - env: - file: ${root}/env-files/preproc-env.yml - prefix: ${root}/.venv-preproc - args: - output: ${datasets.preproc-images.location} - stage: train - - ml-training: - doc: Train a neural network to classify MNIST images - command: itwinai train - env: - file: ${ai-root}/env-files/pytorch-lock.yml - prefix: ${ai-root}/.venv-pytorch - source: ${ai-root} - args: - train-dataset: ${datasets.preproc-images.location} - ml-logs: ${datasets.ml-logs.location} - config: ${root}/mnist-ai-train.yml -``` - -Step 1 is named `preprocessing` and uses the `mnist-preproc.py` script to pre-process the MNIST dataset. 
It takes no -input, generates an output dataset named `preproc-images`, and uses an environment defined in a YAML file named -`preproc-env.yml` located in the `./use-cases/mnist` directory. -Step 2 is named `ml-training` and trains a machine learning model using the preprocessed image data generated in -the first step. The training is performed using the train command from the `itwinai` tool. The input dataset is -`preproc-images`, and the output is `ml-logs`. The step uses an environment defined in a YAML file named -`pytorch-env.yml` located in the `./ai` directory. The machine learning model is configured using the `mnist-ai-train.yml` -file located in the `./use-cases/mnist` directory. - -#### CWL: under development and not ready to be used yet - -**NOTE**. At the moment, support for CWL is under development, -and is not available. - - - -## Implement workflow steps - -Implement use case-specific steps. -Note that the implementation of steps involving AI/ML are addressed in the next -step, and they can be implemented a bit more easily. - -Each step of a workflow is characterized by its python virtual environment -and a command to be executed in that -environment. A command can be implemented by providing, for instance, a python script to be executed in some environment. - -To execute a step, the workflow engine will run something like: - -```bash -micromamba run -p PATH_TO_STEP_ENV CMD --arg1 ARG_1_VAL ... --argN ARG_N_VAL -``` - -Where: - -- `PATH_TO_STEP_ENV` is the path to the micromamba environment for this step. -- `CMD` is the command to execute in that environment. -- The developer can use additional parameters which are automatically appended -to the command. - -*Example*: in the [MNIST use case](use-cases/mnist), -the preprocessing step is implemented by a python script, which downloads and -splits the MNIST dataset in a specific location. Using a command similar to: - -```bash -micromamba run -p ./use-cases/mnist/.venv-preproc \ - python ./use-cases/mnist/mnist-preproc.py \ - --output ./data/mnist/preproc-images-inference \ - --stage train -``` - -## Define AI/ML workflow - -AI/ML workflows are implemented by the `itwinai` module. -The DT developer, who wants to include a new use case, needs to provide -only a reduced amount of code to describe a neural network, plus some -configuration files. - -The developer must implement the neural network to train and include it inside -`itwinai` python package, under `ai/src/itwinai`. For instance, under -`ai/src/itwinai/plmodels` when using PyTorch Lightning. - -For instance, `LitMNIST` neural network used in [MNIST use case](use-cases/mnist) -has been added under `ai/src/itwinai/plmodels/mnist.py` - -Once a model has been included inside the `itwinai` python module, it can be imported during training. -In the future, `itwinai` will support also neural networks not provided out-of-the-box by `itwinai`. - -The developer must define two configuration files to access `itwinai` -functionalities. -First, ML training configuration, associated with `$ itwinai train` [CLI](./CLI) command. -Second, ML inference configuration, associated with `$ itwinai predict` [CLI](./CLI) command. - -MLOps heavily relies on commands provided by [itwinai CLI](./CLI). -Therefore, before continuing, make sure to have understood how -[itwinai CLI](./CLI) works. - -### ML training configuration - -ML training configuration is provided in a with naming convention `*-ai-train.yml` -under the use case root directory. 
- -An example configuration file is provided below, where the fields have been replaced with their respective description: - -```yaml -# Configuration file of AI workflows for X use case - -# Training configuration -train: - type: > - can be 'lightning' or 'tf', depending whether the neural network is defined - using PyTorch Lightning or TensorFlow. - At the moment, only 'lightning' is supported. - - # Configuration format defined by PyTorch Lightning CLI - # https://pytorch-lightning.readthedocs.io/en/1.6.5/common/lightning_cli.html - conf: - # See discussion below - ... - -# MLFlow logger configuration -logger: - experiment_name: > - Unique name for an experiment, to group all similar - runs under the same experiment - description: Description for this specific run. - log_every_n_epoch: how often to log (epochs) - log_every_n_steps: how often to log (steps, i.e., batches) - registered_model_name: > - Unique name used in Models Registry to identify an ML model. - If given, it is automatically registered in the Models Registry. -``` - -When using PyTorch Lightning (PL) ML framework, the training configuration is easy to define, as it follows the schema -pre-defined by lightning authors for the PL CLI. See its documentation -[here](https://pytorch-lightning.readthedocs.io/en/1.6.5/common/lightning_cli.html#trainer-callbacks-and-arguments-with-class-type), -[here](https://pytorch-lightning.readthedocs.io/en/1.6.5/common/lightning_cli.html#trainer-callbacks-and-arguments-with-class-type), -[here](https://pytorch-lightning.readthedocs.io/en/1.6.5/common/lightning_cli.html#multiple-models-and-or-datasets), and -[here](https://pytorch-lightning.readthedocs.io/en/1.6.5/common/lightning_cli.html#optimizers-and-learning-rate-schedulers). - -An example taken from -[MNIST use case](use-cases/mnist) located at `use-cases/mnist/mnist-ai-training.yml`: - -```yaml -# Pytorch lightning config for training -train: - type: lightning - # Follows lightning config file format: - # https://pytorch-lightning.readthedocs.io/en/1.6.5/common/lightning_cli.html#multiple-models-and-or-datasets - conf: - seed_everything: 4231162351 - - # Lightning Trainer configuration - trainer: - accelerator: auto - strategy: auto - devices: auto - num_nodes: 1 - precision: 32-true - - # MLFlow logger (initial) configuration. 
- # To be completed with run details, later on - logger: - class_path: lightning.pytorch.loggers.MLFlowLogger - init_args: - experiment_name: ${logger.experiment_name} - save_dir: ./mlruns - - # Callbacks - callbacks: - - class_path: lightning.pytorch.callbacks.early_stopping.EarlyStopping - init_args: - monitor: val_loss - patience: 2 - - class_path: lightning.pytorch.callbacks.lr_monitor.LearningRateMonitor - init_args: - logging_interval: step - - class_path: lightning.pytorch.callbacks.ModelCheckpoint - init_args: - dirpath: checkpoints - filename: best-checkpoint - save_top_k: 1 - verbose: true - monitor: val_loss - mode: min - - max_epochs: 1 - - # Lightning Model configuration - model: - class_path: itwinai.plmodels.mnist.LitMNIST - init_args: - hidden_size: 64 - - # Lightning data module configuration - data: - class_path: itwinai.plmodels.mnist.MNISTDataModule - init_args: - data_dir: ${cli.train_dataset} - batch_size: 32 - - # Torch Optimizer configuration - optimizer: - class_path: torch.optim.AdamW - init_args: - lr: 0.001 - - # Torch LR scheduler configuration - lr_scheduler: - class_path: torch.optim.lr_scheduler.ExponentialLR - init_args: - gamma: 0.1 - -# Mlflow -logger: - experiment_name: MNIST classification lite - description: A MLP classifier for MNIST dataset. - log_every_n_epoch: 1 - log_every_n_steps: 1 - # Name used in Models Registry. If given, it is automatically - # registered in the Models Registry. - registered_model_name: MNIST-clf-lite -``` - -Note the field `data_dir: ${cli.train_dataset}` in the above configuration. -More on this later. - -### ML inference configuration - -ML training configuration is provided in a with naming convention -`*-ai-inference.yml` under the use case root directory. - -An example configuration file is provided below, where the fields have been replaced with their respective description: - -```yaml -inference: - experiment_name: > - Unique name for an experiment, to group all similar - runs under the same experiment - run_id: Run ID in MLFlow server of pre-trained model - ckpt_path: model/checkpoints/best-checkpoint/best-checkpoint.ckpt - train_config_artifact_path: name of training config saved to MLFlow artifacts folder - type: > - can be 'lightning' or 'tf', depending whether the neural network is defined - using PyTorch Lightning or TensorFlow. - At the moment, only 'lightning' is supported. - - # Configuration format defined by PyTorch Lightning CLI - # https://pytorch-lightning.readthedocs.io/en/1.6.5/common/lightning_cli.html - conf: - # See discussion below - ... -``` - -Regarding the `inference.conf` field, same considerations hold as for `train.conf` field of ML training configuration. - -An example taken from -[MNIST use case](use-cases/mnist) located at `use-cases/mnist/mnist-ai-training.yml`: - -```yaml -inference: - type: lightning - experiment_name: MNIST classification lite - # Run ID in MLFlow server: pre-trained model - run_id: 54f790100be646e0a7ccbb1235729d00 - ckpt_path: model/checkpoints/best-checkpoint/best-checkpoint.ckpt - train_config_artifact_path: pl-training.yml - conf: - # Lightning data module configuration - data: - class_path: itwinai.plmodels.mnist.MNISTDataModule - init_args: - data_dir: ${cli.input_dataset} - batch_size: 32 -``` - -### Accessing CLI args from config file - -As explained above, train and predict commands in itwinai CLI receive as input -specific configuration files: - -- The `train` command receives `*-ai-train.yml` as configuration. 
-- The `predict` command receives `*-ai-inference.yml` as configuration. - -With [OmegaConf](https://omegaconf.readthedocs.io/)'s -[variable interpolation](https://omegaconf.readthedocs.io/en/2.3_branch/usage.html#variable-interpolation) -you can access the args from the itwinai CLI command from the configuration file -associated with this command. - -Example: the field `data_dir: ${cli.input_dataset}` in the above configuration -accesses the value of `--input-dataset` argument of `itwinai predict` command. - -### ML framework: PyTorch vs. TensorFlow - -At the moment, only PyTorch are supported. TensorFlow support is planned for -future releases. - -## Run the workflow - -Once a workflow has been configured, it can be run by executing `run-workflow.py` in the root of this repo: - -```bash -micromamba run -p ./.venv python run-workflow.py -f WORKFLOW_DEFINITION_FILE -``` - -This script performs two main actions: - -1. Deploy ste steps of a workflow as python environments, managed with Conda. -2. Run a workflow step-by-step, following the directives given in the config file. - -See some examples of workflow executions in `examples.sh`, for instance: - -```bash -# Run workflow for MNIST toy use case -micromamba run -p ./.venv python run-workflow.py -f ./use-cases/mnist/training-workflow.yml -``` - - - -## Write tests cases - -Integrating an new use case means defining new workflows for it. -It is strongly suggested to define "integration" test cases for -those workflows. This way, every time `itwinai` -framework is updated, integration tests automatically verify that -the use case integrates well with the new changes introduced in the -main framework. -Moreover, integration tests verify that an use case case is stable, -and is not hiding some "bug". - -Add test for your use case under the `test/` folder. You can take -inspiration from other use cases' tests. diff --git a/docs/docs/img/Workflow DAG concept.png b/docs/docs/img/Workflow DAG concept.png deleted file mode 100644 index f09d8146..00000000 Binary files a/docs/docs/img/Workflow DAG concept.png and /dev/null differ diff --git a/docs/docs/img/cwl-workflow.png b/docs/docs/img/cwl-workflow.png deleted file mode 100644 index 9c434e3f..00000000 Binary files a/docs/docs/img/cwl-workflow.png and /dev/null differ diff --git a/docs/docs/img/user-platform interaction full.png b/docs/docs/img/user-platform interaction full.png deleted file mode 100644 index 0c64ee0c..00000000 Binary files a/docs/docs/img/user-platform interaction full.png and /dev/null differ diff --git a/docs/docs/img/user-platform interaction.png b/docs/docs/img/user-platform interaction.png deleted file mode 100644 index 19dc3eeb..00000000 Binary files a/docs/docs/img/user-platform interaction.png and /dev/null differ diff --git a/docs/docs/use-cases/index.md b/docs/docs/use-cases/index.md deleted file mode 100644 index 681caeb6..00000000 --- a/docs/docs/use-cases/index.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -layout: default -title: Use cases -nav_order: 4 -has_children: true ---- - -# Use cases - -Digital twin use cases employing machine learning workflows, currently integrated -in this prototype. -To integrate a new use case, please refer to [this page](../How-to-use-this-software). 
diff --git a/docs/docs/use-cases/mnist.md b/docs/docs/use-cases/mnist.md deleted file mode 100644 index e22f7ab1..00000000 --- a/docs/docs/use-cases/mnist.md +++ /dev/null @@ -1,185 +0,0 @@ ---- -layout: default -title: MNIST -parent: Use cases -nav_order: 1 ---- - -# MNIST: toy example for DT workflows -{: .no_toc } - -## Table of contents -{: .no_toc .text-delta } - -1. TOC -{:toc} - ---- - -Of course MNIST images classification is not a digital twin. Still, it is useful to -provide an example on how to define an end-to-end digital twin workflow with the -software provided in this repository. - -The MNIST use case implements two workflows: - -1. Training workflow: train a neural network to classify MNIST images, and save the trained -neural network to the Models Registry. - - ```mermaid - flowchart LR - %% Nodes - preproc(Pre-processing) - ai(ML training) - reg[(Models Registry:\npre-trained ML models)] - - %% Workflow - preproc --> ai - - %% Connections - ai -.-> |Saves to| reg - ``` - - This workflow is executed by running the command: - - ```bash - micromamba run -p ./.venv python run-workflow.py -f ./use-cases/mnist/training-workflow.yml - ``` - -1. Inference workflow: use the pre-trained neural network to classify unseen images (the test set, in this case). - - ```mermaid - flowchart LR - %% Nodes - preproc(Pre-processing) - ai_depl(ML inference) - pred[(Predictions)] - - %% Workflow - preproc --> ai_depl - - %% Connections - ai_depl -.-> |Saves to| pred - ``` - - This workflow is executed by running the command: - - ```bash - micromamba run -p ./.venv python run-workflow.py -f ./use-cases/mnist/inference-workflow.yml - ``` - -The interactions among workflows and their steps can be described in more details as the following, where conceptual ordering -among different workflow steps is represented by solid arrows: - -```mermaid -graph TD - %% Nodes - remote_repo[(Remote repo)] - preproc(Pre-processing) - ai(ML training) - ai_depl(ML inference) - train_set[(Train dataset)] - test_set[(Test dataset)] - ml_logs[(ML logs)] - reg[(Models Registry:\npre-trained ML models)] - pred[(Predictions)] - - %% Workflow - preproc --> ai ---> ai_depl - - %% Connections - preproc -.-> |Fetches| remote_repo - preproc -.-> |Stores| train_set - preproc -.-> |Stores| test_set - ai -.-> |Trains/validates model| train_set - ai -.-> |Tests model| test_set - ai -.-> |Stores model to| reg - ai -.-> |Logs| ml_logs - ai_depl -.-> |Loads from| reg - ai_depl -.-> |Predict from| test_set - ai_depl -.-> |Stores| pred -``` - -## Workflow steps - -Here we explain in more details how the workflow steps have been configured. -Configuration files and Python scripts are organized under `use-cases/mnist/` -folder, in the core repository. - -### Pre-processing - -This step is implemented by executing `mnist-preproc.py` script in its dedicated micromamba environment, defined by -`preproc-env.yml`. This solution gives full freedom to the DT developer to implement any preprocessing logic, adaptable -to any custom dataset format. - -### ML training - -Is the step in which a neural network is trained on the training dataset, and -validated on the validation dataset. -The mentioned datasets are a result from a split of the pre-processed training -dataset, produced by the pre-processing step. -This step completes the **training workflow**, and results into ML logs and a -trained neural network, which is saved to -the Models Registry. 
The training workflow can be re-run multiple times with different (hyper)parameters, with the goal -of optimizing some ML validation metric. The neural network with the best validation performances is used to make -predictions on unseen data, in the inference step. - -ML training logic is implemented by the `itwinai` library, requiring the DT developer to produce only a set of YAML -configuration files. For the moment, we assume that the neural network and the training code is already present in -`itwinai` library. - -Both ML training and inference are implemented by commands executed in the same virtual environment. At the moment, -only PyTorch is supported. The corresponding virtual environment definition, used by the `itwinai` library, -is located at `ai/pytorch-env.yml`. - -The ML training logic provided by `itwinai` library is accessed via the -[itwinai CLI](../CLI). - -The DT developer must provide a training configuration file, following some -rules explained in [this section](../How-to-use-this-software#ml-training-configuration). For MNIST use case, the -training configuration is provided by `mnist-ai-train.yml` file. - -Training command is automatically managed by the workflow runner, but it can also -be triggered from withing the ai environment running the following command: - -```bash -micromamba activate ./ai/.venv-pytorch && \ - itwinai train --train-dataset $TRAINING_DATASET_PATH \ - --ml-logs $MLFLOW_TRACKING_URI \ - --config ./use-cases/mnist/mnist-ai-train.yml -``` - -While training is running, the produced ML logs can be inspected in real-time from MLFlow UI by running the command in -the training virtual environment (Conda): - -```bash -micromamba activate ./ai/.venv-pytorch && \ - itwinai mlflow-ui --path $PATH_TO_ML_LOGS -``` - -### ML inference - -A pre-trained neural network is applied to a set of data which was not used to train it. In fact, this is defined as -"unseen" data, from the neural network perspective. An example of ML inference is the application of a trained neural -network to make predictions on new data, to support decision making. *Example*: forecast fire risk maps in Sicily in -August 2023, starting from newly-collected satellite images, to alert local authorities in case of elevated fire risk. - -To select a pre-trained ML model, the DT developer must retrieve the `RUN ID` of -the training run created by MLFLow for some specific training. - -The, the DT developer can update the inference configuration file -`mnist-ai-inference.yml` and run inference workflow. - -Inference/prediction command is automatically managed by the workflow runner, but it can also be triggered from within -the ai environment running the following command: - -```bash -micromamba activate ./ai/.venv-pytorch && \ - itwinai predict --input-dataset $UNSEEN_EXAMPLES_DATASET_PATH \ - --predictions-location $PREDICTIONS_LOCATION \ - --ml-logs $PATH_TO_ML_LOGS \ - --config ./use-cases/mnist/mnist-ai-inference.yml -``` - -## References - -To learn more on how to use this software, e.g., to deploy a new use case, please refer to [this guide](../How-to-use-this-software). 
diff --git a/docs/favicon.ico b/docs/favicon.ico deleted file mode 100644 index 16659192..00000000 Binary files a/docs/favicon.ico and /dev/null differ diff --git a/docs/getting_started_with_itwinai.rst b/docs/getting_started_with_itwinai.rst new file mode 100644 index 00000000..dc69a5b2 --- /dev/null +++ b/docs/getting_started_with_itwinai.rst @@ -0,0 +1,190 @@ +Getting started with itwinai +============================ + +In this section, we will run you through the installation and give some instructions for the use of the itwinai framework for HPC and local systems. + + +🌐 HPC systems +--------------- + +Here, we lay out how to use torch DistributedDataParallel (DDP), Horovod, and DeepSpeed from the same client code. +Note that the environment is tested on the HDFML system at JSC. For other systems, the module versions might need change accordingly. + + +Environments +++++++++++++ + +Install PyTorch env (GPU support) on Juelich Super Computer (tested on HDFML system) + +.. code-block:: bash + + torch-gpu-jsc: env-files/torch/createEnvJSC.sh + sh env-files/torch/createEnvJSC.sh + + +Install Tensorflow env (GPU support) on Juelich Super Computer (tested on HDFML system) + +.. code-block:: bash + + tf-gpu-jsc: env-files/tensorflow/createEnvJSCTF.sh + sh env-files/tensorflow/createEnvJSCTF.sh + + +Setup ++++++ + +First, from the root of `this repository `_, build the environment containing pytorch, horovod, and deepspeed. You can try with: + +.. code-block:: bash + + # Creates a Python environment called envAI_hdfml + make torch-gpu-jsc + + +Distributed training +++++++++++++++++++++ + + Each distributed strategy is described with a SLURM job script used to run that strategy. + +So if you want to distribute the code in `train.py` with, for example, **torch DDP**, run from terminal: + +.. code-block:: bash + + sbatch ddp_slurm.sh + +Similarly, if you want to distribute the code in `train.py` with **DeepSpeed**, run from terminal: + +.. code-block:: bash + + sbatch deepspeed_slurm.sh + +To distribute the code in `train.py` with **Horovod**, run from terminal: + +.. code-block:: bash + + sbatch hvd_slurm.sh + +Finally, you can run all of them with: + +.. code-block:: bash + + bash runall.sh + + + + + +💻 Local systems +----------------- + +**Requirements** + +* Linux environment. + +Windows and macOS were never tested. + + +Micromamba installation ++++++++++++++++++++++++ + +To manage Conda environments we use micromamba, a lightweight version of Conda. + +In order to install micromamba, please refer to the `Manual installation guide `_. + +Consider that Micromamba can eat a lot of space when building environments because packages are cached on the local filesystem after being downloaded. To clear cache, you can use `micromamba clean -a`. +Micromamba data are kept under the `$HOME` location. However, in some systems, `$HOME` has a limited storage space so it is recommended to install Micromamba in another location with more storage space by changing the `$MAMBA_ROOT_PREFIX` variable. +Below is a complete installation example where the default `$MAMBA_ROOT_PREFIX` is overridden for Linux: + + +.. code-block:: bash + + cd $HOME + + # Download micromamba (This command is for Linux Intel (x86_64) systems. Find the right one for your system!) 
+ curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba + + # Install micromamba in a custom directory + MAMBA_ROOT_PREFIX='my-mamba-root' + ./bin/micromamba shell init $MAMBA_ROOT_PREFIX + + # To invoke micromamba from Makefile, you need to add explicitly to $PATH + echo 'PATH="$(dirname $MAMBA_EXE):$PATH"' >> ~/.bashrc + +**Reference**: `Micromamba installation guide `_. + + +Environment setup ++++++++++++++++++ + +**Requirements:** + +* Linux environment. Windows and macOS were never tested. +* Micromamba: see the installation instructions above. +* VS Code, for development. + +Tensorflow +++++++++++ + +Installation: + +.. code-block:: bash + + # Install TensorFlow 2.13 + make tf-2.13 + + # Activate env + micromamba activate ./.venv-tf + +Other TensorFlow versions are available, using the following targets `tf-2.10`, and `tf-2.11`. + + +PyTorch (+ Lightning) ++++++++++++++++++++++ + +Installation: + +.. code-block:: bash + + # Install PyTorch + lightning + make torch-gpu + + # Activate env + micromamba activate ./.venv-pytorch + +Other similarly CPU-only version is available at the target `torch-cpu`. + + +Development environment ++++++++++++++++++++++++ + +This is for developers only. To have it, update the installed `itwinai` package adding the `dev` extra: + +.. code-block:: bash + + pip install -e .[dev] + + +**Test with `pytest`** +To run tests on itwinai package: + +.. code-block:: bash + + # Activate env + micromamba activate ./.venv-pytorch # or ./.venv-tf + + pytest -v -m "not slurm" tests/ + + +However, some tests are intended to be executed only on HPC systems, where SLURM is available. They are marked with "slurm" tags. To run these tests, use the dedicated job script: + +.. code-block:: bash + + sbatch tests/slurm_tests_startscript + + # Upon completion, check the output: + cat job.err + cat job.out + + + + diff --git a/docs/hpc_setup.rst b/docs/hpc_setup.rst new file mode 100644 index 00000000..607c876c --- /dev/null +++ b/docs/hpc_setup.rst @@ -0,0 +1,69 @@ +.. 🌐 HPC systems +.. --------------- +How to use torch `DistributedDataParallel` (DDP), Horovod and DeepSpeed from the same client code. +Note that the environment is tested on the HDFML system at JSC. For other systems, the module versions might need change accordingly. + + +.. toctree:: + :maxdepth: 5 + + +Environments +++++++++++++ + +Install PyTorch env (GPU support) on Juelich Super Computer (tested on HDFML system) + +.. code-block:: bash + + torch-gpu-jsc: env-files/torch/createEnvJSC.sh + sh env-files/torch/createEnvJSC.sh + + +Install Tensorflow env (GPU support) on Juelich Super Computer (tested on HDFML system) + +.. code-block:: bash + + tf-gpu-jsc: env-files/tensorflow/createEnvJSCTF.sh + sh env-files/tensorflow/createEnvJSCTF.sh + + + +Setup ++++++ + +First, from the root of this `repository `_, build the environment containing pytorch, horovod and deepspeed. You can try with: + +.. code-block:: bash + + # Creates a Python venv called envAI_hdfml + make torch-gpu-jsc + + +Distributed training +++++++++++++++++++++ + +Each distributed strategy has its own SLURM job script, which should be used to run it: + +If you want to distribute the code in `train.py` with **torch DDP**, run from terminal: + +.. code-block:: bash + + sbatch ddp_slurm.sh + +If you want to distribute the code in `train.py` with **DeepSpeed**, run from terminal: + +.. 
code-block:: bash + + sbatch deepspeed_slurm.sh + +If you want to distribute the code in `train.py` with **Horovod**, run from terminal: + +.. code-block:: bash + + sbatch hvd_slurm.sh + +You can run all of them with: + +.. code-block:: bash + + bash runall.sh \ No newline at end of file diff --git a/docs/index.md b/docs/index.md deleted file mode 100644 index ae78ffa7..00000000 --- a/docs/index.md +++ /dev/null @@ -1,64 +0,0 @@ ---- -title: Overview -layout: home -nav_order: 1 ---- - -# Overview - -Welcome to the `itwinai` docs! A framework for advanced AI/ML workflows in digital twins (DTs). - -Below we you are going to find an overview of interTwin's AI/ML workflows component. This platform -is intended to support general-purpose MLOps for digital twin use cases in [interTwin](https://www.intertwin.eu/) project. - -> Beware! As the code is frequently changed, the docs are unstable and may not reflect the actual state of the code base. -> Therefore, if you are looking for a more stable version, check out our -> [releases](https://github.com/interTwin-eu/T6.5-AI-and-ML/releases). - -Additional resources include: - -- Detailed instructions on [How to use this software](docs/How-to-use-this-software). -- Roadmap towards a prototype for T6.5 AI workflows for -digital twins here: [Prototype for T6.5](https://github.com/interTwin-eu/T6.5-AI-and-ML/wiki/Prototype-for-T6.5). - -## Platform for machine learning workflows in digital twins - -The goal of this platform is to provide ML researchers with an easy-to-use endpoint -to manage general-purpose machine learning (ML) workflows, with limited engineering overhead, -while providing state-of-the-art MLOps best practices. - -We call this platform `itwinai`. - -The user is going to provide as input a set of configuration files, to fully -describe ML workflows, in the context of digital twin (DT) applications. -`itwinai` platform instantiates ML workflows with the configurations -provided by the DT developer. The execution of ML workflows produces as output a -set of ML metrics, which are visualized by `itwinai` via -[MLFlow](https://mlflow.org/). -As a result of ML training, the best model (on validation dataset) -is saved to the *Models Registry* for future predictions. - -![image](docs/img/user-platform%20interaction%20full.png) - -### Simulating a whole DT workflow - -A DT workflow is more than ML. Generally speaking, MLOps -(i.e., ML model lifecycle management), -can be considered just as a step of a larger DT workflow. - -![image](docs/img/cwl-workflow.png) - -In `itwinai` platform, we focus mainly on the MLOps step, simulating or oversimplifying all the rest -(e.g., pre-processing, authentication, workflow execution). - -For further details on how to define a DT workflow in `itwinai`, follow [this guide](docs/How-to-use-this-software#2-define-a-dt-workflow). - -### How to integrate a new use case - -To integrate an existing use case in this platform, the ML engineer rewrites -the ML experiments according to a format supported by `itwinai`. -Some examples can be found by looking at the use cases -already integrated [here](docs/use-cases/index), located under `use-cases/` -in the code repository. - -A detailed guide on how to integrate a new use case in `itwinai` can be found [here](docs/How-to-use-this-software). diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 00000000..82b192f8 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,70 @@ +.. 
diff --git a/docs/index.md b/docs/index.md
deleted file mode 100644
index ae78ffa7..00000000
--- a/docs/index.md
+++ /dev/null
@@ -1,64 +0,0 @@
----
-title: Overview
-layout: home
-nav_order: 1
----
-
-# Overview
-
-Welcome to the `itwinai` docs! A framework for advanced AI/ML workflows in digital twins (DTs).
-
-Below we you are going to find an overview of interTwin's AI/ML workflows component. This platform
-is intended to support general-purpose MLOps for digital twin use cases in [interTwin](https://www.intertwin.eu/) project.
-
-> Beware! As the code is frequently changed, the docs are unstable and may not reflect the actual state of the code base.
-> Therefore, if you are looking for a more stable version, check out our
-> [releases](https://github.com/interTwin-eu/T6.5-AI-and-ML/releases).
-
-Additional resources include:
-
-- Detailed instructions on [How to use this software](docs/How-to-use-this-software).
-- Roadmap towards a prototype for T6.5 AI workflows for
-digital twins here: [Prototype for T6.5](https://github.com/interTwin-eu/T6.5-AI-and-ML/wiki/Prototype-for-T6.5).
-
-## Platform for machine learning workflows in digital twins
-
-The goal of this platform is to provide ML researchers with an easy-to-use endpoint
-to manage general-purpose machine learning (ML) workflows, with limited engineering overhead,
-while providing state-of-the-art MLOps best practices.
-
-We call this platform `itwinai`.
-
-The user is going to provide as input a set of configuration files, to fully
-describe ML workflows, in the context of digital twin (DT) applications.
-`itwinai` platform instantiates ML workflows with the configurations
-provided by the DT developer. The execution of ML workflows produces as output a
-set of ML metrics, which are visualized by `itwinai` via
-[MLFlow](https://mlflow.org/).
-As a result of ML training, the best model (on validation dataset)
-is saved to the *Models Registry* for future predictions.
-
-![image](docs/img/user-platform%20interaction%20full.png)
-
-### Simulating a whole DT workflow
-
-A DT workflow is more than ML. Generally speaking, MLOps
-(i.e., ML model lifecycle management),
-can be considered just as a step of a larger DT workflow.
-
-![image](docs/img/cwl-workflow.png)
-
-In `itwinai` platform, we focus mainly on the MLOps step, simulating or oversimplifying all the rest
-(e.g., pre-processing, authentication, workflow execution).
-
-For further details on how to define a DT workflow in `itwinai`, follow [this guide](docs/How-to-use-this-software#2-define-a-dt-workflow).
-
-### How to integrate a new use case
-
-To integrate an existing use case in this platform, the ML engineer rewrites
-the ML experiments according to a format supported by `itwinai`.
-Some examples can be found by looking at the use cases
-already integrated [here](docs/use-cases/index), located under `use-cases/`
-in the code repository.
-
-A detailed guide on how to integrate a new use case in `itwinai` can be found [here](docs/How-to-use-this-software).
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 00000000..82b192f8
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,70 @@
+.. itwinai documentation master file, created by
+   sphinx-quickstart on Fri Feb 9 13:58:30 2024.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+🚧 UNDER CONSTRUCTION 🚧
+=========================
+
+Welcome to itwinai's documentation!
+===================================
+
+``itwinai`` is a framework for advanced AI/ML workflows in Digital Twins (DTs).
+
+This platform is intended to support general-purpose MLOps for Digital Twin use cases in the `interTwin `_ project.
+
+Platform for machine learning workflows in digital twins
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+The goal of this platform is to provide ML researchers with an easy-to-use endpoint to manage general-purpose ML workflows,
+with limited engineering overhead, while providing state-of-the-art MLOps best practices.
+
+The user can fully describe ML workflows for DT applications by providing a set of configuration files as input.
+The ``itwinai`` platform instantiates ML workflows with the configurations provided by the DT developer.
+The execution of ML workflows outputs a set of ML metrics, which are visualised by ``itwinai`` via `MLFlow `_.
+The trained ML model that performed best on the validation dataset is saved to the Models Registry for future predictions.
+
+In the ``itwinai`` platform, we focus mainly on the MLOps step, simulating or oversimplifying the rest (e.g., pre-processing, authentication, workflow execution).
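+
+To give a feel for this configuration-driven approach, a DT developer could run a workflow along these lines. This is a hedged sketch: the class and method names below are illustrative assumptions, not the exact ``itwinai`` API.
+
+.. code-block:: python
+
+    # Hypothetical usage sketch of a configuration-driven workflow
+    from itwinai.parser import ConfigParser  # assumed entry point
+
+    # "pipeline.yaml" would declare the workflow steps (e.g., dataloader,
+    # trainer, saver) together with their configurations
+    parser = ConfigParser(config="pipeline.yaml")
+    pipeline = parser.parse_pipeline()
+
+    # Running the pipeline executes each step in order
+    pipeline.execute()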
+
+
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+   :caption: 💡 Installation
+
+   getting_started_with_itwinai
+
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+   :caption: 🪄 itwinai Modules
+
+   modules
+
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+   :caption: 📚 Integrated Use-cases
+
+   use_cases
+
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+   :caption: 🚀 Tutorials
+
+   tutorials
+
+
+`interTwin Demo: itwinai integration with other DTE modules `_
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+
+.. * :ref:`search`
+
diff --git a/docs/intermediate_workflow.rst b/docs/intermediate_workflow.rst
new file mode 100644
index 00000000..860b12e5
--- /dev/null
+++ b/docs/intermediate_workflow.rst
@@ -0,0 +1,20 @@
+Intermediate workflow
+=====================
+
+tutorial_1_intermediate_workflow.py
++++++++++++++++++++++++++++++++++++
+
+The `tutorial_1_intermediate_workflow.py` script is ...
+
+.. .. literalinclude:: ../use-cases/mnist/torch-lightning/dataloader.py
+..     :language: python
+
+.. .. automodule:: tutorial_1_intermediate_workflow
+..     :members:
+..     :undoc-members:
+..     :show-inheritance:
+
+
+.. literalinclude:: ../tutorials/ml-workflows/tutorial_1_intermediate_workflow.py
+   :language: python
+
diff --git a/docs/itwinai.cli.rst b/docs/itwinai.cli.rst
new file mode 100644
index 00000000..18e0d0ef
--- /dev/null
+++ b/docs/itwinai.cli.rst
@@ -0,0 +1,7 @@
+itwinai.cli
+===========
+
+.. automodule:: itwinai.cli
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/itwinai.cluster.rst b/docs/itwinai.cluster.rst
new file mode 100644
index 00000000..7360c981
--- /dev/null
+++ b/docs/itwinai.cluster.rst
@@ -0,0 +1,7 @@
+itwinai.cluster
+===============
+
+.. automodule:: itwinai.cluster
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/docs/itwinai.components.rst b/docs/itwinai.components.rst
new file mode 100644
index 00000000..db3b0956
--- /dev/null
+++ b/docs/itwinai.components.rst
@@ -0,0 +1,8 @@
+itwinai.components
+==================
+
+.. automodule:: itwinai.components
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
diff --git a/docs/itwinai.loggers.rst b/docs/itwinai.loggers.rst
new file mode 100644
index 00000000..513e3942
--- /dev/null
+++ b/docs/itwinai.loggers.rst
@@ -0,0 +1,8 @@
+itwinai.loggers
+===============
+
+.. automodule:: itwinai.loggers
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
diff --git a/docs/itwinai.parser.rst b/docs/itwinai.parser.rst
new file mode 100644
index 00000000..f9c7d930
--- /dev/null
+++ b/docs/itwinai.parser.rst
@@ -0,0 +1,8 @@
+itwinai.parser
+==============
+
+.. automodule:: itwinai.parser
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
diff --git a/docs/itwinai.pipeline.rst b/docs/itwinai.pipeline.rst
new file mode 100644
index 00000000..b849240e
--- /dev/null
+++ b/docs/itwinai.pipeline.rst
@@ -0,0 +1,8 @@
+itwinai.pipeline
+================
+
+.. automodule:: itwinai.pipeline
+   :members:
+   :undoc-members:
+   :show-inheritance:
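+
+To convey the idea behind this module: a pipeline chains components so that each step's output feeds the next. The following self-contained sketch illustrates that pattern in plain Python; it is a conceptual analogy, not the actual ``itwinai.pipeline`` implementation.
+
+.. code-block:: python
+
+    from typing import Any, Callable, Sequence
+
+    def run_pipeline(steps: Sequence[Callable[[Any], Any]], data: Any = None) -> Any:
+        """Chain steps: each step's output becomes the next step's input."""
+        for step in steps:
+            data = step(data)
+        return data
+
+    # Toy steps standing in for dataloader -> trainer -> saver components
+    result = run_pipeline([
+        lambda _: [1, 2, 3],                    # "dataloader": produce a dataset
+        lambda ds: sum(ds) / len(ds),           # "trainer": reduce it to a "model"
+        lambda model: f"saved model: {model}",  # "saver": persist the result
+    ])
+    print(result)  # saved model: 2.0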
+
diff --git a/docs/itwinai.serialization.rst b/docs/itwinai.serialization.rst
new file mode 100644
index 00000000..691a3721
--- /dev/null
+++ b/docs/itwinai.serialization.rst
@@ -0,0 +1,8 @@
+itwinai.serialization
+=====================
+
+.. automodule:: itwinai.serialization
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
diff --git a/docs/itwinai.tf.modules.rst b/docs/itwinai.tf.modules.rst
new file mode 100644
index 00000000..8b923bea
--- /dev/null
+++ b/docs/itwinai.tf.modules.rst
@@ -0,0 +1,26 @@
+itwinai TensorFlow Modules
+==========================
+
+trainer.py
+++++++++++
+
+.. literalinclude:: ../src/itwinai/tensorflow/trainer.py
+   :language: python
+
+utils.py
+++++++++
+
+.. literalinclude:: ../src/itwinai/tensorflow/utils.py
+   :language: python
+
+
+.. .. automodule:: itwinai.tensorflow.trainer
+..     :members:
+..     :undoc-members:
+..     :show-inheritance:
+
+.. .. automodule:: itwinai.tensorflow.utils
+..     :members:
+..     :undoc-members:
+..     :show-inheritance:
+
diff --git a/docs/itwinai.torch.modules.rst b/docs/itwinai.torch.modules.rst
new file mode 100644
index 00000000..d551af40
--- /dev/null
+++ b/docs/itwinai.torch.modules.rst
@@ -0,0 +1,72 @@
+itwinai PyTorch Modules
+=======================
+
+cluster.py
+++++++++++
+
+.. literalinclude:: ../src/itwinai/torch/cluster.py
+   :language: python
+
+inference.py
+++++++++++++
+
+.. literalinclude:: ../src/itwinai/torch/inference.py
+   :language: python
+
+mlflow.py
++++++++++
+
+.. literalinclude:: ../src/itwinai/torch/mlflow.py
+   :language: python
+
+trainer.py
+++++++++++
+
+.. literalinclude:: ../src/itwinai/torch/trainer.py
+   :language: python
+
+types.py
+++++++++
+
+.. literalinclude:: ../src/itwinai/torch/types.py
+   :language: python
+
+utils.py
+++++++++
+
+.. literalinclude:: ../src/itwinai/torch/utils.py
+   :language: python
+
+
+
+.. .. automodule:: itwinai.torch.cluster
+..     :members:
+..     :undoc-members:
+..     :show-inheritance:
+
+.. .. automodule:: itwinai.torch.inference
+..     :members:
+..     :undoc-members:
+..     :show-inheritance:
+
+.. .. automodule:: itwinai.torch.mlflow
+..     :members:
+..     :undoc-members:
+..     :show-inheritance:
+
+.. .. automodule:: itwinai.torch.trainer
+..     :members:
+..     :undoc-members:
+..     :show-inheritance:
+
+.. .. automodule:: itwinai.torch.types
+..     :members:
+..     :undoc-members:
+..     :show-inheritance:
+
+.. .. automodule:: itwinai.torch.utils
+..     :members:
+..     :undoc-members:
+..     :show-inheritance:
+
+
diff --git a/docs/itwinai.types.rst b/docs/itwinai.types.rst
new file mode 100644
index 00000000..20367596
--- /dev/null
+++ b/docs/itwinai.types.rst
@@ -0,0 +1,8 @@
+itwinai.types
+=============
+
+.. automodule:: itwinai.types
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
diff --git a/docs/itwinai.utils.rst b/docs/itwinai.utils.rst
new file mode 100644
index 00000000..b487da7f
--- /dev/null
+++ b/docs/itwinai.utils.rst
@@ -0,0 +1,8 @@
+itwinai.utils
+=============
+
+.. automodule:: itwinai.utils
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
diff --git a/docs/local_setup.rst b/docs/local_setup.rst
new file mode 100644
index 00000000..72b5f377
--- /dev/null
+++ b/docs/local_setup.rst
@@ -0,0 +1,210 @@
+.. 💻 Local systems
+.. -----------------
+
+**Requirements**
+
+* Linux environment.
+* Windows and macOS were never tested.
+
+
+.. toctree::
+   :maxdepth: 5
+
+
+Micromamba installation
++++++++++++++++++++++++
+
+To manage Conda environments we use Micromamba, a lightweight version of Conda.
+
+It is suggested to refer to the `Manual installation guide `_.
+
+Consider that Micromamba can consume a lot of space when building environments because packages are cached on
+the local filesystem after being downloaded. To clear the cache, you can use `micromamba clean -a`.
+Micromamba data are kept under the `$HOME` location. However, on some systems `$HOME` has limited storage
+space, and it is wiser to install Micromamba in another location with more storage space,
+by changing the `$MAMBA_ROOT_PREFIX` variable. See a complete installation example for Linux below, where the
+default `$MAMBA_ROOT_PREFIX` is overridden:
+
+
+.. code-block:: bash
+
+    cd $HOME
+
+    # Download micromamba (this command is for Linux Intel (x86_64) systems; find the right one for your system!)
+    curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
+
+    # Install micromamba in a custom directory
+    MAMBA_ROOT_PREFIX='my-mamba-root'
+    ./bin/micromamba shell init $MAMBA_ROOT_PREFIX
+
+    # To invoke micromamba from a Makefile, you need to add it explicitly to $PATH
+    echo 'PATH="$(dirname $MAMBA_EXE):$PATH"' >> ~/.bashrc
+
+**Reference**: `Micromamba installation guide `_.
+
+
+Environment setup
++++++++++++++++++
+
+**Requirements:**
+
+* Linux environment. Windows and macOS were never tested.
+* Micromamba: see the installation instructions above.
+* VS Code, for development.
+
+TensorFlow
+++++++++++
+
+Installation:
+
+.. code-block:: bash
+
+    # Install TensorFlow 2.13
+    make tf-2.13
+
+    # Activate env
+    micromamba activate ./.venv-tf
+
+Other TensorFlow versions are available via the `tf-2.10` and `tf-2.11` targets.
+
+
+PyTorch (+ Lightning)
++++++++++++++++++++++
+
+Installation:
+
+.. code-block:: bash
+
+    # Install PyTorch + Lightning
+    make torch-gpu
+
+    # Activate env
+    micromamba activate ./.venv-pytorch
+
+A CPU-only version is available via the `torch-cpu` target.
+
+
+Development environment
++++++++++++++++++++++++
+
+This setup is intended for developers only. To enable it, reinstall the `itwinai` package with the `dev` extra:
+
+.. code-block:: bash
+
+    pip install -e .[dev]
+
+
+**Test with `pytest`**
+
+To run tests on the itwinai package:
+
+.. code-block:: bash
+
+    # Activate env
+    micromamba activate ./.venv-pytorch  # or ./.venv-tf
+
+    pytest -v -m "not slurm" tests/
+
+
+However, some tests are intended to be executed only on an HPC system, where SLURM is available. They are marked with the "slurm" tag. To run those tests as well, use the dedicated job script:
+
+.. code-block:: bash
+
+    sbatch tests/slurm_tests_startscript
+
+    # Upon completion, check the output:
+    cat job.err
+    cat job.out
+
+
+
+
+.. Workflow orchestrator
+.. +++++++++++++++++++++
+
+.. Install the (custom) orchestrator virtual environment.
+
+.. .. code-block:: bash
+
+..     source ~/.bashrc
+..     # Create local env
+..     make
+
+..     # Activate env
+..     micromamba activate ./.venv
+
+.. To run tests on workflows use:
+
+.. .. code-block:: bash
+
+..     # Activate env
+..     micromamba activate ./.venv
+
+..     pytest tests/
+
+
+.. Development env setup
+.. ---------------------
+
+.. Requirements:
+
+.. * Linux, macOS environment. Windows was never tested.
+.. * Micromamba: see the installation instructions above.
+.. * VS Code, for development.
+
+.. Installation:
+
+.. .. code-block:: bash
+
+..     make dev-env
+
+..     # Activate env
+..     micromamba activate ./.venv-dev
+
+.. To run tests on itwinai package:
+
+.. .. code-block:: bash
+
+..     # Activate env
+..     micromamba activate ./.venv-dev
+
+..     pytest tests/ai/
+
+
+.. AI environment setup
+.. --------------------
+
+.. Requirements:
+
+.. * Linux, macOS environment. Windows was never tested.
+.. * Micromamba: see the installation instructions above.
+.. * VS Code, for development.
+
+.. **NOTE**: this environment gets automatically setup when a workflow is executed!
+
+.. However, you can also set it up explicitly with:
+
+.. .. code-block:: bash
+
+..     make ai-env
+
+..     # Activate env
+..     micromamba activate ./ai/.venv-pytorch
+
+.. Updating the environment files
+.. ++++++++++++++++++++++++++++++
+
+.. The files under `ai/env-files/` are of two categories:
+
+.. * Simple environment definition, such as `pytorch-env.yml` and `pytorch-env-gpu.yml`
+.. * Lockfiles, such as `pytorch-lock.yml` and `pytorch-gpu-lock.yml`, generated by `conda-lock `_.
+
+.. **When you install the ai environment, install it from the lock file!**
+
+.. When the "simple" environment file (e.g., `pytorch-env.yml`) changes, lock it with `conda-lock `_:
+
+.. .. code-block:: bash
+
+..     micromamba activate ./.venv
+
+..     make lock-ai
+
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 00000000..32bb2452
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	exit /b 1
+)
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/mnist_doc.rst b/docs/mnist_doc.rst
new file mode 100644
index 00000000..fd26c4e7
--- /dev/null
+++ b/docs/mnist_doc.rst
@@ -0,0 +1,151 @@
+MNIST
+=====
+
+This section covers the MNIST use case, which utilizes the `torch-lightning` framework for training and evaluation. The following files are integral to this use case:
+
+torch-lightning
+---------------
+
+.. toctree::
+   :maxdepth: 5
+
+dataloader.py
++++++++++++++
+
+The `dataloader.py` script is responsible for loading the MNIST dataset and preparing it for training.
+
+.. literalinclude:: ../use-cases/mnist/torch-lightning/dataloader.py
+   :language: python
+
+.. .. automodule:: torch-lightning.dataloader
+..     :members:
+..     :undoc-members:
+..     :show-inheritance:
+
+pipeline.yaml
++++++++++++++
+
+This YAML file defines the pipeline configuration for the MNIST use case. It includes settings for the model, training, and evaluation.
+
+.. literalinclude:: ../use-cases/mnist/torch-lightning/pipeline.yaml
+   :language: yaml
+
+startscript
++++++++++++
+
+The `startscript` is a shell script that initiates the training process. It sets up the environment and starts the training using the `train.py` script.
+
+.. literalinclude:: ../use-cases/mnist/torch-lightning/startscript
+   :language: bash
+
+train.py
+++++++++
+
+This script contains the training loop and is where the model is trained using the data prepared by `dataloader.py`.
+
+.. literalinclude:: ../use-cases/mnist/torch-lightning/train.py
+   :language: python
+
+.. .. automodule:: torch-lightning.train
+..     :members:
+..     :undoc-members:
+..     :show-inheritance:
+
+trainer.py
+++++++++++
+
+The `trainer.py` file defines the `Trainer` class, which sets up the training parameters and the training process.
+
+.. literalinclude:: ../use-cases/mnist/torch-lightning/trainer.py
+   :language: python
+
+.. .. automodule:: torch-lightning.trainer
+..     :members:
+..     :undoc-members:
+..     :show-inheritance:
+
+utils.py
+++++++++
+
+The `utils.py` script includes utility functions and classes that are used across the MNIST use case.
+
+.. literalinclude:: ../use-cases/mnist/torch-lightning/utils.py
+   :language: python
+
+.. .. automodule:: torch-lightning.utils
+..     :members:
+..     :undoc-members:
+..     :show-inheritance:
+
+
+This section covers the MNIST use case with the `torch` framework for training and evaluation. The following files are integral to this use case:
+
+torch
+-----
+
+.. toctree::
+   :maxdepth: 5
+
+dataloader.py
++++++++++++++
+
+The `dataloader.py` script is responsible for loading the MNIST dataset and preparing it for training.
+
+.. literalinclude:: ../use-cases/mnist/torch/dataloader.py
+   :language: python
+
+
+Dockerfile
+++++++++++
+
+.. literalinclude:: ../use-cases/mnist/torch/Dockerfile
+   :language: bash
+
+
+inference-pipeline.yaml
++++++++++++++++++++++++
+
+This YAML file defines the pipeline configuration for inference in the MNIST use case.
+
+.. literalinclude:: ../use-cases/mnist/torch/inference-pipeline.yaml
+   :language: yaml
+
+model.py
+++++++++
+
+The `model.py` script is responsible for loading a simple model.
+
+.. literalinclude:: ../use-cases/mnist/torch/model.py
+   :language: python
+
+pipeline.yaml
++++++++++++++
+
+This YAML file defines the pipeline configuration for the MNIST use case. It includes settings for the model, training, and evaluation.
+
+.. literalinclude:: ../use-cases/mnist/torch/pipeline.yaml
+   :language: yaml
+
+startscript
++++++++++++
+
+The `startscript` is a shell script that initiates the training process. It sets up the environment and starts the training using the `train.py` script.
+
+.. literalinclude:: ../use-cases/mnist/torch/startscript
+   :language: bash
+
+train.py
+++++++++
+
+This script contains the training loop and is where the model is trained using the data prepared by `dataloader.py`.
+
+.. literalinclude:: ../use-cases/mnist/torch/train.py
+   :language: python
+
+saver.py
+++++++++
+
+...
+
+.. literalinclude:: ../use-cases/mnist/torch/saver.py
+   :language: python
+
diff --git a/docs/modules.rst b/docs/modules.rst
new file mode 100644
index 00000000..a9e96c97
--- /dev/null
+++ b/docs/modules.rst
@@ -0,0 +1,23 @@
+itwinai
+=======
+
+.. toctree::
+   :maxdepth: 4
+
+   itwinai.cli
+   itwinai.cluster
+   itwinai.components
+   itwinai.loggers
+   itwinai.parser
+   itwinai.pipeline
+   itwinai.serialization
+   itwinai.types
+   itwinai.utils
+
+
+.. toctree::
+   :maxdepth: 4
+
+   itwinai.tf.modules
+   itwinai.torch.modules
+
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 00000000..31776555
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,185 @@
+absl-py==1.2.0
+alabaster==0.7.16
+alembic==1.8.1
+antlr4-python3-runtime==4.9.3
+anyio
+appdirs==1.4.4
+arrow
+attrs
+autopage==0.5.1
+Babel==2.14.0
+backports.functools-lru-cache
+beautifulsoup4
+blessed
+blinker==1.6.3
+Brotli
+build
+CacheControl
+certifi==2023.7.22
+cffi
+charset-normalizer
+cleo
+click
+cliff==4.0.0
+cloudpickle==2.2.1
+cmaes==0.8.2
+cmd2==2.4.2
+colorama
+colorlog==6.7.0
+crashtest
+croniter
+cryptography
+cycler==0.12.1
+databricks-cli==0.18.0
+dateutils
+deepdiff
+distlib
+docker==6.1.3
+docker-pycreds==0.4.0
+docstring-parser==0.15
+docutils==0.20.1
+dulwich
+einops==0.4.1
+entrypoints==0.4
+exceptiongroup
+fastapi
+filelock
+Flask==2.3.3
+fonttools==4.37.4
+fsspec
+gast==0.4.0
+gitdb==4.0.9
+GitPython==3.1.27
+google==3.0.0
+greenlet==1.1.3
+gunicorn==21.2.0
+h11
+h5py==3.7.0
+idna
+imagesize==1.4.1
+importlib-metadata==5.0.0
+importlib-resources
+inquirer
+installer
+itsdangerous
+git+https://github.com/interTwin-eu/itwinai.git@a8f9ccb035c7736553eaafb12e06fd7b3fc73fb6#egg=itwinai
+jaraco.classes
+jeepney
+Jinja2
+joblib==1.3.2
+jsonargparse==4.26.1
+jsonschema
+keyring
+kiwisolver==1.4.5
+libclang==14.0.6
+lightning
+lightning-cloud
+lightning-utilities
+Mako==1.2.3
+Markdown==3.5
+markdown-it-py
+MarkupSafe==2.1.1
+matplotlib==3.5.2
+mdurl
+mlflow==2.7.1
+more-itertools
+msgpack
+mysqlclient==2.1.1
+numpy
+oauthlib==3.2.2
+omegaconf==2.3.0
+optuna==2.10.1
+ordered-set
+orjson
+packaging
+pandas==2.1.1
+pathtools==0.1.2
+pexpect
+Pillow
+pkginfo
+pkgutil_resolve_name
+platformdirs
+plotly==5.10.0
+poetry
+poetry-core
+poetry-plugin-export
+promise==2.3
+protobuf==4.24.4
+psutil
+ptyprocess
+pyarrow==13.0.0
+pycparser
+pydantic
+Pygments
+PyJWT
+PyMySQL==1.0.2
+pyparsing==3.1.1
+pyperclip==1.8.2
+pyproject_hooks
+pyrsistent
+PySocks
+python-dateutil
+python-editor==1.0.4
+python-multipart
+pytorch-lightning
+pytz
+PyYAML
+querystring-parser==1.2.4
+rapidfuzz
+readchar
+requests
+requests-toolbelt
+rich
+scikit-learn==1.3.2
+scipy==1.12.0
+SecretStorage
+sentry-sdk==1.9.10
+setproctitle==1.3.2
+shellingham
+shortuuid==1.0.9
+six
+smmap==5.0.0
+sniffio
+snowballstemmer==2.2.0
+soupsieve
+Sphinx==7.2.6
+sphinx-rtd-theme==2.0.0
+sphinxcontrib-applehelp==1.0.8
+sphinxcontrib-devhelp==1.0.6
+sphinxcontrib-htmlhelp==2.0.5
+sphinxcontrib-jquery==4.1
+sphinxcontrib-jsmath==1.0.1
+sphinxcontrib-qthelp==1.0.7
+sphinxcontrib-serializinghtml==1.1.10
+SQLAlchemy==1.4.41
+sqlparse==0.4.4
+starlette
+starsessions
+submitit==1.5.0
+tabulate==0.9.0
+tenacity==8.1.0
+tensorflow-io-gcs-filesystem==0.27.0
+threadpoolctl==3.2.0
+tomli
+tomlkit
+torch==1.13.1
+torchaudio==0.13.1
+torchmetrics
+torchvision==0.14.1
+tqdm
+traitlets
+trove-classifiers
+typer==0.9.0
+types-python-dateutil
+typeshed-client==2.4.0
+typing_extensions==4.5.0
+tzdata==2023.3
+urllib3
+uvicorn
+virtualenv
+wandb==0.15.12
+wcwidth
+websocket-client
+websockets
+Werkzeug==3.0.0
+zipp
diff --git a/docs/tutorials.rst b/docs/tutorials.rst
new file mode 100644
index 00000000..623582f2
--- /dev/null
+++ b/docs/tutorials.rst
@@ -0,0 +1,19 @@
+.. _tutorials:
+
+ML workflow tutorials
+=====================
+
+Here you can find a collection of tutorials for ML workflows of varying complexity.
+
+Tutorials
+---------
+
+.. toctree::
+   :maxdepth: 2
+
+   basic_comp
+   basic_workflow
+   intermediate_workflow
+   advanced_workflow
+
\ No newline at end of file
diff --git a/docs/use_cases.rst b/docs/use_cases.rst
new file mode 100644
index 00000000..4a584702
--- /dev/null
+++ b/docs/use_cases.rst
@@ -0,0 +1,32 @@
+Integrated Use Cases
+====================
+
+Here you can find a collection of use cases from various projects.
+
+3DGAN CERN use case
+-------------------
+
+The first ``interTwin`` use case integrated with the ``itwinai`` framework is the DT for fast particle-detector simulation:
+a 3D Generative Adversarial Network (3DGAN) for the generation of images of calorimeter depositions.
+This project is based on the prototype `3DGAN `_ model developed at CERN and is implemented with the PyTorch Lightning framework.
+
+.. toctree::
+   :maxdepth: 2
+
+   3dgan_doc
+
+
+MNIST use case
+--------------
+
+MNIST image classification is used to provide an example of
+how to define an end-to-end digital twin workflow with the ``itwinai`` software.
+
+.. toctree::
+   :maxdepth: 2
+
+   mnist_doc
+
+
+
+
diff --git a/use-cases/3dgan/__init__.py b/use-cases/3dgan/__init__.py
new file mode 100644
index 00000000..079a8283
--- /dev/null
+++ b/use-cases/3dgan/__init__.py
@@ -0,0 +1 @@
+# This file can be empty but must be present (it makes this directory a Python package).
\ No newline at end of file