Merge branch 'dev' into docs_dev
matbun committed May 2, 2024
2 parents 17b38ff + e052d1c commit 6e52fa9
Showing 98 changed files with 2,803 additions and 4,702 deletions.
4 changes: 4 additions & 0 deletions .dockerignore
@@ -12,6 +12,10 @@ CHANGELOG
# Docs
docs

# interLink pods
**/interLink
**/interlink

# Data
**/MNIST
**/*-predictions/
3 changes: 1 addition & 2 deletions .github/linters/.jscpd.json
@@ -1,7 +1,6 @@
{
"threshold": 2.0,
"ignore": [
"**/itwinai/loggers.py",
"**/itwinai/torch/engine.py"
"**/itwinai/loggers.py"
]
}
@@ -1,10 +1,12 @@
---
-name: Test workflows
+name: Unit and integration tests

on:
pull_request:
branches: [main, dev]

# TODO: use container and set custom TORCH_ENV and TF_ENV env variables

jobs:
test-itwinai:
name: Test itwinai with pytest
3 changes: 3 additions & 0 deletions .gitignore
@@ -26,6 +26,9 @@ mnist-sample-data/
exp_data/


# Kubernetes
secret*.yaml

# Custom envs
.venv*
envAI_*
1 change: 1 addition & 0 deletions .vscode/settings.json
@@ -11,6 +11,7 @@
"Convolutional",
"cuda",
"dataloaders",
"dataloading",
"fromlist",
"hyperparameters",
"hyperparams",
30 changes: 29 additions & 1 deletion README.md
@@ -96,7 +96,35 @@ pip install -e .[dev]

#### Test with `pytest`

-To run tests on itwinai package:
+Do this only if you are a developer wanting to test your code with pytest.

First, create virtual environments for both torch and tensorflow. For instance, you can use:

```bash
make torch-cpu
make tf-2.13-cpu
```

To change the names of the torch and tf environments, set the following
environment variables. This lets you run the tests in environments with
custom names instead of the defaults `.venv-pytorch` and `.venv-tf`.

```bash
export TORCH_ENV="my_torch_env"
export TF_ENV="my_tf_env"
```
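
For illustration only, a helper could resolve these variables with the documented defaults. Only `TORCH_ENV`, `TF_ENV`, `.venv-pytorch`, and `.venv-tf` come from this README; the helper itself is hypothetical:

```python
# Hypothetical sketch, not part of itwinai: resolve the test environment
# names, falling back to the defaults documented above.
import os

def resolve_test_envs() -> dict:
    return {
        "torch": os.environ.get("TORCH_ENV", ".venv-pytorch"),
        "tf": os.environ.get("TF_ENV", ".venv-tf"),
    }
```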

Functional tests (marked with `pytest.mark.functional`) are executed under
`/tmp/pytest` to guarantee they run in a clean environment.
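
As a rough sketch (assumed, not taken from this repository), such isolation could be implemented with a fixture that moves the working directory to `/tmp/pytest` for the duration of a test:

```python
# Hypothetical pytest fixture illustrating the /tmp/pytest isolation
# described above; the actual itwinai implementation may differ.
import os
import pytest

@pytest.fixture
def functional_workdir():
    workdir = "/tmp/pytest"
    os.makedirs(workdir, exist_ok=True)
    previous = os.getcwd()
    os.chdir(workdir)   # run the test from a clean location
    yield workdir
    os.chdir(previous)  # restore the original working directory
```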

To run functional tests use:

```bash
pytest -v tests/ -m "functional"
```

To run all tests on the itwinai package:

```bash
# Activate env
7 changes: 5 additions & 2 deletions docs/conf.py
@@ -21,7 +21,8 @@
sys.path.insert(0, os.path.abspath('../'))

project = 'itwinai'
-copyright = '2024, Matteo Bunino, Alexander Zoechbauer, Kalliopi Tsolaki, Rakesh Sarma on behalf of CERN & JSC'
+copyright = ('2024, Matteo Bunino, Alexander Zoechbauer, '
+             'Kalliopi Tsolaki, Rakesh Sarma on behalf of CERN & JSC')
author = 'Matteo Bunino, Alexander Zoechbauer, Kalliopi Tsolaki'
# version = '0.0' # short version
# release = '0.0.2' # full version
@@ -43,7 +44,9 @@

def get_git_tag():
try:
-        return subprocess.check_output(['git', 'describe', '--tags', '--abbrev=0']).decode('utf-8').strip()
+        return subprocess.check_output(
+            ['git', 'describe', '--tags', '--abbrev=0']
+        ).decode('utf-8').strip()
except subprocess.CalledProcessError:
return 'unknown'

3 changes: 3 additions & 0 deletions env-files/tensorflow/createEnvJSCTF.sh
@@ -104,5 +104,8 @@ if [ "$cont1" = true ] ; then
pip3 install -r reqs_TF.txt --ignore-installed
fi

# Install itwinai
pip install --upgrade pip
pip install -e .[dev]

# eof
82 changes: 56 additions & 26 deletions src/itwinai/cli.py
@@ -16,7 +16,7 @@
import typer


-app = typer.Typer()
+app = typer.Typer(pretty_exceptions_enable=False)


@app.command()
@@ -27,9 +27,6 @@ def scalability_report(
plot_title: Annotated[Optional[str], typer.Option(
help=("Plot name.")
)] = None,
-    logy: Annotated[bool, typer.Option(
-        help=("Log scale on y axis.")
-    )] = False,
skip_id: Annotated[Optional[int], typer.Option(
help=("Skip epoch ID.")
)] = None,
@@ -43,15 +40,17 @@
Example:
>>> itwinai scalability-report --pattern="^epoch.+\\.csv$" --skip-id 0 \\
>>> --plot-title "Some title" --logy --archive archive_name
>>> --plot-title "Some title" --archive archive_name
"""
# TODO: add max depth and path different from CWD
import os
import re
import glob
import shutil
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
-    # import numpy as np
+    import numpy as np

regex = re.compile(r'{}'.format(pattern))
combined_df = pd.DataFrame()
@@ -83,7 +82,13 @@
if plot_title is not None:
fig.suptitle(plot_title)

-    for name in set(avg_times.name.values):
+    sp_up_ax.set_yscale("log")
+    sp_up_ax.set_xscale("log")
+
+    markers = iter("ov^s*dXpD.+12348")
+
+    series_names = sorted(set(avg_times.name.values))
+    for name in series_names:
df = avg_times[avg_times.name == name].drop(columns='name')

# Debug
@@ -104,32 +109,27 @@
df["Efficiency"] = df["Threadscaled Sim. Time / s"].iloc[0] / \
df["Threadscaled Sim. Time / s"]

-        # Plot
-        # when lines are very close to each other
-        if logy:
-            sp_up_ax.semilogy(
-                df["NGPUs"].values, df["Speedup"].values,
-                marker='*', lw=1.0, label=name)
-        else:
-            sp_up_ax.plot(
-                df["NGPUs"].values, df["Speedup"].values,
-                marker='*', lw=1.0, label=name)
-
-        if logy:
-            sp_up_ax.semilogy(df["NGPUs"].values, df["Speedup - ideal"].values,
-                              ls='dashed', lw=1.0, c='k', label="ideal")
-        else:
-            sp_up_ax.plot(df["NGPUs"].values, df["Speedup - ideal"].values,
-                          ls='dashed', lw=1.0, c='k', label="ideal")
+        sp_up_ax.plot(
+            df["NGPUs"].values, df["Speedup"].values,
+            marker=next(markers), lw=1.0, label=name, alpha=0.7)

+    sp_up_ax.plot(df["NGPUs"].values, df["Speedup - ideal"].values,
+                  ls='dashed', lw=1.0, c='k', label="ideal")
sp_up_ax.legend(ncol=1)

sp_up_ax.set_xticks(df["NGPUs"].values)
-    # sp_up_ax.set_yticks(
-    #     np.arange(1, np.max(df["Speedup - ideal"].values) + 2, 1))
+    sp_up_ax.get_xaxis().set_major_formatter(
+        matplotlib.ticker.ScalarFormatter())

sp_up_ax.set_ylabel('Speedup')
sp_up_ax.set_xlabel('NGPUs (4 per node)')
sp_up_ax.grid()

# Sort legend
handles, labels = sp_up_ax.get_legend_handles_labels()
order = np.argsort(labels)
plt.legend([handles[idx] for idx in order], [labels[idx] for idx in order])

plot_png = f"scaling_plot_{plot_title}.png"
plt.tight_layout()
plt.savefig(plot_png, bbox_inches='tight', format='png', dpi=300)
@@ -151,6 +151,18 @@ def scalability_report(
os.path.basename(csvfile)))
shutil.copyfile(plot_png, os.path.join(archive, plot_png))
avg_times.to_csv(os.path.join(archive, "avg_times.csv"), index=False)
print("Archived AVG epoch times CSV")

# Copy SLURM logs: *.err *.out files
if os.path.exists('logs_slurm'):
print("Archived SLURM logs")
shutil.copytree('logs_slurm', os.path.join(archive, 'logs_slurm'))
# Copy other SLURM logs
for ext in ['*.out', '*.err']:
for file in glob.glob(ext):
shutil.copyfile(file, os.path.join(archive, file))

# Create archive
archive_name = shutil.make_archive(
base_name=archive, # archive file name
format='gztar',
@@ -170,6 +182,11 @@ def exec_pipeline(
help=("Key in the configuration file identifying "
"the pipeline object to execute.")
)] = "pipeline",
steps: Annotated[Optional[str], typer.Option(
help=("Run only some steps of the pipeline. Accepted values are "
"indices, python slices (e.g., 0:3 or 2:10:100), and "
"string names of steps.")
)] = None,
print_config: Annotated[bool, typer.Option(
help=("Print config to be executed after overrides.")
)] = False,
@@ -195,11 +212,14 @@
# to find the local python files imported from the pipeline file
import os
import sys
import re
from .utils import str_to_slice
sys.path.append(os.path.dirname(config))
sys.path.append(os.getcwd())

# Parse and execute pipeline
from itwinai.parser import ConfigParser
overrides_list = overrides_list if overrides_list is not None else []
overrides = {
k: v for k, v
in map(lambda x: (x.split('=')[0], x.split('=')[1]), overrides_list)
@@ -213,8 +233,18 @@
print("#="*50)
print()
pipeline = parser.parse_pipeline(pipeline_nested_key=pipe_key)
if steps:
if not re.match(r"\d+(:\d+)?(:\d+)?", steps):
print(f"Looking for step name '{steps}'")
else:
steps = str_to_slice(steps)
pipeline = pipeline[steps]
pipeline.execute()

# Cleanup PYTHONPATH
sys.path.pop()
sys.path.pop()
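
`str_to_slice` is imported from `.utils` but its body is not part of this diff. A plausible minimal sketch, inferred only from the `--steps` help text above (indices, slices such as `0:3` or `2:10:100`):

```python
# Hypothetical sketch of str_to_slice (the real implementation lives in
# itwinai.utils and is not shown in this diff): converts "2" to an int
# index and "0:3" or "2:10:100" to a slice object.
def str_to_slice(expr: str):
    parts = expr.split(":")
    if len(parts) == 1:
        return int(parts[0])
    start, stop, *step = (int(p) if p else None for p in parts)
    return slice(start, stop, step[0] if step else None)
```

Under this assumption, `itwinai exec-pipeline --steps 0:3` would execute only the first three steps of the parsed pipeline.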


@app.command()
def mlflow_ui(
72 changes: 0 additions & 72 deletions src/itwinai/cluster.py

This file was deleted.

8 changes: 0 additions & 8 deletions src/itwinai/components.py
@@ -216,14 +216,6 @@ def execute(
validation dataset, test dataset, trained model.
"""

-    @abstractmethod
-    def save_state(self):
-        pass
-
-    @abstractmethod
-    def load_state(self):
-        pass


class Predictor(BaseComponent):
"""Applies a pre-trained machine learning model to unseen data."""
