Merge branch 'dev' into docs_dev
matbun committed May 2, 2024
2 parents 17b38ff + e052d1c commit 6e52fa9
Showing 98 changed files with 2,803 additions and 4,702 deletions.
4 changes: 4 additions & 0 deletions .dockerignore
@@ -12,6 +12,10 @@ CHANGELOG
# Docs
docs

# interLink pods
**/interLink
**/interlink

# Data
**/MNIST
**/*-predictions/
3 changes: 1 addition & 2 deletions .github/linters/.jscpd.json
@@ -1,7 +1,6 @@
{
"threshold": 2.0,
"ignore": [
"**/itwinai/loggers.py",
"**/itwinai/torch/engine.py"
"**/itwinai/loggers.py"
]
}
@@ -1,10 +1,12 @@
---
-name: Test workflows
+name: Unit and integration tests

on:
pull_request:
branches: [main, dev]

# TODO: use container and set custom TORCH_ENV and TF_ENV env variables

jobs:
test-itwinai:
name: Test itwinai with pytest
3 changes: 3 additions & 0 deletions .gitignore
@@ -26,6 +26,9 @@ mnist-sample-data/
exp_data/


# Kubernetes
secret*.yaml

# Custom envs
.venv*
envAI_*
1 change: 1 addition & 0 deletions .vscode/settings.json
@@ -11,6 +11,7 @@
"Convolutional",
"cuda",
"dataloaders",
"dataloading",
"fromlist",
"hyperparameters",
"hyperparams",
30 changes: 29 additions & 1 deletion README.md
@@ -96,7 +96,35 @@ pip install -e .[dev]

#### Test with `pytest`

-To run tests on itwinai package:
+Do this only if you are a developer wanting to test your code with pytest.

First, create virtual environments for both torch and tensorflow. For instance, you can use:

```bash
make torch-cpu
make tf-2.13-cpu
```

To change the names of the torch and tf environments, set the following
environment variables. This lets you run the tests in environments with
custom names instead of the defaults `.venv-pytorch` and `.venv-tf`.

```bash
export TORCH_ENV="my_torch_env"
export TF_ENV="my_tf_env"
```
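
For illustration only, a helper could resolve these variables with the documented defaults. Only `TORCH_ENV`, `TF_ENV`, `.venv-pytorch`, and `.venv-tf` come from this README; the helper itself is hypothetical:

```python
# Hypothetical sketch, not part of itwinai: resolve the test environment
# names, falling back to the defaults documented above.
import os

def resolve_test_envs() -> dict:
    return {
        "torch": os.environ.get("TORCH_ENV", ".venv-pytorch"),
        "tf": os.environ.get("TF_ENV", ".venv-tf"),
    }
```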

Functional tests (marked with `pytest.mark.functional`) are executed under
`/tmp/pytest` to guarantee they run in a clean environment.
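
As a rough sketch (assumed, not taken from this repository), such isolation could be implemented with a fixture that moves the working directory to `/tmp/pytest` for the duration of a test:

```python
# Hypothetical pytest fixture illustrating the /tmp/pytest isolation
# described above; the actual itwinai implementation may differ.
import os
import pytest

@pytest.fixture
def functional_workdir():
    workdir = "/tmp/pytest"
    os.makedirs(workdir, exist_ok=True)
    previous = os.getcwd()
    os.chdir(workdir)   # run the test from a clean location
    yield workdir
    os.chdir(previous)  # restore the original working directory
```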

To run functional tests use:

```bash
pytest -v tests/ -m "functional"
```

To run all tests on the itwinai package:

```bash
# Activate env
7 changes: 5 additions & 2 deletions docs/conf.py
@@ -21,7 +21,8 @@
sys.path.insert(0, os.path.abspath('../'))

project = 'itwinai'
-copyright = '2024, Matteo Bunino, Alexander Zoechbauer, Kalliopi Tsolaki, Rakesh Sarma on behalf of CERN & JSC'
+copyright = ('2024, Matteo Bunino, Alexander Zoechbauer, '
+             'Kalliopi Tsolaki, Rakesh Sarma on behalf of CERN & JSC')
author = 'Matteo Bunino, Alexander Zoechbauer, Kalliopi Tsolaki'
# version = '0.0' # short version
# release = '0.0.2' # full version
@@ -43,7 +44,9 @@

def get_git_tag():
try:
-        return subprocess.check_output(['git', 'describe', '--tags', '--abbrev=0']).decode('utf-8').strip()
+        return subprocess.check_output(
+            ['git', 'describe', '--tags', '--abbrev=0']
+        ).decode('utf-8').strip()
except subprocess.CalledProcessError:
return 'unknown'

3 changes: 3 additions & 0 deletions env-files/tensorflow/createEnvJSCTF.sh
@@ -104,5 +104,8 @@ if [ "$cont1" = true ] ; then
pip3 install -r reqs_TF.txt --ignore-installed
fi

# Install itwinai
pip install --upgrade pip
pip install -e .[dev]

# eof
82 changes: 56 additions & 26 deletions src/itwinai/cli.py
@@ -16,7 +16,7 @@
import typer


-app = typer.Typer()
+app = typer.Typer(pretty_exceptions_enable=False)


@app.command()
@@ -27,9 +27,6 @@ def scalability_report(
plot_title: Annotated[Optional[str], typer.Option(
help=("Plot name.")
)] = None,
-    logy: Annotated[bool, typer.Option(
-        help=("Log scale on y axis.")
-    )] = False,
skip_id: Annotated[Optional[int], typer.Option(
help=("Skip epoch ID.")
)] = None,
@@ -43,15 +40,17 @@
Example:
>>> itwinai scalability-report --pattern="^epoch.+\\.csv$" --skip-id 0 \\
>>> --plot-title "Some title" --logy --archive archive_name
>>> --plot-title "Some title" --archive archive_name
"""
# TODO: add max depth and path different from CWD
import os
import re
import glob
import shutil
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
-    # import numpy as np
+    import numpy as np

regex = re.compile(r'{}'.format(pattern))
combined_df = pd.DataFrame()
@@ -83,7 +82,13 @@
if plot_title is not None:
fig.suptitle(plot_title)

-    for name in set(avg_times.name.values):
+    sp_up_ax.set_yscale("log")
+    sp_up_ax.set_xscale("log")
+
+    markers = iter("ov^s*dXpD.+12348")
+
+    series_names = sorted(set(avg_times.name.values))
+    for name in series_names:
df = avg_times[avg_times.name == name].drop(columns='name')

# Debug
@@ -104,32 +109,27 @@
df["Efficiency"] = df["Threadscaled Sim. Time / s"].iloc[0] / \
df["Threadscaled Sim. Time / s"]

-        # Plot
-        # when lines are very close to each other
-        if logy:
-            sp_up_ax.semilogy(
-                df["NGPUs"].values, df["Speedup"].values,
-                marker='*', lw=1.0, label=name)
-        else:
-            sp_up_ax.plot(
-                df["NGPUs"].values, df["Speedup"].values,
-                marker='*', lw=1.0, label=name)
-
-        if logy:
-            sp_up_ax.semilogy(df["NGPUs"].values, df["Speedup - ideal"].values,
-                              ls='dashed', lw=1.0, c='k', label="ideal")
-        else:
-            sp_up_ax.plot(df["NGPUs"].values, df["Speedup - ideal"].values,
-                          ls='dashed', lw=1.0, c='k', label="ideal")
+        sp_up_ax.plot(
+            df["NGPUs"].values, df["Speedup"].values,
+            marker=next(markers), lw=1.0, label=name, alpha=0.7)

+    sp_up_ax.plot(df["NGPUs"].values, df["Speedup - ideal"].values,
+                  ls='dashed', lw=1.0, c='k', label="ideal")
sp_up_ax.legend(ncol=1)

sp_up_ax.set_xticks(df["NGPUs"].values)
-    # sp_up_ax.set_yticks(
-    #     np.arange(1, np.max(df["Speedup - ideal"].values) + 2, 1))
+    sp_up_ax.get_xaxis().set_major_formatter(
+        matplotlib.ticker.ScalarFormatter())

sp_up_ax.set_ylabel('Speedup')
sp_up_ax.set_xlabel('NGPUs (4 per node)')
sp_up_ax.grid()

# Sort legend
handles, labels = sp_up_ax.get_legend_handles_labels()
order = np.argsort(labels)
plt.legend([handles[idx] for idx in order], [labels[idx] for idx in order])

plot_png = f"scaling_plot_{plot_title}.png"
plt.tight_layout()
plt.savefig(plot_png, bbox_inches='tight', format='png', dpi=300)
@@ -151,6 +151,18 @@ def scalability_report(
os.path.basename(csvfile)))
shutil.copyfile(plot_png, os.path.join(archive, plot_png))
avg_times.to_csv(os.path.join(archive, "avg_times.csv"), index=False)
print("Archived AVG epoch times CSV")

# Copy SLURM logs: *.err *.out files
if os.path.exists('logs_slurm'):
print("Archived SLURM logs")
shutil.copytree('logs_slurm', os.path.join(archive, 'logs_slurm'))
# Copy other SLURM logs
for ext in ['*.out', '*.err']:
for file in glob.glob(ext):
shutil.copyfile(file, os.path.join(archive, file))

# Create archive
archive_name = shutil.make_archive(
base_name=archive, # archive file name
format='gztar',
@@ -170,6 +182,11 @@ def exec_pipeline(
help=("Key in the configuration file identifying "
"the pipeline object to execute.")
)] = "pipeline",
steps: Annotated[Optional[str], typer.Option(
help=("Run only some steps of the pipeline. Accepted values are "
"indices, python slices (e.g., 0:3 or 2:10:100), and "
"string names of steps.")
)] = None,
print_config: Annotated[bool, typer.Option(
help=("Print config to be executed after overrides.")
)] = False,
@@ -195,11 +212,14 @@
# to find the local python files imported from the pipeline file
import os
import sys
import re
from .utils import str_to_slice
sys.path.append(os.path.dirname(config))
sys.path.append(os.getcwd())

# Parse and execute pipeline
from itwinai.parser import ConfigParser
overrides_list = overrides_list if overrides_list is not None else []
overrides = {
k: v for k, v
in map(lambda x: (x.split('=')[0], x.split('=')[1]), overrides_list)
@@ -213,8 +233,18 @@
print("#="*50)
print()
pipeline = parser.parse_pipeline(pipeline_nested_key=pipe_key)
if steps:
if not re.match(r"\d+(:\d+)?(:\d+)?", steps):
print(f"Looking for step name '{steps}'")
else:
steps = str_to_slice(steps)
pipeline = pipeline[steps]
pipeline.execute()

# Cleanup PYTHONPATH
sys.path.pop()
sys.path.pop()
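
`str_to_slice` is imported from `.utils` but its body is not part of this diff. A plausible minimal sketch, inferred only from the `--steps` help text above (indices, slices such as `0:3` or `2:10:100`):

```python
# Hypothetical sketch of str_to_slice (the real implementation lives in
# itwinai.utils and is not shown in this diff): converts "2" to an int
# index and "0:3" or "2:10:100" to a slice object.
def str_to_slice(expr: str):
    parts = expr.split(":")
    if len(parts) == 1:
        return int(parts[0])
    start, stop, *step = (int(p) if p else None for p in parts)
    return slice(start, stop, step[0] if step else None)
```

Under this assumption, `itwinai exec-pipeline --steps 0:3` would execute only the first three steps of the parsed pipeline.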


@app.command()
def mlflow_ui(
72 changes: 0 additions & 72 deletions src/itwinai/cluster.py

This file was deleted.

8 changes: 0 additions & 8 deletions src/itwinai/components.py
@@ -216,14 +216,6 @@ def execute(
validation dataset, test dataset, trained model.
"""

-    @abstractmethod
-    def save_state(self):
-        pass
-
-    @abstractmethod
-    def load_state(self):
-        pass


class Predictor(BaseComponent):
"""Applies a pre-trained machine learning model to unseen data."""
