From 8eb1bf75cbeb0a0d966e4d8cf887f8420bb62785 Mon Sep 17 00:00:00 2001
From: hasan7n
Date: Wed, 18 Sep 2024 14:01:56 +0200
Subject: [PATCH] draft setup for mlcube ci

---
Review notes: entrypoint.py now runs GaNDLF through subprocess with an
argument list instead of a shell string, checks the process return code
directly, and requires --output-file. test.sh uses `rm -f` so a missing
results file is not reported as an error. The blob ids on the `index`
lines of the edited files were not regenerated.

 mlcube/ci/metrics_mlcube/.gitignore         |  2 +
 mlcube/ci/metrics_mlcube/README.md          | 17 +++++
 mlcube/ci/metrics_mlcube/build.sh           |  2 +
 mlcube/ci/metrics_mlcube/mlcube/mlcube.yaml | 32 ++++++++
 mlcube/ci/metrics_mlcube/project/Dockerfile |  4 +
 .../ci/metrics_mlcube/project/entrypoint.py | 73 +++++++++++++++++++
 mlcube/ci/metrics_mlcube/setup.sh           |  3 +
 mlcube/ci/metrics_mlcube/test.sh            | 15 ++++
 8 files changed, 148 insertions(+)
 create mode 100644 mlcube/ci/metrics_mlcube/.gitignore
 create mode 100644 mlcube/ci/metrics_mlcube/README.md
 create mode 100644 mlcube/ci/metrics_mlcube/build.sh
 create mode 100644 mlcube/ci/metrics_mlcube/mlcube/mlcube.yaml
 create mode 100644 mlcube/ci/metrics_mlcube/project/Dockerfile
 create mode 100644 mlcube/ci/metrics_mlcube/project/entrypoint.py
 create mode 100644 mlcube/ci/metrics_mlcube/setup.sh
 create mode 100644 mlcube/ci/metrics_mlcube/test.sh

diff --git a/mlcube/ci/metrics_mlcube/.gitignore b/mlcube/ci/metrics_mlcube/.gitignore
new file mode 100644
index 000000000..35f26dbbb
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/.gitignore
@@ -0,0 +1,2 @@
+!build.sh
+!setup.sh
diff --git a/mlcube/ci/metrics_mlcube/README.md b/mlcube/ci/metrics_mlcube/README.md
new file mode 100644
index 000000000..c8a2de174
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/README.md
@@ -0,0 +1,17 @@
+Build:
+
+```
+sh build.sh
+```
+
+Setup assets to test mlcubes:
+
+```
+sh setup.sh
+```
+
+Test: (modify `test.sh` to change the test)
+
+```
+sh test.sh
+```
diff --git a/mlcube/ci/metrics_mlcube/build.sh b/mlcube/ci/metrics_mlcube/build.sh
new file mode 100644
index 000000000..aed84bd78
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/build.sh
@@ -0,0 +1,2 @@
+docker build -t gandlfcpu -f ../../../Dockerfile-CPU ../../..
+mlcube configure --mlcube ./mlcube -Pdocker.build_strategy=always
diff --git a/mlcube/ci/metrics_mlcube/mlcube/mlcube.yaml b/mlcube/ci/metrics_mlcube/mlcube/mlcube.yaml
new file mode 100644
index 000000000..d89fda0f3
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/mlcube/mlcube.yaml
@@ -0,0 +1,32 @@
+# Metadata. If you are an author, change this to reflect your organization and metrics specifics.
+name: MLCommons GaNDLF Generic MLCube
+description: MLCommons GaNDLF MLCube, containing functionality for calculating metrics.
+authors:
+  - {name: "MLCommons Medical Working Group", email: "gandlf@mlcommons.org", org: "MLCommons" }
+
+
+docker:
+  # The image tag that will be built/pulled/used. Change to suit your organization/model:version.
+  image: mlcommons/gandlf-metrics-mlcube:0.0.1
+
+  build_context: "../project"
+  # Docker file name within docker build context, default is `Dockerfile`.
+  build_file: "Dockerfile"
+
+## Everything below this point affects how the GaNDLF container is invoked.
+## If you are an author, it is strongly recommended that you do not edit these.
+## Please request any new features for deployed containers from the GaNDLF maintainers:
+## https://github.com/mlcommons/GaNDLF/issues/new?template=---feature-request.md
+
+tasks:
+  evaluate:
+    # Runs metrics calculation on predictions
+    entrypoint: "python3.9 /entrypoint.py"
+    parameters:
+      inputs: {
+        predictions: predictions/,
+        labels: labels/,
+        # GaNDLF config file. The name should be `parameters.yaml`
+        config: {type: file, default: parameters.yaml}
+      }
+      outputs: { output-file: { type: "file", default: "results.yaml" } }
diff --git a/mlcube/ci/metrics_mlcube/project/Dockerfile b/mlcube/ci/metrics_mlcube/project/Dockerfile
new file mode 100644
index 000000000..80a923b75
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/project/Dockerfile
@@ -0,0 +1,4 @@
+FROM gandlfcpu
+
+COPY entrypoint.py /entrypoint.py
+
diff --git a/mlcube/ci/metrics_mlcube/project/entrypoint.py b/mlcube/ci/metrics_mlcube/project/entrypoint.py
new file mode 100644
index 000000000..6e3db3c9a
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/project/entrypoint.py
@@ -0,0 +1,73 @@
+import os
+import argparse
+import json
+import subprocess
+import yaml
+
+
+def convert_json_to_yaml(tmp_json_output, output_file):
+    """
+    Converts GaNDLF's JSON results file to YAML and removes the JSON file.
+
+    Args:
+        tmp_json_output (str): The path to the JSON file to read and then delete
+        output_file (str): The path to the YAML file to write
+    """
+    with open(tmp_json_output) as f:
+        results = json.load(f)
+    with open(output_file, "w") as f:
+        yaml.dump(results, f)
+    os.remove(tmp_json_output)
+
+
+def run_gandlf(predictions, labels, output_file, config):
+    """
+    A function that calls GaNDLF's generate metrics command.
+
+    Args:
+        predictions (str): The path to predictions folder. It must contain a "predictions.csv" file
+        labels (str): The path to labels folder. It must contain a "targets.csv" file.
+        output_file (str): The path to the output YAML file
+        config (str): The path to the parameters file
+
+    Raises:
+        RuntimeError: If the GaNDLF process exits with a non-zero code.
+
+    Note: If predictions and labels CSVs contain paths,
+          those paths should be relative to the containing folder.
+    """
+    predictions_csv = os.path.join(predictions, "predictions.csv")
+    labels_csv = os.path.join(labels, "targets.csv")
+
+    output_folder = os.path.dirname(output_file)
+    tmp_json_output = os.path.join(output_folder, "results.json")
+
+    # Pass the arguments as a list (no shell) so that paths containing spaces
+    # or shell metacharacters reach GaNDLF unchanged.
+    command = [
+        "gandlf",
+        "generate-metrics",
+        "-c",
+        config,
+        "-i",
+        f"{labels_csv},{predictions_csv}",
+        "-o",
+        tmp_json_output,
+    ]
+    exit_code = subprocess.run(command).returncode
+    if exit_code != 0:
+        raise RuntimeError(f"GaNDLF process failed with exit code {exit_code}")
+    convert_json_to_yaml(tmp_json_output, output_file)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config", metavar="", type=str, required=True)
+    parser.add_argument("--predictions", metavar="", type=str, required=True)
+    # The output path is needed to locate the results folder, so it is mandatory.
+    parser.add_argument("--output-file", metavar="", type=str, required=True)
+    parser.add_argument("--labels", metavar="", type=str, required=True)
+
+    args = parser.parse_args()
+
+    run_gandlf(args.predictions, args.labels, args.output_file, args.config)
diff --git a/mlcube/ci/metrics_mlcube/setup.sh b/mlcube/ci/metrics_mlcube/setup.sh
new file mode 100644
index 000000000..fc5dfb351
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/setup.sh
@@ -0,0 +1,3 @@
+wget https://storage.googleapis.com/medperf-storage/gandlf_mlcube_test.tar.gz
+tar -xf gandlf_mlcube_test.tar.gz
+rm gandlf_mlcube_test.tar.gz
diff --git a/mlcube/ci/metrics_mlcube/test.sh b/mlcube/ci/metrics_mlcube/test.sh
new file mode 100644
index 000000000..efec5e64a
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/test.sh
@@ -0,0 +1,15 @@
+# classification
+rm -f ./mlcube/workspace/results.yaml
+mlcube run --mlcube ./mlcube \
+    --task evaluate \
+    predictions=../../test_classification/predictions \
+    labels=../../test_classification/labels \
+    config=../../test_classification/config.yaml
+
+# # segmentation (FAILS BECAUSE OF RELATIVE PATHS)
+# rm -f ./mlcube/workspace/results.yaml
+# mlcube run --mlcube ./mlcube \
+#     --task evaluate \
+#     predictions=../../test_segmentation/predictions \
+#     labels=../../test_segmentation/labels \
+#     config=../../test_segmentation/config.yaml