From 8eb1bf75cbeb0a0d966e4d8cf887f8420bb62785 Mon Sep 17 00:00:00 2001
From: hasan7n <hasankassim7@hotmail.com>
Date: Wed, 18 Sep 2024 14:01:56 +0200
Subject: [PATCH] draft setup for mlcube ci

---
 mlcube/ci/metrics_mlcube/.gitignore           |  2 +
 mlcube/ci/metrics_mlcube/README.md            | 17 ++++++
 mlcube/ci/metrics_mlcube/build.sh             |  2 +
 mlcube/ci/metrics_mlcube/mlcube/mlcube.yaml   | 32 ++++++++++++
 mlcube/ci/metrics_mlcube/project/Dockerfile   |  4 ++
 .../ci/metrics_mlcube/project/entrypoint.py   | 52 +++++++++++++++++++
 mlcube/ci/metrics_mlcube/setup.sh             |  3 ++
 mlcube/ci/metrics_mlcube/test.sh              | 15 ++++++
 8 files changed, 127 insertions(+)
 create mode 100644 mlcube/ci/metrics_mlcube/.gitignore
 create mode 100644 mlcube/ci/metrics_mlcube/README.md
 create mode 100644 mlcube/ci/metrics_mlcube/build.sh
 create mode 100644 mlcube/ci/metrics_mlcube/mlcube/mlcube.yaml
 create mode 100644 mlcube/ci/metrics_mlcube/project/Dockerfile
 create mode 100644 mlcube/ci/metrics_mlcube/project/entrypoint.py
 create mode 100644 mlcube/ci/metrics_mlcube/setup.sh
 create mode 100644 mlcube/ci/metrics_mlcube/test.sh

diff --git a/mlcube/ci/metrics_mlcube/.gitignore b/mlcube/ci/metrics_mlcube/.gitignore
new file mode 100644
index 000000000..35f26dbbb
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/.gitignore
@@ -0,0 +1,2 @@
+!build.sh
+!setup.sh
diff --git a/mlcube/ci/metrics_mlcube/README.md b/mlcube/ci/metrics_mlcube/README.md
new file mode 100644
index 000000000..c8a2de174
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/README.md
@@ -0,0 +1,17 @@
+Build (run every command below from this directory; the scripts use relative paths):
+
+```
+sh build.sh
+```
+
+Set up assets to test MLCubes:
+
+```
+sh setup.sh
+```
+
+Test: (modify `test.sh` to change the test)
+
+```
+sh test.sh
+```
diff --git a/mlcube/ci/metrics_mlcube/build.sh b/mlcube/ci/metrics_mlcube/build.sh
new file mode 100644
index 000000000..aed84bd78
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/build.sh
@@ -0,0 +1,2 @@
+docker build -t gandlfcpu -f ../../../Dockerfile-CPU ../../.. &&
+    mlcube configure --mlcube ./mlcube -Pdocker.build_strategy=always  # runs only if the base image build succeeded
diff --git a/mlcube/ci/metrics_mlcube/mlcube/mlcube.yaml b/mlcube/ci/metrics_mlcube/mlcube/mlcube.yaml
new file mode 100644
index 000000000..d89fda0f3
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/mlcube/mlcube.yaml
@@ -0,0 +1,32 @@
+# Metadata. If you are an author, change this to reflect your organization and metrics specifics.
+name: MLCommons GaNDLF Generic MLCube
+description: MLCommons GaNDLF MLCube, containing functionality for calculating metrics.
+authors: 
+ - {name: "MLCommons Medical Working Group", email: "gandlf@mlcommons.org", org: "MLCommons" }
+
+
+docker:
+  # The image tag that will be built/pulled/used. Change to suit your organization/model:version.
+  image: mlcommons/gandlf-metrics-mlcube:0.0.1
+  
+  build_context: "../project"
+  # Docker file name within docker build context, default is `Dockerfile`.
+  build_file: "Dockerfile"
+
+## Everything below this point affects how the GaNDLF container is invoked.
+## If you are an author, it is strongly recommended that you do not edit these.
+## Please request any new features for deployed containers from the GaNDLF maintainers:
+## https://github.com/mlcommons/GaNDLF/issues/new?template=---feature-request.md
+
+tasks:
+  evaluate:
+    # Runs metrics calculation on predictions
+    entrypoint: "python3.9 /entrypoint.py"
+    parameters:
+      inputs: {
+        predictions: predictions/,
+        labels: labels/,
+        # GaNDLF config file. The name should be `parameters.yaml`
+        config: {type: file, default: parameters.yaml}
+      }
+      outputs: { output-file: { type: "file", default: "results.yaml" } }
diff --git a/mlcube/ci/metrics_mlcube/project/Dockerfile b/mlcube/ci/metrics_mlcube/project/Dockerfile
new file mode 100644
index 000000000..80a923b75
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/project/Dockerfile
@@ -0,0 +1,4 @@
+FROM gandlfcpu
+# `gandlfcpu` is the image tagged locally by build.sh; mlcube.yaml's evaluate task runs /entrypoint.py.
+COPY entrypoint.py /entrypoint.py
+
diff --git a/mlcube/ci/metrics_mlcube/project/entrypoint.py b/mlcube/ci/metrics_mlcube/project/entrypoint.py
new file mode 100644
index 000000000..6e3db3c9a
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/project/entrypoint.py
@@ -0,0 +1,52 @@
+import os
+import argparse
+import json
+import yaml
+
+
+def convert_json_to_yaml(tmp_json_output, output_file):
+    with open(tmp_json_output) as f:  # parse the JSON results file
+        results = json.load(f)
+    with open(output_file, "w") as f:  # write the same data out as YAML
+        yaml.dump(results, f)
+    os.remove(tmp_json_output)  # delete the JSON file once it has been converted
+
+
+def run_gandlf(predictions, labels, output_file, config):
+    """
+    Run GaNDLF's generate-metrics command and convert its JSON results to YAML.
+
+    Args:
+        predictions (str): The path to predictions folder. It must contain a "predictions.csv" file
+        labels (str): The path to labels folder. It must contain a "targets.csv" file.
+        output_file (str): The path to the output YAML file; a temporary "results.json" is written next to it
+        config (str): The path to the parameters file
+
+    Raises:
+        RuntimeError: If the GaNDLF process finishes with a non-zero exit code.
+
+    Note: If predictions and labels CSVs contain paths,
+          those paths should be relative to the containing folder.
+    """
+    import subprocess  # used instead of os.system: no shell, so spaces/metacharacters in paths are harmless
+    predictions_csv = os.path.join(predictions, "predictions.csv")
+    labels_csv = os.path.join(labels, "targets.csv")
+    tmp_json_output = os.path.join(os.path.dirname(output_file), "results.json")
+    inputs = f"{labels_csv},{predictions_csv}"  # targets first, then predictions, joined by a comma
+    cmd = ["gandlf", "generate-metrics", "-c", config, "-i", inputs, "-o", tmp_json_output]
+    exit_code = subprocess.run(cmd).returncode
+    if exit_code != 0:
+        raise RuntimeError(f"GaNDLF process failed with exit code {exit_code}")
+    convert_json_to_yaml(tmp_json_output, output_file)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config", metavar="", type=str, required=True)
+    parser.add_argument("--predictions", metavar="", type=str, required=True)
+    # Required: run_gandlf() takes the directory of this path, which raises TypeError on None.
+    parser.add_argument("--output-file", metavar="", type=str, required=True)
+    parser.add_argument("--labels", metavar="", type=str, required=True)
+    args = parser.parse_args()
+
+    run_gandlf(args.predictions, args.labels, args.output_file, args.config)
diff --git a/mlcube/ci/metrics_mlcube/setup.sh b/mlcube/ci/metrics_mlcube/setup.sh
new file mode 100644
index 000000000..fc5dfb351
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/setup.sh
@@ -0,0 +1,3 @@
+wget https://storage.googleapis.com/medperf-storage/gandlf_mlcube_test.tar.gz  # download the test assets archive
+tar -xf gandlf_mlcube_test.tar.gz  # extract it into the current directory
+rm gandlf_mlcube_test.tar.gz  # remove the downloaded archive afterwards
diff --git a/mlcube/ci/metrics_mlcube/test.sh b/mlcube/ci/metrics_mlcube/test.sh
new file mode 100644
index 000000000..efec5e64a
--- /dev/null
+++ b/mlcube/ci/metrics_mlcube/test.sh
@@ -0,0 +1,15 @@
+# classification (rm -f: results.yaml does not exist yet on the first run)
+rm -f ./mlcube/workspace/results.yaml
+mlcube run --mlcube ./mlcube \
+    --task evaluate \
+    predictions=../../test_classification/predictions \
+    labels=../../test_classification/labels \
+    config=../../test_classification/config.yaml
+
+# # segmentation (FAILS BECAUSE OF RELATIVE PATHS)
+# rm -f ./mlcube/workspace/results.yaml
+# mlcube run --mlcube ./mlcube \
+#     --task evaluate \
+#     predictions=../../test_segmentation/predictions \
+#     labels=../../test_segmentation/labels \
+#     config=../../test_segmentation/config.yaml