From 3f2a826206b39c2e27523c6c870e65296ffc4e3c Mon Sep 17 00:00:00 2001
From: Henry LE BERRE <hberre3@gatech.edu>
Date: Thu, 14 Dec 2023 19:42:05 -0800
Subject: [PATCH] #49: GitHub CI Benchmarking

---
 .github/workflows/bench.yml            | 56 +++++++++++++++
 .github/workflows/docker.yml           |  2 +-
 .github/workflows/phoenix/bench.sh     | 11 +++
 .github/workflows/phoenix/compare.py   | 51 ++++++++++++++
 .github/workflows/phoenix/submit.sh    | 61 +++++++++++++++++
 .github/workflows/phoenix/test.sh      | 19 ++++++
 .github/workflows/{ci.yml => test.yml} | 21 ++----
 README.md                              |  2 +-
 misc/run-phoenix-release-cpu.sh        | 16 -----
 misc/run-phoenix-release-gpu.sh        | 24 -------
 toolchain/bench.yaml                   |  3 +
 toolchain/mfc/args.py                  |  4 +-
 toolchain/mfc/bench.py                 | 95 ++++++++++++--------------
 toolchain/mfc/common.py                | 11 ++-
 14 files changed, 261 insertions(+), 115 deletions(-)
 create mode 100644 .github/workflows/bench.yml
 create mode 100644 .github/workflows/phoenix/bench.sh
 create mode 100644 .github/workflows/phoenix/compare.py
 create mode 100644 .github/workflows/phoenix/submit.sh
 create mode 100644 .github/workflows/phoenix/test.sh
 rename .github/workflows/{ci.yml => test.yml} (90%)
 delete mode 100644 misc/run-phoenix-release-cpu.sh
 delete mode 100644 misc/run-phoenix-release-gpu.sh
 create mode 100644 toolchain/bench.yaml

diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
new file mode 100644
index 0000000000..793fd40d5b
--- /dev/null
+++ b/.github/workflows/bench.yml
@@ -0,0 +1,56 @@
+name: 'Benchmark'
+
+on:
+  pull_request:
+    paths:
+        - '**.f90'
+        - '**.fpp'
+        - '**.py'
+        - '**.yml'
+        - 'mfc.sh'
+        - 'CMakeLists.txt'
+        - 'requirements.txt'
+
+jobs:
+  self:
+    name: Georgia Tech | Phoenix (NVHPC)
+    if: github.repository == 'MFlowCode/MFC'
+    strategy:
+      matrix:
+        device: ['cpu', 'gpu']
+    runs-on:
+      group:  phoenix
+      labels: self-hosted
+    steps:
+      - name: Clone - PR
+        uses: actions/checkout@v3
+
+      - name: Bench - PR
+        run: |
+          bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}
+          mv bench-${{ matrix.device }}.out ~/bench-${{ matrix.device }}-pr.out
+          mv bench-${{ matrix.device }}.yaml ~/bench-${{ matrix.device }}-pr.yaml
+
+      - name: Clone - Master
+        uses: actions/checkout@v3
+        with:
+          repository: MFlowCode/MFC # baseline is upstream master, the same repository the job's `if:` guard requires
+          ref: master
+
+      - name: Bench - Master
+        run: |
+          bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}
+          mv bench-${{ matrix.device }}.out ~/bench-${{ matrix.device }}-master.out
+          mv bench-${{ matrix.device }}.yaml ~/bench-${{ matrix.device }}-master.yaml
+
+      - name: Post Comment
+        run: |
+          python3 .github/workflows/phoenix/compare.py ~/bench-${{ matrix.device }}-master.yaml ~/bench-${{ matrix.device }}-pr.yaml
+
+      - name: Archive Logs
+        uses: actions/upload-artifact@v3
+        if: always()
+        with:
+          name: logs-${{ matrix.device }}
+          path: |
+            ~/bench-${{ matrix.device }}-*
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 4e3431ab2a..ac27cf6dc3 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -22,7 +22,7 @@ jobs:
         password: ${{ secrets.DOCKER_PASSWORD }}
 
     - name: Build & Publish thereto
-      uses: docker/build-push-action@v4
+      uses: docker/build-push-action@v3
       with:
         file: toolchain/Dockerfile
         push: true
diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh
new file mode 100644
index 0000000000..55c07dea22
--- /dev/null
+++ b/.github/workflows/phoenix/bench.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# Benchmark job body. submit.sh inlines this file into the batch job after defining job_slug and job_device.
+n_ranks=12
+
+if [ "$job_device" == "gpu" ]; then
+    n_ranks=$(nvidia-smi -L | wc -l)        # number of GPUs on node
+    gpu_ids=$(seq -s ' ' 0 $(($n_ranks-1))) # space-separated ids: 0 1 ... n_ranks-1
+    device_opts="--gpu -g $gpu_ids"
+fi
+
+./mfc.sh bench "$job_slug.yaml" -j $(nproc) -b mpirun $device_opts -n $n_ranks
diff --git a/.github/workflows/phoenix/compare.py b/.github/workflows/phoenix/compare.py
new file mode 100644
index 0000000000..90be62996e
--- /dev/null
+++ b/.github/workflows/phoenix/compare.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+
+"""Compare two `./mfc.sh bench` YAML result files and print a Markdown report."""
+
+import argparse
+
+import yaml
+
+parser = argparse.ArgumentParser()
+parser.add_argument('master', metavar="MASTER", type=str)
+parser.add_argument('pr',     metavar="PR",     type=str)
+
+args = parser.parse_args()
+
+def load_cases(filepath):
+    """Return the entries of the file's "cases" list, keyed by case name."""
+    # Use a context manager so the handle is closed as soon as parsing is done.
+    with open(filepath) as f:
+        return { case["name"]: case for case in yaml.safe_load(f)["cases"] }
+
+master, pr = load_cases(args.master), load_cases(args.pr)
+
+master_keys = set(master.keys())
+pr_keys     = set(pr.keys())
+
+missing_cases = master_keys.symmetric_difference(pr_keys)
+
+if len(missing_cases) > 0:
+    print("**Warning:** The following cases are **missing** from master or this PR:\n")
+
+    # Sorted so the report does not depend on set iteration order.
+    for case in sorted(missing_cases):
+        print(f" - {case}.")
+
+    print("")
+
+common_cases = sorted(master_keys.intersection(pr_keys))
+
+if len(common_cases) == 0:
+    # Averaging over zero cases would raise ZeroDivisionError below.
+    raise SystemExit("Error: master and this PR have no benchmark case in common.")
+
+# A speedup is master's wall time divided by the PR's wall time, so a value
+# above 1 means that the PR is faster than master.
+speedups = {}
+
+for case in common_cases:
+    speedups[case] = {
+        "pre_process": master[case]["pre_process"] / pr[case]["pre_process"],
+        "simulation":  master[case]["simulation"]  / pr[case]["simulation"],
+    }
+
+avg_speedup = sum(speedups[case]["simulation"] for case in common_cases) / len(common_cases)
+
+print(f"""\
+**[Benchmark Results]** Compared to Master, this PR's `simulation` is on average **~{avg_speedup:0.2f}x faster**.
+
+| **Case** | **Master** | **PR** | **Speedup** |
+| -------- | ---------- | ------ | ----------- |\
+""")
+
+for case in common_cases:
+    print(f"| {case} | {master[case]['simulation']:0.2f}s | {pr[case]['simulation']:0.2f}s | {speedups[case]['simulation']:0.2f}x |")
diff --git a/.github/workflows/phoenix/submit.sh b/.github/workflows/phoenix/submit.sh
new file mode 100644
index 0000000000..e4a09d26da
--- /dev/null
+++ b/.github/workflows/phoenix/submit.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+set -e
+
+usage() {
+    echo "Usage: $0 [script.sh] [cpu|gpu]"
+}
+
+if [ -n "$1" ]; then
+    sbatch_script_contents=$(cat "$1") # quoted so that a path containing spaces is read as one file
+else
+    usage
+    exit 1
+fi
+
+sbatch_cpu_opts="\
+#SBATCH --ntasks-per-node=12       # Number of cores per node required
+#SBATCH --mem-per-cpu=2G           # Memory per core\
+"
+
+sbatch_gpu_opts="\
+#SBATCH -CV100-16GB
+#SBATCH -G2\
+"
+
+if [ "$2" == "cpu" ]; then
+    sbatch_device_opts="$sbatch_cpu_opts"
+elif [ "$2" == "gpu" ]; then
+    sbatch_device_opts="$sbatch_gpu_opts"
+else
+    usage
+    exit 1
+fi
+
+job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2"
+# Unquoted heredoc: unescaped $... expands now, at submission; \$... expands later, inside the job.
+sbatch <<EOT
+#!/bin/bash
+#SBATCH -Jshb-$job_slug            # Job name
+#SBATCH --account=gts-sbryngelson3 # charge account
+#SBATCH -N1                        # Number of nodes required
+$sbatch_device_opts
+#SBATCH -t 04:00:00                # Duration of the job (Ex: 15 mins)
+#SBATCH -q embers                  # QOS Name
+#SBATCH -o$job_slug.out            # Combined output and error messages file
+#SBATCH -W                         # Do not exit until the submitted job terminates.
+
+set -e
+set -x
+
+cd "\$SLURM_SUBMIT_DIR"
+echo "Running in \$(pwd):"
+
+job_slug="$job_slug"
+job_device="$2"
+
+. ./mfc.sh load -c p -m $2
+
+$sbatch_script_contents
+
+EOT
\ No newline at end of file
diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh
new file mode 100644
index 0000000000..17bee7b50d
--- /dev/null
+++ b/.github/workflows/phoenix/test.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Build-and-test job body. submit.sh inlines this file into the batch job after defining job_device.
+build_opts=""
+if [ "$job_device" == "gpu" ]; then
+    build_opts="--gpu"
+fi
+
+./mfc.sh build -j $(nproc) $build_opts
+
+n_test_threads=$(nproc)
+# On GPU jobs, pass every GPU id to the test suite and run two test threads per GPU.
+if [ "$job_device" == "gpu" ]; then
+    gpu_count=$(nvidia-smi -L | wc -l)        # number of GPUs on node
+    gpu_ids=$(seq -s ' ' 0 $(($gpu_count-1))) # space-separated ids: 0 1 ... gpu_count-1
+    device_opts="-g $gpu_ids"
+    n_test_threads=`expr $gpu_count \* 2`
+fi
+
+./mfc.sh test -a -b mpirun -j $n_test_threads $device_opts
diff --git a/.github/workflows/ci.yml b/.github/workflows/test.yml
similarity index 90%
rename from .github/workflows/ci.yml
rename to .github/workflows/test.yml
index c520767a1e..aafdbe698c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/test.yml
@@ -114,19 +114,12 @@ jobs:
       - name: Clone
         uses: actions/checkout@v3
 
-      - name: Build
-        run:  |
-          . ./mfc.sh load -c p -m gpu
-          ./mfc.sh build -j 2 $(if [ '${{ matrix.device }}' == 'gpu' ]; then echo '--gpu'; fi)
+      - name: Build & Test
+        run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/test.sh ${{ matrix.device }}
 
-      - name: Test
-        run: |
-          . ./mfc.sh load -c p -m gpu
-          mv misc/run-phoenix-release-${{ matrix.device }}.sh ./
-          sbatch run-phoenix-release-${{ matrix.device }}.sh
-
-      - name: Print
+      - name: Archive Logs
+        uses: actions/upload-artifact@v3
         if: always()
-        run: |
-          cat test.out
-
+        with:
+          name: logs
+          path: test-${{ matrix.device }}.out
diff --git a/README.md b/README.md
index 38128495d7..98c0be90bb 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
     <img src="https://zenodo.org/badge/doi/10.1016/j.cpc.2020.107396.svg" />
   </a>
   <a href="https://github.com/MFlowCode/MFC/actions">
-    <img src="https://github.com/MFlowCode/MFC/actions/workflows/ci.yml/badge.svg" />
+    <img src="https://github.com/MFlowCode/MFC/actions/workflows/test.yml/badge.svg" />
   </a>
   <a href="https://lbesson.mit-license.org/">
     <img src="https://img.shields.io/badge/License-MIT-blue.svg" />
diff --git a/misc/run-phoenix-release-cpu.sh b/misc/run-phoenix-release-cpu.sh
deleted file mode 100644
index 2f649d07a7..0000000000
--- a/misc/run-phoenix-release-cpu.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-#SBATCH -Jshb-test-jobs            # Job name
-#SBATCH --account=gts-sbryngelson3 # charge account
-#SBATCH -N1 --ntasks-per-node=12   # Number of nodes and cores per node required
-#SBATCH --mem-per-cpu=2G           # Memory per core
-#SBATCH -t 04:00:00                # Duration of the job (Ex: 15 mins)
-#SBATCH -q embers                  # QOS Name
-#SBATCH -otest.out                 # Combined output and error messages file
-#SBATCH -W                         # Do not exit until the submitted job terminates.
-
-cd "$SLURM_SUBMIT_DIR"
-echo "Running in $(pwd):"
-
-. ./mfc.sh load -c p -m gpu
-./mfc.sh test -j $(nproc) -b mpirun -a
-
diff --git a/misc/run-phoenix-release-gpu.sh b/misc/run-phoenix-release-gpu.sh
deleted file mode 100644
index bb27af6b50..0000000000
--- a/misc/run-phoenix-release-gpu.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-#SBATCH -Jshb-test-jobs            # Job name
-#SBATCH --account=gts-sbryngelson3 # charge account
-#SBATCH -N1                        # Number of nodes and cores per node required
-#SBATCH -CV100-16GB
-#SBATCH -G2
-#SBATCH -t 02:00:00                # Duration of the job (Ex: 15 mins)
-#SBATCH -q embers                  # QOS Name
-#SBATCH -otest.out                 # Combined output and error messages file
-#SBATCH -W                         # Do not exit until the submitted job terminates.
-
-cd "$SLURM_SUBMIT_DIR"
-echo "Running in $(pwd):"
-
-set -x
-
-. ./mfc.sh load -c p -m GPU
-
-gpu_count=$(nvidia-smi -L | wc -l)        # number of GPUs on node
-gpu_ids=$(seq -s ' ' 0 $(($gpu_count-1))) # 0,1,2,...,gpu_count-1
-
-./mfc.sh test -a -b mpirun -j $(nproc) \
-              --gpu -g $gpu_ids
-
diff --git a/toolchain/bench.yaml b/toolchain/bench.yaml
new file mode 100644
index 0000000000..110f4e354f
--- /dev/null
+++ b/toolchain/bench.yaml
@@ -0,0 +1,3 @@
+- name: 1D_bubblescreen
+  path: examples/1D_bubblescreen/case.py
+  args: []
diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py
index 7d7a4ca12e..ba870940ab 100644
--- a/toolchain/mfc/args.py
+++ b/toolchain/mfc/args.py
@@ -122,7 +122,9 @@ def add_common_arguments(p, mask = None):
     run.add_argument("--wait",                 action="store_true",                       default=False,      help="(Batch) Wait for the job to finish.")
 
     # === BENCH ===
-    add_common_arguments(bench, "t")
+    add_common_arguments(bench, "tjgn")
+    bench.add_argument("output", metavar="OUTPUT", default=None, type=str, help="Path to the YAML output file to write the results to.")
+    bench.add_argument(metavar="FORWARDED", default=[], dest='forwarded', nargs=argparse.REMAINDER, help="Arguments to forward to the ./mfc.sh run invocations.")
 
     # === COUNT ===
     add_common_arguments(count, "g")
diff --git a/toolchain/mfc/bench.py b/toolchain/mfc/bench.py
index 65bacbe902..d36b756755 100644
--- a/toolchain/mfc/bench.py
+++ b/toolchain/mfc/bench.py
@@ -1,62 +1,53 @@
-import os, json, time, typing, datetime, subprocess
-
-import rich.table
+import sys, time, subprocess, dataclasses
 
 from .printer import cons
-from .state   import ARG
+from .state   import ARG, CFG
 from .build   import PRE_PROCESS, SIMULATION, build_targets
-from .common  import system, MFC_SUBDIR
+from .common  import system, MFC_BENCH_FILEPATH, file_load_yaml, file_dump_yaml
 from .        import sched
 
+
+@dataclasses.dataclass
+class BenchCase:
+    name: str
+    path: str
+    args: list[str]
+
+
 def bench():
-    build_targets([PRE_PROCESS, SIMULATION])
-    
+    cons.print()
     cons.print("[bold]Benchmarking [magenta]simulation[/magenta]:[/bold]")
     cons.indent()
-    
-    CASES   = ["1D_bubblescreen", "1D_exercise_WENO", "1D_kapilashocktube"]
-    RESULTS = []
-    
-    table = rich.table.Table(show_lines=False, show_edge=False)
-    table.add_column("Case")
-    table.add_column("(Simulation) Runtime (s)")
-    
-    def __worker(case: str, devices: typing.Set[int]):
-        nonlocal RESULTS
-        
-        system(["./mfc.sh", "run", f"examples/{case}/case.py", "--no-build", "-t", "pre_process"], stdout=subprocess.DEVNULL)
-        start   = time.monotonic()
-        system(["./mfc.sh", "run", f"examples/{case}/case.py", "--no-build", "-t", "simulation"], stdout=subprocess.DEVNULL)
-        end     = time.monotonic()
-        runtime = datetime.timedelta(seconds=end - start).total_seconds()
-
-        RESULTS.append({
-            "name":  f"Simulation: {case}",
-            "unit":  "seconds",
-            "value": runtime
-        })
-        
-        table.add_row(case, str(runtime))
-    
-    tasks: typing.List[sched.Task] = [
-        sched.Task(1, __worker, [ case ], 1) for case in CASES
-    ]
-    
     cons.print()
-    nThreads = min(ARG('jobs'), len(ARG('gpus'))) if ARG("gpu") else ARG('jobs')
-    if ARG('case_optimization'):
-        nThreads = 1
 
-    sched.sched(tasks, nThreads, ARG("gpus"))
-    cons.print()
-    cons.unindent()
-    cons.print("[bold]Benchmark Results:[/bold]")
-    cons.print()
-    cons.raw.print(table)
-    cons.print()
-    
-    filepath = os.path.join(MFC_SUBDIR, "bench.json")
-    with open(filepath, "w") as f:
-        json.dump(RESULTS, f)
-    
-    cons.print(f"[bold green]✓[/bold green] Saved results to [magenta]{filepath}[/magenta].")
+    CASES = [ BenchCase(**case) for case in file_load_yaml(MFC_BENCH_FILEPATH) ]
+    # Arguments forwarded on the command line are appended to every case's own.
+    for case in CASES:
+        case.args = case.args + ARG("forwarded")
+
+    cons.print(f"Found [magenta]{len(CASES)}[/magenta] cases.")
+
+    results = {
+        "metadata": {
+            "invocation": sys.argv[1:],
+            "lock":       dataclasses.asdict(CFG())
+        },
+        "cases": [],
+    }
+
+    for i, case in enumerate(CASES):
+        cons.print(f"{str(i+1).zfill(len(str(len(CASES))))}/{len(CASES)}: {case.name} @ [bold]{case.path}[/bold]")
+        system(["./mfc.sh", "build", "--targets", "pre_process", "simulation", "--case-optimization", "--input", case.path], stdout=subprocess.DEVNULL)
+
+        case_results = dataclasses.asdict(case)
+        # perf_counter() is monotonic, so system clock adjustments cannot skew the timings.
+        for target in [PRE_PROCESS, SIMULATION]:
+            start = time.perf_counter()
+            system(["./mfc.sh", "run", case.path, "--targets", target.name, "--case-optimization", *case.args], stdout=subprocess.DEVNULL)
+            case_results[target.name] = time.perf_counter() - start
+
+        results["cases"].append(case_results)
+
+    file_dump_yaml(ARG("output"), results)
+
+    cons.unindent()
\ No newline at end of file
diff --git a/toolchain/mfc/common.py b/toolchain/mfc/common.py
index b6a475fe79..866c5d00ac 100644
--- a/toolchain/mfc/common.py
+++ b/toolchain/mfc/common.py
@@ -7,12 +7,11 @@
 from os.path import abspath, normpath, dirname, realpath
 
 
-MFC_ROOTDIR       = normpath(f"{dirname(realpath(__file__))}/../..")
-MFC_TESTDIR       = abspath(f"{MFC_ROOTDIR}/tests")
-MFC_SUBDIR        = abspath(f"{MFC_ROOTDIR}/build")
-MFC_DEV_FILEPATH  = abspath(f"{MFC_ROOTDIR}/toolchain/mfc.dev.yaml")
-MFC_USER_FILEPATH = abspath(f"{MFC_ROOTDIR}/defaults.yaml")
-MFC_LOCK_FILEPATH = abspath(f"{MFC_SUBDIR}/lock.yaml")
+MFC_ROOTDIR        = normpath(f"{dirname(realpath(__file__))}/../..")
+MFC_TESTDIR        = abspath(f"{MFC_ROOTDIR}/tests")
+MFC_SUBDIR         = abspath(f"{MFC_ROOTDIR}/build")
+MFC_LOCK_FILEPATH  = abspath(f"{MFC_SUBDIR}/lock.yaml")
+MFC_BENCH_FILEPATH = abspath(f"{MFC_ROOTDIR}/toolchain/bench.yaml")
 
 MFC_LOGO = f"""
      .=++*:          -+*+=.