#49: GitHub CI Benchmarking

MFlowCode · Dec 16, 2023 · 3f2a826 · 3f2a826
1 parent 371c51a
commit 3f2a826
Show file tree

Hide file tree

Showing 14 changed files with 261 additions and 115 deletions.
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
@@ -0,0 +1,64 @@
+# Benchmarks pull requests that touch the paths listed below against the master
+# branch on the Georgia Tech Phoenix self-hosted runners, once per device.
+name: 'Benchmark'
+
+on:
+  pull_request:
+    paths:
+      - '**.f90'
+      - '**.fpp'
+      - '**.py'
+      - '**.yml'
+      - 'mfc.sh'
+      - 'CMakeLists.txt'
+      - 'requirements.txt'
+
+jobs:
+  self:
+    name: Georgia Tech | Phoenix (NVHPC)
+    # Skip this job in forks: it only runs in MFlowCode/MFC.
+    if: github.repository == 'MFlowCode/MFC'
+    strategy:
+      matrix:
+        device: ['cpu', 'gpu']
+    runs-on:
+      group:  phoenix
+      labels: self-hosted
+    steps:
+      - name: Clone - PR
+        uses: actions/checkout@v3
+
+      # The results are moved to the home directory so that the checkout of
+      # master below cannot remove them from the workspace.
+      - name: Bench - PR
+        run: |
+          bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}
+          mv bench-${{ matrix.device }}.out ~/bench-${{ matrix.device }}-pr.out
+          mv bench-${{ matrix.device }}.yaml ~/bench-${{ matrix.device }}-pr.yaml
+
+      # Use the master branch of the repository this job is gated on as the
+      # baseline, rather than the master branch of a personal fork.
+      - name: Clone - Master
+        uses: actions/checkout@v3
+        with:
+          repository: MFlowCode/MFC
+          ref: master
+
+      - name: Bench - Master
+        run: |
+          bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}
+          mv bench-${{ matrix.device }}.out ~/bench-${{ matrix.device }}-master.out
+          mv bench-${{ matrix.device }}.yaml ~/bench-${{ matrix.device }}-master.yaml
+
+      # compare.py prints a Markdown summary of the two result files to the log.
+      - name: Post Comment
+        run: |
+          python3 .github/workflows/phoenix/compare.py ~/bench-${{ matrix.device }}-master.yaml ~/bench-${{ matrix.device }}-pr.yaml
+
+      - name: Archive Logs
+        uses: actions/upload-artifact@v3
+        if: always()
+        with:
+          name: logs-${{ matrix.device }}
+          path: |
+            ~/bench-${{ matrix.device }}-*
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -22,7 +22,7 @@ jobs:
         password: ${{ secrets.DOCKER_PASSWORD }}
 
     - name: Build & Publish thereto
-      uses: docker/build-push-action@v4
+      uses: docker/build-push-action@v3
       with:
         file: toolchain/Dockerfile
         push: true

diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Runs ./mfc.sh bench and has it write its results to "$job_slug.yaml".
+# job_slug and job_device are set by submit.sh, which places this script's
+# contents inside the batch job it submits.
+
+# Value passed to -n for CPU runs; GPU runs use the number of GPUs on the node.
+n_ranks=12
+
+case "$job_device" in
+    gpu)
+        # nvidia-smi -L prints one line per GPU on the node.
+        n_ranks=$(nvidia-smi -L | wc -l)
+        # Space-separated GPU ids: 0 1 2 ... n_ranks-1
+        gpu_ids=$(seq -s ' ' 0 $((n_ranks - 1)))
+        device_opts="--gpu -g $gpu_ids"
+        ;;
+esac
+
+# $device_opts is left unquoted on purpose so that it splits into separate arguments.
+./mfc.sh bench "$job_slug.yaml" -j $(nproc) -b mpirun $device_opts -n $n_ranks
diff --git a/.github/workflows/phoenix/compare.py b/.github/workflows/phoenix/compare.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+
+"""Compare two benchmark result files and print a Markdown report.
+
+Usage: compare.py MASTER PR
+
+Both files are YAML documents with a top-level "cases" list. Every case has a
+"name" and the "pre_process" and "simulation" timings that are compared here.
+"""
+
+import argparse
+
+import yaml
+
+parser = argparse.ArgumentParser()
+parser.add_argument('master', metavar="MASTER", type=str)
+parser.add_argument('pr',     metavar="PR",     type=str)
+
+args = parser.parse_args()
+
+def load_cases(filepath):
+    """Return the cases stored in the result file at filepath, keyed by name."""
+    # The with-block closes the file instead of leaking the handle.
+    with open(filepath) as file:
+        return { case["name"]: case for case in yaml.safe_load(file)["cases"] }
+
+def compute_speedups(master, pr):
+    """Return the speedup of pr over master for every case present in both.
+
+    Timings are durations, so a speedup is the master time divided by the PR
+    time: values above 1 mean that the PR is faster than master.
+    """
+    return {
+        name: {
+            "pre_process": master[name]["pre_process"] / pr[name]["pre_process"],
+            "simulation":  master[name]["simulation"]  / pr[name]["simulation"],
+        }
+        for name in master.keys() & pr.keys()
+    }
+
+master, pr = load_cases(args.master), load_cases(args.pr)
+
+missing_cases = set(master).symmetric_difference(pr)
+
+if len(missing_cases) > 0:
+    print("**Warning:** The following cases are **missing** from master or this PR:\n")
+
+    # Sorted so that the report does not depend on set iteration order.
+    for case in sorted(missing_cases):
+        print(f" - {case}.")
+
+    print("")
+
+speedups = compute_speedups(master, pr)
+
+# Without a common case there is nothing to average; fail with a clear message
+# rather than a ZeroDivisionError.
+if not speedups:
+    raise SystemExit("Error: master and this PR have no benchmark cases in common.")
+
+avg_speedup = sum(speedup["simulation"] for speedup in speedups.values()) / len(speedups)
+
+print(f"""\
+**[Benchmark Results]** Compared to Master, this PR's `simulation` is on average **~{avg_speedup:0.2f}x faster**.
+
+| **Case** | **Master** | **PR** | **Speedup** |
+| -------- | ---------- | ------ | ----------- |\
+""")
+
+for case in sorted(speedups.keys()):
+    speedup = speedups[case]
+
+    print(f"| {case} | {master[case]['simulation']:0.2f}s | {pr[case]['simulation']:0.2f}s | {speedup['simulation']:0.2f}x |")
diff --git a/.github/workflows/phoenix/submit.sh b/.github/workflows/phoenix/submit.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+# Wraps a shell script in a SLURM batch job and submits it with sbatch, waiting
+# for the job to terminate (sbatch -W) before returning.
+#
+# Usage: submit.sh [script.sh] [cpu|gpu]
+#
+# The job writes its output to <job_slug>.out, where job_slug is the script's
+# base name (".sh" removed, other non-alphanumeric characters replaced by "-")
+# followed by "-cpu" or "-gpu". The wrapped script runs after job_slug and
+# job_device have been set and "./mfc.sh load" has been sourced.
+
+set -e
+
+usage() {
+    echo "Usage: $0 [script.sh] [cpu|gpu]"
+}
+
+# The path is quoted so that whitespace in it cannot split it into several
+# arguments, and a missing script is rejected with the usage message.
+if [ -f "$1" ]; then
+    sbatch_script_contents=$(cat "$1")
+else
+    usage
+    exit 1
+fi
+
+sbatch_cpu_opts="\
+#SBATCH --ntasks-per-node=12       # Number of cores per node required
+#SBATCH --mem-per-cpu=2G           # Memory per core\
+"
+
+sbatch_gpu_opts="\
+#SBATCH -CV100-16GB
+#SBATCH -G2\
+"
+
+case "$2" in
+    cpu) sbatch_device_opts="$sbatch_cpu_opts" ;;
+    gpu) sbatch_device_opts="$sbatch_gpu_opts" ;;
+    *)
+        usage
+        exit 1
+        ;;
+esac
+
+job_slug="$(basename "$1" | sed -e 's/\.sh$//' -e 's/[^a-zA-Z0-9]/-/g')-$2"
+
+# The here-document is unquoted, so the variables of this script (and the pwd
+# command substitution) are expanded now, at submission time. Only the escaped
+# SLURM_SUBMIT_DIR reference is left for the job itself to expand.
+sbatch <<EOT
+#!/bin/bash
+#SBATCH -Jshb-$job_slug            # Job name
+#SBATCH --account=gts-sbryngelson3 # charge account
+#SBATCH -N1                        # Number of nodes required
+$sbatch_device_opts
+#SBATCH -t 04:00:00                # Duration of the job (Ex: 15 mins)
+#SBATCH -q embers                  # QOS Name
+#SBATCH -o$job_slug.out            # Combined output and error messages file
+#SBATCH -W                         # Do not exit until the submitted job terminates.
+
+set -e
+set -x
+
+cd "\$SLURM_SUBMIT_DIR"
+echo "Running in $(pwd):"
+
+job_slug="$job_slug"
+job_device="$2"
+
+. ./mfc.sh load -c p -m $2
+
+$sbatch_script_contents
+
+EOT
diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Builds MFC with ./mfc.sh build and then runs ./mfc.sh test. job_device is set
+# by submit.sh, which places this script's contents inside the batch job it
+# submits.
+
+build_opts=""
+case "$job_device" in
+    gpu) build_opts="--gpu" ;;
+esac
+
+./mfc.sh build -j $(nproc) $build_opts
+
+n_test_threads=$(nproc)
+
+case "$job_device" in
+    gpu)
+        # nvidia-smi -L prints one line per GPU on the node.
+        gpu_count=$(nvidia-smi -L | wc -l)
+        # Space-separated GPU ids: 0 1 2 ... gpu_count-1
+        gpu_ids=$(seq -s ' ' 0 $((gpu_count - 1)))
+        device_opts="-g $gpu_ids"
+        # -j is given twice the GPU count.
+        n_test_threads=$(expr $gpu_count \* 2)
+        ;;
+esac
+
+./mfc.sh test -a -b mpirun -j $n_test_threads $device_opts
diff --git a/.github/workflows/ci.yml → .github/workflows/test.yml b/.github/workflows/ci.yml → .github/workflows/test.yml
@@ -114,19 +114,12 @@ jobs:
       - name: Clone
         uses: actions/checkout@v3
 
-      - name: Build
-        run:  |
-          . ./mfc.sh load -c p -m gpu
-          ./mfc.sh build -j 2 $(if [ '${{ matrix.device }}' == 'gpu' ]; then echo '--gpu'; fi)
+      - name: Build & Test
+        run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/test.sh ${{ matrix.device }}
 
-      - name: Test
-        run: |
-          . ./mfc.sh load -c p -m gpu
-          mv misc/run-phoenix-release-${{ matrix.device }}.sh ./
-          sbatch run-phoenix-release-${{ matrix.device }}.sh
-
-      - name: Print
+      - name: Archive Logs
+        uses: actions/upload-artifact@v3
         if: always()
-        run: |
-          cat test.out
-
+        with:
+          name: logs
+          path: test-${{ matrix.device }}.out
diff --git a/README.md b/README.md
@@ -9,7 +9,7 @@
     <img src="https://zenodo.org/badge/doi/10.1016/j.cpc.2020.107396.svg" />
   </a>
   <a href="https://github.com/MFlowCode/MFC/actions">
-    <img src="https://github.com/MFlowCode/MFC/actions/workflows/ci.yml/badge.svg" />
+    <img src="https://github.com/MFlowCode/MFC/actions/workflows/test.yml/badge.svg" />
   </a>
   <a href="https://lbesson.mit-license.org/">
     <img src="https://img.shields.io/badge/License-MIT-blue.svg" />

diff --git a/misc/run-phoenix-release-cpu.sh b/misc/run-phoenix-release-cpu.sh
diff --git a/misc/run-phoenix-release-gpu.sh b/misc/run-phoenix-release-gpu.sh
diff --git a/toolchain/bench.yaml b/toolchain/bench.yaml
@@ -0,0 +1,3 @@
+- name: 1D_bubblescreen
+  path: examples/1D_bubblescreen/case.py
+  args: []
diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py
@@ -122,7 +122,9 @@ def add_common_arguments(p, mask = None):
     run.add_argument("--wait",                 action="store_true",                       default=False,      help="(Batch) Wait for the job to finish.")
 
     # === BENCH ===
-    add_common_arguments(bench, "t")
+    add_common_arguments(bench, "tjgn")
+    bench.add_argument("output", metavar="OUTPUT", default=None, type=str, help="Path to the YAML output file to write the results to.")
+    bench.add_argument(metavar="FORWARDED", default=[], dest='forwarded', nargs=argparse.REMAINDER, help="Arguments to forward to the ./mfc.sh run invocations.")
 
     # === COUNT ===
     add_common_arguments(count, "g")