diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
new file mode 100644
index 0000000000..793fd40d5b
--- /dev/null
+++ b/.github/workflows/bench.yml
@@ -0,0 +1,56 @@
+name: 'Benchmark'
+
+on:
+  pull_request:
+    paths:
+      - '**.f90'
+      - '**.fpp'
+      - '**.py'
+      - '**.yml'
+      - 'mfc.sh'
+      - 'CMakeLists.txt'
+      - 'requirements.txt'
+
+jobs:
+  self:
+    name: Georgia Tech | Phoenix (NVHPC)
+    if: github.repository == 'MFlowCode/MFC'
+    strategy:
+      matrix:
+        device: ['cpu', 'gpu']
+    runs-on:
+      group: phoenix
+      labels: self-hosted
+    steps:
+      - name: Clone - PR
+        uses: actions/checkout@v3
+
+      - name: Bench - PR
+        run: |
+          bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}
+          mv bench-${{ matrix.device }}.out  ~/bench-${{ matrix.device }}-pr.out
+          mv bench-${{ matrix.device }}.yaml ~/bench-${{ matrix.device }}-pr.yaml
+
+      - name: Clone - Master
+        uses: actions/checkout@v3
+        with:
+          repository: henryleberre/MFC
+          ref: master
+
+      - name: Bench - Master
+        run: |
+          bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}
+          mv bench-${{ matrix.device }}.out  ~/bench-${{ matrix.device }}-master.out
+          mv bench-${{ matrix.device }}.yaml ~/bench-${{ matrix.device }}-master.yaml
+
+      - name: Post Comment
+        run: |
+          python3 .github/workflows/phoenix/compare.py ~/bench-${{ matrix.device }}-master.yaml ~/bench-${{ matrix.device }}-pr.yaml
+
+      - name: Archive Logs
+        uses: actions/upload-artifact@v3
+        if: always()
+        with:
+          name: logs-${{ matrix.device }}
+          path: |
+            ~/bench-${{ matrix.device }}-*
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 4e3431ab2a..ac27cf6dc3 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -22,7 +22,7 @@ jobs:
           password: ${{ secrets.DOCKER_PASSWORD }}
 
       - name: Build & Publish thereto
-        uses: docker/build-push-action@v4
+        uses: docker/build-push-action@v3
         with:
          file: toolchain/Dockerfile
          push: true
diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh
new file mode 100644
index 0000000000..55c07dea22
--- /dev/null
+++ b/.github/workflows/phoenix/bench.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+n_ranks=12
+
+if [ "$job_device" == "gpu" ]; then
+    n_ranks=$(nvidia-smi -L | wc -l)        # number of GPUs on node
+    gpu_ids=$(seq -s ' ' 0 $(($n_ranks-1))) # 0,1,2,...,gpu_count-1
+    device_opts="--gpu -g $gpu_ids"
+fi
+
+./mfc.sh bench "$job_slug.yaml" -j $(nproc) -b mpirun $device_opts -n $n_ranks
diff --git a/.github/workflows/phoenix/compare.py b/.github/workflows/phoenix/compare.py
new file mode 100644
index 0000000000..90be62996e
--- /dev/null
+++ b/.github/workflows/phoenix/compare.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+
+import argparse
+
+import yaml
+
+parser = argparse.ArgumentParser()
+parser.add_argument('master', metavar="MASTER", type=str)
+parser.add_argument('pr', metavar="PR", type=str)
+
+args = parser.parse_args()
+
+def load_cases(filepath):
+    return { case["name"]: case for case in yaml.safe_load(open(filepath))["cases"] }
+
+master, pr = load_cases(args.master), load_cases(args.pr)
+
+master_keys = set(master.keys())
+pr_keys = set(pr.keys())
+
+missing_cases = master_keys.symmetric_difference(pr_keys)
+
+if len(missing_cases) > 0:
+    print("**Warning:** The following cases are **missing** from master or this PR:\n")
+
+    for case in missing_cases:
+        print(f" - {case}.")
+
+    print("")
+
+speedups = {}
+
+for case in master_keys.intersection(pr_keys):
+    speedups[case] = {
+        "pre_process": master[case]["pre_process"] / pr[case]["pre_process"],
+        "simulation": master[case]["simulation"] / pr[case]["simulation"],
+    }
+
+avg_speedup = sum([ speedups[case]["simulation"] for case in speedups ]) / len(speedups)
+
+print(f"""\
+**[Benchmark Results]** Compared to Master, this PR's `simulation` is on average **~{avg_speedup:0.2f}x faster**.
+
+| **Case** | **Master** | **PR** | **Speedup** |
+| -------- | ---------- | ------ | ----------- |\
+""")
+
+for case in sorted(speedups.keys()):
+    speedup = speedups[case]
+
+    print(f"| {case} | {master[case]['simulation']:0.2f}s | {pr[case]['simulation']:0.2f}s | {speedups[case]['simulation']:0.2f}x |")
diff --git a/.github/workflows/phoenix/submit.sh b/.github/workflows/phoenix/submit.sh
new file mode 100644
index 0000000000..e4a09d26da
--- /dev/null
+++ b/.github/workflows/phoenix/submit.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+set -e
+
+usage() {
+    echo "Usage: $0 [script.sh] [cpu|gpu]"
+}
+
+if [ ! -z "$1" ]; then
+    sbatch_script_contents=`cat $1`
+else
+    usage
+    exit 1
+fi
+
+sbatch_cpu_opts="\
+#SBATCH --ntasks-per-node=12 # Number of cores per node required
+#SBATCH --mem-per-cpu=2G     # Memory per core\
+"
+
+sbatch_gpu_opts="\
+#SBATCH -CV100-16GB
+#SBATCH -G2\
+"
+
+if [ "$2" == "cpu" ]; then
+    sbatch_device_opts="$sbatch_cpu_opts"
+elif [ "$2" == "gpu" ]; then
+    sbatch_device_opts="$sbatch_gpu_opts"
+else
+    usage
+    exit 1
+fi
+
+job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2"
+
+sbatch <
diff --git a/misc/run-phoenix-release-cpu.sh b/misc/run-phoenix-release-cpu.sh
deleted file mode 100644
index 2f649d07a7..0000000000
--- a/misc/run-phoenix-release-cpu.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-#SBATCH -Jshb-test-jobs            # Job name
-#SBATCH --account=gts-sbryngelson3 # charge account
-#SBATCH -N1 --ntasks-per-node=12   # Number of nodes and cores per node required
-#SBATCH --mem-per-cpu=2G           # Memory per core
-#SBATCH -t 04:00:00                # Duration of the job (Ex: 15 mins)
-#SBATCH -q embers                  # QOS Name
-#SBATCH -otest.out                 # Combined output and error messages file
-#SBATCH -W                         # Do not exit until the submitted job terminates.
-
-cd "$SLURM_SUBMIT_DIR"
-echo "Running in $(pwd):"
-
-. ./mfc.sh load -c p -m gpu
-./mfc.sh test -j $(nproc) -b mpirun -a
-
diff --git a/misc/run-phoenix-release-gpu.sh b/misc/run-phoenix-release-gpu.sh
deleted file mode 100644
index bb27af6b50..0000000000
--- a/misc/run-phoenix-release-gpu.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-#SBATCH -Jshb-test-jobs            # Job name
-#SBATCH --account=gts-sbryngelson3 # charge account
-#SBATCH -N1                        # Number of nodes and cores per node required
-#SBATCH -CV100-16GB
-#SBATCH -G2
-#SBATCH -t 02:00:00                # Duration of the job (Ex: 15 mins)
-#SBATCH -q embers                  # QOS Name
-#SBATCH -otest.out                 # Combined output and error messages file
-#SBATCH -W                         # Do not exit until the submitted job terminates.
-
-cd "$SLURM_SUBMIT_DIR"
-echo "Running in $(pwd):"
-
-set -x
-
-. ./mfc.sh load -c p -m gpu
-
-gpu_count=$(nvidia-smi -L | wc -l)        # number of GPUs on node
-gpu_ids=$(seq -s ' ' 0 $(($gpu_count-1))) # 0,1,2,...,gpu_count-1
-
-./mfc.sh test -a -b mpirun -j $(nproc) \
-    --gpu -g $gpu_ids
-
diff --git a/toolchain/bench.yaml b/toolchain/bench.yaml
new file mode 100644
index 0000000000..110f4e354f
--- /dev/null
+++ b/toolchain/bench.yaml
@@ -0,0 +1,3 @@
+- name: 1D_bubblescreen
+  path: examples/1D_bubblescreen/case.py
+  args: []
diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py
index 7d7a4ca12e..ba870940ab 100644
--- a/toolchain/mfc/args.py
+++ b/toolchain/mfc/args.py
@@ -122,7 +122,9 @@ def add_common_arguments(p, mask = None):
     run.add_argument("--wait", action="store_true", default=False, help="(Batch) Wait for the job to finish.")
 
     # === BENCH ===
-    add_common_arguments(bench, "t")
+    add_common_arguments(bench, "tjgn")
+    bench.add_argument("output", metavar="OUTPUT", default=None, type=str, help="Path to the YAML output file to write the results to.")
+    bench.add_argument(metavar="FORWARDED", default=[], dest='forwarded', nargs=argparse.REMAINDER, help="Arguments to forward to the ./mfc.sh run invocations.")
 
     # === COUNT ===
     add_common_arguments(count, "g")
diff --git a/toolchain/mfc/bench.py b/toolchain/mfc/bench.py
index 65bacbe902..d36b756755 100644
--- a/toolchain/mfc/bench.py
+++ b/toolchain/mfc/bench.py
@@ -1,62 +1,53 @@
-import os, json, time, typing, datetime, subprocess
-
-import rich.table
+import sys, time, subprocess, dataclasses
 
 from .printer import cons
-from .state import ARG
+from .state import ARG, CFG
 from .build import PRE_PROCESS, SIMULATION, build_targets
-from .common import system, MFC_SUBDIR
+from .common import system, MFC_BENCH_FILEPATH, file_load_yaml, file_dump_yaml
 from . import sched
 
+
+@dataclasses.dataclass
+class BenchCase:
+    name: str
+    path: str
+    args: list[str]
+
+
 def bench():
-    build_targets([PRE_PROCESS, SIMULATION])
-
+    cons.print()
     cons.print("[bold]Benchmarking [magenta]simulation[/magenta]:[/bold]")
     cons.indent()
-
-    CASES = ["1D_bubblescreen", "1D_exercise_WENO", "1D_kapilashocktube"]
-    RESULTS = []
-
-    table = rich.table.Table(show_lines=False, show_edge=False)
-    table.add_column("Case")
-    table.add_column("(Simulation) Runtime (s)")
-
-    def __worker(case: str, devices: typing.Set[int]):
-        nonlocal RESULTS
-
-        system(["./mfc.sh", "run", f"examples/{case}/case.py", "--no-build", "-t", "pre_process"], stdout=subprocess.DEVNULL)
-        start = time.monotonic()
-        system(["./mfc.sh", "run", f"examples/{case}/case.py", "--no-build", "-t", "simulation"], stdout=subprocess.DEVNULL)
-        end = time.monotonic()
-        runtime = datetime.timedelta(seconds=end - start).total_seconds()
-
-        RESULTS.append({
-            "name": f"Simulation: {case}",
-            "unit": "seconds",
-            "value": runtime
-        })
-
-        table.add_row(case, str(runtime))
-
-    tasks: typing.List[sched.Task] = [
-        sched.Task(1, __worker, [ case ], 1) for case in CASES
-    ]
-    cons.print()
-    nThreads = min(ARG('jobs'), len(ARG('gpus'))) if ARG("gpu") else ARG('jobs')
-    if ARG('case_optimization'):
-        nThreads = 1
-    sched.sched(tasks, nThreads, ARG("gpus"))
-    cons.print()
-    cons.unindent()
-    cons.print("[bold]Benchmark Results:[/bold]")
-    cons.print()
-    cons.raw.print(table)
-    cons.print()
-
-    filepath = os.path.join(MFC_SUBDIR, "bench.json")
-    with open(filepath, "w") as f:
-        json.dump(RESULTS, f)
-
-    cons.print(f"[bold green]✓[/bold green] Saved results to [magenta]{filepath}[/magenta].")
+    CASES = [ BenchCase(**case) for case in file_load_yaml(MFC_BENCH_FILEPATH) ]
+
+    for case in CASES:
+        case.args = case.args + ARG("forwarded")
+
+    cons.print(f"Found [magenta]{len(CASES)}[/magenta] cases.")
+
+    results = {
+        "metadata": {
+            "invocation": sys.argv[1:],
+            "lock": dataclasses.asdict(CFG())
+        },
+        "cases": [],
+    }
+
+    for i, case in enumerate(CASES):
+        cons.print(f"{str(i+1).zfill(len(CASES) // 10 + 1)}/{len(CASES)}: {case.name} @ [bold]{case.path}[/bold]")
+        system(["./mfc.sh", "build", "--targets", "pre_process", "simulation", "--case-optimization", "--input", case.path], stdout=subprocess.DEVNULL)
+
+        case_results = dataclasses.asdict(case)
+
+        for target in [PRE_PROCESS, SIMULATION]:
+            start = time.time()
+            system(["./mfc.sh", "run", case.path, "--targets", target.name, "--case-optimization", *case.args], stdout=subprocess.DEVNULL)
+            case_results[target.name] = time.time() - start
+
+        results["cases"].append(case_results)
+
+    file_dump_yaml(ARG("output"), results)
+
+    cons.unindent()
\ No newline at end of file
diff --git a/toolchain/mfc/common.py b/toolchain/mfc/common.py
index b6a475fe79..866c5d00ac 100644
--- a/toolchain/mfc/common.py
+++ b/toolchain/mfc/common.py
@@ -7,12 +7,11 @@
 from os.path import abspath, normpath, dirname, realpath
 
-MFC_ROOTDIR       = normpath(f"{dirname(realpath(__file__))}/../..")
-MFC_TESTDIR       = abspath(f"{MFC_ROOTDIR}/tests")
-MFC_SUBDIR        = abspath(f"{MFC_ROOTDIR}/build")
-MFC_DEV_FILEPATH  = abspath(f"{MFC_ROOTDIR}/toolchain/mfc.dev.yaml")
-MFC_USER_FILEPATH = abspath(f"{MFC_ROOTDIR}/defaults.yaml")
-MFC_LOCK_FILEPATH = abspath(f"{MFC_SUBDIR}/lock.yaml")
+MFC_ROOTDIR        = normpath(f"{dirname(realpath(__file__))}/../..")
+MFC_TESTDIR        = abspath(f"{MFC_ROOTDIR}/tests")
+MFC_SUBDIR         = abspath(f"{MFC_ROOTDIR}/build")
+MFC_LOCK_FILEPATH  = abspath(f"{MFC_SUBDIR}/lock.yaml")
+MFC_BENCH_FILEPATH = abspath(f"{MFC_ROOTDIR}/toolchain/bench.yaml")
 
 MFC_LOGO = f"""
      .=++*:          -+*+=.