forked from MFlowCode/MFC
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Batch files per computer (MFlowCode#240 & MFlowCode#287)
- Loading branch information
1 parent 15bd177, commit 5083b0a
Showing 13 changed files with 350 additions and 369 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py | ||
index 8b2d48e..06add8b 100644 | ||
--- a/toolchain/mfc/args.py | ||
+++ b/toolchain/mfc/args.py | ||
@@ -104,27 +104,28 @@ started, run ./mfc.sh build -h.""", | ||
engines = [ e.slug for e in ENGINES ] | ||
|
||
add_common_arguments(run) | ||
- run.add_argument("input", metavar="INPUT", type=str, help="Input file to run.") | ||
- run.add_argument("arguments", metavar="ARGUMENTS", nargs="*", type=str, default=[], help="Additional arguments to pass to the case file.") | ||
- run.add_argument("-e", "--engine", choices=engines, type=str, default=engines[0], help="Job execution/submission engine choice.") | ||
+ run.add_argument("input", metavar="INPUT", type=str, help="Input file to run.") | ||
+ run.add_argument("arguments", metavar="ARGUMENTS", nargs="*", type=str, default=[], help="Additional arguments to pass to the case file.") | ||
+ run.add_argument("-e", "--engine", choices=engines, type=str, default=engines[0], help="Job execution/submission engine choice.") | ||
run.add_argument("--output-summary", type=str, default=None, help="(Interactive) Output a YAML summary file.") | ||
- run.add_argument("-p", "--partition", metavar="PARTITION", type=str, default="", help="(Batch) Partition for job submission.") | ||
- run.add_argument("-N", "--nodes", metavar="NODES", type=int, default=1, help="(Batch) Number of nodes.") | ||
- run.add_argument("-n", "--tasks-per-node", metavar="TASKS", type=int, default=1, help="Number of tasks per node.") | ||
- run.add_argument("-w", "--walltime", metavar="WALLTIME", type=str, default="01:00:00", help="(Batch) Walltime.") | ||
- run.add_argument("-a", "--account", metavar="ACCOUNT", type=str, default="", help="(Batch) Account to charge.") | ||
- run.add_argument("-@", "--email", metavar="EMAIL", type=str, default="", help="(Batch) Email for job notification.") | ||
- run.add_argument("-#", "--name", metavar="NAME", type=str, default="MFC", help="(Batch) Job name.") | ||
- run.add_argument("-b", "--binary", choices=binaries, type=str, default=None, help="(Interactive) Override MPI execution binary") | ||
- run.add_argument("-s", "--scratch", action="store_true", default=False, help="Build from scratch.") | ||
- run.add_argument("--ncu", nargs=argparse.REMAINDER, type=str, help="Profile with NVIDIA Nsight Compute.") | ||
- run.add_argument("--nsys", nargs=argparse.REMAINDER, type=str, help="Profile with NVIDIA Nsight Systems.") | ||
- run.add_argument( "--dry-run", action="store_true", default=False, help="(Batch) Run without submitting batch file.") | ||
- run.add_argument("--case-optimization", action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.") | ||
- run.add_argument( "--no-build", action="store_true", default=False, help="(Testing) Do not rebuild MFC.") | ||
- run.add_argument("--wait", action="store_true", default=False, help="(Batch) Wait for the job to finish.") | ||
- run.add_argument("-f", "--flags", metavar="FLAGS", dest="--", nargs=argparse.REMAINDER, type=str, default=[], help="(Interactive) Arguments to forward to the MPI invocation.") | ||
- run.add_argument("-c", "--computer", metavar="COMPUTER", type=str, default=None, help=f"(Batch) Path to a custom submission file template or one of {format_list_to_string(list(get_baked_templates().keys()))}.") | ||
+ run.add_argument("-p", "--partition", metavar="PARTITION", type=str, default="", help="(Batch) Partition for job submission.") | ||
+ run.add_argument("-q", "--quality_of_service", metavar="QOS", type=str, default="", help="(Batch) Quality of Service for job submission.") | ||
+ run.add_argument("-N", "--nodes", metavar="NODES", type=int, default=1, help="(Batch) Number of nodes.") | ||
+ run.add_argument("-n", "--tasks-per-node", metavar="TASKS", type=int, default=1, help="Number of tasks per node.") | ||
+ run.add_argument("-w", "--walltime", metavar="WALLTIME", type=str, default="01:00:00", help="(Batch) Walltime.") | ||
+ run.add_argument("-a", "--account", metavar="ACCOUNT", type=str, default="", help="(Batch) Account to charge.") | ||
+ run.add_argument("-@", "--email", metavar="EMAIL", type=str, default="", help="(Batch) Email for job notification.") | ||
+ run.add_argument("-#", "--name", metavar="NAME", type=str, default="MFC", help="(Batch) Job name.") | ||
+ run.add_argument("-b", "--binary", choices=binaries, type=str, default=None, help="(Interactive) Override MPI execution binary") | ||
+ run.add_argument("-s", "--scratch", action="store_true", default=False, help="Build from scratch.") | ||
+ run.add_argument("--ncu", nargs=argparse.REMAINDER, type=str, help="Profile with NVIDIA Nsight Compute.") | ||
+ run.add_argument("--nsys", nargs=argparse.REMAINDER, type=str, help="Profile with NVIDIA Nsight Systems.") | ||
+ run.add_argument( "--dry-run", action="store_true", default=False, help="(Batch) Run without submitting batch file.") | ||
+ run.add_argument("--case-optimization", action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.") | ||
+ run.add_argument( "--no-build", action="store_true", default=False, help="(Testing) Do not rebuild MFC.") | ||
+ run.add_argument("--wait", action="store_true", default=False, help="(Batch) Wait for the job to finish.") | ||
+ run.add_argument("-f", "--flags", metavar="FLAGS", dest="--", nargs=argparse.REMAINDER, type=str, default=[], help="(Interactive) Arguments to forward to the MPI invocation.") | ||
+ run.add_argument("-c", "--computer", metavar="COMPUTER", type=str, default=None, help=f"(Batch) Path to a custom submission file template or one of {format_list_to_string(list(get_baked_templates().keys()))}.") | ||
|
||
# === BENCH === | ||
add_common_arguments(bench, "t") | ||
diff --git a/toolchain/mfc/run/engines.py b/toolchain/mfc/run/engines.py | ||
index c095e9b..b9830fb 100644 | ||
--- a/toolchain/mfc/run/engines.py | ||
+++ b/toolchain/mfc/run/engines.py | ||
@@ -264,15 +264,15 @@ exit $code | ||
s = re.sub(r"^#>.*\n", "", s, flags=re.MULTILINE) | ||
s = re.sub(r"^\n{2,}", "\n", s, flags=re.MULTILINE) | ||
|
||
- # Evaluate expressions of the form "{expression}" | ||
- for match in re.findall(r"{[^\{]+}", s, flags=re.MULTILINE): | ||
- repl = self.__evaluate_expression(match[1:-1]) | ||
+ # Evaluate expressions of the form "{{{expression}}}" | ||
+ for match in re.findall(r"{{{[\s\S]+?}}}", s, flags=re.MULTILINE): | ||
+ repl = self.__evaluate_expression(match[3:-3]) | ||
|
||
if repl is not None: | ||
s = s.replace(match, repl) | ||
else: | ||
# If not specified, then remove the line it appears on | ||
- s = re.sub(rf"^.*{match}.*$\n", "", s, flags=re.MULTILINE) | ||
+ s = re.sub(rf"^.*{re.escape(match)}[\s\S]*?$\n", "", s, flags=re.MULTILINE) | ||
|
||
cons.print(f"> > [bold yellow]Warning:[/bold yellow] [magenta]{match[1:-1]}[/magenta] was not specified. Thus, any line it appears on will be discarded.") | ||
|
||
diff --git a/toolchain/mfc/run/queues.py b/toolchain/mfc/run/queues.py | ||
index 2248945..124716a 100644 | ||
--- a/toolchain/mfc/run/queues.py | ||
+++ b/toolchain/mfc/run/queues.py | ||
@@ -21,11 +21,11 @@ class QueueSystem: | ||
self.name = name | ||
|
||
def get_template(self) -> str: | ||
- if computer := ARG("computer") is None: | ||
+ if (computer := ARG("computer")) is None: | ||
raise common.MFCException(f"{self.name}: --computer is required.") | ||
|
||
baked = get_baked_templates() | ||
- if content := baked.get(self.name.lower()): | ||
+ if (content := baked.get(computer)) is not None: | ||
cons.print(f"Using baked-in template for [magenta]{self.name}[/magenta].") | ||
return content | ||
|
||
diff --git a/toolchain/templates/phoenix.sh b/toolchain/templates/phoenix.sh | ||
index b77d4d9..c180ab4 100644 | ||
--- a/toolchain/templates/phoenix.sh | ||
+++ b/toolchain/templates/phoenix.sh | ||
@@ -5,16 +5,19 @@ | ||
#> data from ./mfc.sh run. The resulting file is submitted to the queue system. | ||
#> | ||
|
||
-#SBATCH --job-name="{name}" | ||
-#SBATCH --account={account} | ||
-#SBATCH --partition={partition} | ||
-#SBATCH --nodes={nodes} | ||
-#SBATCH --ntasks-per-node={tasks_per_node} | ||
-#SBATCH --gres=gpu:V100:{tasks_per_node if gpu else 0} | ||
-#SBATCH --mem-per-gpu=16G | ||
-#SBATCH --output="{name}.out" | ||
-#SBATCH --time={walltime} | ||
-#SBATCH --mail-user={email} | ||
+#SBATCH --job-name="{{{name}}}" | ||
+#SBATCH --account={{{account}}} | ||
+#SBATCH --partition={{{partition}}} | ||
+#SBATCH --qos={{{quality_of_service}}} | ||
+#SBATCH --nodes={{{nodes}}} | ||
+#SBATCH --ntasks-per-node={{{tasks_per_node}}} | ||
+{{{f'''\ | ||
+#SBATCH --gres=gpu:V100:{tasks_per_node} | ||
+#SBATCH --mem-per-gpu=16G\ | ||
+''' if gpu else ''}}} | ||
+#SBATCH --output="{{{name}}}.out" | ||
+#SBATCH --time={{{walltime}}} | ||
+#SBATCH --mail-user={{{email}}} | ||
#SBATCH --mail-type="BEGIN, END, FAIL" | ||
|
||
|
||
@@ -40,8 +43,8 @@ for binpath in {MFC::BINARIES}; do | ||
|
||
echo -e ":) Running $binpath:\n" | ||
|
||
- mpirun \ | ||
- -np {nodes*tasks_per_node} \ | ||
+ mpirun \ | ||
+ -np {{{nodes*tasks_per_node}}} \ | ||
{MFC::PROFILER} "$binpath" | ||
|
||
echo |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
diff --git a/toolchain/mfc/run/engines.py b/toolchain/mfc/run/engines.py | ||
index b8c45f7..53f9f9e 100644 | ||
--- a/toolchain/mfc/run/engines.py | ||
+++ b/toolchain/mfc/run/engines.py | ||
@@ -307,11 +307,11 @@ exit $code | ||
cons.print("> Writing batch file...") | ||
file_write(filepath, content) | ||
|
||
- def __execute_batch_file(self, system: queues.QueueSystem): | ||
+ def __execute_batch_file(self, queue: queues.QueueSystem): | ||
# We CD to the case directory before executing the batch file so that | ||
# any files the queue system generates (like .err and .out) are created | ||
# in the correct directory. | ||
- cmd = system.gen_submit_cmd(self.__get_batch_filename()) | ||
+ cmd = queue.gen_submit_cmd(self.__get_batch_filename()) | ||
|
||
if system(cmd, cwd=self.__get_batch_dirpath()) != 0: | ||
raise MFCException(f"Submitting batch file for {system.name} failed. It can be found here: {self.__get_batch_filepath()}. Please check the file for errors.") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
|
||
2D_shockbubble | ||
|
||
4522792 cpu-small hberre3- hberre3 PD 0:00 1 (AssocGrpBillingMinutes) | ||
4522791 gpu-v100 hberre3- hberre3 PD 0:00 1 (AssocGrpBillingMinutes) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.