From 8544f36557cb5411822f978364f9c73f20de05f6 Mon Sep 17 00:00:00 2001 From: Henry LE BERRE Date: Sat, 6 Jan 2024 01:26:10 -0500 Subject: [PATCH] Batch files per computer (#240 & #287) --- toolchain/mfc/args.py | 20 +++-- toolchain/mfc/common.py | 1 + toolchain/mfc/run/engines.py | 41 +++------- toolchain/mfc/run/queues.py | 41 +++++++--- toolchain/mfc/run/run.py | 10 --- toolchain/templates/computer/phoenix.sh | 53 +++++++++++++ toolchain/templates/computer/summit.sh | 50 +++++++++++++ toolchain/templates/generic/lsf.sh | 54 ++++++++++++++ toolchain/templates/generic/pbs.sh | 65 ++++++++++++++++ toolchain/templates/generic/slurm.sh | 76 +++++++++++++++++++ toolchain/templates/lsf.sh | 88 ---------------------- toolchain/templates/pbs.sh | 90 ---------------------- toolchain/templates/slurm.sh | 99 ------------------------- 13 files changed, 357 insertions(+), 331 deletions(-) create mode 100644 toolchain/templates/computer/phoenix.sh create mode 100644 toolchain/templates/computer/summit.sh create mode 100644 toolchain/templates/generic/lsf.sh create mode 100644 toolchain/templates/generic/pbs.sh create mode 100644 toolchain/templates/generic/slurm.sh delete mode 100644 toolchain/templates/lsf.sh delete mode 100644 toolchain/templates/pbs.sh delete mode 100644 toolchain/templates/slurm.sh diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py index eeff424654..8b2d48ef88 100644 --- a/toolchain/mfc/args.py +++ b/toolchain/mfc/args.py @@ -1,12 +1,13 @@ import re, os.path, argparse, dataclasses -from .build import TARGETS, DEFAULT_TARGETS, DEPENDENCY_TARGETS -from .common import MFCException, format_list_to_string -from .test.cases import generate_cases +from .run.queues import get_baked_templates +from .build import TARGETS, DEFAULT_TARGETS, DEPENDENCY_TARGETS +from .common import MFCException, format_list_to_string +from .test.cases import generate_cases from .run.engines import ENGINES from .run.mpi_bins import BINARIES -# pylint: disable=too-many-locals, too-many-statements +# pylint: disable=too-many-locals, too-many-branches, too-many-statements def parse(config): parser = argparse.ArgumentParser( prog="./mfc.sh", @@ -123,6 +124,7 @@ def add_common_arguments(p, mask = None): run.add_argument( "--no-build", action="store_true", default=False, help="(Testing) Do not rebuild MFC.") run.add_argument("--wait", action="store_true", default=False, help="(Batch) Wait for the job to finish.") run.add_argument("-f", "--flags", metavar="FLAGS", dest="--", nargs=argparse.REMAINDER, type=str, default=[], help="(Interactive) Arguments to forward to the MPI invocation.") + run.add_argument("-c", "--computer", metavar="COMPUTER", type=str, default=None, help=f"(Batch) Path to a custom submission file template or one of {format_list_to_string(list(get_baked_templates().keys()))}.") # === BENCH === add_common_arguments(bench, "t") @@ -153,10 +155,16 @@ def add_common_arguments(p, mask = None): # "Slugify" the name of the job args["name"] = re.sub(r'[\W_]+', '-', args["name"]) - # build's --case-optimization and --input depend on each other + # We need to check for some invalid combinations of arguments because of + # the limitations of argparse. if args["command"] == "build": if (args["input"] is not None) ^ args["case_optimization"] : - raise MFCException("./mfc.sh build's --case-optimization requires --input") + raise MFCException("./mfc.sh build's --case-optimization and --input must be used together.") + if args["command"] == "run" and args["engine"] == "batch": + if args["computer"] is None: + raise MFCException("./mfc.sh run's --computer is required when --engine=batch") + if args["binary"] is not None: + raise MFCException("./mfc.sh run's --binary is not allowed when --engine=batch") # Input files to absolute paths for e in ["input", "input1", "input2"]: diff --git a/toolchain/mfc/common.py b/toolchain/mfc/common.py index 2e3a4d7cae..ddb1589256 100644 --- a/toolchain/mfc/common.py +++ b/toolchain/mfc/common.py @@ -8,6 +8,7 @@ MFC_ROOTDIR = normpath(f"{dirname(realpath(__file__))}/../..") MFC_TESTDIR = abspath(f"{MFC_ROOTDIR}/tests") MFC_SUBDIR = abspath(f"{MFC_ROOTDIR}/build") +MFC_TEMPLATEDIR = abspath(f"{MFC_ROOTDIR}/toolchain/templates") MFC_LOCK_FILEPATH = abspath(f"{MFC_SUBDIR}/lock.yaml") MFC_BENCH_FILEPATH = abspath(f"{MFC_ROOTDIR}/toolchain/bench.yaml") diff --git a/toolchain/mfc/run/engines.py b/toolchain/mfc/run/engines.py index dc8f431a48..7f592ef98d 100644 --- a/toolchain/mfc/run/engines.py +++ b/toolchain/mfc/run/engines.py @@ -45,9 +45,6 @@ def init(self, _input: MFCInputFile) -> None: def _init(self) -> None: pass - def get_args(self) -> typing.List[str]: - raise MFCException(f"MFCEngine::get_args: not implemented for {self.name}.") - def run(self, targets: typing.List[MFCTarget]) -> None: raise MFCException(f"MFCEngine::run: not implemented for {self.name}.") @@ -71,14 +68,6 @@ def _init(self) -> None: # If using MPI, we don't know yet whether this engine works self.bKnowWorks = not ARG("mpi") - def get_args(self) -> str: - return f"""\ -Nodes (-N) {ARG('nodes')} -Tasks (/node) (-n) {ARG('tasks_per_node')} -MPI Binary (-b) {self.mpibin.bin}\ -""" - - def __get_exec_cmd(self, target: MFCTarget) -> typing.List[str]: cmd = [] if ARG("mpi"): @@ -176,16 +165,6 @@ class BatchEngine(Engine): def __init__(self) -> None: super().__init__("Batch", "batch") - def get_args(self) -> str: - return f"""\ -Nodes (-N) {ARG('nodes')} -Tasks (/node) (-n) {ARG('tasks_per_node')} -Walltime (-w) {ARG("walltime")} -Partition (-p) {ARG("partition")} -Account (-a) {ARG("account")} -Email (-@) {ARG("email")} -""" - def run(self, targets) -> None: qsystem = queues.get_system() cons.print(f"Detected the [bold magenta]{qsystem.name}[/bold magenta] queue system.") @@ -218,17 +197,22 @@ def __get_batch_filepath(self): self.__get_batch_filename() ])) - def __generate_prologue(self, qsystem: queues.QueueSystem) -> str: - modules = f"" - + def __generate_module_load(self) -> str: if does_system_use_modules(): - modules = f"""\ -printf ":) Loading modules...\\n" + return f"""\ +printf ":) Loading modules...\\n module purge module load {' '.join(get_loaded_modules())} """ + return f"""\ +printf ":) Loading modules...\\n + +# No modules to load. +""" + + def __generate_prologue(self, qsystem: queues.QueueSystem) -> str: return f"""\ TABLE_FORMAT_LINE="| - %-14s %-35s - %-14s %-35s |\\n" TABLE_HEADER="+-----------------------------------------------------------------------------------------------------------+ \\n" @@ -248,8 +232,6 @@ def __generate_prologue(self, qsystem: queues.QueueSystem) -> str: printf "$TABLE_CONTENT\\n" printf "$TABLE_FOOTER\\n" -{modules} - cd "{self.input.case_dirpath}" t_start=$(date +%s) @@ -287,6 +269,7 @@ def __batch_evaluate(self, s: str, qsystem: queues.QueueSystem, targets): ("{MFC::PROLOGUE}", self.__generate_prologue(qsystem)), ("{MFC::PROFILER}", ' '.join(profiler_prepend())), ("{MFC::EPILOGUE}", self.__generate_epilogue()), + ("{MFC::MODULES}", self.__generate_module_load()), ("{MFC::BINARIES}", ' '.join([f"'{target.get_install_binpath()}'" for target in targets])), ] @@ -315,7 +298,7 @@ def __create_batch_file(self, qsystem: queues.QueueSystem, targets: typing.List[ cons.print("> Generating batch file...") filepath = self.__get_batch_filepath() cons.print("> Evaluating template file...") - content = self.__batch_evaluate(qsystem.template, qsystem, targets) + content = self.__batch_evaluate(qsystem.get_template(), qsystem, targets) cons.print("> Writing batch file...") file_write(filepath, content) diff --git a/toolchain/mfc/run/queues.py b/toolchain/mfc/run/queues.py index acb833888c..22489459fb 100644 --- a/toolchain/mfc/run/queues.py +++ b/toolchain/mfc/run/queues.py @@ -1,16 +1,39 @@ -import os, typing, dataclasses +import os, glob, typing, dataclasses from mfc import common +from ..printer import cons from ..state import ARG +from ..common import MFC_TEMPLATEDIR + + +def get_baked_templates() -> dict: + return { + os.path.splitext(os.path.basename(f))[0] : common.file_read(f) + for f in glob.glob(os.path.join(MFC_TEMPLATEDIR, "*.sh")) + } + @dataclasses.dataclass class QueueSystem: - name: str - template: str + name: str + + def __init__(self, name: str) -> None: + self.name = name + + def get_template(self) -> str: + if computer := ARG("computer") is None: + raise common.MFCException(f"{self.name}: --computer is required.") + + baked = get_baked_templates() + if content := baked.get(self.name.lower()): + cons.print(f"Using baked-in template for [magenta]{self.name}[/magenta].") + return content + + if os.path.isfile(computer): + cons.print(f"Using template from [magenta]{computer}[/magenta].") + return common.file_read(computer) - def __init__(self, name: str, filename: str) -> None: - self.name = name - self.template = common.file_read(os.sep.join(["toolchain", "templates", filename])) + raise common.MFCException(f"{self.name}: Failed to find a template for --computer '{computer}'.") def is_active(self) -> bool: raise common.MFCException("QueueSystem::is_active: not implemented.") @@ -21,7 +44,7 @@ def gen_submit_cmd(self, filepath: str) -> typing.List[str]: class PBSSystem(QueueSystem): def __init__(self) -> None: - super().__init__("PBS", "pbs.sh") + super().__init__("PBS") def is_active(self) -> bool: return common.does_command_exist("qsub") @@ -35,7 +58,7 @@ def gen_submit_cmd(self, filepath: str) -> typing.List[str]: class LSFSystem(QueueSystem): def __init__(self) -> None: - super().__init__("LSF", "lsf.sh") + super().__init__("LSF") def is_active(self) -> bool: return common.does_command_exist("bsub") and common.does_command_exist("bqueues") @@ -51,7 +74,7 @@ def gen_submit_cmd(self, filepath: str) -> None: class SLURMSystem(QueueSystem): def __init__(self) -> None: - super().__init__("SLURM", "slurm.sh") + super().__init__("SLURM") def is_active(self) -> bool: return common.does_command_exist("sbatch") diff --git a/toolchain/mfc/run/run.py b/toolchain/mfc/run/run.py index 2fd7ec8b48..d8c5ebfd01 100644 --- a/toolchain/mfc/run/run.py +++ b/toolchain/mfc/run/run.py @@ -37,16 +37,6 @@ def run(targets = None): engine = engines.get_engine(ARG("engine")) engine.init(input_file) - cons.print(f"Configuration:") - cons.indent() - cons.print(f"""\ -Input {ARG('input')} -Job Name (-#) {ARG('name')} -Engine (-e) {ARG('engine')} -{engine.get_args()}\ -""") - cons.unindent() - validate_job_options() for target in targets: diff --git a/toolchain/templates/computer/phoenix.sh b/toolchain/templates/computer/phoenix.sh new file mode 100644 index 0000000000..f2c089bfcc --- /dev/null +++ b/toolchain/templates/computer/phoenix.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +#> +#> This file is part of the ./mfc.sh run subsystem. Expressions enclosed in +#> curly braces are expanded and evaluated using Python's eval() function and +#> data from ./mfc.sh run. The resulting file is submitted to the queue system. +#> + +#SBATCH --job-name="{name}" +#SBATCH --account={account} +#SBATCH --partition={partition} +#SBATCH --nodes={nodes} +#SBATCH --ntasks-per-node={tasks_per_node} +#SBATCH --gres=gpu:V100:{tasks_per_node if gpu else 0} +#SBATCH --mem-per-gpu=16G +#SBATCH --output="{name}.out" +#SBATCH --time={walltime} +#SBATCH --mail-user={email} +#SBATCH --mail-type="BEGIN, END, FAIL" + + +. ./mfc.sh load -c p -m {'g' if gpu else 'c'} + + +#> +#> The MFC prologue sets up the environment required to run MFC prior to +#> execution and starts the timer. +#> +{MFC::PROLOGUE} + + +#> +#> Iterate over all MFC binaries (as specified through --targets) and execute +#> them, one by one, with profiling enabled if requested. +#> +for binpath in {MFC::BINARIES}; do + + echo -e ":) Running $binpath:" + echo "" + + mpirun \ + -np {nodes*tasks_per_node} \ + {MFC::PROFILER} "$binpath" + + echo "" + +done + + +#> +#> The MFC epilogue stops the timer and prints the execution summary. It also +#> performs some cleanup and housekeeping tasks before exiting. +#> +{MFC::EPILOGUE} diff --git a/toolchain/templates/computer/summit.sh b/toolchain/templates/computer/summit.sh new file mode 100644 index 0000000000..444e075a19 --- /dev/null +++ b/toolchain/templates/computer/summit.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +#> +#> This file is part of the ./mfc.sh run subsystem. Expressions enclosed in +#> curly braces are expanded and evaluated using Python's eval() function and +#> data from ./mfc.sh run. The resulting file is submitted to the queue system. +#> +#BSUB -J {name} +#BSUB -nnodes {nodes} +#BSUB -N +#BSUB -P {account} +#BSUB -W {walltime[:-3]} + + +. ./mfc.sh load -c s -m {'g' if gpu else 'c'} + + +#> +#> The MFC prologue sets up the environment required to run MFC prior to +#> execution and starts the timer. +#> +{MFC::PROLOGUE} + + +#> +#> Iterate over all MFC binaries (as specified through --targets) and execute +#> them, one by one, with profiling enabled if requested. +#> +for binpath in {MFC::BINARIES}; do + + echo -e ":) Running $binpath:" + echo "" + + jsrun \ + {'--smpiargs="-gpu"' if gpu else ''} \ + --nrs {tasks_per_node*nodes} \ + --cpu_per_rs 1 \ + --gpu_per_rs {1 if gpu else 0} \ + --tasks_per_rs 1 \ + {MFC::PROFILER} "$binpath" + + echo "" + +done + + +#> +#> The MFC epilogue stops the timer and prints the execution summary. It also +#> performs some cleanup and housekeeping tasks before exiting. +#> +{MFC::EPILOGUE} diff --git a/toolchain/templates/generic/lsf.sh b/toolchain/templates/generic/lsf.sh new file mode 100644 index 0000000000..feed1b911a --- /dev/null +++ b/toolchain/templates/generic/lsf.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +#> +#> This file is part of the ./mfc.sh run subsystem. Expressions enclosed in +#> curly braces are expanded and evaluated using Python's eval() function and +#> data from ./mfc.sh run. The resulting file is submitted to the queue system. +#> +#BSUB -J {name} +#BSUB -nnodes {nodes} +#BSUB -N +#BSUB -P {account} +#BSUB -W {walltime[:-3]} + + +#> +#> Load the same modules as the ones currently loaded in the login shell. These +#> are usually the ones used to compile MFC. +#> +{MFC::MODULES} + + +#> +#> The MFC prologue sets up the environment required to run MFC prior to +#> execution and starts the timer. +#> +{MFC::PROLOGUE} + + +#> +#> Iterate over all MFC binaries (as specified through --targets) and execute +#> them, one by one, with profiling enabled if requested. +#> +for binpath in {MFC::BINARIES}; do + + echo -e ":) Running $binpath:" + echo "" + + jsrun \ + {'--smpiargs="-gpu"' if gpu else ''} \ + --nrs {tasks_per_node*nodes} \ + --cpu_per_rs 1 \ + --gpu_per_rs {1 if gpu else 0} \ + --tasks_per_rs 1 \ + {MFC::PROFILER} "$binpath" + + echo "" + +done + + +#> +#> The MFC epilogue stops the timer and prints the execution summary. It also +#> performs some cleanup and housekeeping tasks before exiting. +#> +{MFC::EPILOGUE} diff --git a/toolchain/templates/generic/pbs.sh b/toolchain/templates/generic/pbs.sh new file mode 100644 index 0000000000..215358c226 --- /dev/null +++ b/toolchain/templates/generic/pbs.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +#> +#> This file is part of the ./mfc.sh run subsystem. Expressions enclosed in +#> curly braces are expanded and evaluated using Python's eval() function and +#> data from ./mfc.sh run. The resulting file is submitted to the queue system. +#> +#PBS -N {name} +#PBS -l nodes={nodes}:ppn={tasks_per_node} +#PBS -A {account} +#PBS -l walltime={walltime} +#PBS -q {partition} +#PBS -M {email} +#> +#> Note: The following options aren't enabled by default. +#> They serve as a guide to users that wish to pass +#> more options to the batch system. +#> + + +#> +#> Load the same modules as the ones currently loaded in the login shell. These +#> are usually the ones used to compile MFC. +#> +{MFC::MODULES} + + +#> +#> The MFC prologue sets up the environment required to run MFC prior to +#> execution and starts the timer. +#> +{MFC::PROLOGUE} + + +#> +#> Iterate over all MFC binaries (as specified through --targets) and execute +#> them, one by one, with profiling enabled if requested. +#> +for binpath in {MFC::BINARIES}; do + + echo -e ":) Running $binpath:" + + if command -v srun > /dev/null 2>&1; then + srun \ + --nodes {nodes} \ + --ntasks-per-node {tasks_per_node} \ + {MFC::PROFILER} "$binpath" + + #> + #> srun --mpi=pmix \ + #> {MFC::PROFILER} "$binpath" + else + mpirun \ + -np {tasks_per_node*nodes} \ + {MFC::PROFILER} "$binpath" + + fi + +done + + +#> +#> The MFC epilogue stops the timer and prints the execution summary. It also +#> performs some cleanup and housekeeping tasks before exiting. +#> +{MFC::EPILOGUE} diff --git a/toolchain/templates/generic/slurm.sh b/toolchain/templates/generic/slurm.sh new file mode 100644 index 0000000000..7e19068b3c --- /dev/null +++ b/toolchain/templates/generic/slurm.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +#> +#> This file is part of the ./mfc.sh run subsystem. Expressions enclosed in +#> curly braces are expanded and evaluated using Python's eval() function and +#> data from ./mfc.sh run. The resulting file is submitted to the queue system. +#> +#SBATCH --job-name="{name}" +#SBATCH --nodes={nodes} +#SBATCH --ntasks-per-node={tasks_per_node} +#SBATCH --cpus-per-task=1 +#SBATCH --gpu-bind=verbose,closest +#SBATCH --gpus=v100-16:{(1 if gpu else 0)*tasks_per_node*nodes} +#SBATCH --time={walltime} +#SBATCH --partition="{partition}" +#SBATCH --output="{name}.out" +#SBATCH --account="{account}" +#SBATCH --error="{name}.err" +#SBATCH --mail-user="{email}" +#SBATCH --export=ALL +#SBATCH --mail-type="BEGIN, END, FAIL" +#> +#> Note: The following options aren't enabled by default. +#> They serve as a guide to users that wish to pass +#> more options to the batch system. +#> +#> #SBATCH --mem=... +#> #SBATCH --constraint="lustre" +#> #SBATCH --gpus-per-task={1 if gpu else 0} + + +#> +#> Load the same modules as the ones currently loaded in the login shell. These +#> are usually the ones used to compile MFC. +#> +{MFC::MODULES} + + +#> +#> The MFC prologue sets up the environment required to run MFC prior to +#> execution and starts the timer. +#> +{MFC::PROLOGUE} + + +#> +#> Iterate over all MFC binaries (as specified through --targets) and execute +#> them, one by one, with profiling enabled if requested. +#> +for binpath in {MFC::BINARIES}; do + + echo -e ":) Running $binpath:" + + if command -v srun > /dev/null 2>&1; then + srun \ + --nodes {nodes} \ + --ntasks-per-node {tasks_per_node} \ + {MFC::PROFILER} "$binpath" + + #> + #> srun --mpi=pmix \ + #> {MFC::PROFILER} "$binpath" + #> + else + mpirun \ + -np {nodes*tasks_per_node} \ + {MFC::PROFILER} "$binpath" + fi + +done + + +#> +#> The MFC epilogue stops the timer and prints the execution summary. It also +#> performs some cleanup and housekeeping tasks before exiting. +#> +{MFC::EPILOGUE} diff --git a/toolchain/templates/lsf.sh b/toolchain/templates/lsf.sh deleted file mode 100644 index 1db260b0ba..0000000000 --- a/toolchain/templates/lsf.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/env bash -#> -#> - LSF Batch File Template - -#> -#> This file is part of the ./mfc.sh run subsystem. -#> For more information, please consult the README. -#> -#> - You are invited to modify this file to suit your -#> needs, in order to get MFC running properly on -#> your system. -#> -#> - Lines that begin with "#>" are ignored and won't -#> figure in the final batch script, not even as a -#> comment. -#> -#> - Statements of the form `${expression}` are string- -#> -replaced by mfc.sh run to provide runtime parameters, -#> most notably execution options. They reference the -#> variables in the same format as those under the "run" -#> section of [mfc.user.yaml](mfc.user.yaml), replacing -#> "-" for "_". You can perform therein any Python operation -#> recognized by the built-in `expr()` function. -#> -#> - Statements of the form {MFC::expression} tell MFC -#> where to place the common code, across all batch -#> files that is required to run MFC. They are not -#> intended to be modified by users. -#> -#BSUB -J {name} -#BSUB -nnodes {nodes} -#BSUB -N -#BSUB -P {account} -#BSUB -W {walltime[:-3]} -#> -#> Note: The above expression for the walltime converts -#> the expression "hh:mm:ss" to the appropriate -#> format for the batch system ("hh:mm"). It is -#> a python expression evaluated at runtime. -#> -#> -#> Note: The following options aren't enabled by default. -#> They serve as a guide to users that wish to pass -#> more options to the batch system. -#> - - - -#> -#> Note: If your system requires you to load environment -#> modules inside of your batch script, please load -#> them bellow. -#> - - - -#> -#> Note: The MFC prologue sets up the environment required -#> prior to execution. -#> -{MFC::PROLOGUE} - -#> -#> Note: This MPI executable might not be well supported -#> on your system - if at all. {MFC::BIN} refers to -#> the path the MFC executable. -#> - -for binpath in {MFC::BINARIES}; do - - echo -e ":) Running $binpath:" - echo "" - - jsrun \ - {'--smpiargs="-gpu"' if gpu else ''} \ - --nrs {tasks_per_node*nodes} \ - --cpu_per_rs 1 \ - --gpu_per_rs {1 if gpu else 0} \ - --tasks_per_rs 1 \ - {MFC::PROFILER} "$binpath" - - echo "" - -done - -{MFC::EPILOGUE} -#> -#> Note: Lines after the MFC Epilogue will not be executed. -#> diff --git a/toolchain/templates/pbs.sh b/toolchain/templates/pbs.sh deleted file mode 100644 index 839b9fe3cf..0000000000 --- a/toolchain/templates/pbs.sh +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env bash -#> -#> - PBS Batch File Template - -#> -#> This file is part of the ./mfc.sh run subsystem. -#> For more information, please consult the README. -#> -#> - You are invited to modify this file to suit your -#> needs, in order to get MFC running properly on -#> your system. -#> -#> - Lines that begin with "#>" are ignored and won't -#> figure in the final batch script, not even as a -#> comment. -#> -#> - Statements of the form `${expression}` are string- -#> -replaced by mfc.sh run to provide runtime parameters, -#> most notably execution options. They reference the -#> variables in the same format as those under the "run" -#> section of [mfc.user.yaml](mfc.user.yaml), replacing -#> "-" for "_". You can perform therein any Python operation -#> recognized by the built-in `expr()` function. -#> -#> - Statements of the form {MFC::expression} tell MFC -#> where to place the common code, across all batch -#> files that is required to run MFC. They are not -#> intended to be modified by users. -#> -#PBS -N {name} -#PBS -l nodes={nodes}:ppn={tasks_per_node} -#PBS -A {account} -#PBS -l walltime={walltime} -#PBS -q {partition} -#PBS -M {email} -#> -#> Note: The following options aren't enabled by default. -#> They serve as a guide to users that wish to pass -#> more options to the batch system. -#> - - - - -#> -#> Note: If your system requires you to load environment -#> modules inside of your batch script, please load -#> them bellow. -#> - - - -#> -#> Note: The MFC prologue sets up the environment required -#> prior to execution. -#> -{MFC::PROLOGUE} - -#> -#> Note: This MPI executable might not be well supported -#> on your system - if at all. {MFC::BIN} refers to -#> the path the MFC executable. -#> - -for binpath in {MFC::BINARIES}; do - - echo -e ":) Running $binpath:" - - if command -v srun > /dev/null 2>&1; then - srun \ - --nodes {nodes} \ - --ntasks-per-node {tasks_per_node} \ - {MFC::PROFILER} "$binpath" - - #> - #> srun --mpi=pmix \ - #> {MFC::PROFILER} "$binpath" - else - mpirun \ - -np {tasks_per_node*nodes} \ - {MFC::PROFILER} "$binpath" - - fi - -done - -{MFC::EPILOGUE} -#> -#> Note: Lines after the MFC Epilogue will not be executed. -#> - diff --git a/toolchain/templates/slurm.sh b/toolchain/templates/slurm.sh deleted file mode 100644 index 11982cb408..0000000000 --- a/toolchain/templates/slurm.sh +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env bash -#> -#> - SLURM Batch File Template - -#> -#> This file is part of the ./mfc.sh run subsystem. -#> For more information, please consult the README. -#> -#> - You are invited to modify this file to suit your -#> needs, in order to get MFC running properly on -#> your system. -#> -#> - Lines that begin with "#>" are ignored and won't -#> figure in the final batch script, not even as a -#> comment. -#> -#> - Statements of the form `${expression}` are string- -#> -replaced by mfc.sh run to provide runtime parameters, -#> most notably execution options. They reference the -#> variables in the same format as those under the "run" -#> section of [mfc.user.yaml](mfc.user.yaml), replacing -#> "-" for "_". You can perform therein any Python operation -#> recognized by the built-in `expr()` function. -#> -#> - Statements of the form {MFC::expression} tell MFC -#> where to place the common code, across all batch -#> files that is required to run MFC. They are not -#> intended to be modified by users. -#> -#SBATCH --job-name="{name}" -#SBATCH --nodes={nodes} -#SBATCH --ntasks-per-node={tasks_per_node} -#SBATCH --cpus-per-task=1 -#SBATCH --gpu-bind=verbose,closest -#SBATCH --gpus=v100-16:{(1 if gpu else 0)*tasks_per_node*nodes} -#SBATCH --time={walltime} -#SBATCH --partition="{partition}" -#SBATCH --output="{name}.out" -#SBATCH --account="{account}" -#SBATCH --error="{name}.err" -#SBATCH --mail-user="{email}" -#SBATCH --export=ALL -#SBATCH --mail-type="BEGIN, END, FAIL" -#> -#> Note: The following options aren't enabled by default. -#> They serve as a guide to users that wish to pass -#> more options to the batch system. -#> -#> #SBATCH --mem=... -#> #SBATCH --constraint="lustre" -#> #SBATCH --gpus-per-task={1 if gpu else 0} - - -#> -#> Note: If your system requires you to load environment -#> modules inside of your batch script, please load -#> them bellow. -#> - - -#> -#> Note: The MFC prologue sets up the environment required -#> prior to execution. -#> -{MFC::PROLOGUE} - - -#> -#> Note: This MPI executable might not be well supported -#> on your system - if at all. {MFC::BIN} refers to -#> the path the MFC executable. -#> - -for binpath in {MFC::BINARIES}; do - - echo -e ":) Running $binpath:" - - if command -v srun > /dev/null 2>&1; then - srun \ - --nodes {nodes} \ - --ntasks-per-node {tasks_per_node} \ - {MFC::PROFILER} "$binpath" - - #> - #> srun --mpi=pmix \ - #> {MFC::PROFILER} "$binpath" - #> - else - mpirun \ - -np {nodes*tasks_per_node} \ - {MFC::PROFILER} "$binpath" - fi - -done - -{MFC::EPILOGUE} - -#> -#> Note: Lines after the MFC Epilogue will not be executed. -#>