
Refactor submission templates #722

Merged
merged 52 commits into main from refactor/submission-templates on Oct 23, 2023

Commits (52)
01105b2
refactor: Add _get_scheduler_values method to environments
b-butler Feb 27, 2023
550696e
refactor: Port some templates over as examples of new style.
b-butler Feb 27, 2023
75d873c
feat: Create way for environment classes to use partition config
b-butler Mar 7, 2023
f459a32
refactor: Update all environments which I have access to.
b-butler Mar 22, 2023
019cfb4
fix: Provide default node behavior for _get_scheduler_values
b-butler Mar 24, 2023
33c624b
fix: Fix typo in setting resources to Jinja2 context
b-butler Mar 24, 2023
259ee93
Merge branch 'main' into refactor/submission-templates
b-butler Apr 5, 2023
2526389
fix: Delta template partition node submissions.
b-butler Apr 5, 2023
61ff9c6
refactor: Provide infrastructure to correctly warn on low resources
b-butler Apr 5, 2023
5c45b55
refactor (WIP): Add _shared_partitions to Delta env for testing.
b-butler Apr 5, 2023
0226670
fixup: removing operation from _get_scheduler_values
b-butler Apr 5, 2023
59908cf
fix: Delta template
b-butler Apr 6, 2023
215c4d4
fix: Bridges2 template and environment.
b-butler Apr 17, 2023
2acaea5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 17, 2023
0dcf6f7
refactor: Reset Stampede2 template
b-butler Apr 28, 2023
f5a7eca
fix: Update shared partitions specifications in environments.
b-butler Apr 28, 2023
d981ee9
fix: Only specify node request on non-shared partitions (Expanse).
b-butler Apr 28, 2023
058636b
[pre-commit.ci] pre-commit autoupdate (#736)
pre-commit-ci[bot] Apr 3, 2023
c571532
Remove doc-filter after query switched to unified syntax (#738)
cbkerr Apr 14, 2023
d6a412d
fix: Delta hostname regex. (#740)
b-butler Apr 20, 2023
a5ea111
Release/0.25.1 (#742)
b-butler Apr 21, 2023
a5bba49
Add Frontier Environment (#743)
b-butler Apr 27, 2023
15ef9d6
Feature/469 check status for specific operations (#725)
rayasare May 17, 2023
1cb0881
Build(deps-dev): Bump pre-commit from 3.2.1 to 3.2.2 (#746)
dependabot[bot] May 17, 2023
a4c64dc
Build(deps): Bump pytest from 7.2.2 to 7.3.1 (#744)
dependabot[bot] May 17, 2023
f1e1cea
Build(deps): Bump coverage from 7.2.2 to 7.2.5 (#745)
dependabot[bot] May 17, 2023
a90b9e2
Feat/template testing (#747)
b-butler May 31, 2023
37e1b20
Build(deps): Bump ruamel-yaml from 0.17.21 to 0.17.31 (#752)
dependabot[bot] Jun 1, 2023
19f8949
Build(deps): Bump pytest-cov from 4.0.0 to 4.1.0 (#751)
dependabot[bot] Jun 1, 2023
34be1f6
Build(deps-dev): Bump pre-commit from 3.2.2 to 3.3.2 (#750)
dependabot[bot] Jun 1, 2023
d2ea932
Build(deps): Bump coverage from 7.2.5 to 7.2.7 (#749)
dependabot[bot] Jun 1, 2023
efa920c
fix: UMich Greatlakes environment configuration
b-butler Jul 27, 2023
3687c0c
fix: Correctly raise GPU errors in Greatlakes template
b-butler Jul 27, 2023
bbd2b0d
Merge branch 'main' into refactor/submission-templates
b-butler Jul 27, 2023
614c5f6
refactor: Small changes to new template environment code
b-butler Oct 4, 2023
1ed0b4d
refactor: Convert frontier template to new format
b-butler Oct 4, 2023
3a35426
refactor: Frontier remove FrontierEnvironment.calc_num_nodes
b-butler Oct 5, 2023
787a2b6
feat: Finish conversion of Frontier environment.
b-butler Oct 5, 2023
1e0bab9
refactor: Remove unnecessary empty shared_partition sets
b-butler Oct 5, 2023
efd8593
Merge branch 'main' into refactor/submission-templates
b-butler Oct 5, 2023
bed0b0f
fix: Frontier's allowable CPU use
b-butler Oct 5, 2023
27770c0
test: Update submission scripts
b-butler Oct 5, 2023
5a012a4
fix: Stampede2Environment._get_scheduler_values
b-butler Oct 5, 2023
cdb226e
fix: remove Stampede2Environment._get_scheduler_values
b-butler Oct 5, 2023
b0a591b
test: Update template tests.
b-butler Oct 10, 2023
e23ae7b
fix: typo in andes
b-butler Oct 10, 2023
7c6d4d6
test: Update tests to new code.
b-butler Oct 10, 2023
867e941
Merge branch 'main' into refactor/submission-templates
b-butler Oct 11, 2023
f203970
style: Remove incorrect comments.
b-butler Oct 11, 2023
4edfa0c
Fix: Andes template typo
b-butler Oct 18, 2023
0cce0d6
test: Update Andes template
b-butler Oct 18, 2023
f8db823
Merge branch 'main' into refactor/submission-templates
b-butler Oct 20, 2023
66 changes: 66 additions & 0 deletions flow/environment.py
@@ -34,6 +34,7 @@
from .scheduling.pbs import PBSScheduler
from .scheduling.simple_scheduler import SimpleScheduler
from .scheduling.slurm import SlurmScheduler
from .util.template_filters import calc_num_nodes, calc_tasks

logger = logging.getLogger(__name__)

@@ -108,6 +109,10 @@ class ComputeEnvironment(metaclass=_ComputeEnvironmentType):
template = "base_script.sh"
mpi_cmd = "mpiexec"

_cpus_per_node = {"default": -1}
_gpus_per_node = {"default": -1}
_shared_partitions = set()

@classmethod
def is_present(cls):
"""Determine whether this specific compute environment is present.
@@ -283,6 +288,67 @@ def _get_default_directives(cls):
)
)

@classmethod
def _get_scheduler_values(cls, context):
"""Return a dictionary of computed quantities regarding submission.

Warning
-------
Must be called after the rest of the template context has been gathered.
"""
partition = context.get("partition", None)
force = context.get("force", False)
if force or partition in cls._shared_partitions:
threshold = 0.0
else:
threshold = 0.9
cpu_tasks_total = calc_tasks(
context["operations"],
"np",
context.get("parallel", False),
context.get("force", False),
)
gpu_tasks_total = calc_tasks(
context["operations"],
"ngpu",
context.get("parallel", False),
context.get("force", False),
)

if gpu_tasks_total > 0:
num_nodes_gpu = cls._calc_num_nodes(
gpu_tasks_total, cls._get_gpus_per_node(partition), threshold
)
num_nodes_cpu = cls._calc_num_nodes(
cpu_tasks_total, cls._get_cpus_per_node(partition), 0
)
else:
num_nodes_gpu = 0
num_nodes_cpu = cls._calc_num_nodes(
cpu_tasks_total, cls._get_cpus_per_node(partition), threshold
)
num_nodes = max(num_nodes_cpu, num_nodes_gpu, 1)
return {
"ncpu_tasks": cpu_tasks_total,
"ngpu_tasks": gpu_tasks_total,
"num_nodes": num_nodes,
}

@classmethod
def _get_cpus_per_node(cls, partition):
return cls._cpus_per_node.get(partition, cls._cpus_per_node["default"])

@classmethod
def _get_gpus_per_node(cls, partition):
return cls._gpus_per_node.get(partition, cls._gpus_per_node["default"])

@classmethod
def _calc_num_nodes(cls, tasks, processors, threshold):
"""Call calc_num_nodes but handles the -1 sentinal value."""
if processors == -1:
return 1
return calc_num_nodes(tasks, processors, threshold)


class StandardEnvironment(ComputeEnvironment):
"""Default environment which is always present."""
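Reviewer note: the core of this change is the node-count calculation in _get_scheduler_values above. A minimal, self-contained sketch of that logic follows; the partition tables, operation counts, and standalone function names are hypothetical, and the low-utilization warning performed by flow's calc_num_nodes filter is omitted here.

# Sketch of the node-count logic introduced by _get_scheduler_values.
# Not flow's actual API: the tables and numbers below are made up.
from math import ceil

_cpus_per_node = {"default": 128, "gpu": 64}
_gpus_per_node = {"default": 0, "gpu": 4}
_shared_partitions = {"shared"}


def _calc_num_nodes(tasks, per_node, threshold):
    # -1 is the sentinel for "node counts are not meaningful on this system".
    if per_node == -1:
        return 1
    # flow's calc_num_nodes also warns when the last node would sit below
    # `threshold` utilization; that warning is omitted in this sketch.
    return ceil(tasks / per_node)


def scheduler_values(partition, ncpu_tasks, ngpu_tasks, force=False):
    # Shared partitions (and --force) disable the 90% utilization threshold.
    threshold = 0.0 if force or partition in _shared_partitions else 0.9
    cpus = _cpus_per_node.get(partition, _cpus_per_node["default"])
    gpus = _gpus_per_node.get(partition, _gpus_per_node["default"])
    if ngpu_tasks > 0:
        num_nodes_gpu = _calc_num_nodes(ngpu_tasks, gpus, threshold)
        num_nodes_cpu = _calc_num_nodes(ncpu_tasks, cpus, 0)
    else:
        num_nodes_gpu = 0
        num_nodes_cpu = _calc_num_nodes(ncpu_tasks, cpus, threshold)
    return {
        "ncpu_tasks": ncpu_tasks,
        "ngpu_tasks": ngpu_tasks,
        "num_nodes": max(num_nodes_cpu, num_nodes_gpu, 1),
    }


print(scheduler_values("gpu", ncpu_tasks=48, ngpu_tasks=6))
# {'ncpu_tasks': 48, 'ngpu_tasks': 6, 'num_nodes': 2}

When GPUs are requested, the CPU node count is computed with a zero threshold, so only an underused GPU allocation can trigger the utilization warning.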
31 changes: 9 additions & 22 deletions flow/environments/incite.py
@@ -36,8 +36,8 @@ def my_operation(job):
hostname_pattern = r".*\.summit\.olcf\.ornl\.gov"
template = "summit.sh"
mpi_cmd = "jsrun"
cores_per_node = 42
gpus_per_node = 6
_cpus_per_node = {"default": 42}
_gpus_per_node = {"default": 6}

@template_filter
def calc_num_nodes(cls, resource_sets, parallel=False):
@@ -187,7 +187,8 @@ class AndesEnvironment(DefaultSlurmEnvironment):
hostname_pattern = r"andes-.*\.olcf\.ornl\.gov"
template = "andes.sh"
mpi_cmd = "srun"
cores_per_node = 32
_cpus_per_node = {"default": 32, "gpu": 28}
_gpus_per_node = {"default": 0, "gpu": 2}

@classmethod
def add_args(cls, parser):
@@ -216,8 +217,9 @@ class CrusherEnvironment(DefaultSlurmEnvironment):

hostname_pattern = r".*\.crusher\.olcf\.ornl\.gov"
template = "crusher.sh"
cores_per_node = 56
gpus_per_node = 8
_cpus_per_node = {"default": 56}
_gpus_per_node = {"default": 8}

mpi_cmd = "srun"

@template_filter
@@ -267,25 +269,10 @@ class FrontierEnvironment(DefaultSlurmEnvironment):

hostname_pattern = r".*\.frontier\.olcf\.ornl\.gov"
template = "frontier.sh"
cores_per_node = 56
gpus_per_node = 8
_cpus_per_node = {"default": 56}
_gpus_per_node = {"default": 8}
mpi_cmd = "srun"

@template_filter
def calc_num_nodes(cls, ngpus, ncpus, threshold):
"""Compute the number of nodes needed to meet the resource request.

Also raise an error when the requested resource do not come close to saturating the asked
for nodes.
"""
nodes_gpu = max(1, int(ceil(ngpus / cls.gpus_per_node)))
nodes_cpu = max(1, int(ceil(ncpus / cls.cores_per_node)))
if nodes_gpu >= nodes_cpu:
check_utilization(nodes_gpu, ngpus, cls.gpus_per_node, threshold, "compute")
return nodes_gpu
check_utilization(nodes_cpu, ncpus, cls.cores_per_node, threshold, "compute")
return nodes_cpu

@classmethod
def _get_mpi_prefix(cls, operation, parallel):
"""Get the correct srun command for the job.
6 changes: 5 additions & 1 deletion flow/environments/umich.py
@@ -13,7 +13,11 @@ class GreatLakesEnvironment(DefaultSlurmEnvironment):

hostname_pattern = r"gl(-login)?[0-9]+\.arc-ts\.umich\.edu"
template = "umich-greatlakes.sh"
cores_per_node = 1
_cpus_per_node = {"default": 36, "gpu": 40}
_gpus_per_node = {"default": 2}
_shared_partitions = {"standard", "gpu"}

mpi_cmd = "srun"

@classmethod
def add_args(cls, parser):
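Reviewer note: the per-partition tables are resolved with a plain dictionary lookup that falls back to the "default" entry, which is how GreatLakes now reports 40 CPUs on the gpu partition and 36 everywhere else. A minimal sketch of the lookup (the standalone function name is illustrative):

# Per-partition lookup with a "default" fallback, mirroring
# _get_cpus_per_node; the values are taken from the GreatLakes diff above.
_cpus_per_node = {"default": 36, "gpu": 40}


def cpus_per_node(partition):
    return _cpus_per_node.get(partition, _cpus_per_node["default"])


assert cpus_per_node("gpu") == 40       # explicit entry
assert cpus_per_node("standard") == 36  # falls back to "default"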
27 changes: 23 additions & 4 deletions flow/environments/xsede.py
@@ -25,6 +25,14 @@ class Stampede2Environment(DefaultSlurmEnvironment):
mpi_cmd = "ibrun"
offset_counter = 0
base_offset = _STAMPEDE_OFFSET
_cpus_per_node = {
"default": 48,
"skx-dev": 68,
"skx-normal": 68,
"skx-large": 68,
"icx-normal": 80,
}
_gpus_per_node = {"default": 0.0}

@template_filter
def return_and_increment(cls, increment):
@@ -138,8 +146,10 @@ class Bridges2Environment(DefaultSlurmEnvironment):

hostname_pattern = r".*\.bridges2\.psc\.edu$"
template = "bridges2.sh"
cores_per_node = 128
mpi_cmd = "mpirun"
_cpus_per_node = {"default": 128, "EM": 96, "GPU": 40, "GPU-shared": 40}
_gpus_per_node = {"default": 8}
_shared_partitions = {"RM-shared", "GPU-shared"}

@classmethod
def add_args(cls, parser):
@@ -175,8 +185,9 @@ class ExpanseEnvironment(DefaultSlurmEnvironment):

hostname_pattern = r".*\.expanse\.sdsc\.edu$"
template = "expanse.sh"
cores_per_node = 128
gpus_per_node = 4
_cpus_per_node = {"default": 128, "GPU": 40}
_gpus_per_node = {"default": 4}
_shared_partitions = {"shared", "gpu-shared"}

@classmethod
def add_args(cls, parser):
@@ -218,7 +229,15 @@ class DeltaEnvironment(DefaultSlurmEnvironment):
# be safer given the parts listed are less likely to change.
hostname_pattern = r"(gpua|dt|cn)(-login)?[0-9]+\.delta.*\.ncsa.*\.edu"
template = "delta.sh"
cores_per_node = 128
_cpus_per_node = {
"default": 128,
"gpuA40x4": 64,
"gpuA100x4": 64,
"gpuA100x8": 128,
"gpuMI100x8": 128,
}
_gpus_per_node = {"default": 4, "gpuA100x8": 8, "gpuMI100x8": 8}
_shared_partitions = {"cpu", "gpuA100x4", "gpuA40x4", "gpuA100x8", "gpuMI100x8"}

@classmethod
def add_args(cls, parser):
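Reviewer note: across these environments, the new class attributes reduce a cluster definition to a few declarative tables. A sketch of what a new environment might look like under this scheme, assuming this branch of signac-flow is installed; the hostname pattern, template name, partition names, and hardware counts below are hypothetical:

# Hypothetical environment built on the refactored attributes.
from flow.environment import DefaultSlurmEnvironment


class ExampleClusterEnvironment(DefaultSlurmEnvironment):
    """Made-up SLURM cluster with a CPU partition and a shared GPU partition."""

    hostname_pattern = r".*\.example-cluster\.edu$"
    template = "example-cluster.sh"
    mpi_cmd = "srun"
    # Per-partition hardware tables; "default" is the fallback entry.
    _cpus_per_node = {"default": 128, "gpu": 64}
    _gpus_per_node = {"default": 0, "gpu": 4}
    # Shared partitions skip the 90% node-utilization threshold.
    _shared_partitions = {"shared", "gpu"}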
11 changes: 1 addition & 10 deletions flow/project.py
@@ -663,11 +663,6 @@ def __init__(
self._project = project
self.submit_options = submit_options
self.run_options = run_options
# We register aggregators associated with operation functions in
# `_register_groups` and we do not set the aggregator explicitly.
# We delay setting the aggregator because we do not restrict the
# decorator placement in terms of `@FlowGroupEntry`, `@aggregator`, or
# `@operation`.
self.group_aggregator = group_aggregator

def __call__(self, func=None, /, *, directives=None):
@@ -1534,11 +1529,6 @@ def _internal_call(

# Append the name and function to the class registry
self._parent_class._OPERATION_FUNCTIONS.append((name, func))
# We register aggregators associated with operation functions in
# `_register_groups` and we do not set the aggregator explicitly. We
# delay setting the aggregator because we do not restrict the decorator
# placement in terms of `@FlowGroupEntry`, `@aggregator`, or
# `@operation`.
self._parent_class._GROUPS.append(
FlowGroupEntry(name=name, project=self._parent_class)
)
@@ -4004,6 +3994,7 @@ def _generate_submit_script(
context["id"] = _id
context["operations"] = list(operations)
context.update(kwargs)
context["resources"] = self._environment._get_scheduler_values(context)
if show_template_help:
self._show_template_help_and_exit(template_environment, context)
return template.render(**context)
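Reviewer note: injecting "resources" into the template context here is what lets the templates below drop their per-template calc_tasks/calc_num_nodes arithmetic. A minimal Jinja2 sketch of how a template consumes it (the template string is illustrative, not one of flow's shipped templates):

# Render a toy SLURM header from a precomputed "resources" mapping.
from jinja2 import Template

resources = {"ncpu_tasks": 48, "ngpu_tasks": 6, "num_nodes": 2}
toy_template = Template(
    "#SBATCH -N {{ resources.num_nodes }}\n"
    "#SBATCH --ntasks={{ resources.ncpu_tasks }}\n"
    "{% if resources.ngpu_tasks %}"
    "#SBATCH --gpus={{ resources.ngpu_tasks }}\n"
    "{% endif %}"
)
print(toy_template.render(resources=resources))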
23 changes: 5 additions & 18 deletions flow/templates/andes.sh
@@ -1,37 +1,24 @@
{# Templated in accordance with: https://docs.olcf.ornl.gov/systems/andes_user_guide.html #}
{% extends "slurm.sh" %}
{% block tasks %}
{% set threshold = 0 if force else 0.9 %}
{% set cpu_tasks = operations|calc_tasks('np', parallel, force) %}
{% set gpu_tasks = operations|calc_tasks('ngpu', parallel, force) %}
{% if gpu_tasks %}
{% if resources.ngpu_tasks %}
{% if not ('GPU' in partition or force) %}
{% raise "GPU operations require a GPU partition!" %}
{% endif %}
{# GPU nodes have 2 NVIDIA K80s #}
{% set nn_gpu = gpu_tasks|calc_num_nodes(2) %}
{% set nn = nn_gpu %}
{% else %}
{% if 'gpu' in partition and not force %}
{% raise "Requesting gpu partition, but no GPUs requested!" %}
{% endif %}
{% set nn = nn|default(cpu_tasks|calc_num_nodes(32), true) %}
{% endif %}
{% if 'gpu' in partition %}
{% set gpus_per_node = (gpu_tasks / nn)|round(0, 'ceil')|int %}
{% set cpus_per_node = (cpu_tasks / nn)|round(0, 'ceil')|int %}
{% if cpus_per_node > gpus_per_node * 14 and not force %}
{% if resources.ncpu_tasks > resources.ngpu_tasks * 14 and not force %}
{% raise "Cannot request more than 14 CPUs per GPU." %}
{% endif %}
{% endif %}
#SBATCH -N {{ resources.num_nodes }}
#SBATCH --ntasks={{ resources.ncpu_tasks }}
{% if partition == 'gpu' %}
#SBATCH -N {{ nn|check_utilization(gpu_tasks, 2, threshold, 'GPU') }}
#SBATCH --ntasks-per-node={{ cpus_per_node }}
#SBATCH --gpus={{ gpu_tasks }}
{% else %}
{# This should cover batch #}
#SBATCH -N {{ nn|check_utilization(cpu_tasks, 32, threshold, 'CPU') }}
#SBATCH --ntasks-per-node={{ (32, cpu_tasks)|min }}
#SBATCH --gpus={{ resources.ngpu_tasks }}
{% endif %}
{% endblock tasks %}
{% block header %}
45 changes: 12 additions & 33 deletions flow/templates/bridges2.sh
@@ -1,48 +1,27 @@
{# Templated in accordance with: https://www.psc.edu/resources/bridges-2/user-guide #}
{% extends "slurm.sh" %}
{% block tasks %}
{% set threshold = 0 if force else 0.9 %}
{% set cpu_tasks = operations|calc_tasks('np', parallel, force) %}
{% set gpu_tasks = operations|calc_tasks('ngpu', parallel, force) %}
{% if gpu_tasks %}
{% if resources.ngpu_tasks %}
{% if not ('GPU' in partition or force) %}
{% raise "GPU operations require a GPU partition!" %}
{% endif %}
{#- GPU nodes have 8 NVIDIA V100-32GB SXM2 #}
{% set nn_gpu = gpu_tasks|calc_num_nodes(8) %}
{% set nn = nn_gpu %}
{% if partition == "GPU-shared" and resources.ngpu_tasks > 4 %}
{% raise "Cannot request GPU-shared with more than 4 GPUs." %}
{% endif %}
{% else %}
{% if 'GPU' in partition and not force %}
{% raise "Requesting GPU partition, but no GPUs requested!" %}
{% endif %}
{% set nn = nn|default(cpu_tasks|calc_num_nodes(128), true) %}
{% endif %}
{% if 'GPU' in partition %}
{% set gpus_per_node = (gpu_tasks / nn)|round(0, 'ceil')|int %}
{% set cpus_per_node = (cpu_tasks / nn)|round(0, 'ceil')|int %}
{% if cpus_per_node > gpus_per_node * 5 and not force %}
{% raise "Cannot request more than 5 CPUs per GPU." %}
{% endif %}
{% if partition == 'RM-shared' and resources.ncpu_tasks > 64 %}
{% raise "Cannot request RM-shared with more than 64 tasks or multiple nodes." %}
{% endif %}
{% if partition == 'GPU' %}
#SBATCH -N {{ nn|check_utilization(gpu_tasks, 8, threshold, 'GPU') }}
#SBATCH --gpus={{ gpu_tasks }}
{% elif partition == 'GPU-shared' %}
#SBATCH -N {{ nn|check_utilization(gpu_tasks, 1, threshold, 'GPU') }}
#SBATCH --gpus={{ gpu_tasks }}
{% elif partition == 'EM' %}
#SBATCH -N {{ nn|check_utilization(cpu_tasks, 96, threshold, 'CPU') }}
#SBATCH --ntasks-per-node={{ (96, cpu_tasks)|min }}
{% elif partition == 'RM-shared' %}
{% if nn|default(1, true) > 1 or cpu_tasks > 64 %}
{% raise "Cannot request RM-shared with more than 64 tasks or multiple nodes." %}
{% endif %}
#SBATCH -N {{ nn|default(1, true) }}
#SBATCH --ntasks={{ cpu_tasks }}
{% else %}
{#- This should cover RM, RM-512, and possibly RM-small (not documented) #}
#SBATCH -N {{ nn|check_utilization(cpu_tasks, 128, threshold, 'CPU') }}
#SBATCH --ntasks-per-node={{ (128, cpu_tasks)|min }}
{% if resources.num_nodes > 1 or resources.ncpu_tasks >= 128 or resources.ngpu_tasks >= 8 %}
#SBATCH -N {{ resources.num_nodes }}
{% endif %}
#SBATCH --ntasks={{ resources.ncpu_tasks }}
{% if 'GPU' in partition %}
#SBATCH --gpus={{ resources.ngpu_tasks }}
{% endif %}
{% endblock tasks %}
{% block header %}
6 changes: 1 addition & 5 deletions flow/templates/crusher.sh
@@ -1,11 +1,7 @@
{# Templated in accordance with: https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html #}
{% extends "slurm.sh" %}
{% block tasks %}
{% set threshold = 0 if force else 0.9 %}
{% set cpu_tasks = operations|calc_tasks('np', parallel, force) %}
{% set gpu_tasks = operations|calc_tasks('ngpu', parallel, force) %}
{% set nn = gpu_tasks|calc_num_nodes(cpu_tasks, threshold) %}
#SBATCH --nodes={{ nn }}
#SBATCH --nodes={{ resources.num_nodes }}
{% endblock tasks %}
{% block header %}
{{- super() -}}
Expand Down
Loading