From 52a47b44b2c8b970c1e942434de814105eda40a5 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 1 Oct 2024 20:13:41 -0600 Subject: [PATCH] Setup the new "limit" qualifier for bind-to Add a new qualifier for the "bind-to" directive: "LIMIT=n" limits the number of processes bound to each eligible representative of the specified type to the given number. For example, specifying "--bind-to l3:limit=2" would direct PRRTE to bind ranks to the L3caches, limiting the number of processes bound to each l3cache to two - i.e., bind 2 processes to a given l3cache, and then move on to the next. Add new help text to "prun" and "prterun" to include new "limit=n" modifier for the "bind-to" directive. The binding algorithm has not been updated to support this yet. Requires some additional work that will be done in a later commit. Signed-off-by: Ralph Castain --- src/docs/prrte-rst-content/cli-bind-to.rst | 9 ++++++- .../show-help-files/help-prte-hwloc-base.txt | 16 +++--------- src/docs/show-help-files/help-prterun.txt | 11 ++++++-- src/docs/show-help-files/help-prun.txt | 7 +++++ src/hwloc/hwloc.c | 26 ++++++++++++++++--- src/mca/rmaps/base/rmaps_base_binding.c | 1 + src/mca/rmaps/base/rmaps_base_map_job.c | 5 +++- src/mca/rmaps/rmaps_types.h | 3 ++- src/mca/rmaps/round_robin/rmaps_rr_mappers.c | 3 +++ src/mca/schizo/base/schizo_base_frame.c | 1 + src/util/attr.c | 2 ++ src/util/attr.h | 2 ++ src/util/prte_cmd_line.h | 1 + 13 files changed, 66 insertions(+), 21 deletions(-) diff --git a/src/docs/prrte-rst-content/cli-bind-to.rst b/src/docs/prrte-rst-content/cli-bind-to.rst index 24690af99b..b7343e2b46 100644 --- a/src/docs/prrte-rst-content/cli-bind-to.rst +++ b/src/docs/prrte-rst-content/cli-bind-to.rst @@ -1,6 +1,6 @@ .. -*- rst -*- - Copyright (c) 2022-2023 Nanook Consulting. All rights reserved. + Copyright (c) 2022-2024 Nanook Consulting All rights reserved. Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved. 
$COPYRIGHT$ @@ -79,5 +79,12 @@ option: be launched and executed even if binding cannot be performed as requested. +* ``LIMIT=n`` limits the number of processes bound to each eligible + representative of the specified type to the given number. For + example, specifying "--bind-to l3:limit=2" would direct PRRTE + to bind ranks to the L3caches, limiting the number of processes + bound to each l3cache to two - i.e., bind 2 processes to a + given l3cache, and then move on to the next. + .. note:: Directives and qualifiers are case-insensitive. ``OVERLOAD`` is the same as ``overload``. diff --git a/src/docs/show-help-files/help-prte-hwloc-base.txt b/src/docs/show-help-files/help-prte-hwloc-base.txt index c8ba4cd61a..21f182c249 100644 --- a/src/docs/show-help-files/help-prte-hwloc-base.txt +++ b/src/docs/show-help-files/help-prte-hwloc-base.txt @@ -1,16 +1,6 @@ -[bogus section] - -This section is not used by PRTE code. But we have to put a RST -section title in this file somewhere, or Sphinx gets unhappy. So we -put it in a section that is ignored by PRTE code. - - -Hello, world -============ - [mbind failure] -PRTE failed to bind internal memory to a specific NUMA node. This +PRRTE failed to bind internal memory to a specific NUMA node. This message will only be reported at most once per process. Local host: %s @@ -50,12 +40,12 @@ been deprecated and replaced as follows: Deprecated: %s Replacement: %s -The deprecated forms *will* disappear in a future version of PRTE. +The deprecated forms *will* disappear in a future version of PRRTE. Please update to the new syntax. [obj-idx-failed] -PRTE failed to find a cache of a specified type. This is a highly +PRRTE failed to find a cache of a specified type. This is a highly unusual error; it may indicate a system configuration error. 
This additional information may be of help: diff --git a/src/docs/show-help-files/help-prterun.txt b/src/docs/show-help-files/help-prterun.txt index 017690b029..fab08171bc 100644 --- a/src/docs/show-help-files/help-prterun.txt +++ b/src/docs/show-help-files/help-prterun.txt @@ -1182,10 +1182,17 @@ combination of one or more of the following to the "--bind-to" option: * "IF-SUPPORTED" indicates that the job should continue to be launched and executed even if binding cannot be performed as requested. +* "LIMIT=n" limits the number of processes bound to each eligible + representative of the specified type to the given number. For + example, specifying "--bind-to l3:limit=2" would direct PRRTE + to bind ranks to the L3caches, limiting the number of processes + bound to each l3cache to two - i.e., bind 2 processes to a + given l3cache, and then move on to the next. + Note: - Directives and qualifiers are case-insensitive. "OVERLOAD" is the - same as "overload". + Directives and qualifiers are case-insensitive - e.g., "OVERLOAD" + is the same as "overload". [runtime-options] diff --git a/src/docs/show-help-files/help-prun.txt b/src/docs/show-help-files/help-prun.txt index 37c75f59c2..31d6f6235b 100644 --- a/src/docs/show-help-files/help-prun.txt +++ b/src/docs/show-help-files/help-prun.txt @@ -782,6 +782,13 @@ combination of one or more of the following to the "--bind-to" option: * "IF-SUPPORTED" indicates that the job should continue to be launched and executed even if binding cannot be performed as requested. +* "LIMIT=n" limits the number of processes bound to each eligible + representative of the specified type to the given number. For + example, specifying "--bind-to l3:limit=2" would direct PRRTE + to bind ranks to the L3caches, limiting the number of processes + bound to each l3cache to two - i.e., bind 2 processes to a + given l3cache, and then move on to the next. + Note: Directives and qualifiers are case-insensitive. 
"OVERLOAD" is the diff --git a/src/hwloc/hwloc.c b/src/hwloc/hwloc.c index aad1ce57cd..9cf23c9339 100644 --- a/src/hwloc/hwloc.c +++ b/src/hwloc/hwloc.c @@ -3,7 +3,7 @@ * Copyright (c) 2013-2020 Intel, Inc. All rights reserved. * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -166,7 +166,7 @@ int prte_hwloc_base_register(void) "l3cache, numa, package, (\"none\" is the default when oversubscribed, \"core\" is " "the default otherwise). Allowed " "colon-delimited qualifiers: " - "overload-allowed, if-supported", + "overload-allowed, if-supported, limit", PMIX_MCA_BASE_VAR_TYPE_STRING, &prte_hwloc_base_binding_policy); if (NULL == prte_hwloc_base_binding_policy) { @@ -566,8 +566,9 @@ int prte_hwloc_base_set_binding_policy(void *jdat, char *spec) { int i; prte_binding_policy_t tmp; - char **quals, *myspec, *ptr; + char **quals, *myspec, *ptr, *p2; prte_job_t *jdata = (prte_job_t *) jdat; + uint16_t u16; /* set default */ tmp = 0; @@ -606,6 +607,25 @@ int prte_hwloc_base_set_binding_policy(void *jdat, char *spec) prte_set_attribute(&jdata->attributes, PRTE_JOB_REPORT_BINDINGS, PRTE_ATTR_GLOBAL, NULL, PMIX_BOOL); + } else if (PMIX_CHECK_CLI_OPTION(quals[i], PRTE_CLI_LIMIT)) { + if (NULL == jdata) { + pmix_show_help("help-prte-rmaps-base.txt", "unsupported-default-modifier", true, + "binding policy", quals[i]); + free(myspec); + return PRTE_ERR_SILENT; + } + /* Numeric value must immediately follow '=' (LIMIT=2) */ + u16 = strtol(&quals[i][6], &p2, 10); + if ('\0' != *p2) { + /* missing the value or value is invalid */ + pmix_show_help("help-prte-rmaps-base.txt", "invalid-value", true, + "binding limit", "LIMIT", quals[i]); + PMIX_ARGV_FREE_COMPAT(quals); + return PRTE_ERR_SILENT; + } + 
prte_set_attribute(&jdata->attributes, PRTE_JOB_BINDING_LIMIT, PRTE_ATTR_GLOBAL, + &u16, PMIX_UINT16); + } else { /* unknown option */ pmix_show_help("help-prte-hwloc-base.txt", "unrecognized-modifier", true, spec); diff --git a/src/mca/rmaps/base/rmaps_base_binding.c b/src/mca/rmaps/base/rmaps_base_binding.c index fc5abe1cf7..87525d2c5a 100644 --- a/src/mca/rmaps/base/rmaps_base_binding.c +++ b/src/mca/rmaps/base/rmaps_base_binding.c @@ -169,6 +169,7 @@ static int bind_generic(prte_job_t *jdata, prte_proc_t *proc, tmp_obj = hwloc_get_obj_inside_cpuset_by_type(node->topology->topo, prte_rmaps_base.available, type, 0); + #if HWLOC_API_VERSION < 0x20000 hwloc_bitmap_andnot(node->available, node->available, tmp_obj->allowed_cpuset); if (hwloc_bitmap_iszero(node->available) && options->overload) { diff --git a/src/mca/rmaps/base/rmaps_base_map_job.c b/src/mca/rmaps/base/rmaps_base_map_job.c index 88cb789581..9843c86c77 100644 --- a/src/mca/rmaps/base/rmaps_base_map_job.c +++ b/src/mca/rmaps/base/rmaps_base_map_job.c @@ -109,6 +109,9 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata) prte_get_attribute(&jdata->attributes, PRTE_JOB_DISPLAY_DEVEL_MAP, NULL, PMIX_BOOL)) { options.dobind = true; } + if (prte_get_attribute(&jdata->attributes, PRTE_JOB_BINDING_LIMIT, (void**) &u16ptr, PMIX_UINT16)) { + options.limit = u16; + } pmix_output_verbose(5, prte_rmaps_base_framework.framework_output, "mca:rmaps: mapping job %s", @@ -468,7 +471,7 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata) #else } else if (HWLOC_OBJ_L1CACHE == options.maptype || HWLOC_OBJ_L2CACHE == options.maptype || - HWLOC_OBJ_L1CACHE == options.maptype) { + HWLOC_OBJ_L3CACHE == options.maptype) { /* add in #cache for each node */ PMIX_LIST_FOREACH (node, &nodes, prte_node_t) { app->num_procs += options.pprn * prte_hwloc_base_get_nbobjs_by_type(node->topology->topo, diff --git a/src/mca/rmaps/rmaps_types.h b/src/mca/rmaps/rmaps_types.h index 8182ef5a3f..56acfdf908 100644 --- 
a/src/mca/rmaps/rmaps_types.h +++ b/src/mca/rmaps/rmaps_types.h @@ -13,7 +13,7 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2014-2020 Intel, Inc. All rights reserved. - * Copyright (c) 2021-2023 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2024 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -113,6 +113,7 @@ typedef struct { bool dobind; hwloc_obj_type_t hwb; unsigned clvl; + uint16_t limit; /* usage tracking */ hwloc_cpuset_t target; diff --git a/src/mca/rmaps/round_robin/rmaps_rr_mappers.c b/src/mca/rmaps/round_robin/rmaps_rr_mappers.c index 86c72f7143..4c193f8613 100644 --- a/src/mca/rmaps/round_robin/rmaps_rr_mappers.c +++ b/src/mca/rmaps/round_robin/rmaps_rr_mappers.c @@ -708,6 +708,9 @@ int prte_rmaps_rr_byobj(prte_job_t *jdata, prte_app_context_t *app, } errout: + if (PRTE_ERR_SILENT == rc) { + return rc; + } if (outofcpus) { /* ran out of cpus */ pmix_show_help("help-prte-rmaps-base.txt", diff --git a/src/mca/schizo/base/schizo_base_frame.c b/src/mca/schizo/base/schizo_base_frame.c index ad99aa0af7..22d5f46873 100644 --- a/src/mca/schizo/base/schizo_base_frame.c +++ b/src/mca/schizo/base/schizo_base_frame.c @@ -378,6 +378,7 @@ int prte_schizo_base_sanity(pmix_cli_result_t *cmd_line) PRTE_CLI_OVERLOAD, PRTE_CLI_NOOVERLOAD, PRTE_CLI_IF_SUPP, + PRTE_CLI_LIMIT, NULL }; diff --git a/src/util/attr.c b/src/util/attr.c index 2bcca8e33f..2e929f3d08 100644 --- a/src/util/attr.c +++ b/src/util/attr.c @@ -505,6 +505,8 @@ const char *prte_attr_key_to_str(prte_attribute_key_t key) return "ALLOC ID"; case PRTE_JOB_REF_ID: return "ALLOC REF ID"; + case PRTE_JOB_BINDING_LIMIT: + return "JOB BINDING LIMIT"; case PRTE_PROC_NOBARRIER: return "PROC-NOBARRIER"; diff --git a/src/util/attr.h b/src/util/attr.h index 7d8d48086d..d583f5190d 100644 --- a/src/util/attr.h +++ b/src/util/attr.h @@ -226,6 +226,8 @@ typedef uint16_t prte_job_flags_t; #define 
PRTE_JOB_REF_ID (PRTE_JOB_START_KEY + 114) // char* - string identifier assigned by the user to an allocation // request - carried along with the session that resulted // from the request +#define PRTE_JOB_BINDING_LIMIT (PRTE_JOB_START_KEY + 115) // (uint16_t) - Max number of procs to bind to specified + // target type before moving to next target #define PRTE_JOB_MAX_KEY (PRTE_JOB_START_KEY + 200) diff --git a/src/util/prte_cmd_line.h b/src/util/prte_cmd_line.h index 15f5727898..17990d020b 100644 --- a/src/util/prte_cmd_line.h +++ b/src/util/prte_cmd_line.h @@ -242,6 +242,7 @@ BEGIN_C_DECLS #define PRTE_CLI_DISPALLOC "displayalloc" // PRTE_CLI_DISPLAY reused here #define PRTE_CLI_DISPDEV "displaydevel" +#define PRTE_CLI_LIMIT "limit=" // Output qualifiers #define PRTE_CLI_NOCOPY "nocopy"