Skip to content

Commit 5c844a7

Browse files
rhc54 authored and hppritcha committed
Protect against the envar version of the Slurm custom args param
Protect against the envar version of the Slurm custom args MCA param. This is an unfortunate hack that hopefully will eventually go away. See both of the following for detailed explanations and discussion: openpmix/prrte#1974 and open-mpi/ompi#12471. Orgs/users wanting to add custom args to the internal "srun" command used to spawn the PRRTE daemons must do so via the default MCA param files (system or user), or via the prterun (or its proxy) cmd line. Signed-off-by: Ralph Castain <rhc@pmix.org> (from upstream commit 28432ed)
1 parent 13f6975 commit 5c844a7

File tree

4 files changed

+58
-44
lines changed

4 files changed

+58
-44
lines changed

src/mca/plm/slurm/plm_slurm.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,8 @@ BEGIN_C_DECLS
3333

3434
struct prte_mca_plm_slurm_component_t {
3535
prte_plm_base_component_t super;
36-
int custom_args_index;
3736
char *custom_args;
38-
bool slurm_warning_msg;
37+
bool early;
3938
};
4039
typedef struct prte_mca_plm_slurm_component_t prte_mca_plm_slurm_component_t;
4140

src/mca/plm/slurm/plm_slurm_component.c

Lines changed: 33 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "src/util/name_fns.h"
3939
#include "src/util/pmix_environ.h"
4040
#include "src/util/pmix_show_help.h"
41+
#include "src/util/pmix_string_copy.h"
4142

4243
#include "plm_slurm.h"
4344
#include "src/mca/plm/base/plm_private.h"
@@ -84,28 +85,15 @@ prte_mca_plm_slurm_component_t prte_mca_plm_slurm_component = {
8485
here; will be initialized in plm_slurm_open() */
8586
};
8687

87-
static char *custom_args = NULL;
88-
static char *force_args = NULL;
89-
9088
static int plm_slurm_register(void)
9189
{
9290
pmix_mca_base_component_t *comp = &prte_mca_plm_slurm_component.super;
9391

9492

95-
prte_mca_plm_slurm_component.custom_args_index =
96-
pmix_mca_base_component_var_register(comp, "args", "Custom arguments to srun",
97-
PMIX_MCA_BASE_VAR_TYPE_STRING,
98-
&custom_args);
99-
100-
force_args = NULL;
101-
(void) pmix_mca_base_component_var_register(comp, "force_args", "Mandatory custom arguments to srun",
102-
PMIX_MCA_BASE_VAR_TYPE_STRING,
103-
&force_args);
104-
105-
prte_mca_plm_slurm_component.slurm_warning_msg = false;
106-
(void) pmix_mca_base_component_var_register(comp, "disable_warning", "Turn off warning message about custom args set in environment",
107-
PMIX_MCA_BASE_VAR_TYPE_BOOL,
108-
&prte_mca_plm_slurm_component.slurm_warning_msg);
93+
prte_mca_plm_slurm_component.custom_args = NULL;
94+
pmix_mca_base_component_var_register(comp, "args", "Custom arguments to srun",
95+
PMIX_MCA_BASE_VAR_TYPE_STRING,
96+
&prte_mca_plm_slurm_component.custom_args);
10997

11098
return PRTE_SUCCESS;
11199
}
@@ -117,40 +105,44 @@ static int plm_slurm_open(void)
117105

118106
static int prte_mca_plm_slurm_component_query(pmix_mca_base_module_t **module, int *priority)
119107
{
120-
const pmix_mca_base_var_t *var;
121-
pmix_status_t rc;
108+
FILE *fp;
109+
char version[1024], *ptr;
110+
int major, minor;
122111

123112
/* Are we running under a SLURM job? */
124-
125113
if (NULL != getenv("SLURM_JOBID")) {
126114
*priority = 75;
127115

128116
PMIX_OUTPUT_VERBOSE((1, prte_plm_base_framework.framework_output,
129117
"%s plm:slurm: available for selection",
130118
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)));
131119

132-
prte_mca_plm_slurm_component.custom_args = NULL;
133-
134-
// if we were are warning about externally set custom args, then
135-
// check to see if that was done
136-
if (!prte_mca_plm_slurm_component.slurm_warning_msg &&
137-
NULL == force_args) {
138-
// check for custom args
139-
rc = pmix_mca_base_var_get(prte_mca_plm_slurm_component.custom_args_index, &var);
140-
if (PMIX_SUCCESS == rc) {
141-
// the variable was set - see who set it
142-
if (PMIX_MCA_BASE_VAR_SOURCE_ENV == var->mbv_source) {
143-
// set in the environment - warn
144-
pmix_show_help("help-plm-slurm.txt", "custom-args-in-env", true,
145-
custom_args);
146-
}
147-
}
120+
// check the version
121+
fp = popen("srun --version", "r");
122+
if (NULL == fp) {
123+
// cannot run srun, so we cannot support this job
124+
*module = NULL;
125+
return PRTE_ERROR;
148126
}
149-
150-
if (NULL != force_args) {
151-
prte_mca_plm_slurm_component.custom_args = force_args;
152-
} else if (NULL != custom_args) {
153-
prte_mca_plm_slurm_component.custom_args = custom_args;
127+
if (NULL == fgets(version, sizeof(version), fp)) {
128+
pclose(fp);
129+
*module = NULL;
130+
return PRTE_ERROR;
131+
}
132+
pclose(fp);
133+
// parse on the dots
134+
major = strtol(&version[6], &ptr, 10);
135+
++ptr;
136+
minor = strtol(ptr, NULL, 10);
137+
138+
if (23 > major) {
139+
prte_mca_plm_slurm_component.early = true;
140+
} else if (23 < major) {
141+
prte_mca_plm_slurm_component.early = false;
142+
} else if (11 > minor) {
143+
prte_mca_plm_slurm_component.early = true;
144+
} else {
145+
prte_mca_plm_slurm_component.early = false;
154146
}
155147

156148
*module = (pmix_mca_base_module_t *) &prte_plm_slurm_module;

src/mca/plm/slurm/plm_slurm_module.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2019 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
18-
* Copyright (c) 2021-2023 Nanook Consulting. All rights reserved.
18+
* Copyright (c) 2021-2024 Nanook Consulting All rights reserved.
1919
* $COPYRIGHT$
2020
*
2121
* Additional copyrights may follow
@@ -253,6 +253,11 @@ static void launch_daemons(int fd, short args, void *cbdata)
253253
/* add the srun command */
254254
pmix_argv_append(&argc, &argv, "srun");
255255

256+
// add the external launcher flag if necessary
257+
if (!prte_mca_plm_slurm_component.early) {
258+
pmix_argv_append(&argc, &argv, "--external-launcher");
259+
}
260+
256261
/* start one orted on each node */
257262
pmix_argv_append(&argc, &argv, "--ntasks-per-node=1");
258263

src/runtime/prte_init.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,24 @@ int prte_init_minimum(void)
179179
return PRTE_ERR_SILENT;
180180
}
181181

182+
/* Protect against the envar version of the Slurm
183+
* custom args MCA param. This is an unfortunate
184+
* hack that hopefully will eventually go away.
185+
* See both of the following for detailed
186+
* explanations and discussion:
187+
*
188+
* https://github.com/openpmix/prrte/issues/1974
189+
* https://github.com/open-mpi/ompi/issues/12471
190+
*
191+
* Orgs/users wanting to add custom args to the
192+
* internal "srun" command used to spawn the
193+
* PRRTE daemons must do so via the default MCA
194+
* param files (system or user), or via the
195+
* prterun (or its proxy) cmd line
196+
*/
197+
unsetenv("PRTE_MCA_plm_slurm_args");
198+
unsetenv("OMPI_MCA_plm_slurm_args");
199+
182200
/* carry across the toolname */
183201
pmix_tool_basename = prte_tool_basename;
184202

0 commit comments

Comments
 (0)