Skip to content

Commit 852efc8

Browse files
authored
Merge pull request #38 from hppritcha/upstream_pr2031
Protect against the envar version of the Slurm custom args param
2 parents 0f0a900 + 5c844a7 commit 852efc8

File tree

4 files changed

+58
-44
lines changed

4 files changed

+58
-44
lines changed

src/mca/plm/slurm/plm_slurm.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,8 @@ BEGIN_C_DECLS
3333

3434
struct prte_mca_plm_slurm_component_t {
3535
prte_plm_base_component_t super;
36-
int custom_args_index;
3736
char *custom_args;
38-
bool slurm_warning_msg;
37+
bool early;
3938
};
4039
typedef struct prte_mca_plm_slurm_component_t prte_mca_plm_slurm_component_t;
4140

src/mca/plm/slurm/plm_slurm_component.c

Lines changed: 33 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "src/util/name_fns.h"
3939
#include "src/util/pmix_environ.h"
4040
#include "src/util/pmix_show_help.h"
41+
#include "src/util/pmix_string_copy.h"
4142

4243
#include "plm_slurm.h"
4344
#include "src/mca/plm/base/plm_private.h"
@@ -84,28 +85,15 @@ prte_mca_plm_slurm_component_t prte_mca_plm_slurm_component = {
8485
here; will be initialized in plm_slurm_open() */
8586
};
8687

87-
static char *custom_args = NULL;
88-
static char *force_args = NULL;
89-
9088
static int plm_slurm_register(void)
9189
{
9290
pmix_mca_base_component_t *comp = &prte_mca_plm_slurm_component.super;
9391

9492

95-
prte_mca_plm_slurm_component.custom_args_index =
96-
pmix_mca_base_component_var_register(comp, "args", "Custom arguments to srun",
97-
PMIX_MCA_BASE_VAR_TYPE_STRING,
98-
&custom_args);
99-
100-
force_args = NULL;
101-
(void) pmix_mca_base_component_var_register(comp, "force_args", "Mandatory custom arguments to srun",
102-
PMIX_MCA_BASE_VAR_TYPE_STRING,
103-
&force_args);
104-
105-
prte_mca_plm_slurm_component.slurm_warning_msg = false;
106-
(void) pmix_mca_base_component_var_register(comp, "disable_warning", "Turn off warning message about custom args set in environment",
107-
PMIX_MCA_BASE_VAR_TYPE_BOOL,
108-
&prte_mca_plm_slurm_component.slurm_warning_msg);
93+
prte_mca_plm_slurm_component.custom_args = NULL;
94+
pmix_mca_base_component_var_register(comp, "args", "Custom arguments to srun",
95+
PMIX_MCA_BASE_VAR_TYPE_STRING,
96+
&prte_mca_plm_slurm_component.custom_args);
10997

11098
return PRTE_SUCCESS;
11199
}
@@ -117,40 +105,44 @@ static int plm_slurm_open(void)
117105

118106
static int prte_mca_plm_slurm_component_query(pmix_mca_base_module_t **module, int *priority)
119107
{
120-
const pmix_mca_base_var_t *var;
121-
pmix_status_t rc;
108+
FILE *fp;
109+
char version[1024], *ptr;
110+
int major, minor;
122111

123112
/* Are we running under a SLURM job? */
124-
125113
if (NULL != getenv("SLURM_JOBID")) {
126114
*priority = 75;
127115

128116
PMIX_OUTPUT_VERBOSE((1, prte_plm_base_framework.framework_output,
129117
"%s plm:slurm: available for selection",
130118
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)));
131119

132-
prte_mca_plm_slurm_component.custom_args = NULL;
133-
134-
// if we are warning about externally set custom args, then
135-
// check to see if that was done
136-
if (!prte_mca_plm_slurm_component.slurm_warning_msg &&
137-
NULL == force_args) {
138-
// check for custom args
139-
rc = pmix_mca_base_var_get(prte_mca_plm_slurm_component.custom_args_index, &var);
140-
if (PMIX_SUCCESS == rc) {
141-
// the variable was set - see who set it
142-
if (PMIX_MCA_BASE_VAR_SOURCE_ENV == var->mbv_source) {
143-
// set in the environment - warn
144-
pmix_show_help("help-plm-slurm.txt", "custom-args-in-env", true,
145-
custom_args);
146-
}
147-
}
120+
// check the version
121+
fp = popen("srun --version", "r");
122+
if (NULL == fp) {
123+
// cannot run srun, so we cannot support this job
124+
*module = NULL;
125+
return PRTE_ERROR;
148126
}
149-
150-
if (NULL != force_args) {
151-
prte_mca_plm_slurm_component.custom_args = force_args;
152-
} else if (NULL != custom_args) {
153-
prte_mca_plm_slurm_component.custom_args = custom_args;
127+
if (NULL == fgets(version, sizeof(version), fp)) {
128+
pclose(fp);
129+
*module = NULL;
130+
return PRTE_ERROR;
131+
}
132+
pclose(fp);
133+
// parse on the dots
134+
major = strtol(&version[6], &ptr, 10);
135+
++ptr;
136+
minor = strtol(ptr, NULL, 10);
137+
138+
if (23 > major) {
139+
prte_mca_plm_slurm_component.early = true;
140+
} else if (23 < major) {
141+
prte_mca_plm_slurm_component.early = false;
142+
} else if (11 > minor) {
143+
prte_mca_plm_slurm_component.early = true;
144+
} else {
145+
prte_mca_plm_slurm_component.early = false;
154146
}
155147

156148
*module = (pmix_mca_base_module_t *) &prte_plm_slurm_module;

src/mca/plm/slurm/plm_slurm_module.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
1616
* Copyright (c) 2019 Research Organization for Information Science
1717
* and Technology (RIST). All rights reserved.
18-
* Copyright (c) 2021-2023 Nanook Consulting. All rights reserved.
18+
* Copyright (c) 2021-2024 Nanook Consulting All rights reserved.
1919
* $COPYRIGHT$
2020
*
2121
* Additional copyrights may follow
@@ -253,6 +253,11 @@ static void launch_daemons(int fd, short args, void *cbdata)
253253
/* add the srun command */
254254
pmix_argv_append(&argc, &argv, "srun");
255255

256+
// add the external launcher flag if necessary
257+
if (!prte_mca_plm_slurm_component.early) {
258+
pmix_argv_append(&argc, &argv, "--external-launcher");
259+
}
260+
256261
/* start one orted on each node */
257262
pmix_argv_append(&argc, &argv, "--ntasks-per-node=1");
258263

src/runtime/prte_init.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,24 @@ int prte_init_minimum(void)
179179
return PRTE_ERR_SILENT;
180180
}
181181

182+
/* Protect against the envar version of the Slurm
183+
* custom args MCA param. This is an unfortunate
184+
* hack that hopefully will eventually go away.
185+
* See both of the following for detailed
186+
* explanations and discussion:
187+
*
188+
* https://github.com/openpmix/prrte/issues/1974
189+
* https://github.com/open-mpi/ompi/issues/12471
190+
*
191+
* Orgs/users wanting to add custom args to the
192+
* internal "srun" command used to spawn the
193+
* PRRTE daemons must do so via the default MCA
194+
* param files (system or user), or via the
195+
* prterun (or its proxy) cmd line
196+
*/
197+
unsetenv("PRTE_MCA_plm_slurm_args");
198+
unsetenv("OMPI_MCA_plm_slurm_args");
199+
182200
/* carry across the toolname */
183201
pmix_tool_basename = prte_tool_basename;
184202

0 commit comments

Comments
 (0)