Skip to content

Commit

Permalink
added check for mem options in slurm backend
Browse files Browse the repository at this point in the history
  • Loading branch information
o-smirnov committed Feb 2, 2024
1 parent 2c0d506 commit 5c24f1f
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 14 deletions.
3 changes: 2 additions & 1 deletion stimela/backends/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def build(self, cab: 'stimela.kitchen.cab.Cab', log: logging.Logger, rebuild=Fal
command_wrapper=self.build_command_wrapper)


def validate_backend_settings(backend_opts: Dict[str, Any]) -> BackendWrapper:
def validate_backend_settings(backend_opts: Dict[str, Any], log: logging.Logger) -> BackendWrapper:
"""Checks that backend settings refer to a valid backend
Returns tuple of options, main, wrapper, where 'main' is the main backend, and 'wrapper' is an optional wrapper backend
Expand Down Expand Up @@ -63,6 +63,7 @@ def validate_backend_settings(backend_opts: Dict[str, Any]) -> BackendWrapper:
raise BackendError(f"can't combine slurm with {backend_name} backend")
is_remote = True
is_remote_fs = False
backend_opts.slurm.validate(log)
run_command_wrapper = backend_opts.slurm.run_command_wrapper
build_command_wrapper = backend_opts.slurm.build_command_wrapper
else:
Expand Down
4 changes: 2 additions & 2 deletions stimela/backends/singularity.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ def build(cab: 'stimela.kitchen.cab.Cab', backend: 'stimela.backend.StimelaBacke
args = [BINARY, "build", simg_path, f"docker://{image_name}"]

if command_wrapper:
args = command_wrapper(args)
args = command_wrapper(args, log=log)

retcode = xrun(args[0], args[1:], shell=False, log=log,
return_errcode=True, command_name="(singularity build)",
Expand Down Expand Up @@ -272,7 +272,7 @@ def elapsed(since=None):
# log.info(f"argument lengths are {[len(a) for a in args]}")

if command_wrapper:
args = command_wrapper(args, fqname=fqname)
args = command_wrapper(args, fqname=fqname, log=log)

retcode = xrun(args[0], args[1:], shell=False, log=log,
output_wrangler=cabstat.apply_wranglers,
Expand Down
19 changes: 14 additions & 5 deletions stimela/backends/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from omegaconf import OmegaConf
from typing import Dict, List, Any, Optional, Tuple
from contextlib import ExitStack
from scabha.basetypes import EmptyListDefault, EmptyDictDefault
from scabha.basetypes import EmptyListDefault, EmptyDictDefault, ListDefault
import datetime
from stimela.utils.xrun_asyncio import xrun

Expand All @@ -25,6 +25,8 @@ class SlurmOptions(object):
srun_path: Optional[str] = None # path to srun executable
srun_opts: Dict[str, str] = EmptyDictDefault() # extra options passed to srun. "--" prepended, and "_" replaced by "-"
build_local = True # if True, images will be built locally (i.e. on the head node) even when slurm is enabled
# these will be checked for
required_mem_opts: Optional[List[str]] = ListDefault("mem", "mem-per-cpu", "mem-per-gpu")

def get_executable(self):
global _default_srun_path
Expand All @@ -41,23 +43,30 @@ def get_executable(self):
raise BackendError(f"slurm.srun_path '{self.srun}' is not an executable")
return self.srun

def run_command_wrapper(self, args: List[str], fqname: Optional[str]=None) -> List[str]:
def run_command_wrapper(self, args: List[str], fqname: Optional[str]=None, log: Optional[logging.Logger]=None) -> List[str]:
output_args = [self.get_executable()]

if fqname is not None:
output_args += ["-J", fqname]

# add all base options that have been specified
for name, value in self.srun_opts.items():
output_args += ["--" + name.replace("_", "-"), value]
output_args += ["--" + name, value]

output_args += args
return output_args

def build_command_wrapper(self, args: List[str], fqname: Optional[str]=None) -> List[str]:
def build_command_wrapper(self, args: List[str], fqname: Optional[str]=None, log: Optional[logging.Logger]=None) -> List[str]:
if self.build_local:
return args
return self.run_command_wrapper(args, fqname=fqname)
return self.run_command_wrapper(args, fqname=fqname, log=log)

def validate(self, log: logging.Logger):
    """Check that srun_opts sets at least one of the required memory options.

    The check is skipped entirely when ``required_mem_opts`` is empty or None.

    Args:
        log: logger supplied by the caller; not used by this method.

    Raises:
        BackendError: if none of the names in ``required_mem_opts`` appears
            among the keys of ``srun_opts``.
    """
    # An empty or None list of required options disables the check.
    if not self.required_mem_opts:
        return
    # Keys are compared verbatim against the required names.
    present = set(self.srun_opts.keys()).intersection(self.required_mem_opts)
    if not present:
        raise BackendError(f"slurm.srun_opts must set one of the following: {', '.join(self.required_mem_opts)}")




SlurmOptionsSchema = OmegaConf.structured(SlurmOptions)
Expand Down
6 changes: 3 additions & 3 deletions stimela/kitchen/step.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def finalize(self, config=None, log=None, fqname=None, backend=None, nesting=0):
backend or {},
self.cargo.backend or {},
self.backend or {}))
runner.validate_backend_settings(backend_opts)
runner.validate_backend_settings(backend_opts, log=log)


def prevalidate(self, subst: Optional[SubstitutionNS]=None, root=False):
Expand Down Expand Up @@ -332,7 +332,7 @@ def build(self, backend={}, rebuild=False, build_skips=False, log: Optional[logg
try:
backend = OmegaConf.merge(backend, self.cargo.backend or {}, self.backend or {})
backend = OmegaConf.to_object(OmegaConf.merge(StimelaBackendSchema, backend))
backend_wrapper = runner.validate_backend_settings(backend)
backend_wrapper = runner.validate_backend_settings(backend, log=log)
except Exception as exc:
newexc = BackendError("error validating backend settings", exc)
raise newexc from None
Expand All @@ -354,7 +354,7 @@ def run(self, backend={}, subst=None, parent_log=None):
backend = OmegaConf.merge(backend, self.cargo.backend or {}, self.backend or {})
backend_opts = evaluate_and_substitute_object(backend, subst, recursion_level=-1, location=[self.fqname, "backend"])
backend_opts = OmegaConf.to_object(OmegaConf.merge(StimelaBackendSchema, backend_opts))
backend_wrapper = runner.validate_backend_settings(backend_opts)
backend_wrapper = runner.validate_backend_settings(backend_opts, log=self.log)
except Exception as exc:
newexc = BackendError("error validating backend settings", exc)
raise newexc from None
Expand Down
4 changes: 1 addition & 3 deletions stimela/stimelogging.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@
from rich.padding import Padding

from . import task_stats
from .task_stats import declare_subtask, declare_subtask_attributes, \
declare_subcommand, update_process_status, \
run_process_status_update
from .task_stats import declare_subcommand, declare_subtask, declare_subtask_attributes, run_process_status_update

class FunkyMessage(object):
"""Class representing a message with two versions: funky (with markup), and boring (no markup)"""
Expand Down

0 comments on commit 5c24f1f

Please sign in to comment.