From 4296b8b6ca53b901c296f4284515138056156607 Mon Sep 17 00:00:00 2001 From: Bart Oldeman Date: Fri, 4 Oct 2024 16:26:51 +0000 Subject: [PATCH] Let jobs tweak easyconfigs themselves This can be accomplished by tweak() optionally also returning a dict which maps the tweaked easyconfig to the original version. Then the job can run `eb ... --try-*` and that original easyconfig will be retweaked in the job itself. If the easyconfig passed to the job is not tweaked, then `--try-*` is *not* passed down (so, with `--robot`, some jobs will have `--try-*` and some don't). This removes the requirement of a shared tmpdir with `--job --try-*`. Fixes #1355 --- easybuild/framework/easyconfig/tweak.py | 22 ++++++++----- easybuild/main.py | 7 +++-- easybuild/tools/job/backend.py | 3 +- easybuild/tools/parallelbuild.py | 42 ++++++++++++++++--------- test/framework/options.py | 10 ++++-- test/framework/parallelbuild.py | 6 ++-- 6 files changed, 61 insertions(+), 29 deletions(-) diff --git a/easybuild/framework/easyconfig/tweak.py b/easybuild/framework/easyconfig/tweak.py index 2a88e56d6a..95c69ce80d 100644 --- a/easybuild/framework/easyconfig/tweak.py +++ b/easybuild/framework/easyconfig/tweak.py @@ -36,6 +36,7 @@ * Fotis Georgatos (Uni.Lu, NTUA) * Alan O'Cais (Juelich Supercomputing Centre) * Maxime Boissonneault (Universite Laval, Calcul Quebec, Compute Canada) +* Bart Oldeman (McGill University, Calcul Quebec, Digital Research Alliance of Canada) """ import copy import functools @@ -82,8 +83,9 @@ def ec_filename_for(path): return fn -def tweak(easyconfigs, build_specs, modtool, targetdirs=None): - """Tweak list of easyconfigs according to provided build specifications.""" +def tweak(easyconfigs, build_specs, modtool, targetdirs=None, return_map=False): + """Tweak list of easyconfigs according to provided build specifications. 
+ If return_map=True, also returns tweaked to original file mapping""" # keep track of originally listed easyconfigs (via their path) listed_ec_paths = [ec['spec'] for ec in easyconfigs] @@ -92,6 +94,7 @@ def tweak(easyconfigs, build_specs, modtool, targetdirs=None): tweaked_ecs_path, tweaked_ecs_deps_path = targetdirs modifying_toolchains_or_deps = False src_to_dst_tc_mapping = {} + tweak_map = {} revert_to_regex = False if 'update_deps' in build_specs: @@ -223,13 +226,18 @@ def tweak(easyconfigs, build_specs, modtool, targetdirs=None): if modifying_toolchains_or_deps: if tc_name in src_to_dst_tc_mapping: # Note pruned_build_specs are not passed down for dependencies - map_easyconfig_to_target_tc_hierarchy(orig_ec['spec'], src_to_dst_tc_mapping, - targetdir=tweaked_ecs_deps_path, - update_dep_versions=update_dependencies, - ignore_versionsuffixes=ignore_versionsuffixes) + new_ec_file = map_easyconfig_to_target_tc_hierarchy(orig_ec['spec'], src_to_dst_tc_mapping, + targetdir=tweaked_ecs_deps_path, + update_dep_versions=update_dependencies, + ignore_versionsuffixes=ignore_versionsuffixes) else: - tweak_one(orig_ec['spec'], None, build_specs, targetdir=tweaked_ecs_deps_path) + new_ec_file = tweak_one(orig_ec['spec'], None, build_specs, targetdir=tweaked_ecs_deps_path) + + if new_ec_file: + tweak_map[new_ec_file] = orig_ec['spec'] + if return_map: + return tweaked_easyconfigs, tweak_map return tweaked_easyconfigs diff --git a/easybuild/main.py b/easybuild/main.py index 83836d22c3..ed91722b23 100644 --- a/easybuild/main.py +++ b/easybuild/main.py @@ -37,6 +37,7 @@ * Ward Poelmans (Ghent University) * Fotis Georgatos (Uni.Lu, NTUA) * Maxime Boissonneault (Compute Canada) +* Bart Oldeman (McGill University, Calcul Quebec, Digital Research Alliance of Canada) """ import copy import os @@ -430,7 +431,9 @@ def process_eb_args(eb_args, eb_go, cfg_settings, modtool, testing, init_session # don't try and tweak anything if easyconfigs were generated, since building a full dep 
graph will fail # if easyconfig files for the dependencies are not available if try_to_generate and build_specs and not generated_ecs: - easyconfigs = tweak(easyconfigs, build_specs, modtool, targetdirs=tweaked_ecs_paths) + easyconfigs, tweak_map = tweak(easyconfigs, build_specs, modtool, targetdirs=tweaked_ecs_paths, return_map=True) + else: + tweak_map = None if options.containerize: # if --containerize/-C create a container recipe (and optionally container image), and stop @@ -552,7 +555,7 @@ def process_eb_args(eb_args, eb_go, cfg_settings, modtool, testing, init_session # submit build as job(s), clean up and exit if options.job: - submit_jobs(ordered_ecs, eb_go.generate_cmd_line(), testing=testing) + submit_jobs(ordered_ecs, eb_go.generate_cmd_line(), testing=testing, tweak_map=tweak_map) if not testing: print_msg("Submitted parallel build jobs, exiting now") return True diff --git a/easybuild/tools/job/backend.py b/easybuild/tools/job/backend.py index 1219883740..32b7da59cd 100644 --- a/easybuild/tools/job/backend.py +++ b/easybuild/tools/job/backend.py @@ -32,6 +32,7 @@ """ from abc import ABCMeta, abstractmethod +from types import SimpleNamespace from easybuild.base import fancylogger from easybuild.tools.config import get_job_backend @@ -69,7 +70,7 @@ def make_job(self, script, name, env_vars=None, hours=None, cores=None): See the `Job`:class: constructor for an explanation of what the arguments are. 
""" - pass + return SimpleNamespace() @abstractmethod def queue(self, job, dependencies=frozenset()): diff --git a/easybuild/tools/parallelbuild.py b/easybuild/tools/parallelbuild.py index b25d8b9bdc..97e824b198 100644 --- a/easybuild/tools/parallelbuild.py +++ b/easybuild/tools/parallelbuild.py @@ -33,6 +33,7 @@ * Toon Willems (Ghent University) * Kenneth Hoste (Ghent University) * Stijn De Weirdt (Ghent University) +* Bart Oldeman (McGill University, Calcul Quebec, Digital Research Alliance of Canada) """ import math import os @@ -45,7 +46,7 @@ from easybuild.tools.config import build_option, get_repository, get_repositorypath from easybuild.tools.filetools import get_cwd from easybuild.tools.module_naming_scheme.utilities import det_full_ec_version -from easybuild.tools.job.backend import job_backend +from easybuild.tools.job.backend import job_backend, JobBackend from easybuild.tools.repository.repository import init_repository @@ -57,7 +58,8 @@ def _to_key(dep): return ActiveMNS().det_full_module_name(dep) -def build_easyconfigs_in_parallel(build_command, easyconfigs, output_dir='easybuild-build', prepare_first=True): +def build_easyconfigs_in_parallel(build_command, easyconfigs, output_dir='easybuild-build', testing=False, + prepare_first=True, tweak_map=None, try_opts=''): """ Build easyconfigs in parallel by submitting jobs to a batch-queuing system. Return list of jobs submitted. 
@@ -69,11 +71,14 @@ def build_easyconfigs_in_parallel(build_command, easyconfigs, output_dir='easybu :param build_command: build command to use :param easyconfigs: list of easyconfig files :param output_dir: output directory + :param testing: If `True`, skip actual job submission :param prepare_first: prepare by runnning fetch step first for each easyconfig + :param tweak_map: Mapping from tweaked to original easyconfigs + :param try_opts: --try-* options to pass if the easyconfig is tweaked """ _log.info("going to build these easyconfigs in parallel: %s", [os.path.basename(ec['spec']) for ec in easyconfigs]) - active_job_backend = job_backend() + active_job_backend = JobBackend() if testing else job_backend() if active_job_backend is None: raise EasyBuildError("Can not use --job if no job backend is available.") @@ -93,12 +98,17 @@ def build_easyconfigs_in_parallel(build_command, easyconfigs, output_dir='easybu # this is very important, otherwise we might have race conditions # e.g. GCC-4.5.3 finds cloog.tar.gz but it was incorrectly downloaded by GCC-4.6.3 # running this step here, prevents this - if prepare_first: + if prepare_first and not testing: prepare_easyconfig(easyconfig) + # use the original easyconfig plus --try-* options for tweaked ones, to avoid needing a shared tmpdir + spec = easyconfig['spec'] + if spec in (tweak_map or {}): + spec = tweak_map[spec] + try_opts + # the new job will only depend on already submitted jobs - _log.info("creating job for ec: %s" % os.path.basename(easyconfig['spec'])) - new_job = create_job(active_job_backend, build_command, easyconfig, output_dir=output_dir) + _log.info("creating job for ec: %s using %s" % (os.path.basename(easyconfig['spec']), spec)) + new_job = create_job(active_job_backend, build_command, easyconfig, output_dir=output_dir, spec=spec) # filter out dependencies marked as external modules deps = [d for d in easyconfig['ec'].all_dependencies if not d.get('external_module', False)] @@ -116,24 +126,27 @@ def build_easyconfigs_in_parallel(build_command, 
easyconfigs, output_dir='easybu active_job_backend.complete() - return jobs + return build_command if testing else jobs -def submit_jobs(ordered_ecs, cmd_line_opts, testing=False, prepare_first=True): +def submit_jobs(ordered_ecs, cmd_line_opts, testing=False, prepare_first=True, tweak_map=None): """ Submit jobs. :param ordered_ecs: list of easyconfigs, in the order they should be processed :param cmd_line_opts: list of command line options (in 'longopt=value' form) :param testing: If `True`, skip actual job submission :param prepare_first: prepare by runnning fetch step first for each easyconfig + :param tweak_map: Mapping from tweaked to original easyconfigs """ curdir = get_cwd() - # regex pattern for options to ignore (help options can't reach here) + # regex patterns for options to ignore and tweak options (help options can't reach here) ignore_opts = re.compile('^--robot$|^--job|^--try-.*$|^--easystack$') + try_opts_re = re.compile('^--try-.*$') # generate_cmd_line returns the options in form --longopt=value opts = [o for o in cmd_line_opts if not ignore_opts.match(o.split('=')[0])] + try_opts = [o for o in cmd_line_opts if try_opts_re.match(o.split('=')[0])] # add --disable-job to make sure the submitted job doesn't submit a job itself, # resulting in an infinite cycle of jobs; @@ -143,6 +156,7 @@ def submit_jobs(ordered_ecs, cmd_line_opts, testing=False, prepare_first=True): # compose string with command line options, properly quoted and with '%' characters escaped opts_str = ' '.join(opts).replace('%', '%%') + try_opts_str = ' ' + ' '.join(try_opts).replace('%', '%%') eb_cmd = build_option('job_eb_cmd') @@ -154,12 +168,11 @@ def submit_jobs(ordered_ecs, cmd_line_opts, testing=False, prepare_first=True): _log.info("Command template for jobs: %s", command) if testing: _log.debug("Skipping actual submission of jobs since testing mode is enabled") - return command - else: - return build_easyconfigs_in_parallel(command, ordered_ecs, prepare_first=prepare_first) 
+ return build_easyconfigs_in_parallel(command, ordered_ecs, testing=testing, prepare_first=prepare_first, + tweak_map=tweak_map, try_opts=try_opts_str) -def create_job(job_backend, build_command, easyconfig, output_dir='easybuild-build'): +def create_job(job_backend, build_command, easyconfig, output_dir='easybuild-build', spec=''): """ Creates a job to build a *single* easyconfig. @@ -167,6 +180,7 @@ def create_job(job_backend, build_command, easyconfig, output_dir='easybuild-bui :param build_command: format string for command, full path to an easyconfig file will be substituted in it :param easyconfig: easyconfig as processed by process_easyconfig :param output_dir: optional output path; --regtest-output-dir will be used inside the job with this variable + :param spec: untweaked easyconfig name with optional --try-* options returns the job """ @@ -183,7 +197,7 @@ def create_job(job_backend, build_command, easyconfig, output_dir='easybuild-bui command = build_command % { 'add_opts': add_opts, 'output_dir': os.path.join(os.path.abspath(output_dir), name), - 'spec': easyconfig['spec'], + 'spec': spec or easyconfig['spec'], } # just use latest build stats diff --git a/test/framework/options.py b/test/framework/options.py index f10d01cf8d..fb6732fb90 100644 --- a/test/framework/options.py +++ b/test/framework/options.py @@ -482,7 +482,7 @@ def test_job(self): # use gzip-1.4.eb easyconfig file that comes with the tests eb_file = os.path.join(os.path.dirname(__file__), 'easyconfigs', 'test_ecs', 'g', 'gzip', 'gzip-1.4.eb') - def check_args(job_args, passed_args=None): + def check_args(job_args, passed_args=None, try_opts='', tweaked_eb_file='gzip-1.4.eb'): """Check whether specified args yield expected result.""" if passed_args is None: passed_args = job_args[:] @@ -501,10 +501,16 @@ def check_args(job_args, passed_args=None): assertmsg = "Info log msg with job command template for --job (job_msg: %s, outtxt: %s)" % (job_msg, outtxt) self.assertTrue(re.search(job_msg, 
outtxt), assertmsg) + job_msg = r"INFO creating job for ec: %s using %s%s\n" % (tweaked_eb_file, eb_file, try_opts) + assertmsg = "Info log msg with creating job for --job (job_msg: %s, outtxt: %s)" % (job_msg, outtxt) + self.assertTrue(re.search(job_msg, outtxt), assertmsg) + # options passed are reordered, so order here matters to make tests pass check_args(['--debug']) check_args(['--debug', '--stop=configure', '--try-software-name=foo'], - passed_args=['--debug', "--stop='configure'"]) + passed_args=['--debug', "--stop='configure'"], + try_opts = " --try-software-name='foo'", + tweaked_eb_file = "foo-1.4.eb") check_args(['--debug', '--robot-paths=/tmp/foo:/tmp/bar'], passed_args=['--debug', "--robot-paths='/tmp/foo:/tmp/bar'"]) # --robot has preference over --robot-paths, --robot is not passed down diff --git a/test/framework/parallelbuild.py b/test/framework/parallelbuild.py index 975e46375b..a80adcb6e3 100644 --- a/test/framework/parallelbuild.py +++ b/test/framework/parallelbuild.py @@ -290,7 +290,7 @@ def test_build_easyconfigs_in_parallel_gc3pie(self): def test_submit_jobs(self): """Test submit_jobs""" test_easyconfigs_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'easyconfigs', 'test_ecs') - toy_ec = os.path.join(test_easyconfigs_dir, 't', 'toy', 'toy-0.0.eb') + toy_ec = process_easyconfig(os.path.join(test_easyconfigs_dir, 't', 'toy', 'toy-0.0.eb')) args = [ '--debug', @@ -303,7 +303,7 @@ def test_submit_jobs(self): '--job-cores=3', ] eb_go = parse_options(args=args) - cmd = submit_jobs([toy_ec], eb_go.generate_cmd_line(), testing=True) + cmd = submit_jobs(toy_ec, eb_go.generate_cmd_line(), testing=True) # these patterns must be found regexs = [ @@ -331,7 +331,7 @@ def test_submit_jobs(self): # test again with custom EasyBuild command to use in jobs update_build_option('job_eb_cmd', "/just/testing/bin/eb --debug") - cmd = submit_jobs([toy_ec], eb_go.generate_cmd_line(), testing=True) + cmd = submit_jobs(toy_ec, eb_go.generate_cmd_line(), 
testing=True) regex = re.compile(r" && /just/testing/bin/eb --debug %\(spec\)s ") self.assertTrue(regex.search(cmd), "Pattern '%s' found in: %s" % (regex.pattern, cmd))