From bd24ef7cdd7693c0b06eab3adb79ff241c3a8244 Mon Sep 17 00:00:00 2001 From: Georgios Kafanas Date: Sat, 3 Aug 2024 01:48:34 +0200 Subject: [PATCH] Copy build log and artifacts to a permanent location after failures The files can be build in some selected build path (--buildpath), and the logs of successful compilation are then concentrated to some other location for permanent storage (--logfile-format). Logs of failed builds remain in the build path location so that they can be inspected. However, this setup is problematic when building software in HPC jobs. Quite often in HPC systems the build path is set to some fast storage local to the node, like NVME raid mounted on `/tmp` or `/dev/shm` (as suggested in the documentation: https://docs.easybuild.io/configuration/#buildpath). The node storage is often wiped out after the end of a job, so the log files and the artifacts are no longer available after the termination of the job. This commit adds options (--log-error-path and --artifact-error-path) to accumulate error logs and artifacts in some more permanent locations, so that the can be easily inspected after a failed build. --- easybuild/framework/easyblock.py | 122 ++++++++++++++++-- easybuild/tools/config.py | 56 ++++++++ easybuild/tools/options.py | 12 +- test/framework/options.py | 11 +- .../sandbox/sources/toy/toy-0.0_add-bug.patch | 10 ++ test/framework/toy_build.py | 103 ++++++++++++++- test/framework/utilities.py | 4 + 7 files changed, 298 insertions(+), 20 deletions(-) create mode 100644 test/framework/sandbox/sources/toy/toy-0.0_add-bug.patch diff --git a/easybuild/framework/easyblock.py b/easybuild/framework/easyblock.py index a515bc4c86..8847759000 100644 --- a/easybuild/framework/easyblock.py +++ b/easybuild/framework/easyblock.py @@ -76,15 +76,17 @@ from easybuild.tools.config import EASYBUILD_SOURCES_URL # noqa from easybuild.tools.config import build_option, build_path, get_log_filename, get_repository, get_repositorypath from easybuild.tools.config import install_path, log_path, package_path, source_paths +from easybuild.tools.config import get_log_error_path, get_artifact_error_path from easybuild.tools.environment import restore_env, sanitize_env from easybuild.tools.filetools import CHECKSUM_TYPE_MD5, CHECKSUM_TYPE_SHA256 from easybuild.tools.filetools import adjust_permissions, apply_patch, back_up_file, change_dir, check_lock -from easybuild.tools.filetools import compute_checksum, convert_name, copy_file, create_lock, create_patch_info -from easybuild.tools.filetools import derive_alt_pypi_url, diff_files, dir_contains_files, download_file -from easybuild.tools.filetools import encode_class_name, extract_file +from easybuild.tools.filetools import convert_name, copy_file, copy_dir, create_lock, create_patch_info +from easybuild.tools.filetools import create_unused_dir, derive_alt_pypi_url, diff_files, dir_contains_files +from easybuild.tools.filetools import download_file, encode_class_name, extract_file, compute_checksum from easybuild.tools.filetools import find_backup_name_candidate, get_source_tarball_from_git, is_alt_pypi_url -from easybuild.tools.filetools import is_binary, is_sha256_checksum, mkdir, move_file, move_logs, read_file, remove_dir -from easybuild.tools.filetools import remove_file, remove_lock, verify_checksum, weld_paths, write_file, symlink +from easybuild.tools.filetools import is_binary, is_readable, is_sha256_checksum, mkdir, move_file, move_logs +from easybuild.tools.filetools import read_file, remove_dir, remove_file, remove_lock, verify_checksum, weld_paths +from easybuild.tools.filetools import write_file, symlink from easybuild.tools.hooks import BUILD_STEP, CLEANUP_STEP, CONFIGURE_STEP, EXTENSIONS_STEP, FETCH_STEP, INSTALL_STEP from easybuild.tools.hooks import MODULE_STEP, MODULE_WRITE, PACKAGE_STEP, PATCH_STEP, PERMISSIONS_STEP, POSTITER_STEP from easybuild.tools.hooks import POSTPROC_STEP, PREPARE_STEP, READY_STEP, SANITYCHECK_STEP, SOURCE_STEP @@ -1045,8 +1047,8 @@ def moduleGenerator(self): # # DIRECTORY UTILITY FUNCTIONS # - def gen_builddir(self): - """Generate the (unique) name for the builddir""" + def get_relative_builddir_base_path(self): + """Generate builddir base name relative to build_path""" clean_name = remove_unwanted_chars(self.name) # if a toolchain version starts with a -, remove the - so prevent a -- in the path name @@ -1054,7 +1056,14 @@ def gen_builddir(self): tcversion = tc['version'].lstrip('-') lastdir = "%s%s-%s%s" % (self.cfg['versionprefix'], tc['name'], tcversion, self.cfg['versionsuffix']) - builddir = os.path.join(os.path.abspath(build_path()), clean_name, self.version, lastdir) + relative_builddir = os.path.join(clean_name, self.version, lastdir) + + return relative_builddir + + def gen_builddir(self): + """Generate the (unique) name for the builddir""" + relative_builddir = self.get_relative_builddir_base_path() + builddir = os.path.join(os.path.abspath(build_path()), relative_builddir) # make sure build dir is unique if cleanupoldbuild is False or not set if not self.cfg.get('cleanupoldbuild', False): @@ -4198,6 +4207,98 @@ def print_dry_run_note(loc, silent=True): dry_run_msg(msg, silent=silent) +def persist_failed_compilation_log_and_artifacts(build_successful, application_log, silent, app, easyconfig): + persistent_dirs = {} + + def get_dir(path, dirname): + nonlocal persistent_dirs + dirpath = persistent_dirs.get(dirname) + if dirpath is None: + dirpath = create_unused_dir(path) + persistent_dirs[dirname] = dirpath + return persistent_dirs[dirname] + + def get_artifact_dir(path): + return get_dir(path, 'artifact') + + def get_log_dir(path): + return get_dir(path, 'log') + + def initialize_persistent_dir_and_replicate_if_paths_distinct(replication_operation, + get_directory, + source_path, + destination_base_path, + destination_relative_path): + abs_source_path = os.path.abspath(source_path) + abs_destination_base_path = os.path.abspath(destination_base_path) + abs_destination_path = os.path.join(abs_destination_base_path, destination_relative_path) + + if not os.path.exists(abs_source_path): + return + if os.path.realpath(abs_source_path) == os.path.realpath(abs_destination_path): + return + + final_abs_destination_base_path = get_directory(destination_base_path) + final_abs_destination_path = os.path.join(final_abs_destination_base_path, destination_relative_path) + + replication_operation(abs_source_path, final_abs_destination_path) + + if not application_log: + return + + # there may be multiple log files, or the file name may be different due to zipping + logs = glob.glob('%s*' % application_log) + print_msg( + "Results of the build can be found in the temporary log file(s) %s" % ', '.join(logs), + log=_log, + silent=silent + ) + + if build_successful: + return + + datetime_stamp = time.strftime("%Y%m%d") + '-' + time.strftime("%H%M%S") + + errorlog_path = get_log_error_path(ec=easyconfig) + if errorlog_path is not None: + errorlog_path = os.path.join(errorlog_path, datetime_stamp) + + for log_file in logs: + initialize_persistent_dir_and_replicate_if_paths_distinct( + copy_file, + get_log_dir, + log_file, + errorlog_path, + os.path.basename(log_file) + ) + + print_msg( + "Build logs of failed build copied to permanent storage: %s" % errorlog_path, + log=_log, + silent=silent + ) + + errorartifact_path = get_artifact_error_path(ec=easyconfig) + if errorartifact_path is not None: + errorartifact_path = os.path.join(errorartifact_path, datetime_stamp) + + builddir = app.builddir + if is_readable(builddir): + initialize_persistent_dir_and_replicate_if_paths_distinct( + copy_dir, + get_artifact_dir, + builddir, + errorartifact_path, + app.get_relative_builddir_base_path() + ) + + print_msg( + "Build artifacts of failed build copied to permanent storage: %s" % errorartifact_path, + log=_log, + silent=silent + ) + + def build_and_install_one(ecdict, init_env): """ Build the software @@ -4456,10 +4557,7 @@ def ensure_writable_log_dir(log_dir): else: dry_run_msg("(no ignored errors during dry run)\n", silent=silent) - if application_log: - # there may be multiple log files, or the file name may be different due to zipping - logs = glob.glob('%s*' % application_log) - print_msg("Results of the build can be found in the log file(s) %s" % ', '.join(logs), log=_log, silent=silent) + persist_failed_compilation_log_and_artifacts(success, application_log, silent, app, ecdict['ec']) del app diff --git a/easybuild/tools/config.py b/easybuild/tools/config.py index 2982c5eb2e..a7c81c1947 100644 --- a/easybuild/tools/config.py +++ b/easybuild/tools/config.py @@ -104,9 +104,11 @@ DEFAULT_MODULE_SYNTAX = 'Lua' DEFAULT_MODULES_TOOL = 'Lmod' DEFAULT_PATH_SUBDIRS = { + 'artifact_error_path': 'error_artifacts', 'buildpath': 'build', 'containerpath': 'containers', 'installpath': '', + 'log_error_path': 'error_log', 'packagepath': 'packages', 'repositorypath': 'ebfiles_repo', 'sourcepath': 'sources', @@ -475,6 +477,7 @@ class ConfigurationVariables(BaseConfigurationVariables): # list of known/required keys REQUIRED = [ + 'artifact_error_path', 'buildpath', 'config', 'containerpath', @@ -482,6 +485,7 @@ class ConfigurationVariables(BaseConfigurationVariables): 'installpath_modules', 'installpath_software', 'job_backend', + 'log_error_path', 'logfile_format', 'moduleclasses', 'module_naming_scheme', @@ -843,6 +847,58 @@ def log_path(ec=None): return log_file_format(return_directory=True, ec=ec, date=date, timestamp=timestamp) +def get_log_error_path(ec=None): + """ + Return the 'log_error_path', the location where logs are copied in case of failure + + :param ec: dict-like value with at least the keys 'name' and 'version' defined + """ + log_error_path = ConfigurationVariables()['log_error_path'] + + if not log_error_path: + return None + + if ec is None: + raise EasyBuildError("Easyconfig cannot be empty.") + + name, version = ec.get('name'), ec.get('version') + + if name is None: + raise EasyBuildError("The 'name' key is not defined.") + if version is None: + raise EasyBuildError("The 'version' key is not defined.") + + path = os.path.join(log_error_path, name + '-' + version) + + return path + + +def get_artifact_error_path(ec=None): + """ + Return the 'artifact_error_path', the location where build directories are copied in case of failure + + :param ec: dict-like value with at least the keys 'name' and 'version' defined + """ + artifact_error_path = ConfigurationVariables()['artifact_error_path'] + + if not artifact_error_path: + return None + + if ec is None: + raise EasyBuildError("Easyconfig cannot be empty.") + + name, version = ec.get('name'), ec.get('version') + + if name is None: + raise EasyBuildError("The 'name' key is not defined.") + if version is None: + raise EasyBuildError("The 'version' key is not defined.") + + path = os.path.join(artifact_error_path, name + '-' + version) + + return path + + def get_build_log_path(): """ Return (temporary) directory for build log diff --git a/easybuild/tools/options.py b/easybuild/tools/options.py index 194e3a9969..a46ee0dd01 100644 --- a/easybuild/tools/options.py +++ b/easybuild/tools/options.py @@ -559,6 +559,8 @@ def config_options(self): descr = ("Configuration options", "Configure EasyBuild behavior.") opts = OrderedDict({ + 'artifact-error-path': ("Location where artifacts are copied in case of an error", + None, 'store', mk_full_default_path('artifact_error_path')), 'avail-module-naming-schemes': ("Show all supported module naming schemes", None, 'store_true', False,), 'avail-modules-tools': ("Show all supported module tools", @@ -589,6 +591,8 @@ def config_options(self): None, 'store', None), 'job-backend': ("Backend to use for submitting jobs", 'choice', 'store', DEFAULT_JOB_BACKEND, sorted(avail_job_backends().keys())), + 'log-error-path': ("Location where logs and artifacts are copied in case of an error", + None, 'store', mk_full_default_path('log_error_path')), # purposely take a copy for the default logfile format 'logfile-format': ("Directory name and format of the log file", 'strtuple', 'store', DEFAULT_LOGFILE_FORMAT[:], {'metavar': 'DIR,FORMAT'}), @@ -1153,8 +1157,8 @@ def _postprocess_config(self): # - the could also specify the location of a *remote* (Git( repository, # which can be done in variety of formats (git@:/), https://, etc.) # (see also https://github.com/easybuilders/easybuild-framework/issues/3892); - path_opt_names = ['buildpath', 'containerpath', 'git_working_dirs_path', 'installpath', - 'installpath_modules', 'installpath_software', 'prefix', 'packagepath', + path_opt_names = ['artifact_error_path', 'buildpath', 'containerpath', 'git_working_dirs_path', 'installpath', + 'installpath_modules', 'installpath_software', 'log_error_path', 'prefix', 'packagepath', 'robot_paths', 'sourcepath'] for opt_name in path_opt_names: @@ -1163,8 +1167,8 @@ def _postprocess_config(self): if self.options.prefix is not None: # prefix applies to all paths, and repository has to be reinitialised to take new repositorypath in account # in the legacy-style configuration, repository is initialised in configuration file itself - path_opts = ['buildpath', 'containerpath', 'installpath', 'packagepath', 'repository', 'repositorypath', - 'sourcepath'] + path_opts = ['artifact_error_path', 'buildpath', 'containerpath', 'installpath', 'log_error_path', + 'packagepath', 'repository', 'repositorypath', 'sourcepath'] for dest in path_opts: if not self.options._action_taken.get(dest, False): if dest == 'repository': diff --git a/test/framework/options.py b/test/framework/options.py index 81620570fe..eeb8683069 100644 --- a/test/framework/options.py +++ b/test/framework/options.py @@ -5283,7 +5283,16 @@ def test_prefix_option(self): regex = re.compile(r"(?P\S*).*%s.*" % self.test_prefix, re.M) - expected = ['buildpath', 'containerpath', 'installpath', 'packagepath', 'prefix', 'repositorypath'] + expected = [ + 'artifact-error-path', + 'buildpath', + 'containerpath', + 'installpath', + 'log-error-path', + 'packagepath', + 'prefix', + 'repositorypath', + ] self.assertEqual(sorted(regex.findall(txt)), expected) def test_dump_env_script(self): diff --git a/test/framework/sandbox/sources/toy/toy-0.0_add-bug.patch b/test/framework/sandbox/sources/toy/toy-0.0_add-bug.patch new file mode 100644 index 0000000000..7512447168 --- /dev/null +++ b/test/framework/sandbox/sources/toy/toy-0.0_add-bug.patch @@ -0,0 +1,10 @@ +--- a/toy-0.0.orig/toy.source 2014-03-06 18:48:16.000000000 +0100 ++++ b/toy-0.0/toy.source 2020-08-18 12:19:35.000000000 +0200 +@@ -2,6 +2,6 @@ + + int main(int argc, char* argv[]){ + +- printf("I'm a toy, and proud of it.\n"); ++ printf("I'm a toy, and proud of it.\n") + return 0; + } diff --git a/test/framework/toy_build.py b/test/framework/toy_build.py index 5219ee5aa6..2fe9569d89 100644 --- a/test/framework/toy_build.py +++ b/test/framework/toy_build.py @@ -40,6 +40,8 @@ import sys import tempfile import textwrap +import pathlib +import filecmp from easybuild.tools import LooseVersion from test.framework.utilities import EnhancedTestCase, TestLoaderFiltered from test.framework.package import mock_fpm @@ -156,9 +158,10 @@ def check_toy(self, installpath, outtxt, name='toy', version='0.0', versionprefi devel_module_path = os.path.join(software_path, 'easybuild', '%s-%s-easybuild-devel' % (name, full_version)) self.assertExists(devel_module_path) - def test_toy_build(self, extra_args=None, ec_file=None, tmpdir=None, verify=True, fails=False, verbose=True, - raise_error=False, test_report=None, name='toy', versionsuffix='', testing=True, - raise_systemexit=False, force=True, test_report_regexs=None, debug=True): + def test_toy_build(self, extra_args=None, ec_file=None, tmpdir=None, tmp_logdir=None, tmp_artifactdir=None, + verify=True, fails=False, verbose=True, raise_error=False, test_report=None, name='toy', + versionsuffix='', testing=True, raise_systemexit=False, force=True, test_report_regexs=None, + debug=True): """Perform a toy build.""" if extra_args is None: extra_args = [] @@ -172,6 +175,8 @@ def test_toy_build(self, extra_args=None, ec_file=None, tmpdir=None, verify=True '--sourcepath=%s' % self.test_sourcepath, '--buildpath=%s' % self.test_buildpath, '--installpath=%s' % self.test_installpath, + '--log-error-path=%s' % self.test_errorlogpath, + '--artifact-error-path=%s' % self.test_errorartifactpath, '--unittest-file=%s' % self.logfile, '--robot=%s' % os.pathsep.join([self.test_buildpath, os.path.dirname(__file__)]), ] @@ -183,6 +188,8 @@ def test_toy_build(self, extra_args=None, ec_file=None, tmpdir=None, verify=True args.append('--tmpdir=%s' % tmpdir) if test_report is not None: args.append('--dump-test-report=%s' % test_report) + if tmp_logdir is not None: + args.append('--tmp-logdir=%s' % tmp_logdir) args.extend(extra_args) myerr = None try: @@ -271,6 +278,96 @@ def test_toy_broken(self): # cleanup shutil.rmtree(tmpdir) + def detect_log_file(self, tmp_logpath): + log_files = list(pathlib.Path(tmp_logpath).glob("**/*.log")) + + self.assertTrue(len(log_files) >= 1, 'Log files generated') + log_file = log_files[0] + self.assertTrue(len(log_files) == 1, f"Log file '{log_file}' is unique") + + return log_file + + def assert_build_files_copied(self, buildpath, errorlogpath): + buildir = pathlib.Path(buildpath) + errorlogdir = pathlib.Path(errorlogpath) + iso_date_pattern = r'????????-??????' + for file in buildir.iterdir(): + file_relative_path = file.relative_to(buildir) + file_copies = list(errorlogdir.glob(f"toy-0.0/{iso_date_pattern}/{file_relative_path}")) + self.assertTrue(len(file_copies) == 1, f"Unique copy of toy build file '{file}' made") + file_copy = file_copies[0] + + if file_copy.is_file(): + msg = f"File '{file}' copied succesfully" + self.assertTrue(filecmp.cmp(str(file), str(file_copy), shallow=False), msg) + + def assert_log_files_copied(self, log_file, errorlogpath): + file_name = log_file.name + saved_log_files = list(pathlib.Path(errorlogpath).glob(f"**/{file_name}")) + self.assertTrue(len(saved_log_files) == 1, f"Unique copy of log file '{log_file}' made") + for saved_log_file in saved_log_files: + msg = f"Log file '{log_file}' copied succesfully" + self.assertTrue(filecmp.cmp(str(log_file), str(saved_log_file), shallow=False), msg) + + def assert_error_reported(self, outtxt, output_regexs): + for regex_pattern in output_regexs: + regex = re.compile(regex_pattern, re.M) + msg_stdout = "Pattern %s found in full test report: %s" % (regex.pattern, outtxt) + self.assertTrue(regex.search(outtxt), msg_stdout) + + def check_errorlog(self, output_regexs, outtxt, tmp_logpath, buildpath, errorlogpath, errorartifactpath): + log_file = self.detect_log_file(tmp_logpath) + + self.assert_log_files_copied(log_file, errorlogpath) + self.assert_build_files_copied(buildpath, errorartifactpath) + self.assert_error_reported(outtxt, output_regexs) + + with open(f"{log_file}", 'r') as p_log_file: + self.assert_error_reported(p_log_file.read(), output_regexs) + + def test_toy_broken_compilation(self): + """Test whether log files and the build directory are copied to a permanent location after a failed + compilation.""" + with tempfile.TemporaryDirectory() as base_tmp_dir: + base_tmp_path = pathlib.Path(base_tmp_dir) + + tmpdir = base_tmp_path / 'tmp' + tmp_logdir = base_tmp_path / 'log_dir' + tmp_artifactdir = base_tmp_path / 'artifact_dir' + tmp_easyconfig_dir = base_tmp_path / 'easyconfig_dir' + + os.mkdir(f"{tmpdir}") + os.mkdir(f"{tmp_logdir}") + os.mkdir(f"{tmp_artifactdir}") + os.mkdir(f"{tmp_easyconfig_dir}") + + base_ec = os.path.join( + os.path.dirname(__file__), + 'easyconfigs', 'test_ecs', 't', 'toy', 'toy-0.0.eb') + + base_ec_txt = read_file(base_ec) + broken_compilation_ec_txt = re.sub( + r'toy-0\.0_fix-silly-typo-in-printf-statement\.patch', + r'toy-0.0_add-bug.patch', + base_ec_txt) + broken_compilation_ec = os.path.join(tmp_easyconfig_dir, 'toy-0.0-buggy.eb') + write_file(broken_compilation_ec, broken_compilation_ec_txt) + + self.mock_stderr(True) + self.mock_stdout(True) + outtxt = self.test_toy_build( + ec_file=broken_compilation_ec, + tmpdir=tmpdir, tmp_logdir=tmp_logdir, tmp_artifactdir=tmp_artifactdir, + verify=False, fails=True, verbose=False, raise_error=False, + name='toy', versionsuffix='-buggy') + self.mock_stderr(False) + self.mock_stdout(False) + + output_regexs = [r"^\s+toy\.c:5:44: error: expected (;|';') before"] + + self.check_errorlog(output_regexs, outtxt, tmp_logdir, + self.test_buildpath, self.test_errorlogpath, self.test_errorartifactpath) + def test_toy_tweaked(self): """Test toy build with tweaked easyconfig, for testing extra easyconfig parameters.""" test_ecs_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'easyconfigs') diff --git a/test/framework/utilities.py b/test/framework/utilities.py index bf44ad828d..d81ed26191 100644 --- a/test/framework/utilities.py +++ b/test/framework/utilities.py @@ -121,6 +121,10 @@ def setUp(self): os.environ['EASYBUILD_BUILDPATH'] = self.test_buildpath self.test_installpath = tempfile.mkdtemp() os.environ['EASYBUILD_INSTALLPATH'] = self.test_installpath + self.test_errorlogpath = tempfile.mkdtemp() + os.environ['EASYBUILD_LOG_ERROR_PATH'] = self.test_errorlogpath + self.test_errorartifactpath = tempfile.mkdtemp() + os.environ['EASYBUILD_ARTIFACT_ERROR_PATH'] = self.test_errorartifactpath # make sure that the tests only pick up easyconfigs provided with the tests os.environ['EASYBUILD_ROBOT_PATHS'] = os.path.join(testdir, 'easyconfigs', 'test_ecs')