From 6b6d2c8b77fd7f4a854f56b1d8f69fb4493167c1 Mon Sep 17 00:00:00 2001 From: Caspar van Leeuwen Date: Thu, 20 Feb 2025 23:42:04 +0100 Subject: [PATCH] Add option for ignoring individual files in the CUDA sanity check --- easybuild/framework/easyblock.py | 16 +++++++++++++--- easybuild/framework/easyconfig/default.py | 5 +++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/easybuild/framework/easyblock.py b/easybuild/framework/easyblock.py index 747838f959..9631da2617 100644 --- a/easybuild/framework/easyblock.py +++ b/easybuild/framework/easyblock.py @@ -3322,6 +3322,10 @@ def sanity_check_cuda(self, cuda_dirs=None, check_cuobjdump=True): fails = [] cfg_ccs = build_option('cuda_compute_capabilities') or self.cfg.get('cuda_compute_capabilities', None) + # Construct the list of files to ignore as full paths (cuda_sanity_ignore_files contains the paths + # to ignore, relative to the installation prefix) + ignore_file_list = [os.path.join(self.installdir, d) for d in self.cfg['cuda_sanity_ignore_files']] + # If there are no CUDA compute capabilities defined, return if cfg_ccs is None or len(cfg_ccs) == 0: self.log.info("Skipping CUDA sanity check, as no CUDA compute capabilities where configured") @@ -3364,9 +3368,15 @@ def sanity_check_cuda(self, cuda_dirs=None, check_cuobjdump=True): if additional_ccs: fail_msg += "Surplus compute capabilities: %s. " % ', '.join(sorted(additional_ccs)) if missing_ccs: - fail_msg += "Missing compute capabilities: %s." % ', '.join(sorted(missing_ccs)) - self.log.warning(fail_msg) - fails.append(fail_msg) + fail_msg += "Missing compute capabilities: %s. " % ', '.join(sorted(missing_ccs)) + # We still log the result, but don't fail: + if path in ignore_file_list: + fail_msg += f"This failure will be ignored as {path} is listed in " + fail_msg += "'cuda_sanity_ignore_files'." 
+ self.log.warning(fail_msg) + else: + self.log.warning(fail_msg) + fails.append(fail_msg) else: msg = (f"Output of 'cuobjdump' checked for {path}; device code architecures match " "those in cuda_compute_capabilities") diff --git a/easybuild/framework/easyconfig/default.py b/easybuild/framework/easyconfig/default.py index bca46c3856..80319c6ec9 100644 --- a/easybuild/framework/easyconfig/default.py +++ b/easybuild/framework/easyconfig/default.py @@ -126,6 +126,11 @@ 'after make (for e.g.,"test" for make test)'), BUILD], 'bin_lib_subdirs': [[], "List of subdirectories for binaries and libraries, which is used during sanity check " "to check RPATH linking and banned/required libraries", BUILD], + 'cuda_sanity_ignore_files': [[], "List of files (relative to the installation prefix) for which failures in " + "the CUDA sanity check step are ignored. Typically used for files where you " + "know the CUDA architectures in those files don't match the " + "--cuda-compute-capabilities configured for EasyBuild AND where you know " + "that this is ok / reasonable (e.g. binary installations)", BUILD], 'sanity_check_commands': [[], ("format: [(name, options)] e.g. [('gzip','-h')]. " "Using a non-tuple is equivalent to (name, '-h')"), BUILD], 'sanity_check_paths': [{}, ("List of files and directories to check "