From 6dc02188c73e7314db44d50226f6c45d7d86b03b Mon Sep 17 00:00:00 2001 From: Spencer Wong Date: Fri, 11 Oct 2024 16:54:16 +1100 Subject: [PATCH] Initial log compression for cice4 --- payu/models/cice.py | 28 +++++++++ payu/models/cice5.py | 3 + test/models/test_cice.py | 129 +++++++++++++++++++++++++++++++++------ 3 files changed, 140 insertions(+), 20 deletions(-) diff --git a/payu/models/cice.py b/payu/models/cice.py index 50f29c1f..2ae47c0e 100644 --- a/payu/models/cice.py +++ b/payu/models/cice.py @@ -17,6 +17,8 @@ import sys import shutil import datetime +import re +import tarfile # Extensions import f90nml @@ -51,6 +53,13 @@ def __init__(self, expt, name, config): self.copy_inputs = False + # regex patterns for matching log files. When empty, no logs compressed + self.logs_to_compress = [r"iceout[0-9]{3}", + r"debug\.root\.[0-9]{2}", + r"ice_diag\.d", + r"ice_diag_out"] + self.log_tar_name = "logfiles.tar.gz" + def set_model_pathnames(self): super(Cice, self).set_model_pathnames() @@ -333,6 +342,25 @@ def archive(self, **kwargs): else: shutil.rmtree(self.work_input_path) + if self.expt.config.get('compress_logs', False): + self.compress_log_files() + + def get_log_files(self): + log_files = [] + for filename in os.listdir(self.work_path): + if any((re.match(pattern, filename) + for pattern in self.logs_to_compress)): + log_files.append(os.path.join(self.work_path, filename)) + return log_files + + def compress_log_files(self): + log_files = self.get_log_files() + with tarfile.open(name=os.path.join(self.work_path, self.log_tar_name), + mode="w:gz") as tar: + for file in log_files: + tar.add(file, arcname=os.path.basename(file)) + os.remove(file) + def collate(self): pass diff --git a/payu/models/cice5.py b/payu/models/cice5.py index bc5618af..1c8fcfac 100644 --- a/payu/models/cice5.py +++ b/payu/models/cice5.py @@ -42,6 +42,9 @@ def __init__(self, expt, name, config): self.copy_restarts = True self.copy_inputs = True + # Empty list means no log files will be compressed + self.logs_to_compress = [] + def set_local_timestep(self, t_step): dt = self.ice_in['setup_nml']['dt'] npt = self.ice_in['setup_nml']['npt'] diff --git a/test/models/test_cice.py b/test/models/test_cice.py index 15e6cb54..150e9c27 100644 --- a/test/models/test_cice.py +++ b/test/models/test_cice.py @@ -3,6 +3,7 @@ import pytest import f90nml +import tarfile import payu @@ -124,41 +125,47 @@ def empty_workdir(): workdir.symlink_to(expt_workdir) yield expt_workdir - shutil.rmtree(expt_workdir) + try: + shutil.rmtree(expt_workdir) + except FileNotFoundError: + pass workdir.unlink() +@pytest.fixture +def cice_nml(): + nml_path = os.path.join(ctrldir, CICE_NML_NAME) + f90nml.write(DEFAULT_CICE_NML, nml_path) + + yield nml_path + + # Cleanup + os.remove(nml_path) + # Important to test None case without separate ice history file @pytest.fixture(params=[None, {"icefields_nml": {"f_icy": "m"}}, {"icefields_nml": {"f_icy": "m", "f_new": "y"}}]) -def cice_config_files(request): +def cice_history_nml(request): """ - Write the default cice_in.nml namelist, and if included, separate ice - history namelist used by ESM1.5. + Write separate ice history namelist used by ESM1.5, if provided. """ - cice_nml = DEFAULT_CICE_NML ice_history = request.param + ice_history_path = os.path.join(ctrldir, HIST_NML_NAME) - with cd(ctrldir): - # 2. Create config.nml - f90nml.write(cice_nml, CICE_NML_NAME) - - if ice_history: - f90nml.write(ice_history, HIST_NML_NAME) + if ice_history: + f90nml.write(ice_history, ice_history_path) yield {'ice_history': ice_history} # cleanup - with cd(ctrldir): - os.remove(CICE_NML_NAME) - if ice_history: - os.remove(HIST_NML_NAME) + if ice_history: + os.remove(ice_history_path) @pytest.mark.parametrize("config", [DEFAULT_CONFIG], indirect=True) -def test_setup(config, cice_config_files): +def test_setup(config, cice_nml, cice_history_nml): """ Confirm that 1: payu overwrites cice_in with ice_history @@ -183,9 +190,9 @@ def test_setup(config, cice_config_files): # Check cice_in was patched with ice_history work_input_fpath = os.path.join(model.work_path, CICE_NML_NAME) input_nml = f90nml.read(work_input_fpath) - if cice_config_files['ice_history']: + if cice_history_nml['ice_history']: assert (input_nml["icefields_nml"] == - cice_config_files["ice_history"]["icefields_nml"]) + cice_history_nml["ice_history"]["icefields_nml"]) else: assert input_nml["icefields_nml"] == DEFAULT_CICE_NML["icefields_nml"] @@ -238,7 +245,7 @@ def prior_restart_cice4(run_timing_params): @pytest.mark.parametrize("config", [CONFIG_WITH_RESTART], indirect=True) -def test_restart_setup(config, cice_config_files, prior_restart_cice4, +def test_restart_setup(config, cice_nml, cice_history_nml, prior_restart_cice4, run_timing_params): """ Test that seting up an experiment from a cloned control directory @@ -280,7 +287,7 @@ def test_restart_setup(config, cice_config_files, prior_restart_cice4, @pytest.mark.parametrize("config", [DEFAULT_CONFIG], indirect=True) -def test_no_restart_ptr(config, cice_config_files): +def test_no_restart_ptr(config, cice_nml, cice_history_nml): """ Test that payu raises an error if no prior restart path is specified, restart is `true` in cice_in.nml, and the restart pointer is missing. @@ -300,3 +307,85 @@ def test_no_restart_ptr(config, cice_config_files): with pytest.raises(RuntimeError, match="Cannot find previous restart file"): model.setup() + + +CONFIG_WITH_COMPRESSION = { + "laboratory": "lab", + "jobname": "testrun", + "model": "cice", + "exe": "test.exe", + "experiment": ctrldir_basename, + "metadata": {"enable": False}, + "compress_logs": True +} + + +@pytest.fixture +def cice4_log_files(): + """ + Create cice log files matching those produced during ESM1.5 simulations. + """ + log_names = ["ice_diag_out", "ice_diag.d", "debug.root.03", + "iceout086", "iceout088", "iceout090", "iceout092", + "iceout094", "iceout096", "iceout085", "iceout087", + "iceout089", "iceout091", "iceout093", "iceout095"] + log_paths = [os.path.join(expt_workdir, name) for name in log_names] + + for log_file in log_paths: + with open(log_file, "w") as f: + f.close() + + yield log_paths + + # Cleanup + for log_file in log_paths: + try: + os.remove(log_file) + except FileNotFoundError: + pass + + +@pytest.fixture +def non_log_file(): + """ + Create a cice4 output file to be ignored by log compression. + Use cice_in.nml which is copied to the work directory in ESM1.5. + """ + non_log_path = os.path.join(expt_workdir, CICE_NML_NAME) + with open(non_log_path, "w") as f: + f.close() + + yield non_log_path + + # Cleanup + os.remove(non_log_path) + + +@pytest.mark.parametrize("config", [CONFIG_WITH_COMPRESSION], + indirect=True) +def test_log_compression(config, cice4_log_files, non_log_file, + cice_nml # Required by expt.__init__ + ): + """ + Test that logfiles produced by cice during ESM1.5 simulations are + properly compressed into a tarball by cice.compress_log_files(). + """ + with cd(ctrldir): + # Initialise laboratory and experiment + lab = payu.laboratory.Laboratory(lab_path=str(labdir)) + expt = payu.experiment.Experiment(lab, reproduce=False) + model = expt.models[0] + + # Function to test + model.compress_log_files() + + # Check that log tarball created and no original logs remain + assert set(os.listdir(expt_workdir)) == {model.log_tar_name, + os.path.basename(non_log_file)} + + # Check all logs present in tarball + log_file_names = {os.path.basename(log_path) for + log_path in cice4_log_files} + with tarfile.open(os.path.join(expt_workdir, model.log_tar_name), + "r") as tar: + assert set(tar.getnames()) == log_file_names