Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Model log compression #532

Merged
merged 8 commits into from
Dec 10, 2024
28 changes: 28 additions & 0 deletions payu/models/cice.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
import sys
import shutil
import datetime
import re
import tarfile

# Extensions
import f90nml
Expand Down Expand Up @@ -51,6 +53,13 @@ def __init__(self, expt, name, config):

self.copy_inputs = False

# regex patterns for matching log files. When empty, no logs compressed
self.logs_to_compress = [r"iceout[0-9]{3}",
r"debug\.root\.[0-9]{2}",
r"ice_diag\.d",
r"ice_diag_out"]
self.log_tar_name = "logfiles.tar.gz"

def set_model_pathnames(self):
super(Cice, self).set_model_pathnames()

Expand Down Expand Up @@ -333,6 +342,25 @@ def archive(self, **kwargs):
else:
shutil.rmtree(self.work_input_path)

if self.expt.config.get('compress_logs', False):
self.compress_log_files()

def get_log_files(self):
log_files = []
for filename in os.listdir(self.work_path):
if any((re.match(pattern, filename)
for pattern in self.logs_to_compress)):
log_files.append(os.path.join(self.work_path, filename))
return log_files

def compress_log_files(self):
log_files = self.get_log_files()
with tarfile.open(name=os.path.join(self.work_path, self.log_tar_name),
mode="w:gz") as tar:
for file in log_files:
tar.add(file, arcname=os.path.basename(file))
os.remove(file)

def collate(self):
pass

Expand Down
3 changes: 3 additions & 0 deletions payu/models/cice5.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ def __init__(self, expt, name, config):
self.copy_restarts = True
self.copy_inputs = True

# Empty list means no log files will be compressed
self.logs_to_compress = []

def set_local_timestep(self, t_step):
dt = self.ice_in['setup_nml']['dt']
npt = self.ice_in['setup_nml']['npt']
Expand Down
129 changes: 109 additions & 20 deletions test/models/test_cice.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import pytest
import f90nml
import tarfile

import payu

Expand Down Expand Up @@ -124,41 +125,47 @@ def empty_workdir():
workdir.symlink_to(expt_workdir)

yield expt_workdir
shutil.rmtree(expt_workdir)
try:
shutil.rmtree(expt_workdir)
except FileNotFoundError:
pass
workdir.unlink()


@pytest.fixture
def cice_nml():
nml_path = os.path.join(ctrldir, CICE_NML_NAME)
f90nml.write(DEFAULT_CICE_NML, nml_path)

yield nml_path

# Cleanup
os.remove(nml_path)

# Important to test None case without separate ice history file
@pytest.fixture(params=[None,
{"icefields_nml": {"f_icy": "m"}},
{"icefields_nml": {"f_icy": "m", "f_new": "y"}}])
def cice_config_files(request):
def cice_history_nml(request):
"""
Write the default cice_in.nml namelist, and if included, separate ice
history namelist used by ESM1.5.
Write separate ice history namelist used by ESM1.5, if provided.
"""
cice_nml = DEFAULT_CICE_NML
ice_history = request.param
ice_history_path = os.path.join(ctrldir, HIST_NML_NAME)

with cd(ctrldir):
# 2. Create config.nml
f90nml.write(cice_nml, CICE_NML_NAME)

if ice_history:
f90nml.write(ice_history, HIST_NML_NAME)
if ice_history:
f90nml.write(ice_history, ice_history_path)

yield {'ice_history': ice_history}

# cleanup
with cd(ctrldir):
os.remove(CICE_NML_NAME)
if ice_history:
os.remove(HIST_NML_NAME)
if ice_history:
os.remove(ice_history_path)


@pytest.mark.parametrize("config", [DEFAULT_CONFIG],
indirect=True)
def test_setup(config, cice_config_files):
def test_setup(config, cice_nml, cice_history_nml):
"""
Confirm that
1: payu overwrites cice_in with ice_history
Expand All @@ -183,9 +190,9 @@ def test_setup(config, cice_config_files):
# Check cice_in was patched with ice_history
work_input_fpath = os.path.join(model.work_path, CICE_NML_NAME)
input_nml = f90nml.read(work_input_fpath)
if cice_config_files['ice_history']:
if cice_history_nml['ice_history']:
assert (input_nml["icefields_nml"] ==
cice_config_files["ice_history"]["icefields_nml"])
cice_history_nml["ice_history"]["icefields_nml"])
else:
assert input_nml["icefields_nml"] == DEFAULT_CICE_NML["icefields_nml"]

Expand Down Expand Up @@ -238,7 +245,7 @@ def prior_restart_cice4(run_timing_params):

@pytest.mark.parametrize("config", [CONFIG_WITH_RESTART],
indirect=True)
def test_restart_setup(config, cice_config_files, prior_restart_cice4,
def test_restart_setup(config, cice_nml, cice_history_nml, prior_restart_cice4,
run_timing_params):
"""
Test that seting up an experiment from a cloned control directory
Expand Down Expand Up @@ -280,7 +287,7 @@ def test_restart_setup(config, cice_config_files, prior_restart_cice4,

@pytest.mark.parametrize("config", [DEFAULT_CONFIG],
indirect=True)
def test_no_restart_ptr(config, cice_config_files):
def test_no_restart_ptr(config, cice_nml, cice_history_nml):
"""
Test that payu raises an error if no prior restart path is specified,
restart is `true` in cice_in.nml, and the restart pointer is missing.
Expand All @@ -300,3 +307,85 @@ def test_no_restart_ptr(config, cice_config_files):
with pytest.raises(RuntimeError,
match="Cannot find previous restart file"):
model.setup()


CONFIG_WITH_COMPRESSION = {
"laboratory": "lab",
"jobname": "testrun",
"model": "cice",
"exe": "test.exe",
"experiment": ctrldir_basename,
"metadata": {"enable": False},
"compress_logs": True
}


@pytest.fixture
def cice4_log_files():
"""
Create cice log files matching those produced during ESM1.5 simulations.
"""
log_names = ["ice_diag_out", "ice_diag.d", "debug.root.03",
"iceout086", "iceout088", "iceout090", "iceout092",
"iceout094", "iceout096", "iceout085", "iceout087",
"iceout089", "iceout091", "iceout093", "iceout095"]
log_paths = [os.path.join(expt_workdir, name) for name in log_names]

for log_file in log_paths:
with open(log_file, "w") as f:
f.close()

yield log_paths

# Cleanup
for log_file in log_paths:
try:
os.remove(log_file)
except FileNotFoundError:
pass


@pytest.fixture
def non_log_file():
"""
Create a cice4 output file to be ignored by log compression.
Use cice_in.nml which is copied to the work directory in ESM1.5.
"""
non_log_path = os.path.join(expt_workdir, CICE_NML_NAME)
with open(non_log_path, "w") as f:
f.close()

yield non_log_path

# Cleanup
os.remove(non_log_path)


@pytest.mark.parametrize("config", [CONFIG_WITH_COMPRESSION],
indirect=True)
def test_log_compression(config, cice4_log_files, non_log_file,
cice_nml # Required by expt.__init__
):
"""
Test that logfiles produced by cice during ESM1.5 simulations are
properly compressed into a tarball by cice.compress_log_files().
"""
with cd(ctrldir):
# Initialise laboratory and experiment
lab = payu.laboratory.Laboratory(lab_path=str(labdir))
expt = payu.experiment.Experiment(lab, reproduce=False)
model = expt.models[0]

# Function to test
model.compress_log_files()

# Check that log tarball created and no original logs remain
assert set(os.listdir(expt_workdir)) == {model.log_tar_name,
os.path.basename(non_log_file)}

# Check all logs present in tarball
log_file_names = {os.path.basename(log_path) for
log_path in cice4_log_files}
with tarfile.open(os.path.join(expt_workdir, model.log_tar_name),
"r") as tar:
assert set(tar.getnames()) == log_file_names
Loading