From aba40c9087114647b465bf60e9ab56415217a464 Mon Sep 17 00:00:00 2001 From: Julia Sloan Date: Fri, 31 Jan 2025 10:26:33 -0800 Subject: [PATCH 1/2] expose regrid_dir to user --- src/Utilities.jl | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/Utilities.jl b/src/Utilities.jl index 0c73420fa9..75b359a729 100644 --- a/src/Utilities.jl +++ b/src/Utilities.jl @@ -120,11 +120,20 @@ function show_memory_usage() end """ - setup_output_dirs(; output_dir = nothing, artifacts_dir = nothing, comms_ctx) + setup_output_dirs(; output_dir = pwd(), + artifacts_dir = joinpath(output_dir, "artifacts"), + checkpoints_dir = joinpath(output_dir, "checkpoints"), + regrid_dir = nothing, + comms_ctx, + ) + +Create output directories for the experiment. If `comms_ctx` is provided, +only the root process will create the directories. +By default, the artifacts and checkpoints directories are created inside the output +directory with the names `artifacts/` and `checkpoints/`. +The regrid directory is by default created as a temporary directory inside the output +directory and is automatically deleted when the process exits. -Create output directories for the experiment. If `comms_ctx` is provided, only the root process will create the directories. -By default, the regrid directory is created as a temporary directory inside the output directory, -and the artifacts directory is created inside the output directory with the name `artifacts/`. `ClimaUtilities.OutputPathGenerator` is used so that simulations can be re-run and re-started. 
The output path looks like: @@ -134,6 +143,8 @@ coupler_output_dir_amip/ │ └── checkpoints for the various models ├── artifacts │ └── plots produced by the postporcessing step +├── regrid_tmp_<random_suffix>/ +│ └── temporary files used for regridding ├── output_0000/ │ ├── atmos/ │ │ └── output of the atmos model @@ -148,26 +159,27 @@ coupler_output_dir_amip/ # Arguments - `output_dir::String`: The directory where the output files will be stored. Default is the current directory. -- `regrid_dir::String`: The directory where the regridded files will be stored. Default is `output_dir/regrid_tmp/`. +- `regrid_dir::String`: The directory where the regridded files will be stored. Default is `output_dir/regrid_tmp_<random_suffix>/`. - `checkpoint_dir::String`: The directory where the checkpoint files will be stored. Default is `output_dir/checkpoints/`. - `artifacts_dir::String`: The directory where the artifacts will be stored. Default is `output_dir/artifacts/`. - `comms_ctx::Union{Nothing, ClimaComms.AbstractCommsContext}`: The communicator context. If provided, only the root process will create the directories. # Returns -- A tuple with the paths to the output, regrid, and artifacts directories. +- A tuple with the paths to the output, artifacts, regrid, and checkpoints directories. """ function setup_output_dirs(; output_dir = pwd(), artifacts_dir = joinpath(output_dir, "artifacts"), checkpoints_dir = joinpath(output_dir, "checkpoints"), + regrid_dir = nothing, comms_ctx, ) output_dir = generate_output_path(output_dir, context = comms_ctx) - regrid_dir = nothing if ClimaComms.iamroot(comms_ctx) mkpath(artifacts_dir) mkpath(checkpoints_dir) - regrid_dir = mktempdir(output_dir, prefix = "regrid_tmp_") + # If no regrid_dir is provided, create a temporary directory + regrid_dir = isnothing(regrid_dir) ? 
mktempdir(output_dir, prefix = "regrid_tmp_") : mkpath(regrid_dir) end regrid_dir = ClimaComms.bcast(comms_ctx, regrid_dir) From acd4787c767803c4e3c19902c14cb52f4ce63e22 Mon Sep 17 00:00:00 2001 From: Julia Sloan Date: Mon, 3 Feb 2025 16:35:57 -0800 Subject: [PATCH 2/2] add timeout limit to sbatch job --- .buildkite/pipeline.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index c443ed2381..70a437ae2a 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -200,7 +200,7 @@ steps: - label: "MPI AMIP" command: "srun julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/amip_coarse_mpi.yml --job_id amip_coarse_mpi" artifact_paths: "experiments/ClimaEarth/output/amip_coarse_mpi/artifacts/*" - timeout_in_minutes: 240 + timeout_in_minutes: 30 env: CLIMACOMMS_CONTEXT: "MPI" agents: @@ -320,6 +320,7 @@ steps: agents: slurm_ntasks: 1 soft_fail: true + timeout_in_minutes: 40 - group: "Hierarchy tests (1d)" steps: