From c9f1652563ecfec0e4d376600c3b2ff704b4e006 Mon Sep 17 00:00:00 2001 From: Gabriele Bozzola Date: Mon, 3 Feb 2025 12:05:33 -0800 Subject: [PATCH] Disable reservation for larger jobs This commit disables the clima reservation for some of the larger CI jobs. This increases the throughput for all the other jobs because Slurm will not try to hold onto the reserved node to run the larger jobs. --- .buildkite/longruns/pipeline.yml | 9 +++++++++ .buildkite/pipeline.yml | 2 ++ 2 files changed, 11 insertions(+) diff --git a/.buildkite/longruns/pipeline.yml b/.buildkite/longruns/pipeline.yml index e48c855c3f..8078c2d5ed 100644 --- a/.buildkite/longruns/pipeline.yml +++ b/.buildkite/longruns/pipeline.yml @@ -145,6 +145,7 @@ steps: slurm_ntasks_per_node: 16 slurm_nodes: 4 slurm_mem_per_cpu: 16G + slurm_reservation: "false" soft_fail: true - label: "TARGET IDEALIZED: new target aqua - fixed ocean T, nocouple, atmos flux calc" @@ -158,6 +159,7 @@ steps: slurm_ntasks_per_node: 16 slurm_nodes: 4 slurm_mem_per_cpu: 16G + slurm_reservation: "false" soft_fail: true - label: "TARGET IDEALIZED: new target aqua - fixed ocean T, coupler flux calc" @@ -171,6 +173,7 @@ steps: slurm_ntasks_per_node: 16 slurm_nodes: 4 slurm_mem_per_cpu: 16G + slurm_reservation: "false" soft_fail: true - label: "TARGET IDEALIZED: new target aqua - evolving slab ocean T" @@ -184,6 +187,7 @@ steps: slurm_ntasks_per_node: 16 slurm_nodes: 4 slurm_mem_per_cpu: 16G + slurm_reservation: "false" soft_fail: true - label: "TARGET IDEALIZED: new target slab - fixed ocean T, bucket" @@ -197,6 +201,7 @@ steps: slurm_ntasks_per_node: 16 slurm_nodes: 4 slurm_mem_per_cpu: 16G + slurm_reservation: "false" soft_fail: true - label: "TARGET IDEALIZED: new target slab - evolving slab ocean T, bucket" @@ -210,6 +215,7 @@ steps: slurm_ntasks_per_node: 16 slurm_nodes: 4 slurm_mem_per_cpu: 16G + slurm_reservation: "false" soft_fail: true - group: "Current target tests: AMIP surfaces" @@ -227,6 +233,7 @@ steps: slurm_ntasks_per_node: 
16 slurm_nodes: 4 slurm_mem_per_cpu: 16G + slurm_reservation: "false" soft_fail: true - label: "MPI AMIP FINE: new target amip: topo" @@ -240,6 +247,7 @@ steps: slurm_ntasks_per_node: 16 slurm_nodes: 4 slurm_mem_per_cpu: 16G + slurm_reservation: "false" soft_fail: true - label: "MPI AMIP FINE: new target amip: topo + diagedmf" @@ -253,6 +261,7 @@ steps: slurm_ntasks_per_node: 16 slurm_nodes: 4 slurm_mem_per_cpu: 20G + slurm_reservation: "false" soft_fail: true - group: "Current target tests on GPU: AMIP surface" diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index c443ed2381..a3c69fc933 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -37,6 +37,7 @@ steps: - "julia --project=experiments/ClimaCore/ -e 'using Pkg; Pkg.add(\"MPI\"); Pkg.add(\"CUDA\")'" - "julia --project=experiments/ClimaCore/ -e 'using Pkg; Pkg.precompile()'" - "julia --project=experiments/ClimaCore/ -e 'using Pkg; Pkg.status()'" + - "julia --project=experiments/ClimaCore/ -e 'using CUDA; CUDA.precompile_runtime()'" - echo "--- Instantiate ClimaEarth experiments env" - "julia --project=experiments/ClimaEarth/ -e 'using Pkg; Pkg.develop(path=\".\")'" @@ -206,6 +207,7 @@ steps: agents: slurm_ntasks: 4 slurm_mem: 48GB + slurm_reservation: "false" # short high-res performance test - label: "Unthreaded AMIP FINE" # also reported by longruns with a flame graph