diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 5eb6200a85..8b0ed3ca74 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -403,6 +403,14 @@ steps: slurm_mem: 20GB slurm_gpus: 1 + - label: "GPU Slabplanet: extra atmos diagnostics" + key: "gpu_slabplanet_atmos_diags" + command: "julia --color=yes --project=experiments/ClimaEarth/ experiments/ClimaEarth/run_amip.jl --config_file $CONFIG_PATH/gpu_slabplanet_atmos_diags.yml" + artifact_paths: "experiments/ClimaEarth/output/slabplanet/gpu_slabplanet_atmos_diags_artifacts/*" + agents: + slurm_mem: 20GB + slurm_gpus: 1 + # GPU RUNS: AMIP - label: "GPU AMIP test: albedo from function" key: "gpu_amip_albedo_function" diff --git a/config/ci_configs/gpu_slabplanet_atmos_diags.yml b/config/ci_configs/gpu_slabplanet_atmos_diags.yml new file mode 100644 index 0000000000..2a3681973b --- /dev/null +++ b/config/ci_configs/gpu_slabplanet_atmos_diags.yml @@ -0,0 +1,22 @@ +anim: false +apply_limiter: false +ci_plots: true +dt: "200secs" +dt_cpl: 200 +dt_save_to_sol: "9days" +energy_check: true +h_elem: 4 +job_id: "gpu_slabplanet_atmos_diags" +mode_name: "slabplanet" +moist: "equil" +mono_surface: true +output_default_diagnostics: false +precip_model: "0M" +rad: "gray" +run_name: "gpu_slabplanet_atmos_diags" +t_end: "10days" +vert_diff: "true" +diagnostics: + - short_name: [mse, lr, ediff, ts, mass_strf, stab, vt, egr] + reduction_time: average + period: 1days diff --git a/experiments/ClimaEarth/Project.toml b/experiments/ClimaEarth/Project.toml index 07ca81a813..329f092dcd 100644 --- a/experiments/ClimaEarth/Project.toml +++ b/experiments/ClimaEarth/Project.toml @@ -1,7 +1,9 @@ [deps] +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" ArtifactWrappers = "a14bc488-3040-4b00-9dc1-f6467924858a" AtmosphericProfilesLibrary = "86bc3604-9858-485a-bdbe-831ec50de11d" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" CairoMakie =
"13f3f980-e62b-5c42-98c6-ff1f3baf88f0" ClimaAnalysis = "29b5916a-a76c-4e73-9657-3c8fd22e65e6" ClimaAtmos = "b2c96348-7fb7-4fe0-8da9-78d88439e717" diff --git a/experiments/ClimaEarth/run_amip.jl b/experiments/ClimaEarth/run_amip.jl index 143ac99763..51aaeb4ff0 100644 --- a/experiments/ClimaEarth/run_amip.jl +++ b/experiments/ClimaEarth/run_amip.jl @@ -976,10 +976,8 @@ if ClimaComms.iamroot(comms_ctx) make_plots(Val(:general_ci_plots), [atmos_sim.integrator.p.output_dir], dir_paths.artifacts) end - ## plot all model states and coupler fields (useful for debugging) TODO: make MPI & GPU compatible - comms_ctx.device == ClimaComms.CPUSingleThreaded() && - comms_ctx isa ClimaComms.SingletonCommsContext && - debug(cs, joinpath(dir_paths.artifacts, "endstates_")) + ## plot all model states and coupler fields (useful for debugging) + !(comms_ctx isa ClimaComms.MPICommsContext) && debug(cs, dir_paths.artifacts) if isinteractive() ## clean up for interactive runs, retain all output otherwise diff --git a/experiments/ClimaEarth/user_io/debug_plots.jl b/experiments/ClimaEarth/user_io/debug_plots.jl index 4199863b9d..a23ef6ad18 100644 --- a/experiments/ClimaEarth/user_io/debug_plots.jl +++ b/experiments/ClimaEarth/user_io/debug_plots.jl @@ -1,10 +1,13 @@ import Plots import Printf +import ClimaComms import ClimaCore as CC import ClimaCorePlots import ClimaCoupler: Interfacer import ClimaAtmos as CA + +include("plot_helper.jl") + # plotting functions for the coupled simulation """ debug(cs::Interfacer.CoupledSimulation, dir = "debug", cs_fields_ref = nothing) @@ -12,7 +15,7 @@ import ClimaAtmos as CA Plot the fields of a coupled simulation and save plots to a directory.
""" function debug(cs::Interfacer.CoupledSimulation, dir = "debug", cs_fields_ref = nothing) - mkpath(dir) + isdir(dir) || mkpath(dir) @info "plotting debug in " * dir for sim in cs.model_sims debug(sim, dir) @@ -48,9 +51,14 @@ function debug(cs_fields::NamedTuple, dir, cs_fields_ref = nothing) :radiative_energy_flux_toa, ) all_plots = [] + for field_name in field_names field = getproperty(cs_fields, field_name) - push!(all_plots, Plots.plot(field, title = string(field_name) * print_extrema(field))) + + # Copy field onto cpu space if necessary + cpu_field = to_cpu(field) + + push!(all_plots, Plots.plot(cpu_field, title = string(field_name) * print_extrema(field))) end Plots.plot(all_plots..., size = (1500, 800)) Plots.png(joinpath(dir, "debug_coupler")) @@ -60,10 +68,13 @@ function debug(cs_fields::NamedTuple, dir, cs_fields_ref = nothing) all_plots = [] for field_name in field_names field = getproperty(cs_fields, field_name) + # Copy field onto cpu space if necessary (the reference field is converted the same way below) + cpu_field = to_cpu(field) + push!( all_plots, Plots.plot( - field .- getproperty(cs_fields_ref, field_name), + cpu_field .- to_cpu(getproperty(cs_fields_ref, field_name)), title = string(field_name) * print_extrema(field), ), ) @@ -79,17 +90,19 @@ end Plot the fields of a component model simulation and save plots to a directory.
""" function debug(sim::Interfacer.ComponentModelSimulation, dir) - field_names = plot_field_names(sim) all_plots = [] for field_name in field_names field = Interfacer.get_field(sim, Val(field_name)) - push!(all_plots, Plots.plot(field, title = string(field_name) * print_extrema(field))) + + # Copy field onto cpu space if necessary + cpu_field = to_cpu(field) + + push!(all_plots, Plots.plot(cpu_field, title = string(field_name) * print_extrema(field))) end fig = Plots.plot(all_plots..., size = (1500, 800)) Plots.png(joinpath(dir, "debug_$(Interfacer.name(sim))")) - end """ diff --git a/experiments/ClimaEarth/user_io/plot_helper.jl b/experiments/ClimaEarth/user_io/plot_helper.jl index 82a430e7c8..bf461aea46 100644 --- a/experiments/ClimaEarth/user_io/plot_helper.jl +++ b/experiments/ClimaEarth/user_io/plot_helper.jl @@ -1,5 +1,7 @@ +import Adapt +import CUDA import Plots -import ClimaCoupler: PostProcessor +import ClimaCoupler: PostProcessor, TestHelper """ Plots.plot(post_data::DataPackage; zmd_params = (;), hsd_params = (;)) @@ -90,3 +92,81 @@ function Plots.contourf( clims = clims, ) end + +""" + get_ne(grid) + +Return the number of horizontal elements in a grid. +""" +function get_ne(grid::CC.Grids.SpectralElementGrid2D) + return grid.topology.mesh.ne +end +function get_ne(grid::CC.Grids.LevelGrid) + return get_ne(grid.full_grid.horizontal_grid) +end +function get_ne(grid::CC.Grids.ExtrudedFiniteDifferenceGrid) + return get_ne(grid.horizontal_grid) +end + +""" + get_R(grid) + +Return the radius of a grid. +""" +function get_R(grid) + return CC.Grids.global_geometry(grid).radius +end + +""" + get_height(grid) + +Return the height of a grid if it is 3D, or nothing otherwise. +""" +function get_height(grid::CC.Grids.ExtrudedFiniteDifferenceGrid) + return grid.vertical_grid.topology.mesh.domain.coord_max.z +end +function get_height(grid) + return nothing # 2d case +end + +""" + to_cpu(field::CC.Fields.Field) + +For a CPU field, return the field unchanged.
+For a GPU field, copy the field and its underlying space onto the CPU. + +Note that this function rebuilds the space on the CPU with `TestHelper.create_space` and allocates a new field, +so it should only be used for debugging and testing. +""" +function to_cpu(field::CC.Fields.Field) + if parent(field) isa Array + return field + else + # Rebuild a matching space on the CPU from the grid parameters, then copy the field data into it + space = axes(field) + FT = CC.Spaces.undertype(space) + R = get_R(space.grid) + ne = get_ne(space.grid) + polynomial_degree = CC.Quadratures.polynomial_degree(CC.Spaces.quadrature_style(space.grid)) + nz = CC.Spaces.nlevels(space) + height = get_height(space.grid) + + cpu_comms_ctx = ClimaComms.SingletonCommsContext(ClimaComms.CPUSingleThreaded()) + cpu_space = TestHelper.create_space( + FT, + comms_ctx = cpu_comms_ctx, + R = R, + ne = ne, + polynomial_degree = polynomial_degree, + nz = nz, + height = height, + ) + cpu_field = CC.Fields.ones(cpu_space) + + parent(cpu_field) .= Array(parent(field)) + return cpu_field + end +end + +to_cpu(arr::Array) = arr +to_cpu(arr::CUDA.CuArray) = Adapt.adapt(Array, arr) diff --git a/perf/Project.toml b/perf/Project.toml index bf8e524281..eef12fdbb7 100644 --- a/perf/Project.toml +++ b/perf/Project.toml @@ -1,7 +1,9 @@ [deps] +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" ArtifactWrappers = "a14bc488-3040-4b00-9dc1-f6467924858a" AtmosphericProfilesLibrary = "86bc3604-9858-485a-bdbe-831ec50de11d" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" ClimaAtmos = "b2c96348-7fb7-4fe0-8da9-78d88439e717" ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884" diff --git a/test/Project.toml b/test/Project.toml index 814c032b3f..710626fc3c 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,4 +1,5 @@ [deps] +Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" ArtifactWrappers = "a14bc488-3040-4b00-9dc1-f6467924858a" diff --git
a/test/TestHelper.jl b/test/TestHelper.jl index f302651138..578fd1af26 100644 --- a/test/TestHelper.jl +++ b/test/TestHelper.jl @@ -35,6 +35,7 @@ function create_space( ne = 4, polynomial_degree = 3, nz = 1, + height = FT(100), ) domain = CC.Domains.SphereDomain(R) mesh = CC.Meshes.EquiangularCubedSphere(domain, ne) @@ -52,11 +53,12 @@ function create_space( if nz > 1 vertdomain = CC.Domains.IntervalDomain( CC.Geometry.ZPoint{FT}(0), - CC.Geometry.ZPoint{FT}(100); + CC.Geometry.ZPoint{FT}(height); boundary_names = (:bottom, :top), ) vertmesh = CC.Meshes.IntervalMesh(vertdomain, nelems = nz) - vert_center_space = CC.Spaces.CenterFiniteDifferenceSpace(vertmesh) + vert_topology = CC.Topologies.IntervalTopology(comms_ctx, vertmesh) + vert_center_space = CC.Spaces.CenterFiniteDifferenceSpace(vert_topology) return CC.Spaces.ExtrudedFiniteDifferenceSpace(sphere_space, vert_center_space) else return sphere_space diff --git a/test/diagnostics_tests.jl b/test/diagnostics_tests.jl index 094f30c4e2..676aad5d15 100644 --- a/test/diagnostics_tests.jl +++ b/test/diagnostics_tests.jl @@ -2,6 +2,7 @@ Unit tests for ClimaCoupler Diagnostics module =# import Test: @test, @testset +import CUDA import Dates import ClimaComms import ClimaCore as CC @@ -50,7 +51,10 @@ for FT in (Float32, Float64) nothing, # thermo_params ) Diagnostics.accumulate_diagnostics!(cs) - @test cs.diagnostics[1].field_vector[1] == expected_results[c_i] + + CUDA.@allowscalar begin + @test cs.diagnostics[1].field_vector[1] == expected_results[c_i] + end @test isnothing(Diagnostics.get_var(cs, Val(:z))) end diff --git a/test/runtests.jl b/test/runtests.jl index f2b7b5ed59..da3c743c54 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -60,6 +60,6 @@ end @safetestset "component model test: slab ocean" begin include("component_model_tests/slab_ocean_tests.jl") end -gpu_broken || @safetestset "debug diagnostics: amip plots" begin +@safetestset "debug diagnostics: amip plots" begin
include("debug/debug_amip_plots.jl") end