Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Coarse AMIP calibration pipeline #1215

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion experiments/calibration/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,7 @@ Thermodynamics = "b60c26fb-14c3-4610-9d3e-2d17fe7ff00c"
YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"

[compat]
ClimaCalibrate = "0.0.10"
ClimaCalibrate = "0.0.11"
ClimaTimeSteppers = "0.8.2"
ClimaCore = "0.14.26"
EnsembleKalmanProcesses = "2.0"
16 changes: 16 additions & 0 deletions experiments/calibration/coarse_amip/generate_observations.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Generate and save experiment observations to disk
using ClimaAnalysis, JLD2
include("experiments/calibration/coarse_amip/observation_utils.jl")

const obs_dir = "/home/ext_nefrathe_caltech_edu/calibration_obs"
const diagnostic_dir = "experiments/calibration/output/old/iteration_000/member_001/model_config/output_0000/clima_atmos/"

diagnostic_var2d = OutputVar(joinpath(diagnostic_dir, "rsdt_1M_average.nc"));
pressure = OutputVar(joinpath(diagnostic_dir, "pfull_1M_average.nc"));
diagnostic_var3d = OutputVar(joinpath(diagnostic_dir, "ta_1M_average.nc"));
diagnostic_var3d = ClimaAnalysis.Atmos.to_pressure_coordinates(diagnostic_var3d, pressure)

nt = get_all_output_vars(obs_dir, diagnostic_var2d, diagnostic_var3d)
nyears = 18
observation_vec = create_observation_vector(nt, nyears)
JLD2.save_object("experiments/calibration/coarse_amip/observations.jld2", observation_vec)
49 changes: 49 additions & 0 deletions experiments/calibration/coarse_amip/model_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
FLOAT_TYPE: "Float32"
albedo_model: "CouplerAlbedo"
atmos_config_file: "config/longrun_configs/amip_target_edonly.yml"
checkpoint_dt: "99999days"
coupler_toml: ["toml/amip_no_precipitation_timescale.toml"]
print_config_dict: true
coupler_output_dir: "experiments/calibration/output"
dt: "800secs"
dt_cpl: "800secs"
dt_save_state_to_disk: "99999days"
dt_save_to_sol: "99999days"
dz_bottom: 100.0
energy_check: false
h_elem: 3
land_albedo_type: "map_temporal"
mode_name: "amip"
mono_surface: false
netcdf_output_at_levels: true
output_default_diagnostics: false
use_coupler_diagnostics: false
radiation_reset_rng_seed: true
rayleigh_sponge: true
start_date: "20000901"
surface_setup: "PrescribedSurface"
t_end: "457days"
topography: "Earth"
topo_smoothing: true
turb_flux_partition: "CombinedStateFluxesMOST"
viscous_sponge: true
z_elem: 39
z_max: 60000.0
extra_atmos_diagnostics:
- reduction_time: average
short_name:
- rsut
- rlut
- rsdt
- rsutcs
- rlutcs
- pr
- ts
- ta
- hur
- hus
- clw
- cli
- pfull
period: 1months
writer: nc
39 changes: 39 additions & 0 deletions experiments/calibration/coarse_amip/model_interface.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import ClimaCoupler
import ClimaCalibrate
import CUDA
import EnsembleKalmanProcesses as EKP
ENV["CLIMACOMMS_DEVICE"] = "CUDA"
ENV["CLIMACOMMS_CONTEXT"] = "SINGLETON"
import JLD2
include(joinpath(pkgdir(ClimaCoupler), "experiments", "ClimaEarth", "setup_run.jl"))

function ClimaCalibrate.forward_model(iter, member)

config_file = joinpath(pkgdir(ClimaCoupler), "experiments", "calibration", "coarse_amip", "model_config.yml")
config_dict = get_coupler_config_dict(config_file)

output_dir_root = config_dict["coupler_output_dir"]
eki = JLD2.load_object(ClimaCalibrate.ekp_path(output_dir_root, iter))
minibatch = EKP.get_current_minibatch(eki)
config_dict["start_date"] = minibatch_to_start_date(minibatch)

spinup_days = 92
nyears = length(minibatch)
t_end_days = spinup_days + 365nyears
config_dict["t_end"] = "$(t_end_days)days"

# Set member parameter file
sampled_parameter_file = ClimaCalibrate.parameter_path(output_dir_root, iter, member)
config_dict["calibration_toml"] = sampled_parameter_file
# Set member output directory
member_output_dir = ClimaCalibrate.path_to_ensemble_member(output_dir_root, iter, member)
config_dict["coupler_output_dir"] = member_output_dir

return setup_and_run(config_dict)
end

function minibatch_to_start_date(batch)
start_year = minimum(batch) + 1999
@assert start_year >= 2000
return "$(start_year)0901"
end
77 changes: 77 additions & 0 deletions experiments/calibration/coarse_amip/observation_map.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
using ClimaAnalysis, Dates
import ClimaCalibrate
import ClimaCoupler
import JLD2
import EnsembleKalmanProcesses as EKP
include(joinpath(pkgdir(ClimaCoupler), "experiments/calibration/coarse_amip/observation_utils.jl"))

function ClimaCalibrate.observation_map(iteration)
observation_vec = JLD2.load_object(observation_path)
single_member_dims = length(EKP.get_obs(first(observation_vec)))
G_ensemble = Array{Float64}(undef, single_member_dims, ensemble_size)
for m in 1:ensemble_size
member_path = ClimaCalibrate.path_to_ensemble_member(output_dir, iteration, m)
simdir_path = joinpath(member_path, "model_config/output_active")
try
G_ensemble[:, m] .= process_member_data(SimDir(simdir_path))

catch e
@warn "Error processing member $m, filling observation map entry with NaNs" exception = e
G_ensemble[:, m] .= NaN
end
end
return G_ensemble
end

function process_member_data(simdir::SimDir)
pressure = get_monthly_averages(simdir, "pfull")

rsdt_full = get_monthly_averages(simdir, "rsdt")
rsut_full = get_monthly_averages(simdir, "rsut")
rlut_full = get_monthly_averages(simdir, "rlut")

year_net_radiation = (rlut_full + rsut_full - rsdt_full) |> average_lat |> average_lon |> average_time

rsut = process_outputvar(simdir, "rsut")
rlut = process_outputvar(simdir, "rlut")
rsutcs = process_outputvar(simdir, "rsutcs")
rlutcs = process_outputvar(simdir, "rlutcs")
cre = rsut + rlut - rsutcs - rlutcs

pr = process_outputvar(simdir, "pr")
# shf = process_outputvar(simdir, "shf")
ts = process_outputvar(simdir, "ts")

ta = process_outputvar(simdir, "ta")
hur = process_outputvar(simdir, "hur")
hus = process_outputvar(simdir, "hus")
# clw = get_seasonal_averages(simdir, "clw")
# cli = get_seasonal_averages(simdir, "cli")

return vcat(year_net_radiation.data, rsut, rlut, cre, pr, ts)#, ta, hur, hus)
end

function process_outputvar(simdir, name)
days = 86_400

monthly_avgs = get_monthly_averages(simdir, name)
# Preprocess to match observations
if has_altitude(monthly_avgs)
pressure = get_monthly_averages(simdir, "pfull")
monthly_avgs = ClimaAnalysis.Atmos.to_pressure_coordinates(monthly_avgs, pressure)
monthly_avgs = limit_pressure_dim_to_era5_range(monthly_avgs)
end
# TODO: Ask ollie how to replace nans in a way that makes sense
monthly_avgs = ClimaAnalysis.replace(monthly_avgs, missing => 0.0, NaN => 0.0)
monthly_avgs = ClimaAnalysis.shift_to_start_of_previous_month(monthly_avgs)

# Cut off first 3 months
single_year = window(monthly_avgs, "time"; left = 92days)
seasons = split_by_season_across_time(single_year)
# Ensure each season has three months
@assert all(map(x -> length(times(x)) == 3, seasons))
seasonal_avgs = average_time.(seasons)

downsampled_seasonal_avg_arrays = downsample.(seasonal_avgs, 3)
return vcat(vec.(downsampled_seasonal_avg_arrays)...)
end
Loading
Loading