Skip to content

Commit

Permalink
Refactor leaderboard code
Browse files Browse the repository at this point in the history
This commit refactors the leaderboard code using the new features from
ClimaAnalysis. This commit also deletes the old leaderboard code and the
tests for it. Also, there is an off by one month issue when handling the
dates and seasons. This arises because the simulation data assigns the
next month the monthly average for this month. For instance, the monthly
average for January 2010 is assigned the date 2/1/2010. This is fixed in
this commit.

The commit also moves the code to leaderboard.jl and data_sources.jl
and. The file read_amip.jl calls leaderboard.jl to plot now. The
conditional to run leaderboard.jl is changed from t_end > 86400 to t_end
> 86400 * 30 since it doesn't make sense to run the code unless one or
more month is done. This is because the dataset contains monthly
averages.

One significant difference is the leaderboard which now plot the best
and worst single model using only annual rather than averaging the error
over annual and seasonal data.
  • Loading branch information
ph-kev committed Oct 7, 2024
1 parent 9afdcee commit 1b8a197
Show file tree
Hide file tree
Showing 12 changed files with 305 additions and 1,050 deletions.
2 changes: 1 addition & 1 deletion experiments/ClimaEarth/Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
[compat]
ArgParse = "1.1"
ArtifactWrappers = "0.2"
ClimaAnalysis = "0.5.4"
ClimaAnalysis = "0.5.10"
ClimaAtmos = "0.27"
ClimaCorePlots = "0.2"
ClimaDiagnostics = "0.2"
Expand Down
132 changes: 132 additions & 0 deletions experiments/ClimaEarth/leaderboard/data_sources.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import ClimaAnalysis
import ClimaUtilities.ClimaArtifacts: @clima_artifact

# For plotting bias plots
compare_vars_biases_plot_extrema = Dict(
"pr" => (-5.0, 5.0),
"rsdt" => (-2.0, 2.0),
"rsut" => (-50.0, 50.0),
"rlut" => (-50.0, 50.0),
"rsutcs" => (-20.0, 20.0),
"rlutcs" => (-20.0, 20.0),
"rsds" => (-50.0, 50.0),
"rsus" => (-10.0, 10.0),
"rlds" => (-50.0, 50.0),
"rlus" => (-50.0, 50.0),
"rsdscs" => (-10.0, 10.0),
"rsuscs" => (-10.0, 10.0),
"rldscs" => (-20.0, 20.0),
)

# To add a variable to the bias plots, add the variable to sim_var_dict, obs_var_dict,
# compare_vars_biases_groups, and compare_vars_biases_plot_extrema
# To add a variable to the leaderboard, add the variable to rmse_var_dict

# Dict for loading in simulation data
sim_var_dict = Dict{String, Any}(
"pr" =>
() -> begin
sim_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = "pr")
sim_var =
ClimaAnalysis.convert_units(sim_var, "mm/day", conversion_function = x -> x .* Float32(-86400))
sim_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_var)
return sim_var
end,
)

# Loop to load the rest of the simulation data
# Tuple of short names for loading simulation and observational data
sim_obs_short_names_no_pr = [
("rsdt", "solar_mon"),
("rsut", "toa_sw_all_mon"),
("rlut", "toa_lw_all_mon"),
("rsutcs", "toa_sw_clr_t_mon"),
("rlutcs", "toa_lw_clr_t_mon"),
("rsds", "sfc_sw_down_all_mon"),
("rsus", "sfc_sw_up_all_mon"),
("rlds", "sfc_lw_down_all_mon"),
("rlus", "sfc_lw_up_all_mon"),
("rsdscs", "sfc_sw_down_clr_t_mon"),
("rsuscs", "sfc_sw_up_clr_t_mon"),
("rldscs", "sfc_lw_down_clr_t_mon"),
]

for (short_name, _) in sim_obs_short_names_no_pr
sim_var_dict[short_name] =
() -> begin
sim_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = short_name)
sim_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_var)
return sim_var
end
end

# Dict for loading observational data
obs_var_dict = Dict{String, Any}(
"pr" =>
(start_date) -> begin
obs_var = ClimaAnalysis.OutputVar(
joinpath(@clima_artifact("precipitation_obs"), "gpcp.precip.mon.mean.197901-202305.nc"),
"precip",
new_start_date = start_date,
shift_by = Dates.firstdayofmonth,
)
return obs_var
end,
)

# Loop to load the rest of the observational data
for (sim_name, obs_name) in sim_obs_short_names_no_pr
obs_var_dict[sim_name] =
(start_date) -> begin
obs_var = ClimaAnalysis.OutputVar(
joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"),
obs_name,
new_start_date = start_date,
shift_by = Dates.firstdayofmonth,
)
# Convert from W m-2 to W m^-2
ClimaAnalysis.units(obs_var) == "W m-2" ? obs_var = ClimaAnalysis.set_units(obs_var, "W m^-2") :
error("Did not expect $(ClimaAnalysis.units(obs_var)) for the units")
return obs_var
end
end

# Used to organize plots
compare_vars_biases_groups = [
["pr", "rsdt", "rsut", "rlut"],
["rsds", "rsus", "rlds", "rlus"],
["rsutcs", "rlutcs", "rsdscs", "rsuscs", "rldscs"],
]

# For plotting box plots and leaderboard

# Load data into RMSEVariables
rmse_var_names = ["pr", "rsut", "rlut"]
rmse_var_pr = ClimaAnalysis.read_rmses(
joinpath(@clima_artifact("cmip_model_rmse"), "pr_rmse_amip_pr_amip_5yr.csv"),
"pr",
units = "mm / day",
)
rmse_var_rsut = ClimaAnalysis.read_rmses(
joinpath(@clima_artifact("cmip_model_rmse"), "rsut_rmse_amip_rsut_amip_5yr.csv"),
"rsut",
units = "W m^-2",
)
rmse_var_rlut = ClimaAnalysis.read_rmses(
joinpath(@clima_artifact("cmip_model_rmse"), "rlut_rmse_amip_rlut_amip_5yr.csv"),
"rlut",
units = "W m^-2",
)

# Add models and units for CliMA
rmse_var_pr = ClimaAnalysis.add_model(rmse_var_pr, "CliMA")
ClimaAnalysis.add_unit!(rmse_var_pr, "CliMA", "mm / day")

rmse_var_rsut = ClimaAnalysis.add_model(rmse_var_rsut, "CliMA")
ClimaAnalysis.add_unit!(rmse_var_rsut, "CliMA", "W m^-2")

rmse_var_rlut = ClimaAnalysis.add_model(rmse_var_rlut, "CliMA")
ClimaAnalysis.add_unit!(rmse_var_rlut, "CliMA", "W m^-2")

# Store the rmse vars in a dictionary
rmse_var_dict = Dict("pr" => rmse_var_pr, "rsut" => rmse_var_rsut, "rlut" => rmse_var_rlut)
168 changes: 168 additions & 0 deletions experiments/ClimaEarth/leaderboard/leaderboard.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import ClimaAnalysis
import ClimaUtilities.ClimaArtifacts: @clima_artifact
import GeoMakie
import CairoMakie
import Dates

include("data_sources.jl")

# leaderboard_base_path is the path to save the leaderboards
# diagnostics_folder_path is the path to simulation data
"""
compute_leaderboard(leaderboard_base_path, diagnostics_folder_path)
Plot the biases and a leaderboard of various variables.
The paramter `leaderboard_base_path` is the path to save the leaderboards and bias plots and
`diagnostics_folder_path` is the path to the simulation data.
"""
function compute_leaderboard(leaderboard_base_path, diagnostics_folder_path)
@info "Error against observations"

# Set up dict for storing simulation and observational data after processing
sim_obs_comparsion_dict = Dict()
seasons = ["ANN", "MAM", "JJA", "SON", "DJF"]

# Print dates for debugging
pr_var = sim_var_dict["pr"]() # it shouldn't matter what short name we use
output_dates = Dates.DateTime(pr_var.attributes["start_date"]) .+ Dates.Second.(ClimaAnalysis.times(pr_var))
@info "Working with dates:"
@info output_dates

for short_name in keys(sim_var_dict)
# Simulation data
sim_var = sim_var_dict[short_name]()

# Observational data
obs_var = obs_var_dict[short_name](sim_var.attributes["start_date"])

# Remove first spin_up_months from simulation
spin_up_months = 6
spinup_cutoff = spin_up_months * 31 * 86400.0
ClimaAnalysis.times(sim_var)[end] >= spinup_cutoff &&
(sim_var = ClimaAnalysis.window(sim_var, "time", left = spinup_cutoff))

obs_var = ClimaAnalysis.resampled_as(obs_var, sim_var)
obs_var_seasons = ClimaAnalysis.split_by_season(obs_var)
sim_var_seasons = ClimaAnalysis.split_by_season(sim_var)

# Add annual to start of seasons
obs_var_seasons = (obs_var, obs_var_seasons...)
sim_var_seasons = (sim_var, sim_var_seasons...)

# Take time average
obs_var_seasons = (
!ClimaAnalysis.isempty(obs_var) ? ClimaAnalysis.average_time(obs_var) : obs_var for
obs_var in obs_var_seasons
)
sim_var_seasons = (
!ClimaAnalysis.isempty(sim_var) ? ClimaAnalysis.average_time(sim_var) : sim_var for
sim_var in sim_var_seasons
)

# Save observation and simulation data
sim_obs_comparsion_dict[short_name] = Dict(
season => (sim_var_s, obs_var_s) for
(season, sim_var_s, obs_var_s) in zip(seasons, sim_var_seasons, obs_var_seasons)
)
end

# Filter seasons to remove seasons with no dates
filter!(season -> !ClimaAnalysis.isempty(sim_obs_comparsion_dict["pr"][season][1]), seasons)

# Plot bias plots
for compare_vars_biases in compare_vars_biases_groups
for season in seasons
fig_bias = CairoMakie.Figure(; size = (600, 300 * length(compare_vars_biases)))
for (row, short_name) in enumerate(compare_vars_biases)
sim_var = sim_obs_comparsion_dict[short_name][season][1]
if !ClimaAnalysis.isempty(sim_var)
# Add "mean " for plotting the title
sim_var.attributes["short_name"] = "mean $(ClimaAnalysis.short_name(sim_var))"
ClimaAnalysis.Visualize.plot_bias_on_globe!(
fig_bias,
sim_obs_comparsion_dict[short_name][season]...,
cmap_extrema = compare_vars_biases_plot_extrema[short_name],
p_loc = (row, 1),
)
end
end
# Do if and else statement for naming files appropriately
if season != "ANN"
CairoMakie.save(
joinpath(leaderboard_base_path, "bias_$(first(compare_vars_biases))_$season.png"),
fig_bias,
)
else
CairoMakie.save(
joinpath(leaderboard_base_path, "bias_$(first(compare_vars_biases))_total.png"),
fig_bias,
)
end
end
end

# Plot plots with all the seasons (not annual)
seasons_no_ann = filter(season -> season != "ANN", seasons)
for compare_vars_biases in compare_vars_biases_groups
fig_all_seasons = CairoMakie.Figure(; size = (600 * length(seasons_no_ann), 300 * length(compare_vars_biases)))
for (col, season) in enumerate(seasons_no_ann)
# Plot at 2 * col - 1 because a bias plot takes up 1 x 2 space
CairoMakie.Label(fig_all_seasons[0, 2 * col - 1], season, tellwidth = false, fontsize = 30)
for (row, short_name) in enumerate(compare_vars_biases)
sim_var = sim_obs_comparsion_dict[short_name][season][1]
if !ClimaAnalysis.isempty(sim_var)
ClimaAnalysis.Visualize.plot_bias_on_globe!(
fig_all_seasons,
sim_obs_comparsion_dict[short_name][season]...,
cmap_extrema = compare_vars_biases_plot_extrema[short_name],
p_loc = (row, 2 * col - 1),
)
end
end
end
CairoMakie.save(
joinpath(leaderboard_base_path, "bias_$(first(compare_vars_biases))_all_seasons.png"),
fig_all_seasons,
)
end

# Add RMSE for the CliMA model and for each season
for short_name in rmse_var_names
for season in seasons
!ClimaAnalysis.isempty(sim_obs_comparsion_dict[short_name][season][1]) && (
rmse_var_dict[short_name]["CliMA", season] =
ClimaAnalysis.global_rmse(sim_obs_comparsion_dict[short_name][season]...)
)
end
end

# Plot box plots
fig_leaderboard = CairoMakie.Figure(; size = (800, 300 * length(rmse_var_dict) + 400), fontsize = 20)
for (loc, short_name) in enumerate(rmse_var_names)
ClimaAnalysis.Visualize.plot_boxplot!(
fig_leaderboard,
rmse_var_dict[short_name],
ploc = (loc, 1),
best_and_worst_category_name = "ANN",
)
end

# Plot leaderboard
ClimaAnalysis.Visualize.plot_leaderboard!(
fig_leaderboard,
(rmse_var_dict[short_name] for short_name in rmse_var_names)...,
best_category_name = "ANN",
ploc = (length(rmse_var_dict) + 1, 1),
)
CairoMakie.save(joinpath(leaderboard_base_path, "bias_leaderboard.png"), fig_leaderboard)
end

if abspath(PROGRAM_FILE) == @__FILE__
if length(ARGS) < 2
error("Usage: julia leaderboard.jl <Filepath to save leaderboard and bias plots> <Filepath to simulation data>")
end
leaderboard_base_path = ARGS[begin]
diagnostics_folder_path = ARGS[begin + 1]
compute_leaderboard(leaderboard_base_path, diagnostics_folder_path)
end
Loading

0 comments on commit 1b8a197

Please sign in to comment.