Refactor leaderboard code

This commit refactors the leaderboard code using the new features from ClimaAnalysis. This commit also deletes the old leaderboard code and the tests for it. Also, there is an off by one month issue when handling the dates and seasons. This arises because the simulation data assigns the next month the monthly average for this month. For instance, the monthly average for January 2010 is assigned the date 2/1/2010. This is fixed in this commit. The commit also moves the code to leaderboard.jl and data_sources.jl and. The file read_amip.jl calls leaderboard.jl to plot now. The conditional to run leaderboard.jl is changed from t_end > 86400 to t_end > 86400 * 30 since it doesn't make sense to run the code unless one or more month is done. This is because the dataset contains monthly averages. One significant difference is the leaderboard which now plot the best and worst single model using only annual rather than averaging the error over annual and seasonal data.
CliMA · Oct 7, 2024 · 1b8a197 · 1b8a197
1 parent 9afdcee
commit 1b8a197
Show file tree

Hide file tree

Showing 12 changed files with 305 additions and 1,050 deletions.
diff --git a/experiments/ClimaEarth/Project.toml b/experiments/ClimaEarth/Project.toml
@@ -34,7 +34,7 @@ YAML = "ddb6d928-2868-570f-bddf-ab3f9cf99eb6"
 [compat]
 ArgParse = "1.1"
 ArtifactWrappers = "0.2"
-ClimaAnalysis = "0.5.4"
+ClimaAnalysis = "0.5.10"
 ClimaAtmos = "0.27"
 ClimaCorePlots = "0.2"
 ClimaDiagnostics = "0.2"

diff --git a/experiments/ClimaEarth/leaderboard/data_sources.jl b/experiments/ClimaEarth/leaderboard/data_sources.jl
@@ -0,0 +1,132 @@
+import ClimaAnalysis
+import ClimaUtilities.ClimaArtifacts: @clima_artifact
+
+# For plotting bias plots
+compare_vars_biases_plot_extrema = Dict(
+    "pr" => (-5.0, 5.0),
+    "rsdt" => (-2.0, 2.0),
+    "rsut" => (-50.0, 50.0),
+    "rlut" => (-50.0, 50.0),
+    "rsutcs" => (-20.0, 20.0),
+    "rlutcs" => (-20.0, 20.0),
+    "rsds" => (-50.0, 50.0),
+    "rsus" => (-10.0, 10.0),
+    "rlds" => (-50.0, 50.0),
+    "rlus" => (-50.0, 50.0),
+    "rsdscs" => (-10.0, 10.0),
+    "rsuscs" => (-10.0, 10.0),
+    "rldscs" => (-20.0, 20.0),
+)
+
+# To add a variable to the bias plots, add the variable to sim_var_dict, obs_var_dict,
+# compare_vars_biases_groups, and compare_vars_biases_plot_extrema
+# To add a variable to the leaderboard, add the variable to rmse_var_dict
+
+# Dict for loading in simulation data
+sim_var_dict = Dict{String, Any}(
+    "pr" =>
+        () -> begin
+            sim_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = "pr")
+            sim_var =
+                ClimaAnalysis.convert_units(sim_var, "mm/day", conversion_function = x -> x .* Float32(-86400))
+            sim_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_var)
+            return sim_var
+        end,
+)
+
+# Loop to load the rest of the simulation data
+# Tuple of short names for loading simulation and observational data
+sim_obs_short_names_no_pr = [
+    ("rsdt", "solar_mon"),
+    ("rsut", "toa_sw_all_mon"),
+    ("rlut", "toa_lw_all_mon"),
+    ("rsutcs", "toa_sw_clr_t_mon"),
+    ("rlutcs", "toa_lw_clr_t_mon"),
+    ("rsds", "sfc_sw_down_all_mon"),
+    ("rsus", "sfc_sw_up_all_mon"),
+    ("rlds", "sfc_lw_down_all_mon"),
+    ("rlus", "sfc_lw_up_all_mon"),
+    ("rsdscs", "sfc_sw_down_clr_t_mon"),
+    ("rsuscs", "sfc_sw_up_clr_t_mon"),
+    ("rldscs", "sfc_lw_down_clr_t_mon"),
+]
+
+for (short_name, _) in sim_obs_short_names_no_pr
+    sim_var_dict[short_name] =
+        () -> begin
+            sim_var = get(ClimaAnalysis.SimDir(diagnostics_folder_path), short_name = short_name)
+            sim_var = ClimaAnalysis.shift_to_start_of_previous_month(sim_var)
+            return sim_var
+        end
+end
+
+# Dict for loading observational data
+obs_var_dict = Dict{String, Any}(
+    "pr" =>
+        (start_date) -> begin
+            obs_var = ClimaAnalysis.OutputVar(
+                joinpath(@clima_artifact("precipitation_obs"), "gpcp.precip.mon.mean.197901-202305.nc"),
+                "precip",
+                new_start_date = start_date,
+                shift_by = Dates.firstdayofmonth,
+            )
+            return obs_var
+        end,
+)
+
+# Loop to load the rest of the observational data
+for (sim_name, obs_name) in sim_obs_short_names_no_pr
+    obs_var_dict[sim_name] =
+        (start_date) -> begin
+            obs_var = ClimaAnalysis.OutputVar(
+                joinpath(@clima_artifact("radiation_obs"), "CERES_EBAF_Ed4.2_Subset_200003-201910.nc"),
+                obs_name,
+                new_start_date = start_date,
+                shift_by = Dates.firstdayofmonth,
+            )
+            # Convert from W m-2 to W m^-2
+            ClimaAnalysis.units(obs_var) == "W m-2" ? obs_var = ClimaAnalysis.set_units(obs_var, "W m^-2") :
+            error("Did not expect $(ClimaAnalysis.units(obs_var)) for the units")
+            return obs_var
+        end
+end
+
+# Used to organize plots
+compare_vars_biases_groups = [
+    ["pr", "rsdt", "rsut", "rlut"],
+    ["rsds", "rsus", "rlds", "rlus"],
+    ["rsutcs", "rlutcs", "rsdscs", "rsuscs", "rldscs"],
+]
+
+# For plotting box plots and leaderboard
+
+# Load data into RMSEVariables
+rmse_var_names = ["pr", "rsut", "rlut"]
+rmse_var_pr = ClimaAnalysis.read_rmses(
+    joinpath(@clima_artifact("cmip_model_rmse"), "pr_rmse_amip_pr_amip_5yr.csv"),
+    "pr",
+    units = "mm / day",
+)
+rmse_var_rsut = ClimaAnalysis.read_rmses(
+    joinpath(@clima_artifact("cmip_model_rmse"), "rsut_rmse_amip_rsut_amip_5yr.csv"),
+    "rsut",
+    units = "W m^-2",
+)
+rmse_var_rlut = ClimaAnalysis.read_rmses(
+    joinpath(@clima_artifact("cmip_model_rmse"), "rlut_rmse_amip_rlut_amip_5yr.csv"),
+    "rlut",
+    units = "W m^-2",
+)
+
+# Add models and units for CliMA
+rmse_var_pr = ClimaAnalysis.add_model(rmse_var_pr, "CliMA")
+ClimaAnalysis.add_unit!(rmse_var_pr, "CliMA", "mm / day")
+
+rmse_var_rsut = ClimaAnalysis.add_model(rmse_var_rsut, "CliMA")
+ClimaAnalysis.add_unit!(rmse_var_rsut, "CliMA", "W m^-2")
+
+rmse_var_rlut = ClimaAnalysis.add_model(rmse_var_rlut, "CliMA")
+ClimaAnalysis.add_unit!(rmse_var_rlut, "CliMA", "W m^-2")
+
+# Store the rmse vars in a dictionary
+rmse_var_dict = Dict("pr" => rmse_var_pr, "rsut" => rmse_var_rsut, "rlut" => rmse_var_rlut)
diff --git a/experiments/ClimaEarth/leaderboard/leaderboard.jl b/experiments/ClimaEarth/leaderboard/leaderboard.jl
@@ -0,0 +1,168 @@
+import ClimaAnalysis
+import ClimaUtilities.ClimaArtifacts: @clima_artifact
+import GeoMakie
+import CairoMakie
+import Dates
+
+include("data_sources.jl")
+
+# leaderboard_base_path is the path to save the leaderboards
+# diagnostics_folder_path is the path to simulation data
+"""
+    compute_leaderboard(leaderboard_base_path, diagnostics_folder_path)
+
+Plot the biases and a leaderboard of various variables.
+
+The paramter `leaderboard_base_path` is the path to save the leaderboards and bias plots and
+`diagnostics_folder_path` is the path to the simulation data.
+"""
+function compute_leaderboard(leaderboard_base_path, diagnostics_folder_path)
+    @info "Error against observations"
+
+    # Set up dict for storing simulation and observational data after processing
+    sim_obs_comparsion_dict = Dict()
+    seasons = ["ANN", "MAM", "JJA", "SON", "DJF"]
+
+    # Print dates for debugging
+    pr_var = sim_var_dict["pr"]() # it shouldn't matter what short name we use
+    output_dates = Dates.DateTime(pr_var.attributes["start_date"]) .+ Dates.Second.(ClimaAnalysis.times(pr_var))
+    @info "Working with dates:"
+    @info output_dates
+
+    for short_name in keys(sim_var_dict)
+        # Simulation data
+        sim_var = sim_var_dict[short_name]()
+
+        # Observational data
+        obs_var = obs_var_dict[short_name](sim_var.attributes["start_date"])
+
+        # Remove first spin_up_months from simulation
+        spin_up_months = 6
+        spinup_cutoff = spin_up_months * 31 * 86400.0
+        ClimaAnalysis.times(sim_var)[end] >= spinup_cutoff &&
+            (sim_var = ClimaAnalysis.window(sim_var, "time", left = spinup_cutoff))
+
+        obs_var = ClimaAnalysis.resampled_as(obs_var, sim_var)
+        obs_var_seasons = ClimaAnalysis.split_by_season(obs_var)
+        sim_var_seasons = ClimaAnalysis.split_by_season(sim_var)
+
+        # Add annual to start of seasons
+        obs_var_seasons = (obs_var, obs_var_seasons...)
+        sim_var_seasons = (sim_var, sim_var_seasons...)
+
+        # Take time average
+        obs_var_seasons = (
+            !ClimaAnalysis.isempty(obs_var) ? ClimaAnalysis.average_time(obs_var) : obs_var for
+            obs_var in obs_var_seasons
+        )
+        sim_var_seasons = (
+            !ClimaAnalysis.isempty(sim_var) ? ClimaAnalysis.average_time(sim_var) : sim_var for
+            sim_var in sim_var_seasons
+        )
+
+        # Save observation and simulation data
+        sim_obs_comparsion_dict[short_name] = Dict(
+            season => (sim_var_s, obs_var_s) for
+            (season, sim_var_s, obs_var_s) in zip(seasons, sim_var_seasons, obs_var_seasons)
+        )
+    end
+
+    # Filter seasons to remove seasons with no dates
+    filter!(season -> !ClimaAnalysis.isempty(sim_obs_comparsion_dict["pr"][season][1]), seasons)
+
+    # Plot bias plots
+    for compare_vars_biases in compare_vars_biases_groups
+        for season in seasons
+            fig_bias = CairoMakie.Figure(; size = (600, 300 * length(compare_vars_biases)))
+            for (row, short_name) in enumerate(compare_vars_biases)
+                sim_var = sim_obs_comparsion_dict[short_name][season][1]
+                if !ClimaAnalysis.isempty(sim_var)
+                    # Add "mean " for plotting the title
+                    sim_var.attributes["short_name"] = "mean $(ClimaAnalysis.short_name(sim_var))"
+                    ClimaAnalysis.Visualize.plot_bias_on_globe!(
+                        fig_bias,
+                        sim_obs_comparsion_dict[short_name][season]...,
+                        cmap_extrema = compare_vars_biases_plot_extrema[short_name],
+                        p_loc = (row, 1),
+                    )
+                end
+            end
+            # Do if and else statement for naming files appropriately
+            if season != "ANN"
+                CairoMakie.save(
+                    joinpath(leaderboard_base_path, "bias_$(first(compare_vars_biases))_$season.png"),
+                    fig_bias,
+                )
+            else
+                CairoMakie.save(
+                    joinpath(leaderboard_base_path, "bias_$(first(compare_vars_biases))_total.png"),
+                    fig_bias,
+                )
+            end
+        end
+    end
+
+    # Plot plots with all the seasons (not annual)
+    seasons_no_ann = filter(season -> season != "ANN", seasons)
+    for compare_vars_biases in compare_vars_biases_groups
+        fig_all_seasons = CairoMakie.Figure(; size = (600 * length(seasons_no_ann), 300 * length(compare_vars_biases)))
+        for (col, season) in enumerate(seasons_no_ann)
+            # Plot at 2 * col - 1 because a bias plot takes up 1 x 2 space
+            CairoMakie.Label(fig_all_seasons[0, 2 * col - 1], season, tellwidth = false, fontsize = 30)
+            for (row, short_name) in enumerate(compare_vars_biases)
+                sim_var = sim_obs_comparsion_dict[short_name][season][1]
+                if !ClimaAnalysis.isempty(sim_var)
+                    ClimaAnalysis.Visualize.plot_bias_on_globe!(
+                        fig_all_seasons,
+                        sim_obs_comparsion_dict[short_name][season]...,
+                        cmap_extrema = compare_vars_biases_plot_extrema[short_name],
+                        p_loc = (row, 2 * col - 1),
+                    )
+                end
+            end
+        end
+        CairoMakie.save(
+            joinpath(leaderboard_base_path, "bias_$(first(compare_vars_biases))_all_seasons.png"),
+            fig_all_seasons,
+        )
+    end
+
+    # Add RMSE for the CliMA model and for each season
+    for short_name in rmse_var_names
+        for season in seasons
+            !ClimaAnalysis.isempty(sim_obs_comparsion_dict[short_name][season][1]) && (
+                rmse_var_dict[short_name]["CliMA", season] =
+                    ClimaAnalysis.global_rmse(sim_obs_comparsion_dict[short_name][season]...)
+            )
+        end
+    end
+
+    # Plot box plots
+    fig_leaderboard = CairoMakie.Figure(; size = (800, 300 * length(rmse_var_dict) + 400), fontsize = 20)
+    for (loc, short_name) in enumerate(rmse_var_names)
+        ClimaAnalysis.Visualize.plot_boxplot!(
+            fig_leaderboard,
+            rmse_var_dict[short_name],
+            ploc = (loc, 1),
+            best_and_worst_category_name = "ANN",
+        )
+    end
+
+    # Plot leaderboard
+    ClimaAnalysis.Visualize.plot_leaderboard!(
+        fig_leaderboard,
+        (rmse_var_dict[short_name] for short_name in rmse_var_names)...,
+        best_category_name = "ANN",
+        ploc = (length(rmse_var_dict) + 1, 1),
+    )
+    CairoMakie.save(joinpath(leaderboard_base_path, "bias_leaderboard.png"), fig_leaderboard)
+end
+
+if abspath(PROGRAM_FILE) == @__FILE__
+    if length(ARGS) < 2
+        error("Usage: julia leaderboard.jl <Filepath to save leaderboard and bias plots> <Filepath to simulation data>")
+    end
+    leaderboard_base_path = ARGS[begin]
+    diagnostics_folder_path = ARGS[begin + 1]
+    compute_leaderboard(leaderboard_base_path, diagnostics_folder_path)
+end