diff --git a/Project.toml b/Project.toml index f0d178b11f..7722bd394c 100644 --- a/Project.toml +++ b/Project.toml @@ -13,18 +13,12 @@ Thermodynamics = "b60c26fb-14c3-4610-9d3e-2d17fe7ff00c" [weakdeps] DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964" -Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" -GaussianProcesses = "891a1506-143c-57d2-908e-e1f8e92e6de9" -KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" MLJFlux = "094fc8d1-fd35-5302-93ea-dabda2abf845" -MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7" -StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [extensions] -EmulatorModelsExt = "DataFrames" +EmulatorModelsExt = ["DataFrames", "Flux", "MLJ", "MLJFlux"] [compat] CLIMAParameters = "0.9" diff --git a/ext/EmulatorModelsExt.jl b/ext/EmulatorModelsExt.jl index 29787f1183..bf7cb60ece 100644 --- a/ext/EmulatorModelsExt.jl +++ b/ext/EmulatorModelsExt.jl @@ -1,165 +1,17 @@ module EmulatorModelsExt import MLJ -import MLJFlux -import Flux import DataFrames as DF -import GaussianProcesses -import StatsBase -import Distributions -import CloudMicrophysics as CM +import Thermodynamics as TD +import Thermodynamics.Parameters as TDP import CloudMicrophysics.AerosolActivation as AA import CloudMicrophysics.AerosolModel as AM import CloudMicrophysics.Parameters as CMP -struct NNBuilder <: MLJFlux.Builder - layer_sizes::Vector{Integer} - dropout::Vector{Float64} -end - -function MLJFlux.build(builder::NNBuilder, rng, n_in, n_out) - @assert length(builder.layer_sizes) == length(builder.dropout) - num_hidden_layers = length(builder.layer_sizes) - init = Flux.glorot_uniform(rng) - layers::Vector{Any} = [] - if num_hidden_layers == 0 - push!(layers, Flux.Dense(n_in => n_out, init = init)) - else - push!( - layers, - Flux.Dense( - n_in => builder.layer_sizes[1], - Flux.sigmoid_fast, - init = init, - ), - ) - end - for i in 1:num_hidden_layers - push!(layers, Flux.Dropout(builder.dropout[i])) - if i == num_hidden_layers - push!( - layers, - Flux.Dense(builder.layer_sizes[i] => n_out, init = init), - ) - else - push!( - layers, - Flux.Dense( - builder.layer_sizes[i] => builder.layer_sizes[i + 1], - Flux.sigmoid_fast, - init = init, - ), - ) - end - end - return Flux.Chain(layers...) -end - -mutable struct GPRegressor <: MLJ.Deterministic - num_gps::Integer - sample_size::Integer - use_ARG_weights::Bool - use_DTC::Bool - sample_size_inducing::Integer -end - -function MLJ.fit(model::GPRegressor, verbosity, X, y) - gps = [] - for i in 1:(model.num_gps) - if model.use_ARG_weights - weights = StatsBase.Weights([ - Distributions.pdf(Distributions.Normal(0.0, 0.5), x) for - x in X.mode_1_ARG_act_frac - ]) - inds = StatsBase.sample( - 1:DF.nrow(X), - weights, - model.sample_size, - replace = false, - ) - inds_inducing = StatsBase.sample( - 1:DF.nrow(X), - weights, - model.sample_size_inducing, - replace = false, - ) - else - inds = StatsBase.sample( - 1:DF.nrow(X), - model.sample_size, - replace = false, - ) - inds_inducing = StatsBase.sample( - 1:DF.nrow(X), - model.sample_size_inducing, - replace = false, - ) - end - if model.use_DTC - gp = GaussianProcesses.DTC( - Matrix(X[inds, :])', - Matrix(X[inds_inducing, :])', - y[inds], - GaussianProcesses.MeanConst(StatsBase.mean(y)), - GaussianProcesses.SEArd(fill(4.0, DF.ncol(X)), 0.0), - 2.0, - ) - else - gp = GaussianProcesses.GPA( - Matrix(X[inds, :])', - y[inds], - GaussianProcesses.MeanConst(StatsBase.mean(y)), - GaussianProcesses.SEArd(fill(4.0, DF.ncol(X)), 0.0), - GaussianProcesses.GaussLik(2.0), - ) - end - GaussianProcesses.optimize!(gp) - push!(gps, gp) - end - return gps, nothing, nothing -end - -function MLJ.predict(::GPRegressor, fitresult, Xnew) - means = reduce( - hcat, - [GaussianProcesses.predict_f(gp, Matrix(Xnew)')[1] for gp in fitresult], - ) - variances = reduce( - hcat, - [GaussianProcesses.predict_f(gp, Matrix(Xnew)')[2] for gp in fitresult], - ) - return (sum( - means ./ variances, - dims = 2, - ) ./ sum(1.0 ./ variances, dims = 2))[ - :, - 1, - ] -end - - -""" - MLEmulatedAerosolActivation - -The type for aerosol activation schemes that are emulated with an ML model -""" -struct MLAerosolActivationParameters <: CMP.ParametersType - ap::CMP.AerosolActivationParameters - machine::MLJ.Machine -end - -function MLEmulatedAerosolActivation( - ::Type{FT}, - emulator_filepath::String, -) where {FT} - machine = MLJ.machine(emulator_filepath) - ap = CMP.AerosolActivationParameters(FT) - return MLEmulatedAerosolActivation(ap, machine) -end - function AA.N_activated_per_mode( - ml::MLAerosolActivationParameters, + machine::MLJ.Machine, + ap::CMP.AerosolActivationParameters, aip::CMP.AirProperties, tps::TDP.ThermodynamicsParameters, T::FT, @@ -167,7 +19,7 @@ function AA.N_activated_per_mode( w::FT, q::TD.PhasePartition{FT}, ) where {FT <: Real} - hygro = mean_hygroscopicity_parameter(ml.ap, ad) + hygro = mean_hygroscopicity_parameter(ap, ad) return ntuple(Val(AM.n_modes(ad))) do i # Model predicts activation of the first mode. So, swap each mode # with the first mode repeatedly to predict all activations. @@ -187,8 +39,7 @@ function AA.N_activated_per_mode( :initial_pressure => p, ) X = DF.DataFrame([merge(reduce(merge, per_mode_data), additional_data)]) - max(FT(0), min(FT(1), MLJ.predict(ml.machine, X)[1])) * - ad.Modes[i].N + max(FT(0), min(FT(1), MLJ.predict(machine, X)[1])) * ad.Modes[i].N end end diff --git a/ml_test/2modal_nn_machine_naive.jls b/ml_test/2modal_nn_machine_naive.jls new file mode 100644 index 0000000000..97425783b6 Binary files /dev/null and b/ml_test/2modal_nn_machine_naive.jls differ diff --git a/ml_test/PreprocessAerosolData.jl b/ml_test/PreprocessAerosolData.jl new file mode 100644 index 0000000000..deb70bb817 --- /dev/null +++ b/ml_test/PreprocessAerosolData.jl @@ -0,0 +1,236 @@ +module PreprocessAerosolData + +import DataFrames as DF +using DataFramesMeta + +import CLIMAParameters +import Thermodynamics as TD + +import CloudMicrophysics.AerosolActivation as AA +import CloudMicrophysics.AerosolModel as AM +import CloudMicrophysics.Common as CO + +function get_num_modes(df::DataFrame) + i = 1 + while true + if !("mode_$(i)_N" in names(df)) + return i - 1 + end + i += 1 + end +end + +function get_num_modes(data_row::NamedTuple) + i = 1 + while true + if !(Symbol("mode_$(i)_N") in keys(data_row)) + return i - 1 + end + i += 1 + end +end + +function convert_to_ARG_params(data_row::NamedTuple) + #TODO + FT = Float64 + tps = TD.Parameters.ThermodynamicsParameters(FT) + + num_modes = get_num_modes(data_row) + @assert num_modes > 0 + mode_Ns = [] + mode_means = [] + mode_stdevs = [] + mode_kappas = [] + w = data_row.velocity + T = data_row.initial_temperature + p = data_row.initial_pressure + for i in 1:num_modes + push!(mode_Ns, data_row[Symbol("mode_$(i)_N")]) + push!(mode_means, data_row[Symbol("mode_$(i)_mean")]) + push!(mode_stdevs, data_row[Symbol("mode_$(i)_stdev")]) + push!(mode_kappas, data_row[Symbol("mode_$(i)_kappa")]) + end + ad = AM.AerosolDistribution( + Tuple( + AM.Mode_κ( + mode_means[i], + mode_stdevs[i], + mode_Ns[i], + FT(1), + FT(1), + FT(0), + mode_kappas[i], + 1, + ) for i in 1:num_modes + ), + ) + pv0 = TD.saturation_vapor_pressure(tps, T, TD.Liquid()) + vapor_mix_ratio = + pv0 / TD.Parameters.molmass_ratio(tps) / (p - pv0) + q_vap = vapor_mix_ratio / (vapor_mix_ratio + 1) + q = TD.PhasePartition(q_vap, FT(0), FT(0)) + return (; ad, T, p, w, q, mode_Ns) +end + +function convert_to_ARG_intermediates( + data_row::NamedTuple, +) + num_modes = get_num_modes(data_row) + @assert num_modes > 0 + + # TODO + FT = Float64 + tps = TD.Parameters.ThermodynamicsParameters(FT) + aip = CMP.AirProperties(FT) + ap = CMP.AerosolActivationParameters(FT) + + (; ad, T, p, w, q) = convert_to_ARG_params(data_row, tps) + + ϵ::FT = 1 / TD.Parameters.molmass_ratio(tps) + R_m::FT = TD.gas_constant_air(tps, q) + cp_m::FT = TD.cp_m(tps, q) + + L::FT = TD.latent_heat_vapor(tps, T) + p_vs::FT = TD.saturation_vapor_pressure(tps, T, TD.Liquid()) + G::FT = CO.G_func(aip, tps, T, TD.Liquid()) / ap.ρ_w + + α::FT = L * ap.g * ϵ / R_m / cp_m / T^2 - ap.g / R_m / T + γ::FT = R_m * T / ϵ / p_vs + ϵ * L^2 / cp_m / T / p + + A::FT = AA.coeff_of_curvature(ap, T) + ζ::FT = 2 * A / 3 * sqrt(α * w / G) + + Sm = AA.critical_supersaturation(ap, ad, T) + η = [ + (α * w / G)^FT(3 / 2) / (FT(2 * pi) * ap.ρ_w * γ * ad.Modes[i].N) for i in 1:num_modes + ] + + per_mode_intermediates = [ + (; + Symbol("mode_$(i)_stdev") => ad.Modes[i].stdev, + Symbol("mode_$(i)_η") => η[i], + Symbol("mode_$(i)_Sm") => Sm[i], + ) for i in 1:num_modes + ] + return merge(reduce(merge, per_mode_intermediates), (; ζ)) +end + +function get_ARG_S_max(data_row::NamedTuple) + (; ad, T, p, w, q) = convert_to_ARG_params(data_row) + # TODO + FT = Float64 + tps = TD.Parameters.ThermodynamicsParameters(FT) + aip = CMP.AirProperties(FT) + ap = CMP.AerosolActivationParameters(FT) + + max_supersaturation = + AA.max_supersaturation(ap, ad, aip, tps, T, p, w, q) + return max_supersaturation +end + +function get_ARG_S_max(X::DataFrame) + return get_ARG_S_max.(NamedTuple.(eachrow(X))) +end + +function get_ARG_S_crit(data_row::NamedTuple) + #TODO + FT = Float64 + tps = TD.Parameters.ThermodynamicsParameters(FT) + ap = CMP.AerosolActivationParameters(FT) + (; ad, T) = convert_to_ARG_params(data_row, tps) + return AA.critical_supersaturation(ap, ad, T) +end + +function get_ARG_S_crit(X::DataFrame) + return get_ARG_S_crit.(NamedTuple.(eachrow(X))) +end + +function get_ARG_act_N(data_row::NamedTuple, S_max = nothing) + + #TODO + FT = Float64 + tps = TD.Parameters.ThermodynamicsParameters(FT) + (; ad, T, p, w, q) = convert_to_ARG_params(data_row, tps) + + if S_max === nothing + return collect( + AA.N_activated_per_mode(ap, ad, aip, tps, T, p, w, q) + ) + else + return collect( + AA.N_activated_per_mode(ap, ad, aip, tps, T, p, w, q, S_max) + ) + end +end + +function get_ARG_act_N(data_row::NamedTuple, S_max = nothing) + return get_ARG_act_N(data_row, S_max) +end + +function get_ARG_act_N(X::DataFrame, S_max = nothing) + return transpose( + hcat(get_ARG_act_N.(NamedTuple.(eachrow(X)), S_max)...), + ) +end + +function get_ARG_act_frac(data_row::NamedTuple, S_max = nothing) + (; mode_Ns) = convert_to_ARG_params(data_row) + return get_ARG_act_N(data_row, S_max) ./ mode_Ns +end + +function get_ARG_act_frac(X::DataFrame, S_max = nothing) + return transpose( + hcat(get_ARG_act_frac.(NamedTuple.(eachrow(X)), S_max)...), + ) +end + +function preprocess_aerosol_data(X::DataFrame, add_ARG_act_frac::Bool) + num_modes = get_num_modes(X) + if add_ARG_act_frac + X = DF.transform( + X, + AsTable(All()) => + ByRow(x -> get_ARG_act_frac(x)) => + [Symbol("mode_$(i)_ARG_act_frac") for i in 1:num_modes], + ) + end + for i in 1:num_modes + X = DF.transform( + X, + Symbol("mode_$(i)_N") => ByRow(log) => Symbol("mode_$(i)_N"), + ) + X = DF.transform( + X, + Symbol("mode_$(i)_mean") => + ByRow(log) => Symbol("mode_$(i)_mean"), + ) + end + X = DF.transform(X, :velocity => ByRow(log) => :velocity) + return X +end + +function preprocess_aerosol_data_standard(X::DataFrame) + return preprocess_aerosol_data(X, false) +end + +function preprocess_aerosol_data_with_ARG_act_frac(X::DataFrame) + return preprocess_aerosol_data(X, true) +end + +function preprocess_aerosol_data_with_ARG_intermediates(X::DataFrame) + return DF.DataFrame( + convert_to_ARG_intermediates.( + NamedTuple.(eachrow(X)) + ) + ) +end + +function target_transform(act_frac) + return @. atanh(2.0 * 0.99 * (act_frac - 0.5)) +end + +function inverse_target_transform(transformed_act_frac) + return @. (1.0 / (2.0 * 0.99)) * tanh(transformed_act_frac) + 0.5 +end + +end # module diff --git a/ml_test/Project.toml b/ml_test/Project.toml new file mode 100644 index 0000000000..1df66750a3 --- /dev/null +++ b/ml_test/Project.toml @@ -0,0 +1,10 @@ +[deps] +CLIMAParameters = "6eacf6c3-8458-43b9-ae03-caf5306d3d53" +CloudMicrophysics = "6a9e3e04-43cd-43ba-94b9-e8782df3c71b" +DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964" +Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" +MLJFlux = "094fc8d1-fd35-5302-93ea-dabda2abf845" +Thermodynamics = "b60c26fb-14c3-4610-9d3e-2d17fe7ff00c" diff --git a/ml_test/testing.jl b/ml_test/testing.jl new file mode 100644 index 0000000000..ce8eb6d746 --- /dev/null +++ b/ml_test/testing.jl @@ -0,0 +1,55 @@ +import MLJFlux +import Flux + +include(joinpath("PreprocessAerosolData.jl")) + +struct NNBuilder <: MLJFlux.Builder + layer_sizes::Vector{Integer} + dropout::Vector{Float64} +end + +function MLJFlux.build(builder::NNBuilder, rng, n_in, n_out) + @assert length(builder.layer_sizes) == length(builder.dropout) + num_hidden_layers = length(builder.layer_sizes) + init = Flux.glorot_uniform(rng) + layers::Vector{Any} = [] + if num_hidden_layers == 0 + push!(layers, Flux.Dense(n_in => n_out, init = init)) + else + push!( + layers, + Flux.Dense( + n_in => builder.layer_sizes[1], + Flux.sigmoid_fast, + init = init, + ), + ) + end + for i in 1:num_hidden_layers + push!(layers, Flux.Dropout(builder.dropout[i])) + if i == num_hidden_layers + push!( + layers, + Flux.Dense(builder.layer_sizes[i] => n_out, init = init), + ) + else + push!( + layers, + Flux.Dense( + builder.layer_sizes[i] => builder.layer_sizes[i + 1], + Flux.sigmoid_fast, + init = init, + ), + ) + end + end + return Flux.Chain(layers...) +end + +emulator_filepath = joinpath( + pkgdir(CloudMicrophysics), + "ml_test", + "2modal_nn_machine_naive.jls", +) + +machine = MLJ.machine(emulator_filepath) diff --git a/src/AerosolActivation.jl b/src/AerosolActivation.jl index 71ac1ae374..d51d3558a3 100644 --- a/src/AerosolActivation.jl +++ b/src/AerosolActivation.jl @@ -213,6 +213,27 @@ function N_activated_per_mode( mode_i.N * (1 / 2) * (1 - SF.erf(u_i)) end end +function N_activated_per_mode( + ap::CMP.AerosolActivationParameters, + ad::CMP.AerosolDistributionType, + aip::CMP.AirProperties, + tps::TDP.ThermodynamicsParameters, + T::FT, + p::FT, + w::FT, + q::TD.PhasePartition{FT}, + smax::FT +) where {FT} + sm = critical_supersaturation(ap, ad, T) + + return ntuple(Val(AM.n_modes(ad))) do i + + mode_i = ad.Modes[i] + u_i::FT = 2 * log(sm[i] / smax) / 3 / sqrt(2) / log(mode_i.stdev) + + mode_i.N * (1 / 2) * (1 - SF.erf(u_i)) + end +end """ M_activated_per_mode(ap, ad, aip, tps, T, p, w, q) diff --git a/src/PreprocessAerosolData.jl b/src/PreprocessAerosolData.jl deleted file mode 100644 index 38f62aa0fc..0000000000 --- a/src/PreprocessAerosolData.jl +++ /dev/null @@ -1,275 +0,0 @@ -module PreprocessAerosolData - -import CloudMicrophysics as CM -import ..AerosolActivation as AA -import ..AerosolModel as AM -import ..Parameters as CMP -import ..CommonTypes as CT -import ..Parameters as CMP -import ..Common as CO -import CLIMAParameters as CP -import Thermodynamics as TD -import DataFrames as DF -using DataFramesMeta - -const FT = Float64 -const APS = CMP.AbstractCloudMicrophysicsParameters - -include(joinpath(pkgdir(CM), "test", "create_parameters.jl")) -toml_dict = CP.create_toml_dict(FT; dict_type = "alias") -default_param_set = cloud_microphysics_parameters(toml_dict) - -function get_num_modes(df::DataFrame) - i = 1 - while true - if !("mode_$(i)_N" in names(df)) - return i - 1 - end - i += 1 - end -end - -function get_num_modes(data_row::NamedTuple) - i = 1 - while true - if !(Symbol("mode_$(i)_N") in keys(data_row)) - return i - 1 - end - i += 1 - end -end - -function convert_to_ARG_params(data_row::NamedTuple, param_set::APS) - num_modes = get_num_modes(data_row) - @assert num_modes > 0 - mode_Ns = [] - mode_means = [] - mode_stdevs = [] - mode_kappas = [] - w = data_row.velocity - T = data_row.initial_temperature - p = data_row.initial_pressure - for i in 1:num_modes - push!(mode_Ns, data_row[Symbol("mode_$(i)_N")]) - push!(mode_means, data_row[Symbol("mode_$(i)_mean")]) - push!(mode_stdevs, data_row[Symbol("mode_$(i)_stdev")]) - push!(mode_kappas, data_row[Symbol("mode_$(i)_kappa")]) - end - ad = AM.AerosolDistribution( - Tuple( - AM.Mode_κ( - mode_means[i], - mode_stdevs[i], - mode_Ns[i], - FT(1), - FT(1), - FT(0), - mode_kappas[i], - 1, - ) for i in 1:num_modes - ), - ) - thermo_params = CMP.thermodynamics_params(param_set) - pv0 = TD.saturation_vapor_pressure(thermo_params, T, TD.Liquid()) - vapor_mix_ratio = - pv0 / TD.Parameters.molmass_ratio(thermo_params) / (p - pv0) - q_vap = vapor_mix_ratio / (vapor_mix_ratio + 1) - q = TD.PhasePartition(q_vap, FT(0), FT(0)) - return (; ad, T, p, w, q, mode_Ns) -end - -function convert_to_ARG_params(data_row::NamedTuple) - return convert_to_ARG_params(data_row, default_param_set) -end - -function convert_to_ARG_intermediates(data_row::NamedTuple, param_set::APS) - num_modes = get_num_modes(data_row) - @assert num_modes > 0 - - (; ad, T, p, w, q) = convert_to_ARG_params(data_row, param_set) - - thermo_params = CMP.thermodynamics_params(param_set) - _grav::FT = CMP.grav(param_set) - _ρ_cloud_liq::FT = CMP.ρ_cloud_liq(param_set) - - _ϵ::FT = 1 / CMP.molmass_ratio(param_set) - R_m::FT = TD.gas_constant_air(thermo_params, q) - cp_m::FT = TD.cp_m(thermo_params, q) - - L::FT = TD.latent_heat_vapor(thermo_params, T) - p_vs::FT = TD.saturation_vapor_pressure(thermo_params, T, TD.Liquid()) - G::FT = CO.G_func(param_set, T, TD.Liquid()) / _ρ_cloud_liq - - α::FT = L * _grav * _ϵ / R_m / cp_m / T^2 - _grav / R_m / T - γ::FT = R_m * T / _ϵ / p_vs + _ϵ * L^2 / cp_m / T / p - - A::FT = AA.coeff_of_curvature(param_set, T) - ζ::FT = 2 * A / 3 * sqrt(α * w / G) - - Sm = AA.critical_supersaturation(param_set, ad, T) - η = [ - (α * w / G)^FT(3 / 2) / (FT(2 * pi) * _ρ_cloud_liq * γ * ad.Modes[i].N) for i in 1:num_modes - ] - - per_mode_intermediates = [ - (; - Symbol("mode_$(i)_stdev") => ad.Modes[i].stdev, - Symbol("mode_$(i)_η") => η[i], - Symbol("mode_$(i)_Sm") => Sm[i], - ) for i in 1:num_modes - ] - - return merge(reduce(merge, per_mode_intermediates), (; ζ)) -end - -function convert_to_ARG_intermediates(data_row::NamedTuple) - return convert_to_ARG_intermediates(data_row, default_param_set) -end - -function get_ARG_S_max(data_row::NamedTuple, param_set::APS) - (; ad, T, p, w, q) = convert_to_ARG_params(data_row, param_set) - max_supersaturation = - AA.max_supersaturation(param_set, CT.ARG2000Type(), ad, T, p, w, q) - return max_supersaturation -end - -function get_ARG_S_max(data_row::NamedTuple) - return get_ARG_S_max(data_row, default_param_set) -end - -function get_ARG_S_max(X::DataFrame, param_set::APS) - return get_ARG_S_max.(NamedTuple.(eachrow(X)), param_set) -end - -function get_ARG_S_max(X::DataFrame) - return get_ARG_S_max(X, default_param_set) -end - -function get_ARG_S_crit(data_row::NamedTuple, param_set::APS) - (; ad, T) = convert_to_ARG_params(data_row, param_set) - return AA.critical_supersaturation(param_set, ad, T) -end - -function get_ARG_S_crit(data_row::NamedTuple) - return get_ARG_S_crit(data_row, default_param_set) -end - -function get_ARG_S_crit(X::DataFrame, param_set::APS) - return get_ARG_S_crit.(NamedTuple.(eachrow(X)), param_set) -end - -function get_ARG_S_crit(X::DataFrame) - return get_ARG_S_crit(X, default_param_set) -end - -function get_ARG_act_N(data_row::NamedTuple, param_set::APS, S_max = nothing) - (; ad, T, p, w, q) = convert_to_ARG_params(data_row, param_set) - if S_max === nothing - return collect( - AA.N_activated_per_mode( - param_set, - CT.ARG2000Type(), - ad, - T, - p, - w, - q, - ), - ) - else - critical_supersaturation = AA.critical_supersaturation(param_set, ad, T) - return collect( - AA.N_activated_per_mode( - param_set, - CT.ARG2000Type(), - ad, - T, - p, - w, - q, - S_max, - critical_supersaturation, - ), - ) - end -end - -function get_ARG_act_N(data_row::NamedTuple, S_max = nothing) - return get_ARG_act_N(data_row, default_param_set, S_max) -end - -function get_ARG_act_N(X::DataFrame, param_set::APS, S_max = nothing) - return transpose( - hcat(get_ARG_act_N.(NamedTuple.(eachrow(X)), param_set, S_max)...), - ) -end - -function get_ARG_act_N(X::DataFrame, S_max = nothing) - return get_ARG_act_N(X, default_param_set, S_max) -end - -function get_ARG_act_frac(data_row::NamedTuple, param_set::APS, S_max = nothing) - (; mode_Ns) = convert_to_ARG_params(data_row, param_set) - return get_ARG_act_N(data_row, param_set, S_max) ./ mode_Ns -end - -function get_ARG_act_frac(data_row::NamedTuple, S_max = nothing) - return get_ARG_act_frac(data_row, default_param_set, S_max) -end - -function get_ARG_act_frac(X::DataFrame, param_set::APS, S_max = nothing) - return transpose( - hcat(get_ARG_act_frac.(NamedTuple.(eachrow(X)), param_set, S_max)...), - ) -end - -function get_ARG_act_frac(X::DataFrame, S_max = nothing) - return get_ARG_act_frac(X, default_param_set, S_max) -end - -function preprocess_aerosol_data(X::DataFrame, add_ARG_act_frac::Bool) - num_modes = get_num_modes(X) - if add_ARG_act_frac - X = DF.transform( - X, - AsTable(All()) => - ByRow(x -> get_ARG_act_frac(x)) => - [Symbol("mode_$(i)_ARG_act_frac") for i in 1:num_modes], - ) - end - for i in 1:num_modes - X = DF.transform( - X, - Symbol("mode_$(i)_N") => ByRow(log) => Symbol("mode_$(i)_N"), - ) - X = DF.transform( - X, - Symbol("mode_$(i)_mean") => - ByRow(log) => Symbol("mode_$(i)_mean"), - ) - end - X = DF.transform(X, :velocity => ByRow(log) => :velocity) - return X -end - -function preprocess_aerosol_data_standard(X::DataFrame) - return preprocess_aerosol_data(X, false) -end - -function preprocess_aerosol_data_with_ARG_act_frac(X::DataFrame) - return preprocess_aerosol_data(X, true) -end - -function preprocess_aerosol_data_with_ARG_intermediates(X::DataFrame) - return DF.DataFrame(convert_to_ARG_intermediates.(NamedTuple.(eachrow(X)))) -end - -function target_transform(act_frac) - return @. atanh(2.0 * 0.99 * (act_frac - 0.5)) -end - -function inverse_target_transform(transformed_act_frac) - return @. (1.0 / (2.0 * 0.99)) * tanh(transformed_act_frac) + 0.5 -end - -end # module