-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a7d088d
commit d38910d
Showing
6 changed files
with
307 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
import CSV | ||
import DataFrames as DF | ||
using DataFramesMeta | ||
|
||
function get_num_modes(df::DataFrame) | ||
i = 1 | ||
while true | ||
if !("mode_$(i)_N" in names(df)) | ||
return i - 1 | ||
end | ||
i += 1 | ||
end | ||
end | ||
|
||
function get_num_modes(data_row::NamedTuple) | ||
i = 1 | ||
while true | ||
if !(Symbol("mode_$(i)_N") in keys(data_row)) | ||
return i - 1 | ||
end | ||
i += 1 | ||
end | ||
end | ||
|
||
function read_aerosol_dataset( | ||
dataset_filename::String, | ||
Y_name = :mode_1_act_frac_S_interp, | ||
) | ||
initial_data = DF.DataFrame(CSV.File(dataset_filename)) | ||
df = filter(row -> row.S_max > 0 && row.S_max < 0.2, initial_data) | ||
selected_columns_X = [] | ||
num_modes = get_num_modes(df) | ||
@info(num_modes) | ||
for i in 1:num_modes | ||
append!( | ||
selected_columns_X, | ||
Symbol.([ | ||
"mode_$(i)_N", | ||
"mode_$(i)_mean", | ||
"mode_$(i)_stdev", | ||
"mode_$(i)_kappa", | ||
]), | ||
) | ||
end | ||
append!( | ||
selected_columns_X, | ||
[:velocity, :initial_temperature, :initial_pressure], | ||
) | ||
X = df[:, selected_columns_X] | ||
Y = df[:, Y_name] | ||
return (X, Y, initial_data) | ||
end | ||
|
||
function preprocess_aerosol_data(X::DataFrame) | ||
num_modes = get_num_modes(X) | ||
for i in 1:num_modes | ||
X = DF.transform( | ||
X, | ||
Symbol("mode_$(i)_N") => ByRow(log) => Symbol("mode_$(i)_N"), | ||
) | ||
X = DF.transform( | ||
X, | ||
Symbol("mode_$(i)_mean") => | ||
ByRow(log) => Symbol("mode_$(i)_mean"), | ||
) | ||
end | ||
X = DF.transform(X, :velocity => ByRow(log) => :velocity) | ||
return X | ||
end | ||
|
||
function target_transform(act_frac) | ||
return @. atanh(2.0 * 0.99 * (act_frac - 0.5)) | ||
end | ||
|
||
function inverse_target_transform(transformed_act_frac) | ||
return @. (1.0 / (2.0 * 0.99)) * tanh(transformed_act_frac) + 0.5 | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
module EmulatorModelsExt | ||
|
||
import MLJ | ||
import DataFrames as DF | ||
|
||
import Thermodynamics as TD | ||
import Thermodynamics.Parameters as TDP | ||
import CloudMicrophysics.AerosolActivation as AA | ||
import CloudMicrophysics.AerosolModel as AM | ||
import CloudMicrophysics.Parameters as CMP | ||
|
||
function AA.N_activated_per_mode( | ||
machine::MLJ.Machine, | ||
ap::CMP.AerosolActivationParameters, | ||
ad::CMP.AerosolDistributionType, | ||
aip::CMP.AirProperties, | ||
tps::TDP.ThermodynamicsParameters, | ||
T::FT, | ||
p::FT, | ||
w::FT, | ||
q::TD.PhasePartition{FT}, | ||
) where {FT <: Real} | ||
hygro = AA.mean_hygroscopicity_parameter(ap, ad) | ||
return ntuple(Val(AM.n_modes(ad))) do i | ||
# Model predicts activation of the first mode. So, swap each mode | ||
# with the first mode repeatedly to predict all activations. | ||
modes_perm = collect(1:AM.n_modes(ad)) | ||
modes_perm[[1, i]] = modes_perm[[i, 1]] | ||
per_mode_data = [ | ||
(; | ||
Symbol("mode_$(j)_N") => ad.Modes[modes_perm[j]].N, | ||
Symbol("mode_$(j)_mean") => ad.Modes[modes_perm[j]].r_dry, | ||
Symbol("mode_$(j)_stdev") => ad.Modes[modes_perm[j]].stdev, | ||
Symbol("mode_$(j)_kappa") => hygro[modes_perm[j]], | ||
) for j in 1:AM.n_modes(ad) | ||
] | ||
additional_data = (; | ||
:velocity => w, | ||
:initial_temperature => T, | ||
:initial_pressure => p, | ||
) | ||
X = DF.DataFrame([merge(reduce(merge, per_mode_data), additional_data)]) | ||
max(FT(0), min(FT(1), MLJ.predict(machine, X)[1])) * ad.Modes[i].N | ||
end | ||
end | ||
|
||
end # module |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
# Get ML packages | ||
import MLJ | ||
import Flux | ||
import MLJModels | ||
import MLJFlux | ||
Standardizer = MLJ.@load Standardizer pkg = MLJModels | ||
NeuralNetworkRegressor = MLJ.@load NeuralNetworkRegressor pkg = MLJFlux | ||
|
||
# Get the testing package | ||
import Test as TT | ||
|
||
# Get the CliMA packages | ||
import CloudMicrophysics as CM | ||
import ClimaParams as CP | ||
import Thermodynamics as TD | ||
import CloudMicrophysics.AerosolModel as AM | ||
import CloudMicrophysics.AerosolActivation as AA | ||
import CloudMicrophysics.Parameters as CMP | ||
|
||
# Container for the NN we are about to build | ||
struct NNBuilder <: MLJFlux.Builder | ||
layer_sizes::Vector{Integer} | ||
dropout::Vector | ||
end | ||
|
||
# Define the NN structure | ||
function MLJFlux.build(builder::NNBuilder, rng, n_in, n_out) | ||
@assert length(builder.layer_sizes) == length(builder.dropout) | ||
num_hidden_layers = length(builder.layer_sizes) | ||
init = Flux.glorot_uniform(rng) | ||
layers::Vector{Any} = [] | ||
if num_hidden_layers == 0 | ||
push!(layers, Flux.Dense(n_in => n_out, init = init)) | ||
else | ||
push!( | ||
layers, | ||
Flux.Dense( | ||
n_in => builder.layer_sizes[1], | ||
Flux.sigmoid_fast, | ||
init = init, | ||
), | ||
) | ||
end | ||
for i in 1:num_hidden_layers | ||
push!(layers, Flux.Dropout(builder.dropout[i])) | ||
if i == num_hidden_layers | ||
push!( | ||
layers, | ||
Flux.Dense(builder.layer_sizes[i] => n_out, init = init), | ||
) | ||
else | ||
push!( | ||
layers, | ||
Flux.Dense( | ||
builder.layer_sizes[i] => builder.layer_sizes[i + 1], | ||
Flux.sigmoid_fast, | ||
init = init, | ||
), | ||
) | ||
end | ||
end | ||
return Flux.Chain(layers...) | ||
end | ||
|
||
# Load aerosol data reading and preprocessing functions | ||
include(joinpath(pkgdir(CM), "ext", "Common.jl")) | ||
|
||
# Get the ML model | ||
function get_2modal_NN_model_FT32() | ||
FT = Float32 | ||
machine_name = "2modal_nn_machine_naive.jls" | ||
|
||
# If the ML model already exists load it in. | ||
# If it does not exist, train a NN | ||
fpath = joinpath(pkgdir(CM), "test") | ||
if isfile(joinpath(fpath, machine_name)) | ||
# Read-in the saved ML model | ||
emulator_filepath = joinpath(fpath, machine_name) | ||
return MLJ.machine(emulator_filepath) | ||
else | ||
# Download data files if you don't have them | ||
# (not the most elegant way...) | ||
fname = "2modal_dataset1_train.csv" | ||
if !isfile(joinpath(fpath, "data", fname)) | ||
# For reasons unknown to humankind uploading/downloading the file | ||
# from Caltech box changes the file. We are using dropbox for now. | ||
# In the end we should upload the files to Caltech Data | ||
# (and pray they are not changed there...) | ||
url = "https://www.dropbox.com/scl/fi/qgq6ujvqenebjkskqvht5/2modal_dataset1_train.csv?rlkey=53qtqz0mtce993gy5jtnpdfz5&dl=0" | ||
download(url, fname) | ||
mkdir(joinpath(fpath, "data")) | ||
mv(fname, joinpath(fpath, "data", fname), force = true) | ||
end | ||
|
||
# Read the aerosol data | ||
X_train, Y_train, initial_data = | ||
read_aerosol_dataset(joinpath(fpath, "data", fname)) | ||
|
||
# Define the training pipeline | ||
pipeline = | ||
preprocess_aerosol_data |> | ||
Standardizer() |> | ||
MLJ.TransformedTargetModel( | ||
NeuralNetworkRegressor( | ||
builder = NNBuilder([250, 50, 5], [FT(0.3), FT(0), FT(0)]), | ||
optimiser = Flux.Optimise.Adam(0.001, (0.9, 0.999), 1.0e-8), | ||
epochs = 2000, | ||
loss = Flux.mse, | ||
batch_size = 1000, | ||
), | ||
transformer = target_transform, | ||
inverse = inverse_target_transform, | ||
) | ||
# Create the untrained ML model | ||
mach = MLJ.machine(pipeline, X_train, Y_train) | ||
# Train a new ML model | ||
@info("Training a new ML model. This may take a minute or two.") | ||
MLJ.fit!(mach, verbosity = 0) | ||
# Save the ML model to a binary file that can be re-used by CloudMicrophysics | ||
MLJ.save(joinpath(fpath, machine_name), mach) | ||
return mach | ||
end | ||
end | ||
|
||
function test_emulator_NN(FT) | ||
|
||
aip = CMP.AirProperties(FT) | ||
tps = TD.Parameters.ThermodynamicsParameters(FT) | ||
ap = CMP.AerosolActivationParameters(FT) | ||
|
||
# Atmospheric conditions | ||
T = FT(294) # air temperature K | ||
p = FT(1e5) # air pressure Pa | ||
w = FT(0.5) # vertical velocity m/s | ||
p_vs = TD.saturation_vapor_pressure(tps, T, TD.Liquid()) | ||
q_vs = 1 / (1 - TD.Parameters.molmass_ratio(tps) * (p_vs - p) / p_vs) | ||
q = TD.PhasePartition(q_vs) | ||
|
||
# Aerosol size distribution | ||
salt = CMP.Seasalt(FT) | ||
# Accumulation mode | ||
r1 = FT(0.243 * 1e-6) # m | ||
σ1 = FT(1.4) # - | ||
N1 = FT(100 * 1e6) # 1/m3 | ||
# Coarse Mode | ||
r2 = FT(1.5 * 1e-6) # m | ||
σ2 = FT(2.1) # - | ||
N2 = FT(1e6) # 1/m3 | ||
acc = AM.Mode_κ(r1, σ1, N1, (FT(1.0),), (FT(1.0),), (salt.M,), (salt.κ,), 1) | ||
crs = AM.Mode_κ(r2, σ2, N2, (FT(1.0),), (FT(1.0),), (salt.M,), (salt.κ,), 1) | ||
ad = AM.AerosolDistribution((crs, acc)) | ||
|
||
# Get the ML model | ||
mach = get_2modal_NN_model_FT32() | ||
|
||
TT.@test AA.N_activated_per_mode(mach, ap, ad, aip, tps, T, p, w, q)[1] ≈ | ||
AA.N_activated_per_mode(ap, ad, aip, tps, T, p, w, q)[1] rtol = | ||
1e-5 | ||
TT.@test AA.N_activated_per_mode(mach, ap, ad, aip, tps, T, p, w, q)[2] ≈ | ||
AA.N_activated_per_mode(ap, ad, aip, tps, T, p, w, q)[2] rtol = | ||
1e-3 | ||
end | ||
|
||
@info "Aerosol activation NN test" | ||
test_emulator_NN(Float32) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters