Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Aerosol activation NN #318

Merged
merged 1 commit into from
Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,19 @@ RootSolvers = "7181ea78-2dcb-4de3-ab41-2b8ab5a31e74"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Thermodynamics = "b60c26fb-14c3-4610-9d3e-2d17fe7ff00c"

[weakdeps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"

[extensions]
EmulatorModelsExt = ["DataFrames", "MLJ"]

[compat]
ClimaParams = "0.10"
DataFrames = "1.6"
DocStringExtensions = "0.8, 0.9"
ForwardDiff = "0.10"
MLJ = "0.20"
RootSolvers = "0.3, 0.4"
SpecialFunctions = "1, 2"
Thermodynamics = "0.12.4"
Expand Down
77 changes: 77 additions & 0 deletions ext/Common.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import CSV
import DataFrames as DF
using DataFramesMeta

function get_num_modes(df::DataFrame)
i = 1
while true
if !("mode_$(i)_N" in names(df))
return i - 1
end
i += 1
end
end

function get_num_modes(data_row::NamedTuple)
i = 1
while true
if !(Symbol("mode_$(i)_N") in keys(data_row))
return i - 1

Check warning on line 19 in ext/Common.jl

View check run for this annotation

Codecov / codecov/patch

ext/Common.jl#L15-L19

Added lines #L15 - L19 were not covered by tests
end
i += 1
end

Check warning on line 22 in ext/Common.jl

View check run for this annotation

Codecov / codecov/patch

ext/Common.jl#L21-L22

Added lines #L21 - L22 were not covered by tests
end

function read_aerosol_dataset(
dataset_filename::String,
Y_name = :mode_1_act_frac_S_interp,
)
initial_data = DF.DataFrame(CSV.File(dataset_filename))
df = filter(row -> row.S_max > 0 && row.S_max < 0.2, initial_data)
selected_columns_X = []
num_modes = get_num_modes(df)
@info(num_modes)
for i in 1:num_modes
append!(
selected_columns_X,
Symbol.([
"mode_$(i)_N",
"mode_$(i)_mean",
"mode_$(i)_stdev",
"mode_$(i)_kappa",
]),
)
end
append!(
selected_columns_X,
[:velocity, :initial_temperature, :initial_pressure],
)
X = df[:, selected_columns_X]
Y = df[:, Y_name]
return (X, Y, initial_data)
end

function preprocess_aerosol_data(X::DataFrame)
num_modes = get_num_modes(X)
for i in 1:num_modes
X = DF.transform(
X,
Symbol("mode_$(i)_N") => ByRow(log) => Symbol("mode_$(i)_N"),
)
X = DF.transform(
X,
Symbol("mode_$(i)_mean") =>
ByRow(log) => Symbol("mode_$(i)_mean"),
)
end
X = DF.transform(X, :velocity => ByRow(log) => :velocity)
return X
end

function target_transform(act_frac)
return @. atanh(2.0 * 0.99 * (act_frac - 0.5))
end

function inverse_target_transform(transformed_act_frac)
return @. (1.0 / (2.0 * 0.99)) * tanh(transformed_act_frac) + 0.5
end
47 changes: 47 additions & 0 deletions ext/EmulatorModelsExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
module EmulatorModelsExt

import MLJ
import DataFrames as DF

import Thermodynamics as TD
import Thermodynamics.Parameters as TDP
import CloudMicrophysics.AerosolActivation as AA
import CloudMicrophysics.AerosolModel as AM
import CloudMicrophysics.Parameters as CMP

function AA.N_activated_per_mode(
machine::MLJ.Machine,
ap::CMP.AerosolActivationParameters,
ad::CMP.AerosolDistributionType,
aip::CMP.AirProperties,
tps::TDP.ThermodynamicsParameters,
T::FT,
p::FT,
w::FT,
q::TD.PhasePartition{FT},
) where {FT <: Real}
hygro = AA.mean_hygroscopicity_parameter(ap, ad)
return ntuple(Val(AM.n_modes(ad))) do i
# Model predicts activation of the first mode. So, swap each mode
# with the first mode repeatedly to predict all activations.
modes_perm = collect(1:AM.n_modes(ad))
modes_perm[[1, i]] = modes_perm[[i, 1]]
per_mode_data = [
(;
Symbol("mode_$(j)_N") => ad.Modes[modes_perm[j]].N,
Symbol("mode_$(j)_mean") => ad.Modes[modes_perm[j]].r_dry,
Symbol("mode_$(j)_stdev") => ad.Modes[modes_perm[j]].stdev,
Symbol("mode_$(j)_kappa") => hygro[modes_perm[j]],
) for j in 1:AM.n_modes(ad)
]
additional_data = (;
:velocity => w,
:initial_temperature => T,
:initial_pressure => p,
)
X = DF.DataFrame([merge(reduce(merge, per_mode_data), additional_data)])
max(FT(0), min(FT(1), MLJ.predict(machine, X)[1])) * ad.Modes[i].N
end
end

end # module
9 changes: 8 additions & 1 deletion test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
[deps]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
ClimaParams = "5c42b081-d73a-476f-9059-fd94b934656c"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d"
ClimaParams = "5c42b081-d73a-476f-9059-fd94b934656c"
CloudMicrophysics = "6a9e3e04-43cd-43ba-94b9-e8782df3c71b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
MLJFlux = "094fc8d1-fd35-5302-93ea-dabda2abf845"
MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
RootSolvers = "7181ea78-2dcb-4de3-ab41-2b8ab5a31e74"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
165 changes: 165 additions & 0 deletions test/emulator_NN.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# Get ML packages
import MLJ
import Flux
import MLJModels
import MLJFlux
Standardizer = MLJ.@load Standardizer pkg = MLJModels
NeuralNetworkRegressor = MLJ.@load NeuralNetworkRegressor pkg = MLJFlux

# Get the testing package
import Test as TT

# Get the CliMA packages
import CloudMicrophysics as CM
import ClimaParams as CP
import Thermodynamics as TD
import CloudMicrophysics.AerosolModel as AM
import CloudMicrophysics.AerosolActivation as AA
import CloudMicrophysics.Parameters as CMP

# Container for the NN we are about to build
struct NNBuilder <: MLJFlux.Builder
layer_sizes::Vector{Integer}
dropout::Vector
end

# Define the NN structure
function MLJFlux.build(builder::NNBuilder, rng, n_in, n_out)
@assert length(builder.layer_sizes) == length(builder.dropout)
num_hidden_layers = length(builder.layer_sizes)
init = Flux.glorot_uniform(rng)
layers::Vector{Any} = []
if num_hidden_layers == 0
push!(layers, Flux.Dense(n_in => n_out, init = init))
else
push!(
layers,
Flux.Dense(
n_in => builder.layer_sizes[1],
Flux.sigmoid_fast,
init = init,
),
)
end
for i in 1:num_hidden_layers
push!(layers, Flux.Dropout(builder.dropout[i]))
if i == num_hidden_layers
push!(
layers,
Flux.Dense(builder.layer_sizes[i] => n_out, init = init),
)
else
push!(
layers,
Flux.Dense(
builder.layer_sizes[i] => builder.layer_sizes[i + 1],
Flux.sigmoid_fast,
init = init,
),
)
end
end
return Flux.Chain(layers...)
end

# Load aerosol data reading and preprocessing functions
include(joinpath(pkgdir(CM), "ext", "Common.jl"))

# Get the ML model
function get_2modal_NN_model_FT32()
FT = Float32
machine_name = "2modal_nn_machine_naive.jls"

# If the ML model already exists load it in.
# If it does not exist, train a NN
fpath = joinpath(pkgdir(CM), "test")
if isfile(joinpath(fpath, machine_name))
# Read-in the saved ML model
emulator_filepath = joinpath(fpath, machine_name)
return MLJ.machine(emulator_filepath)
else
# Download data files if you don't have them
# (not the most elegant way...)
fname = "2modal_dataset1_train.csv"
if !isfile(joinpath(fpath, "data", fname))
# For reasons unknown to humankind uploading/downloading the file
# from Caltech box changes the file. We are using dropbox for now.
# In the end we should upload the files to Caltech Data
# (and pray they are not changed there...)
url = "https://www.dropbox.com/scl/fi/qgq6ujvqenebjkskqvht5/2modal_dataset1_train.csv?rlkey=53qtqz0mtce993gy5jtnpdfz5&dl=0"
download(url, fname)
mkdir(joinpath(fpath, "data"))
mv(fname, joinpath(fpath, "data", fname), force = true)
end

# Read the aerosol data
X_train, Y_train, initial_data =
read_aerosol_dataset(joinpath(fpath, "data", fname))

# Define the training pipeline
pipeline =
preprocess_aerosol_data |>
Standardizer() |>
MLJ.TransformedTargetModel(
NeuralNetworkRegressor(
builder = NNBuilder([250, 50, 5], [FT(0.3), FT(0), FT(0)]),
optimiser = Flux.Optimise.Adam(0.001, (0.9, 0.999), 1.0e-8),
epochs = 2000,
loss = Flux.mse,
batch_size = 1000,
),
transformer = target_transform,
inverse = inverse_target_transform,
)
# Create the untrained ML model
mach = MLJ.machine(pipeline, X_train, Y_train)
# Train a new ML model
@info("Training a new ML model. This may take a minute or two.")
MLJ.fit!(mach, verbosity = 0)
# Save the ML model to a binary file that can be re-used by CloudMicrophysics
MLJ.save(joinpath(fpath, machine_name), mach)
return mach
end
end

function test_emulator_NN(FT)

aip = CMP.AirProperties(FT)
tps = TD.Parameters.ThermodynamicsParameters(FT)
ap = CMP.AerosolActivationParameters(FT)

# Atmospheric conditions
T = FT(294) # air temperature K
p = FT(1e5) # air pressure Pa
w = FT(0.5) # vertical velocity m/s
p_vs = TD.saturation_vapor_pressure(tps, T, TD.Liquid())
q_vs = 1 / (1 - TD.Parameters.molmass_ratio(tps) * (p_vs - p) / p_vs)
q = TD.PhasePartition(q_vs)

# Aerosol size distribution
salt = CMP.Seasalt(FT)
# Accumulation mode
r1 = FT(0.243 * 1e-6) # m
σ1 = FT(1.4) # -
N1 = FT(100 * 1e6) # 1/m3
# Coarse Mode
r2 = FT(1.5 * 1e-6) # m
σ2 = FT(2.1) # -
N2 = FT(1e6) # 1/m3
acc = AM.Mode_κ(r1, σ1, N1, (FT(1.0),), (FT(1.0),), (salt.M,), (salt.κ,), 1)
crs = AM.Mode_κ(r2, σ2, N2, (FT(1.0),), (FT(1.0),), (salt.M,), (salt.κ,), 1)
ad = AM.AerosolDistribution((crs, acc))

# Get the ML model
mach = get_2modal_NN_model_FT32()

TT.@test AA.N_activated_per_mode(mach, ap, ad, aip, tps, T, p, w, q)[1] ≈
AA.N_activated_per_mode(ap, ad, aip, tps, T, p, w, q)[1] rtol =
1e-5
TT.@test AA.N_activated_per_mode(mach, ap, ad, aip, tps, T, p, w, q)[2] ≈
AA.N_activated_per_mode(ap, ad, aip, tps, T, p, w, q)[2] rtol =
1e-3
end

@info "Aerosol activation NN test"
test_emulator_NN(Float32)
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ include("common_types_tests.jl")
include("nucleation_unit_tests.jl")
include("precipitation_susceptibility_tests.jl")
include("p3_tests.jl")
include("emulator_NN.jl")
include("aqua.jl")
Loading