Skip to content

Commit

Permalink
NN aerosol activation extension
Browse files Browse the repository at this point in the history
  • Loading branch information
mikhailmints authored and trontrytel committed Mar 5, 2024
1 parent 7b0de4f commit 3be25db
Show file tree
Hide file tree
Showing 5 changed files with 320 additions and 1 deletion.
19 changes: 19 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,29 @@ RootSolvers = "7181ea78-2dcb-4de3-ab41-2b8ab5a31e74"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Thermodynamics = "b60c26fb-14c3-4610-9d3e-2d17fe7ff00c"

[weakdeps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
MLJFlux = "094fc8d1-fd35-5302-93ea-dabda2abf845"
MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7"

[extensions]
EmulatorModelsExt = ["CSV", "DataFrames", "DataFramesMeta", "Flux", "MLJ", "MLJFlux", "MLJModels"]

[compat]
ClimaParams = "0.10"
CSV = "0.10"
DataFrames = "1.6"
DataFramesMeta = "0.14"
DocStringExtensions = "0.8, 0.9"
Flux = "0.14"
ForwardDiff = "0.10"
MLJ = "0.20"
MLJFlux = "0.4"
MLJModels = "0.16"
RootSolvers = "0.3, 0.4"
SpecialFunctions = "1, 2"
Thermodynamics = "0.12.4"
Expand Down
77 changes: 77 additions & 0 deletions ext/Common.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import CSV
import DataFrames as DF
using DataFramesMeta

function get_num_modes(df::DataFrame)
i = 1
while true
if !("mode_$(i)_N" in names(df))
return i - 1

Check warning on line 9 in ext/Common.jl

View check run for this annotation

Codecov / codecov/patch

ext/Common.jl#L5-L9

Added lines #L5 - L9 were not covered by tests
end
i += 1
end

Check warning on line 12 in ext/Common.jl

View check run for this annotation

Codecov / codecov/patch

ext/Common.jl#L11-L12

Added lines #L11 - L12 were not covered by tests
end

function get_num_modes(data_row::NamedTuple)
i = 1
while true
if !(Symbol("mode_$(i)_N") in keys(data_row))
return i - 1

Check warning on line 19 in ext/Common.jl

View check run for this annotation

Codecov / codecov/patch

ext/Common.jl#L15-L19

Added lines #L15 - L19 were not covered by tests
end
i += 1
end

Check warning on line 22 in ext/Common.jl

View check run for this annotation

Codecov / codecov/patch

ext/Common.jl#L21-L22

Added lines #L21 - L22 were not covered by tests
end

function read_aerosol_dataset(

Check warning on line 25 in ext/Common.jl

View check run for this annotation

Codecov / codecov/patch

ext/Common.jl#L25

Added line #L25 was not covered by tests
dataset_filename::String,
Y_name = :mode_1_act_frac_S_interp,
)
initial_data = DF.DataFrame(CSV.File(dataset_filename))
df = filter(row -> row.S_max > 0 && row.S_max < 0.2, initial_data)
selected_columns_X = []
num_modes = get_num_modes(df)
@info(num_modes)
for i in 1:num_modes
append!(

Check warning on line 35 in ext/Common.jl

View check run for this annotation

Codecov / codecov/patch

ext/Common.jl#L29-L35

Added lines #L29 - L35 were not covered by tests
selected_columns_X,
Symbol.([
"mode_$(i)_N",
"mode_$(i)_mean",
"mode_$(i)_stdev",
"mode_$(i)_kappa",
]),
)
end
append!(

Check warning on line 45 in ext/Common.jl

View check run for this annotation

Codecov / codecov/patch

ext/Common.jl#L44-L45

Added lines #L44 - L45 were not covered by tests
selected_columns_X,
[:velocity, :initial_temperature, :initial_pressure],
)
X = df[:, selected_columns_X]
Y = df[:, Y_name]
return (X, Y, initial_data)

Check warning on line 51 in ext/Common.jl

View check run for this annotation

Codecov / codecov/patch

ext/Common.jl#L49-L51

Added lines #L49 - L51 were not covered by tests
end

function preprocess_aerosol_data(X::DataFrame)
num_modes = get_num_modes(X)
for i in 1:num_modes
X = DF.transform(

Check warning on line 57 in ext/Common.jl

View check run for this annotation

Codecov / codecov/patch

ext/Common.jl#L54-L57

Added lines #L54 - L57 were not covered by tests
X,
Symbol("mode_$(i)_N") => ByRow(log) => Symbol("mode_$(i)_N"),
)
X = DF.transform(

Check warning on line 61 in ext/Common.jl

View check run for this annotation

Codecov / codecov/patch

ext/Common.jl#L61

Added line #L61 was not covered by tests
X,
Symbol("mode_$(i)_mean") =>
ByRow(log) => Symbol("mode_$(i)_mean"),
)
end
X = DF.transform(X, :velocity => ByRow(log) => :velocity)
return X

Check warning on line 68 in ext/Common.jl

View check run for this annotation

Codecov / codecov/patch

ext/Common.jl#L66-L68

Added lines #L66 - L68 were not covered by tests
end

function target_transform(act_frac)
return @. atanh(2.0 * 0.99 * (act_frac - 0.5))

Check warning on line 72 in ext/Common.jl

View check run for this annotation

Codecov / codecov/patch

ext/Common.jl#L71-L72

Added lines #L71 - L72 were not covered by tests
end

function inverse_target_transform(transformed_act_frac)
return @. (1.0 / (2.0 * 0.99)) * tanh(transformed_act_frac) + 0.5

Check warning on line 76 in ext/Common.jl

View check run for this annotation

Codecov / codecov/patch

ext/Common.jl#L75-L76

Added lines #L75 - L76 were not covered by tests
end
47 changes: 47 additions & 0 deletions ext/EmulatorModelsExt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
module EmulatorModelsExt

import MLJ
import DataFrames as DF

import Thermodynamics as TD
import Thermodynamics.Parameters as TDP
import CloudMicrophysics.AerosolActivation as AA
import CloudMicrophysics.AerosolModel as AM
import CloudMicrophysics.Parameters as CMP

function AA.N_activated_per_mode(

Check warning on line 12 in ext/EmulatorModelsExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/EmulatorModelsExt.jl#L12

Added line #L12 was not covered by tests
machine::MLJ.Machine,
ap::CMP.AerosolActivationParameters,
ad::CMP.AerosolDistributionType,
aip::CMP.AirProperties,
tps::TDP.ThermodynamicsParameters,
T::FT,
p::FT,
w::FT,
q::TD.PhasePartition{FT},
) where {FT <: Real}
hygro = AA.mean_hygroscopicity_parameter(ap, ad)
return ntuple(Val(AM.n_modes(ad))) do i

Check warning on line 24 in ext/EmulatorModelsExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/EmulatorModelsExt.jl#L23-L24

Added lines #L23 - L24 were not covered by tests
# Model predicts activation of the first mode. So, swap each mode
# with the first mode repeatedly to predict all activations.
modes_perm = collect(1:AM.n_modes(ad))
modes_perm[[1, i]] = modes_perm[[i, 1]]
per_mode_data = [

Check warning on line 29 in ext/EmulatorModelsExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/EmulatorModelsExt.jl#L27-L29

Added lines #L27 - L29 were not covered by tests
(;
Symbol("mode_$(j)_N") => ad.Modes[modes_perm[j]].N,
Symbol("mode_$(j)_mean") => ad.Modes[modes_perm[j]].r_dry,
Symbol("mode_$(j)_stdev") => ad.Modes[modes_perm[j]].stdev,
Symbol("mode_$(j)_kappa") => hygro[modes_perm[j]],
) for j in 1:AM.n_modes(ad)
]
additional_data = (;

Check warning on line 37 in ext/EmulatorModelsExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/EmulatorModelsExt.jl#L37

Added line #L37 was not covered by tests
:velocity => w,
:initial_temperature => T,
:initial_pressure => p,
)
X = DF.DataFrame([merge(reduce(merge, per_mode_data), additional_data)])
max(FT(0), min(FT(1), MLJ.predict(machine, X)[1])) * ad.Modes[i].N

Check warning on line 43 in ext/EmulatorModelsExt.jl

View check run for this annotation

Codecov / codecov/patch

ext/EmulatorModelsExt.jl#L42-L43

Added lines #L42 - L43 were not covered by tests
end
end

end # module
9 changes: 8 additions & 1 deletion test/Project.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
[deps]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
ClimaParams = "5c42b081-d73a-476f-9059-fd94b934656c"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d"
ClimaParams = "5c42b081-d73a-476f-9059-fd94b934656c"
CloudMicrophysics = "6a9e3e04-43cd-43ba-94b9-e8782df3c71b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
MLJFlux = "094fc8d1-fd35-5302-93ea-dabda2abf845"
MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
RootSolvers = "7181ea78-2dcb-4de3-ab41-2b8ab5a31e74"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
169 changes: 169 additions & 0 deletions test/emulator_NN.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
# Get ML packages
import MLJ
import Flux
import MLJModels
import MLJFlux
Standardizer = MLJ.@load Standardizer pkg = MLJModels
NeuralNetworkRegressor = MLJ.@load NeuralNetworkRegressor pkg = MLJFlux

# Get the testing package
import Test as TT

# Get the CliMA packages
import CloudMicrophysics as CM
import ClimaParams as CP
import Thermodynamics as TD
import CloudMicrophysics.AerosolModel as AM
import CloudMicrophysics.AerosolActivation as AA
import CloudMicrophysics.Parameters as CMP

# Container for the NN we are about to build
struct NNBuilder <: MLJFlux.Builder
layer_sizes::Vector{Integer}
dropout::Vector
end

# Define the NN structure
function MLJFlux.build(builder::NNBuilder, rng, n_in, n_out)
@assert length(builder.layer_sizes) == length(builder.dropout)
num_hidden_layers = length(builder.layer_sizes)
init = Flux.glorot_uniform(rng)
layers::Vector{Any} = []
if num_hidden_layers == 0
push!(layers, Flux.Dense(n_in => n_out, init = init))
else
push!(
layers,
Flux.Dense(
n_in => builder.layer_sizes[1],
Flux.sigmoid_fast,
init = init,
),
)
end
for i in 1:num_hidden_layers
push!(layers, Flux.Dropout(builder.dropout[i]))
if i == num_hidden_layers
push!(
layers,
Flux.Dense(builder.layer_sizes[i] => n_out, init = init),
)
else
push!(
layers,
Flux.Dense(
builder.layer_sizes[i] => builder.layer_sizes[i + 1],
Flux.sigmoid_fast,
init = init,
),
)
end
end
return Flux.Chain(layers...)
end

# Load aerosol data reading and preprocessing functions
include(joinpath(pkgdir(CM), "ext", "Common.jl"))

# Get the ML model
function get_2modal_NN_model_FT32()
FT = Float32
machine_name = "2modal_nn_machine_naive.jls"

# If the ML model already exists load it in.
# If it does not exist, train a NN
fpath = joinpath(pkgdir(CM), "test")
if isfile(joinpath(fpath, machine_name))
# Read-in the saved ML model
emulator_filepath = joinpath(fpath, machine_name)
return MLJ.machine(emulator_filepath)
else
# Download data files if you don't have them
# (not the most elegant way...)
fname = "2modal_dataset1_train.csv"
if !isfile(joinpath(fpath, "data", fname))
# For reasons unknown to humankind uploading/downloading the file
# from Caltech box changes the file. We are using dropbox for now.
# In the end we should upload the files to Caltech Data
# (and pray they are not changed there...)
url = "https://www.dropbox.com/scl/fi/qgq6ujvqenebjkskqvht5/2modal_dataset1_train.csv?rlkey=53qtqz0mtce993gy5jtnpdfz5&dl=0"
download(url, fname)
mkdir(joinpath(fpath, "data"))
mv(
joinpath(pkgdir(CM), fname),
joinpath(fpath, "data", fname),
force = true,
)
end

# Read the aerosol data
X_train, Y_train, initial_data =
read_aerosol_dataset(joinpath(fpath, "data", fname))

# Define the training pipeline
pipeline =
preprocess_aerosol_data |>
Standardizer() |>
MLJ.TransformedTargetModel(
NeuralNetworkRegressor(
builder = NNBuilder([250, 50, 5], [FT(0.3), FT(0), FT(0)]),
optimiser = Flux.Optimise.Adam(0.001, (0.9, 0.999), 1.0e-8),
epochs = 2000,
loss = Flux.mse,
batch_size = 1000,
),
transformer = target_transform,
inverse = inverse_target_transform,
)
# Create the untrained ML model
mach = MLJ.machine(pipeline, X_train, Y_train)
# Train a new ML model
@info("Training a new ML model. This may take a minute or two.")
MLJ.fit!(mach, verbosity = 0)
# Save the ML model to a binary file that can be re-used by CloudMicrophysics
MLJ.save(joinpath(fpath, machine_name), mach)
return mach
end
end

function test_emulator_NN(FT)

aip = CMP.AirProperties(FT)
tps = TD.Parameters.ThermodynamicsParameters(FT)
ap = CMP.AerosolActivationParameters(FT)

# Atmospheric conditions
T = FT(294) # air temperature K
p = FT(1e5) # air pressure Pa
w = FT(0.5) # vertical velocity m/s
p_vs = TD.saturation_vapor_pressure(tps, T, TD.Liquid())
q_vs = 1 / (1 - TD.Parameters.molmass_ratio(tps) * (p_vs - p) / p_vs)
q = TD.PhasePartition(q_vs)

# Aerosol size distribution
salt = CMP.Seasalt(FT)
# Accumulation mode
r1 = FT(0.243 * 1e-6) # m
σ1 = FT(1.4) # -
N1 = FT(100 * 1e6) # 1/m3
# Coarse Mode
r2 = FT(1.5 * 1e-6) # m
σ2 = FT(2.1) # -
N2 = FT(1e6) # 1/m3
acc = AM.Mode_κ(r1, σ1, N1, (FT(1.0),), (FT(1.0),), (salt.M,), (salt.κ,), 1)
crs = AM.Mode_κ(r2, σ2, N2, (FT(1.0),), (FT(1.0),), (salt.M,), (salt.κ,), 1)
ad = AM.AerosolDistribution((crs, acc))

# Get the ML model
mach = get_2modal_NN_model_FT32()

TT.@test AA.N_activated_per_mode(mach, ap, ad, aip, tps, T, p, w, q)[1]
AA.N_activated_per_mode(ap, ad, aip, tps, T, p, w, q)[1] rtol =
1e-5
TT.@test AA.N_activated_per_mode(mach, ap, ad, aip, tps, T, p, w, q)[2]
AA.N_activated_per_mode(ap, ad, aip, tps, T, p, w, q)[2] rtol =
1e-3
end

@info "Aerosol activation NN test"
test_emulator_NN(Float32)

0 comments on commit 3be25db

Please sign in to comment.