diff --git a/ext/EmulatorModelsExt.jl b/ext/EmulatorModelsExt.jl
index bf7cb60ece..4926667f4a 100644
--- a/ext/EmulatorModelsExt.jl
+++ b/ext/EmulatorModelsExt.jl
@@ -12,6 +12,7 @@ import CloudMicrophysics.Parameters as CMP
 function AA.N_activated_per_mode(
     machine::MLJ.Machine,
     ap::CMP.AerosolActivationParameters,
+    ad::CMP.AerosolDistributionType,
     aip::CMP.AirProperties,
     tps::TDP.ThermodynamicsParameters,
     T::FT,
@@ -19,7 +20,7 @@ function AA.N_activated_per_mode(
     w::FT,
     q::TD.PhasePartition{FT},
 ) where {FT <: Real}
-    hygro = mean_hygroscopicity_parameter(ap, ad)
+    hygro = AA.mean_hygroscopicity_parameter(ap, ad)
     return ntuple(Val(AM.n_modes(ad))) do i
         # Model predicts activation of the first mode. So, swap each mode
         # with the first mode repeatedly to predict all activations.
diff --git a/ml_test/Common.jl b/ml_test/Common.jl
new file mode 100644
index 0000000000..834cc9cbb6
--- /dev/null
+++ b/ml_test/Common.jl
@@ -0,0 +1,77 @@
+import CSV
+import DataFrames as DF
+using DataFramesMeta
+
+function get_num_modes(df::DataFrame)
+    i = 1
+    while true
+        if !("mode_$(i)_N" in names(df))
+            return i - 1
+        end
+        i += 1
+    end
+end
+
+function get_num_modes(data_row::NamedTuple)
+    i = 1
+    while true
+        if !(Symbol("mode_$(i)_N") in keys(data_row))
+            return i - 1
+        end
+        i += 1
+    end
+end
+
+function read_aerosol_dataset(
+    dataset_filename::String,
+    Y_name = :mode_1_act_frac_S_interp
+)
+    initial_data = DF.DataFrame(CSV.File(dataset_filename))
+    df = filter(row -> row.S_max > 0 && row.S_max < 0.2, initial_data)
+    selected_columns_X = []
+    num_modes = get_num_modes(df)
+    @info(num_modes)
+    for i in 1:num_modes
+        append!(
+            selected_columns_X,
+            Symbol.([
+                "mode_$(i)_N",
+                "mode_$(i)_mean",
+                "mode_$(i)_stdev",
+                "mode_$(i)_kappa",
+            ]),
+        )
+    end
+    append!(
+        selected_columns_X,
+        [:velocity, :initial_temperature, :initial_pressure],
+    )
+    X = df[:, selected_columns_X]
+    Y = df[:, Y_name]
+    return (X, Y, initial_data)
+end
+
+function preprocess_aerosol_data(X::DataFrame)
+    num_modes = get_num_modes(X)
+    for i in 1:num_modes
+        X = DF.transform(
+            X,
+            Symbol("mode_$(i)_N") => ByRow(log) => Symbol("mode_$(i)_N"),
+        )
+        X = DF.transform(
+            X,
+            Symbol("mode_$(i)_mean") =>
+                ByRow(log) => Symbol("mode_$(i)_mean"),
+        )
+    end
+    X = DF.transform(X, :velocity => ByRow(log) => :velocity)
+    return X
+end
+
+function target_transform(act_frac)
+    return @. atanh(2.0 * 0.99 * (act_frac - 0.5))
+end
+
+function inverse_target_transform(transformed_act_frac)
+    return @. (1.0 / (2.0 * 0.99)) * tanh(transformed_act_frac) + 0.5
+end
diff --git a/ml_test/PreprocessAerosolData.jl b/ml_test/PreprocessAerosolData.jl
deleted file mode 100644
index 62d5b80323..0000000000
--- a/ml_test/PreprocessAerosolData.jl
+++ /dev/null
@@ -1,207 +0,0 @@
-#module PreprocessAerosolData
-
-import CloudMicrophysics as CM
-
-import CloudMicrophysics.AerosolActivation as AA
-import CloudMicrophysics.AerosolModel as AM
-import CloudMicrophysics.Parameters as CMP
-import CloudMicrophysics.Parameters as CMP
-import CloudMicrophysics.Common as CO
-
-import ClimaParams as CP
-import Thermodynamics as TD
-import DataFrames as DF
-using DataFramesMeta
-
-const FT = Float64
-#const APS = CMP.ParametersType
-
-function get_num_modes(df::DataFrame)
-    i = 1
-    while true
-        if !("mode_$(i)_N" in names(df))
-            return i - 1
-        end
-        i += 1
-    end
-end
-
-function get_num_modes(data_row::NamedTuple)
-    i = 1
-    while true
-        if !(Symbol("mode_$(i)_N") in keys(data_row))
-            return i - 1
-        end
-        i += 1
-    end
-end
-
-#function convert_to_ARG_params(data_row::NamedTuple, tps)
-#
-#    num_modes = get_num_modes(data_row)
-#    @assert num_modes > 0
-#    mode_Ns = []
-#    mode_means = []
-#    mode_stdevs = []
-#    mode_kappas = []
-#    w = data_row.velocity
-#    T = data_row.initial_temperature
-#    p = data_row.initial_pressure
-#    for i in 1:num_modes
-#        push!(mode_Ns, data_row[Symbol("mode_$(i)_N")])
-#        push!(mode_means, data_row[Symbol("mode_$(i)_mean")])
-#        push!(mode_stdevs, data_row[Symbol("mode_$(i)_stdev")])
-#        push!(mode_kappas, data_row[Symbol("mode_$(i)_kappa")])
-#    end
-#    ad = AM.AerosolDistribution(
-#        Tuple(
-#            AM.Mode_κ(
-#                mode_means[i],
-#                mode_stdevs[i],
-#                mode_Ns[i],
-#                FT(1),
-#                FT(1),
-#                FT(0),
-#                mode_kappas[i],
-#                1,
-#            ) for i in 1:num_modes
-#        ),
-#    )
-#    pv0 = TD.saturation_vapor_pressure(tps, T, TD.Liquid())
-#    vapor_mix_ratio = pv0 / TD.Parameters.molmass_ratio(tps) / (p - pv0)
-#    q_vap = vapor_mix_ratio / (vapor_mix_ratio + 1)
-#    q = TD.PhasePartition(q_vap, FT(0), FT(0))
-#    return (; ad, T, p, w, q, mode_Ns)
-#end
-#
-#function convert_to_ARG_params(data_row::NamedTuple)
-#    return convert_to_ARG_params(data_row, default_param_set)
-#end
-#
-#function get_ARG_S_max(data_row::NamedTuple, param_set::APS)
-#    (; ad, T, p, w, q) = convert_to_ARG_params(data_row, param_set)
-#    max_supersaturation =
-#        AA.max_supersaturation(param_set, CT.ARG2000Type(), ad, T, p, w, q)
-#    return max_supersaturation
-#end
-#
-#function get_ARG_S_max(data_row::NamedTuple)
-#    return get_ARG_S_max(data_row, default_param_set)
-#end
-#
-#function get_ARG_S_max(X::DataFrame, param_set::APS)
-#    return get_ARG_S_max.(NamedTuple.(eachrow(X)), param_set)
-#end
-#
-#function get_ARG_S_max(X::DataFrame)
-#    return get_ARG_S_max(X, default_param_set)
-#end
-#
-#function get_ARG_S_crit(data_row::NamedTuple, param_set::APS)
-#    (; ad, T) = convert_to_ARG_params(data_row, param_set)
-#    return AA.critical_supersaturation(param_set, ad, T)
-#end
-#
-#function get_ARG_S_crit(data_row::NamedTuple)
-#    return get_ARG_S_crit(data_row, default_param_set)
-#end
-#
-#function get_ARG_S_crit(X::DataFrame, param_set::APS)
-#    return get_ARG_S_crit.(NamedTuple.(eachrow(X)), param_set)
-#end
-#
-#function get_ARG_S_crit(X::DataFrame)
-#    return get_ARG_S_crit(X, default_param_set)
-#end
-#
-#function get_ARG_act_N(data_row::NamedTuple, param_set::APS, S_max = nothing)
-#    (; ad, T, p, w, q) = convert_to_ARG_params(data_row, param_set)
-#    if S_max === nothing
-#        return collect(
-#            AA.N_activated_per_mode(
-#                param_set,
-#                CT.ARG2000Type(),
-#                ad,
-#                T,
-#                p,
-#                w,
-#                q,
-#            ),
-#        )
-#    else
-#        critical_supersaturation = AA.critical_supersaturation(param_set, ad, T)
-#        return collect(
-#            AA.N_activated_per_mode(
-#                param_set,
-#                CT.ARG2000Type(),
-#                ad,
-#                T,
-#                p,
-#                w,
-#                q,
-#                S_max,
-#                critical_supersaturation,
-#            ),
-#        )
-#    end
-#end
-#
-#function get_ARG_act_N(data_row::NamedTuple, S_max = nothing)
-#    return get_ARG_act_N(data_row, default_param_set, S_max)
-#end
-#
-#function get_ARG_act_N(X::DataFrame, param_set::APS, S_max = nothing)
-#    return transpose(
-#        hcat(get_ARG_act_N.(NamedTuple.(eachrow(X)), param_set, S_max)...),
-#    )
-#end
-#
-#function get_ARG_act_N(X::DataFrame, S_max = nothing)
-#    return get_ARG_act_N(X, default_param_set, S_max)
-#end
-#
-#function get_ARG_act_frac(data_row::NamedTuple, param_set::APS, S_max = nothing)
-#    (; mode_Ns) = convert_to_ARG_params(data_row, param_set)
-#    return get_ARG_act_N(data_row, param_set, S_max) ./ mode_Ns
-#end
-#
-#function get_ARG_act_frac(data_row::NamedTuple, S_max = nothing)
-#    return get_ARG_act_frac(data_row, default_param_set, S_max)
-#end
-#
-#function get_ARG_act_frac(X::DataFrame, param_set::APS, S_max = nothing)
-#    return transpose(
-#        hcat(get_ARG_act_frac.(NamedTuple.(eachrow(X)), param_set, S_max)...),
-#    )
-#end
-#
-#function get_ARG_act_frac(X::DataFrame, S_max = nothing)
-#    return get_ARG_act_frac(X, default_param_set, S_max)
-#end
-
-function preprocess_aerosol_data(X::DataFrame)
-    num_modes = get_num_modes(X)
-    for i in 1:num_modes
-        X = DF.transform(
-            X,
-            Symbol("mode_$(i)_N") => ByRow(log) => Symbol("mode_$(i)_N"),
-        )
-        X = DF.transform(
-            X,
-            Symbol("mode_$(i)_mean") =>
-                ByRow(log) => Symbol("mode_$(i)_mean"),
-        )
-    end
-    X = DF.transform(X, :velocity => ByRow(log) => :velocity)
-    return X
-end
-
-function target_transform(act_frac)
-    return @. atanh(2.0 * 0.99 * (act_frac - 0.5))
-end
-
-function inverse_target_transform(transformed_act_frac)
-    return @. (1.0 / (2.0 * 0.99)) * tanh(transformed_act_frac) + 0.5
-end
-
-#end # module
diff --git a/ml_test/Project.toml b/ml_test/Project.toml
index f505bcaec6..48af46c9e1 100644
--- a/ml_test/Project.toml
+++ b/ml_test/Project.toml
@@ -1,12 +1,11 @@
 [deps]
-CLIMAParameters = "6eacf6c3-8458-43b9-ae03-caf5306d3d53"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
+ClimaParams = "5c42b081-d73a-476f-9059-fd94b934656c"
 CloudMicrophysics = "6a9e3e04-43cd-43ba-94b9-e8782df3c71b"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DataFramesMeta = "1313f7d8-7da2-5740-9ea0-a2ca25f37964"
-EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
-JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
 MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
 MLJFlux = "094fc8d1-fd35-5302-93ea-dabda2abf845"
 MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7"
diff --git a/ml_test/testing.jl b/ml_test/testing.jl
deleted file mode 100644
index ab73bd8b97..0000000000
--- a/ml_test/testing.jl
+++ /dev/null
@@ -1,63 +0,0 @@
-import CloudMicrophysics
-import MLJ
-import MLJFlux
-import Flux
-
-include(joinpath(
-    pkgdir(CloudMicrophysics),
-    "ml_test",
-    "PreprocessAerosolData.jl")
-)
-
-@info(PreprocessAerosolData)
-
-struct NNBuilder <: MLJFlux.Builder
-    layer_sizes::Vector{Integer}
-    dropout::Vector{Float64}
-end
-
-function MLJFlux.build(builder::NNBuilder, rng, n_in, n_out)
-    @assert length(builder.layer_sizes) == length(builder.dropout)
-    num_hidden_layers = length(builder.layer_sizes)
-    init = Flux.glorot_uniform(rng)
-    layers::Vector{Any} = []
-    if num_hidden_layers == 0
-        push!(layers, Flux.Dense(n_in => n_out, init = init))
-    else
-        push!(
-            layers,
-            Flux.Dense(
-                n_in => builder.layer_sizes[1],
-                Flux.sigmoid_fast,
-                init = init,
-            ),
-        )
-    end
-    for i in 1:num_hidden_layers
-        push!(layers, Flux.Dropout(builder.dropout[i]))
-        if i == num_hidden_layers
-            push!(
-                layers,
-                Flux.Dense(builder.layer_sizes[i] => n_out, init = init),
-            )
-        else
-            push!(
-                layers,
-                Flux.Dense(
-                    builder.layer_sizes[i] => builder.layer_sizes[i + 1],
-                    Flux.sigmoid_fast,
-                    init = init,
-                ),
-            )
-        end
-    end
-    return Flux.Chain(layers...)
-end
-
-emulator_filepath = joinpath(
-    pkgdir(CloudMicrophysics),
-    "ml_test",
-    "2modal_nn_machine_naive.jls",
-)
-
-machine = MLJ.machine(emulator_filepath)
diff --git a/ml_test/train_emulator.jl b/ml_test/train_emulator.jl
new file mode 100644
index 0000000000..dbe2fb6958
--- /dev/null
+++ b/ml_test/train_emulator.jl
@@ -0,0 +1,155 @@
+# Get ML packages
+import MLJ
+import Flux
+import MLJModels
+import MLJFlux
+Standardizer = MLJ.@load Standardizer pkg = MLJModels
+NeuralNetworkRegressor = MLJ.@load NeuralNetworkRegressor pkg = MLJFlux
+
+# path to our ML testing folder
+import CloudMicrophysics as CM
+fpath = joinpath(pkgdir(CM), "ml_test")
+
+# We will be precise
+const FT = Float64
+
+# Container for the NN we are about to build
+struct NNBuilder <: MLJFlux.Builder
+    layer_sizes::Vector{Integer}
+    dropout::Vector{Float64}
+end
+
+# TODO - what does it do exactly?
+function MLJFlux.build(builder::NNBuilder, rng, n_in, n_out)
+    @assert length(builder.layer_sizes) == length(builder.dropout)
+    num_hidden_layers = length(builder.layer_sizes)
+    init = Flux.glorot_uniform(rng)
+    layers::Vector{Any} = []
+    if num_hidden_layers == 0
+        push!(layers, Flux.Dense(n_in => n_out, init = init))
+    else
+        push!(
+            layers,
+            Flux.Dense(
+                n_in => builder.layer_sizes[1],
+                Flux.sigmoid_fast,
+                init = init,
+            ),
+        )
+    end
+    for i in 1:num_hidden_layers
+        push!(layers, Flux.Dropout(builder.dropout[i]))
+        if i == num_hidden_layers
+            push!(
+                layers,
+                Flux.Dense(builder.layer_sizes[i] => n_out, init = init),
+            )
+        else
+            push!(
+                layers,
+                Flux.Dense(
+                    builder.layer_sizes[i] => builder.layer_sizes[i + 1],
+                    Flux.sigmoid_fast,
+                    init = init,
+                ),
+            )
+        end
+    end
+    return Flux.Chain(layers...)
+end
+
+# Load aerosol data reading and preprocessing functions
+include(joinpath(fpath, "Common.jl"))
+
+# Get the ML model
+function get_ML_model(machine_name)
+    # If the ML model already exists load it in.
+    # If it does not exist, train a NN
+    if isfile(joinpath(fpath, machine_name))
+        # Read-in the saved ML model
+        emulator_filepath = joinpath(fpath, machine_name)
+        return MLJ.machine(emulator_filepath)
+    else
+        # Download data files if you don't have them
+        # (not the most elegant way...)
+        fname = "2modal_dataset1_train.csv"
+        if !isfile(joinpath(fpath, "data", fname))
+            # For reasons unknown to humankind uploading/downloading the file
+            # from Caltech box changes the file. We are using dropbox for now.
+            # In the end we should upload the files to Caltech Data
+            # (and pray they are not changed there...)
+            url = "https://www.dropbox.com/scl/fi/qgq6ujvqenebjkskqvht5/2modal_dataset1_train.csv?rlkey=53qtqz0mtce993gy5jtnpdfz5&dl=0"
+            download(url, fname)
+            mkdir(joinpath(fpath, "data"))
+            mv(joinpath(pkgdir(CM), fname), joinpath(fpath, "data", fname), force=true)
+        end
+
+        # Read the aerosol data
+        X_train, Y_train, initial_data =
+            read_aerosol_dataset(joinpath(fpath, "data", fname))
+
+        # Define the training pipeline
+        pipeline =
+            preprocess_aerosol_data |>
+            Standardizer() |>
+            MLJ.TransformedTargetModel(
+                NeuralNetworkRegressor(
+                    builder = NNBuilder([250, 50, 5], [0.3, 0.0, 0.0]),
+                    optimiser = Flux.Optimise.Adam(0.001, (0.9, 0.999), 1.0e-8),
+                    epochs = 2000,
+                    loss = Flux.mse,
+                    batch_size = 1000,
+                ),
+                transformer = target_transform,
+                inverse = inverse_target_transform,
+            )
+        # Create the untrained ML model
+        mach = MLJ.machine(pipeline, X_train, Y_train)
+        # Train a new ML model
+        @info("Training a new ML model. This may take a minute or two.")
+        MLJ.fit!(mach, verbosity = 0)
+        # Save the ML model to a binary file that can be re-used by CloudMicrophysics
+        MLJ.save(joinpath(fpath, machine_name), mach)
+        return mach
+    end
+end
+
+# Try using the ML model and the default aerosol activation functions
+import ClimaParams as CP
+import Thermodynamics as TD
+
+import CloudMicrophysics.AerosolModel as AM
+import CloudMicrophysics.AerosolActivation as AA
+import CloudMicrophysics.Parameters as CMP
+
+aip = CMP.AirProperties(FT)
+tps = TD.Parameters.ThermodynamicsParameters(FT)
+ap = CMP.AerosolActivationParameters(FT)
+
+# Atmospheric conditions
+T = FT(294)    # air temperature K
+p = FT(1e5)    # air pressure Pa
+w = FT(0.5)    # vertical velocity m/s
+p_vs = TD.saturation_vapor_pressure(tps, T, TD.Liquid())
+q_vs = 1 / (1 - TD.Parameters.molmass_ratio(tps) * (p_vs - p) / p_vs)
+q = TD.PhasePartition(q_vs)
+
+# Aerosol size distribution
+salt = CMP.Seasalt(FT)
+# Accumulation mode
+r1 = FT(0.243 * 1e-6) # m
+σ1 = FT(1.4)          # -
+N1 = FT(100 * 1e6)    # 1/m3
+# Coarse Mode
+r2 = FT(1.5 * 1e-6)   # m
+σ2 = FT(2.1)          # -
+N2 = FT(1e6)          # 1/m3
+acc = AM.Mode_κ(r1, σ1, N1, (FT(1.0),), (FT(1.0),), (salt.M,), (salt.κ,), 1)
+crs = AM.Mode_κ(r2, σ2, N2, (FT(1.0),), (FT(1.0),), (salt.M,), (salt.κ,), 1)
+ad = AM.AerosolDistribution((crs, acc))
+
+# Get the ML model
+mach = get_ML_model("2modal_nn_machine_naive.jls")
+
+@info("ML version: ", AA.N_activated_per_mode(mach, ap, ad, aip, tps, T, p, w, q))
+@info("CM version: ", AA.N_activated_per_mode(ap, ad, aip, tps, T, p, w, q))