From 7f37d7c08ed481583482205d88909c177b82eed2 Mon Sep 17 00:00:00 2001 From: Jinrae Kim Date: Fri, 24 Jan 2025 17:00:24 -0600 Subject: [PATCH] Update for compatibility (Julia 1.10) (#69) * Update for compatibility (Julia 1.10) * Fix bugs for compat update * wip (why Julia version not specified in github action?) * Specified julia version * wip * wip * Update version * wip * wip * wip * wip --- .github/workflows/CI.yml | 16 +++++----- Project.toml | 14 ++++---- src/approximators/EPLSE.jl | 2 +- src/approximators/FNN.jl | 3 +- src/approximators/approximators.jl | 3 +- .../DLSE.jl | 2 +- src/approximators/normalized_approximators.jl | 2 +- .../PICNN.jl | 11 ++++--- .../parametrised_convex_approximators/PLSE.jl | 2 +- .../parametrised_convex_approximators/PMA.jl | 2 +- .../convex_approximators/LSE.jl | 2 +- .../convex_approximators/MA.jl | 2 +- src/implicit_diff.jl | 12 +++---- src/minimise.jl | 32 ++++++++++--------- src/trainer/trainer.jl | 2 +- test/implicit_diff.jl | 4 +-- test/pure_train.jl | 12 +++---- 17 files changed, 64 insertions(+), 59 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 8b7d385..c3c1980 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -19,16 +19,16 @@ jobs: Xvfb -ac ${{ env.DISPLAY }} -screen 0 1280x780x24 & - uses: julia-actions/setup-julia@v1 with: - version: 1 - - uses: actions/cache@v1 - env: - cache-name: cache-artifacts + # Warning: It is strongly recommended to wrap this value in quotes. + # Otherwise, the YAML parser used by GitHub Actions parses certain + # versions as numbers which causes the wrong version to be selected. + # For example, `1.10` may be parsed as `1.1`. + version: '1.10' + - uses: actions/cache@v2 with: path: ~/.julia/artifacts - key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} + key: ${{ runner.os }}-julia-${{ hashFiles('**/Project.toml') }} restore-keys: | - ${{ runner.os }}-test-${{ env.cache-name }}- - ${{ runner.os }}-test- - ${{ runner.os }}- + ${{ runner.os }}-julia- - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 diff --git a/Project.toml b/Project.toml index 3d0f501..2b479b4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ParametrisedConvexApproximators" uuid = "668502ff-1e8f-42bf-95c7-24f1e819f537" authors = ["JinraeKim and contributors"] -version = "0.3.0" +version = "0.4.0" [deps] AccessorsExtra = "33016aad-b69d-45be-9359-82a41f556fd4" @@ -19,15 +19,15 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [compat] AccessorsExtra = "0.1" -ComponentArrays = "0.13" -Convex = "0.15" +ComponentArrays = "0.15" +Convex = "0.16" ECOS = "1.1" -Flux = "0.13" +Flux = "0.16" ForwardDiff = "0.10" ImplicitDifferentiation = "0.5" -Optim = "1.7" -ProgressMeter = "1.7" -julia = "1" +Optim = "1.11" +ProgressMeter = "1.10" +julia = "1.10" [extras] FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" diff --git a/src/approximators/EPLSE.jl b/src/approximators/EPLSE.jl index 6c4519f..4944e8d 100644 --- a/src/approximators/EPLSE.jl +++ b/src/approximators/EPLSE.jl @@ -11,7 +11,7 @@ struct EPLSE <: AbstractApproximator min_decision max_decision end -Flux.@functor EPLSE (plse, nn,) +Flux.@layer EPLSE trainable=(plse, nn,) function (network::EPLSE)(x, u; initial_guess=nothing,) diff --git a/src/approximators/FNN.jl b/src/approximators/FNN.jl index e3abf80..e6e715a 100644 --- a/src/approximators/FNN.jl +++ b/src/approximators/FNN.jl @@ -3,7 +3,7 @@ struct FNN <: AbstractApproximator m::Int NN::Flux.Chain end 
-Flux.@functor FNN (NN,) +Flux.@layer FNN trainable=(NN,) function FNN(n::Int, m::Int, h_array::Vector{Int}, act) node_array = [n+m, h_array..., 1] FNN(n, m, construct_layer_array(node_array, act)) @@ -20,4 +20,3 @@ size(u) = (m, d) function (nn::FNN)(x, u) res = nn.NN(vcat(x, u)) end -# Flux.params(approximator::FNN) = Flux.params(approximator.NN) diff --git a/src/approximators/approximators.jl b/src/approximators/approximators.jl index e7919fd..5f212bb 100644 --- a/src/approximators/approximators.jl +++ b/src/approximators/approximators.jl @@ -32,7 +32,8 @@ function construct_layer_array(node_array, act; act_terminal=Flux.identity) end function number_of_parameters(approximator::AbstractApproximator) - Flux.params(approximator) |> Map(length) |> sum + # Flux.params(approximator) |> Map(length) |> sum + sum([length(params) for params in Flux.trainables(approximator)]) end diff --git a/src/approximators/difference_of_convex_approximators/DLSE.jl b/src/approximators/difference_of_convex_approximators/DLSE.jl index 7220c5b..ef3a4e2 100644 --- a/src/approximators/difference_of_convex_approximators/DLSE.jl +++ b/src/approximators/difference_of_convex_approximators/DLSE.jl @@ -10,7 +10,7 @@ struct DLSE <: DifferenceOfConvexApproximator NN1::LSE NN2::LSE end -Flux.@functor DLSE (NN1, NN2) +Flux.@layer DLSE trainable=(NN1, NN2) function (nn::DLSE)(x::AbstractArray, u::AbstractArray) diff --git a/src/approximators/normalized_approximators.jl b/src/approximators/normalized_approximators.jl index 9104dc0..857affa 100644 --- a/src/approximators/normalized_approximators.jl +++ b/src/approximators/normalized_approximators.jl @@ -21,7 +21,7 @@ struct MaxAbsNormalisedApproximator{T<:AbstractApproximator} <: NormalisedApprox decision_max_abs::Union{Array, Nothing} cost_max_abs::Union{Array, Nothing} end -Flux.@functor MaxAbsNormalisedApproximator (network,) +Flux.@layer MaxAbsNormalisedApproximator trainable=(network,) function MaxAbsNormalisedApproximator( network::AbstractApproximator, diff --git a/src/approximators/parametrised_convex_approximators/PICNN.jl b/src/approximators/parametrised_convex_approximators/PICNN.jl index 24a756b..3572c4f 100644 --- a/src/approximators/parametrised_convex_approximators/PICNN.jl +++ b/src/approximators/parametrised_convex_approximators/PICNN.jl @@ -15,7 +15,7 @@ struct PICNN <: ParametrisedConvexApproximator m::Int NN::Flux.Chain end -Flux.@functor PICNN (NN,) +Flux.@layer PICNN trainable=(NN,) function PICNN(n::Int, m::Int, u_array::Vector{Int}, z_array::Vector{Int}, g, g̃) PICNN(n, m, make_PICNN(n, m, u_array, z_array, g, g̃)) end @@ -71,7 +71,7 @@ struct PICNN_Layer b # params(m)[10] g # not trainable; will not be tracked by Flux, automatically end -Flux.@functor PICNN_Layer (W̃, b̃, Wz, Wzu, bz, Wy, Wyu, by, Wu, b) # make "struct" compatible with Flux +Flux.@layer PICNN_Layer trainable=(W̃, b̃, Wz, Wzu, bz, Wy, Wyu, by, Wu, b) # make "struct" compatible with Flux function PICNN_Layer(uin::Int, uout::Int, zin::Int, zout::Int, y::Int, g=Flux.identity, g̃=Flux.identity; initW = Flux.glorot_uniform, initb = zeros # default initialisation method @@ -92,6 +92,7 @@ end function Flux.leakyrelu(x::Convex.AbstractExpr) Convex.max(x, 0.1*x) end + function (nn::PICNN_Layer)(input) u, z, y = input # network params @@ -110,8 +111,10 @@ function (nn::PICNN_Layer)(input) else u_next = g̃.(W̃*u .+ b̃) z_next = g.( - Wz * dot(*)(z, max.(Wzu*u .+ bz, 0.0)) # dot(*) is Hadamard product in Convex - + Wy * dot(*)(y, (Wyu*u .+ by)) + # Wz * dot(*)(z, max.(Wzu*u .+ bz, 0.0)) # dot(*) is 
Hadamard product in Convex + # + Wy * dot(*)(y, (Wyu*u .+ by)) + Wz * (z .* max.(Wzu*u .+ bz, 0.0)) # dot(*) is not supported by Flux anymore + + Wy * (y .* (Wyu*u .+ by)) + (Wu * u .+ b) ) end diff --git a/src/approximators/parametrised_convex_approximators/PLSE.jl b/src/approximators/parametrised_convex_approximators/PLSE.jl index 0be531a..3d02e01 100644 --- a/src/approximators/parametrised_convex_approximators/PLSE.jl +++ b/src/approximators/parametrised_convex_approximators/PLSE.jl @@ -6,7 +6,7 @@ struct PLSE <: ParametrisedConvexApproximator NN::Flux.Chain strict::Bool end -Flux.@functor PLSE (NN,) +Flux.@layer PLSE trainable=(NN,) function PLSE(n::Int, m::Int, i_max::Int, T::Real, h_array::Vector{Int}, act; strict=false) @assert T > 0 node_array = [n, h_array..., i_max*(m+1)] diff --git a/src/approximators/parametrised_convex_approximators/PMA.jl b/src/approximators/parametrised_convex_approximators/PMA.jl index 16407da..e5e24ef 100644 --- a/src/approximators/parametrised_convex_approximators/PMA.jl +++ b/src/approximators/parametrised_convex_approximators/PMA.jl @@ -4,7 +4,7 @@ struct PMA <: ParametrisedConvexApproximator i_max::Int NN::Flux.Chain end -Flux.@functor PMA (NN,) +Flux.@layer PMA trainable=(NN,) """ Basic constructor PMA based on Flux.Chain. diff --git a/src/approximators/parametrised_convex_approximators/convex_approximators/LSE.jl b/src/approximators/parametrised_convex_approximators/convex_approximators/LSE.jl index 53efdc7..5d250aa 100644 --- a/src/approximators/parametrised_convex_approximators/convex_approximators/LSE.jl +++ b/src/approximators/parametrised_convex_approximators/convex_approximators/LSE.jl @@ -23,7 +23,7 @@ struct LSE <: ConvexApproximator _α_is _β_is end -Flux.@functor LSE (_α_is, _β_is,) +Flux.@layer LSE trainable=(_α_is, _β_is,) function LSE(n::Int, m::Int, i_max::Int, T::Real) @assert T > 0 α_is = [Flux.glorot_uniform(n+m) for i in 1:i_max] diff --git a/src/approximators/parametrised_convex_approximators/convex_approximators/MA.jl b/src/approximators/parametrised_convex_approximators/convex_approximators/MA.jl index 476c646..eb33f22 100644 --- a/src/approximators/parametrised_convex_approximators/convex_approximators/MA.jl +++ b/src/approximators/parametrised_convex_approximators/convex_approximators/MA.jl @@ -21,7 +21,7 @@ struct MA <: ConvexApproximator _α_is _β_is end -Flux.@functor MA (_α_is, _β_is) +Flux.@layer MA trainable=(_α_is, _β_is) function MA(n::Int, m::Int, i_max::Int) α_is = [Flux.glorot_uniform(n+m) for i in 1:i_max] β_is = [Flux.glorot_uniform(1) for i in 1:i_max] diff --git a/src/implicit_diff.jl b/src/implicit_diff.jl index 7309ab2..7fecf3d 100644 --- a/src/implicit_diff.jl +++ b/src/implicit_diff.jl @@ -3,18 +3,18 @@ function minimise_logsumexp(θ; T, min_decision, max_decision, initial_guess, so B = θ[:, end] m = size(A)[2] u = Convex.Variable(m) - if initial_guess != nothing + if !isnothing(initial_guess) u.value = initial_guess end obj = T * Convex.logsumexp((1/T)*(A*u + B)) prob = Convex.minimize(obj) - if min_decision != nothing - prob.constraints += [u >= min_decision] + if !isnothing(min_decision) + push!(prob.constraints, u >= min_decision) end - if max_decision != nothing - prob.constraints += [u <= max_decision] + if !isnothing(max_decision) + push!(prob.constraints, u <= max_decision) end - solve!(prob, solver(), silent_solver=true, verbose=false) + solve!(prob, solver, silent=true) minimiser = typeof(u.value) <: Number ? 
[u.value] : u.value[:] # to make it a vector return minimiser end diff --git a/src/minimise.jl b/src/minimise.jl index 54d0648..2c047cb 100644 --- a/src/minimise.jl +++ b/src/minimise.jl @@ -25,7 +25,7 @@ implicit differentation is used here. function _minimise( network::PLSE, x::AbstractVector, min_decision, max_decision, initial_guess; - solver=() -> ECOS.Optimizer(), # See https://github.com/jump-dev/Convex.jl/issues/346 + solver=ECOS.Optimizer, # See https://github.com/jump-dev/Convex.jl/issues/346 ) (; m, T) = network θ = _affine_map(network, x) @@ -47,7 +47,7 @@ Basic DCA [1] is used. [2] https://github.com/Corrado-possieri/DLSE_neural_networks/commit/8883e5bcf1733b79b2dd3c432b31af30b4bba0a6#diff-aa888e053028cc6dbd9f0cfb1c30f61f1bde256be213f27b9a083b95292ec5ebR26 """ function _minimise(network::DifferenceOfConvexApproximator, x::AbstractVector, min_decision, max_decision, initial_guess; - solver=() -> ECOS.Optimizer(), # See https://github.com/jump-dev/Convex.jl/issues/346 + solver=ECOS.Optimizer, # See https://github.com/jump-dev/Convex.jl/issues/346 max_iter=30, tol=1e-3, # borrowed from [2] ) @@ -60,15 +60,15 @@ function _minimise(network::DifferenceOfConvexApproximator, x::AbstractVector, m @assert length(u) == length(max_decision) end # initial guess - if initial_guess == nothing - if min_decision != nothing && max_decision != nothing + if isnothing(initial_guess) + if !isnothing(min_decision) && !isnothing(max_decision) initial_guess = min_decision + (max_decision - min_decision) .* rand(size(min_decision)...) else initial_guess = randn(m) - if min_decision != nothing + if !isnothing(min_decision) initial_guess = maximum(hcat(min_decision, initial_guess); dims=2)[:] end - if max_decision != nothing + if !isnothing(max_decision) initial_guess = minimum(hcat(max_decision, initial_guess); dims=2)[:] end end @@ -82,13 +82,15 @@ function _minimise(network::DifferenceOfConvexApproximator, x::AbstractVector, m k = k + 1 v = grad_NN2(χ) # BE CAREFUL: CONSIDER THAT IT IS FOR BIVARIATE FUNCTION problem = Convex.minimize(network.NN1(x, u)[1] - v'*u) - if min_decision != nothing - problem.constraints += [u >= min_decision] + if !isnothing(min_decision) + # problem.constraints += [u >= min_decision] + push!(problem.constraints, u >= min_decision) end - if max_decision != nothing - problem.constraints += [u <= max_decision] + if !isnothing(max_decision) + # problem.constraints += [u <= max_decision] + push!(problem.constraints, u <= max_decision) end - solve!(problem, solver(); verbose=false, silent_solver=true) + solve!(problem, solver; silent=true) χ_next = typeof(u.value) <: Number ? [u.value] : u.value[:] # to make it a vector if norm(χ_next - χ) / (1+norm(χ)) < tol || k == max_iter # @show k @@ -125,13 +127,13 @@ function _minimise(network::AbstractApproximator, x::AbstractVector, min_decisio ) (; m) = network obj(u) = network(x, u)[1] - if min_decision == nothing + if isnothing(min_decision) min_decision = Float64[] # no constraint end - if max_decision == nothing + if isnothing(max_decision) max_decision = Float64[] # no constraint end - if initial_guess == nothing + if isnothing(initial_guess) if min_decision != Float64[] && max_decision != Float64[] initial_guess = (min_decision+eps()*ones(m)) + ((max_decision-eps()*ones(m)) - (min_decision+eps()*ones(m))) .* rand(size(min_decision)...) 
else @@ -144,7 +146,7 @@ function _minimise(network::AbstractApproximator, x::AbstractVector, min_decisio end end end - dfc = TwiceDifferentiableConstraints(min_decision, max_decision) + dfc = Optim.TwiceDifferentiableConstraints(min_decision, max_decision) res = Optim.optimize(obj, dfc, initial_guess, solver()) # minimiser = prod(size(initial_guess)) == 1 ? res.minimizer[1] : res.minimizer minimiser = res.minimizer diff --git a/src/trainer/trainer.jl b/src/trainer/trainer.jl index 27cfa30..f2ba9b6 100644 --- a/src/trainer/trainer.jl +++ b/src/trainer/trainer.jl @@ -10,7 +10,7 @@ struct SupervisedLearningTrainer <: AbstractTrainer dataset, network; normalisation=nothing, loss=Flux.Losses.mse, - optimiser=Adam(1e-3), + optimiser=Flux.Adam(1e-3), ) network = retrieve_normalised_network(network, dataset, normalisation) @assert dataset.split == :full diff --git a/test/implicit_diff.jl b/test/implicit_diff.jl index 9474166..5b6282f 100644 --- a/test/implicit_diff.jl +++ b/test/implicit_diff.jl @@ -23,7 +23,7 @@ function main(; epochs=2, N=100, N_test=10,) data = Flux.DataLoader((X, U_true), batchsize=16) for multithreading in [false, true] @show multithreading - params_init = deepcopy(Flux.params(model)) + params_init = deepcopy(Flux.trainables(model)) @time for epoch in 1:epochs @show epoch @show Flux.Losses.mse(minimise(model, X_test), U_true_test) @@ -36,7 +36,7 @@ function main(; epochs=2, N=100, N_test=10,) Flux.update!(opt_state, model, grads[1]) end end - @test any(Flux.params(model) .!= params_init) + @test any(Flux.trainables(model) .!= params_init) end end diff --git a/test/pure_train.jl b/test/pure_train.jl index 62fca35..c9c1e8c 100644 --- a/test/pure_train.jl +++ b/test/pure_train.jl @@ -20,6 +20,8 @@ Z = hcat([sum(X[:, i].^2)+sum(Y[:, i].^2) for i in 1:d]...) X_test = 2 * (2*rand(n, d_test) .- 1) Y_test = 2 * (2*rand(m, d_test) .- 1) Z_test = hcat([sum(X_test[:, i].^2)+sum(Y_test[:, i].^2) for i in 1:d_test]...) +min_decision = -ones(m) +max_decision = +ones(m) # network construction @@ -52,20 +54,18 @@ function main(epochs=2, network=nothing) :DLSE => dlse, :EPLSE => eplse, ) - if network != nothing + if !isnothing(network) networks = Dict(network => networks[network]) end for (name, model) in networks @show name - params_init = deepcopy(Flux.params(model)) - @test all(Flux.params(model) .== params_init) + params_init = deepcopy(Flux.trainables(model)) + @test all(Flux.trainables(model) .== params_init) # training data = Flux.DataLoader((X, Y, Z), batchsize=16) - # @infiltrate opt_state = Flux.setup(Adam(1e-4), model) - # @infiltrate @time for epoch in 1:epochs @show epoch @show Flux.Losses.mse(model(X_test, Y_test), Z_test) @@ -80,7 +80,7 @@ function main(epochs=2, network=nothing) end end end - @test any(Flux.params(model) .!= params_init) + @test any(Flux.trainables(model) .!= params_init) end end
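
For reference, the sketch below (not part of the patch) illustrates the Flux API migration applied throughout the hunks above: `Flux.@functor T (fields,)` becomes `Flux.@layer T trainable=(fields,)`, implicit `Flux.params` gives way to `Flux.trainables`, and training uses the explicit `Flux.setup`/`Flux.update!` style already present in the tests. The `Toy` layer, its field names, and all sizes are hypothetical, chosen only for illustration.

```julia
using Flux

struct Toy
    NN::Flux.Chain   # trainable sub-network
    m::Int           # hyperparameter, not trainable
end

# Flux 0.16 style: declare the struct as a layer and restrict the trainable
# fields; this replaces `Flux.@functor Toy (NN,)`.
Flux.@layer Toy trainable=(NN,)

(t::Toy)(x) = t.NN(x)

model = Toy(Chain(Dense(2 => 8, relu), Dense(8 => 1)), 1)

# `Flux.trainables` replaces the deprecated `Flux.params` for counting and
# comparing parameters, as in `number_of_parameters` and the updated tests.
n_params = sum(length, Flux.trainables(model))

# Explicit-style training step, matching the pattern in test/pure_train.jl.
opt_state = Flux.setup(Adam(1e-3), model)
x, y = rand(Float32, 2, 16), rand(Float32, 1, 16)
loss, grads = Flux.withgradient(m -> Flux.Losses.mse(m(x), y), model)
Flux.update!(opt_state, model, grads[1])
```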
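
The Convex.jl calls in `src/implicit_diff.jl` and `src/minimise.jl` change in a parallel way: constraints are appended with `push!(prob.constraints, ...)` instead of `prob.constraints += [...]`, and `solve!` takes the optimizer type directly with a `silent` keyword in place of `silent_solver`/`verbose`. A minimal sketch, assuming made-up problem data (`A`, `B`, `T`, and the bounds are illustrative only):

```julia
using Convex, ECOS

A = randn(4, 2)   # stands in for the affine map rows used in the patch
B = randn(4)
T = 1.0

u = Convex.Variable(2)
prob = Convex.minimize(T * Convex.logsumexp((1 / T) * (A * u + B)))

# New constraint API: push! onto prob.constraints.
push!(prob.constraints, u >= -ones(2))
push!(prob.constraints, u <= ones(2))

# New solve! signature: pass the optimizer type; `silent=true` replaces
# `silent_solver=true, verbose=false`.
Convex.solve!(prob, ECOS.Optimizer; silent=true)

minimiser = Convex.evaluate(u)   # vector-valued minimiser
```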