expose factorization through an MOI.AbstractModelAttribute

andrewrosemberg · andrewrosemberg · commit e668506202fb · 2025-02-17T16:42:57.000-05:00
diff --git a/src/NonLinearProgram/NonLinearProgram.jl b/src/NonLinearProgram/NonLinearProgram.jl
@@ -372,7 +372,7 @@ get_num_params(model::Model) = get_num_params(model.model)
 
 function _cache_evaluator!(model::Model)
     form = model.model
-    # Retrieve and sort primal variables by index
+    # Retrieve and sort primal variables by NLP index
     params = sort(all_params(form); by = x -> x.value)
     primal_vars = sort(all_primal_vars(form); by = x -> x.value)
     num_primal = length(primal_vars)
@@ -389,7 +389,7 @@ function _cache_evaluator!(model::Model)
     num_low = length(has_low)
     num_up = length(has_up)
 
-    # Create unified dual mapping
+    # Create unified dual mapping from constraint index to NLP index
     dual_mapping = Vector{Int}(undef, form.num_constraints)
     for (ci, cni) in form.constraints_2_nlp_index
         dual_mapping[ci.value] = cni.value
@@ -437,9 +437,6 @@ end
 function DiffOpt.forward_differentiate!(
     model::Model;
     tol = 1e-6,
-    st = 1e-6,
-    max_corrections = 50,
-    allow_inertia_correction = true,
 )
     model.diff_time = @elapsed begin
         cache = _cache_evaluator!(model)
@@ -448,7 +445,7 @@ function DiffOpt.forward_differentiate!(
         Δp = zeros(length(cache.params))
         for (i, var_idx) in enumerate(cache.params)
             ky = form.var2ci[var_idx]
-            if haskey(model.input_cache.dp, ky)
+            if haskey(model.input_cache.dp, ky) # only for set sensitivities
                 Δp[i] = model.input_cache.dp[ky]
             end
         end
@@ -457,9 +454,6 @@ function DiffOpt.forward_differentiate!(
         Δs = compute_sensitivity(
             model;
             tol = tol,
-            st = st,
-            max_corrections = max_corrections,
-            allow_inertia_correction = allow_inertia_correction,
         )
 
         # Extract primal and dual sensitivities
@@ -477,9 +471,6 @@ end
 function DiffOpt.reverse_differentiate!(
     model::Model;
     tol = 1e-6,
-    st = 1e-6,
-    max_corrections = 50,
-    allow_inertia_correction = true,
 )
     model.diff_time = @elapsed begin
         cache = _cache_evaluator!(model)
@@ -489,9 +480,6 @@ function DiffOpt.reverse_differentiate!(
         Δs = compute_sensitivity(
             model;
             tol = tol,
-            st = st,
-            max_corrections = max_corrections,
-            allow_inertia_correction = allow_inertia_correction,
         )
         num_primal = length(cache.primal_vars)
         # Fetch primal sensitivities
diff --git a/src/NonLinearProgram/nlp_utilities.jl b/src/NonLinearProgram/nlp_utilities.jl
@@ -388,44 +388,6 @@ function build_sensitivity_matrices(
     return M, N
 end
 
-"""
-    inertia_corrector_factorization(M::SparseMatrixCSC, num_w, num_cons; st=1e-6, max_corrections=50)
-
-Inertia correction for the factorization of the KKT matrix. Sparse version.
-"""
-function inertia_corrector_factorization(
-    M::SparseMatrixCSC,
-    num_w,
-    num_cons;
-    st = 1e-6,
-    max_corrections = 50,
-    allow_inertia_correction = true,
-)
-    # Factorization
-    K = lu(M; check = false)
-    # Inertia correction
-    status = K.status
-    num_c = 0
-    diag_mat = ones(size(M, 1))
-    diag_mat[num_w+1:num_w+num_cons] .= -1
-    diag_mat = SparseArrays.spdiagm(diag_mat)
-    if status == 1
-        @assert allow_inertia_correction "Inertia correction needed but not allowed"
-        @info "Inertia correction needed"
-    end
-    while status == 1 && num_c < max_corrections
-        M = M + st * diag_mat
-        K = lu(M; check = false)
-        status = K.status
-        num_c += 1
-    end
-    if status != 0
-        @warn "Inertia correction failed"
-        return nothing
-    end
-    return K
-end
-
 """
     compute_derivatives_no_relax(model::Model, cons::Vector{MOI.Nonlinear.ConstraintIndex},
         _X::AbstractVector, _V_L::AbstractVector, _X_L::AbstractVector, _V_U::AbstractVector, _X_U::AbstractVector, leq_locations::Vector{Z}, geq_locations::Vector{Z}, ineq_locations::Vector{Z},
@@ -447,9 +409,6 @@ function compute_derivatives_no_relax(
     ineq_locations::Vector{Z},
     has_up::Vector{Z},
     has_low::Vector{Z};
-    st = 1e-6,
-    max_corrections = 50,
-    allow_inertia_correction = true,
 ) where {Z<:Integer}
     M, N = build_sensitivity_matrices(
         model,
@@ -470,13 +429,10 @@ function compute_derivatives_no_relax(
     num_vars = get_num_primal_vars(model)
     num_cons = get_num_constraints(model)
     num_ineq = length(ineq_locations)
-    K = inertia_corrector_factorization(
+    K = model.input_cache.factorization(
         M,
         num_vars + num_ineq,
-        num_cons;
-        st = st,
-        max_corrections = max_corrections,
-        allow_inertia_correction = allow_inertia_correction,
+        num_cons
     ) # Factorization
     if isnothing(K)
         return zeros(size(M, 1), size(N, 2)), K, N
@@ -499,9 +455,6 @@ Compute the sensitivity of the solution given sensitivity of the parameters (Δp
 function compute_sensitivity(
     model::Model;
     tol = 1e-6,
-    st = 1e-6,
-    max_corrections = 50,
-    allow_inertia_correction = true,
 )
     # Solution and bounds
     X,
@@ -529,10 +482,7 @@ function compute_sensitivity(
         geq_locations,
         ineq_locations,
         has_up,
-        has_low;
-        st = st,
-        max_corrections = max_corrections,
-        allow_inertia_correction = allow_inertia_correction,
+        has_low
     )
     ## Adjust signs based on JuMP convention
     num_vars = get_num_primal_vars(model)
diff --git a/src/diff_opt.jl b/src/diff_opt.jl
@@ -11,6 +11,50 @@
 
 const MOIDD = MOI.Utilities.DoubleDicts
 
+"""
+    LuFactorizationWithInertiaCorrection{T<:Real}
+
+Callable struct storing the inertia-correction parameters: `st`, the diagonal shift
+added per attempt, and `max_corrections`, the attempt limit. Calling it LU-factorizes
+`M`; corrections run only on status `1`, and `nothing` is returned if they fail.
+"""
+struct LuFactorizationWithInertiaCorrection{T<:Real} <: Function
+    st::T
+    max_corrections::Int
+end
+function LuFactorizationWithInertiaCorrection(;
+    st::T = 1e-6,
+    max_corrections::Int = 50,
+) where {T<:Real}
+    return LuFactorizationWithInertiaCorrection{T}(st, max_corrections)
+end
+
+function (lu_struct::LuFactorizationWithInertiaCorrection)(
+    M::SparseArrays.SparseMatrixCSC,
+    num_w,
+    num_cons,
+)
+    K = SparseArrays.lu(M; check = false) # no throw on failure; `K.status` is checked
+    if K.status == 1
+        @info "Inertia correction needed"
+        # Shift is +st on every diagonal entry except rows num_w+1:num_w+num_cons (-st).
+        signs = ones(size(M, 1))
+        signs[num_w+1:num_w+num_cons] .= -1
+        shift = lu_struct.st * SparseArrays.spdiagm(signs)
+        num_c = 0
+        while K.status == 1 && num_c < lu_struct.max_corrections
+            M = M + shift # rebinds the local only; the caller's matrix is not mutated
+            K = SparseArrays.lu(M; check = false)
+            num_c += 1
+        end
+        if K.status != 0
+            @warn "Inertia correction failed"
+            return nothing
+        end
+    end
+    return K
+end
+
 Base.@kwdef mutable struct InputCache
     dx::Dict{MOI.VariableIndex,Float64} = Dict{MOI.VariableIndex,Float64}()# dz for QP
     dp::Dict{MOI.ConstraintIndex,Float64} = Dict{MOI.ConstraintIndex,Float64}() # Specifically for NonLinearProgram
@@ -28,6 +72,7 @@ Base.@kwdef mutable struct InputCache
     vector_constraints::MOIDD.DoubleDict{MOI.VectorAffineFunction{Float64}} =
         MOIDD.DoubleDict{MOI.VectorAffineFunction{Float64}}() # also includes G for QPs
     objective::Union{Nothing,MOI.AbstractScalarFunction} = nothing
+    factorization::Function = LuFactorizationWithInertiaCorrection()
 end
 
 function Base.empty!(cache::InputCache)
@@ -37,6 +82,7 @@ function Base.empty!(cache::InputCache)
     empty!(cache.scalar_constraints)
     empty!(cache.vector_constraints)
     cache.objective = nothing
+    cache.factorization = LuFactorizationWithInertiaCorrection()
     return
 end
 
@@ -92,6 +138,29 @@ where `x` and `y` are the relevant `MOI.VariableIndex`.
 """
 struct ForwardObjectiveFunction <: MOI.AbstractModelAttribute end
 
+"""
+    MFactorization <: MOI.AbstractModelAttribute
+
+A `MOI.AbstractModelAttribute` to set which factorization function to use for the
+implicit function differentiation needed to compute the sensitivities for
+`NonLinearProgram` models.
+
+The function will be called with the following signature:
+```julia
+function factorization(M::SparseMatrixCSC{<:Real}, # The matrix to factorize
+    num_w::Int, # Number of primal and slack variables (can be ignored - useful for inertia correction)
+    num_cons::Int, # The number of constraints (can be ignored - useful for inertia correction)
+)
+```
+
+Can be set by the user to use a custom factorization function:
+
+```julia
+MOI.set(model, DiffOpt.MFactorization(), factorization)
+```
+"""
+struct MFactorization <: MOI.AbstractModelAttribute end
+
 """
     ForwardConstraintFunction <: MOI.AbstractConstraintAttribute
 
@@ -346,6 +415,15 @@ function MOI.set(model::AbstractModel, ::ForwardObjectiveFunction, objective)
     return
 end
 
+# Setter for `MFactorization`: records the user-supplied callable in the model's
+# input cache, replacing whatever factorization function was stored there before.
+# Returns `nothing`.
+function MOI.set(model::AbstractModel, ::MFactorization, factorization::Function)
+    cache = model.input_cache
+    cache.factorization = factorization
+    return
+end
+
 function MOI.set(
     model::AbstractModel,
     ::ReverseVariablePrimal,
diff --git a/src/jump_moi_overloads.jl b/src/jump_moi_overloads.jl
@@ -21,6 +21,14 @@ function MOI.set(
     return MOI.set(model, attr, JuMP.moi_function(func))
 end
 
+# JuMP-level setter for `MFactorization`: the attribute is not stored on the
+# `JuMP.Model` itself but forwarded, unchanged, to the model's MOI backend.
+function MOI.set(model::JuMP.Model, attr::MFactorization, factorization::Function)
+    backend = JuMP.backend(model)
+    result = MOI.set(backend, attr, factorization)
+    return result
+end
+
 function MOI.set(
     model::JuMP.Model,
     attr::ForwardObjectiveFunction,
diff --git a/src/moi_wrapper.jl b/src/moi_wrapper.jl
@@ -512,21 +512,22 @@ function MOI.set(model::Optimizer, ::ModelConstructor, model_constructor)
     return
 end
 
-function reverse_differentiate!(model::Optimizer; kwargs...)
+function reverse_differentiate!(model::Optimizer)
     st = MOI.get(model.optimizer, MOI.TerminationStatus())
     if !in(st, (MOI.LOCALLY_SOLVED, MOI.OPTIMAL))
         error(
             "Trying to compute the reverse differentiation on a model with termination status $(st)",
         )
     end
     diff = _diff(model)
+    MOI.set(diff, MFactorization(), model.input_cache.factorization)
     for (vi, value) in model.input_cache.dx
         MOI.set(diff, ReverseVariablePrimal(), model.index_map[vi], value)
     end
     for (vi, value) in model.input_cache.dy
         MOI.set(diff, ReverseConstraintDual(), model.index_map[vi], value)
     end
-    return reverse_differentiate!(diff; kwargs...)
+    return reverse_differentiate!(diff)
 end
 
 function _copy_forward_in_constraint(diff, index_map, con_map, constraints)
@@ -541,14 +542,15 @@ function _copy_forward_in_constraint(diff, index_map, con_map, constraints)
     return
 end
 
-function forward_differentiate!(model::Optimizer; kwargs...)
+function forward_differentiate!(model::Optimizer)
     st = MOI.get(model.optimizer, MOI.TerminationStatus())
     if !in(st, (MOI.LOCALLY_SOLVED, MOI.OPTIMAL))
         error(
             "Trying to compute the forward differentiation on a model with termination status $(st)",
         )
     end
     diff = _diff(model)
+    MOI.set(diff, MFactorization(), model.input_cache.factorization)
     if model.input_cache.objective !== nothing
         MOI.set(
             diff,
@@ -580,7 +582,7 @@ function forward_differentiate!(model::Optimizer; kwargs...)
             diff.model.input_cache.dp[model.index_map[vi]] = value
         end
     end
-    return forward_differentiate!(diff; kwargs...)
+    return forward_differentiate!(diff)
 end
 
 function empty_input_sensitivities!(model::Optimizer)
@@ -673,6 +675,8 @@ end
 
 MOI.supports(::Optimizer, ::ForwardObjectiveFunction) = true
 
+MOI.supports(::Optimizer, ::MFactorization) = true
+
 function MOI.get(model::Optimizer, ::ForwardObjectiveFunction)
     return model.input_cache.objective
 end