Skip to content

Remove dynamic dispatch in Hessian evaluation #2740

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 30, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 52 additions & 32 deletions src/Nonlinear/ReverseAD/forward_over_reverse.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@

const TAG = :ReverseAD

"""
const MAX_CHUNK::Int = 10

An upper bound on the chunk sie for forward-over-reverse. Increasing this could
improve performance at the cost of extra memory allocation. It has been 10 for a
long time, and nobody seems to have complained.
"""
const MAX_CHUNK = 10

"""
_eval_hessian(
d::NLPEvaluator,
Expand All @@ -23,46 +32,30 @@ Returns the number of non-zeros in the computed Hessian, which will be used to
update the offset for the next call.
"""
function _eval_hessian(
d::NLPEvaluator,
f::_FunctionStorage,
H::AbstractVector{Float64},
λ::Float64,
offset::Int,
)::Int
chunk = min(size(f.seed_matrix, 2), d.max_chunk)
# As a performance optimization, skip dynamic dispatch if the chunk is 1.
if chunk == 1
return _eval_hessian_inner(d, f, H, λ, offset, Val(1))
else
return _eval_hessian_inner(d, f, H, λ, offset, Val(chunk))
end
end

function _eval_hessian_inner(
d::NLPEvaluator,
ex::_FunctionStorage,
H::AbstractVector{Float64},
scale::Float64,
nzcount::Int,
::Val{CHUNK},
) where {CHUNK}
)::Int
if ex.linearity == LINEAR
@assert length(ex.hess_I) == 0
return 0
end
chunk = min(size(ex.seed_matrix, 2), d.max_chunk)
Coloring.prepare_seed_matrix!(ex.seed_matrix, ex.rinfo)
# Compute hessian-vector products
num_products = size(ex.seed_matrix, 2) # number of hessian-vector products
num_chunks = div(num_products, CHUNK)
num_chunks = div(num_products, chunk)
@assert size(ex.seed_matrix, 1) == length(ex.rinfo.local_indices)
for offset in 1:CHUNK:(CHUNK*num_chunks)
_eval_hessian_chunk(d, ex, offset, CHUNK, Val(CHUNK))
for offset in 1:chunk:(chunk*num_chunks)
_eval_hessian_chunk(d, ex, offset, chunk, chunk)
end
# leftover chunk
remaining = num_products - CHUNK * num_chunks
remaining = num_products - chunk * num_chunks
if remaining > 0
offset = CHUNK * num_chunks + 1
_eval_hessian_chunk(d, ex, offset, remaining, Val(CHUNK))
offset = chunk * num_chunks + 1
_eval_hessian_chunk(d, ex, offset, remaining, chunk)
end
want, got = nzcount + length(ex.hess_I), length(H)
if want > got
Expand Down Expand Up @@ -90,32 +83,59 @@ function _eval_hessian_chunk(
ex::_FunctionStorage,
offset::Int,
chunk::Int,
::Val{CHUNK},
) where {CHUNK}
chunk_size::Int,
)
for r in eachindex(ex.rinfo.local_indices)
# set up directional derivatives
@inbounds idx = ex.rinfo.local_indices[r]
# load up ex.seed_matrix[r,k,k+1,...,k+remaining-1] into input_ϵ
for s in 1:chunk
# If `chunk < CHUNK`, leaves junk in the unused components
d.input_ϵ[(idx-1)*CHUNK+s] = ex.seed_matrix[r, offset+s-1]
# If `chunk < chunk_size`, leaves junk in the unused components
d.input_ϵ[(idx-1)*chunk_size+s] = ex.seed_matrix[r, offset+s-1]
end
end
_hessian_slice_inner(d, ex, Val(CHUNK))
_hessian_slice_inner(d, ex, chunk_size)
fill!(d.input_ϵ, 0.0)
# collect directional derivatives
for r in eachindex(ex.rinfo.local_indices)
@inbounds idx = ex.rinfo.local_indices[r]
# load output_ϵ into ex.seed_matrix[r,k,k+1,...,k+remaining-1]
for s in 1:chunk
ex.seed_matrix[r, offset+s-1] = d.output_ϵ[(idx-1)*CHUNK+s]
ex.seed_matrix[r, offset+s-1] = d.output_ϵ[(idx-1)*chunk_size+s]
end
end
return
end

function _hessian_slice_inner(d, ex, ::Val{CHUNK}) where {CHUNK}
T = ForwardDiff.Partials{CHUNK,Float64} # This is our element type.
# A trampoline from a runtime `chunk::Int` to a compile-time
# `ForwardDiff.Partials{N,Float64}` type parameter. The ten branches are
# written out explicitly so that every call site passes a constant type: a
# single `Partials{chunk,Float64}` (or `Val(chunk)`) would construct the type
# at runtime and force a dynamically dispatched call.
function _hessian_slice_inner(d, ex, chunk::Int)
    # Internal invariant: callers clamp the chunk with
    # `min(size(ex.seed_matrix, 2), d.max_chunk)`, and `d.max_chunk` is capped
    # at `MAX_CHUNK` in `MOI.initialize`, so `chunk` is always in
    # `1:MAX_CHUNK`.
    @assert 1 <= chunk <= MAX_CHUNK
    # The branch ladder below hard-codes chunks 1 through 10. If `MAX_CHUNK`
    # is ever changed, the ladder must be updated to match; this assertion
    # makes any drift fail loudly instead of silently mis-sizing the `else`
    # branch.
    @assert MAX_CHUNK == 10
    if chunk == 1
        _hessian_slice_inner(d, ex, ForwardDiff.Partials{1,Float64})
    elseif chunk == 2
        _hessian_slice_inner(d, ex, ForwardDiff.Partials{2,Float64})
    elseif chunk == 3
        _hessian_slice_inner(d, ex, ForwardDiff.Partials{3,Float64})
    elseif chunk == 4
        _hessian_slice_inner(d, ex, ForwardDiff.Partials{4,Float64})
    elseif chunk == 5
        _hessian_slice_inner(d, ex, ForwardDiff.Partials{5,Float64})
    elseif chunk == 6
        _hessian_slice_inner(d, ex, ForwardDiff.Partials{6,Float64})
    elseif chunk == 7
        _hessian_slice_inner(d, ex, ForwardDiff.Partials{7,Float64})
    elseif chunk == 8
        _hessian_slice_inner(d, ex, ForwardDiff.Partials{8,Float64})
    elseif chunk == 9
        _hessian_slice_inner(d, ex, ForwardDiff.Partials{9,Float64})
    else
        # `chunk == 10` is guaranteed by the assertions above.
        _hessian_slice_inner(d, ex, ForwardDiff.Partials{10,Float64})
    end
    return
end

function _hessian_slice_inner(d, ex, ::Type{T}) where {T}
fill!(d.output_ϵ, 0.0)
output_ϵ = _reinterpret_unsafe(T, d.output_ϵ)
subexpr_forward_values_ϵ =
Expand Down
3 changes: 1 addition & 2 deletions src/Nonlinear/ReverseAD/mathoptinterface_api.jl
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,7 @@ function MOI.initialize(d::NLPEvaluator, requested_features::Vector{Symbol})
max_expr_length = max(max_expr_length, length(d.constraints[end].nodes))
max_chunk = max(max_chunk, size(d.constraints[end].seed_matrix, 2))
end
# 10 is hardcoded upper bound to avoid excess memory allocation
max_chunk = min(max_chunk, 10)
max_chunk = min(max_chunk, MAX_CHUNK)
max_expr_with_sub_length = max(max_expr_with_sub_length, max_expr_length)
if d.want_hess || want_hess_storage
d.input_ϵ = zeros(max_chunk * N)
Expand Down
12 changes: 2 additions & 10 deletions test/Nonlinear/ReverseAD.jl
Original file line number Diff line number Diff line change
Expand Up @@ -150,21 +150,13 @@ function test_objective_quadratic_multivariate_subexpressions()
MOI.eval_hessian_objective(evaluator, H, val)
@test H == [2.0, 2.0, 1.0]
@test evaluator.backend.max_chunk == 2
# The call of `_eval_hessian_inner` from `_eval_hessian` needs dynamic dispatch for `Val(chunk)` so it allocates.
# We call directly `_eval_hessian_inner` to check that the rest does not allocates.
@test 0 == @allocated MOI.Nonlinear.ReverseAD._eval_hessian_inner(
evaluator.backend,
evaluator.backend.objective,
H,
1.0,
0,
Val(2),
)
@test 0 == @allocated MOI.eval_hessian_objective(evaluator, H, val)
@test MOI.hessian_lagrangian_structure(evaluator) ==
[(1, 1), (2, 2), (2, 1)]
H = [NaN, NaN, NaN]
μ = Float64[]
MOI.eval_hessian_lagrangian(evaluator, H, val, 1.5, μ)
@test 0 == @allocated MOI.eval_hessian_lagrangian(evaluator, H, val, 1.5, μ)
@test H == 1.5 .* [2.0, 2.0, 1.0]
v = [0.3, 0.4]
hv = [NaN, NaN]
Expand Down
Loading