Skip to content

Commit fae0768

Browse files
mohamed82008pkofod
authored andcommitted
[WIP] Fix HagerZhang bugs (#136)
* fixes bugs and paper discrepancies in HagerZhang linesearch * Initial HagerZhang fixes * fix tests
1 parent 1f40bf3 commit fae0768

File tree

3 files changed

+92
-83
lines changed

3 files changed

+92
-83
lines changed

src/hagerzhang.jl

+27-26
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,10 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
116116
if !(isfinite(phi_0) && isfinite(dphi_0))
117117
throw(ArgumentError("Value and slope at step length = 0 must be finite."))
118118
end
119-
if dphi_0 >= zeroT
119+
if dphi_0 >= eps(T) * abs(phi_0)
120120
throw(ArgumentError("Search direction is not a direction of descent."))
121+
elseif dphi_0 >= 0
122+
return zeroT, phi_0
121123
end
122124

123125
# Prevent values of x_new = x+αs that are likely to make
@@ -132,7 +134,8 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
132134

133135

134136
phi_lim = phi_0 + epsilon * abs(phi_0)
135-
@assert c > zeroT
137+
@assert c >= 0
138+
c <= eps(T) && return zeroT, phi_0
136139
@assert isfinite(c) && c <= alphamax
137140
phi_c, dphi_c = ϕdϕ(c)
138141
iterfinite = 1
@@ -145,7 +148,7 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
145148
if !(isfinite(phi_c) && isfinite(dphi_c))
146149
@warn("Failed to achieve finite new evaluation point, using alpha=0")
147150
mayterminate[] = false # reset in case another initial guess is used next
148-
return zeroT, ϕ(zeroT) # phi_0
151+
return zeroT, phi_0
149152
end
150153
push!(alphas, c)
151154
push!(values, phi_c)
@@ -191,32 +194,40 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
191194
# The value is higher, but the slope is downward, so we must
192195
# have crested over the peak. Use bisection.
193196
ib = length(alphas)
194-
ia = ib - 1
197+
ia = 1
195198
if c alphas[ib] || slopes[ib] >= zeroT
196199
error("c = ", c)
197200
end
198201
# ia, ib = bisect(phi, lsr, ia, ib, phi_lim) # TODO: Pass options
199202
ia, ib = bisect!(ϕdϕ, alphas, values, slopes, ia, ib, phi_lim, display)
200203
isbracketed = true
201204
else
202-
# We'll still going downhill, expand the interval and try again
205+
# We'll still going downhill, expand the interval and try again.
206+
# Reaching this branch means that dphi_c < 0 and phi_c <= phi_0 + ϵ_k
207+
# So cold = c has a lower objective than phi_0 up to epsilon.
208+
# This makes it a viable step to return if bracketing fails.
209+
210+
# Bracketing can fail if no cold < c <= alphamax can be found with finite phi_c and dphi_c.
211+
# Going back to the loop with c = cold will only result in infinite cycling.
212+
# So returning (cold, phi_cold) and exiting the line search is the best move.
203213
cold = c
214+
phi_cold = phi_c
215+
if nextfloat(cold) >= alphamax
216+
mayterminate[] = false # reset in case another initial guess is used next
217+
return cold, phi_cold
218+
end
204219
c *= rho
205220
if c > alphamax
206-
c = (alphamax + cold)/2
221+
c = alphamax
207222
if display & BRACKET > 0
208-
println("bracket: exceeding alphamax, bisecting: alphamax = ", alphamax,
209-
", cold = ", cold, ", new c = ", c)
210-
end
211-
if c == cold || nextfloat(c) >= alphamax
212-
mayterminate[] = false # reset in case another initial guess is used next
213-
return cold, phi_c
223+
println("bracket: exceeding alphamax, using c = alphamax = ", alphamax,
224+
", cold = ", cold)
214225
end
215226
end
216227
phi_c, dphi_c = ϕdϕ(c)
217228
iterfinite = 1
218229
while !(isfinite(phi_c) && isfinite(dphi_c)) && c > nextfloat(cold) && iterfinite < iterfinitemax
219-
alphamax = c
230+
alphamax = c # shrinks alphamax, assumes that steps >= c can never have finite phi_c and dphi_c
220231
iterfinite += 1
221232
if display & BRACKET > 0
222233
println("bracket: non-finite value, bisection")
@@ -225,24 +236,14 @@ function (ls::HagerZhang)(ϕ, ϕdϕ,
225236
phi_c, dphi_c = ϕdϕ(c)
226237
end
227238
if !(isfinite(phi_c) && isfinite(dphi_c))
228-
mayterminate[] = false # reset in case another initial guess is used next
229-
return cold, ϕ(cold)
230-
elseif dphi_c < zeroT && c == alphamax
231-
# We're on the edge of the allowed region, and the
232-
# value is still decreasing. This can be due to
233-
# roundoff error in barrier penalties, a barrier
234-
# coefficient being so small that being eps() away
235-
# from it still doesn't turn the slope upward, or
236-
# mistakes in the user's function.
237-
if iterfinite >= iterfinitemax
239+
if display & BRACKET > 0
238240
println("Warning: failed to expand interval to bracket with finite values. If this happens frequently, check your function and gradient.")
239241
println("c = ", c,
240242
", alphamax = ", alphamax,
241243
", phi_c = ", phi_c,
242244
", dphi_c = ", dphi_c)
243245
end
244-
mayterminate[] = false # reset in case another initial guess is used next
245-
return c, phi_c
246+
return cold, phi_cold
246247
end
247248
push!(alphas, c)
248249
push!(values, phi_c)
@@ -394,7 +395,7 @@ function secant2!(ϕdϕ,
394395
# we updated a, do it for b too
395396
c = secant(alphas, values, slopes, ia, iA)
396397
end
397-
if a <= c <= b
398+
if (iA == ic || iB == ic) && a <= c <= b
398399
if display & SECANT2 > 0
399400
println("secant2: second c = ", c)
400401
end

src/initialguess.jl

+64-56
Original file line numberDiff line numberDiff line change
@@ -164,13 +164,14 @@ If α0 is NaN, then procedure I0 is called at the first iteration,
164164
otherwise, we select according to procedure I1-2, with starting value α0.
165165
"""
166166
@with_kw struct InitialHagerZhang{T}
167-
ψ0::T = 0.01
168-
ψ1::T = 0.2
169-
ψ2::T = 2.0
170-
ψ3::T = 0.1
171-
αmax::T = Inf
172-
α0::T = 1.0 # Initial alpha guess. NaN => algorithm calculates
173-
verbose::Bool = false
167+
ψ0::T = 0.01
168+
ψ1::T = 0.2
169+
ψ2::T = 2.0
170+
ψ3::T = 0.1
171+
αmax::T = Inf
172+
α0::T = 1.0 # Initial alpha guess. NaN => algorithm calculates
173+
quadstep::Bool = true
174+
verbose::Bool = false
174175
end
175176

176177
function (is::InitialHagerZhang)(ls::Tls, state, phi_0, dphi_0, df) where Tls
@@ -181,6 +182,7 @@ function (is::InitialHagerZhang)(ls::Tls, state, phi_0, dphi_0, df) where Tls
181182
# pick the initial step size according to HZ #I0
182183
state.alpha = _hzI0(state.x, NLSolversBase.gradient(df),
183184
NLSolversBase.value(df),
185+
is.αmax,
184186
convert(eltype(state.x), is.ψ0)) # Hack to deal with type instability between is{T} and state.x
185187
if Tls <: HagerZhang
186188
ls.mayterminate[] = false
@@ -194,7 +196,7 @@ function (is::InitialHagerZhang)(ls::Tls, state, phi_0, dphi_0, df) where Tls
194196
end
195197
T = eltype(state.alpha)
196198
state.alpha = _hzI12(state.alpha, df, state.x, state.s, state.x_ls, phi_0, dphi_0,
197-
is.ψ1, is.ψ2, is.ψ3, convert(T, is.αmax), is.verbose, mayterminate)
199+
is.ψ1, is.ψ2, is.ψ3, T(is.αmax), is.verbose, is.quadstep, mayterminate)
198200
end
199201
return state.alpha
200202
end
@@ -212,6 +214,7 @@ function _hzI12(alpha::T,
212214
psi3::Real,
213215
alphamax::T,
214216
verbose::Bool,
217+
quadstep::Bool,
215218
mayterminate) where {Tx,T}
216219
ϕ = make_ϕ(df, x_new, x, s)
217220

@@ -221,65 +224,55 @@ function _hzI12(alpha::T,
221224

222225
alphatest = psi1 * alpha
223226
alphatest = min(alphatest, alphamax)
224-
227+
alphatest == 0 && return alphatest
225228
phitest = ϕ(alphatest)
229+
alphatest, phitest = get_finite(alphatest, phitest, ϕ, psi3, iterfinitemax, phi_0, mayterminate)
230+
alphatest == 0 && return alphatest
226231

227-
iterfinite = 1
228-
while !isfinite(phitest)
229-
if iterfinite >= iterfinitemax
230-
mayterminate[] = true
231-
return convert(T, 0)
232-
# TODO: Throw error / LineSearchException instead?
233-
# error("Failed to achieve finite test value; alphatest = ", alphatest)
234-
end
235-
236-
alphatest = psi3 * alphatest
237-
phitest = ϕ(alphatest)
238-
iterfinite += 1
239-
end
240-
a = ((phitest-phi_0)/alphatest - dphi_0)/alphatest # quadratic fit
241-
242-
if verbose == true
243-
println("quadfit: alphatest = ", alphatest,
244-
", phi_0 = ", phi_0,
245-
", dphi_0 = ", dphi_0,
246-
", phitest = ", phitest,
247-
", quadcoef = ", a)
248-
end
249-
mayterminate[] = false
250-
if isfinite(a) && a > 0 && phitest <= phi_0
251-
alpha = -dphi_0 / 2 / a # if convex, choose minimum of quadratic
252-
if alpha == 0
253-
error("alpha is zero. dphi_0 = ", dphi_0, ", phi_0 = ", phi_0, ", phitest = ", phitest, ", alphatest = ", alphatest, ", a = ", a)
254-
end
255-
if alpha <= alphamax
256-
mayterminate[] = true
257-
else
258-
alpha = alphamax
259-
mayterminate[] = false
260-
end
232+
mayterminate[] = quadstep_success = false
233+
if quadstep
234+
a = ((phitest - phi_0) / alphatest - dphi_0) / alphatest # quadratic fit
261235
if verbose == true
262-
println("alpha guess (quadratic): ", alpha,
263-
",(mayterminate = ", mayterminate, ")")
236+
println("quadfit: alphatest = ", alphatest,
237+
", phi_0 = ", phi_0,
238+
", dphi_0 = ", dphi_0,
239+
", phitest = ", phitest,
240+
", quadcoef = ", a)
264241
end
265-
else
266-
if phitest > phi_0
267-
alpha = alphatest
268-
else
269-
alpha *= psi2 # if not convex, expand the interval
242+
if isfinite(a) && a > 0 && phitest <= phi_0
243+
alphatest2 = -dphi_0 / 2 / a # if convex, choose minimum of quadratic
244+
alphatest2 = min(alphatest2, alphamax)
245+
phitest2 = ϕ(alphatest2)
246+
if isfinite(phitest2)
247+
mayterminate[] = quadstep_success = true
248+
alphatest = alphatest2
249+
phitest = phitest2
250+
if verbose == true
251+
println("alpha guess (quadratic): ", alphatest,
252+
",(mayterminate = ", mayterminate[], ")")
253+
end
254+
end
270255
end
271256
end
272-
alpha = min(alphamax, alpha)
273-
if verbose == true
274-
println("alpha guess (expand): ", alpha)
257+
if (!quadstep || !quadstep_success) && phitest <= phi_0
258+
# If no quadstep or it fails, expand the interval.
259+
# While the phitest <= phi_0 condition was not in the paper, it gives a significant boost to the speed. The rationale behind it is that since the slope at alpha = 0 is negative, if phitest > phi_0 then a local minimum must be between alpha = 0 and alpha = alphatest, so alpha_test is good enough to return.
260+
alphatest = psi2 * alpha
261+
alphatest = min(alphatest, alphamax)
262+
phitest = ϕ(alphatest)
263+
alphatest, phitest = get_finite(alphatest, phitest, ϕ, psi3, iterfinitemax, phi_0, mayterminate)
264+
if verbose == true
265+
println("alpha guess (expand): ", alphatest)
266+
end
275267
end
276-
return alpha
268+
return alphatest
277269
end
278270

279271
# Generate initial guess for step size (HZ, stage I0)
280272
function _hzI0(x::AbstractArray{Tx},
281273
gr::AbstractArray{Tx},
282274
f_x::T,
275+
alphamax::T,
283276
psi0::T = convert(T, 1)/100) where {Tx,T}
284277
zeroT = convert(T, 0)
285278
alpha = convert(T, 1)
@@ -289,8 +282,23 @@ function _hzI0(x::AbstractArray{Tx},
289282
if x_max != zeroT
290283
alpha = psi0 * x_max / gr_max
291284
elseif f_x != zeroT
292-
alpha = psi0 * abs(f_x) / norm(gr)
285+
alpha = psi0 * abs(f_x) / norm(gr)^2
293286
end
294287
end
295-
return alpha
288+
return min(alpha, alphamax)
289+
end
290+
291+
function get_finite(alpha::T, phi, ϕ, factor, itermax, phi_0, mayterminate) where {T}
292+
iter = 1
293+
while !isfinite(phi)
294+
if iter >= itermax
295+
mayterminate[] = true
296+
return T(0), phi_0
297+
end
298+
299+
alpha = factor * alpha
300+
phi = ϕ(alpha)
301+
iter += 1
302+
end
303+
return alpha, phi
296304
end

test/initial.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@
9090
state.f_x_previous = 2*phi_0
9191
is = InitialQuadratic(snap2one=(0.9,Inf))
9292
is(ls, state, phi_0, dphi_0, df)
93-
@test state.alpha == 0.8200000000000001
93+
@test state.alpha == 1.0
9494
@test ls.mayterminate[] == false
9595

9696
# Test Quadratic snap2one

0 commit comments

Comments
 (0)