report errorbars from amsgrad #13

Merged
merged 3 commits · Oct 29, 2024
Changes from all commits
2 changes: 1 addition & 1 deletion src/Gutzwiller.jl
@@ -13,7 +13,7 @@ using SpecialFunctions
using Tables
using VectorInterface

-export num_parameters, val_and_grad
+export num_parameters, val_and_grad, val_err_and_grad
include("ansatz/abstract.jl")
export VectorAnsatz
include("ansatz/vector.jl")
65 changes: 40 additions & 25 deletions src/amsgrad.jl
@@ -17,6 +17,7 @@ struct GradientDescentResult{T,P<:SVector{<:Any,T},F}

param::Vector{P}
value::Vector{T}
+error::Vector{T}
gradient::Vector{P}
first_moment::Vector{P}
second_moment::Vector{P}
@@ -27,7 +28,7 @@ function Base.show(io::IO, r::GradientDescentResult)
println(io, "GradientDescentResult")
println(io, " iterations: ", r.iterations)
println(io, " converged: ", r.converged, " (", r.reason, ")")
-println(io, " last value: ", r.value[end])
+println(io, " last value: ", r.value[end], " ± ", r.error[end])
print(io, " last params: ", r.param[end])
end

@@ -39,9 +40,9 @@ function Tables.Schema(r::GradientDescentResult{T,P}) where {T,P}
hyper_types = typeof.(value.(r.hyperparameters))

return Tables.Schema(
-(hyper_keys..., :iter, :param, :value, :gradient,
+(hyper_keys..., :iter, :param, :value, :error, :gradient,
:first_moment, :second_moment, :param_delta),
-(hyper_types..., Int, Int, P, T, P, P, P, P)
+(hyper_types..., Int, Int, P, T, T, P, P, P, P)
)
end

@@ -55,6 +56,7 @@ function Base.getindex(rows::GradientDescentResultRows, i)
iter=i,
param=r.param[i],
value=r.value[i],
+error=r.error[i],
gradient=r.gradient[i],
first_moment=r.first_moment[i],
second_moment=r.second_moment[i],
@@ -69,6 +71,7 @@ end
adaptive_gradient_descent(
φ, ψ, f, p_init;
maxiter=100, verbose=true, α=0.01,
+early_stop=true,
grad_tol=√eps(T), param_tol=√eps(T), val_tol=√(eps(T)),
first_moment_init, second_moment_init, fix_params,
kwargs...
@@ -117,7 +120,9 @@ function adaptive_gradient_descent(
first_moment_init=nothing,
second_moment_init=nothing,
fix_params=nothing,
-grad_tol=√eps(T), param_tol=√eps(T), val_tol=√(eps(T)),
+early_stop=true,
+grad_tol=√eps(T), param_tol=√eps(T), val_tol=√(eps(T)), err_tol=0.0,
+fkwargs=(;),
kwargs...
) where {N,T,P<:SVector{N,T}}

@@ -128,7 +133,7 @@ function adaptive_gradient_descent(
not_fixed = .!SVector{N,Bool}(fix_params)
end

-val, grad = val_and_grad(f, p_init)
+val, grad = val_and_grad(f, p_init; fkwargs...)
old_val = Inf

if isnothing(first_moment_init)
@@ -146,6 +151,7 @@

params = P[]
values = T[]
+errors = T[]
gradients = P[]
first_moments = P[]
second_moments = P[]
@@ -159,7 +165,7 @@

while iter ≤ maxiter
iter += 1
-val, grad = val_and_grad(f, p)
+val, err, grad = val_err_and_grad(f, p; fkwargs...)
first_moment = φ(first_moment, grad; kwargs...)
second_moment = ψ(second_moment, grad; kwargs...)
grad = grad .* not_fixed
@@ -170,34 +176,43 @@

push!(params, p)
push!(values, val)
+push!(errors, err)
push!(gradients, grad)
push!(first_moments, first_moment)
push!(second_moments, second_moment)
push!(param_deltas, δp)

-if norm(δp) < param_tol
-verbose && @info "Converged (params)"
-reason = "params"
-converged = true
-break
-end
-if norm(grad) < grad_tol
-verbose && @info "Converged (grad)"
-reason = "gradient"
-converged = true
-break
-end
-if abs(δval) < val_tol
-verbose && @info "Converged (value)"
-reason = "value"
-converged = true
-break
+if early_stop
+if norm(δp) < param_tol
+verbose && @info "Converged after $iter iterations (params)"
+reason = "params"
+converged = true
+break
+end
+if norm(grad) < grad_tol
+verbose && @info "Converged after $iter iterations (grad)"
+reason = "gradient"
+converged = true
+break
+end
+if abs(δval) < val_tol
+verbose && @info "Converged after $iter iterations (value)"
+reason = "value"
+converged = true
+break
+end
+if abs(δval) < err * err_tol
+verbose && @info "Converged after $iter iterations (errorbars)"
+reason = "errorbars"
+converged = true
+break
+end
end

p = p + δp

verbose && next!(
-prog; showvalues=(((:iter, iter), (:value, val), (:param, p)))
+prog; showvalues=(((:iter, iter), (:value, val ± err), (:param, p)))
)
end
iter == maxiter && verbose && @info "Aborted (maxiter)"
@@ -206,7 +221,7 @@

return GradientDescentResult(
f, (; α, kwargs...), p_init, length(params), converged, reason,
-params, values, gradients, first_moments, second_moments, param_deltas,
+params, values, errors, gradients, first_moments, second_moments, param_deltas,
)
end

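Taken together, the new `early_stop`, `err_tol`, and `fkwargs` keywords can be exercised directly through `adaptive_gradient_descent`. A minimal sketch, assuming the function is reachable from the package namespace; the `Quadratic` toy objective and the AMSGrad-style `φ`/`ψ` update rules below are illustrative assumptions, not code from this PR:

```julia
using Gutzwiller, StaticArrays

# Hypothetical deterministic objective. It only implements val_and_grad, so
# the val_err_and_grad fallback (added in src/ansatz/abstract.jl below)
# supplies a zero error bar automatically.
struct Quadratic end
Gutzwiller.val_and_grad(::Quadratic, p) = (sum(abs2, p), 2p)

# AMSGrad-style moment updates; adaptive_gradient_descent forwards its
# remaining kwargs to both φ and ψ, so each accepts and ignores the rest.
φ(m, g; β1=0.9, kwargs...) = β1 * m + (1 - β1) * g                  # first moment
ψ(v, g; β2=0.999, kwargs...) = max.(β2 * v + (1 - β2) * g .^ 2, v)  # non-decreasing second moment

result = Gutzwiller.adaptive_gradient_descent(
    φ, ψ, Quadratic(), SA[1.0, -2.0];
    α=0.05, maxiter=500, early_stop=true,
    err_tol=0.0,  # with zero error bars the new criterion never fires
)
result.value[end], result.error[end]  # last value and its (zero) error bar
```

With `early_stop=false`, all four tolerance checks are skipped and the loop always runs to `maxiter`, which is useful for collecting a fixed-length optimization trace.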
10 changes: 10 additions & 0 deletions src/ansatz/abstract.jl
@@ -50,3 +50,13 @@ val_and_grad
function val_and_grad(a::AbstractAnsatz{K,<:Any,0}, addr::K, _) where {K}
return a(addr, SVector{0,valtype(a)}()), SVector{0,valtype(a)}()
end
+
+"""
+val_err_and_grad(args...)
+
+Return the value, its error and gradient. See [`val_and_grad`](@ref).
+"""
+function val_err_and_grad(args...)
+val, grad = val_and_grad(args...)
+return val, zero(typeof(val)), grad
+end
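In other words, any objective that only defines `val_and_grad` automatically satisfies the three-value interface with a zero error bar. A minimal check, reusing the hypothetical `Quadratic` objective from the sketch above:

```julia
using StaticArrays

val, err, grad = val_err_and_grad(Quadratic(), SA[1.0, -2.0])
@assert err == 0.0  # deterministic objectives report zero error
@assert (val, grad) == val_and_grad(Quadratic(), SA[1.0, -2.0])
```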
2 changes: 1 addition & 1 deletion src/ansatz/extgutzwiller.jl
@@ -4,7 +4,7 @@
The Extended Gutzwiller ansatz:

```math
-G_i = exp(-g_1 H_{i,i}) exp(-g_2 \\sum_{<i,j>} n_i n_j ),
+G(|f⟩; 𝐠) = exp(-g_1 ⟨f|H|f⟩ - g_2 ⟨f| ∑_{<i,j>} n_i n_j |f⟩),
```

where ``H`` is an ExtendedHubbardReal1D Hamiltonian. The additional term accounts for the strength of nearest-neighbour interactions.
2 changes: 1 addition & 1 deletion src/ansatz/gutzwiller.jl
@@ -4,7 +4,7 @@
The Gutzwiller ansatz:

```math
-G_i = exp(-g H_{i,i}),
+G(|f⟩; g) = exp(-g ⟨f|H|f⟩),
```

where ``H`` is the `hamiltonian` passed to the struct.
5 changes: 2 additions & 3 deletions src/ansatz/jastrow.jl
@@ -5,7 +5,7 @@ using Rimu.Hamiltonians: circshift_dot
JastrowAnsatz(hamiltonian) <: AbstractAnsatz

```math
-J(|f⟩; p) = exp(-∑_{k=1}^M ∑_{l=k}^M p_{k,l} ⟨f|n_k n_l|f⟩)
+J(|f⟩; 𝐩) = exp(-∑_{k=1}^M ∑_{l=k}^M p_{k,l} ⟨f| n_k n_l |f⟩)
```

With translationally invariant Hamiltonians, use [`RelativeJastrowAnsatz`](@ref) instead.
@@ -65,9 +65,8 @@ For a translationally invariant Hamiltonian, this is equivalent to [`JastrowAnsatz`](@ref),
but has fewer parameters.

```math
-J(|f⟩; p) = exp(-∑_{d=0}^{M÷2} p_d ∑_{k=1}^{M} ⟨f|n_k n_{k + d}|f⟩)
+R(|f⟩; 𝐩) = exp(-∑_{d=0}^{M/2} p_d ∑_{k=1}^M ⟨f| n_k n_{k + d} |f⟩)
```
-
"""
struct RelativeJastrowAnsatz{A,N,H} <: AbstractAnsatz{A,Float64,N}
hamiltonian::H
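The fewer-parameters claim is easy to quantify from the two docstring sums: `JastrowAnsatz` carries one parameter per index pair with ``k ≤ l``, i.e. ``M(M+1)/2`` of them, while `RelativeJastrowAnsatz` has one per distance ``d = 0, …, M÷2``. A quick count (inferred from the docstring sums above, not from code in this diff):

```julia
M = 6                             # number of modes, illustrative
jastrow_params = M * (M + 1) ÷ 2  # 21 parameters p_{k,l} with k ≤ l
relative_params = M ÷ 2 + 1       # 4 parameters p_d with d = 0, …, M÷2
```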
29 changes: 27 additions & 2 deletions src/kinetic-vqmc/state.jl
@@ -34,7 +34,6 @@ function Base.empty!(st::KineticVQMCWalkerState)
empty!(st.residence_times)
empty!(st.local_energies)
empty!(st.addresses)
-curr_address = starting_address(st.hamiltonian)
return st
end
function Base.length(st::KineticVQMCWalkerState)
@@ -43,11 +42,22 @@ end
function val_and_grad(res::KineticVQMCWalkerState)
weights = FrequencyWeights(res.residence_times)
val = mean(res.local_energies, weights)
-grads = res.grad_ratios .* (res.local_energies .- val)# ./ weights
+grads = res.grad_ratios .* (res.local_energies .- val)
grad = 2 * mean(grads, weights)

return val, grad
end
+function val_err_and_grad(res::KineticVQMCWalkerState)
+weights = FrequencyWeights(res.residence_times)
+
+b_res = blocking_analysis(resample(res.residence_times, res.local_energies))
+val = b_res.mean
+err = b_res.err
+
+grads = res.grad_ratios .* (res.local_energies .- val)
+grad = 2 * mean(grads, weights)
+return val, err, grad
+end


"""
@@ -137,6 +147,21 @@ function val_and_grad(res::KineticVQMCResult{<:Any,T}) where {T}

return vals / walkers, grads ./ walkers
end
+function val_err_and_grad(res::KineticVQMCResult{<:Any,T}) where {T}
+vals = zero(T)
+errs = zero(T)
+grads = zero(eltype(res.states[1].grad_ratios))
+walkers = length(res.states)
+
+for w in 1:walkers
+v, e, g = val_err_and_grad(res.states[w])
+errs += e^2
+vals += v
+grads += g
+end
+
+return vals / walkers, √mean(errs) / walkers, grads ./ walkers
+end

"""
resample(residence_times, values; len=length(values))
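The cross-walker reduction in `val_err_and_grad(::KineticVQMCResult)` is standard error propagation for a mean of independent estimates: blocking analysis gives each walker ``v_w ± e_w``, and averaging over ``W`` walkers yields

```math
\bar v = \frac{1}{W} \sum_{w=1}^{W} v_w, \qquad
\sigma_{\bar v} = \frac{1}{W} \sqrt{\sum_{w=1}^{W} e_w^2}.
```

Since `errs` accumulates the sum of squared errors and `mean` of that scalar accumulator is the scalar itself, `√mean(errs) / walkers` computes exactly the second expression.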
22 changes: 18 additions & 4 deletions src/kinetic-vqmc/wrapper.jl
@@ -78,17 +78,31 @@ function _reset!(ke::KineticVQMC{N,T}, params) where {N,T}
end
end

-function (ke::KineticVQMC)(params)
+function (ke::KineticVQMC)(
+params;
+samples=ke.steps * ke.walkers, steps=samples ÷ ke.walkers,
+)
_reset!(ke, params)
-res = kinetic_vqmc!(ke.result; steps=ke.steps)
+res = kinetic_vqmc!(ke.result; steps)
return mean(res)
end

-function val_and_grad(ke::KineticVQMC, params)
+function val_and_grad(
+ke::KineticVQMC, params;
+samples=ke.steps * ke.walkers, steps=samples ÷ ke.walkers,
+)
_reset!(ke, params)
-res = kinetic_vqmc!(ke.result; steps=ke.steps)
+res = kinetic_vqmc!(ke.result; steps)
return val_and_grad(res)
end
+function val_err_and_grad(
+ke::KineticVQMC, params;
+samples=ke.steps * ke.walkers, steps=samples ÷ ke.walkers,
+)
+_reset!(ke, params)
+res = kinetic_vqmc!(ke.result; steps)
+return val_err_and_grad(res)
+end

function (ke::KineticVQMC)(F, G, params)
_reset!(ke, params)
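One consequence of the new wrapper keywords is worth spelling out: callers can now request a total sample budget via `samples` rather than per-walker `steps`, and `steps` is then derived by integer division, so the realised total `steps * ke.walkers` may fall slightly short of the request. A sketch of the arithmetic with illustrative numbers:

```julia
walkers = 8
samples = 12_345           # requested total budget
steps = samples ÷ walkers  # 1543 steps per walker
steps * walkers            # 12344 samples actually taken (≤ samples)
```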