diff --git a/src/Interfaces/Interfaces.jl b/src/Interfaces/Interfaces.jl
index cea09d223..aa0f38510 100644
--- a/src/Interfaces/Interfaces.jl
+++ b/src/Interfaces/Interfaces.jl
@@ -35,11 +35,18 @@ Follow the links for the definitions of the interfaces!
 * [`apply_column!`](@ref)
 * [`apply_operator!`](@ref)
 * [`step_stats`](@ref)
+
+## Functions for retrieving information from DataFrames:
+
+* [`num_replicas`](@ref)
+* [`num_spectral_states`](@ref)
+* [`num_overlaps`](@ref)
 """
 module Interfaces
 
 using LinearAlgebra: LinearAlgebra, diag
 using VectorInterface: VectorInterface, add, add!, zerovector!, scalartype
+using DataFrames: DataFrame, metadata
 
 import OrderedCollections: freeze
 
@@ -54,9 +61,12 @@ export
     random_offdiagonal, starting_address, allows_address_type,
     LOStructure, IsDiagonal, IsHermitian, AdjointKnown, AdjointUnknown, has_adjoint,
     AbstractOperator, AbstractObservable
+export
+    num_replicas, num_spectral_states, num_overlaps
 
 include("stochasticstyles.jl")
 include("dictvectors.jl")
 include("hamiltonians.jl")
+include("dataframes.jl")
 
 end
diff --git a/src/Interfaces/dataframes.jl b/src/Interfaces/dataframes.jl
new file mode 100644
index 000000000..9d893c5df
--- /dev/null
+++ b/src/Interfaces/dataframes.jl
@@ -0,0 +1,46 @@
+"""
+    num_replicas(df::DataFrame)
+
+Return the number of replicas used in the simulation that produced `df`.
+"""
+function num_replicas(df::DataFrame)
+    if haskey(metadata(df), "num_replicas")
+        return parse(Int, metadata(df, "num_replicas"))
+    else
+        num = length(filter(startswith("norm"), names(df)))
+        num > 0 || throw(ArgumentError("No replicas found in DataFrame"))
+        return num
+    end
+end
+
+"""
+    num_spectral_states(df::DataFrame)
+
+Return the number of spectral states used in the simulation that produced `df`.
+"""
+function num_spectral_states(df::DataFrame)
+    if haskey(metadata(df), "num_spectral_states")
+        return parse(Int, metadata(df, "num_spectral_states"))
+    else
+        if length(filter(startswith("norm"), names(df))) == 0
+            throw(ArgumentError("No spectral states found in DataFrame"))
+        end
+        return 1
+    end
+end
+
+"""
+    num_overlaps(df::DataFrame)
+
+Return the number of overlaps between replicas.
+"""
+function num_overlaps(df::DataFrame)
+    if haskey(metadata(df), "num_overlaps")
+        return parse(Int, metadata(df, "num_overlaps"))
+    else
+        if length(filter(startswith("norm"), names(df))) == 0
+            throw(ArgumentError("No replicas found in DataFrame"))
+        end
+        return length(filter(startswith(r"c[0-9]+_dot"), names(df)))
+    end
+end
diff --git a/src/Rimu.jl b/src/Rimu.jl
index 7a190185c..ba9bed471 100644
--- a/src/Rimu.jl
+++ b/src/Rimu.jl
@@ -46,7 +46,7 @@ include("mpi_helpers.jl")
 
 include("Interfaces/Interfaces.jl")
 @reexport using .Interfaces
-using .Interfaces: dot_from_right
+import .Interfaces: dot_from_right, num_replicas, num_overlaps, num_spectral_states
 include("BitStringAddresses/BitStringAddresses.jl")
 @reexport using .BitStringAddresses
 include("Hamiltonians/Hamiltonians.jl")
@@ -78,7 +78,7 @@ export TimeStepStrategy, ConstantTimeStep, OvershootControl
 export localpart, walkernumber
 export smart_logger, default_logger
 export ProjectorMonteCarloProblem, SimulationPlan, state_vectors
-export FCIQMC, num_replicas, num_spectral_states, GramSchmidt
+export FCIQMC, num_replicas, num_spectral_states, num_overlaps, GramSchmidt
 
 function __init__()
     # Turn on smart logging once at runtime. Turn off with `default_logger()`.
diff --git a/src/StatsTools/StatsTools.jl b/src/StatsTools/StatsTools.jl
index 5152d8dc6..f46a086fa 100644
--- a/src/StatsTools/StatsTools.jl
+++ b/src/StatsTools/StatsTools.jl
@@ -34,6 +34,7 @@ using SpecialFunctions: SpecialFunctions, erf
 using Statistics: Statistics
 using StrFormat: StrFormat, @f_str
 using StrLiterals: StrLiterals
+import ..Interfaces: num_replicas, num_spectral_states, num_overlaps
 
 import ProgressLogging, Folds
 import MacroTools
diff --git a/src/StatsTools/reweighting.jl b/src/StatsTools/reweighting.jl
index c081ff7b8..e15eb5cb8 100644
--- a/src/StatsTools/reweighting.jl
+++ b/src/StatsTools/reweighting.jl
@@ -207,7 +207,7 @@ Compute the [`growth_estimator`](@ref) on a `DataFrame` `df` or
 depths.
 
 Returns a `NamedTuple` with the fields
-* `df_ge`: `DataFrame` with reweighting depth and `growth_estiamator` data. See example
+* `df_ge`: `DataFrame` with reweighting depth and `growth_estimator` data. See example
   below.
 * `correlation_estimate`: estimated correlation time from blocking analysis
 * `se, se_l, se_u`: [`shift_estimator`](@ref) and error
@@ -252,7 +252,6 @@ function growth_estimator_analysis(
     df = DataFrame(sim)
     shift_v = Vector(getproperty(df, Symbol(shift_name))) # casting to `Vector` to make SIMD loops efficient
     norm_v = Vector(getproperty(df, Symbol(norm_name)))
-    num_reps = length(filter(startswith("norm"), names(df)))
     time_step = isnothing(time_step) ? determine_constant_time_step(df) : time_step
     se = blocking_analysis(shift_v; skip)
     E_r = se.mean
@@ -423,7 +422,6 @@ function mixed_estimator_analysis(
     shift_v = Vector(getproperty(df, Symbol(shift_name))) # casting to `Vector` to make SIMD loops efficient
     hproj_v = Vector(getproperty(df, Symbol(hproj_name)))
     vproj_v = Vector(getproperty(df, Symbol(vproj_name)))
-    num_reps = length(filter(startswith("norm"), names(df)))
 
     time_step = isnothing(time_step) ? determine_constant_time_step(df) : time_step
     se = blocking_analysis(shift_v; skip)
@@ -568,7 +566,7 @@ function rayleigh_replica_estimator(
     kwargs...
 )
     df = DataFrame(sim)
-    num_reps = parse(Int, metadata(df, "num_replicas"))
+    num_reps = num_replicas(df)
     time_step = isnothing(time_step) ? determine_constant_time_step(df) : time_step
     T = eltype(df[!, Symbol(shift_name, "_r1s1")])
     shift_v = Vector{T}[]
@@ -641,7 +639,7 @@ function rayleigh_replica_estimator_analysis(
     kwargs...
 )
     df = DataFrame(sim)
-    num_reps = parse(Int, metadata(df, "num_replicas"))
+    num_reps = num_replicas(df)
     time_step = isnothing(time_step) ? determine_constant_time_step(df) : time_step
     # estimate the correlation time by blocking the shift data
     T = eltype(df[!, Symbol(shift_name, "_r1s1")])
diff --git a/src/StatsTools/variational_energy_estimator.jl b/src/StatsTools/variational_energy_estimator.jl
index 2f4da9b27..48a100cf4 100644
--- a/src/StatsTools/variational_energy_estimator.jl
+++ b/src/StatsTools/variational_energy_estimator.jl
@@ -53,32 +53,38 @@ end
 
 function variational_energy_estimator(sim; max_replicas=:all, spectral_state=1, kwargs...)
     df = DataFrame(sim)
-    num_replicas = parse(Int, metadata(df, "num_replicas"))
-    if num_replicas == 1
+    num_reps = num_replicas(df)
+    if num_reps == 1
         throw(ArgumentError(
             "No replicas found. Use keyword \
             `replica_strategy=AllOverlaps(n)` with n≥2 in `ProjectorMonteCarloProblem` to set up replicas!"
         ))
     end
-    @assert num_replicas ≥ 2 "At least two replicas are needed, found $num_replicas"
+    @assert num_reps ≥ 2 "At least two replicas are needed, found $num_replicas"
 
-    num_overlaps = length(filter(startswith(Regex("r[0-9]+s$(spectral_state)_dot_r[0-9]+s$(spectral_state)")), names(df)))
-    @assert num_overlaps == binomial(num_replicas, 2) "Unexpected number of overlaps."
+    num_olaps = num_overlaps(df)
+    if num_olaps == 0
+        throw(ArgumentError(
+            "No overlaps found. Use keyword \
+            `replica_strategy=AllOverlaps(n)` with n≥2 in `ProjectorMonteCarloProblem` to set up replicas!"
+        ))
+    end
+    @assert num_olaps == binomial(num_reps, 2) "Unexpected number of overlaps."
 
     # process at most `max_replicas` but at least 2 replicas
     if max_replicas isa Integer
-        num_replicas = max(2, min(max_replicas, num_replicas))
+        num_reps = max(2, min(max_replicas, num_reps))
     end
 
-    shiftnames = [Symbol("shift_r$(i)s$(spectral_state)") for i in 1:num_replicas]
+    shiftnames = [Symbol("shift_r$(i)s$(spectral_state)") for i in 1:num_reps]
     shifts = map(name -> getproperty(df, name), shiftnames)
-    @assert length(shifts) == num_replicas
+    @assert length(shifts) == num_reps
 
     overlap_names = [
-        Symbol("r$(i)s$(spectral_state)_dot_r$(j)s$(spectral_state)") for i in 1:num_replicas for j in i+1:num_replicas
+        Symbol("r$(i)s$(spectral_state)_dot_r$(j)s$(spectral_state)") for i in 1:num_reps for j in i+1:num_reps
     ]
     overlaps = map(name -> getproperty(df, name), overlap_names)
-    @assert length(overlaps) ≤ num_overlaps
+    @assert length(overlaps) ≤ num_olaps
 
     return variational_energy_estimator(shifts, overlaps; kwargs...)
 end
diff --git a/src/pmc_simulation.jl b/src/pmc_simulation.jl
index ba89163a7..9eb0e7d37 100644
--- a/src/pmc_simulation.jl
+++ b/src/pmc_simulation.jl
@@ -187,6 +187,7 @@ end
 
 num_spectral_states(sm::PMCSimulation) = num_spectral_states(sm.state)
 num_replicas(sm::PMCSimulation) = num_replicas(sm.state)
+num_overlaps(sm::PMCSimulation) = num_overlaps(sm.state)
 
 function report_simulation_status_metadata!(report::Report, sm::PMCSimulation)
     @unpack modified, aborted, success, message, elapsed_time = sm
diff --git a/src/qmc_states.jl b/src/qmc_states.jl
index 9e6fe17bf..6e6714ebd 100644
--- a/src/qmc_states.jl
+++ b/src/qmc_states.jl
@@ -120,6 +120,8 @@ end
 
 num_replicas(::ReplicaState{N}) where {N} = N
 num_spectral_states(::ReplicaState{<:Any, S}) where {S} = S
+num_overlaps(::ReplicaState{<:Any,<:Any,<:Any,<:Any,<:NoStats}) = 0
+num_overlaps(::ReplicaState{N,<:Any,<:Any,<:Any,<:AllOverlaps{N,<:Any,<:Any,B}}) where {N,B} = B*N*(N-1)÷2
 
 Base.show(io::IO, r::ReplicaState) = show(io, MIME("text/plain"), r)
 function Base.show(io::IO, ::MIME"text/plain", st::ReplicaState)
@@ -187,6 +189,7 @@ function report_default_metadata!(report::Report, state::ReplicaState)
     report_metadata!(report, "laststep", state.simulation_plan.last_step)
     report_metadata!(report, "num_replicas", num_replicas(state))
     report_metadata!(report, "num_spectral_states", num_spectral_states(state))
+    report_metadata!(report, "num_overlaps", num_overlaps(state))
     report_metadata!(report, "hamiltonian", s_state.hamiltonian)
     report_metadata!(report, "reporting_strategy", state.reporting_strategy)
     report_metadata!(report, "shift_strategy", algorithm.shift_strategy)
diff --git a/test/Interfaces.jl b/test/Interfaces.jl
index 18fa74f50..faf57ff5d 100644
--- a/test/Interfaces.jl
+++ b/test/Interfaces.jl
@@ -1,6 +1,7 @@
 using LinearAlgebra
 using Rimu
 using Test
+using DataFrames
 
 @testset "Interface basics" begin
     @test eltype(StyleUnknown{String}()) == String
@@ -36,6 +37,12 @@ using Test
     @test_throws ArgumentError Interfaces.dot_from_right(1, 2, 3)
 end
 
+@testset "DataFrame interfaces" begin
+    @test_throws ArgumentError num_replicas(DataFrame())
+    @test_throws ArgumentError num_spectral_states(DataFrame())
+    @test_throws ArgumentError num_overlaps(DataFrame())
+end
+
 # using lomc! with a matrix was removed in Rimu.jl v0.12.0
 @testset "lomc! with matrix" begin
     ham = [1 1 2 3 2;
diff --git a/test/excited_states_tests.jl b/test/excited_states_tests.jl
index c54110fbc..dca8c87f6 100644
--- a/test/excited_states_tests.jl
+++ b/test/excited_states_tests.jl
@@ -2,7 +2,7 @@ using Rimu
 using Test
 
 
-@testset "excited state energies" begin
+@testset "excited states" begin
     ham = HubbardReal1D(BoseFS(1,1,1,1,1))
     pr = ExactDiagonalizationProblem(ham)
     result = solve(pr)
@@ -22,6 +22,7 @@ using Test
     @test energy1.mean ≈ vals[1]
     @test energy2.mean ≈ vals[2]
     @test energy3.mean ≈ vals[3]
+    @test num_spectral_states(df) == 3
 
     n_replicas = 2
     p = ProjectorMonteCarloProblem(ham; spectral_strategy, last_step, style, n_replicas)
@@ -40,13 +41,18 @@ using Test
     @test energy5.mean ≈ vals[2]
     @test energy6.mean ≈ vals[3]
 
+    @test num_overlaps(df) == 0
+    @test_throws ArgumentError variational_energy_estimator(df)
+
     replica_strategy = AllOverlaps(n_replicas; operator=G2RealCorrelator(0), mixed_spectral_overlaps=true)
     p = ProjectorMonteCarloProblem(ham; spectral_strategy, last_step, style, replica_strategy)
     df = DataFrame(solve(p))
+    @test num_replicas(df) == 2
+    @test num_overlaps(df) == 1
     for state in 1:3
         r = rayleigh_replica_estimator(df; spectral_state=state)
         @test r.f ≈ g2s[state] atol=0.01
     end
-    num_overlaps = length(filter(startswith(r"r[0-9]+s[0-9]+_dot"), names(df)))
-    @test num_overlaps == 15
+    num_olaps = length(filter(startswith(r"r[0-9]+s[0-9]+_dot"), names(df)))
+    @test num_olaps == 15
 end