From 4eb105626fc7c3131a2d84e54ca50820a2a83347 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Thu, 15 Sep 2022 17:14:51 -0700 Subject: [PATCH 01/21] Add generic Rosenbrock algorithm --- src/solvers/imex_ark.jl | 2 +- src/solvers/{rosenbrock.jl => old_rosenbrock.jl} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/solvers/{rosenbrock.jl => old_rosenbrock.jl} (100%) diff --git a/src/solvers/imex_ark.jl b/src/solvers/imex_ark.jl index d0366cdd..558a07c6 100644 --- a/src/solvers/imex_ark.jl +++ b/src/solvers/imex_ark.jl @@ -15,7 +15,7 @@ The abscissae are often defined as c_χ[i] := ∑_{j=1}^s a_χ[i,j] for the expl and implicit methods to be "internally consistent", with c_exp[i] = c_imp[i] for the overall IMEX method to be "internally consistent", but this is not required. If the weights are defined as b_χ[j] := a_χ[s,j], then u_next = U[s]; i.e., the -method is FSAL (first same as last). +method is FSAL (first same as last), which means that it is "stiffly accurate". To simplify our notation, let a_χ[s+1,j] := b_χ[j] ∀ j ∈ 1:s, diff --git a/src/solvers/rosenbrock.jl b/src/solvers/old_rosenbrock.jl similarity index 100% rename from src/solvers/rosenbrock.jl rename to src/solvers/old_rosenbrock.jl From d38fae95a6f7e8205110d1f12f0b310d5c993866 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Thu, 15 Sep 2022 17:15:19 -0700 Subject: [PATCH 02/21] WIP --- src/solvers/rosenbrock.jl | 151 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 src/solvers/rosenbrock.jl diff --git a/src/solvers/rosenbrock.jl b/src/solvers/rosenbrock.jl new file mode 100644 index 00000000..4a2e70b8 --- /dev/null +++ b/src/solvers/rosenbrock.jl @@ -0,0 +1,151 @@ +#= +An s-stage diagonally implicit Runge-Kutta (DIRK) method for solving +∂u/∂t = f(u, t) is specified by a lower triangular s×s matrix a (whose values +are called "internal coefficients"), a vector b of length s (whose values are +called "weights"), and a vector c of length s 
(whose values are called +"abcissae" or "nodes"). +Given the state u at time t, the state u_next at time t + Δt is given by + u_next := u + Δt * ∑_{i=1}^s bᵢ * f(Uᵢ, t + Δt * cᵢ), where + Uᵢ := u + Δt * ∑_{j=1}^i aᵢⱼ * f(Uⱼ, t + Δt * cⱼ) ∀ i ∈ 1:s. +In order to transform this DIRK method into a Rosenbrock method, we must assume +that it is "internally consistent", which means that + cᵢ := ∑_{j=1}^i aᵢⱼ ∀ i ∈ 1:s. + +First, we will simplify our notation by defining + Tᵢ := t + Δt * ∑_{j=1}^i aᵢⱼ and + kᵢ := Δt * f(Uᵢ, Tᵢ) ∀ i ∈ 1:s. +This simplifies our previous expressions to + u_next = u + ∑_{i=1}^s bᵢ * kᵢ and + Uᵢ = u + ∑_{j=1}^i aᵢⱼ * kⱼ. +Next, we will define + Ûᵢ := u + ∑_{j=1}^{i-1} aᵢⱼ * kⱼ and + T̂ᵢ := t + Δt * ∑_{j=1}^{i-1} aᵢⱼ ∀ i ∈ 1:s. +This implies that + Uᵢ = Ûᵢ + aᵢᵢ * kᵢ and + Tᵢ = T̂ᵢ + Δt * aᵢᵢ. +Substituting this into the definition of kᵢ gives us + kᵢ = Δt * f(Ûᵢ + aᵢᵢ * kᵢ, T̂ᵢ + Δt * aᵢᵢ). +Approximating the value of f with a first-order Taylor series expansion around +Ûᵢ and T̂ᵢ gives us + kᵢ = Δt * f(Ûᵢ, T̂ᵢ) + Δt * J(Ûᵢ, T̂ᵢ) * aᵢᵢ * kᵢ + Δt^2 * ḟ(Ûᵢ, T̂ᵢ) * aᵢᵢ, + where J(u, t) := ∂f/∂u(u, t) and ḟ(u, t) := ∂f/∂t(u, t). + +If we ignore the ḟ term, this equation for kᵢ can also be obtained by performing +a single Newton iteration to solve for Uᵢ from the starting guess Ûᵢ. +Substituting the definition of kᵢ into the last expression for Uᵢ gives us + Uᵢ = Ûᵢ + Δt * aᵢᵢ * f(Uᵢ, Tᵢ). +This can be rewritten as + Fᵢ(Uᵢ) = 0, where + Fᵢ(u) := Ûᵢ + Δt * aᵢᵢ * f(u, Tᵢ) - u ∀ i ∈ 1:s. +The Jacobian of this new function is + ∂Fᵢ/∂u(u) = Δt * aᵢᵢ * J(u, Tᵢ) - I. +When solving this equation using Newton's method, we proceed from the n-th +iteration's state Uᵢ_n to the next iteration's state by defining + Uᵢ_{n+1} := Uᵢ_n + ΔUᵢ_n, where + ΔUᵢ_n := -∂Fᵢ/∂u(Uᵢ_n) \\ Fᵢ(Uᵢ_n) ∀ n ≥ 0. +So, if Uᵢ_0 = Ûᵢ, this means that + ΔUᵢ_0 = -∂Fᵢ/∂u(Ûᵢ) \\ Fᵢ(Ûᵢ) = + = (I - Δt * aᵢᵢ * J(Ûᵢ, Tᵢ)) \\ (Δt * aᵢᵢ * f(Ûᵢ, Tᵢ)). 
+We can rearrange this equation to find that + ΔUᵢ_0 = Δt * aᵢᵢ * f(Ûᵢ, Tᵢ) + Δt * J(Ûᵢ, Tᵢ) * aᵢᵢ * ΔUᵢ_0. +Dividing through by aᵢᵢ gives us + ΔUᵢ_0/aᵢᵢ = Δt * f(Ûᵢ, Tᵢ) + Δt * J(Ûᵢ, Tᵢ) * aᵢᵢ * (ΔUᵢ_0/aᵢᵢ). +We can see that this equation for ΔUᵢ_0/aᵢᵢ is identical to the one for kᵢ, +aside from the missing ḟ term. + +In order to get a Rosenbrock method, we must make a modification to the equation +for kᵢ. +By using only a single Newton iteration, we break the assumptions that provide +guarantees about the convergence of the DIRK method. +To remedy this, we introduce an additional lower triangular s×s matrix of +coefficients γ, and we redefine + kᵢ := Δt * f(Ûᵢ, T̂ᵢ) + Δt * J(Ûᵢ, T̂ᵢ) * ∑_{j=1}^i γᵢⱼ * kⱼ + + Δt^2 * ḟ(Ûᵢ, T̂ᵢ) * ∑_{j=1}^i γᵢⱼ ∀ i ∈ 1:s. +In other words, we account for the higher-order terms dropped by the first-order +Taylor series expansion by taking carefully chosen linear combinations of kⱼ. +Since this effectively replaces aᵢᵢ with entries in γ, the diagonal entries can +be omitted from a, making it strictly lower triangular. + +For a "modified Rosenbrock method", the coefficients are chosen so that +J(Ûᵢ, T̂ᵢ) and ḟ(Ûᵢ, T̂ᵢ) can be approximated by their values at time t, J(u, t) +and ḟ(u, t). +For a "W-method", the approximation can be even coarser; e.g., the values at a +previous timestep, or the values at a precomputed reference state, or even +approximations that do not correspond to any particular state. +So, we will modify the last equation by replacing J(Ûᵢ, T̂ᵢ) and ḟ(Ûᵢ, T̂ᵢ) with +some approximations J and ḟ (whose values should be governed by the constraints +of the Rosenbrock method being used), giving us + kᵢ := Δt * f(Ûᵢ, T̂ᵢ) + Δt * J * ∑_{j=1}^i γᵢⱼ * kⱼ + + Δt^2 * ḟ * ∑_{j=1}^i γᵢⱼ ∀ i ∈ 1:s. + +Implementing a way to solve the last equation for kᵢ would require evaluating +the matrix-vector product J(Ûᵢ, T̂ᵢ) * ∑_{j=1}^i γᵢⱼ * kⱼ, which is more +computationally expensive than taking linear combinations of vectors. 
+This can be avoided by introducing the new variables + Kᵢ := ∑_{j=1}^i γᵢⱼ * kⱼ ∀ i ∈ 1:s. +We can rewrite this, along with the last equations for u_next and Ûᵢ, as matrix +equations: + K = γ * k, + u_next = u + bᵀ * k, and + Û = u + a * k. +In these equations, k, K, and U are matrices whose i-th rows are kᵢ, Kᵢ, and Uᵢ, +respectively; the last equation uses the fact that a is now strictly lower +triangular. +If γᵢᵢ != 0 ∀ i ∈ 1:s, then, γ is invertible, so the first equation implies that + k = γ⁻¹ * K. +Substituting this into the two remaining matrix equations gives us + u_next = u + bᵀγ⁻¹ * K and + U = u + aγ⁻¹ * K. +Since γ is lower triangular, γ⁻¹ is also lower triangular, and, since a is +strictly lower triangular, aγ⁻¹ is also strictly lower triangular, which means +that we can rewrite the last three equations as + kᵢ = ∑_{j=1}^i (γ⁻¹)ᵢⱼ * Kⱼ, + u_next = u + ∑_{i=1}^s (bᵀγ⁻¹)ᵢ * Kᵢ, and + Ûᵢ = u + ∑_{j=1}^{i-1} (aγ⁻¹)ᵢⱼ * Kⱼ. +Also, since γ is lower triangular, (γ⁻¹)ᵢᵢ = 1/γᵢᵢ, which means that + kᵢ = 1/γᵢᵢ * Kᵢ + ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ. +Using this new expression for kᵢ, we can rewrite the last equation for kᵢ as + 1/γᵢᵢ * Kᵢ + ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ = + = Δt * f(Ûᵢ, T̂ᵢ) + Δt * J * Kᵢ + Δt^2 * ḟ * ∑_{j=1}^i γᵢⱼ. +Solving for Kᵢ gives us + Kᵢ = (I - Δt * γᵢᵢ * J) \\ + ( + Δt^2 * ḟ * ∑_{j=1}^i γᵢᵢ * γᵢⱼ - ∑_{j=1}^{i-1} γᵢᵢ * (γ⁻¹)ᵢⱼ * Kⱼ + + Δt * γᵢᵢ * f(Ûᵢ, T̂ᵢ) + ). + +So, in summary, we have that, for some lower triangular s×s matrix γ with +nonzero diagonal entries, some strictly lower triangular s×s matrix a, some +vector b of length s, and some approximations J and ḟ of ∂f/∂u(Ûᵢ, T̂ᵢ) and +∂f/∂t(Ûᵢ, T̂ᵢ), + u_next := u + ∑_{i=1}^s (bᵀγ⁻¹)ᵢ * Kᵢ, where + Ûᵢ := u + ∑_{j=1}^{i-1} (aγ⁻¹)ᵢⱼ * Kⱼ, + T̂ᵢ := t + Δt * ∑_{j=1}^{i-1} aᵢⱼ, and + Kᵢ := (I - Δt * γᵢᵢ * J) \\ + ( + Δt^2 * ḟ * ∑_{j=1}^i γᵢᵢ * γᵢⱼ - ∑_{j=1}^{i-1} γᵢᵢ * (γ⁻¹)ᵢⱼ * Kⱼ + + Δt * γᵢᵢ * f(Ûᵢ, T̂ᵢ) + ) ∀ i ∈ 1:s. 
+ +Now, suppose that, instead of being provided with f(u, t), we are provided with +g(u₊, u, t, Δt), where + g(u₊, u, t, Δt) := u₊ + Δt * f(u, t). +This is useful if, for example, we have some tendency f̃(u, t), but, instead of +just using it to increment a state, we want to apply a filter/limiter L after +incrementing: + g(u₊, u, t, Δt) = L(u₊ + Δt * f̃(u, t)). +Rewriting the last definition of Kᵢ in terms of g instead of f gives us + Kᵢ := (I - Δt * γᵢᵢ * J) \\ + g( + Δt^2 * ḟ * ∑_{j=1}^i γᵢᵢ * γᵢⱼ - ∑_{j=1}^{i-1} γᵢᵢ * (γ⁻¹)ᵢⱼ * Kⱼ, + Ûᵢ, + T̂ᵢ, + Δt * γᵢᵢ, + ) ∀ i ∈ 1:s. +=# + +struct RosenbrockAlgorithm{γ, a, b, L} + linsolve:::L +end +RosenbrockAlgorithm{γ, a, b}(linsolve::L) where {γ, a, b, L} = + RosenbrockAlgorithm{γ, a, b, L}(linsolve) From 568da5c287a7dc748fc5b48d245f5db6488e7b2d Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Sun, 18 Sep 2022 21:04:31 -0700 Subject: [PATCH 03/21] Finish working out math --- src/ClimaTimeSteppers.jl | 3 +- src/solvers/rosenbrock.jl | 436 ++++++++++++++++++++++++++++---------- 2 files changed, 326 insertions(+), 113 deletions(-) diff --git a/src/ClimaTimeSteppers.jl b/src/ClimaTimeSteppers.jl index 46344e7c..c5b6f53e 100644 --- a/src/ClimaTimeSteppers.jl +++ b/src/ClimaTimeSteppers.jl @@ -74,9 +74,11 @@ include("solvers/convergence_condition.jl") include("solvers/convergence_checker.jl") include("solvers/newtons_method.jl") include("solvers/imex_ark.jl") +include("solvers/rosenbrock.jl") # Include concrete implementations include("solvers/imex_ark_tableaus.jl") +include("solvers/rosenbrock_tableaus.jl") include("solvers/multirate.jl") include("solvers/lsrk.jl") include("solvers/ssprk.jl") @@ -84,7 +86,6 @@ include("solvers/ark.jl") # include("solvers/ars.jl") # previous implementations of ARS schemes include("solvers/mis.jl") include("solvers/wickerskamarock.jl") -include("solvers/rosenbrock.jl") include("callbacks.jl") diff --git a/src/solvers/rosenbrock.jl b/src/solvers/rosenbrock.jl index 4a2e70b8..5c8430fd 100644 
--- a/src/solvers/rosenbrock.jl +++ b/src/solvers/rosenbrock.jl @@ -1,4 +1,6 @@ #= +## Introduction to Rosenbrock Methods + An s-stage diagonally implicit Runge-Kutta (DIRK) method for solving ∂u/∂t = f(u, t) is specified by a lower triangular s×s matrix a (whose values are called "internal coefficients"), a vector b of length s (whose values are @@ -6,65 +8,45 @@ called "weights"), and a vector c of length s (whose values are called "abcissae" or "nodes"). Given the state u at time t, the state u_next at time t + Δt is given by u_next := u + Δt * ∑_{i=1}^s bᵢ * f(Uᵢ, t + Δt * cᵢ), where - Uᵢ := u + Δt * ∑_{j=1}^i aᵢⱼ * f(Uⱼ, t + Δt * cⱼ) ∀ i ∈ 1:s. + Uᵢ := u + Δt * ∑_{j=1}^i aᵢⱼ * f(Uⱼ, t + Δt * cⱼ). In order to transform this DIRK method into a Rosenbrock method, we must assume that it is "internally consistent", which means that - cᵢ := ∑_{j=1}^i aᵢⱼ ∀ i ∈ 1:s. + cᵢ := ∑_{j=1}^i aᵢⱼ. First, we will simplify our notation by defining Tᵢ := t + Δt * ∑_{j=1}^i aᵢⱼ and - kᵢ := Δt * f(Uᵢ, Tᵢ) ∀ i ∈ 1:s. + Fᵢ := f(Uᵢ, Tᵢ). This simplifies our previous expressions to - u_next = u + ∑_{i=1}^s bᵢ * kᵢ and - Uᵢ = u + ∑_{j=1}^i aᵢⱼ * kⱼ. + u_next = u + Δt * ∑_{i=1}^s bᵢ * Fᵢ and + Uᵢ = u + Δt * ∑_{j=1}^i aᵢⱼ * Fⱼ. Next, we will define - Ûᵢ := u + ∑_{j=1}^{i-1} aᵢⱼ * kⱼ and - T̂ᵢ := t + Δt * ∑_{j=1}^{i-1} aᵢⱼ ∀ i ∈ 1:s. + Ûᵢ := u + Δt * ∑_{j=1}^{i-1} aᵢⱼ * Fⱼ and + T̂ᵢ := t + Δt * ∑_{j=1}^{i-1} aᵢⱼ. This implies that - Uᵢ = Ûᵢ + aᵢᵢ * kᵢ and + Uᵢ = Ûᵢ + Δt * aᵢᵢ * Fᵢ and Tᵢ = T̂ᵢ + Δt * aᵢᵢ. -Substituting this into the definition of kᵢ gives us - kᵢ = Δt * f(Ûᵢ + aᵢᵢ * kᵢ, T̂ᵢ + Δt * aᵢᵢ). +Substituting this into the definition of Fᵢ gives us + Fᵢ = f(Ûᵢ + Δt * aᵢᵢ * Fᵢ, T̂ᵢ + Δt * aᵢᵢ). Approximating the value of f with a first-order Taylor series expansion around Ûᵢ and T̂ᵢ gives us - kᵢ = Δt * f(Ûᵢ, T̂ᵢ) + Δt * J(Ûᵢ, T̂ᵢ) * aᵢᵢ * kᵢ + Δt^2 * ḟ(Ûᵢ, T̂ᵢ) * aᵢᵢ, - where J(u, t) := ∂f/∂u(u, t) and ḟ(u, t) := ∂f/∂t(u, t). 
- -If we ignore the ḟ term, this equation for kᵢ can also be obtained by performing -a single Newton iteration to solve for Uᵢ from the starting guess Ûᵢ. -Substituting the definition of kᵢ into the last expression for Uᵢ gives us - Uᵢ = Ûᵢ + Δt * aᵢᵢ * f(Uᵢ, Tᵢ). -This can be rewritten as - Fᵢ(Uᵢ) = 0, where - Fᵢ(u) := Ûᵢ + Δt * aᵢᵢ * f(u, Tᵢ) - u ∀ i ∈ 1:s. -The Jacobian of this new function is - ∂Fᵢ/∂u(u) = Δt * aᵢᵢ * J(u, Tᵢ) - I. -When solving this equation using Newton's method, we proceed from the n-th -iteration's state Uᵢ_n to the next iteration's state by defining - Uᵢ_{n+1} := Uᵢ_n + ΔUᵢ_n, where - ΔUᵢ_n := -∂Fᵢ/∂u(Uᵢ_n) \\ Fᵢ(Uᵢ_n) ∀ n ≥ 0. -So, if Uᵢ_0 = Ûᵢ, this means that - ΔUᵢ_0 = -∂Fᵢ/∂u(Ûᵢ) \\ Fᵢ(Ûᵢ) = - = (I - Δt * aᵢᵢ * J(Ûᵢ, Tᵢ)) \\ (Δt * aᵢᵢ * f(Ûᵢ, Tᵢ)). -We can rearrange this equation to find that - ΔUᵢ_0 = Δt * aᵢᵢ * f(Ûᵢ, Tᵢ) + Δt * J(Ûᵢ, Tᵢ) * aᵢᵢ * ΔUᵢ_0. -Dividing through by aᵢᵢ gives us - ΔUᵢ_0/aᵢᵢ = Δt * f(Ûᵢ, Tᵢ) + Δt * J(Ûᵢ, Tᵢ) * aᵢᵢ * (ΔUᵢ_0/aᵢᵢ). -We can see that this equation for ΔUᵢ_0/aᵢᵢ is identical to the one for kᵢ, -aside from the missing ḟ term. - -In order to get a Rosenbrock method, we must make a modification to the equation -for kᵢ. + Fᵢ ≈ f(Ûᵢ, T̂ᵢ) + Δt * J(Ûᵢ, T̂ᵢ) * aᵢᵢ * Fᵢ + Δt * ḟ(Ûᵢ, T̂ᵢ) * aᵢᵢ, where + J(u, t) := ∂f/∂u(u, t) and + ḟ(u, t) := ∂f/∂t(u, t). +This is roughly equivalent to running a single Newton iteration to solve for Fᵢ, +starting with an initial guess of f(Ûᵢ, T̂ᵢ) (or, equivalently, to solve for Uᵢ, +starting with an initial guess of Ûᵢ). + +In order to obtain a Rosenbrock method, we must modify this equation for Fᵢ. By using only a single Newton iteration, we break the assumptions that provide guarantees about the convergence of the DIRK method. To remedy this, we introduce an additional lower triangular s×s matrix of -coefficients γ, and we redefine - kᵢ := Δt * f(Ûᵢ, T̂ᵢ) + Δt * J(Ûᵢ, T̂ᵢ) * ∑_{j=1}^i γᵢⱼ * kⱼ + - Δt^2 * ḟ(Ûᵢ, T̂ᵢ) * ∑_{j=1}^i γᵢⱼ ∀ i ∈ 1:s. 
+coefficients γ, and we redefine Fᵢ to be the solution to + Fᵢ = f(Ûᵢ, T̂ᵢ) + Δt * J(Ûᵢ, T̂ᵢ) * ∑_{j=1}^i γᵢⱼ * Fⱼ + + Δt * ḟ(Ûᵢ, T̂ᵢ) * ∑_{j=1}^i γᵢⱼ. In other words, we account for the higher-order terms dropped by the first-order -Taylor series expansion by taking carefully chosen linear combinations of kⱼ. +Taylor series expansion by taking carefully chosen linear combinations of Fⱼ. Since this effectively replaces aᵢᵢ with entries in γ, the diagonal entries can -be omitted from a, making it strictly lower triangular. +now be omitted from a, making it strictly lower triangular. For a "modified Rosenbrock method", the coefficients are chosen so that J(Ûᵢ, T̂ᵢ) and ḟ(Ûᵢ, T̂ᵢ) can be approximated by their values at time t, J(u, t) @@ -73,79 +55,309 @@ For a "W-method", the approximation can be even coarser; e.g., the values at a previous timestep, or the values at a precomputed reference state, or even approximations that do not correspond to any particular state. So, we will modify the last equation by replacing J(Ûᵢ, T̂ᵢ) and ḟ(Ûᵢ, T̂ᵢ) with -some approximations J and ḟ (whose values should be governed by the constraints -of the Rosenbrock method being used), giving us - kᵢ := Δt * f(Ûᵢ, T̂ᵢ) + Δt * J * ∑_{j=1}^i γᵢⱼ * kⱼ + - Δt^2 * ḟ * ∑_{j=1}^i γᵢⱼ ∀ i ∈ 1:s. - -Implementing a way to solve the last equation for kᵢ would require evaluating -the matrix-vector product J(Ûᵢ, T̂ᵢ) * ∑_{j=1}^i γᵢⱼ * kⱼ, which is more -computationally expensive than taking linear combinations of vectors. -This can be avoided by introducing the new variables - Kᵢ := ∑_{j=1}^i γᵢⱼ * kⱼ ∀ i ∈ 1:s. -We can rewrite this, along with the last equations for u_next and Ûᵢ, as matrix -equations: - K = γ * k, - u_next = u + bᵀ * k, and - Û = u + a * k. -In these equations, k, K, and U are matrices whose i-th rows are kᵢ, Kᵢ, and Uᵢ, -respectively; the last equation uses the fact that a is now strictly lower -triangular. 
-If γᵢᵢ != 0 ∀ i ∈ 1:s, then, γ is invertible, so the first equation implies that - k = γ⁻¹ * K. -Substituting this into the two remaining matrix equations gives us - u_next = u + bᵀγ⁻¹ * K and - U = u + aγ⁻¹ * K. -Since γ is lower triangular, γ⁻¹ is also lower triangular, and, since a is -strictly lower triangular, aγ⁻¹ is also strictly lower triangular, which means -that we can rewrite the last three equations as - kᵢ = ∑_{j=1}^i (γ⁻¹)ᵢⱼ * Kⱼ, - u_next = u + ∑_{i=1}^s (bᵀγ⁻¹)ᵢ * Kᵢ, and - Ûᵢ = u + ∑_{j=1}^{i-1} (aγ⁻¹)ᵢⱼ * Kⱼ. -Also, since γ is lower triangular, (γ⁻¹)ᵢᵢ = 1/γᵢᵢ, which means that - kᵢ = 1/γᵢᵢ * Kᵢ + ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ. -Using this new expression for kᵢ, we can rewrite the last equation for kᵢ as - 1/γᵢᵢ * Kᵢ + ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ = - = Δt * f(Ûᵢ, T̂ᵢ) + Δt * J * Kᵢ + Δt^2 * ḟ * ∑_{j=1}^i γᵢⱼ. -Solving for Kᵢ gives us - Kᵢ = (I - Δt * γᵢᵢ * J) \\ - ( - Δt^2 * ḟ * ∑_{j=1}^i γᵢᵢ * γᵢⱼ - ∑_{j=1}^{i-1} γᵢᵢ * (γ⁻¹)ᵢⱼ * Kⱼ + - Δt * γᵢᵢ * f(Ûᵢ, T̂ᵢ) - ). - -So, in summary, we have that, for some lower triangular s×s matrix γ with -nonzero diagonal entries, some strictly lower triangular s×s matrix a, some -vector b of length s, and some approximations J and ḟ of ∂f/∂u(Ûᵢ, T̂ᵢ) and -∂f/∂t(Ûᵢ, T̂ᵢ), - u_next := u + ∑_{i=1}^s (bᵀγ⁻¹)ᵢ * Kᵢ, where - Ûᵢ := u + ∑_{j=1}^{i-1} (aγ⁻¹)ᵢⱼ * Kⱼ, +some approximations J and ḟ (whose values must satisfy the constraints of the +specific Rosenbrock method being used), giving us + Fᵢ = f(Ûᵢ, T̂ᵢ) + Δt * J * ∑_{j=1}^i γᵢⱼ * Fⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ. +Solving this equation for Fᵢ lets us redefine + Fᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * ( + f(Ûᵢ, T̂ᵢ) + Δt * J * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ + + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + ). +Since multiplying ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ by J is more computationally expensive +than subtracting it from another value, we will rewrite this as + Fᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * ( + f(Ûᵢ, T̂ᵢ) + γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ + + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + ) - γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ. 
+ +So, in summary, a Rosenbrock method is defined by some lower triangular s×s +matrix γ, some strictly lower triangular s×s matrix a, some vector b of length +s, and some approximations J and ḟ of ∂f/∂u(Ûᵢ, T̂ᵢ) and ∂f/∂t(Ûᵢ, T̂ᵢ), which +are all used to compute + u_next := u + Δt * ∑_{i=1}^s bᵢ * Fᵢ, where + Ûᵢ := u + Δt * ∑_{j=1}^{i-1} aᵢⱼ * Fⱼ, T̂ᵢ := t + Δt * ∑_{j=1}^{i-1} aᵢⱼ, and - Kᵢ := (I - Δt * γᵢᵢ * J) \\ - ( - Δt^2 * ḟ * ∑_{j=1}^i γᵢᵢ * γᵢⱼ - ∑_{j=1}^{i-1} γᵢᵢ * (γ⁻¹)ᵢⱼ * Kⱼ + - Δt * γᵢᵢ * f(Ûᵢ, T̂ᵢ) - ) ∀ i ∈ 1:s. - -Now, suppose that, instead of being provided with f(u, t), we are provided with -g(u₊, u, t, Δt), where + Fᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * ( + f(Ûᵢ, T̂ᵢ) + γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ + + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + ) - γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ. + +## Converting to Matrix Form + +To simplify our further reformulations, we will convert our definitions to +matrix form. + +First, we will reduce the number of matrix equations we need to analyze by +defining + âᵢⱼ := i < s ? a₍ᵢ₊₁₎ⱼ : bⱼ and + Û⁺ᵢ := i < s ? Ûᵢ₊₁ : u_next. +We can then redefine + u_next := Û⁺ₛ and + Ûᵢ := i == 1 ? u : Û⁺ᵢ₋₁, where + Û⁺ᵢ := u + Δt * ∑_{j=1}^i âᵢⱼ * Fⱼ. + +The equations we will convert to matrix form are + Û⁺ᵢ = u + Δt * ∑_{j=1}^i âᵢⱼ * Fⱼ and + Fᵢ = f(Ûᵢ, T̂ᵢ) + Δt * J * ∑_{j=1}^i γᵢⱼ * Fⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ. +Rewriting these with an explicit element index n ∈ 1:N (where N = length(u)) +gives us + Û⁺ᵢ[n] = u[n] + Δt * ∑_{j=1}^i âᵢⱼ * Fⱼ[n] and + Fᵢ[n] = f(Ûᵢ, T̂ᵢ)[n] + Δt * ∑_{m=1}^N J[n,m] * ∑_{j=1}^i γᵢⱼ * Fⱼ[m] + + Δt * ḟ[n] * ∑_{j=1}^i γᵢⱼ. +Now, we will formally define the vectors and matrices + 𝟙 ∈ ℝˢ | 𝟙ᵢ := 1, + u ∈ ℝᴺ | uₙ := u[n], + ḟ ∈ ℝᴺ | ḟₙ := ḟ[n], and + Û⁺ ∈ ℝᴺ×ℝˢ | Û⁺ₙᵢ := Û⁺ᵢ[n], + F ∈ ℝᴺ×ℝˢ | Fₙᵢ := Fᵢ[n], + F̂ ∈ ℝᴺ×ℝˢ | F̂ₙᵢ := f(Ûᵢ, T̂ᵢ)[n], + J ∈ ℝᴺ×ℝᴺ | Jₙₘ := J[n,m]. +If J and ḟ are different for each stage, they may be replaced with tensors in +the following manipulations. 
+We then have that + Û⁺ₙᵢ = uₙ * 𝟙ᵢ + Δt * ∑_{j=1}^i Fₙⱼ * âᵢⱼ and + Fₙᵢ = F̂ₙᵢ + Δt * ∑_{m=1}^N Jₙₘ * ∑_{j=1}^i Fₘⱼ * γᵢⱼ + + Δt * ḟₙ * ∑_{j=1}^i 𝟙ⱼ * γᵢⱼ. +We can rewrite this as + Û⁺ₙᵢ = uₙ * (𝟙ᵀ)ᵢ + Δt * ∑_{j=1}^s Fₙⱼ * (âᵀ)ⱼᵢ and + Fₙᵢ = F̂ₙᵢ + Δt * ∑_{m=1}^N Jₙₘ * ∑_{j=1}^s Fₘⱼ * (γᵀ)ⱼᵢ + + Δt * ḟₙ * ∑_{j=1}^s (𝟙ᵀ)ⱼ * (γᵀ)ⱼᵢ. +Combining matrix-matrix and matrix-vector products gives us + Û⁺ₙᵢ = uₙ * (𝟙ᵀ)ᵢ + Δt * (F * âᵀ)ₙᵢ and + Fₙᵢ = F̂ₙᵢ + Δt * (J * F * γᵀ)ₙᵢ + Δt * ḟₙ * (γ * 𝟙)ᵢ. +Dropping the indices then tells us that + Û⁺ = u ⊗ 𝟙ᵀ + Δt * F * âᵀ and + F = F̂ + Δt * J * F * γᵀ + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. +To rewrite this in a way that more directly corresponds to our last formulation, +we will define the functions diag() and lower(), such that, for any lower +triangular matrix M, + M = diag(M) + lower(M), where + diag(M)ᵢⱼ := i == j ? Mᵢⱼ : 0 and + lower(M)ᵢⱼ := i > j ? Mᵢⱼ : 0. +This lets us rewrite the equation for F as + F * (I + diag(γ)⁻¹ * lower(γ))ᵀ - + Δt * J * F * (I + diag(γ)⁻¹ * lower(γ))ᵀ * diag(γ)ᵀ = + = F̂ + F * (diag(γ)⁻¹ * lower(γ))ᵀ + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. + +We will now use these matrix equations to define two reformulations: one that +optimizes performance by eliminating the subtraction after the linear solve, and +one that enables limiters by appropriately modifying the value being subtracted. + +## Eliminating the Subtraction + +Let us define a new N×s matrix + K := F * γᵀ. +We then have that + F = K * (γ⁻¹)ᵀ. +This allows us to rewrite the matrix equations as + Û⁺ = u ⊗ 𝟙ᵀ + Δt * K * (â * γ⁻¹)ᵀ and + K * (γ⁻¹)ᵀ = F̂ + Δt * J * K + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. +Since γ is lower triangular, + γ⁻¹ = diag(γ⁻¹) + lower(γ⁻¹) = diag(γ)⁻¹ + lower(γ⁻¹). +This lets us rewrite the equation for K as + K * (diag(γ)⁻¹ + lower(γ⁻¹))ᵀ = F̂ + Δt * J * K + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. +Multiplying through on the right by diag(γ)ᵀ and rearranging gives us + K - Δt * J * K * diag(γ)ᵀ = + (F̂ - K * lower(γ⁻¹)ᵀ + Δt * ḟ ⊗ (γ * 𝟙)ᵀ) * diag(γ)ᵀ. 
+Taking this and the equation for Û⁺ back out of matrix form gives us + Û⁺ᵢ = u + Δt * ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ and + Kᵢ - Δt * γᵢᵢ * J * Kᵢ = + = γᵢᵢ * + (f(Ûᵢ, T̂ᵢ) - ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ). +Thus, to optimize performance, we can redefine + Û⁺ᵢ := u + Δt * ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ, where + Kᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * γᵢᵢ * ( + f(Ûᵢ, T̂ᵢ) - ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + ). + +## Enabling Limiters + +In the previous section, we changed the temporary variable from Fᵢ, which is an +approximation of f(Uᵢ, Tᵢ), to Kᵢ, which is a linear combination of previously +computed values of Fᵢ. +Now, we will change the temporary variable to Vᵢ, so that the approximations of +Uᵢ and u_next will be linear combinations of previously computed values of Vᵢ. +In other words, we will make it so that Û⁺ is a linear transformation of the +temporary variable, rather than an affine transformation, absorbing u into the +temporary variable. +So, consider a lower triangular s×s matrix β and an N×s matrix V such that + Û⁺ = V * βᵀ. +This allows us to rewrite the matrix equations as + V * βᵀ = u ⊗ 𝟙ᵀ + Δt * F * âᵀ and + F = F̂ + Δt * J * F * γᵀ + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. +Solving the first equation for F tells us that + F = Δt⁻¹ * V * (â⁻¹ * β)ᵀ - Δt⁻¹ * u ⊗ (â⁻¹ * 𝟙)ᵀ. +Substituting this into the second equation gives us + Δt⁻¹ * V * (â⁻¹ * β)ᵀ - Δt⁻¹ * u ⊗ (â⁻¹ * 𝟙)ᵀ = + = F̂ + J * V * (γ * â⁻¹ * β)ᵀ - J * u ⊗ (γ * â⁻¹ * 𝟙)ᵀ + + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. +Multiplying through on the right by Δt * (β⁻¹ * â * γ⁻¹)ᵀ and rearranging +results in + V * (β⁻¹ * â * γ⁻¹ * â⁻¹ * β)ᵀ - Δt * J * V = + = u ⊗ (β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙)ᵀ - Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ + + Δt * F̂ * (β⁻¹ * â * γ⁻¹)ᵀ + Δt² * ḟ ⊗ (β⁻¹ * â * 𝟙)ᵀ. +Since γ, â, and β are all lower triangular, β⁻¹ * â * γ⁻¹ * â⁻¹ * β is also +lower triangular, which means that + β⁻¹ * â * γ⁻¹ * â⁻¹ * β = + diag(β⁻¹ * â * γ⁻¹ * â⁻¹ * β) + lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β). 
+In addition, we have that + diag(β⁻¹ * â * γ⁻¹ * â⁻¹ * β) = + = diag(β⁻¹) * diag(â) * diag(γ⁻¹) * diag(â⁻¹) * diag(β) = + = diag(β)⁻¹ * diag(â) * diag(γ)⁻¹ * diag(â)⁻¹ * diag(β) = + = diag(γ)⁻¹. +Combining the last two expressions gives us + β⁻¹ * â * γ⁻¹ * â⁻¹ * β = diag(γ)⁻¹ + lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β). +Substituting this into the last equation for V gives us + V * (diag(γ)⁻¹)ᵀ + V * lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β)ᵀ - Δt * J * V = + = u ⊗ (β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙)ᵀ - Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ + + Δt * F̂ * (β⁻¹ * â * γ⁻¹)ᵀ + Δt² * ḟ ⊗ (β⁻¹ * â * 𝟙)ᵀ. +Multiplying through on the right by diag(γ)ᵀ and rearranging tells us that + V - Δt * J * V * diag(γ)ᵀ = + = u ⊗ (diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙)ᵀ - + Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ * diag(γ)ᵀ + + Δt * F̂ * (diag(γ) * β⁻¹ * â * γ⁻¹)ᵀ + + Δt² * ḟ ⊗ (diag(γ) * β⁻¹ * â * 𝟙)ᵀ - + V * (diag(γ) * lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β))ᵀ. +Since lower() preserves multiplication by a diagonal matrix, we can rewrite +this as + V - Δt * J * V * diag(γ)ᵀ = + = u ⊗ (diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙)ᵀ - + Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ * diag(γ)ᵀ + + Δt * F̂ * (diag(γ) * β⁻¹ * â * γ⁻¹)ᵀ + + Δt² * ḟ ⊗ (diag(γ) * β⁻¹ * â * 𝟙)ᵀ - + V * lower(diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * β)ᵀ. + +We will now show that this reformulation will not allow us to eliminate +multiplications by J, as the previous ones did. +If we wanted to factor out all multiplications by J when we convert back out of +matrix form, we would rearrange the last equation to get + (V - u ⊗ (β⁻¹ * 𝟙)ᵀ) - Δt * J * (V - u ⊗ (β⁻¹ * 𝟙)ᵀ) * diag(γ)ᵀ = + = u ⊗ ((diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ - β⁻¹) * 𝟙)ᵀ + + Δt * F̂ * (diag(γ) * β⁻¹ * â * γ⁻¹)ᵀ + + Δt² * ḟ ⊗ (diag(γ) * β⁻¹ * â * 𝟙)ᵀ - + V * lower(diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * β)ᵀ. +In order to apply limiters on an unscaled state, we would then require that the +coefficient of u on the right-hand side of this equation be 𝟙ᵀ; i.e., that + (diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ - β⁻¹) * 𝟙 = 𝟙. 
+In general, this equation does not have a solution β; e.g., if γ = â = d * I for
+some scalar constant d, then it simplifies to
+ (β⁻¹ - β⁻¹) * 𝟙 = 𝟙.
+Even if we were to use more complex transformations, it would still be
+impossible to eliminate multiplications by J while preserving an unscaled state
+on the right-hand side.
+For example, if we were to instead set Û⁺ = u ⊗ δᵀ + V * βᵀ for some vector δ of
+length s, the above equation would become
+ (diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ - β⁻¹) * (δ - 𝟙) = 𝟙.
+This also does not have a general solution.
+
+So, we will proceed without rearranging the last equation for V, and we will
+require that the coefficient of u in it be 𝟙ᵀ; i.e., that
+ diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙 = 𝟙.
+This equation has infinitely many solutions; the easiest way to obtain a
+solution is to set
+ diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ = I.
+This implies that
+ β⁻¹ = diag(γ)⁻¹ * â * γ * â⁻¹ and
+ β = â * γ⁻¹ * â⁻¹ * diag(γ).
+Substituting this into the last equation for V gives us
+ V - Δt * J * V * diag(γ)ᵀ =
+ = u ⊗ 𝟙ᵀ - Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ * diag(γ)ᵀ + Δt * F̂ * âᵀ +
+ Δt² * ḟ ⊗ (â * γ * 𝟙)ᵀ - V * lower(β)ᵀ.
+Taking this and the equation Û⁺ = V * βᵀ back out of matrix form gives us
+ Û⁺ᵢ = ∑_{j=1}^i βᵢⱼ * Vⱼ and
+ Vᵢ - Δt * γᵢᵢ * J * Vᵢ =
+ = u - Δt * γᵢᵢ * J * u * ∑_{j=1}^i (β⁻¹)ᵢⱼ +
+ Δt * ∑_{j=1}^i âᵢⱼ * f(Ûⱼ, T̂ⱼ) + Δt² * ḟ * ∑_{j=1}^i (â * γ)ᵢⱼ -
+ ∑_{j=1}^{i-1} βᵢⱼ * Vⱼ.
+Thus, to enable the use of limiters, we can redefine
+ Û⁺ᵢ := ∑_{j=1}^i βᵢⱼ * Vⱼ, where
+ Vᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * (
+ u - Δt * γᵢᵢ * J * u * ∑_{j=1}^i (β⁻¹)ᵢⱼ +
+ Δt * ∑_{j=1}^i âᵢⱼ * f(Ûⱼ, T̂ⱼ) + Δt² * ḟ * ∑_{j=1}^i (â * γ)ᵢⱼ -
+ ∑_{j=1}^{i-1} βᵢⱼ * Vⱼ
+ ).
+
+To actually use a limiter, we must replace the use of f(u, t) in the equation
+for Vᵢ with a new function g(u₊, u, t, Δt), where
 g(u₊, u, t, Δt) := u₊ + Δt * f(u, t).
-This is useful if, for example, we have some tendency f̃(u, t), but, instead of -just using it to increment a state, we want to apply a filter/limiter L after -incrementing: +Internally, g can use a different tendency function f̃(u, t), and it can apply a +limiter L after incrementing a state with the output of f̃, so that g(u₊, u, t, Δt) = L(u₊ + Δt * f̃(u, t)). -Rewriting the last definition of Kᵢ in terms of g instead of f gives us - Kᵢ := (I - Δt * γᵢᵢ * J) \\ - g( - Δt^2 * ḟ * ∑_{j=1}^i γᵢᵢ * γᵢⱼ - ∑_{j=1}^{i-1} γᵢᵢ * (γ⁻¹)ᵢⱼ * Kⱼ, - Ûᵢ, - T̂ᵢ, - Δt * γᵢᵢ, - ) ∀ i ∈ 1:s. +Rewriting the last definition of Vᵢ in terms of g instead of f gives us + Vᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * g( + u - Δt * γᵢᵢ * J * u * ∑_{j=1}^i (β⁻¹)ᵢⱼ + + Δt * ∑_{j=1}^{i-1} âᵢⱼ * f(Ûⱼ, T̂ⱼ) + + Δt² * ḟ * ∑_{j=1}^i (â * γ)ᵢⱼ - ∑_{j=1}^{i-1} βᵢⱼ * Vⱼ, + Ûᵢ, + T̂ᵢ + Δt * âᵢᵢ, + ). =# -struct RosenbrockAlgorithm{γ, a, b, L} +struct RosenbrockAlgorithm{γ, a, b, L, M} linsolve:::L + multiply::M +end +RosenbrockAlgorithm{γ, a, b}( + linsolve::L, + multiply::M = nothing, +) where {γ, a, b, L, M} = RosenbrockAlgorithm{γ, a, b, L, M}(linsolve, multiply) + +function check_valid_parameters(::RosenbrockAlgorithm{γ, a, b}) where {γ, a, b} + +end + +struct RosenbrockCache{C, L, M} + _cache::C + linsolve!::L + multiply!::M +end + +# TODO: Minimize allocations by reusing temporary variables after they are no +# longer needed. +function cache( + prob::DiffEqBase.AbstractODEProblem, + alg::RosenbrockAlgorithm; + kwargs... +) + check_valid_parameters(alg) + s = length(b) + increment_mode = prob.f isa ForwardEulerODEFunction + u_prototype = prob.u0 + j_prototype = prob.f.jac_prototype + linsolve! = alg.linsolve(Val{:init}, j_prototype, u_prototype) + if isnothing(alg.multiply) + if increment_mode + error("RosenbrockAlgorithm.multiply must be specified when using a \ + ForwardEulerODEFunction") + end + multiply! = nothing + else + multiply! 
= alg.multiply(Val{:init}, j_prototype, u_prototype) + end + _cache = NamedTuple(( + :Û => similar(u_prototype), + (increment_mode ? (F̂s => map(i -> similar(u_prototype), 1:s),) : ())..., + (increment_mode ? :Vs : :Ks) => map(i -> similar(u_prototype), 1:s), + )) + C = typeof(_cache) + L = typeof(linsolve!) + M = typeof(multiply!) + return RosenbrockCache{C, L, M}(_cache, linsolve!, multiply!) +end + +step_u!(integrator, cache::RosenbrockCache) = + rosenbrock_step_u!(integrator, cache, integrator.prob.f) + +function precomputed_values( + ::RosenbrockAlgorithm{γ, a, b}, + ::ForwardEulerODEFunction +) where {γ, a, b} + +end + +function rosenbrock_step_u!(integrator, cache, f::ForwardEulerODEFunction) + (; u, p, t, dt, alg) = integrator + (; linsolve!, multiply!) = cache + (; Û, F̂s, Vs) = cache._cache end -RosenbrockAlgorithm{γ, a, b}(linsolve::L) where {γ, a, b, L} = - RosenbrockAlgorithm{γ, a, b, L}(linsolve) From 567a430ce7f938ea03ea27fcfa50475c3a884e8e Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Sun, 18 Sep 2022 21:04:52 -0700 Subject: [PATCH 04/21] Add Rosenbrock tableaus --- src/solvers/rosenbrock_tableaus.jl | 94 ++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 src/solvers/rosenbrock_tableaus.jl diff --git a/src/solvers/rosenbrock_tableaus.jl b/src/solvers/rosenbrock_tableaus.jl new file mode 100644 index 00000000..78ca991d --- /dev/null +++ b/src/solvers/rosenbrock_tableaus.jl @@ -0,0 +1,94 @@ +export Rosenbrock23, SSPKnoth, ROS3, RODAS3 + +using StaticArrays: @SArray + +#= +Rosenbrock23 in OrdinaryDiffEq and ode23s in MATLAB (2 stages, 3rd order convergence) + +From Section 4.1 of "The MATLAB ODE Suite" by L. F. Shampine and M. W. 
Reichelt

The paper does not directly give the method coefficients, but, converting to our
notation, it specifies that
 Û₁ = u, T̂₁ = t,
 F₁ = (I - Δt * 1/(2+√2) * J)⁻¹ * (f(Û₁, T̂₁) + Δt * 1/(2+√2) * ḟ),
 Û₂ = u + Δt * 1/2 * F₁, T̂₂ = t + Δt * 1/2,
 F₂ = (I - Δt * 1/(2+√2) * J)⁻¹ * (f(Û₂, T̂₂) - F₁ + Δt * 0 * ḟ) + F₁, and
 u_next = u + Δt * (0 * F₁ + 1 * F₂)
This implies the following table of coefficients:
=#
const Rosenbrock23 = RosenbrockAlgorithm{
 @SArray([
 1/(2+√2) 0;
 -1/(2+√2) 1/(2+√2);
 ]),
 @SArray([
 0 0;
 1/2 0;
 ]),
 @SArray([0, 1]),
}

#=
Custom Rosenbrock scheme from Oswald Knoth
=#
const SSPKnoth = RosenbrockAlgorithm{
 @SArray([
 1 0 0;
 0 1 0;
 -3/4 -3/4 1;
 ]),
 @SArray([
 0 0 0;
 1 0 0;
 1/4 1/4 0;
 ]),
 @SArray([1/6, 1/6, 2/3]),
}

#=
3 stages, 3rd order convergence

From Section 4 of "Benchmarking Stiff ODE Solvers" by A. Sandu et. al.
=#
const ROS3 = begin
 γᵢᵢ = 0.43586652150845899941601945119356
 γ₂₁ = -0.19294655696029095575009695436041
 γ₃₂ = 1.74927148125794685173529749738960
 b₁ = -0.75457412385404315829818998646589
 b₂ = 1.94100407061964420292840123379419
 b₃ = -0.18642994676560104463021124732829
 RosenbrockAlgorithm{
 @SArray([
 γᵢᵢ 0 0;
 γ₂₁ γᵢᵢ 0;
 0 γ₃₂ γᵢᵢ;
 ]),
 @SArray([
 0 0 0;
 γᵢᵢ 0 0;
 γᵢᵢ 0 0;
 ]),
 @SArray([b₁, b₂, b₃]),
 }
end

#=
4 stages, 3rd order convergence

From Section 4 of "Benchmarking Stiff ODE Solvers" by A. Sandu et. al. 
+=# +const RODAS3 = RosenbrockAlgorithm{ + @SArrray([ + 1/2 0 0 0; + 1 1/2 0 0; + -1/4 -1/4 1/2 0; + 1/12 1/12 -2/3 1/2; + ]), + @SArray([ + 0 0 0 0; + 0 0 0 0; + 1 0 0 0; + 3/4 -1/4 1/2 0; + ]), + @SArray([5/6, -1/6, -1/6, 1/2]), +} From f10a47c0049bde741efd6b1d52d14c614b86a145 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Mon, 19 Sep 2022 13:28:19 -0700 Subject: [PATCH 05/21] WIP --- src/solvers/rosenbrock.jl | 192 ++++++++++++++++++++++++++--- src/solvers/rosenbrock_tableaus.jl | 7 +- test/convergence.jl | 19 +++ test/problems.jl | 8 ++ 4 files changed, 203 insertions(+), 23 deletions(-) diff --git a/src/solvers/rosenbrock.jl b/src/solvers/rosenbrock.jl index 5c8430fd..de4da780 100644 --- a/src/solvers/rosenbrock.jl +++ b/src/solvers/rosenbrock.jl @@ -112,8 +112,8 @@ Now, we will formally define the vectors and matrices F ∈ ℝᴺ×ℝˢ | Fₙᵢ := Fᵢ[n], F̂ ∈ ℝᴺ×ℝˢ | F̂ₙᵢ := f(Ûᵢ, T̂ᵢ)[n], J ∈ ℝᴺ×ℝᴺ | Jₙₘ := J[n,m]. -If J and ḟ are different for each stage, they may be replaced with tensors in -the following manipulations. +(If J and ḟ are different for each stage, they may be replaced with tensors in +all of the following manipulations.) We then have that Û⁺ₙᵢ = uₙ * 𝟙ᵢ + Δt * ∑_{j=1}^i Fₙⱼ * âᵢⱼ and Fₙᵢ = F̂ₙᵢ + Δt * ∑_{m=1}^N Jₙₘ * ∑_{j=1}^i Fₘⱼ * γᵢⱼ + @@ -143,7 +143,7 @@ We will now use these matrix equations to define two reformulations: one that optimizes performance by eliminating the subtraction after the linear solve, and one that enables limiters by appropriately modifying the value being subtracted. -## Eliminating the Subtraction +## Optimizing Performance Let us define a new N×s matrix K := F * γᵀ. @@ -240,7 +240,7 @@ In order to apply limiters on an unscaled state, we would then require that the coefficient of u on the right-hand side of this equation be 𝟙ᵀ; i.e., that (diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ - β⁻¹) * 𝟙 = 𝟙. 
In general, this equation does not have a solution β; e.g., if γ = â = d * I for -some scalar constant d, then it simplifies to +some scalar constant d, then the equation simplifies to (β⁻¹ - β⁻¹) * 𝟙 = 𝟙. Even if we were to use more complex transformations, it would still be impossible to eliminate multiplications by J while preserving an unscaled state @@ -292,20 +292,107 @@ Rewriting the last definition of Vᵢ in terms of g instead of f gives us T̂ᵢ Δt * âᵢᵢ, ). + +## Negating Wfact + +For some reason, OrdinaryDiffEq defines Wfact as Δt * γᵢᵢ * J - I, so we must +negate all of our temporary variables. + +For the original formulation, we have that + Ûᵢ := u - Δt * ∑_{j=1}^{i-1} aᵢⱼ * Fⱼ, where + Fᵢ := (Δt * γᵢᵢ * J - I)⁻¹ * ( + f(Ûᵢ, T̂ᵢ) + γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ + + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + ) - γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ. +For the performance formulation, we have that + Û⁺ᵢ := u - Δt * ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ, where + Kᵢ := (Δt * γᵢᵢ * J - I)⁻¹ * γᵢᵢ * ( + f(Ûᵢ, T̂ᵢ) - ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + ). +For the limiters formulation, we have that + Û⁺ᵢ := -∑_{j=1}^i βᵢⱼ * Vᵢ, where + Vᵢ := (Δt * γᵢᵢ * J - I)⁻¹ * g( + u - Δt * γᵢᵢ * J * u * ∑_{j=1}^i (β⁻¹)ᵢⱼ + + Δt * ∑_{j=1}^{i-1} âᵢⱼ * f(Ûⱼ, T̂ⱼ) + + Δt² * ḟ * ∑_{j=1}^i (â * γ)ᵢⱼ - ∑_{j=1}^{i-1} βᵢⱼ * Vⱼ, + Ûᵢ, + T̂ᵢ + Δt * âᵢᵢ, + ). 
=# +import LinearAlgebra +import StaticArrays: SUnitRange, SOneTo -struct RosenbrockAlgorithm{γ, a, b, L, M} - linsolve:::L +struct RosenbrockAlgorithm{γ, a, b, L, M} <: DistributedODEAlgorithm + linsolve::L multiply::M end -RosenbrockAlgorithm{γ, a, b}( +RosenbrockAlgorithm{γ, a, b}(; linsolve::L, multiply::M = nothing, ) where {γ, a, b, L, M} = RosenbrockAlgorithm{γ, a, b, L, M}(linsolve, multiply) -function check_valid_parameters(::RosenbrockAlgorithm{γ, a, b}) where {γ, a, b} - +lower_plus_diag(matrix::T) where {T} = T(LinearAlgebra.LowerTriangular(matrix)) +diag(matrix::T) where {T} = T(LinearAlgebra.Diagonal(matrix)) +lower(matrix) = lower_plus_diag(matrix) - diag(matrix) +to_enumerated_rows(v::AbstractVector) = v +function to_enumerated_rows(m::AbstractMatrix) + rows = tuple(1:size(m, 1)...) + nonzero_indices = map(i -> findall(m[i, :] .!= 0), rows) + enumerated_rows = map( + i -> tuple(zip(nonzero_indices[i], m[i, nonzero_indices[i]])...), + rows, + ) + return enumerated_rows end +linear_combination_terms(enumerated_row, vectors) = + map(((j, val),) -> Base.broadcasted(*, val, vectors[j]), enumerated_row) +function set_scaled_linear_combination!(output, enumerated_row, vectors, scale = nothing) + terms = linear_combination_terms(enumerated_row, vectors) + length(terms) > 0 || error("set_linear_combination! needs at least 1 term") + sum = Base.broadcasted(+, terms...) 
+ if isnothing(scale) + Base.materialize!(output, sum) + else + Base.materialize!(output, Base.broadcasted(*, scale, sum)) + end + return nothing +end +function add_scaled_linear_combination!(output, enumerated_row, vectors, scale = nothing) + terms = linear_combination_terms(enumerated_row, vectors) + if length(terms) > 0 + if isnothing(scale) + Base.materialize!(output, Base.broadcasted(+, output, terms...)) + else + scaled_sum = + Base.broadcasted(*, scale, Base.broadcasted(+, terms...)) + Base.materialize!(output, Base.broadcasted(+, output, scaled_sum)) + end + end + return nothing +end + +function check_valid_parameters( + ::Type{<:RosenbrockAlgorithm{γ, a, b}}, +) where {γ, a, b} + γ === lower_plus_diag(γ) || + error("γ must be a lower triangular matrix") + a === lower(a) || + error("a must be a strictly lower triangular matrix") + LinearAlgebra.det(γ) != 0 || + error("non-invertible matrices γ are not yet supported") +end +function check_valid_parameters( + alg_type::Type{<:RosenbrockAlgorithm{γ, a, b}}, + ::Type{<:ForwardEulerODEFunction}, +) where {γ, a, b} + check_valid_parameters(alg_type) + â = vcat(a[SUnitRange(2, length(b)), SOneTo(length(b))], transpose(b)) + LinearAlgebra.det(â) != 0 || + error("non-invertible matrices â are not yet supported when using \ + ForwardEulerODEFunction") +end +check_valid_parameters(alg_type, _) = check_valid_parameters(alg_type) struct RosenbrockCache{C, L, M} _cache::C @@ -317,15 +404,15 @@ end # longer needed. function cache( prob::DiffEqBase.AbstractODEProblem, - alg::RosenbrockAlgorithm; + alg::RosenbrockAlgorithm{γ}; kwargs... -) - check_valid_parameters(alg) - s = length(b) +) where {γ} + check_valid_parameters(typeof(alg), typeof(prob.f)) + s = size(γ, 1) increment_mode = prob.f isa ForwardEulerODEFunction u_prototype = prob.u0 - j_prototype = prob.f.jac_prototype - linsolve! = alg.linsolve(Val{:init}, j_prototype, u_prototype) + W_prototype = prob.f.jac_prototype + linsolve! 
= alg.linsolve(Val{:init}, W_prototype, u_prototype) if isnothing(alg.multiply) if increment_mode error("RosenbrockAlgorithm.multiply must be specified when using a \ @@ -333,12 +420,14 @@ function cache( end multiply! = nothing else - multiply! = alg.multiply(Val{:init}, j_prototype, u_prototype) + multiply! = alg.multiply(Val{:init}, W_prototype, u_prototype) end _cache = NamedTuple(( - :Û => similar(u_prototype), + :Û⁺ => similar(u_prototype), (increment_mode ? (F̂s => map(i -> similar(u_prototype), 1:s),) : ())..., (increment_mode ? :Vs : :Ks) => map(i -> similar(u_prototype), 1:s), + :W => similar(W_prototype), + :ḟ => similar(u_prototype), )) C = typeof(_cache) L = typeof(linsolve!) @@ -350,14 +439,77 @@ step_u!(integrator, cache::RosenbrockCache) = rosenbrock_step_u!(integrator, cache, integrator.prob.f) function precomputed_values( - ::RosenbrockAlgorithm{γ, a, b}, - ::ForwardEulerODEFunction + ::Type{<:RosenbrockAlgorithm{γ, a, b}}, + _, ) where {γ, a, b} + â = vcat(a[SUnitRange(2, length(b)), SOneTo(length(b))], transpose(b)) + lowerγ⁻¹ = lower(inv(γ)) + âγ⁻¹ = â * inv(γ) + â𝟙 = vec(sum(â, dims = 2)) + γ𝟙 = vec(sum(γ, dims = 2)) + diagγ𝟙 = vec(sum(diag(γ), dims = 2)) + return map(to_enumerated_rows, (; lowerγ⁻¹, âγ⁻¹, â𝟙, γ𝟙, diagγ𝟙)) +end +function precomputed_values( + ::Type{<:RosenbrockAlgorithm{γ, a, b}}, + ::Type{<:ForwardEulerODEFunction}, +) where {γ, a, b} + â = vcat(a[SUnitRange(2, length(b)), SOneTo(length(b))], transpose(b)) + âγ = â * γ + β = â * inv(âγ) * diag(γ) + β⁻¹𝟙 = vec(sum(inv(β), dims = 2)) + â𝟙 = vec(sum(â, dims = 2)) + return map(to_enumerated_rows, (; â, âγ, β, β⁻¹𝟙, â𝟙)) end function rosenbrock_step_u!(integrator, cache, f::ForwardEulerODEFunction) (; u, p, t, dt, alg) = integrator (; linsolve!, multiply!) = cache - (; Û, F̂s, Vs) = cache._cache + (; Û, Û₊, F̂s, Vs) = cache._cache + (; â, âγ, β, β⁻¹𝟙, â𝟙) = precomputed_values(typeof(alg), typeof(f)) + s = size(â, 1) + for i in 1:s + Û_prev = i == 1 ? u : Û + T̂_prev = i == 1 ? 
t : t + Δt * â𝟙[i] + + set_scaled_linear_combination!(Û, β[i], Vs, -1) + + + # Vᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * g( + # u - Δt * γᵢᵢ * J * u * ∑_{j=1}^i (β⁻¹)ᵢⱼ + + # Δt * ∑_{j=1}^{i-1} âᵢⱼ * f(Ûⱼ, T̂ⱼ) + + # Δt² * ḟ * ∑_{j=1}^i (â * γ)ᵢⱼ - ∑_{j=1}^{i-1} βᵢⱼ * Vⱼ, + # Ûᵢ, + # T̂ᵢ + # Δt * âᵢᵢ, + # ) + end +end + +function rosenbrock_step_u!(integrator, cache, f) + (; u, p, t, dt, alg) = integrator + (; linsolve!) = cache + (; Û⁺, Ks, W, ḟ) = cache._cache + (; lowerγ⁻¹, âγ⁻¹, â𝟙, γ𝟙, diagγ𝟙) = precomputed_values(typeof(alg), typeof(f)) + @assert all(diagγ𝟙 .== diagγ𝟙[1]) + f.Wfact(W, u, p, t, diagγ𝟙[1]) + !isnothing(f.tgrad) && f.tgrad(ḟ, u, p, t) + for i in 1:length(Ks) + Û = i == 1 ? u : Û⁺ + T̂ = i == 1 ? t : t + dt * â𝟙[i] + + # Kᵢ = (Δt * γᵢᵢ * J - I)⁻¹ * γᵢᵢ * + # (f(Ûᵢ, T̂ᵢ) - ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ) + f(Ks[i], Û, p, T̂) + add_scaled_linear_combination!(Ks[i], lowerγ⁻¹[i], Ks, -1) + !isnothing(f.tgrad) && (Ks[i] .+= dt .* ḟ .* γ𝟙[i]) + Ks[i] .*= diagγ𝟙[i] + linsolve!(Ks[i], W, Ks[i]) # assume linsolve! can handle aliasing + + # Û⁺ᵢ = u - Δt * ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ + Û⁺ .= u + add_scaled_linear_combination!(Û⁺, âγ⁻¹[i], Ks, -dt) + end + u .= Û⁺ end diff --git a/src/solvers/rosenbrock_tableaus.jl b/src/solvers/rosenbrock_tableaus.jl index 78ca991d..5a21e34a 100644 --- a/src/solvers/rosenbrock_tableaus.jl +++ b/src/solvers/rosenbrock_tableaus.jl @@ -3,7 +3,8 @@ export Rosenbrock23, SSPKnoth, ROS3, RODAS3 using StaticArrays: @SArray #= -Rosenbrock23 in OrdinaryDiffEq and ode23s in MATLAB (2 stages, 3rd order convergence) +Rosenbrock23 in OrdinaryDiffEq and ode23s in MATLAB (2 stages, 3rd order +convergence) From Section 4.1 of "The MATLAB ODE Suite" by L. F. Shampine and M. W. 
Reichelt @@ -58,7 +59,7 @@ const ROS3 = begin b₂ = 1.94100407061964420292840123379419 b₃ = -0.18642994676560104463021124732829 RosenbrockAlgorithm{ - @SArrray([ + @SArray([ γᵢᵢ 0 0; γ₂₁ γᵢᵢ 0; 0 γ₃₂ γᵢᵢ; @@ -78,7 +79,7 @@ end From Section 4 of "Benchmarking Stiff ODE Solvers" by A. Sandu et. al. =# const RODAS3 = RosenbrockAlgorithm{ - @SArrray([ + @SArray([ 1/2 0 0 0; 1 1/2 0 0; -1/4 -1/4 1/2 0; diff --git a/test/convergence.jl b/test/convergence.jl index 3c114ece..70197c96 100644 --- a/test/convergence.jl +++ b/test/convergence.jl @@ -64,6 +64,25 @@ end end end +@testset "Rosenbrock Algorithms" begin + algs3 = (Rosenbrock23, SSPKnoth, ROS3, RODAS3) + for (algorithm_names, order) in ((algs3, 3),) + for algorithm_name in algorithm_names + for (problem, solution) in ( + (linear_prob_approximate_wfact, linear_sol), + # (split_linear_prob_wfact_split_fe, linear_sol), + ) + algorithm = algorithm_name(; linsolve = linsolve_direct) + @test isapprox( + convergence_order(problem, solution, algorithm, dts), + order; + atol = 0.02, + ) + end + end + end +end + #= if ArrayType == Array for (prob, sol) in [ diff --git a/test/problems.jl b/test/problems.jl index b314789a..1b3a4d54 100644 --- a/test/problems.jl +++ b/test/problems.jl @@ -45,6 +45,14 @@ linear_prob_wfactt = ODEProblem( ), [1/2],(0.0,1.0),-0.2) +linear_prob_approximate_wfact = ODEProblem( + ODEFunction( + (du,u,p,t) -> (du .= p .* u); + jac_prototype=zeros(ComplexF64,1,1), + Wfact = (W,u,p,γ,t) -> (W[1,1]=γ*real(p)-1), + ), + [1/2 + 0.0*im],(0.0,1.0),-0.2+0.1*im) + split_linear_prob_wfact_split = ODEProblem( SplitFunction( ODEFunction( From ba496fdf403afdef835c4c8df3cf6e16b0e45043 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Mon, 19 Sep 2022 14:37:44 -0700 Subject: [PATCH 06/21] WIP --- src/solvers/imex_ark_tableaus.jl | 37 +------------------------------- 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/src/solvers/imex_ark_tableaus.jl b/src/solvers/imex_ark_tableaus.jl index 89800ec6..c877ec4d 
100644 --- a/src/solvers/imex_ark_tableaus.jl +++ b/src/solvers/imex_ark_tableaus.jl @@ -1,7 +1,7 @@ export ARS111, ARS121, ARS122, ARS233, ARS232, ARS222, ARS343, ARS443 export IMKG232a, IMKG232b, IMKG242a, IMKG242b, IMKG252a, IMKG252b export IMKG253a, IMKG253b, IMKG254a, IMKG254b, IMKG254c, IMKG342a, IMKG343a -export DBM453, HOMMEM1 +export DBM453 using StaticArrays: @SArray, SMatrix, sacollect @@ -150,13 +150,6 @@ const ARS443 = make_IMEXARKAlgorithm(; # appear to be wrong, so they are not included here. Eventually, we should get # the official implementations from the paper's authors. -# a_exp: a_imp: -# 0 0 ⋯ 0 0 0 0 0 ⋯ 0 0 0 -# α[1] 0 ⋱ ⋮ ⋮ ⋮ α̂[1] δ̂[1] ⋱ ⋮ ⋮ 0 -# β[1] α[2] ⋱ 0 0 0 β[1] α̂[2] ⋱ 0 0 0 -# ⋮ 0 ⋱ 0 0 0 ⋮ 0 ⋱ δ̂[s-3] 0 0 -# ⋮ ⋮ ⋱ α[s-2] 0 0 ⋮ ⋮ ⋱ α̂[s-2] δ̂[s-2] 0 -# β[s-2] 0 ⋯ 0 α[s-1] 0 β[s-2] 0 ⋯ 0 α̂[s-1] 0 imkg_exp(i, j, α, β) = i == j + 1 ? α[j] : (i > 2 && j == 1 ? β[i - 2] : 0) imkg_imp(i, j, α̂, β, δ̂) = i == j + 1 ? α̂[j] : (i > 2 && j == 1 ? β[i - 2] : (1 < i <= length(α̂) && i == j ? δ̂[i - 1] : 0)) @@ -324,31 +317,3 @@ const DBM453 = let ]), ) end - -################################################################################ - -# HOMMEM1 algorithm - -# From Section 4.1 of "A framework to evaluate IMEX schemes for atmospheric -# models" by Guba et al. - -# The algorithm has 5 implicit stages, 6 overall stages, and 2rd order accuracy. 
- -const HOMMEM1 = make_IMEXARKAlgorithm(; - a_exp = @SArray([ - 0 0 0 0 0 0; - 1/5 0 0 0 0 0; - 0 1/5 0 0 0 0; - 0 0 1/3 0 0 0; - 0 0 0 1/2 0 0; - 0 0 0 0 1 0; - ]), - a_imp = @SArray([ - 0 0 0 0 0 0; - 0 1/5 0 0 0 0; - 0 0 1/5 0 0 0; - 0 0 0 1/3 0 0; - 0 0 0 0 1/2 0; - 5/18 5/18 0 0 0 8/18; - ]), -) From c635fc9d14cda06fec853c76544b6812a47403e9 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Tue, 20 Sep 2022 19:07:21 -0700 Subject: [PATCH 07/21] WIP --- src/ClimaTimeSteppers.jl | 1 + src/functions.jl | 12 +- src/solvers/rosenbrock.jl | 308 ++++++++++++++++----------- src/solvers/rosenbrock_tableaus.jl | 175 +++++++++++---- src/solvers/update_signal_handler.jl | 46 ++++ test/convergence.jl | 38 +++- test/problems.jl | 11 +- 7 files changed, 421 insertions(+), 170 deletions(-) create mode 100644 src/solvers/update_signal_handler.jl diff --git a/src/ClimaTimeSteppers.jl b/src/ClimaTimeSteppers.jl index c5b6f53e..620618c6 100644 --- a/src/ClimaTimeSteppers.jl +++ b/src/ClimaTimeSteppers.jl @@ -70,6 +70,7 @@ end SciMLBase.allowscomplex(alg::DistributedODEAlgorithm) = true include("integrators.jl") +include("solvers/update_signal_handler.jl") include("solvers/convergence_condition.jl") include("solvers/convergence_checker.jl") include("solvers/newtons_method.jl") diff --git a/src/functions.jl b/src/functions.jl index cd0665bd..f23f5de3 100644 --- a/src/functions.jl +++ b/src/functions.jl @@ -9,9 +9,19 @@ un .= u .+ dt * f(u, p, t) ``` """ -struct ForwardEulerODEFunction{F} <: DiffEqBase.AbstractODEFunction{true} +struct ForwardEulerODEFunction{F, J, W, T} <: + DiffEqBase.AbstractODEFunction{true} f::F + jac_prototype::J + Wfact::W + tgrad::T end +ForwardEulerODEFunction( + f; + jac_prototype = nothing, + Wfact = nothing, + tgrad = nothing, +) = ForwardEulerODEFunction(f, jac_prototype, Wfact, tgrad) (f::ForwardEulerODEFunction{F})(un, u, p, t, dt) where {F} = f.f(un, u, p, t, dt) # Don't wrap a ForwardEulerODEFunction in an ODEFunction. 
diff --git a/src/solvers/rosenbrock.jl b/src/solvers/rosenbrock.jl index de4da780..1f65f0e2 100644 --- a/src/solvers/rosenbrock.jl +++ b/src/solvers/rosenbrock.jl @@ -182,7 +182,7 @@ temporary variable, rather than an affine transformation, absorbing u into the temporary variable. So, consider a lower triangular s×s matrix β and an N×s matrix V such that Û⁺ = V * βᵀ. -This allows us to rewrite the matrix equations as +We can then rewrite the matrix equations as V * βᵀ = u ⊗ 𝟙ᵀ + Δt * F * âᵀ and F = F̂ + Δt * J * F * γᵀ + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. Solving the first equation for F tells us that @@ -250,8 +250,23 @@ length s, the above equation would become (diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ - β⁻¹) * (δ - 𝟙) = 𝟙. This also does not have a general solution. -So, we will proceed without rearranging the last equation for V, and we will -require that the coefficient of u in it be 𝟙ᵀ; i.e., that +So, we must proceed without rearranging the last equation for V. +In order to apply limiters on an unscaled state, we must require that the +coefficient of u in that equation be 𝟙ᵀ, which implies that + diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙 = 𝟙. + +We will now show that we cannot also make the coefficient of the J term on the +right-hand side be the same as the one on the left-hand side. +If we wanted this to be the case, we would also have to satisfy the equation + β⁻¹ * 𝟙 = 𝟙. +In general, we cannot simultaneously satisfy both of the last two equations; +e.g., if γ = d * I for some scalar constant d, we can rearrange the equations to +get that + β * 𝟙 = d * â * γ⁻¹ * â⁻¹ * 𝟙 and β * 𝟙 = 𝟙. +Unless d * â * γ⁻¹ * â⁻¹ * 𝟙 = 𝟙 (which will not be the case in general), this +system of equations cannot be satisfied. + +So, we will only require that β satisfies the equation diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙 = 𝟙. 
This equation has infinitely many solutions; the easiest way to obtain a solution is to set @@ -298,23 +313,23 @@ Rewriting the last definition of Vᵢ in terms of g instead of f gives us For some reason, OrdinaryDiffEq defines Wfact as Δt * γᵢᵢ * J - I, so we must negate all of our temporary variables. -For the original formulation, we have that +For the original formulation, this means that Ûᵢ := u - Δt * ∑_{j=1}^{i-1} aᵢⱼ * Fⱼ, where Fᵢ := (Δt * γᵢᵢ * J - I)⁻¹ * ( - f(Ûᵢ, T̂ᵢ) + γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ + + f(Ûᵢ, T̂ᵢ) - γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ - ) - γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ. -For the performance formulation, we have that + ) + γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ. +For the performance formulation, this means that Û⁺ᵢ := u - Δt * ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ, where Kᵢ := (Δt * γᵢᵢ * J - I)⁻¹ * γᵢᵢ * ( - f(Ûᵢ, T̂ᵢ) - ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + f(Ûᵢ, T̂ᵢ) + ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ ). -For the limiters formulation, we have that +For the limiters formulation, this means that Û⁺ᵢ := -∑_{j=1}^i βᵢⱼ * Vᵢ, where Vᵢ := (Δt * γᵢᵢ * J - I)⁻¹ * g( u - Δt * γᵢᵢ * J * u * ∑_{j=1}^i (β⁻¹)ᵢⱼ + Δt * ∑_{j=1}^{i-1} âᵢⱼ * f(Ûⱼ, T̂ⱼ) + - Δt² * ḟ * ∑_{j=1}^i (â * γ)ᵢⱼ - ∑_{j=1}^{i-1} βᵢⱼ * Vⱼ, + Δt² * ḟ * ∑_{j=1}^i (â * γ)ᵢⱼ + ∑_{j=1}^{i-1} βᵢⱼ * Vⱼ, Ûᵢ, T̂ᵢ Δt * âᵢᵢ, @@ -322,16 +337,34 @@ For the limiters formulation, we have that =# import LinearAlgebra import StaticArrays: SUnitRange, SOneTo +import Base: broadcasted, materialize! 
-struct RosenbrockAlgorithm{γ, a, b, L, M} <: DistributedODEAlgorithm +struct RosenbrockAlgorithm{γ, a, b, U, L, M, S} <: DistributedODEAlgorithm + update_jac::U linsolve::L - multiply::M + multiply!::M + set_Δtγ!::S end RosenbrockAlgorithm{γ, a, b}(; + update_jac::U = UpdateEvery(NewStep()), linsolve::L, - multiply::M = nothing, -) where {γ, a, b, L, M} = RosenbrockAlgorithm{γ, a, b, L, M}(linsolve, multiply) + multiply!::M = nothing, + set_Δtγ!::S = nothing, +) where {γ, a, b, U, L, M, S} = + RosenbrockAlgorithm{γ, a, b, U, L, M, S}( + update_jac, + linsolve, + multiply!, + set_Δtγ!, + ) +@generated foreachval(f::F, ::Val{N}) where {F, N} = + quote + Base.@nexprs $N i -> f(Val(i)) + return nothing + end +triangular_inv(matrix::T) where {T} = + T(inv(LinearAlgebra.LowerTriangular(matrix))) lower_plus_diag(matrix::T) where {T} = T(LinearAlgebra.LowerTriangular(matrix)) diag(matrix::T) where {T} = T(LinearAlgebra.Diagonal(matrix)) lower(matrix) = lower_plus_diag(matrix) - diag(matrix) @@ -345,171 +378,208 @@ function to_enumerated_rows(m::AbstractMatrix) ) return enumerated_rows end -linear_combination_terms(enumerated_row, vectors) = - map(((j, val),) -> Base.broadcasted(*, val, vectors[j]), enumerated_row) -function set_scaled_linear_combination!(output, enumerated_row, vectors, scale = nothing) - terms = linear_combination_terms(enumerated_row, vectors) - length(terms) > 0 || error("set_linear_combination! needs at least 1 term") - sum = Base.broadcasted(+, terms...) 
- if isnothing(scale) - Base.materialize!(output, sum) - else - Base.materialize!(output, Base.broadcasted(*, scale, sum)) - end - return nothing -end -function add_scaled_linear_combination!(output, enumerated_row, vectors, scale = nothing) - terms = linear_combination_terms(enumerated_row, vectors) - if length(terms) > 0 - if isnothing(scale) - Base.materialize!(output, Base.broadcasted(+, output, terms...)) - else - scaled_sum = - Base.broadcasted(*, scale, Base.broadcasted(+, terms...)) - Base.materialize!(output, Base.broadcasted(+, output, scaled_sum)) - end - end - return nothing +linear_combination(enumerated_row, vectors) = + map(((j, val),) -> broadcasted(*, val, vectors[j]), enumerated_row) +function scaled_linear_combination(enumerated_row, vectors, scale) + unscaled_terms = linear_combination(enumerated_row, vectors) + length(unscaled_terms) == 0 && return () + return (broadcasted(*, scale, broadcasted(+, unscaled_terms...)),) end +num_stages(::Type{<:RosenbrockAlgorithm{γ}}) where {γ} = size(γ, 1) + function check_valid_parameters( - ::Type{<:RosenbrockAlgorithm{γ, a, b}}, -) where {γ, a, b} + ::Type{<:RosenbrockAlgorithm{γ, a, b, U}}, +) where {γ, a, b, U} γ === lower_plus_diag(γ) || error("γ must be a lower triangular matrix") a === lower(a) || error("a must be a strictly lower triangular matrix") LinearAlgebra.det(γ) != 0 || - error("non-invertible matrices γ are not yet supported") + error("non-invertible matrices γ are not currently supported") + if U != UpdateEvery{NewStage} + diag(γ) === typeof(γ)(γ[1, 1] * I) || + error("γ must have a uniform diagonal when \ + update_jac != UpdateEvery(NewStage())") + end + can_handle(U, NewStep()) || can_handle(U, NewStage()) || + error("update_jac must be able to handle NewStep() or NewStage()") end function check_valid_parameters( - alg_type::Type{<:RosenbrockAlgorithm{γ, a, b}}, + alg_type::Type{<:RosenbrockAlgorithm{γ, a, b, U, L, M, S}}, ::Type{<:ForwardEulerODEFunction}, -) where {γ, a, b} +) where {γ, 
a, b, U, L, M, S} check_valid_parameters(alg_type) â = vcat(a[SUnitRange(2, length(b)), SOneTo(length(b))], transpose(b)) LinearAlgebra.det(â) != 0 || - error("non-invertible matrices â are not yet supported when using \ - ForwardEulerODEFunction") + error("non-invertible matrices â are not currently supported when \ + using ForwardEulerODEFunction") + M != Nothing || + error("multiply! must be specified when using ForwardEulerODEFunction") + S != Nothing || + error("set_Δtγ! must be specified when using ForwardEulerODEFunction") end check_valid_parameters(alg_type, _) = check_valid_parameters(alg_type) -struct RosenbrockCache{C, L, M} +struct RosenbrockCache{C, U, L} _cache::C + update_jac_cache::U linsolve!::L - multiply!::M end # TODO: Minimize allocations by reusing temporary variables after they are no # longer needed. function cache( prob::DiffEqBase.AbstractODEProblem, - alg::RosenbrockAlgorithm{γ}; + alg::RosenbrockAlgorithm; kwargs... -) where {γ} +) check_valid_parameters(typeof(alg), typeof(prob.f)) - s = size(γ, 1) - increment_mode = prob.f isa ForwardEulerODEFunction + + s = num_stages(typeof(alg)) u_prototype = prob.u0 W_prototype = prob.f.jac_prototype - linsolve! = alg.linsolve(Val{:init}, W_prototype, u_prototype) - if isnothing(alg.multiply) - if increment_mode - error("RosenbrockAlgorithm.multiply must be specified when using a \ - ForwardEulerODEFunction") - end - multiply! = nothing - else - multiply! = alg.multiply(Val{:init}, W_prototype, u_prototype) - end + increment_mode = prob.f isa ForwardEulerODEFunction + _cache = NamedTuple(( - :Û⁺ => similar(u_prototype), - (increment_mode ? (F̂s => map(i -> similar(u_prototype), 1:s),) : ())..., + :Û⁺ᵢ => similar(u_prototype), + (increment_mode ? (:Fs => map(i -> similar(u_prototype), 1:s),) : ())..., (increment_mode ? :Vs : :Ks) => map(i -> similar(u_prototype), 1:s), :W => similar(W_prototype), :ḟ => similar(u_prototype), )) - C = typeof(_cache) - L = typeof(linsolve!) - M = typeof(multiply!) 
- return RosenbrockCache{C, L, M}(_cache, linsolve!, multiply!) + + update_jac_cache = allocate_cache(alg.update_jac) + + linsolve! = alg.linsolve(Val{:init}, W_prototype, u_prototype) + + return RosenbrockCache(_cache, update_jac_cache, linsolve!) end step_u!(integrator, cache::RosenbrockCache) = rosenbrock_step_u!(integrator, cache, integrator.prob.f) -function precomputed_values( +# The precomputed values are too complicated for constant propagation, so we use +# @generated to force the values to be compile-time constants. +@generated function precomputed_values( ::Type{<:RosenbrockAlgorithm{γ, a, b}}, _, ) where {γ, a, b} - â = vcat(a[SUnitRange(2, length(b)), SOneTo(length(b))], transpose(b)) - lowerγ⁻¹ = lower(inv(γ)) - âγ⁻¹ = â * inv(γ) - â𝟙 = vec(sum(â, dims = 2)) - γ𝟙 = vec(sum(γ, dims = 2)) + â = vcat(a[2:end, :], transpose(b)) + γ⁻¹ = triangular_inv(γ) + lowerγ⁻¹ = lower(γ⁻¹) + âγ⁻¹ = â * γ⁻¹ diagγ𝟙 = vec(sum(diag(γ), dims = 2)) - return map(to_enumerated_rows, (; lowerγ⁻¹, âγ⁻¹, â𝟙, γ𝟙, diagγ𝟙)) + γ𝟙 = vec(sum(γ, dims = 2)) + â𝟙 = vec(sum(â, dims = 2)) + values = map(to_enumerated_rows, (; lowerγ⁻¹, âγ⁻¹, diagγ𝟙, γ𝟙, â𝟙)) + return :($values) end - -function precomputed_values( +@generated function precomputed_values( ::Type{<:RosenbrockAlgorithm{γ, a, b}}, ::Type{<:ForwardEulerODEFunction}, ) where {γ, a, b} - â = vcat(a[SUnitRange(2, length(b)), SOneTo(length(b))], transpose(b)) + â = vcat(a[2:end, :], transpose(b)) âγ = â * γ - β = â * inv(âγ) * diag(γ) - β⁻¹𝟙 = vec(sum(inv(β), dims = 2)) + β = â * triangular_inv(âγ) * diag(γ) + lowerâ = lower(â) + lowerβ = lower(β) + diagγ𝟙 = vec(sum(diag(γ), dims = 2)) â𝟙 = vec(sum(â, dims = 2)) - return map(to_enumerated_rows, (; â, âγ, β, β⁻¹𝟙, â𝟙)) + β⁻¹𝟙 = vec(sum(triangular_inv(β), dims = 2)) + âγ𝟙 = vec(sum(âγ, dims = 2)) + diagâ𝟙 = vec(sum(diag(â), dims = 2)) + values = map( + to_enumerated_rows, + (; lowerâ, lowerβ, β, diagγ𝟙, â𝟙, β⁻¹𝟙, âγ𝟙, diagâ𝟙), + ) + return :($values) end -function 
rosenbrock_step_u!(integrator, cache, f::ForwardEulerODEFunction) +function rosenbrock_step_u!(integrator, cache, g::ForwardEulerODEFunction) (; u, p, t, dt, alg) = integrator - (; linsolve!, multiply!) = cache - (; Û, Û₊, F̂s, Vs) = cache._cache - (; â, âγ, β, β⁻¹𝟙, â𝟙) = precomputed_values(typeof(alg), typeof(f)) - s = size(â, 1) - for i in 1:s - Û_prev = i == 1 ? u : Û - T̂_prev = i == 1 ? t : t + Δt * â𝟙[i] - - set_scaled_linear_combination!(Û, β[i], Vs, -1) - - - # Vᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * g( - # u - Δt * γᵢᵢ * J * u * ∑_{j=1}^i (β⁻¹)ᵢⱼ + - # Δt * ∑_{j=1}^{i-1} âᵢⱼ * f(Ûⱼ, T̂ⱼ) + - # Δt² * ḟ * ∑_{j=1}^i (â * γ)ᵢⱼ - ∑_{j=1}^{i-1} βᵢⱼ * Vⱼ, - # Ûᵢ, - # T̂ᵢ - # Δt * âᵢᵢ, - # ) + (; update_jac, multiply!, set_Δtγ!) = alg + (; update_jac_cache, linsolve!) = cache + (; Û⁺ᵢ, Vs, Fs, W, ḟ) = cache._cache + (; lowerâ, lowerβ, β, diagγ𝟙, â𝟙, β⁻¹𝟙, âγ𝟙, diagâ𝟙) = + precomputed_values(typeof(alg), typeof(g)) + function jac_func(Ûᵢ, T̂ᵢ, γᵢᵢ) + g.Wfact(W, Ûᵢ, p, dt * γᵢᵢ, T̂ᵢ) + !isnothing(g.tgrad) && g.tgrad(ḟ, Ûᵢ, p, T̂ᵢ) end + function stage_func(::Val{i}) where {i} + γᵢᵢ = diagγ𝟙[i] + Ûᵢ = i == 1 ? u : Û⁺ᵢ + T̂ᵢ = i == 1 ? t : t + dt * â𝟙[i] + + run!(update_jac, update_jac_cache, NewStage(), jac_func, Ûᵢ, T̂ᵢ, γᵢᵢ) + + # Vᵢ = (Δt * γᵢᵢ * J - I)⁻¹ * g( + # u - Δt * γᵢᵢ * J * u * ∑_{j=1}^i (β⁻¹)ᵢⱼ + + # Δt * ∑_{j=1}^{i-1} âᵢⱼ * f(Ûⱼ, T̂ⱼ) + + # Δt² * ḟ * ∑_{j=1}^i (â * γ)ᵢⱼ + ∑_{j=1}^{i-1} βᵢⱼ * Vⱼ, + # Ûᵢ, + # T̂ᵢ + # Δt * âᵢᵢ, + # ) + set_Δtγ!(W, dt * γᵢᵢ * β⁻¹𝟙[i], dt * γᵢᵢ) + multiply!(Vs[i], W, u) + set_Δtγ!(W, dt * γᵢᵢ, dt * γᵢᵢ * β⁻¹𝟙[i]) + Vs[i] .= broadcasted( + +, + broadcasted(-, Vs[i]), + scaled_linear_combination(lowerâ[i], Fs, dt)..., + (isnothing(g.tgrad) ? () : (broadcasted(*, dt^2 * âγ𝟙[i], ḟ),))..., + linear_combination(lowerβ[i], Vs)..., + ) + Fs[i] .= Vs[i] + g(Vs[i], Ûᵢ, p, T̂ᵢ, dt * diagâ𝟙[i]) + Fs[i] .= (Fs[i] .- Vs[i]) ./ (dt * diagâ𝟙[i]) + linsolve!(Vs[i], W, Vs[i]) # assume that linsolve! 
can handle aliasing + + # Û⁺ᵢ = -∑_{j=1}^i βᵢⱼ * Vᵢ + Û⁺ᵢ .= scaled_linear_combination(β[i], Vs, -1)[1] + end + + run!(update_jac, update_jac_cache, NewStep(), jac_func, u, t, diagγ𝟙[1]) + foreachval(stage_func, Val(num_stages(typeof(alg)))) + u .= Û⁺ᵢ end function rosenbrock_step_u!(integrator, cache, f) (; u, p, t, dt, alg) = integrator - (; linsolve!) = cache - (; Û⁺, Ks, W, ḟ) = cache._cache - (; lowerγ⁻¹, âγ⁻¹, â𝟙, γ𝟙, diagγ𝟙) = precomputed_values(typeof(alg), typeof(f)) - @assert all(diagγ𝟙 .== diagγ𝟙[1]) - f.Wfact(W, u, p, t, diagγ𝟙[1]) - !isnothing(f.tgrad) && f.tgrad(ḟ, u, p, t) - for i in 1:length(Ks) - Û = i == 1 ? u : Û⁺ - T̂ = i == 1 ? t : t + dt * â𝟙[i] + (; update_jac) = alg + (; update_jac_cache, linsolve!) = cache + (; Û⁺ᵢ, Ks, W, ḟ) = cache._cache + (; lowerγ⁻¹, âγ⁻¹, diagγ𝟙, γ𝟙, â𝟙) = + precomputed_values(typeof(alg), typeof(f)) + function jac_func(Ûᵢ, T̂ᵢ, γᵢᵢ) + f.Wfact(W, Ûᵢ, p, dt * γᵢᵢ, T̂ᵢ) + !isnothing(f.tgrad) && f.tgrad(ḟ, Ûᵢ, p, T̂ᵢ) + end + function stage_func(::Val{i}) where {i} + γᵢᵢ = diagγ𝟙[i] + Ûᵢ = i == 1 ? u : Û⁺ᵢ + T̂ᵢ = i == 1 ? t : t + dt * â𝟙[i] + + run!(update_jac, update_jac_cache, NewStage(), jac_func, Ûᵢ, T̂ᵢ, γᵢᵢ) # Kᵢ = (Δt * γᵢᵢ * J - I)⁻¹ * γᵢᵢ * - # (f(Ûᵢ, T̂ᵢ) - ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ) - f(Ks[i], Û, p, T̂) - add_scaled_linear_combination!(Ks[i], lowerγ⁻¹[i], Ks, -1) - !isnothing(f.tgrad) && (Ks[i] .+= dt .* ḟ .* γ𝟙[i]) - Ks[i] .*= diagγ𝟙[i] - linsolve!(Ks[i], W, Ks[i]) # assume linsolve! can handle aliasing + # (f(Ûᵢ, T̂ᵢ) + ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ) + f(Ks[i], Ûᵢ, p, T̂ᵢ) + Ks[i] .= γᵢᵢ .* broadcasted( + +, + Ks[i], + linear_combination(lowerγ⁻¹[i], Ks)..., + (isnothing(f.tgrad) ? () : (broadcasted(*, dt * γ𝟙[i], ḟ),))..., + ) + linsolve!(Ks[i], W, Ks[i]) # assume that linsolve! 
can handle aliasing # Û⁺ᵢ = u - Δt * ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ - Û⁺ .= u - add_scaled_linear_combination!(Û⁺, âγ⁻¹[i], Ks, -dt) + Û⁺ᵢ .= broadcasted(+, u, scaled_linear_combination(âγ⁻¹[i], Ks, -dt)...) end - u .= Û⁺ + + run!(update_jac, update_jac_cache, NewStep(), jac_func, u, t, diagγ𝟙[1]) + foreachval(stage_func, Val(num_stages(typeof(alg)))) + u .= Û⁺ᵢ end diff --git a/src/solvers/rosenbrock_tableaus.jl b/src/solvers/rosenbrock_tableaus.jl index 5a21e34a..d6c4d499 100644 --- a/src/solvers/rosenbrock_tableaus.jl +++ b/src/solvers/rosenbrock_tableaus.jl @@ -1,15 +1,17 @@ -export Rosenbrock23, SSPKnoth, ROS3, RODAS3 +export Rosenbrock23, SSPKnoth, RODASP2 +export ROS3w, ROS3Pw, ROS34PW1a, ROS34PW1b, ROS34PW2, ROS34PW3 using StaticArrays: @SArray #= -Rosenbrock23 in OrdinaryDiffEq and ode23s in MATLAB (2 stages, 3rd order -convergence) +Rosenbrock23 in OrdinaryDiffEq.jl and ode23s in MATLAB + +Rosenbrock-W method with 2 stages and 2nd order convergence From Section 4.1 of "The MATLAB ODE Suite" by L. F. Shampine and M. W. 
Reichelt -The paper does not directly give the method coefficients, but, converting to our -notation, it specifies that +The paper does not directly provide the method coefficients, but, converting to +our notation, it specifies that Û₁ = u, T̂₁ = t, F₁ = (I - Δt * 1/(2+√2) * J)⁻¹ * (f(Û₁, T̂₁) + Δt * 1/(2+√2) * ḟ), Û₂ = u + Δt * 1/2 * F₁, T̂₂ = t + Δt * 1/2, @@ -19,7 +21,7 @@ This implies the following table of coefficients: =# const Rosenbrock23 = RosenbrockAlgorithm{ @SArray([ - 1/(2+√2) 0; + 1/(2+√2) 0; -1/(2+√2) 1/(2+√2); ]), @SArray([ @@ -30,13 +32,13 @@ const Rosenbrock23 = RosenbrockAlgorithm{ } #= -Custom Rosenbrock scheme from Oswald Knoth +Rosenbrock-W method with 3 stages and 2nd order convergence from Oswald Knoth =# const SSPKnoth = RosenbrockAlgorithm{ @SArray([ - 1 0 0; - 0 1 0; - -3/4 -3/4 1; + 1 0 0; + 0 1 0; + -3/4 -3/4 1; ]), @SArray([ 0 0 0; @@ -47,49 +49,138 @@ const SSPKnoth = RosenbrockAlgorithm{ } #= -3 stages, 3rd order convergence +An improved version of RODASP, which is itself an improved version of RODAS + +Rosenbrock-W method with 6 stages and 4th order convergence (reduces to 2nd +order for inexact Jacobians) -From Section 4 of "Benchmarking Stiff ODE Solvers" by A. Sandu et. al. +From Table 3 of "Improvement of Rosenbrock-Wanner Method RODASP" by G. +Steinebach + +The paper calls a "α" and specifies β = α + γ instead of γ. 
=# -const ROS3 = begin - γᵢᵢ = 0.43586652150845899941601945119356 - γ₂₁ = -0.19294655696029095575009695436041 - γ₃₂ = 1.74927148125794685173529749738960 - b₁ = -0.75457412385404315829818998646589 - b₂ = 1.94100407061964420292840123379419 - b₃ = -0.18642994676560104463021124732829 +const RODASP2 = begin + α = @SArray([ + 0 0 0 0 0 0; + 3/4 0 0 0 0 0; + 3.688749816109670e-1 -4.742684759792117e-2 0 0 0 0; + 4.596170083041160e-1 2.724432453018110e-1 -2.123145213282008e-1 0 0 0; + 2.719770298548111e+0 1.358873794835473e+0 -2.838824065018641e+0 -2.398200283649438e-1 0 0; + -6.315720511779362e-1 -3.326966988718489e-1 1.154688683864918e+0 5.595800661848674e-1 1/4 0; + ]) + β = @SArray([ + 1/4 0 0 0 0 0; + 0 1/4 0 0 0 0; + -9.184372116108780e-2 -2.624106318888223e-2 1/4 0 0 0; + -5.817702768270960e-2 -1.382129630513952e-1 5.517478318046004e-1 1/4 0 0; + -6.315720511779359e-1 -3.326966988718489e-1 1.154688683864917e+0 5.595800661848674e-1 1/4 0; + 1.464968119068509e-1 8.896159691002870e-2 1.648843942975147e-1 4.568000540284631e-1 -1.071428571428573e-1 1/4; + ]) RosenbrockAlgorithm{ - @SArray([ - γᵢᵢ 0 0; - γ₂₁ γᵢᵢ 0; - 0 γ₃₂ γᵢᵢ; - ]), - @SArray([ - 0 0 0; - γᵢᵢ 0 0; - γᵢᵢ 0 0; - ]), - @SArray([b₁, b₂, b₃]), + β - α, + α, + vec(β[end, :]), } end +################################################################################ + #= -4 stages, 3rd order convergence +Methods from "New Rosenbrock W-Methods of Order 3 for Partial Differential +Algebraic Equations of Index 1" by J Rang and L. Angermann -From Section 4 of "Benchmarking Stiff ODE Solvers" by A. Sandu et. al. +Each method is named ROS3[4][P][w/W][...], where "ROS3" indicates a Rosenbrock +method of order 3, "4" indicates that the method has 4 stages (otherwise, it has +3 stages), "P" indicates that the method is suited for parabolic problems, "w" +or "W" indicates that the method can handle an approximate Jacobian, and some of +the names end with additional identifying numbers and symbols. 
+ +ROS3w and ROS3Pw both reduce to order 2 for inexact Jacobians. +ROS34PW3 is actually of order 4 but reduces to order 3 for inexact Jacobians. =# -const RODAS3 = RosenbrockAlgorithm{ +const ROS3w = RosenbrockAlgorithm{ + @SArray([ + 4.358665215084590e-1 0 0; + 3.635068368900681e-1 4.358665215084590e-1 0; + −8.996866791992636e-1 −1.537997822626885e-1 4.358665215084590e-1; + ]), + @SArray([ + 0 0 0; + 2/3 0 0; + 2/3 0 0; + ]), + @SArray([1/4, 1/4, 1/2]), +} +const ROS3Pw = RosenbrockAlgorithm{ + @SArray([ + 7.8867513459481287e-1 0 0; + −1.5773502691896257e+0 7.8867513459481287e-1 0; + −6.7075317547305480e-1 −1.7075317547305482e-1 7.8867513459481287e-1; + ]), + @SArray([ + 0 0 0; + 1.5773502691896257e+0 0 0; + 1/2 0 0; + ]), + @SArray([1.0566243270259355e-1, 4.9038105676657971e-2, 8.4529946162074843e-1]), +} +const ROS34PW1a = RosenbrockAlgorithm{ + @SArray([ + 4.358665215084590e-1 0 0 0; + −2.218787467653286e+0 4.358665215084590e-1 0 0; + −9.461966143940745e-2 −7.913526735718213e-3 4.358665215084590e-1 0; + −1.870323744195384e+0 −9.624340112825115e-2 2.726301276675511e-1 4.358665215084590e-1; + ]), + @SArray([ + 0 0 0 0; + 2.218787467653286e+0 0 0 0; + 0 0 0 0; + 1.208587690772214e+0 7.511610241919324e-2 1/2 0; + ]), + @SArray([3.285609536316354e-1, −5.785609536316354e-1, 1/4, 1]), +} +const ROS34PW1b = RosenbrockAlgorithm{ + @SArray([ + 4.358665215084590e-1 0 0 0; + −2.218787467653286e+0 4.358665215084590e-1 0 0; + −2.848610224639349e+0 −5.267530183845237e-2 4.358665215084590e-1 0; + −1.128167857898393e+0 −1.677546870499461e-1 5.452602553351021e-2 4.358665215084590e-1; + ]), + @SArray([ + 0 0 0 0; + 2.218787467653286e+0 0 0 0; + 2.218787467653286e+0 0 0 0; + 1.453923375357884e+0 0 1/10 0; + ]), + @SArray([5.495647928937977e-1, −5.507258170857301e-1, 1/4, 7.511610241919324e-1]), +} +const ROS34PW2 = RosenbrockAlgorithm{ + @SArray([ + 4.3586652150845900e-1 0 0 0; + −8.7173304301691801e-1 4.3586652150845900e-1 0 0; + −9.0338057013044082e-1 5.4180672388095326e-2 
4.3586652150845900e-1 0; + 2.4212380706095346e-1 −1.2232505839045147e+0 5.4526025533510214e-1 4.3586652150845900e-1; + ]), + @SArray([ + 0 0 0 0; + 8.7173304301691801e-1 0 0 0; + 8.4457060015369423e-1 −1.1299064236484185e-1 0 0; + 0 0 1 0; + ]), + @SArray([2.4212380706095346e-1, −1.2232505839045147e+0, 1.5452602553351020e+0, 4.3586652150845900e-1]), +} +const ROS34PW3 = RosenbrockAlgorithm{ @SArray([ - 1/2 0 0 0; - 1 1/2 0 0; - -1/4 -1/4 1/2 0; - 1/12 1/12 -2/3 1/2; + 1.0685790213016289e+0 0 0 0; + −2.5155456020628817e+0 1.0685790213016289e+0 0 0; + −8.7991339217106512e-1 −9.6014187766190695e-1 1.0685790213016289e+0 0; + −4.1731389379448741e-1 4.1091047035857703e-1 −1.3558873204765276e+0 1.0685790213016289e+0; ]), @SArray([ - 0 0 0 0; - 0 0 0 0; - 1 0 0 0; - 3/4 -1/4 1/2 0; + 0 0 0 0; + 2.5155456020628817e+0 0 0 0; + 5.0777280103144085e-1 3/4 0 0; + 1.3959081404277204e-1 −3.3111001065419338e-1 8.2040559712714178e-1 0; ]), - @SArray([5/6, -1/6, -1/6, 1/2]), + @SArray([2.2047681286931747e-1, 2.7828278331185935e-3, 7.1844787635140066e-3, 7.6955588053404989e-1]), } diff --git a/src/solvers/update_signal_handler.jl b/src/solvers/update_signal_handler.jl new file mode 100644 index 00000000..e22ba36e --- /dev/null +++ b/src/solvers/update_signal_handler.jl @@ -0,0 +1,46 @@ +export UpdateSignal, NewStep, NewStage, NewNewtonIteration +export UpdateSignalHandler, UpdateEvery, UpdateEveryN, can_handle + +abstract type UpdateSignal end +struct NewStep <: UpdateSignal end +struct NewStage <: UpdateSignal end +struct NewNewtonIteration <: UpdateSignal end + +abstract type UpdateSignalHandler end +struct UpdateEvery{U <: UpdateSignal} <: UpdateSignalHandler + update_signal::U +end +struct UpdateEveryN{U <: UpdateSignal, R <: Union{Nothing, UpdateSignal}} <: + UpdateSignalHandler + update_signal::U + n::Int + reset_n_signal::R +end +UpdateEveryN(update_signal, n, reset_n_signal = nothing) = + UpdateEveryN(update_signal, n, reset_n_signal) + 
+can_handle(::Type{<:UpdateSignalHandler}, ::UpdateSignal) = false +can_handle(::Type{UpdateEvery{U}}, ::U) where {U <: UpdateSignal} = true +can_handle(::Type{UpdateEveryN{U}}, ::U) where {U <: UpdateSignal} = true + +allocate_cache(::UpdateSignalHandler) = (;) +allocate_cache(::UpdateEveryN) = (; n = Ref(0)) + +run!(alg::UpdateSignalHandler, cache, ::UpdateSignal, f!, args...) = false +function run!(alg::UpdateEvery{U}, cache, ::U, f!, args...) where {U <: UpdateSignal} + f!(args...) + return true +end +function run!(alg::UpdateEveryN{U}, cache, ::U, f!, args...) where {U <: UpdateSignal} + cache.n[] += 1 + if cache.n[] == alg.n + f!(args...) + cache.n[] = 0 + return true + end + return false +end +function run!(alg::UpdateEveryN{U, R}, cache, ::R, f!, args...) where {U, R <: UpdateSignal} + cache.n[] = 0 + return false +end diff --git a/test/convergence.jl b/test/convergence.jl index 70197c96..744836d9 100644 --- a/test/convergence.jl +++ b/test/convergence.jl @@ -39,7 +39,7 @@ for (prob, sol, tscale) in [ end -@testset "IMEX ARK Algorithms" begin +@testset "IMEX ARK Methods" begin algs1 = (ARS111, ARS121) algs2 = (ARS122, ARS232, ARS222, IMKG232a, IMKG232b, IMKG242a, IMKG242b) algs2 = (algs2..., IMKG252a, IMKG252b, IMKG253a, IMKG253b, IMKG254a) @@ -64,19 +64,43 @@ end end end -@testset "Rosenbrock Algorithms" begin - algs3 = (Rosenbrock23, SSPKnoth, ROS3, RODAS3) - for (algorithm_names, order) in ((algs3, 3),) +@testset "Rosenbrock-W Methods" begin + algs2 = (Rosenbrock23, SSPKnoth, RODASP2, ROS3w, ROS3Pw) + algs3 = (ROS34PW1a, ROS34PW1b, ROS34PW2, ROS34PW3) + for (algorithm_names, order) in ((algs2, 2), (algs3, 3)) for algorithm_name in algorithm_names for (problem, solution) in ( - (linear_prob_approximate_wfact, linear_sol), - # (split_linear_prob_wfact_split_fe, linear_sol), + (linear_prob_inexact_wfact, linear_sol), ) algorithm = algorithm_name(; linsolve = linsolve_direct) @test isapprox( convergence_order(problem, solution, algorithm, dts), order; - atol 
= 0.02, + rtol = 0.01, + ) + end + end + end +end + +@testset "Rosenbrock-W Methods Limiters Formulation" begin + algs2 = (Rosenbrock23, SSPKnoth, RODASP2, ROS3w, ROS3Pw) + algs3 = (ROS34PW1a, ROS34PW1b, ROS34PW2, ROS34PW3) + for (algorithm_names, order) in ((algs2, 2), (algs3, 3)) + for algorithm_name in algorithm_names + for (problem, solution) in ( + (linear_prob_inexact_wfact_fe, linear_sol), + ) + @info algorithm_name + algorithm = algorithm_name(; + linsolve = linsolve_direct, + multiply! = multiply_direct!, + set_Δtγ! = set_Δtγ_direct!, + ) + @test isapprox( + convergence_order(problem, solution, algorithm, dts), + order; + rtol = 0.01, ) end end diff --git a/test/problems.jl b/test/problems.jl index 1b3a4d54..c99d670a 100644 --- a/test/problems.jl +++ b/test/problems.jl @@ -45,13 +45,22 @@ linear_prob_wfactt = ODEProblem( ), [1/2],(0.0,1.0),-0.2) -linear_prob_approximate_wfact = ODEProblem( +linear_prob_inexact_wfact = ODEProblem( ODEFunction( (du,u,p,t) -> (du .= p .* u); jac_prototype=zeros(ComplexF64,1,1), Wfact = (W,u,p,γ,t) -> (W[1,1]=γ*real(p)-1), ), [1/2 + 0.0*im],(0.0,1.0),-0.2+0.1*im) +linear_prob_inexact_wfact_fe = ODEProblem( + ForwardEulerODEFunction( + (ux,u,p,t,dt) -> (ux .+= dt .* p .* u); + jac_prototype=zeros(ComplexF64,1,1), + Wfact = (W,u,p,γ,t) -> (W[1,1]=γ*real(p)-1), + ), + [1/2 + 0.0*im],(0.0,1.0),-0.2+0.1*im) +multiply_direct!(b, A, x) = b .= A * x +set_Δtγ_direct!(A, Δtγ_new, Δtγ_old) = A .= (A + I) * Δtγ_new / Δtγ_old - I split_linear_prob_wfact_split = ODEProblem( SplitFunction( From 6e4731b01be258817c63bce79ae1734a9e69ccd4 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Wed, 21 Sep 2022 03:39:46 -0700 Subject: [PATCH 08/21] WIP --- src/solvers/rosenbrock.jl | 244 +++++++++++++++-------------- src/solvers/rosenbrock_tableaus.jl | 4 +- test/convergence.jl | 26 ++- test/problems.jl | 83 ++++++++++ test/utils.jl | 3 + 5 files changed, 233 insertions(+), 127 deletions(-) diff --git a/src/solvers/rosenbrock.jl 
b/src/solvers/rosenbrock.jl index 1f65f0e2..86db7733 100644 --- a/src/solvers/rosenbrock.jl +++ b/src/solvers/rosenbrock.jl @@ -129,15 +129,15 @@ Dropping the indices then tells us that Û⁺ = u ⊗ 𝟙ᵀ + Δt * F * âᵀ and F = F̂ + Δt * J * F * γᵀ + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. To rewrite this in a way that more directly corresponds to our last formulation, -we will define the functions diag() and lower(), such that, for any lower +we will define the functions diagonal() and lower(), such that, for any lower triangular matrix M, - M = diag(M) + lower(M), where - diag(M)ᵢⱼ := i == j ? Mᵢⱼ : 0 and + M = diagonal(M) + lower(M), where + diagonal(M)ᵢⱼ := i == j ? Mᵢⱼ : 0 and lower(M)ᵢⱼ := i > j ? Mᵢⱼ : 0. This lets us rewrite the equation for F as - F * (I + diag(γ)⁻¹ * lower(γ))ᵀ - - Δt * J * F * (I + diag(γ)⁻¹ * lower(γ))ᵀ * diag(γ)ᵀ = - = F̂ + F * (diag(γ)⁻¹ * lower(γ))ᵀ + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. + F * (I + diagonal(γ)⁻¹ * lower(γ))ᵀ - + Δt * J * F * (I + diagonal(γ)⁻¹ * lower(γ))ᵀ * diagonal(γ)ᵀ = + = F̂ + F * (diagonal(γ)⁻¹ * lower(γ))ᵀ + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. We will now use these matrix equations to define two reformulations: one that optimizes performance by eliminating the subtraction after the linear solve, and @@ -146,28 +146,28 @@ one that enables limiters by appropriately modifying the value being subtracted. ## Optimizing Performance Let us define a new N×s matrix - K := F * γᵀ. + K := Δt * F * γᵀ. We then have that - F = K * (γ⁻¹)ᵀ. + F = Δt⁻¹ * K * (γ⁻¹)ᵀ. This allows us to rewrite the matrix equations as - Û⁺ = u ⊗ 𝟙ᵀ + Δt * K * (â * γ⁻¹)ᵀ and - K * (γ⁻¹)ᵀ = F̂ + Δt * J * K + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. + Û⁺ = u ⊗ 𝟙ᵀ + K * (â * γ⁻¹)ᵀ and + Δt⁻¹ * K * (γ⁻¹)ᵀ = F̂ + J * K + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. Since γ is lower triangular, - γ⁻¹ = diag(γ⁻¹) + lower(γ⁻¹) = diag(γ)⁻¹ + lower(γ⁻¹). + γ⁻¹ = diagonal(γ⁻¹) + lower(γ⁻¹) = diagonal(γ)⁻¹ + lower(γ⁻¹). This lets us rewrite the equation for K as - K * (diag(γ)⁻¹ + lower(γ⁻¹))ᵀ = F̂ + Δt * J * K + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. 
-Multiplying through on the right by diag(γ)ᵀ and rearranging gives us - K - Δt * J * K * diag(γ)ᵀ = - (F̂ - K * lower(γ⁻¹)ᵀ + Δt * ḟ ⊗ (γ * 𝟙)ᵀ) * diag(γ)ᵀ. + Δt⁻¹ * K * (diagonal(γ)⁻¹ + lower(γ⁻¹))ᵀ = F̂ + J * K + Δt * ḟ ⊗ (γ * 𝟙)ᵀ. +Multiplying through on the right by Δt * diagonal(γ)ᵀ and rearranging gives us + K - Δt * J * K * diagonal(γ)ᵀ = + Δt * (F̂ - Δt⁻¹ * K * lower(γ⁻¹)ᵀ + Δt * ḟ ⊗ (γ * 𝟙)ᵀ) * diagonal(γ)ᵀ. Taking this and the equation for Û⁺ back out of matrix form gives us - Û⁺ᵢ = u + Δt * ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ and + Û⁺ᵢ = u + ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ and Kᵢ - Δt * γᵢᵢ * J * Kᵢ = - = γᵢᵢ * - (f(Ûᵢ, T̂ᵢ) - ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ). + = Δt * γᵢᵢ * + (f(Ûᵢ, T̂ᵢ) - ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ/Δt * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ). Thus, to optimize performance, we can redefine - Û⁺ᵢ := u + Δt * ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ, where - Kᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * γᵢᵢ * ( - f(Ûᵢ, T̂ᵢ) - ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + Û⁺ᵢ := u + ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ, where + Kᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * Δt * γᵢᵢ * ( + f(Ûᵢ, T̂ᵢ) - ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ/Δt * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ ). ## Enabling Limiters @@ -199,46 +199,48 @@ results in Since γ, â, and β are all lower triangular, β⁻¹ * â * γ⁻¹ * â⁻¹ * β is also lower triangular, which means that β⁻¹ * â * γ⁻¹ * â⁻¹ * β = - diag(β⁻¹ * â * γ⁻¹ * â⁻¹ * β) + lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β). + diagonal(β⁻¹ * â * γ⁻¹ * â⁻¹ * β) + lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β). In addition, we have that - diag(β⁻¹ * â * γ⁻¹ * â⁻¹ * β) = - = diag(β⁻¹) * diag(â) * diag(γ⁻¹) * diag(â⁻¹) * diag(β) = - = diag(β)⁻¹ * diag(â) * diag(γ)⁻¹ * diag(â)⁻¹ * diag(β) = - = diag(γ)⁻¹. + diagonal(β⁻¹ * â * γ⁻¹ * â⁻¹ * β) = + = diagonal(β⁻¹) * diagonal(â) * diagonal(γ⁻¹) * diagonal(â⁻¹) * + diagonal(β) = + = diagonal(β)⁻¹ * diagonal(â) * diagonal(γ)⁻¹ * diagonal(â)⁻¹ * + diagonal(β) = + = diagonal(γ)⁻¹. 
Combining the last two expressions gives us - β⁻¹ * â * γ⁻¹ * â⁻¹ * β = diag(γ)⁻¹ + lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β). + β⁻¹ * â * γ⁻¹ * â⁻¹ * β = diagonal(γ)⁻¹ + lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β). Substituting this into the last equation for V gives us - V * (diag(γ)⁻¹)ᵀ + V * lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β)ᵀ - Δt * J * V = + V * (diagonal(γ)⁻¹)ᵀ + V * lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β)ᵀ - Δt * J * V = = u ⊗ (β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙)ᵀ - Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ + Δt * F̂ * (β⁻¹ * â * γ⁻¹)ᵀ + Δt² * ḟ ⊗ (β⁻¹ * â * 𝟙)ᵀ. -Multiplying through on the right by diag(γ)ᵀ and rearranging tells us that - V - Δt * J * V * diag(γ)ᵀ = - = u ⊗ (diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙)ᵀ - - Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ * diag(γ)ᵀ + - Δt * F̂ * (diag(γ) * β⁻¹ * â * γ⁻¹)ᵀ + - Δt² * ḟ ⊗ (diag(γ) * β⁻¹ * â * 𝟙)ᵀ - - V * (diag(γ) * lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β))ᵀ. +Multiplying through on the right by diagonal(γ)ᵀ and rearranging tells us that + V - Δt * J * V * diagonal(γ)ᵀ = + = u ⊗ (diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙)ᵀ - + Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ * diagonal(γ)ᵀ + + Δt * F̂ * (diagonal(γ) * β⁻¹ * â * γ⁻¹)ᵀ + + Δt² * ḟ ⊗ (diagonal(γ) * β⁻¹ * â * 𝟙)ᵀ - + V * (diagonal(γ) * lower(β⁻¹ * â * γ⁻¹ * â⁻¹ * β))ᵀ. Since lower() preserves multiplication by a diagonal matrix, we can rewrite this as - V - Δt * J * V * diag(γ)ᵀ = - = u ⊗ (diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙)ᵀ - - Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ * diag(γ)ᵀ + - Δt * F̂ * (diag(γ) * β⁻¹ * â * γ⁻¹)ᵀ + - Δt² * ḟ ⊗ (diag(γ) * β⁻¹ * â * 𝟙)ᵀ - - V * lower(diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * β)ᵀ. + V - Δt * J * V * diagonal(γ)ᵀ = + = u ⊗ (diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙)ᵀ - + Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ * diagonal(γ)ᵀ + + Δt * F̂ * (diagonal(γ) * β⁻¹ * â * γ⁻¹)ᵀ + + Δt² * ḟ ⊗ (diagonal(γ) * β⁻¹ * â * 𝟙)ᵀ - + V * lower(diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * β)ᵀ. We will now show that this reformulation will not allow us to eliminate multiplications by J, as the previous ones did. 
If we wanted to factor out all multiplications by J when we convert back out of matrix form, we would rearrange the last equation to get - (V - u ⊗ (β⁻¹ * 𝟙)ᵀ) - Δt * J * (V - u ⊗ (β⁻¹ * 𝟙)ᵀ) * diag(γ)ᵀ = - = u ⊗ ((diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ - β⁻¹) * 𝟙)ᵀ + - Δt * F̂ * (diag(γ) * β⁻¹ * â * γ⁻¹)ᵀ + - Δt² * ḟ ⊗ (diag(γ) * β⁻¹ * â * 𝟙)ᵀ - - V * lower(diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * β)ᵀ. + (V - u ⊗ (β⁻¹ * 𝟙)ᵀ) - Δt * J * (V - u ⊗ (β⁻¹ * 𝟙)ᵀ) * diagonal(γ)ᵀ = + = u ⊗ ((diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ - β⁻¹) * 𝟙)ᵀ + + Δt * F̂ * (diagonal(γ) * β⁻¹ * â * γ⁻¹)ᵀ + + Δt² * ḟ ⊗ (diagonal(γ) * β⁻¹ * â * 𝟙)ᵀ - + V * lower(diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * β)ᵀ. In order to apply limiters on an unscaled state, we would then require that the coefficient of u on the right-hand side of this equation be 𝟙ᵀ; i.e., that - (diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ - β⁻¹) * 𝟙 = 𝟙. + (diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ - β⁻¹) * 𝟙 = 𝟙. In general, this equation does not have a solution β; e.g., if γ = â = d * I for some scalar constant d, then the equation simplifies to (β⁻¹ - β⁻¹) * 𝟙 = 𝟙. @@ -247,13 +249,13 @@ impossible to eliminate multiplications by J while preserving an unscaled state on the right-hand side. For example, if we were to instead set Û⁺ = u ⊗ δᵀ + V * βᵀ for some vector δ of length s, the above equation would become - (diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ - β⁻¹) * (δ - 𝟙) = 𝟙. + (diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ - β⁻¹) * (δ - 𝟙) = 𝟙. This also does not have a general solution. So, we must proceed without rearranging the last equation for V. In order to apply limiters on an unscaled state, we must require that the coefficient of u in that equation be 𝟙ᵀ, which implies that - diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙 = 𝟙. + diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙 = 𝟙. We will now show that we cannot also make the coefficient of the J term on the right-hand side be the same as the one on the left-hand side. 
@@ -267,16 +269,16 @@ Unless d * â * γ⁻¹ * â⁻¹ * 𝟙 = 𝟙 (which will not be the case in system of equations cannot be satisfied. So, we will only require that β satisfies the equation - diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙 = 𝟙. + diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ * 𝟙 = 𝟙. This equation has infinitely many solutions; the easiest way to obtain a solution is to set - diag(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ = I. + diagonal(γ) * β⁻¹ * â * γ⁻¹ * â⁻¹ = I. This implies that - β⁻¹ = diag(γ)⁻¹ * â * γ * â⁻¹ and - β = â * γ⁻¹ * â⁻¹ * diag(γ). + β⁻¹ = diagonal(γ)⁻¹ * â * γ * â⁻¹ and + β = â * γ⁻¹ * â⁻¹ * diagonal(γ). Substituting this into the last equation for V gives us - V - Δt * J * V * diag(γ)ᵀ = - = u ⊗ 𝟙ᵀ - Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ * diag(γ)ᵀ + Δt * F̂ * âᵀ + + V - Δt * J * V * diagonal(γ)ᵀ = + = u ⊗ 𝟙ᵀ - Δt * J * u ⊗ (β⁻¹ * 𝟙)ᵀ * diagonal(γ)ᵀ + Δt * F̂ * âᵀ + Δt² * ḟ ⊗ (â * γ * 𝟙)ᵀ - V * lower(β)ᵀ. Taking this and the equation Û⁺ = V * βᵀ back out of matrix form gives us Û⁺ᵢ = ∑_{j=1}^i βᵢⱼ * Vᵢ and @@ -320,9 +322,9 @@ For the original formulation, this means that Δt * ḟ * ∑_{j=1}^i γᵢⱼ ) + γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ. For the performance formulation, this means that - Û⁺ᵢ := u - Δt * ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ, where - Kᵢ := (Δt * γᵢᵢ * J - I)⁻¹ * γᵢᵢ * ( - f(Ûᵢ, T̂ᵢ) + ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + Û⁺ᵢ := u - ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ, where + Kᵢ := (Δt * γᵢᵢ * J - I)⁻¹ * Δt * γᵢᵢ * ( + f(Ûᵢ, T̂ᵢ) + ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ/Δt * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ ). 
For the limiters formulation, this means that Û⁺ᵢ := -∑_{j=1}^i βᵢⱼ * Vᵢ, where @@ -345,45 +347,49 @@ struct RosenbrockAlgorithm{γ, a, b, U, L, M, S} <: DistributedODEAlgorithm multiply!::M set_Δtγ!::S end -RosenbrockAlgorithm{γ, a, b}(; +function RosenbrockAlgorithm{γ, a, b}(; update_jac::U = UpdateEvery(NewStep()), linsolve::L, multiply!::M = nothing, set_Δtγ!::S = nothing, -) where {γ, a, b, U, L, M, S} = - RosenbrockAlgorithm{γ, a, b, U, L, M, S}( +) where {γ, a, b, U, L, M, S} + check_valid_parameters(RosenbrockAlgorithm{γ, a, b, U}) + return RosenbrockAlgorithm{γ, a, b, U, L, M, S}( update_jac, linsolve, multiply!, set_Δtγ!, ) +end @generated foreachval(f::F, ::Val{N}) where {F, N} = quote Base.@nexprs $N i -> f(Val(i)) return nothing end -triangular_inv(matrix::T) where {T} = +lower_triangular_inv(matrix::T) where {T} = T(inv(LinearAlgebra.LowerTriangular(matrix))) -lower_plus_diag(matrix::T) where {T} = T(LinearAlgebra.LowerTriangular(matrix)) -diag(matrix::T) where {T} = T(LinearAlgebra.Diagonal(matrix)) -lower(matrix) = lower_plus_diag(matrix) - diag(matrix) -to_enumerated_rows(v::AbstractVector) = v -function to_enumerated_rows(m::AbstractMatrix) - rows = tuple(1:size(m, 1)...) - nonzero_indices = map(i -> findall(m[i, :] .!= 0), rows) +lower_plus_diagonal(matrix::T) where {T} = + T(LinearAlgebra.LowerTriangular(matrix)) +diagonal(matrix::T) where {T} = T(LinearAlgebra.Diagonal(matrix)) +lower(matrix) = lower_plus_diagonal(matrix) - diagonal(matrix) +to_enumerated_rows(x) = x +function to_enumerated_rows(matrix::AbstractMatrix) + rows = tuple(1:size(matrix, 1)...) 
+ nonzero_indices = map(i -> findall(matrix[i, :] .!= 0), rows) enumerated_rows = map( - i -> tuple(zip(nonzero_indices[i], m[i, nonzero_indices[i]])...), + i -> tuple(zip(nonzero_indices[i], matrix[i, nonzero_indices[i]])...), rows, ) return enumerated_rows end -linear_combination(enumerated_row, vectors) = +linear_combination_terms(enumerated_row, vectors) = map(((j, val),) -> broadcasted(*, val, vectors[j]), enumerated_row) -function scaled_linear_combination(enumerated_row, vectors, scale) - unscaled_terms = linear_combination(enumerated_row, vectors) - length(unscaled_terms) == 0 && return () - return (broadcasted(*, scale, broadcasted(+, unscaled_terms...)),) +function linear_combination(enumerated_row, vectors) + length(enumerated_row) == 0 && return nothing + terms = linear_combination_terms(enumerated_row, vectors) + length(enumerated_row) == 1 && return terms[1] + return broadcasted(+, terms...) end num_stages(::Type{<:RosenbrockAlgorithm{γ}}) where {γ} = size(γ, 1) @@ -391,14 +397,14 @@ num_stages(::Type{<:RosenbrockAlgorithm{γ}}) where {γ} = size(γ, 1) function check_valid_parameters( ::Type{<:RosenbrockAlgorithm{γ, a, b, U}}, ) where {γ, a, b, U} - γ === lower_plus_diag(γ) || + γ === lower_plus_diagonal(γ) || error("γ must be a lower triangular matrix") a === lower(a) || error("a must be a strictly lower triangular matrix") LinearAlgebra.det(γ) != 0 || error("non-invertible matrices γ are not currently supported") if U != UpdateEvery{NewStage} - diag(γ) === typeof(γ)(γ[1, 1] * I) || + diagonal(γ) === typeof(γ)(γ[1, 1] * I) || error("γ must have a uniform diagonal when \ update_jac != UpdateEvery(NewStage())") end @@ -406,10 +412,9 @@ function check_valid_parameters( error("update_jac must be able to handle NewStep() or NewStage()") end function check_valid_parameters( - alg_type::Type{<:RosenbrockAlgorithm{γ, a, b, U, L, M, S}}, + ::Type{<:RosenbrockAlgorithm{γ, a, b, U, L, M, S}}, ::Type{<:ForwardEulerODEFunction}, ) where {γ, a, b, U, L, M, S} - 
check_valid_parameters(alg_type) â = vcat(a[SUnitRange(2, length(b)), SOneTo(length(b))], transpose(b)) LinearAlgebra.det(â) != 0 || error("non-invertible matrices â are not currently supported when \ @@ -419,7 +424,7 @@ function check_valid_parameters( S != Nothing || error("set_Δtγ! must be specified when using ForwardEulerODEFunction") end -check_valid_parameters(alg_type, _) = check_valid_parameters(alg_type) +check_valid_parameters(_, _) = true struct RosenbrockCache{C, U, L} _cache::C @@ -466,13 +471,12 @@ step_u!(integrator, cache::RosenbrockCache) = _, ) where {γ, a, b} â = vcat(a[2:end, :], transpose(b)) - γ⁻¹ = triangular_inv(γ) - lowerγ⁻¹ = lower(γ⁻¹) - âγ⁻¹ = â * γ⁻¹ - diagγ𝟙 = vec(sum(diag(γ), dims = 2)) + γ⁻¹ = lower_triangular_inv(γ) γ𝟙 = vec(sum(γ, dims = 2)) â𝟙 = vec(sum(â, dims = 2)) - values = map(to_enumerated_rows, (; lowerγ⁻¹, âγ⁻¹, diagγ𝟙, γ𝟙, â𝟙)) + matrices = + map(to_enumerated_rows, (; lowerγ⁻¹ = lower(γ⁻¹), âγ⁻¹ = â * γ⁻¹)) + values = (; matrices..., diagγ = diag(γ), γ𝟙, â𝟙) return :($values) end @generated function precomputed_values( @@ -481,18 +485,13 @@ end ) where {γ, a, b} â = vcat(a[2:end, :], transpose(b)) âγ = â * γ - β = â * triangular_inv(âγ) * diag(γ) - lowerâ = lower(â) - lowerβ = lower(β) - diagγ𝟙 = vec(sum(diag(γ), dims = 2)) + β = â * lower_triangular_inv(âγ) * diagonal(γ) â𝟙 = vec(sum(â, dims = 2)) - β⁻¹𝟙 = vec(sum(triangular_inv(β), dims = 2)) âγ𝟙 = vec(sum(âγ, dims = 2)) - diagâ𝟙 = vec(sum(diag(â), dims = 2)) - values = map( - to_enumerated_rows, - (; lowerâ, lowerβ, β, diagγ𝟙, â𝟙, β⁻¹𝟙, âγ𝟙, diagâ𝟙), - ) + β⁻¹𝟙 = vec(sum(lower_triangular_inv(β), dims = 2)) + matrices = + map(to_enumerated_rows, (; lowerâ = lower(â), lowerβ = lower(β), β)) + values = (; matrices..., diagγ = diag(γ), diagâ = diag(â), â𝟙, âγ𝟙, β⁻¹𝟙) return :($values) end @@ -501,18 +500,19 @@ function rosenbrock_step_u!(integrator, cache, g::ForwardEulerODEFunction) (; update_jac, multiply!, set_Δtγ!) = alg (; update_jac_cache, linsolve!) 
= cache (; Û⁺ᵢ, Vs, Fs, W, ḟ) = cache._cache - (; lowerâ, lowerβ, β, diagγ𝟙, â𝟙, β⁻¹𝟙, âγ𝟙, diagâ𝟙) = + (; lowerâ, lowerβ, β, diagγ, diagâ, â𝟙, âγ𝟙, β⁻¹𝟙) = precomputed_values(typeof(alg), typeof(g)) - function jac_func(Ûᵢ, T̂ᵢ, γᵢᵢ) - g.Wfact(W, Ûᵢ, p, dt * γᵢᵢ, T̂ᵢ) + function jac_func(Ûᵢ, T̂ᵢ, Δtγᵢᵢ) + g.Wfact(W, Ûᵢ, p, Δtγᵢᵢ, T̂ᵢ) !isnothing(g.tgrad) && g.tgrad(ḟ, Ûᵢ, p, T̂ᵢ) end function stage_func(::Val{i}) where {i} - γᵢᵢ = diagγ𝟙[i] + Δtγᵢᵢ = dt * diagγ[i] + Δtâᵢᵢ = dt * diagâ[i] Ûᵢ = i == 1 ? u : Û⁺ᵢ T̂ᵢ = i == 1 ? t : t + dt * â𝟙[i] - run!(update_jac, update_jac_cache, NewStage(), jac_func, Ûᵢ, T̂ᵢ, γᵢᵢ) + run!(update_jac, update_jac_cache, NewStage(), jac_func, Ûᵢ, T̂ᵢ, Δtγᵢᵢ) # Vᵢ = (Δt * γᵢᵢ * J - I)⁻¹ * g( # u - Δt * γᵢᵢ * J * u * ∑_{j=1}^i (β⁻¹)ᵢⱼ + @@ -522,26 +522,27 @@ function rosenbrock_step_u!(integrator, cache, g::ForwardEulerODEFunction) # T̂ᵢ # Δt * âᵢᵢ, # ) - set_Δtγ!(W, dt * γᵢᵢ * β⁻¹𝟙[i], dt * γᵢᵢ) + set_Δtγ!(W, Δtγᵢᵢ * β⁻¹𝟙[i], Δtγᵢᵢ) multiply!(Vs[i], W, u) - set_Δtγ!(W, dt * γᵢᵢ, dt * γᵢᵢ * β⁻¹𝟙[i]) + set_Δtγ!(W, Δtγᵢᵢ, Δtγᵢᵢ * β⁻¹𝟙[i]) + LâᵢⱼFⱼ = linear_combination(lowerâ[i], Fs) Vs[i] .= broadcasted( +, broadcasted(-, Vs[i]), - scaled_linear_combination(lowerâ[i], Fs, dt)..., + (isnothing(LâᵢⱼFⱼ) ? () : (broadcasted(*, dt, LâᵢⱼFⱼ),))..., (isnothing(g.tgrad) ? () : (broadcasted(*, dt^2 * âγ𝟙[i], ḟ),))..., - linear_combination(lowerβ[i], Vs)..., + linear_combination_terms(lowerβ[i], Vs)..., ) Fs[i] .= Vs[i] - g(Vs[i], Ûᵢ, p, T̂ᵢ, dt * diagâ𝟙[i]) - Fs[i] .= (Fs[i] .- Vs[i]) ./ (dt * diagâ𝟙[i]) + g(Vs[i], Ûᵢ, p, T̂ᵢ, Δtâᵢᵢ) + Fs[i] .= (Vs[i] .- Fs[i]) ./ Δtâᵢᵢ linsolve!(Vs[i], W, Vs[i]) # assume that linsolve! 
can handle aliasing # Û⁺ᵢ = -∑_{j=1}^i βᵢⱼ * Vᵢ - Û⁺ᵢ .= scaled_linear_combination(β[i], Vs, -1)[1] + Û⁺ᵢ .= broadcasted(-, linear_combination(β[i], Vs)) end - run!(update_jac, update_jac_cache, NewStep(), jac_func, u, t, diagγ𝟙[1]) + run!(update_jac, update_jac_cache, NewStep(), jac_func, u, t, dt * diagγ[1]) foreachval(stage_func, Val(num_stages(typeof(alg)))) u .= Û⁺ᵢ end @@ -551,35 +552,38 @@ function rosenbrock_step_u!(integrator, cache, f) (; update_jac) = alg (; update_jac_cache, linsolve!) = cache (; Û⁺ᵢ, Ks, W, ḟ) = cache._cache - (; lowerγ⁻¹, âγ⁻¹, diagγ𝟙, γ𝟙, â𝟙) = + (; lowerγ⁻¹, âγ⁻¹, diagγ, γ𝟙, â𝟙) = precomputed_values(typeof(alg), typeof(f)) - function jac_func(Ûᵢ, T̂ᵢ, γᵢᵢ) - f.Wfact(W, Ûᵢ, p, dt * γᵢᵢ, T̂ᵢ) + function jac_func(Ûᵢ, T̂ᵢ, Δtγᵢᵢ) + f.Wfact(W, Ûᵢ, p, Δtγᵢᵢ, T̂ᵢ) !isnothing(f.tgrad) && f.tgrad(ḟ, Ûᵢ, p, T̂ᵢ) end function stage_func(::Val{i}) where {i} - γᵢᵢ = diagγ𝟙[i] + Δtγᵢᵢ = dt * diagγ[i] Ûᵢ = i == 1 ? u : Û⁺ᵢ T̂ᵢ = i == 1 ? t : t + dt * â𝟙[i] - run!(update_jac, update_jac_cache, NewStage(), jac_func, Ûᵢ, T̂ᵢ, γᵢᵢ) + run!(update_jac, update_jac_cache, NewStage(), jac_func, Ûᵢ, T̂ᵢ, Δtγᵢᵢ) - # Kᵢ = (Δt * γᵢᵢ * J - I)⁻¹ * γᵢᵢ * - # (f(Ûᵢ, T̂ᵢ) + ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ) + # Kᵢ = (Δt * γᵢᵢ * J - I)⁻¹ * Δt * γᵢᵢ * ( + # f(Ûᵢ, T̂ᵢ) + ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ/Δt * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + # ) f(Ks[i], Ûᵢ, p, T̂ᵢ) - Ks[i] .= γᵢᵢ .* broadcasted( + Lγ⁻¹ᵢⱼKⱼ = linear_combination(lowerγ⁻¹[i], Ks) + Ks[i] .= Δtγᵢᵢ .* broadcasted( +, Ks[i], - linear_combination(lowerγ⁻¹[i], Ks)..., + (isnothing(Lγ⁻¹ᵢⱼKⱼ) ? () : (broadcasted(/, Lγ⁻¹ᵢⱼKⱼ, dt),))..., (isnothing(f.tgrad) ? () : (broadcasted(*, dt * γ𝟙[i], ḟ),))..., ) linsolve!(Ks[i], W, Ks[i]) # assume that linsolve! can handle aliasing - # Û⁺ᵢ = u - Δt * ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ - Û⁺ᵢ .= broadcasted(+, u, scaled_linear_combination(âγ⁻¹[i], Ks, -dt)...) + # Û⁺ᵢ = u - ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ + âγ⁻¹ᵢⱼKⱼ = linear_combination(âγ⁻¹[i], Ks) + Û⁺ᵢ .= isnothing(âγ⁻¹ᵢⱼKⱼ) ? 
u : broadcasted(-, u, âγ⁻¹ᵢⱼKⱼ) end - run!(update_jac, update_jac_cache, NewStep(), jac_func, u, t, diagγ𝟙[1]) + run!(update_jac, update_jac_cache, NewStep(), jac_func, u, t, dt * diagγ[1]) foreachval(stage_func, Val(num_stages(typeof(alg)))) u .= Û⁺ᵢ end diff --git a/src/solvers/rosenbrock_tableaus.jl b/src/solvers/rosenbrock_tableaus.jl index d6c4d499..33d3336a 100644 --- a/src/solvers/rosenbrock_tableaus.jl +++ b/src/solvers/rosenbrock_tableaus.jl @@ -92,8 +92,8 @@ Algebraic Equations of Index 1" by J Rang and L. Angermann Each method is named ROS3[4][P][w/W][...], where "ROS3" indicates a Rosenbrock method of order 3, "4" indicates that the method has 4 stages (otherwise, it has 3 stages), "P" indicates that the method is suited for parabolic problems, "w" -or "W" indicates that the method can handle an approximate Jacobian, and some of -the names end with additional identifying numbers and symbols. +or "W" indicates that the method can handle an inexact Jacobian, and there is an +identifying string of numbers and symbols at the ends of some names. ROS3w and ROS3Pw both reduce to order 2 for inexact Jacobians. ROS34PW3 is actually of order 4 but reduces to order 3 for inexact Jacobians. 
diff --git a/test/convergence.jl b/test/convergence.jl index 744836d9..4f37b5c5 100644 --- a/test/convergence.jl +++ b/test/convergence.jl @@ -39,6 +39,10 @@ for (prob, sol, tscale) in [ end +using Revise, ClimaTimeSteppers, Test +include("test/problems.jl") +include("test/utils.jl") +dts = 0.5 .^ (2:5) @testset "IMEX ARK Methods" begin algs1 = (ARS111, ARS121) algs2 = (ARS122, ARS232, ARS222, IMKG232a, IMKG232b, IMKG242a, IMKG242b) @@ -48,9 +52,11 @@ end for (algorithm_names, order) in ((algs1, 1), (algs2, 2), (algs3, 3)) for algorithm_name in algorithm_names for (problem, solution) in ( - (split_linear_prob_wfact_split, linear_sol), - (split_linear_prob_wfact_split_fe, linear_sol), + # (split_linear_prob_wfact_split, linear_sol), + # (split_linear_prob_wfact_split_fe, linear_sol), + (ark_analytic_split, ark_analytic_sol), ) + @show algorithm_name algorithm = algorithm_name( NewtonsMethod(; linsolve = linsolve_direct, max_iters = 1), ) # the problem is linear, so more iters have no effect @@ -64,6 +70,10 @@ end end end +using Revise, ClimaTimeSteppers, Test +include("test/problems.jl") +include("test/utils.jl") +dts = 0.5 .^ (4:7) @testset "Rosenbrock-W Methods" begin algs2 = (Rosenbrock23, SSPKnoth, RODASP2, ROS3w, ROS3Pw) algs3 = (ROS34PW1a, ROS34PW1b, ROS34PW2, ROS34PW3) @@ -71,6 +81,8 @@ end for algorithm_name in algorithm_names for (problem, solution) in ( (linear_prob_inexact_wfact, linear_sol), + (ark_analytic_sys, ark_analytic_sys_sol), + # (ark_analytic_sys_increment, ark_analytic_sys_sol), ) algorithm = algorithm_name(; linsolve = linsolve_direct) @test isapprox( @@ -83,15 +95,19 @@ end end end +using Revise, ClimaTimeSteppers, Test +include("test/problems.jl") +include("test/utils.jl") +dts = 0.5 .^ (4:7) @testset "Rosenbrock-W Methods Limiters Formulation" begin - algs2 = (Rosenbrock23, SSPKnoth, RODASP2, ROS3w, ROS3Pw) - algs3 = (ROS34PW1a, ROS34PW1b, ROS34PW2, ROS34PW3) + # only methods with invertible matrices â can be tested here + algs2 = 
(Rosenbrock23, SSPKnoth, RODASP2) + algs3 = (ROS34PW2, ROS34PW3) for (algorithm_names, order) in ((algs2, 2), (algs3, 3)) for algorithm_name in algorithm_names for (problem, solution) in ( (linear_prob_inexact_wfact_fe, linear_sol), ) - @info algorithm_name algorithm = algorithm_name(; linsolve = linsolve_direct, multiply! = multiply_direct!, diff --git a/test/problems.jl b/test/problems.jl index c99d670a..30bbcb4b 100644 --- a/test/problems.jl +++ b/test/problems.jl @@ -37,6 +37,89 @@ linear_prob_fe = ODEProblem( ForwardEulerODEFunction((un,u,p,t,dt) -> (un .= u .+ dt .* p .* u)), [1.0],(0.0,1.0),-0.2) +#= +From Section 1.1 of "Example Programs for ARKode v4.4.0" by D. R. Reynolds +=# +( + ark_analytic, + ark_analytic_increment, + ark_analytic_split, + ark_analytic_split_increment, + ark_analytic_sol, +) = let + FT = Float64 + λ = FT(-100) # increase magnitude for more stiffness + Y₀ = FT[0] + tspan = (0, 10) + tendency!(Yₜ, Y, λ, t) = Yₜ .= λ .* Y .+ (1 / (1 + t^2) - λ * atan(t)) + increment!(Y⁺, Y, λ, t, Δt) = + Y⁺ .+= Δt .* (λ .* Y .+ (1 / (1 + t^2) - λ * atan(t))) + implicit_tendency!(Yₜ, Y, λ, t) = Yₜ .= λ .* Y + explicit_tendency!(Yₜ, Y, λ, t) = Yₜ .= 1 / (1 + t^2) - λ * atan(t) + implicit_increment!(Y⁺, Y, λ, t, Δt) = Y⁺ .+= (Δt * λ) .* Y + explicit_increment!(Y⁺, Y, λ, t, Δt) = + Y⁺ .+= Δt * (1 / (1 + t^2) - λ * atan(t)) + Wfact!(W, Y, λ, Δt, t) = W .= Δt * λ - 1 + tgrad!(∂Y∂t, Y, λ, t) = ∂Y∂t .= -(λ * t^2 + 2 * t + λ) / (1 + t^2)^2 + analytic_sol(u₀, λ, t) = atan(t) + func_args = (; jac_prototype = Y₀, Wfact = Wfact!, tgrad = tgrad!) + tendency_func = ODEFunction(tendency!; func_args...) + increment_func = ForwardEulerODEFunction(increment!; func_args...) 
+ split_tendency_func = SplitFunction( + ODEFunction(implicit_tendency!; func_args...), + explicit_tendency!, + ) + split_increment_func = SplitFunction( + ForwardEulerODEFunction(implicit_increment!; func_args...), + ForwardEulerODEFunction(explicit_increment!), + ) + prob_args = (Y₀, tspan, λ) + ( + ODEProblem(tendency_func, prob_args...), + ODEProblem(increment_func, prob_args...), + ODEProblem(split_tendency_func, prob_args...), + ODEProblem(split_increment_func, prob_args...), + analytic_sol, + ) +end + +#= +From Section 5.1 of "Example Programs for ARKode v4.4.0" by D. R. Reynolds +=# +( + ark_analytic_sys, + ark_analytic_sys_increment, + ark_analytic_sys_split, + ark_analytic_sys_sol, +) = let + FT = Float64 + λ = FT(-100) # increase magnitude for more stiffness + V = FT[1 -1 1; -1 2 1; 0 -1 2] + V⁻¹ = FT[5 1 -3; 2 2 -2; 1 1 1] / 4 + D = Diagonal(FT[-1/2, -1/10, λ]) + A = V * D * V⁻¹ + I = LinearAlgebra.I(3) + Y₀ = FT[1, 1, 1] + tspan = (0, 1/20) + do_nothing!(Yₜ, Y, A, t) = Yₜ .= zero(eltype(Yₜ)) + tendency!(Yₜ, Y, A, t) = mul!(Yₜ, A, Y) + increment!(Y⁺, Y, A, t, Δt) = mul!(Y⁺, A, Y, Δt, 1) + Wfact!(W, Y, A, Δt, t) = W .= Δt .* A .- I + analytic_sol(u₀, A, t) = V * exp(D * t) * V⁻¹ * Y₀ + func_args = (; jac_prototype = similar(A), Wfact = Wfact!) + tendency_func = ODEFunction(tendency!; func_args...) + increment_func = ForwardEulerODEFunction(increment!; func_args...) + split_tendency_func = SplitFunction(ODEFunction(tendency!; func_args...), do_nothing!) + increment_func = ForwardEulerODEFunction(increment!; func_args...) 
+ prob_args = (Y₀, tspan, A) + ( + ODEProblem(tendency_func, prob_args...), + ODEProblem(increment_func, prob_args...), + ODEProblem(split_tendency_func, prob_args...), + analytic_sol, + ) +end + linear_prob_wfactt = ODEProblem( ODEFunction( (du,u,p,t,α=true,β=false) -> (du .= α .* p .* u .+ β .* du); diff --git a/test/utils.jl b/test/utils.jl index 3a5b43b6..d9239314 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -7,12 +7,15 @@ on the set of `dt` values in `dts`. Extra `kwargs` are passed to `solve` `solution` should be a function with a method `solution(u0, p, t)`. """ function convergence_errors(prob, sol, method, dts; kwargs...) + @show sol(prob.u0, prob.p, prob.tspan[end]) errs = map(dts) do dt # copy the problem so we don't mutate u0 prob_copy = deepcopy(prob) u = solve(prob_copy, method; dt=dt, kwargs...) + @show u.u[end] norm(u .- sol(prob.u0, prob.p, prob.tspan[end])) end + @show errs return errs end From 2181133b5180446d79ecc1c8e772be66d62124d2 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Wed, 21 Sep 2022 03:47:39 -0700 Subject: [PATCH 09/21] WIP --- src/solvers/imex_ark_tableaus.jl | 37 +++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/src/solvers/imex_ark_tableaus.jl b/src/solvers/imex_ark_tableaus.jl index c877ec4d..89800ec6 100644 --- a/src/solvers/imex_ark_tableaus.jl +++ b/src/solvers/imex_ark_tableaus.jl @@ -1,7 +1,7 @@ export ARS111, ARS121, ARS122, ARS233, ARS232, ARS222, ARS343, ARS443 export IMKG232a, IMKG232b, IMKG242a, IMKG242b, IMKG252a, IMKG252b export IMKG253a, IMKG253b, IMKG254a, IMKG254b, IMKG254c, IMKG342a, IMKG343a -export DBM453 +export DBM453, HOMMEM1 using StaticArrays: @SArray, SMatrix, sacollect @@ -150,6 +150,13 @@ const ARS443 = make_IMEXARKAlgorithm(; # appear to be wrong, so they are not included here. Eventually, we should get # the official implementations from the paper's authors. 
+# a_exp: a_imp: +# 0 0 ⋯ 0 0 0 0 0 ⋯ 0 0 0 +# α[1] 0 ⋱ ⋮ ⋮ ⋮ α̂[1] δ̂[1] ⋱ ⋮ ⋮ 0 +# β[1] α[2] ⋱ 0 0 0 β[1] α̂[2] ⋱ 0 0 0 +# ⋮ 0 ⋱ 0 0 0 ⋮ 0 ⋱ δ̂[s-3] 0 0 +# ⋮ ⋮ ⋱ α[s-2] 0 0 ⋮ ⋮ ⋱ α̂[s-2] δ̂[s-2] 0 +# β[s-2] 0 ⋯ 0 α[s-1] 0 β[s-2] 0 ⋯ 0 α̂[s-1] 0 imkg_exp(i, j, α, β) = i == j + 1 ? α[j] : (i > 2 && j == 1 ? β[i - 2] : 0) imkg_imp(i, j, α̂, β, δ̂) = i == j + 1 ? α̂[j] : (i > 2 && j == 1 ? β[i - 2] : (1 < i <= length(α̂) && i == j ? δ̂[i - 1] : 0)) @@ -317,3 +324,31 @@ const DBM453 = let ]), ) end + +################################################################################ + +# HOMMEM1 algorithm + +# From Section 4.1 of "A framework to evaluate IMEX schemes for atmospheric +# models" by Guba et al. + +# The algorithm has 5 implicit stages, 6 overall stages, and 2rd order accuracy. + +const HOMMEM1 = make_IMEXARKAlgorithm(; + a_exp = @SArray([ + 0 0 0 0 0 0; + 1/5 0 0 0 0 0; + 0 1/5 0 0 0 0; + 0 0 1/3 0 0 0; + 0 0 0 1/2 0 0; + 0 0 0 0 1 0; + ]), + a_imp = @SArray([ + 0 0 0 0 0 0; + 0 1/5 0 0 0 0; + 0 0 1/5 0 0 0; + 0 0 0 1/3 0 0; + 0 0 0 0 1/2 0; + 5/18 5/18 0 0 0 8/18; + ]), +) From 8a366eca68f00a2e997e6d85e63fef796d6d1457 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Wed, 21 Sep 2022 04:08:30 -0700 Subject: [PATCH 10/21] WIP --- src/solvers/rosenbrock.jl | 82 +++++++++---------- test/convergence.jl | 19 +---- test/problems.jl | 166 +++++++++++++++++++------------------- 3 files changed, 124 insertions(+), 143 deletions(-) diff --git a/src/solvers/rosenbrock.jl b/src/solvers/rosenbrock.jl index 86db7733..09fb61e1 100644 --- a/src/solvers/rosenbrock.jl +++ b/src/solvers/rosenbrock.jl @@ -495,6 +495,47 @@ end return :($values) end +function rosenbrock_step_u!(integrator, cache, f) + (; u, p, t, dt, alg) = integrator + (; update_jac) = alg + (; update_jac_cache, linsolve!) 
= cache + (; Û⁺ᵢ, Ks, W, ḟ) = cache._cache + (; lowerγ⁻¹, âγ⁻¹, diagγ, γ𝟙, â𝟙) = + precomputed_values(typeof(alg), typeof(f)) + function jac_func(Ûᵢ, T̂ᵢ, Δtγᵢᵢ) + f.Wfact(W, Ûᵢ, p, Δtγᵢᵢ, T̂ᵢ) + !isnothing(f.tgrad) && f.tgrad(ḟ, Ûᵢ, p, T̂ᵢ) + end + function stage_func(::Val{i}) where {i} + Δtγᵢᵢ = dt * diagγ[i] + Ûᵢ = i == 1 ? u : Û⁺ᵢ + T̂ᵢ = i == 1 ? t : t + dt * â𝟙[i] + + run!(update_jac, update_jac_cache, NewStage(), jac_func, Ûᵢ, T̂ᵢ, Δtγᵢᵢ) + + # Kᵢ = (Δt * γᵢᵢ * J - I)⁻¹ * Δt * γᵢᵢ * ( + # f(Ûᵢ, T̂ᵢ) + ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ/Δt * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ + # ) + f(Ks[i], Ûᵢ, p, T̂ᵢ) + Lγ⁻¹ᵢⱼKⱼ = linear_combination(lowerγ⁻¹[i], Ks) + Ks[i] .= Δtγᵢᵢ .* broadcasted( + +, + Ks[i], + (isnothing(Lγ⁻¹ᵢⱼKⱼ) ? () : (broadcasted(/, Lγ⁻¹ᵢⱼKⱼ, dt),))..., + (isnothing(f.tgrad) ? () : (broadcasted(*, dt * γ𝟙[i], ḟ),))..., + ) + linsolve!(Ks[i], W, Ks[i]) # assume that linsolve! can handle aliasing + + # Û⁺ᵢ = u - ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ + âγ⁻¹ᵢⱼKⱼ = linear_combination(âγ⁻¹[i], Ks) + Û⁺ᵢ .= isnothing(âγ⁻¹ᵢⱼKⱼ) ? u : broadcasted(-, u, âγ⁻¹ᵢⱼKⱼ) + end + + run!(update_jac, update_jac_cache, NewStep(), jac_func, u, t, dt * diagγ[1]) + foreachval(stage_func, Val(num_stages(typeof(alg)))) + u .= Û⁺ᵢ +end + function rosenbrock_step_u!(integrator, cache, g::ForwardEulerODEFunction) (; u, p, t, dt, alg) = integrator (; update_jac, multiply!, set_Δtγ!) = alg @@ -546,44 +587,3 @@ function rosenbrock_step_u!(integrator, cache, g::ForwardEulerODEFunction) foreachval(stage_func, Val(num_stages(typeof(alg)))) u .= Û⁺ᵢ end - -function rosenbrock_step_u!(integrator, cache, f) - (; u, p, t, dt, alg) = integrator - (; update_jac) = alg - (; update_jac_cache, linsolve!) 
= cache - (; Û⁺ᵢ, Ks, W, ḟ) = cache._cache - (; lowerγ⁻¹, âγ⁻¹, diagγ, γ𝟙, â𝟙) = - precomputed_values(typeof(alg), typeof(f)) - function jac_func(Ûᵢ, T̂ᵢ, Δtγᵢᵢ) - f.Wfact(W, Ûᵢ, p, Δtγᵢᵢ, T̂ᵢ) - !isnothing(f.tgrad) && f.tgrad(ḟ, Ûᵢ, p, T̂ᵢ) - end - function stage_func(::Val{i}) where {i} - Δtγᵢᵢ = dt * diagγ[i] - Ûᵢ = i == 1 ? u : Û⁺ᵢ - T̂ᵢ = i == 1 ? t : t + dt * â𝟙[i] - - run!(update_jac, update_jac_cache, NewStage(), jac_func, Ûᵢ, T̂ᵢ, Δtγᵢᵢ) - - # Kᵢ = (Δt * γᵢᵢ * J - I)⁻¹ * Δt * γᵢᵢ * ( - # f(Ûᵢ, T̂ᵢ) + ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ/Δt * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ - # ) - f(Ks[i], Ûᵢ, p, T̂ᵢ) - Lγ⁻¹ᵢⱼKⱼ = linear_combination(lowerγ⁻¹[i], Ks) - Ks[i] .= Δtγᵢᵢ .* broadcasted( - +, - Ks[i], - (isnothing(Lγ⁻¹ᵢⱼKⱼ) ? () : (broadcasted(/, Lγ⁻¹ᵢⱼKⱼ, dt),))..., - (isnothing(f.tgrad) ? () : (broadcasted(*, dt * γ𝟙[i], ḟ),))..., - ) - linsolve!(Ks[i], W, Ks[i]) # assume that linsolve! can handle aliasing - - # Û⁺ᵢ = u - ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ - âγ⁻¹ᵢⱼKⱼ = linear_combination(âγ⁻¹[i], Ks) - Û⁺ᵢ .= isnothing(âγ⁻¹ᵢⱼKⱼ) ? 
u : broadcasted(-, u, âγ⁻¹ᵢⱼKⱼ) - end - - run!(update_jac, update_jac_cache, NewStep(), jac_func, u, t, dt * diagγ[1]) - foreachval(stage_func, Val(num_stages(typeof(alg)))) - u .= Û⁺ᵢ -end diff --git a/test/convergence.jl b/test/convergence.jl index 4f37b5c5..a638f645 100644 --- a/test/convergence.jl +++ b/test/convergence.jl @@ -39,10 +39,6 @@ for (prob, sol, tscale) in [ end -using Revise, ClimaTimeSteppers, Test -include("test/problems.jl") -include("test/utils.jl") -dts = 0.5 .^ (2:5) @testset "IMEX ARK Methods" begin algs1 = (ARS111, ARS121) algs2 = (ARS122, ARS232, ARS222, IMKG232a, IMKG232b, IMKG242a, IMKG242b) @@ -52,9 +48,8 @@ dts = 0.5 .^ (2:5) for (algorithm_names, order) in ((algs1, 1), (algs2, 2), (algs3, 3)) for algorithm_name in algorithm_names for (problem, solution) in ( - # (split_linear_prob_wfact_split, linear_sol), - # (split_linear_prob_wfact_split_fe, linear_sol), - (ark_analytic_split, ark_analytic_sol), + (split_linear_prob_wfact_split, linear_sol), + (split_linear_prob_wfact_split_fe, linear_sol), ) @show algorithm_name algorithm = algorithm_name( @@ -70,10 +65,6 @@ dts = 0.5 .^ (2:5) end end -using Revise, ClimaTimeSteppers, Test -include("test/problems.jl") -include("test/utils.jl") -dts = 0.5 .^ (4:7) @testset "Rosenbrock-W Methods" begin algs2 = (Rosenbrock23, SSPKnoth, RODASP2, ROS3w, ROS3Pw) algs3 = (ROS34PW1a, ROS34PW1b, ROS34PW2, ROS34PW3) @@ -81,8 +72,6 @@ dts = 0.5 .^ (4:7) for algorithm_name in algorithm_names for (problem, solution) in ( (linear_prob_inexact_wfact, linear_sol), - (ark_analytic_sys, ark_analytic_sys_sol), - # (ark_analytic_sys_increment, ark_analytic_sys_sol), ) algorithm = algorithm_name(; linsolve = linsolve_direct) @test isapprox( @@ -95,10 +84,6 @@ dts = 0.5 .^ (4:7) end end -using Revise, ClimaTimeSteppers, Test -include("test/problems.jl") -include("test/utils.jl") -dts = 0.5 .^ (4:7) @testset "Rosenbrock-W Methods Limiters Formulation" begin # only methods with invertible matrices â can be tested here 
algs2 = (Rosenbrock23, SSPKnoth, RODASP2) diff --git a/test/problems.jl b/test/problems.jl index 30bbcb4b..8bb39bb5 100644 --- a/test/problems.jl +++ b/test/problems.jl @@ -36,89 +36,6 @@ linear_prob = IncrementingODEProblem{true}( linear_prob_fe = ODEProblem( ForwardEulerODEFunction((un,u,p,t,dt) -> (un .= u .+ dt .* p .* u)), [1.0],(0.0,1.0),-0.2) - -#= -From Section 1.1 of "Example Programs for ARKode v4.4.0" by D. R. Reynolds -=# -( - ark_analytic, - ark_analytic_increment, - ark_analytic_split, - ark_analytic_split_increment, - ark_analytic_sol, -) = let - FT = Float64 - λ = FT(-100) # increase magnitude for more stiffness - Y₀ = FT[0] - tspan = (0, 10) - tendency!(Yₜ, Y, λ, t) = Yₜ .= λ .* Y .+ (1 / (1 + t^2) - λ * atan(t)) - increment!(Y⁺, Y, λ, t, Δt) = - Y⁺ .+= Δt .* (λ .* Y .+ (1 / (1 + t^2) - λ * atan(t))) - implicit_tendency!(Yₜ, Y, λ, t) = Yₜ .= λ .* Y - explicit_tendency!(Yₜ, Y, λ, t) = Yₜ .= 1 / (1 + t^2) - λ * atan(t) - implicit_increment!(Y⁺, Y, λ, t, Δt) = Y⁺ .+= (Δt * λ) .* Y - explicit_increment!(Y⁺, Y, λ, t, Δt) = - Y⁺ .+= Δt * (1 / (1 + t^2) - λ * atan(t)) - Wfact!(W, Y, λ, Δt, t) = W .= Δt * λ - 1 - tgrad!(∂Y∂t, Y, λ, t) = ∂Y∂t .= -(λ * t^2 + 2 * t + λ) / (1 + t^2)^2 - analytic_sol(u₀, λ, t) = atan(t) - func_args = (; jac_prototype = Y₀, Wfact = Wfact!, tgrad = tgrad!) - tendency_func = ODEFunction(tendency!; func_args...) - increment_func = ForwardEulerODEFunction(increment!; func_args...) 
- split_tendency_func = SplitFunction( - ODEFunction(implicit_tendency!; func_args...), - explicit_tendency!, - ) - split_increment_func = SplitFunction( - ForwardEulerODEFunction(implicit_increment!; func_args...), - ForwardEulerODEFunction(explicit_increment!), - ) - prob_args = (Y₀, tspan, λ) - ( - ODEProblem(tendency_func, prob_args...), - ODEProblem(increment_func, prob_args...), - ODEProblem(split_tendency_func, prob_args...), - ODEProblem(split_increment_func, prob_args...), - analytic_sol, - ) -end - -#= -From Section 5.1 of "Example Programs for ARKode v4.4.0" by D. R. Reynolds -=# -( - ark_analytic_sys, - ark_analytic_sys_increment, - ark_analytic_sys_split, - ark_analytic_sys_sol, -) = let - FT = Float64 - λ = FT(-100) # increase magnitude for more stiffness - V = FT[1 -1 1; -1 2 1; 0 -1 2] - V⁻¹ = FT[5 1 -3; 2 2 -2; 1 1 1] / 4 - D = Diagonal(FT[-1/2, -1/10, λ]) - A = V * D * V⁻¹ - I = LinearAlgebra.I(3) - Y₀ = FT[1, 1, 1] - tspan = (0, 1/20) - do_nothing!(Yₜ, Y, A, t) = Yₜ .= zero(eltype(Yₜ)) - tendency!(Yₜ, Y, A, t) = mul!(Yₜ, A, Y) - increment!(Y⁺, Y, A, t, Δt) = mul!(Y⁺, A, Y, Δt, 1) - Wfact!(W, Y, A, Δt, t) = W .= Δt .* A .- I - analytic_sol(u₀, A, t) = V * exp(D * t) * V⁻¹ * Y₀ - func_args = (; jac_prototype = similar(A), Wfact = Wfact!) - tendency_func = ODEFunction(tendency!; func_args...) - increment_func = ForwardEulerODEFunction(increment!; func_args...) - split_tendency_func = SplitFunction(ODEFunction(tendency!; func_args...), do_nothing!) - increment_func = ForwardEulerODEFunction(increment!; func_args...) 
- prob_args = (Y₀, tspan, A) - ( - ODEProblem(tendency_func, prob_args...), - ODEProblem(increment_func, prob_args...), - ODEProblem(split_tendency_func, prob_args...), - analytic_sol, - ) -end linear_prob_wfactt = ODEProblem( ODEFunction( @@ -142,8 +59,6 @@ linear_prob_inexact_wfact_fe = ODEProblem( Wfact = (W,u,p,γ,t) -> (W[1,1]=γ*real(p)-1), ), [1/2 + 0.0*im],(0.0,1.0),-0.2+0.1*im) -multiply_direct!(b, A, x) = b .= A * x -set_Δtγ_direct!(A, Δtγ_new, Δtγ_old) = A .= (A + I) * Δtγ_new / Δtγ_old - I split_linear_prob_wfact_split = ODEProblem( SplitFunction( @@ -221,6 +136,8 @@ function linsolve_direct(::Type{Val{:init}}, f, u0; kwargs...) x .= A \ b end end +multiply_direct!(b, A, x) = b .= A * x +set_Δtγ_direct!(A, Δtγ_new, Δtγ_old) = A .= (A + I) * Δtγ_new / Δtγ_old - I imex_autonomous_prob_jac = ODEProblem( ODEFunction( @@ -318,3 +235,82 @@ kpr_singlerate_prob = IncrementingODEProblem{true}( (dQ,Q,param,t,α=1,β=0) -> (dQ .= α .* kpr_rhs(Q,param,t) .+ β .* dQ), [sqrt(4), sqrt(3)], (0.0, 5π/2), kpr_param, ) + +# From Section 1.1 of "Example Programs for ARKode v4.4.0" by D. R. Reynolds +( + ark_analytic, + ark_analytic_increment, + ark_analytic_split, + ark_analytic_split_increment, + ark_analytic_sol, +) = let + FT = Float64 + λ = FT(-100) # increase magnitude for more stiffness + Y₀ = FT[0] + tspan = (0, 10) + tendency!(Yₜ, Y, λ, t) = Yₜ .= λ .* Y .+ (1 / (1 + t^2) - λ * atan(t)) + increment!(Y⁺, Y, λ, t, Δt) = + Y⁺ .+= Δt .* (λ .* Y .+ (1 / (1 + t^2) - λ * atan(t))) + implicit_tendency!(Yₜ, Y, λ, t) = Yₜ .= λ .* Y + explicit_tendency!(Yₜ, Y, λ, t) = Yₜ .= 1 / (1 + t^2) - λ * atan(t) + implicit_increment!(Y⁺, Y, λ, t, Δt) = Y⁺ .+= (Δt * λ) .* Y + explicit_increment!(Y⁺, Y, λ, t, Δt) = + Y⁺ .+= Δt * (1 / (1 + t^2) - λ * atan(t)) + Wfact!(W, Y, λ, Δt, t) = W .= Δt * λ - 1 + tgrad!(∂Y∂t, Y, λ, t) = ∂Y∂t .= -(λ * t^2 + 2 * t + λ) / (1 + t^2)^2 + analytic_sol(u₀, λ, t) = atan(t) + func_args = (; jac_prototype = Y₀, Wfact = Wfact!, tgrad = tgrad!) 
+ tendency_func = ODEFunction(tendency!; func_args...) + increment_func = ForwardEulerODEFunction(increment!; func_args...) + split_tendency_func = SplitFunction( + ODEFunction(implicit_tendency!; func_args...), + explicit_tendency!, + ) + split_increment_func = SplitFunction( + ForwardEulerODEFunction(implicit_increment!; func_args...), + ForwardEulerODEFunction(explicit_increment!), + ) + prob_args = (Y₀, tspan, λ) + ( + ODEProblem(tendency_func, prob_args...), + ODEProblem(increment_func, prob_args...), + ODEProblem(split_tendency_func, prob_args...), + ODEProblem(split_increment_func, prob_args...), + analytic_sol, + ) +end + +# From Section 5.1 of "Example Programs for ARKode v4.4.0" by D. R. Reynolds +( + ark_analytic_sys, + ark_analytic_sys_increment, + ark_analytic_sys_split, + ark_analytic_sys_sol, +) = let + FT = Float64 + λ = FT(-100) # increase magnitude for more stiffness + V = FT[1 -1 1; -1 2 1; 0 -1 2] + V⁻¹ = FT[5 1 -3; 2 2 -2; 1 1 1] / 4 + D = Diagonal(FT[-1/2, -1/10, λ]) + A = V * D * V⁻¹ + I = LinearAlgebra.I(3) + Y₀ = FT[1, 1, 1] + tspan = (0, 1/20) + do_nothing!(Yₜ, Y, A, t) = Yₜ .= zero(eltype(Yₜ)) + tendency!(Yₜ, Y, A, t) = mul!(Yₜ, A, Y) + increment!(Y⁺, Y, A, t, Δt) = mul!(Y⁺, A, Y, Δt, 1) + Wfact!(W, Y, A, Δt, t) = W .= Δt .* A .- I + analytic_sol(u₀, A, t) = V * exp(D * t) * V⁻¹ * Y₀ + func_args = (; jac_prototype = similar(A), Wfact = Wfact!) + tendency_func = ODEFunction(tendency!; func_args...) + increment_func = ForwardEulerODEFunction(increment!; func_args...) + split_tendency_func = SplitFunction(ODEFunction(tendency!; func_args...), do_nothing!) + increment_func = ForwardEulerODEFunction(increment!; func_args...) 
+ prob_args = (Y₀, tspan, A) + ( + ODEProblem(tendency_func, prob_args...), + ODEProblem(increment_func, prob_args...), + ODEProblem(split_tendency_func, prob_args...), + analytic_sol, + ) +end From 13f77494fe8e70498aa1329a04d6324506ae054f Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Wed, 21 Sep 2022 04:21:37 -0700 Subject: [PATCH 11/21] WIP --- test/utils.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/utils.jl b/test/utils.jl index d9239314..3a5b43b6 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -7,15 +7,12 @@ on the set of `dt` values in `dts`. Extra `kwargs` are passed to `solve` `solution` should be a function with a method `solution(u0, p, t)`. """ function convergence_errors(prob, sol, method, dts; kwargs...) - @show sol(prob.u0, prob.p, prob.tspan[end]) errs = map(dts) do dt # copy the problem so we don't mutate u0 prob_copy = deepcopy(prob) u = solve(prob_copy, method; dt=dt, kwargs...) - @show u.u[end] norm(u .- sol(prob.u0, prob.p, prob.tspan[end])) end - @show errs return errs end From dfee79d018d33ade442d4198599bc3d51f861365 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Wed, 21 Sep 2022 05:42:11 -0700 Subject: [PATCH 12/21] WIP --- test/convergence.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/convergence.jl b/test/convergence.jl index a638f645..76dd3303 100644 --- a/test/convergence.jl +++ b/test/convergence.jl @@ -19,12 +19,12 @@ for (prob, sol, tscale) in [ @test convergence_order(prob, sol, SSPRK34SpiteriRuuth(), dts.*tscale) ≈ 3 atol=0.05 end -for (prob, sol, tscale) in [ - (linear_prob_wfactt, linear_sol, 1) -] - @test convergence_order(prob, sol, SSPKnoth(linsolve=linsolve_direct), dts.*tscale) ≈ 2 atol=0.05 +# for (prob, sol, tscale) in [ +# (linear_prob_wfactt, linear_sol, 1) +# ] +# @test convergence_order(prob, sol, SSPKnoth(linsolve=linsolve_direct), dts.*tscale) ≈ 2 atol=0.05 -end +# end # ForwardEulerODEFunction From e347f0c338a9e329abd6f1257bb0fab1d9d3aef7 Mon Sep 17 
00:00:00 2001 From: Dennis Yatunin Date: Fri, 23 Sep 2022 11:11:24 -0700 Subject: [PATCH 13/21] Add working plots --- .buildkite/pipeline.yml | 21 ++++ src/ClimaTimeSteppers.jl | 1 + src/integrators.jl | 2 +- src/solvers/rosenbrock.jl | 105 ++++++++------------ test/Project.toml | 2 + test/convergence.jl | 196 ++++++++++++++++++++++---------------- test/problems.jl | 157 +++++++++++++++++------------- test/utils.jl | 107 +++++++++++++++++++++ 8 files changed, 376 insertions(+), 215 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 46f0e5e8..cf5ad3e4 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -18,6 +18,18 @@ steps: queue: central slurm_ntasks: 1 + - label: "init test env" + key: "init_test_env" + command: + - "echo $JULIA_DEPOT_PATH" + - "julia --project=test -e 'using Pkg; Pkg.instantiate(;verbose=true)'" + - "julia --project=test -e 'using Pkg; Pkg.precompile()'" + - "julia --project=test -e 'using Pkg; Pkg.status()'" + agents: + config: cpu + queue: central + slurm_ntasks: 1 + - label: "init gpu env" key: "init_gpu_env" command: @@ -35,6 +47,15 @@ steps: - wait + - label: "Unit tests and plots" + command: + - "julia --project=test test/runtests.jl" + artifact_paths: "test/output/*" + agents: + config: cpu + queue: central + slurm_ntasks: 1 + - label: "GPU tests" command: - "julia --project -e 'using Pkg; Pkg.test(;test_args=[\"CuArray\"])'" diff --git a/src/ClimaTimeSteppers.jl b/src/ClimaTimeSteppers.jl index 620618c6..feb758f6 100644 --- a/src/ClimaTimeSteppers.jl +++ b/src/ClimaTimeSteppers.jl @@ -70,6 +70,7 @@ end SciMLBase.allowscomplex(alg::DistributedODEAlgorithm) = true include("integrators.jl") +include("solvers/matrix_utilities.jl") include("solvers/update_signal_handler.jl") include("solvers/convergence_condition.jl") include("solvers/convergence_checker.jl") diff --git a/src/integrators.jl b/src/integrators.jl index 209a53a5..1f515a6b 100644 --- a/src/integrators.jl +++ b/src/integrators.jl @@ 
-29,7 +29,7 @@ function DiffEqBase.__init( args...; dt, # required stepstop=-1, - adjustfinal=false, + adjustfinal=true, callback=nothing, save_func=(u,t,integrator)->copy(u), saveat=nothing, diff --git a/src/solvers/rosenbrock.jl b/src/solvers/rosenbrock.jl index 09fb61e1..e3333a07 100644 --- a/src/solvers/rosenbrock.jl +++ b/src/solvers/rosenbrock.jl @@ -70,13 +70,26 @@ than subtracting it from another value, we will rewrite this as Δt * ḟ * ∑_{j=1}^i γᵢⱼ ) - γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ. +To simplify our further reformulations, we will convert our definitions to +matrix form. +First, though, we will reduce the number of matrix equations we need to analyze +by defining + âᵢⱼ := i < s ? a₍ᵢ₊₁₎ⱼ : bⱼ and + Û⁺ᵢ := i < s ? Ûᵢ₊₁ : u_next. +We can then redefine + u_next := Û⁺ₛ and + Ûᵢ := i == 1 ? u : Û⁺ᵢ₋₁, where + Û⁺ᵢ := u + Δt * ∑_{j=1}^i âᵢⱼ * Fⱼ. + So, in summary, a Rosenbrock method is defined by some lower triangular s×s matrix γ, some strictly lower triangular s×s matrix a, some vector b of length s, and some approximations J and ḟ of ∂f/∂u(Ûᵢ, T̂ᵢ) and ∂f/∂t(Ûᵢ, T̂ᵢ), which are all used to compute - u_next := u + Δt * ∑_{i=1}^s bᵢ * Fᵢ, where - Ûᵢ := u + Δt * ∑_{j=1}^{i-1} aᵢⱼ * Fⱼ, - T̂ᵢ := t + Δt * ∑_{j=1}^{i-1} aᵢⱼ, and + u_next := Û⁺ₛ, where + âᵢⱼ := i < s ? a₍ᵢ₊₁₎ⱼ : bⱼ, + Ûᵢ := i == 1 ? u : Û⁺ᵢ₋₁, + T̂ᵢ := t + Δt * ∑_{j=1}^{i-1} aᵢⱼ, + Û⁺ᵢ := u + Δt * ∑_{j=1}^i âᵢⱼ * Fⱼ, and Fᵢ := (I - Δt * γᵢᵢ * J)⁻¹ * ( f(Ûᵢ, T̂ᵢ) + γᵢᵢ⁻¹ * ∑_{j=1}^{i-1} γᵢⱼ * Fⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ @@ -84,18 +97,6 @@ are all used to compute ## Converting to Matrix Form -To simplify our further reformulations, we will convert our definitions to -matrix form. - -First, we will reduce the number of matrix equations we need to analyze by -defining - âᵢⱼ := i < s ? a₍ᵢ₊₁₎ⱼ : bⱼ and - Û⁺ᵢ := i < s ? Ûᵢ₊₁ : u_next. -We can then redefine - u_next := Û⁺ₛ and - Ûᵢ := i == 1 ? u : Û⁺ᵢ₋₁, where - Û⁺ᵢ := u + Δt * ∑_{j=1}^i âᵢⱼ * Fⱼ. 
- The equations we will convert to matrix form are Û⁺ᵢ = u + Δt * ∑_{j=1}^i âᵢⱼ * Fⱼ and Fᵢ = f(Ûᵢ, T̂ᵢ) + Δt * J * ∑_{j=1}^i γᵢⱼ * Fⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ. @@ -337,6 +338,8 @@ For the limiters formulation, this means that Δt * âᵢᵢ, ). =# +export RosenbrockAlgorithm + import LinearAlgebra import StaticArrays: SUnitRange, SOneTo import Base: broadcasted, materialize! @@ -362,36 +365,6 @@ function RosenbrockAlgorithm{γ, a, b}(; ) end -@generated foreachval(f::F, ::Val{N}) where {F, N} = - quote - Base.@nexprs $N i -> f(Val(i)) - return nothing - end -lower_triangular_inv(matrix::T) where {T} = - T(inv(LinearAlgebra.LowerTriangular(matrix))) -lower_plus_diagonal(matrix::T) where {T} = - T(LinearAlgebra.LowerTriangular(matrix)) -diagonal(matrix::T) where {T} = T(LinearAlgebra.Diagonal(matrix)) -lower(matrix) = lower_plus_diagonal(matrix) - diagonal(matrix) -to_enumerated_rows(x) = x -function to_enumerated_rows(matrix::AbstractMatrix) - rows = tuple(1:size(matrix, 1)...) - nonzero_indices = map(i -> findall(matrix[i, :] .!= 0), rows) - enumerated_rows = map( - i -> tuple(zip(nonzero_indices[i], matrix[i, nonzero_indices[i]])...), - rows, - ) - return enumerated_rows -end -linear_combination_terms(enumerated_row, vectors) = - map(((j, val),) -> broadcasted(*, val, vectors[j]), enumerated_row) -function linear_combination(enumerated_row, vectors) - length(enumerated_row) == 0 && return nothing - terms = linear_combination_terms(enumerated_row, vectors) - length(enumerated_row) == 1 && return terms[1] - return broadcasted(+, terms...) 
-end - num_stages(::Type{<:RosenbrockAlgorithm{γ}}) where {γ} = size(γ, 1) function check_valid_parameters( @@ -473,10 +446,10 @@ step_u!(integrator, cache::RosenbrockCache) = â = vcat(a[2:end, :], transpose(b)) γ⁻¹ = lower_triangular_inv(γ) γ𝟙 = vec(sum(γ, dims = 2)) - â𝟙 = vec(sum(â, dims = 2)) + a𝟙 = vec(sum(a, dims = 2)) matrices = map(to_enumerated_rows, (; lowerγ⁻¹ = lower(γ⁻¹), âγ⁻¹ = â * γ⁻¹)) - values = (; matrices..., diagγ = diag(γ), γ𝟙, â𝟙) + values = (; matrices..., diagγ = diag(γ), γ𝟙, a𝟙) return :($values) end @generated function precomputed_values( @@ -486,12 +459,12 @@ end â = vcat(a[2:end, :], transpose(b)) âγ = â * γ β = â * lower_triangular_inv(âγ) * diagonal(γ) - â𝟙 = vec(sum(â, dims = 2)) + a𝟙 = vec(sum(a, dims = 2)) âγ𝟙 = vec(sum(âγ, dims = 2)) β⁻¹𝟙 = vec(sum(lower_triangular_inv(β), dims = 2)) matrices = map(to_enumerated_rows, (; lowerâ = lower(â), lowerβ = lower(β), β)) - values = (; matrices..., diagγ = diag(γ), diagâ = diag(â), â𝟙, âγ𝟙, β⁻¹𝟙) + values = (; matrices..., diagγ = diag(γ), diagâ = diag(â), a𝟙, âγ𝟙, β⁻¹𝟙) return :($values) end @@ -500,7 +473,7 @@ function rosenbrock_step_u!(integrator, cache, f) (; update_jac) = alg (; update_jac_cache, linsolve!) = cache (; Û⁺ᵢ, Ks, W, ḟ) = cache._cache - (; lowerγ⁻¹, âγ⁻¹, diagγ, γ𝟙, â𝟙) = + (; lowerγ⁻¹, âγ⁻¹, diagγ, γ𝟙, a𝟙) = precomputed_values(typeof(alg), typeof(f)) function jac_func(Ûᵢ, T̂ᵢ, Δtγᵢᵢ) f.Wfact(W, Ûᵢ, p, Δtγᵢᵢ, T̂ᵢ) @@ -508,32 +481,33 @@ function rosenbrock_step_u!(integrator, cache, f) end function stage_func(::Val{i}) where {i} Δtγᵢᵢ = dt * diagγ[i] - Ûᵢ = i == 1 ? u : Û⁺ᵢ - T̂ᵢ = i == 1 ? t : t + dt * â𝟙[i] + + Ûᵢ = i == 1 ? u : Û⁺ᵢ # Ûᵢ := i == 1 ? u : Û⁺ᵢ₋₁ + T̂ᵢ = i == 1 ? 
t : t + dt * a𝟙[i] # T̂ᵢ := t + Δt * ∑_{j=1}^{i-1} aᵢⱼ run!(update_jac, update_jac_cache, NewStage(), jac_func, Ûᵢ, T̂ᵢ, Δtγᵢᵢ) - # Kᵢ = (Δt * γᵢᵢ * J - I)⁻¹ * Δt * γᵢᵢ * ( + # Kᵢ := (Δt * γᵢᵢ * J - I)⁻¹ * Δt * γᵢᵢ * ( # f(Ûᵢ, T̂ᵢ) + ∑_{j=1}^{i-1} (γ⁻¹)ᵢⱼ/Δt * Kⱼ + Δt * ḟ * ∑_{j=1}^i γᵢⱼ # ) f(Ks[i], Ûᵢ, p, T̂ᵢ) - Lγ⁻¹ᵢⱼKⱼ = linear_combination(lowerγ⁻¹[i], Ks) + lγ⁻¹ᵢⱼKⱼ = linear_combination(lowerγ⁻¹[i], Ks) Ks[i] .= Δtγᵢᵢ .* broadcasted( +, Ks[i], - (isnothing(Lγ⁻¹ᵢⱼKⱼ) ? () : (broadcasted(/, Lγ⁻¹ᵢⱼKⱼ, dt),))..., + (isnothing(lγ⁻¹ᵢⱼKⱼ) ? () : (broadcasted(/, lγ⁻¹ᵢⱼKⱼ, dt),))..., (isnothing(f.tgrad) ? () : (broadcasted(*, dt * γ𝟙[i], ḟ),))..., ) linsolve!(Ks[i], W, Ks[i]) # assume that linsolve! can handle aliasing - # Û⁺ᵢ = u - ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ + # Û⁺ᵢ := u - ∑_{j=1}^i (â * γ⁻¹)ᵢⱼ * Kⱼ âγ⁻¹ᵢⱼKⱼ = linear_combination(âγ⁻¹[i], Ks) Û⁺ᵢ .= isnothing(âγ⁻¹ᵢⱼKⱼ) ? u : broadcasted(-, u, âγ⁻¹ᵢⱼKⱼ) end run!(update_jac, update_jac_cache, NewStep(), jac_func, u, t, dt * diagγ[1]) foreachval(stage_func, Val(num_stages(typeof(alg)))) - u .= Û⁺ᵢ + u .= Û⁺ᵢ # u_next := Û⁺ₛ end function rosenbrock_step_u!(integrator, cache, g::ForwardEulerODEFunction) @@ -541,7 +515,7 @@ function rosenbrock_step_u!(integrator, cache, g::ForwardEulerODEFunction) (; update_jac, multiply!, set_Δtγ!) = alg (; update_jac_cache, linsolve!) = cache (; Û⁺ᵢ, Vs, Fs, W, ḟ) = cache._cache - (; lowerâ, lowerβ, β, diagγ, diagâ, â𝟙, âγ𝟙, β⁻¹𝟙) = + (; lowerâ, lowerβ, β, diagγ, diagâ, a𝟙, âγ𝟙, β⁻¹𝟙) = precomputed_values(typeof(alg), typeof(g)) function jac_func(Ûᵢ, T̂ᵢ, Δtγᵢᵢ) g.Wfact(W, Ûᵢ, p, Δtγᵢᵢ, T̂ᵢ) @@ -550,12 +524,13 @@ function rosenbrock_step_u!(integrator, cache, g::ForwardEulerODEFunction) function stage_func(::Val{i}) where {i} Δtγᵢᵢ = dt * diagγ[i] Δtâᵢᵢ = dt * diagâ[i] - Ûᵢ = i == 1 ? u : Û⁺ᵢ - T̂ᵢ = i == 1 ? t : t + dt * â𝟙[i] + + Ûᵢ = i == 1 ? u : Û⁺ᵢ # Ûᵢ := i == 1 ? u : Û⁺ᵢ₋₁ + T̂ᵢ = i == 1 ? 
t : t + dt * a𝟙[i] # T̂ᵢ := t + Δt * ∑_{j=1}^{i-1} aᵢⱼ run!(update_jac, update_jac_cache, NewStage(), jac_func, Ûᵢ, T̂ᵢ, Δtγᵢᵢ) - # Vᵢ = (Δt * γᵢᵢ * J - I)⁻¹ * g( + # Vᵢ := (Δt * γᵢᵢ * J - I)⁻¹ * g( # u - Δt * γᵢᵢ * J * u * ∑_{j=1}^i (β⁻¹)ᵢⱼ + # Δt * ∑_{j=1}^{i-1} âᵢⱼ * f(Ûⱼ, T̂ⱼ) + # Δt² * ḟ * ∑_{j=1}^i (â * γ)ᵢⱼ + ∑_{j=1}^{i-1} βᵢⱼ * Vⱼ, @@ -566,11 +541,11 @@ function rosenbrock_step_u!(integrator, cache, g::ForwardEulerODEFunction) set_Δtγ!(W, Δtγᵢᵢ * β⁻¹𝟙[i], Δtγᵢᵢ) multiply!(Vs[i], W, u) set_Δtγ!(W, Δtγᵢᵢ, Δtγᵢᵢ * β⁻¹𝟙[i]) - LâᵢⱼFⱼ = linear_combination(lowerâ[i], Fs) + lâᵢⱼFⱼ = linear_combination(lowerâ[i], Fs) Vs[i] .= broadcasted( +, broadcasted(-, Vs[i]), - (isnothing(LâᵢⱼFⱼ) ? () : (broadcasted(*, dt, LâᵢⱼFⱼ),))..., + (isnothing(lâᵢⱼFⱼ) ? () : (broadcasted(*, dt, lâᵢⱼFⱼ),))..., (isnothing(g.tgrad) ? () : (broadcasted(*, dt^2 * âγ𝟙[i], ḟ),))..., linear_combination_terms(lowerβ[i], Vs)..., ) @@ -579,11 +554,11 @@ function rosenbrock_step_u!(integrator, cache, g::ForwardEulerODEFunction) Fs[i] .= (Vs[i] .- Fs[i]) ./ Δtâᵢᵢ linsolve!(Vs[i], W, Vs[i]) # assume that linsolve! 
can handle aliasing - # Û⁺ᵢ = -∑_{j=1}^i βᵢⱼ * Vᵢ + # Û⁺ᵢ := -∑_{j=1}^i βᵢⱼ * Vᵢ Û⁺ᵢ .= broadcasted(-, linear_combination(β[i], Vs)) end run!(update_jac, update_jac_cache, NewStep(), jac_func, u, t, dt * diagγ[1]) foreachval(stage_func, Val(num_stages(typeof(alg)))) - u .= Û⁺ᵢ + u .= Û⁺ᵢ # u_next := Û⁺ₛ end diff --git a/test/Project.toml b/test/Project.toml index 638e2fa2..b6e181ff 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -6,6 +6,8 @@ DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" diff --git a/test/convergence.jl b/test/convergence.jl index 76dd3303..7242b5b3 100644 --- a/test/convergence.jl +++ b/test/convergence.jl @@ -1,111 +1,139 @@ -using DiffEqBase, ClimaTimeSteppers, LinearAlgebra, Test +using ClimaTimeSteppers +using OrdinaryDiffEq, LinearAlgebra, Test, Plots, BenchmarkTools include("utils.jl") -dts = 0.5 .^ (4:7) +dt_fracs = 0.5 .^ (4:7) -for (prob, sol, tscale) in [ - (linear_prob, linear_sol, 1) - (sincos_prob, sincos_sol, 1) +for (prob, sol) in [ + (linear_prob, linear_sol) + (sincos_prob, sincos_sol) ] + dts = dt_fracs .* prob.tspan[end] - @test convergence_order(prob, sol, LSRKEulerMethod(), dts.*tscale) ≈ 1 atol=0.1 - @test convergence_order(prob, sol, LSRK54CarpenterKennedy(), dts.*tscale) ≈ 4 atol=0.05 - @test convergence_order(prob, sol, LSRK144NiegemannDiehlBusch(), dts.*tscale) ≈ 4 atol=0.05 + @test convergence_order(prob, sol, LSRKEulerMethod(), dts) ≈ 1 atol=0.1 + @test convergence_order(prob, sol, LSRK54CarpenterKennedy(), dts) ≈ 4 atol=0.05 + @test convergence_order(prob, sol, 
LSRK144NiegemannDiehlBusch(), dts) ≈ 4 atol=0.05 - @test convergence_order(prob, sol, SSPRK22Heuns(), dts.*tscale) ≈ 2 atol=0.05 - @test convergence_order(prob, sol, SSPRK22Ralstons(), dts.*tscale) ≈ 2 atol=0.05 - @test convergence_order(prob, sol, SSPRK33ShuOsher(), dts.*tscale) ≈ 3 atol=0.05 - @test convergence_order(prob, sol, SSPRK34SpiteriRuuth(), dts.*tscale) ≈ 3 atol=0.05 + @test convergence_order(prob, sol, SSPRK22Heuns(), dts) ≈ 2 atol=0.05 + @test convergence_order(prob, sol, SSPRK22Ralstons(), dts) ≈ 2 atol=0.05 + @test convergence_order(prob, sol, SSPRK33ShuOsher(), dts) ≈ 3 atol=0.05 + @test convergence_order(prob, sol, SSPRK34SpiteriRuuth(), dts) ≈ 3 atol=0.05 end -# for (prob, sol, tscale) in [ -# (linear_prob_wfactt, linear_sol, 1) -# ] -# @test convergence_order(prob, sol, SSPKnoth(linsolve=linsolve_direct), dts.*tscale) ≈ 2 atol=0.05 - -# end - # ForwardEulerODEFunction -for (prob, sol, tscale) in [ - (linear_prob_fe, linear_sol, 1) - (sincos_prob_fe, sincos_sol, 1) +for (prob, sol) in [ + (linear_prob_fe, linear_sol) + (sincos_prob_fe, sincos_sol) ] - @test convergence_order(prob, sol, SSPRK22Heuns(), dts.*tscale) ≈ 2 atol=0.05 - @test convergence_order(prob, sol, SSPRK22Ralstons(), dts.*tscale) ≈ 2 atol=0.05 - @test convergence_order(prob, sol, SSPRK33ShuOsher(), dts.*tscale) ≈ 3 atol=0.05 - @test convergence_order(prob, sol, SSPRK34SpiteriRuuth(), dts.*tscale) ≈ 3 atol=0.05 + dts = dt_fracs .* prob.tspan[end] + + @test convergence_order(prob, sol, SSPRK22Heuns(), dts) ≈ 2 atol=0.05 + @test convergence_order(prob, sol, SSPRK22Ralstons(), dts) ≈ 2 atol=0.05 + @test convergence_order(prob, sol, SSPRK33ShuOsher(), dts) ≈ 3 atol=0.05 + @test convergence_order(prob, sol, SSPRK34SpiteriRuuth(), dts) ≈ 3 atol=0.05 end -@testset "IMEX ARK Methods" begin - algs1 = (ARS111, ARS121) - algs2 = (ARS122, ARS232, ARS222, IMKG232a, IMKG232b, IMKG242a, IMKG242b) - algs2 = (algs2..., IMKG252a, IMKG252b, IMKG253a, IMKG253b, IMKG254a) - algs2 = (algs2..., IMKG254b, 
IMKG254c, HOMMEM1) - algs3 = (ARS233, ARS343, ARS443, IMKG342a, IMKG343a, DBM453) - for (algorithm_names, order) in ((algs1, 1), (algs2, 2), (algs3, 3)) - for algorithm_name in algorithm_names - for (problem, solution) in ( - (split_linear_prob_wfact_split, linear_sol), - (split_linear_prob_wfact_split_fe, linear_sol), - ) - @show algorithm_name - algorithm = algorithm_name( - NewtonsMethod(; linsolve = linsolve_direct, max_iters = 1), - ) # the problem is linear, so more iters have no effect - @test isapprox( - convergence_order(problem, solution, algorithm, dts), - order; - rtol = 0.01, - ) +@testset "Test Algorithms and Generate Plots" begin + imex_ark_dict = let + dict = Dict{Type, Int}() + algs1 = (ARS111, ARS121) + algs2 = (ARS122, ARS232, ARS222, IMKG232a, IMKG232b, IMKG242a, IMKG242b) + algs2 = (algs2..., IMKG252a, IMKG252b, IMKG253a, IMKG253b, IMKG254a) + algs2 = (algs2..., IMKG254b, IMKG254c, HOMMEM1) + algs3 = (ARS233, ARS343, ARS443, IMKG342a, IMKG343a, DBM453) + for (algs, order) in ((algs1, 1), (algs2, 2), (algs3, 3)) + for alg in algs + dict[alg] = order end end + dict end -end -@testset "Rosenbrock-W Methods" begin - algs2 = (Rosenbrock23, SSPKnoth, RODASP2, ROS3w, ROS3Pw) - algs3 = (ROS34PW1a, ROS34PW1b, ROS34PW2, ROS34PW3) - for (algorithm_names, order) in ((algs2, 2), (algs3, 3)) - for algorithm_name in algorithm_names - for (problem, solution) in ( - (linear_prob_inexact_wfact, linear_sol), - ) - algorithm = algorithm_name(; linsolve = linsolve_direct) - @test isapprox( - convergence_order(problem, solution, algorithm, dts), - order; - rtol = 0.01, - ) + test_algs("IMEX ARK", imex_ark_dict, ark_analytic_sys, 7) + + test_algs("IMEX ARK", imex_ark_dict, ark_analytic_nonlin, 10) + + # For some bizarre reason, ARS121 converges with order 2 for ark_analytic. 
+ imex_ark_dict_ark_analytic = copy(imex_ark_dict) + imex_ark_dict_ark_analytic[ARS121] = 2 + test_algs("IMEX ARK", imex_ark_dict_ark_analytic, ark_analytic, 15) + + rosenbrock_dict = let + dict = Dict{Type, Int}() + algs2 = (Rosenbrock23, SSPKnoth) + algs3 = (ROS3w, ROS3Pw, ROS34PW1a, ROS34PW1b, ROS34PW2) + algs4 = (ROS34PW3,) + for (algs, order) in ((algs2, 2), (algs3, 3), (algs4, 4)) + for alg in algs + dict[alg] = order end end + dict end + no_increment_algs = (ROS3w, ROS3Pw, ROS34PW1a, ROS34PW1b) + + # 6 also works, but we'll keep it at 7 to match the IMEX ARK plots. + test_algs( + "Rosenbrock", + rosenbrock_dict, + ark_analytic_sys, + 7; + no_increment_algs, + ) + + # RODASP2 needs a larger dt than the other methods, so it's skipped here. + rosenbrock_dict′ = copy(rosenbrock_dict) + delete!(rosenbrock_dict′, RODASP2) + test_algs( + "Rosenbrock", + rosenbrock_dict′, + ark_analytic_nonlin, + 10; + no_increment_algs, + ) + + # The 3rd order algorithms need a larger dt than the 2nd order ones, and + # neither of the 4th order algorithms converge within an rtol of 0.1 of + # the predicted order for any value of dt. + algs_name = "Rosenbrock 2nd Order" + rosenbrock2_dict = Dict((Rosenbrock23, SSPKnoth) .=> 2) + test_algs(algs_name, rosenbrock2_dict, ark_analytic, 15) + algs_name = "Rosenbrock 3rd Order" + rosenbrock3_dict = + Dict((ROS3w, ROS3Pw, ROS34PW1a, ROS34PW1b, ROS34PW2) .=> 3) + test_algs(algs_name, rosenbrock3_dict, ark_analytic, 12; no_increment_algs) end -@testset "Rosenbrock-W Methods Limiters Formulation" begin - # only methods with invertible matrices â can be tested here - algs2 = (Rosenbrock23, SSPKnoth, RODASP2) - algs3 = (ROS34PW2, ROS34PW3) - for (algorithm_names, order) in ((algs2, 2), (algs3, 3)) - for algorithm_name in algorithm_names - for (problem, solution) in ( - (linear_prob_inexact_wfact_fe, linear_sol), - ) - algorithm = algorithm_name(; - linsolve = linsolve_direct, - multiply! = multiply_direct!, - set_Δtγ! 
= set_Δtγ_direct!, - ) - @test isapprox( - convergence_order(problem, solution, algorithm, dts), - order; - rtol = 0.01, - ) - end - end - end +using OrdinaryDiffEq, BenchmarkTools +@testset "Compare with OrdinaryDiffEq" begin + (; t_end, probs, analytic_sol) = ark_analytic_sys + FT = typeof(t_end) + tendency_prob = probs[1] + dt = t_end / 2^7 + + cts_alg = Rosenbrock23(; linsolve = linsolve_direct) + ode_alg = OrdinaryDiffEq.Rosenbrock23(; linsolve = linsolve_direct) + + cts_tendency_end_sol = solve(deepcopy(tendency_prob), cts_alg; dt).u[end] + ode_tendency_end_sol = + solve(deepcopy(tendency_prob), ode_alg; dt, adaptive = false).u[end] + @test norm(cts_tendency_end_sol .- ode_tendency_end_sol) < eps(FT) + + @info "Benchmark Results for ClimaTimeSteppers.Rosenbrock23:" + cts_trial = @benchmark solve($(deepcopy(tendency_prob)), $cts_alg, dt = $dt) + + @info "Benchmark Results for OrdinaryDiffEq.Rosenbrock23:" + ode_trial = @benchmark solve( + $(deepcopy(tendency_prob)), + $ode_alg, + dt = $dt, + adaptive = false, + ) + + @test median(cts_trial).time ≈ median(ode_trial).time rtol = 0.03 end #= diff --git a/test/problems.jl b/test/problems.jl index 8bb39bb5..c4a8ea83 100644 --- a/test/problems.jl +++ b/test/problems.jl @@ -45,21 +45,6 @@ linear_prob_wfactt = ODEProblem( ), [1/2],(0.0,1.0),-0.2) -linear_prob_inexact_wfact = ODEProblem( - ODEFunction( - (du,u,p,t) -> (du .= p .* u); - jac_prototype=zeros(ComplexF64,1,1), - Wfact = (W,u,p,γ,t) -> (W[1,1]=γ*real(p)-1), - ), - [1/2 + 0.0*im],(0.0,1.0),-0.2+0.1*im) -linear_prob_inexact_wfact_fe = ODEProblem( - ForwardEulerODEFunction( - (ux,u,p,t,dt) -> (ux .+= dt .* p .* u); - jac_prototype=zeros(ComplexF64,1,1), - Wfact = (W,u,p,γ,t) -> (W[1,1]=γ*real(p)-1), - ), - [1/2 + 0.0*im],(0.0,1.0),-0.2+0.1*im) - split_linear_prob_wfact_split = ODEProblem( SplitFunction( ODEFunction( @@ -132,12 +117,11 @@ imex_autonomous_prob = SplitODEProblem( ArrayType([0.5]), (0.0,1.0), 4.0) function linsolve_direct(::Type{Val{:init}}, f, u0; 
kwargs...) - function _linsolve!(x, A, b, update_matrix = false; kwargs...) - x .= A \ b - end + _linsolve!(x, A, b, update_matrix = false; kwargs...) = x .= A \ b end -multiply_direct!(b, A, x) = b .= A * x -set_Δtγ_direct!(A, Δtγ_new, Δtγ_old) = A .= (A + I) * Δtγ_new / Δtγ_old - I +multiply_direct!(b, A, x) = mul!(b, A, x) +set_Δtγ_direct!(A, Δtγ_new, Δtγ_old) = + A .= (A .+ I(size(A, 1))) .* (Δtγ_new / Δtγ_old) .- I(size(A, 1)) imex_autonomous_prob_jac = ODEProblem( ODEFunction( @@ -236,29 +220,34 @@ kpr_singlerate_prob = IncrementingODEProblem{true}( [sqrt(4), sqrt(3)], (0.0, 5π/2), kpr_param, ) +struct IntegratorTestCase{FT, P, S, A} + test_name::String + linear_implicit::Bool + t_end::FT + probs::P + split_probs::S + analytic_sol::A +end + # From Section 1.1 of "Example Programs for ARKode v4.4.0" by D. R. Reynolds -( - ark_analytic, - ark_analytic_increment, - ark_analytic_split, - ark_analytic_split_increment, - ark_analytic_sol, -) = let +ark_analytic = let FT = Float64 λ = FT(-100) # increase magnitude for more stiffness Y₀ = FT[0] - tspan = (0, 10) - tendency!(Yₜ, Y, λ, t) = Yₜ .= λ .* Y .+ (1 / (1 + t^2) - λ * atan(t)) - increment!(Y⁺, Y, λ, t, Δt) = - Y⁺ .+= Δt .* (λ .* Y .+ (1 / (1 + t^2) - λ * atan(t))) - implicit_tendency!(Yₜ, Y, λ, t) = Yₜ .= λ .* Y - explicit_tendency!(Yₜ, Y, λ, t) = Yₜ .= 1 / (1 + t^2) - λ * atan(t) - implicit_increment!(Y⁺, Y, λ, t, Δt) = Y⁺ .+= (Δt * λ) .* Y - explicit_increment!(Y⁺, Y, λ, t, Δt) = - Y⁺ .+= Δt * (1 / (1 + t^2) - λ * atan(t)) - Wfact!(W, Y, λ, Δt, t) = W .= Δt * λ - 1 - tgrad!(∂Y∂t, Y, λ, t) = ∂Y∂t .= -(λ * t^2 + 2 * t + λ) / (1 + t^2)^2 - analytic_sol(u₀, λ, t) = atan(t) + t_end = FT(10) + + source(t) = 1 / (1 + t^2) - λ * atan(t) + tendency!(Yₜ, Y, _, t) = Yₜ .= λ .* Y .+ source(t) + increment!(Y⁺, Y, _, t, Δt) = Y⁺ .+= Δt .* (λ .* Y .+ source(t)) + implicit_tendency!(Yₜ, Y, _, t) = Yₜ .= λ .* Y + explicit_tendency!(Yₜ, Y, _, t) = Yₜ .= source(t) + implicit_increment!(Y⁺, Y, _, t, Δt) = Y⁺ .+= (Δt * λ) .* Y + 
explicit_increment!(Y⁺, Y, _, t, Δt) = Y⁺ .+= Δt * source(t) + + Wfact!(W, Y, _, Δt, t) = W .= Δt * λ - 1 + tgrad!(∂Y∂t, Y, _, t) = ∂Y∂t .= -(λ + 2 * t + λ * t^2) / (1 + t^2)^2 + analytic_sol(t) = [atan(t)] + func_args = (; jac_prototype = Y₀, Wfact = Wfact!, tgrad = tgrad!) tendency_func = ODEFunction(tendency!; func_args...) increment_func = ForwardEulerODEFunction(increment!; func_args...) @@ -270,23 +259,53 @@ kpr_singlerate_prob = IncrementingODEProblem{true}( ForwardEulerODEFunction(implicit_increment!; func_args...), ForwardEulerODEFunction(explicit_increment!), ) - prob_args = (Y₀, tspan, λ) - ( - ODEProblem(tendency_func, prob_args...), - ODEProblem(increment_func, prob_args...), - ODEProblem(split_tendency_func, prob_args...), - ODEProblem(split_increment_func, prob_args...), + + make_prob(func) = ODEProblem(func, Y₀, (FT(0), t_end), nothing) + IntegratorTestCase( + "ark_analytic", + true, + t_end, + (make_prob(tendency_func), make_prob(increment_func)), + (make_prob(split_tendency_func), make_prob(split_increment_func)), + analytic_sol, + ) +end + +# From Section 1.2 of "Example Programs for ARKode v4.4.0" by D. R. Reynolds +ark_analytic_nonlin = let + FT = Float64 + Y₀ = FT[0] + t_end = FT(10) + + tendency!(Yₜ, Y, _, t) = Yₜ .= (t + 1) .* exp.(.-Y) + increment!(Y⁺, Y, _, t, Δt) = Y⁺ .+= Δt .* ((t + 1) .* exp.(.-Y)) + no_tendency!(Yₜ, Y, _, t) = Yₜ .= zero(FT) + no_increment!(Y⁺, Y, _, t, Δt) = Y⁺ + + Wfact!(W, Y, _, Δt, t) = W .= (-Δt * (t + 1) .* exp.(.-Y) .- 1) + tgrad!(∂Y∂t, Y, _, t) = ∂Y∂t .= exp.(.-Y) + analytic_sol(t) = [log(t^2 / 2 + t + 1)] + + func_args = (; jac_prototype = Y₀, Wfact = Wfact!, tgrad = tgrad!) + tendency_func = ODEFunction(tendency!; func_args...) + split_tendency_func = SplitFunction(tendency_func, no_tendency!) + increment_func = ForwardEulerODEFunction(increment!; func_args...) 
+ split_increment_func = + SplitFunction(increment_func, ForwardEulerODEFunction(no_increment!)) + + make_prob(func) = ODEProblem(func, Y₀, (FT(0), t_end), nothing) + IntegratorTestCase( + "ark_analytic_nonlin", + false, + t_end, + (make_prob(tendency_func), make_prob(increment_func)), + (make_prob(split_tendency_func), make_prob(split_increment_func)), analytic_sol, ) end # From Section 5.1 of "Example Programs for ARKode v4.4.0" by D. R. Reynolds -( - ark_analytic_sys, - ark_analytic_sys_increment, - ark_analytic_sys_split, - ark_analytic_sys_sol, -) = let +ark_analytic_sys = let FT = Float64 λ = FT(-100) # increase magnitude for more stiffness V = FT[1 -1 1; -1 2 1; 0 -1 2] @@ -295,22 +314,30 @@ end A = V * D * V⁻¹ I = LinearAlgebra.I(3) Y₀ = FT[1, 1, 1] - tspan = (0, 1/20) - do_nothing!(Yₜ, Y, A, t) = Yₜ .= zero(eltype(Yₜ)) - tendency!(Yₜ, Y, A, t) = mul!(Yₜ, A, Y) - increment!(Y⁺, Y, A, t, Δt) = mul!(Y⁺, A, Y, Δt, 1) - Wfact!(W, Y, A, Δt, t) = W .= Δt .* A .- I - analytic_sol(u₀, A, t) = V * exp(D * t) * V⁻¹ * Y₀ - func_args = (; jac_prototype = similar(A), Wfact = Wfact!) + t_end = FT(1 / 20) + + tendency!(Yₜ, Y, _, t) = mul!(Yₜ, A, Y) + increment!(Y⁺, Y, _, t, Δt) = mul!(Y⁺, A, Y, Δt, 1) + no_tendency!(Yₜ, Y, _, t) = Yₜ .= zero(FT) + no_increment!(Y⁺, Y, _, t, Δt) = Y⁺ + + Wfact!(W, Y, _, Δt, t) = W .= Δt .* A .- I + analytic_sol(t) = V * exp(D * t) * V⁻¹ * Y₀ + + func_args = (; jac_prototype = A, Wfact = Wfact!) tendency_func = ODEFunction(tendency!; func_args...) + split_tendency_func = SplitFunction(tendency_func, no_tendency!) increment_func = ForwardEulerODEFunction(increment!; func_args...) - split_tendency_func = SplitFunction(ODEFunction(tendency!; func_args...), do_nothing!) - increment_func = ForwardEulerODEFunction(increment!; func_args...) 
- prob_args = (Y₀, tspan, A) - ( - ODEProblem(tendency_func, prob_args...), - ODEProblem(increment_func, prob_args...), - ODEProblem(split_tendency_func, prob_args...), + split_increment_func = + SplitFunction(increment_func, ForwardEulerODEFunction(no_increment!)) + + make_prob(func) = ODEProblem(func, Y₀, (FT(0), t_end), nothing) + IntegratorTestCase( + "ark_analytic_sys", + true, + t_end, + (make_prob(tendency_func), make_prob(increment_func)), + (make_prob(split_tendency_func), make_prob(split_increment_func)), analytic_sol, ) end diff --git a/test/utils.jl b/test/utils.jl index 3a5b43b6..4659cdb7 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -48,3 +48,110 @@ function (::DirectSolver)(x,A,b,matrix_updated; kwargs...) M = mapslices(y -> mul!(similar(y), A, y), Matrix{eltype(x)}(I,n,n), dims=1) x .= M \ b end + +using Printf +function test_algs( + algs_name, + algs_to_order, + test_case, + num_dt_splits; + num_saveat_splits = min(num_dt_splits, 8), + no_increment_algs = (), +) + (; test_name, linear_implicit, t_end, probs, split_probs, analytic_sol) = + test_case + FT = typeof(t_end) + linestyles = (:solid, :dash, :dot, :dashdot, :dashdotdot) + + plot1_dt = t_end / 2^num_dt_splits + plot1_saveat = [FT(0), t_end / 2^num_saveat_splits] + while plot1_saveat[end] < t_end + push!(plot1_saveat, min(plot1_saveat[end] + plot1_saveat[2], t_end)) + end + plot1_ylim = () + plot1 = plot( + title = "Solution Errors of $algs_name Methods for `$test_name` \ + (with dt = 10^$(@sprintf "%.1f" log10(plot1_dt)))", + xlabel = "t", + ylabel = "Error Norm: ||Y_computed - Y_analytic||", + yscale = :log10, + legend_position = :outerright, + palette = :glasbey_bw_minc_20_maxl_70_n256, + size = (1000, 600), + margin = 3Plots.mm, + titlelocation = :left, + ) + + t_end_string = t_end % 1 == 0 ? 
string(Int(t_end)) : @sprintf("%.2f", t_end) + plot2_dts = t_end ./ 2 .^ ((num_dt_splits - 3):3:(num_dt_splits + 3)) + plot2 = plot( + title = "Convergence Orders of $algs_name Methods for `$test_name` \ + (at t = $t_end_string)", + xlabel = "dt", + ylabel = "Error Norm: ||Y_computed - Y_analytic||", + xscale = :log10, + yscale = :log10, + legend_position = :outerright, + palette = :glasbey_bw_minc_20_maxl_70_n256, + size = (1000, 600), + margin = 3Plots.mm, + titlelocation = :left, + ) + + analytic_sols = map(analytic_sol, plot1_saveat) + analytic_end_sol = [analytic_sols[end]] + sorted_algs_to_order = sort(collect(algs_to_order); by = x -> string(x[1])) + + for (alg_name, predicted_order) in sorted_algs_to_order + @show alg_name + if alg_name <: IMEXARKAlgorithm + max_iters = linear_implicit ? 1 : 2 + alg = + alg_name(NewtonsMethod(; linsolve = linsolve_direct, max_iters)) + (tendency_prob, increment_prob) = split_probs + elseif alg_name <: RosenbrockAlgorithm + alg = alg_name(; + linsolve = linsolve_direct, + multiply! = multiply_direct!, + set_Δtγ! = set_Δtγ_direct!, + ) + (tendency_prob, increment_prob) = probs + end + linestyle = linestyles[(predicted_order - 1) % length(linestyles) + 1] + + solve_args = (; dt = plot1_dt, saveat = plot1_saveat) + tendency_sols = + solve(deepcopy(tendency_prob), alg; solve_args...).u + tendency_errors = @. norm(tendency_sols - analytic_sols) + plot1_ylim = + extrema((plot1_ylim..., filter(x -> x != 0, tendency_errors)...)) + tendency_errors .= max.(tendency_errors, eps(FT(0))) + label = alg_name + plot!(plot1, plot1_saveat, tendency_errors; label, linestyle) + if !(alg_name in no_increment_algs) + increment_sols = + solve(deepcopy(increment_prob), alg; solve_args...).u + increment_errors = @. 
norm(increment_sols - tendency_sols) + @test maximum(increment_errors) < 10000 * eps(FT) broken = + alg_name == HOMMEM1 # TODO + end + + tendency_end_sols = map( + dt -> solve(deepcopy(tendency_prob), alg; dt).u[end], + plot2_dts, + ) + tendency_end_errors = @. norm(tendency_end_sols - analytic_end_sol) + _, computed_order = hcat(ones(length(plot2_dts)), log10.(plot2_dts)) \ + log10.(tendency_end_errors) + @test computed_order ≈ predicted_order rtol = 0.1 + label = "$alg_name ($(@sprintf "%.3f" computed_order))" + plot!(plot2, plot2_dts, tendency_end_errors; label, linestyle) + end + + plot!(plot1; ylim = (plot1_ylim[1] / 2, plot1_ylim[2] * 2)) + new_algs_name = lowercase(replace(algs_name, " " => "_")) + mkdir("output") + savefig(plot1, "output/errors_$(new_algs_name)_$(test_name).png") + savefig(plot2, "output/orders_$(new_algs_name)_$(test_name).png") +end + From 252ebde43dd88c0d6d2e367d644001ae5ac6bdd5 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Fri, 23 Sep 2022 11:17:55 -0700 Subject: [PATCH 14/21] Bugfix --- src/solvers/matrix_utilities.jl | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 src/solvers/matrix_utilities.jl diff --git a/src/solvers/matrix_utilities.jl b/src/solvers/matrix_utilities.jl new file mode 100644 index 00000000..ddbac704 --- /dev/null +++ b/src/solvers/matrix_utilities.jl @@ -0,0 +1,33 @@ +lower_plus_diagonal(matrix::T) where {T} = + T(LinearAlgebra.LowerTriangular(matrix)) +diagonal(matrix::T) where {T} = T(LinearAlgebra.Diagonal(matrix)) +lower(matrix) = lower_plus_diagonal(matrix) - diagonal(matrix) + +lower_triangular_inv(matrix::T) where {T} = + T(inv(LinearAlgebra.LowerTriangular(matrix))) + +to_enumerated_rows(x) = x +function to_enumerated_rows(matrix::AbstractMatrix) + rows = tuple(1:size(matrix, 1)...) 
+ nonzero_indices = map(i -> findall(matrix[i, :] .!= 0), rows) + enumerated_rows = map( + i -> tuple(zip(nonzero_indices[i], matrix[i, nonzero_indices[i]])...), + rows, + ) + return enumerated_rows +end + +linear_combination_terms(enumerated_row, vectors) = + map(((j, val),) -> broadcasted(*, val, vectors[j]), enumerated_row) +function linear_combination(enumerated_row, vectors) + length(enumerated_row) == 0 && return nothing + terms = linear_combination_terms(enumerated_row, vectors) + length(enumerated_row) == 1 && return terms[1] + return broadcasted(+, terms...) +end + +@generated foreachval(f::F, ::Val{N}) where {F, N} = + quote + Base.@nexprs $N i -> f(Val(i)) + return nothing + end From 87f105c40b536e6c612fea11f8f936956af6e214 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Fri, 23 Sep 2022 11:29:47 -0700 Subject: [PATCH 15/21] WIP --- test/Project.toml | 1 + test/convergence.jl | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/test/Project.toml b/test/Project.toml index b6e181ff..74a0ed21 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -8,6 +8,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" diff --git a/test/convergence.jl b/test/convergence.jl index 7242b5b3..22787714 100644 --- a/test/convergence.jl +++ b/test/convergence.jl @@ -1,5 +1,5 @@ using ClimaTimeSteppers -using OrdinaryDiffEq, LinearAlgebra, Test, Plots, BenchmarkTools +using OrdinaryDiffEq, LinearAlgebra, BenchmarkTools, Printf, Plots, Test include("utils.jl") @@ -124,6 +124,7 @@ using OrdinaryDiffEq, BenchmarkTools @info "Benchmark Results for ClimaTimeSteppers.Rosenbrock23:" cts_trial = @benchmark 
solve($(deepcopy(tendency_prob)), $cts_alg, dt = $dt) + display(cts_trial) @info "Benchmark Results for OrdinaryDiffEq.Rosenbrock23:" ode_trial = @benchmark solve( @@ -132,6 +133,7 @@ using OrdinaryDiffEq, BenchmarkTools dt = $dt, adaptive = false, ) + display(cts_trial) @test median(cts_trial).time ≈ median(ode_trial).time rtol = 0.03 end From 9ab4a5cfc2891f9a8c954c623320dfe9dc4ce311 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Fri, 23 Sep 2022 12:42:45 -0700 Subject: [PATCH 16/21] WIP --- test/Project.toml | 1 + test/comparison.jl | 32 ++++++++++++++++++++++++++++++++ test/convergence.jl | 36 ++---------------------------------- test/runtests.jl | 1 + test/utils.jl | 10 ++++------ 5 files changed, 40 insertions(+), 40 deletions(-) create mode 100644 test/comparison.jl diff --git a/test/Project.toml b/test/Project.toml index 74a0ed21..9a228735 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,6 +1,7 @@ [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +ClimaTimeSteppers = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" diff --git a/test/comparison.jl b/test/comparison.jl new file mode 100644 index 00000000..b4d411d5 --- /dev/null +++ b/test/comparison.jl @@ -0,0 +1,32 @@ +using BenchmarkTools +import OrdinaryDiffEq # use import avoid namespace conflicts + +@testset "Compare with OrdinaryDiffEq" begin + (; t_end, probs, analytic_sol) = ark_analytic_sys + FT = typeof(t_end) + tendency_prob = probs[1] + dt = t_end / 2^7 + + cts_alg = Rosenbrock23(; linsolve = linsolve_direct) + ode_alg = OrdinaryDiffEq.Rosenbrock23(; linsolve = linsolve_direct) + + cts_tendency_end_sol = solve(deepcopy(tendency_prob), cts_alg; dt).u[end] + ode_tendency_end_sol = + solve(deepcopy(tendency_prob), ode_alg; dt, adaptive = false).u[end] + @test 
norm(cts_tendency_end_sol .- ode_tendency_end_sol) < eps(FT) + + @info "Benchmark Results for ClimaTimeSteppers.Rosenbrock23:" + cts_trial = @benchmark solve($(deepcopy(tendency_prob)), $cts_alg, dt = $dt) + display(cts_trial) + + @info "Benchmark Results for OrdinaryDiffEq.Rosenbrock23:" + ode_trial = @benchmark solve( + $(deepcopy(tendency_prob)), + $ode_alg, + dt = $dt, + adaptive = false, + ) + display(cts_trial) + + @test median(cts_trial).time ≈ median(ode_trial).time rtol = 0.03 +end diff --git a/test/convergence.jl b/test/convergence.jl index 22787714..34212c53 100644 --- a/test/convergence.jl +++ b/test/convergence.jl @@ -1,5 +1,4 @@ -using ClimaTimeSteppers -using OrdinaryDiffEq, LinearAlgebra, BenchmarkTools, Printf, Plots, Test +using ClimaTimeSteppers, LinearAlgebra, Printf, Plots, Test include("utils.jl") @@ -65,7 +64,7 @@ end dict = Dict{Type, Int}() algs2 = (Rosenbrock23, SSPKnoth) algs3 = (ROS3w, ROS3Pw, ROS34PW1a, ROS34PW1b, ROS34PW2) - algs4 = (ROS34PW3,) + algs4 = (RODASP2, ROS34PW3) for (algs, order) in ((algs2, 2), (algs3, 3), (algs4, 4)) for alg in algs dict[alg] = order @@ -107,37 +106,6 @@ end test_algs(algs_name, rosenbrock3_dict, ark_analytic, 12; no_increment_algs) end -using OrdinaryDiffEq, BenchmarkTools -@testset "Compare with OrdinaryDiffEq" begin - (; t_end, probs, analytic_sol) = ark_analytic_sys - FT = typeof(t_end) - tendency_prob = probs[1] - dt = t_end / 2^7 - - cts_alg = Rosenbrock23(; linsolve = linsolve_direct) - ode_alg = OrdinaryDiffEq.Rosenbrock23(; linsolve = linsolve_direct) - - cts_tendency_end_sol = solve(deepcopy(tendency_prob), cts_alg; dt).u[end] - ode_tendency_end_sol = - solve(deepcopy(tendency_prob), ode_alg; dt, adaptive = false).u[end] - @test norm(cts_tendency_end_sol .- ode_tendency_end_sol) < eps(FT) - - @info "Benchmark Results for ClimaTimeSteppers.Rosenbrock23:" - cts_trial = @benchmark solve($(deepcopy(tendency_prob)), $cts_alg, dt = $dt) - display(cts_trial) - - @info "Benchmark Results for 
OrdinaryDiffEq.Rosenbrock23:" - ode_trial = @benchmark solve( - $(deepcopy(tendency_prob)), - $ode_alg, - dt = $dt, - adaptive = false, - ) - display(cts_trial) - - @test median(cts_trial).time ≈ median(ode_trial).time rtol = 0.03 -end - #= if ArrayType == Array for (prob, sol) in [ diff --git a/test/runtests.jl b/test/runtests.jl index 5cc2e10f..3a471f58 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -10,6 +10,7 @@ include("problems.jl") include("integrator.jl") include("convergence.jl") +include("comparison.jl") include("callbacks.jl") include("test_convergence_checker.jl") include("single_column_ARS_test.jl") diff --git a/test/utils.jl b/test/utils.jl index 4659cdb7..b21ae8b4 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -49,7 +49,6 @@ function (::DirectSolver)(x,A,b,matrix_updated; kwargs...) x .= M \ b end -using Printf function test_algs( algs_name, algs_to_order, @@ -103,7 +102,6 @@ function test_algs( sorted_algs_to_order = sort(collect(algs_to_order); by = x -> string(x[1])) for (alg_name, predicted_order) in sorted_algs_to_order - @show alg_name if alg_name <: IMEXARKAlgorithm max_iters = linear_implicit ? 
1 : 2 alg = @@ -149,9 +147,9 @@ function test_algs( end plot!(plot1; ylim = (plot1_ylim[1] / 2, plot1_ylim[2] * 2)) - new_algs_name = lowercase(replace(algs_name, " " => "_")) - mkdir("output") - savefig(plot1, "output/errors_$(new_algs_name)_$(test_name).png") - savefig(plot2, "output/orders_$(new_algs_name)_$(test_name).png") + file_name = "$(lowercase(replace(algs_name, " " => "_")))_$(test_name)" + mkpath("output") + savefig(plot1, joinpath("output", "errors_$(file_name).png")) + savefig(plot2, joinpath("output", "orders_$(file_name).png")) end From ed374e080e50929a54b2e81035238e4b073fca52 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Fri, 23 Sep 2022 15:29:28 -0700 Subject: [PATCH 17/21] WIP --- .buildkite/pipeline.yml | 14 +++++++++++--- test/Project.toml | 4 +++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index cf5ad3e4..7b3efa28 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -6,10 +6,18 @@ env: CLIMATEMACHINE_SETTINGS_FIX_RNG_SEED: "true" steps: - - label: "init perf env" - key: "init_perf_env" + - label: "init cpu env" + key: "init_cpu_env" command: - "echo $JULIA_DEPOT_PATH" + + - echo "--- Instantiate test" + - "julia --project=test -e 'using Pkg; Pkg.develop(path=\".\")'" + - "julia --project=test -e 'using Pkg; Pkg.instantiate(;verbose=true)'" + - "julia --project=test -e 'using Pkg; Pkg.precompile(;strict=true)'" + - "julia --project=perf -e 'using Pkg; Pkg.status()'" + + - echo "--- Instantiate perf" - "julia --project=perf -e 'using Pkg; Pkg.instantiate(;verbose=true)'" - "julia --project=perf -e 'using Pkg; Pkg.precompile()'" - "julia --project=perf -e 'using Pkg; Pkg.status()'" @@ -49,7 +57,7 @@ steps: - label: "Unit tests and plots" command: - - "julia --project=test test/runtests.jl" + - "julia --project -e 'using Pkg; Pkg.test()'" artifact_paths: "test/output/*" agents: config: cpu diff --git a/test/Project.toml b/test/Project.toml index 
9a228735..4c4cd98d 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,7 +1,6 @@ [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" -ClimaTimeSteppers = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" @@ -14,3 +13,6 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[compat] +OrdinaryDiffEq = "5.64.0" From 81ba23b92278c28f8784e0291b1d52301defd595 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Fri, 23 Sep 2022 15:51:22 -0700 Subject: [PATCH 18/21] WIP --- test/utils.jl | 59 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/test/utils.jl b/test/utils.jl index b21ae8b4..e8eceb4e 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -61,28 +61,39 @@ function test_algs( test_case FT = typeof(t_end) linestyles = (:solid, :dash, :dot, :dashdot, :dashdotdot) + plot_kwargs = (; + size = (1000, 600), + margin = 3Plots.mm, + titlelocation = :left, + legend_position = :outerright, + palette = :glasbey_bw_minc_20_maxl_70_n256, + ) plot1_dt = t_end / 2^num_dt_splits plot1_saveat = [FT(0), t_end / 2^num_saveat_splits] while plot1_saveat[end] < t_end push!(plot1_saveat, min(plot1_saveat[end] + plot1_saveat[2], t_end)) - end - plot1_ylim = () - plot1 = plot( + end # ensure that the saveat times are EXACTLY equal to the integrator times + plot1a = plot( + title = "Solution Norms of $algs_name Methods for `$test_name` \ + (with dt = 10^$(@sprintf "%.1f" log10(plot1_dt)))", + xlabel = "t", + ylabel = "Solution Norm: ||Y_computed||", + plot_kwargs..., + ) + plot1b = plot( title = "Solution Errors of $algs_name Methods for `$test_name` \ (with dt = 
10^$(@sprintf "%.1f" log10(plot1_dt)))", xlabel = "t", ylabel = "Error Norm: ||Y_computed - Y_analytic||", yscale = :log10, - legend_position = :outerright, - palette = :glasbey_bw_minc_20_maxl_70_n256, - size = (1000, 600), - margin = 3Plots.mm, - titlelocation = :left, + plot_kwargs..., ) + plot1b_ymin = typemax(FT) # dynamically set ylim because some errors are 0 + plot1b_ymax = typemin(FT) t_end_string = t_end % 1 == 0 ? string(Int(t_end)) : @sprintf("%.2f", t_end) - plot2_dts = t_end ./ 2 .^ ((num_dt_splits - 3):3:(num_dt_splits + 3)) + plot2_dts = (plot1_dt / 10, plot1_dt, plot1_dt * 10) plot2 = plot( title = "Convergence Orders of $algs_name Methods for `$test_name` \ (at t = $t_end_string)", @@ -90,17 +101,12 @@ function test_algs( ylabel = "Error Norm: ||Y_computed - Y_analytic||", xscale = :log10, yscale = :log10, - legend_position = :outerright, - palette = :glasbey_bw_minc_20_maxl_70_n256, - size = (1000, 600), - margin = 3Plots.mm, - titlelocation = :left, + plot_kwargs..., ) analytic_sols = map(analytic_sol, plot1_saveat) analytic_end_sol = [analytic_sols[end]] sorted_algs_to_order = sort(collect(algs_to_order); by = x -> string(x[1])) - for (alg_name, predicted_order) in sorted_algs_to_order if alg_name <: IMEXARKAlgorithm max_iters = linear_implicit ? 1 : 2 @@ -120,18 +126,22 @@ function test_algs( solve_args = (; dt = plot1_dt, saveat = plot1_saveat) tendency_sols = solve(deepcopy(tendency_prob), alg; solve_args...).u + tendency_norms = @. norm(tendency_sols) tendency_errors = @. norm(tendency_sols - analytic_sols) - plot1_ylim = - extrema((plot1_ylim..., filter(x -> x != 0, tendency_errors)...)) - tendency_errors .= max.(tendency_errors, eps(FT(0))) + min_error = minimum(x -> x == 0 ? 
typemax(FT) : x, tendency_errors) + plot1b_ymin = min(plot1b_ymin, min_error) + plot1b_ymax = max(plot1b_ymax, maximum(tendency_errors)) + tendency_errors .= + max.(tendency_errors, eps(FT(0))) # plotting 0 breaks the log scale label = alg_name - plot!(plot1, plot1_saveat, tendency_errors; label, linestyle) + plot!(plot1a, plot1_saveat, tendency_norms; label, linestyle) + plot!(plot1b, plot1_saveat, tendency_errors; label, linestyle) if !(alg_name in no_increment_algs) increment_sols = solve(deepcopy(increment_prob), alg; solve_args...).u increment_errors = @. norm(increment_sols - tendency_sols) @test maximum(increment_errors) < 10000 * eps(FT) broken = - alg_name == HOMMEM1 # TODO + alg_name == HOMMEM1 # TODO: why is this broken??? end tendency_end_sols = map( @@ -145,11 +155,12 @@ function test_algs( label = "$alg_name ($(@sprintf "%.3f" computed_order))" plot!(plot2, plot2_dts, tendency_end_errors; label, linestyle) end + plot!(plot1b; ylim = (plot1b_ymin / 2, plot1b_ymax * 2)) - plot!(plot1; ylim = (plot1_ylim[1] / 2, plot1_ylim[2] * 2)) - file_name = "$(lowercase(replace(algs_name, " " => "_")))_$(test_name)" mkpath("output") - savefig(plot1, joinpath("output", "errors_$(file_name).png")) - savefig(plot2, joinpath("output", "orders_$(file_name).png")) + file_suffix = "$(lowercase(replace(algs_name, " " => "_")))_$(test_name)" + savefig(plot1a, joinpath("output", "solutions_$(file_suffix).png")) + savefig(plot1b, joinpath("output", "errors_$(file_suffix).png")) + savefig(plot2, joinpath("output", "orders_$(file_suffix).png")) end From 7feb3facb7cd5637f48b741f6d5b2faf2900a6bc Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Fri, 23 Sep 2022 18:02:29 -0700 Subject: [PATCH 19/21] WIP --- test/Project.toml | 1 + test/comparison.jl | 2 +- test/convergence.jl | 96 +++++++++++++-------------------------------- test/utils.jl | 13 +++--- 4 files changed, 37 insertions(+), 75 deletions(-) diff --git a/test/Project.toml b/test/Project.toml index 4c4cd98d..ffd67bd2 
100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,6 +1,7 @@ [deps] BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +ClimaTimeSteppers = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" DiffEqBase = "2b5f629d-d688-5b77-993f-72d75c75574e" DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def" KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" diff --git a/test/comparison.jl b/test/comparison.jl index b4d411d5..1fdf7373 100644 --- a/test/comparison.jl +++ b/test/comparison.jl @@ -28,5 +28,5 @@ import OrdinaryDiffEq # use import avoid namespace conflicts ) display(cts_trial) - @test median(cts_trial).time ≈ median(ode_trial).time rtol = 0.03 + @test median(cts_trial).time ≈ median(ode_trial).time rtol = 0.04 end diff --git a/test/convergence.jl b/test/convergence.jl index 34212c53..56c7c463 100644 --- a/test/convergence.jl +++ b/test/convergence.jl @@ -35,75 +35,35 @@ for (prob, sol) in [ end -@testset "Test Algorithms and Generate Plots" begin - imex_ark_dict = let - dict = Dict{Type, Int}() - algs1 = (ARS111, ARS121) - algs2 = (ARS122, ARS232, ARS222, IMKG232a, IMKG232b, IMKG242a, IMKG242b) - algs2 = (algs2..., IMKG252a, IMKG252b, IMKG253a, IMKG253b, IMKG254a) - algs2 = (algs2..., IMKG254b, IMKG254c, HOMMEM1) - algs3 = (ARS233, ARS343, ARS443, IMKG342a, IMKG343a, DBM453) - for (algs, order) in ((algs1, 1), (algs2, 2), (algs3, 3)) - for alg in algs - dict[alg] = order - end - end - dict - end - - test_algs("IMEX ARK", imex_ark_dict, ark_analytic_sys, 7) - - test_algs("IMEX ARK", imex_ark_dict, ark_analytic_nonlin, 10) - - # For some bizarre reason, ARS121 converges with order 2 for ark_analytic. 
- imex_ark_dict_ark_analytic = copy(imex_ark_dict) - imex_ark_dict_ark_analytic[ARS121] = 2 - test_algs("IMEX ARK", imex_ark_dict_ark_analytic, ark_analytic, 15) - - rosenbrock_dict = let - dict = Dict{Type, Int}() - algs2 = (Rosenbrock23, SSPKnoth) - algs3 = (ROS3w, ROS3Pw, ROS34PW1a, ROS34PW1b, ROS34PW2) - algs4 = (RODASP2, ROS34PW3) - for (algs, order) in ((algs2, 2), (algs3, 3), (algs4, 4)) - for alg in algs - dict[alg] = order - end - end - dict - end - no_increment_algs = (ROS3w, ROS3Pw, ROS34PW1a, ROS34PW1b) - - # 6 also works, but we'll keep it at 7 to match the IMEX ARK plots. - test_algs( - "Rosenbrock", - rosenbrock_dict, - ark_analytic_sys, - 7; - no_increment_algs, - ) +@testset "IMEX ARK Methods" begin + algs1 = (ARS111, ARS121) + algs2 = (ARS122, ARS232, ARS222, IMKG232a, IMKG232b, IMKG242a, IMKG242b) + algs2 = (algs2..., IMKG252a, IMKG252b, IMKG253a, IMKG253b, IMKG254a) + algs2 = (algs2..., IMKG254b, IMKG254c, HOMMEM1) + algs3 = (ARS233, ARS343, ARS443, IMKG342a, IMKG343a, DBM453) + dict = Dict(((algs1 .=> 1)..., (algs2 .=> 2)..., (algs3 .=> 3)...)) + test_algs("IMEX ARK", dict, ark_analytic_sys, 6) + test_algs("IMEX ARK", dict, ark_analytic_nonlin, 9) + # For some bizarre reason, ARS121 converges with order 2 for ark_analytic, + # even though it is only a 1st order method. + dict′ = copy(dict) + dict′[ARS121] = 2 + test_algs("IMEX ARK", dict′, ark_analytic, 14) +end - # RODASP2 needs a larger dt than the other methods, so it's skipped here. - rosenbrock_dict′ = copy(rosenbrock_dict) - delete!(rosenbrock_dict′, RODASP2) - test_algs( - "Rosenbrock", - rosenbrock_dict′, - ark_analytic_nonlin, - 10; - no_increment_algs, - ) - - # The 3rd order algorithms need a larger dt than the 2nd order ones, and - # neither of the 4th order algorithms converge within an rtol of 0.1 of - # the predicted order for any value of dt. 
- algs_name = "Rosenbrock 2nd Order" - rosenbrock2_dict = Dict((Rosenbrock23, SSPKnoth) .=> 2) - test_algs(algs_name, rosenbrock2_dict, ark_analytic, 15) - algs_name = "Rosenbrock 3rd Order" - rosenbrock3_dict = - Dict((ROS3w, ROS3Pw, ROS34PW1a, ROS34PW1b, ROS34PW2) .=> 3) - test_algs(algs_name, rosenbrock3_dict, ark_analytic, 12; no_increment_algs) +@testset "Rosenbrock Methods" begin + algs2 = (Rosenbrock23, SSPKnoth) + algs3 = (ROS3w, ROS3Pw, ROS34PW1a, ROS34PW1b, ROS34PW2) + algs4 = (RODASP2, ROS34PW3) + dict = Dict((algs2 .=> 2)..., (algs3 .=> 3)..., (algs4 .=> 4)...) + args = (; no_increment_algs = (ROS3w, ROS3Pw, ROS34PW1a, ROS34PW1b)) + test_algs("Rosenbrock", dict, ark_analytic_sys, 6; args...) + test_algs("Rosenbrock", dict, ark_analytic_nonlin, 9; args...) + # For ark_analytic, there is no range where all the methods pass the test. + test_algs("Rosenbrock Order 2", Dict(algs2 .=> 2), ark_analytic, 14) + dict34 = Dict((algs3 .=> 3)..., ROS34PW3 => 4) + test_algs("Rosenbrock Order 3&4", dict34, ark_analytic, 12; args...) + test_algs("RODASP2", Dict(RODASP2 => 4), ark_analytic, 8) end #= diff --git a/test/utils.jl b/test/utils.jl index e8eceb4e..4c96519c 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -71,17 +71,18 @@ function test_algs( plot1_dt = t_end / 2^num_dt_splits plot1_saveat = [FT(0), t_end / 2^num_saveat_splits] + # Ensure that the saveat times are an EXACT subset of the integrator times. 
while plot1_saveat[end] < t_end push!(plot1_saveat, min(plot1_saveat[end] + plot1_saveat[2], t_end)) - end # ensure that the saveat times are EXACTLY equal to the integrator times - plot1a = plot( + end + plot1a = plot(; title = "Solution Norms of $algs_name Methods for `$test_name` \ (with dt = 10^$(@sprintf "%.1f" log10(plot1_dt)))", xlabel = "t", ylabel = "Solution Norm: ||Y_computed||", plot_kwargs..., ) - plot1b = plot( + plot1b = plot(; title = "Solution Errors of $algs_name Methods for `$test_name` \ (with dt = 10^$(@sprintf "%.1f" log10(plot1_dt)))", xlabel = "t", @@ -93,8 +94,8 @@ function test_algs( plot1b_ymax = typemin(FT) t_end_string = t_end % 1 == 0 ? string(Int(t_end)) : @sprintf("%.2f", t_end) - plot2_dts = (plot1_dt / 10, plot1_dt, plot1_dt * 10) - plot2 = plot( + plot2_dts = [plot1_dt / 4, plot1_dt, plot1_dt * 4] + plot2 = plot(; title = "Convergence Orders of $algs_name Methods for `$test_name` \ (at t = $t_end_string)", xlabel = "dt", @@ -158,7 +159,7 @@ function test_algs( plot!(plot1b; ylim = (plot1b_ymin / 2, plot1b_ymax * 2)) mkpath("output") - file_suffix = "$(lowercase(replace(algs_name, " " => "_")))_$(test_name)" + file_suffix = "$(test_name)_$(lowercase(replace(algs_name, " " => "_")))" savefig(plot1a, joinpath("output", "solutions_$(file_suffix).png")) savefig(plot1b, joinpath("output", "errors_$(file_suffix).png")) savefig(plot2, joinpath("output", "orders_$(file_suffix).png")) From bd4d28fc0aadc88aa636a4f2b5ea513e5bb1bc98 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Fri, 23 Sep 2022 18:13:55 -0700 Subject: [PATCH 20/21] WIP --- .buildkite/pipeline.yml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 7b3efa28..4e3e2d93 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -26,18 +26,6 @@ steps: queue: central slurm_ntasks: 1 - - label: "init test env" - key: "init_test_env" - command: - - "echo $JULIA_DEPOT_PATH" - - "julia --project=test -e 
'using Pkg; Pkg.instantiate(;verbose=true)'" - - "julia --project=test -e 'using Pkg; Pkg.precompile()'" - - "julia --project=test -e 'using Pkg; Pkg.status()'" - agents: - config: cpu - queue: central - slurm_ntasks: 1 - - label: "init gpu env" key: "init_gpu_env" command: From d2273ebd227bffdd53c7a4a276b6f3c5cbd198a9 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Fri, 23 Sep 2022 18:22:51 -0700 Subject: [PATCH 21/21] WIP --- test/comparison.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/comparison.jl b/test/comparison.jl index 1fdf7373..8c4a1333 100644 --- a/test/comparison.jl +++ b/test/comparison.jl @@ -28,5 +28,5 @@ import OrdinaryDiffEq # use import avoid namespace conflicts ) display(cts_trial) - @test median(cts_trial).time ≈ median(ode_trial).time rtol = 0.04 + @test median(cts_trial).time ≈ median(ode_trial).time rtol = 0.1 end