From 3d389aa8afbd56c85a4a744572885ddf0b423c2b Mon Sep 17 00:00:00 2001 From: David Nadlinger Date: Mon, 19 Oct 2015 23:38:55 +0200 Subject: [PATCH 1/5] Cleanup: Remove tracking of number of multiplications The statistic is currently wrong for us anyway, and not very useful for using the algorithm (as opposed to developing it). If somebody is really interested in this, they can just supply a custom matrix multiplication operator in Julia. --- src/expmv_fun.jl | 12 ++---------- src/normAm.jl | 4 +--- src/select_taylor_degree.jl | 16 ++++------------ 3 files changed, 7 insertions(+), 25 deletions(-) diff --git a/src/expmv_fun.jl b/src/expmv_fun.jl index 80ec01d..030dc91 100644 --- a/src/expmv_fun.jl +++ b/src/expmv_fun.jl @@ -4,13 +4,10 @@ function expmv(t, A, b; M = [], prec = "double", shift = false, full_term = fals # bal = false, #EXPMV Matrix exponential times vector or matrix. - # [F,S,M,MV,MVD] = EXPMV(t,A,B,[],PREC) computes EXPM(t*A)*B without + # [F,S,M] = EXPMV(t,A,B,[],PREC) computes EXPM(t*A)*B without # explicitly forming EXPM(t*A). PREC is the required accuracy, 'double', # 'single' or 'half', and defaults to CLASS(A). # - # A total of MV products with A or A^* are used, of which MVD are - # for norm estimation. - # # The full syntax is # # [f,s,m,mv,mvd,unA] = expmv(t,A,b,M,prec,shift,bal,full_term,prnt). @@ -48,12 +45,9 @@ function expmv(t, A, b; M = [], prec = "double", shift = false, full_term = fals if isempty(M) tt = 1 - (M,mvd,alpha,unA) = select_taylor_degree(t*A,b) - mv = mvd + (M,alpha,unA) = select_taylor_degree(t*A,b) else tt = t - mv = 0 - mvd = 0 end tol = @@ -104,7 +98,6 @@ function expmv(t, A, b; M = [], prec = "double", shift = false, full_term = fals c1 = norm(b,Inf); for k = 1:m b = (t/(s*k))*(A*b); - mv = mv + 1; f = f + b; c2 = norm(b,Inf); if !full_term @@ -130,6 +123,5 @@ function expmv(t, A, b; M = [], prec = "double", shift = false, full_term = fals # f = D*f #end - #return (f,s,m,mv,mvd,unA) return f end \ No newline at end of file diff --git a/src/normAm.jl b/src/normAm.jl index ed2ffbc..897bf40 100644 --- a/src/normAm.jl +++ b/src/normAm.jl @@ -56,15 +56,13 @@ function normAm(A,m) e = A'*e; end c = norm(e,Inf); - mv = m; else #(c,_,_,it) = normest1(@afun_power,t); #mv = it[2]*t*m; c = norm(A^m,1); - mv = 6*t*m; end - return (c,mv) + return c end diff --git a/src/select_taylor_degree.jl b/src/select_taylor_degree.jl index 9748738..9eb72c6 100644 --- a/src/select_taylor_degree.jl +++ b/src/select_taylor_degree.jl @@ -1,6 +1,3 @@ -#function [M,mv,alpha,unA] = ... -# select_taylor_degree(A,b,m_max,p_max,prec,shift,bal,force_estm) - function select_taylor_degree(A, b; m_max = 55, @@ -12,10 +9,9 @@ function select_taylor_degree(A, #SELECT_TAYLOR_DEGREE Select degree of Taylor approximation. - # [M,MV,alpha,unA] = SELECT_TAYLOR_DEGREE(A,m_max,p_max) forms a matrix M + # [M,alpha,unA] = SELECT_TAYLOR_DEGREE(A,m_max,p_max) forms a matrix M # for use in determining the truncated Taylor series degree in EXPMV # and EXPMV_TSPAN, based on parameters m_max and p_max. - # MV is the number of matrix-vector products with A or A^* computed. # Reference: A. H. Al-Mohy and N. J. Higham, Computing the action of # the matrix exponential, with an application to exponential @@ -48,8 +44,6 @@ function select_taylor_degree(A, A = A-mu*speye(n); end - mv = 0; - if !force_estm normA = norm(A,1) end @@ -63,9 +57,8 @@ function select_taylor_degree(A, eta = zeros(p_max,1); alpha = zeros(p_max-1,1); for p = 1:p_max - (c,k) = normAm(A,p+1); + c = normAm(A,p+1); c = c^(1/(p+1)); - mv = mv + k; eta[p] = c; end for p = 1:p_max-1 @@ -81,6 +74,5 @@ function select_taylor_degree(A, end end - return (M,mv,alpha,unA) - -end \ No newline at end of file + return (M,alpha,unA) +end From 73ba460c1e43192c4853a345684378512c0bbd24 Mon Sep 17 00:00:00 2001 From: David Nadlinger Date: Mon, 19 Oct 2015 23:42:22 +0200 Subject: [PATCH 2/5] Cleanup: Remove unused return values of select_taylor_degree --- src/expmv_fun.jl | 9 +++------ src/select_taylor_degree.jl | 4 +--- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/expmv_fun.jl b/src/expmv_fun.jl index 030dc91..07f9c5c 100644 --- a/src/expmv_fun.jl +++ b/src/expmv_fun.jl @@ -10,9 +10,7 @@ function expmv(t, A, b; M = [], prec = "double", shift = false, full_term = fals # # The full syntax is # - # [f,s,m,mv,mvd,unA] = expmv(t,A,b,M,prec,shift,bal,full_term,prnt). - # - # unA = 1 if the alpha_p were used instead of norm(A). + # f = expmv(t,A,b,M,prec,shift,bal,full_term,prnt). # # If repeated invocation of EXPMV is required for several values of t # or B, it is recommended to provide M as an external parameter as @@ -38,14 +36,13 @@ function expmv(t, A, b; M = [], prec = "double", shift = false, full_term = fals n = size(A,1) if shift - mu = trace(A)/n - #mu = full(mu) # Much slower without the full! + mu = trace(A)/n A = A-mu*speye(n) end if isempty(M) tt = 1 - (M,alpha,unA) = select_taylor_degree(t*A,b) + M = select_taylor_degree(t * A, b) else tt = t end diff --git a/src/select_taylor_degree.jl b/src/select_taylor_degree.jl index 9eb72c6..a609c8d 100644 --- a/src/select_taylor_degree.jl +++ b/src/select_taylor_degree.jl @@ -49,11 +49,9 @@ function select_taylor_degree(A, end if !force_estm && normA <= 4*theta[m_max]*p_max*(p_max + 3)/(m_max*size(b,2)); - unA = 1; c = normA; alpha = c*ones(p_max-1,1); else - unA = 0; eta = zeros(p_max,1); alpha = zeros(p_max-1,1); for p = 1:p_max @@ -74,5 +72,5 @@ function select_taylor_degree(A, end end - return (M,alpha,unA) + return M end From f3cf785508f13d64260474cd4af86bb476f3e85a Mon Sep 17 00:00:00 2001 From: David Nadlinger Date: Mon, 19 Oct 2015 23:45:33 +0200 Subject: [PATCH 3/5] Make it clearer that select_taylor_degree only needs the width of b, not the values --- src/expmv_fun.jl | 4 ++-- src/select_taylor_degree.jl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/expmv_fun.jl b/src/expmv_fun.jl index 07f9c5c..0c971eb 100644 --- a/src/expmv_fun.jl +++ b/src/expmv_fun.jl @@ -14,7 +14,7 @@ function expmv(t, A, b; M = [], prec = "double", shift = false, full_term = fals # # If repeated invocation of EXPMV is required for several values of t # or B, it is recommended to provide M as an external parameter as - # M = SELECT_TAYLOR_DEGREE(A,m_max,p_max,prec,shift,bal,true). + # M = SELECT_TAYLOR_DEGREE(A,b_columns,m_max,p_max,prec,shift,bal,true). # This also allows choosing different m_max and p_max. # Reference: A. H. Al-Mohy and N. J. Higham, Computing the action of @@ -42,7 +42,7 @@ function expmv(t, A, b; M = [], prec = "double", shift = false, full_term = fals if isempty(M) tt = 1 - M = select_taylor_degree(t * A, b) + M = select_taylor_degree(t * A, size(b, 2)) else tt = t end diff --git a/src/select_taylor_degree.jl b/src/select_taylor_degree.jl index a609c8d..4378d83 100644 --- a/src/select_taylor_degree.jl +++ b/src/select_taylor_degree.jl @@ -1,5 +1,5 @@ function select_taylor_degree(A, - b; + b_columns; m_max = 55, p_max = 8, prec = "double", @@ -48,7 +48,7 @@ function select_taylor_degree(A, normA = norm(A,1) end - if !force_estm && normA <= 4*theta[m_max]*p_max*(p_max + 3)/(m_max*size(b,2)); + if !force_estm && normA <= 4*theta[m_max]*p_max*(p_max + 3)/(m_max*b_columns); c = normA; alpha = c*ones(p_max-1,1); else From c2f33b4dc3c19bf5b9a033f903f0002b09403080 Mon Sep 17 00:00:00 2001 From: David Nadlinger Date: Tue, 20 Oct 2015 00:17:58 +0200 Subject: [PATCH 4/5] Fix most obvious type instabilities This does not necessarily have a big impact on performance, but helps when looking at the generated code (or code_warntype, for that matter). --- src/expmv_fun.jl | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/expmv_fun.jl b/src/expmv_fun.jl index 0c971eb..43bb1d1 100644 --- a/src/expmv_fun.jl +++ b/src/expmv_fun.jl @@ -1,6 +1,6 @@ export expmv -function expmv(t, A, b; M = [], prec = "double", shift = false, full_term = false, prnt = false) +function expmv(t, A, b; M::Array{Float64, 2} = Array(Float64, 0, 0), prec = "double", shift = false, full_term = false, prnt = false) # bal = false, #EXPMV Matrix exponential times vector or matrix. @@ -41,22 +41,22 @@ function expmv(t, A, b; M = [], prec = "double", shift = false, full_term = fals end if isempty(M) - tt = 1 + tt = 1.0 M = select_taylor_degree(t * A, size(b, 2)) else tt = t end - tol = - if prec == "double" - 2.0^(-53) + tol = + if prec == "half" + 2.0^(-10) elseif prec == "single" 2.0^(-24) - elseif prec == "half" - 2.0^(-10) + else + 2.0^(-53) end - s = 1; + s = 1.0; if t == 0 m = 0; @@ -74,15 +74,13 @@ function expmv(t, A, b; M = [], prec = "double", shift = false, full_term = fals cost,m = findmin(C); # when C is one column. Happens if p_max = 2. end if cost == Inf - cost = 0 + cost = 0.0 end - s = max(cost/m,1); + s = max(cost/m,1.0); end - eta = 1; - if shift - eta = exp(t*mu/s) + eta = exp(t * mu / s) end f = b; @@ -108,7 +106,9 @@ function expmv(t, A, b; M = [], prec = "double", shift = false, full_term = fals end end - f = eta*f + if shift + f = eta*f + end b = f end From 3eced9f9db3bb8b133bf5adc1e5062c73ceec0ba Mon Sep 17 00:00:00 2001 From: David Nadlinger Date: Tue, 20 Oct 2015 23:22:23 +0200 Subject: [PATCH 5/5] Use in-place operations in inner loop --- src/expmv_fun.jl | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/src/expmv_fun.jl b/src/expmv_fun.jl index 43bb1d1..e2d1839 100644 --- a/src/expmv_fun.jl +++ b/src/expmv_fun.jl @@ -82,34 +82,37 @@ function expmv(t, A, b; M::Array{Float64, 2} = Array(Float64, 0, 0), prec = "dou if shift eta = exp(t * mu / s) end - - f = b; - - # if prnt - # fprintf("m = %2.0f, s = %g, m_actual = ", m, s) - # end - - for i = 1:s - c1 = norm(b,Inf); + + f = copy(b) + b1 = copy(b) + b2 = similar(b) + @inbounds for i = 1:s + c1 = norm(b1, Inf) for k = 1:m - b = (t/(s*k))*(A*b); - f = f + b; - c2 = norm(b,Inf); + A_mul_B!(b2, A, b1) + @simd for l in 1:n + b2[l] *= (t / (s * k)) + end + (b1, b2) = (b2, b1) + + @simd for l in 1:n + f[l] = f[l] + b1[l] + end + c2 = norm(b1, Inf) if !full_term - if c1 + c2 <= tol*norm(f,Inf) + if c1 + c2 <= tol * norm(f, Inf) # if prnt # fprintf(" %2.0f, ", k) # end break end - c1 = c2; + c1 = c2 end - end if shift - f = eta*f + scale!(f, eta) end - b = f + copy!(b1, f) end # if prnt