From 939c820bc4597f8a51d05d6e0339516b7d6b14be Mon Sep 17 00:00:00 2001 From: Chris Elrod Date: Sun, 9 Oct 2022 01:21:54 -0400 Subject: [PATCH 1/2] start adding butterflies Co-authored-by: Yingbo Ma --- Project.toml | 6 +- src/RecursiveFactorization.jl | 5 +- src/butterflies.jl | 114 ++++++++++++++++++++++++++++++++++ test/runtests.jl | 22 +++++++ 4 files changed, 143 insertions(+), 4 deletions(-) create mode 100644 src/butterflies.jl diff --git a/Project.toml b/Project.toml index 9772c74..ceed0ee 100644 --- a/Project.toml +++ b/Project.toml @@ -10,13 +10,15 @@ Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588" SnoopPrecompile = "66db9d55-30c0-4569-8b51-7e840670fc0c" StrideArraysCore = "7792a7ef-975c-4747-a70f-980b88e8d1da" TriangularSolve = "d5829a12-d9aa-46ab-831f-fb7c9ab06edf" +VectorizedRNG = "33b4df10-0173-11e9-2a0c-851a7edac40e" [compat] -LoopVectorization = "0.10,0.11, 0.12" -Polyester = "0.3.2,0.4.1, 0.5, 0.6" +LoopVectorization = "0.10, 0.11, 0.12" +Polyester = "0.3.2, 0.4.1, 0.5, 0.6" SnoopPrecompile = "1" StrideArraysCore = "0.1.13, 0.2.1, 0.3" TriangularSolve = "0.1.1" +VectorizedRNG = "0.2.20" julia = "1.5" [extras] diff --git a/src/RecursiveFactorization.jl b/src/RecursiveFactorization.jl index 67dc11c..e79f434 100644 --- a/src/RecursiveFactorization.jl +++ b/src/RecursiveFactorization.jl @@ -1,9 +1,10 @@ module RecursiveFactorization -include("./lu.jl") +include("lu.jl") +include("butterflies.jl") import SnoopPrecompile -SnoopPrecompile.@precompile_all_calls begin lu!(rand(2, 2)) end +SnoopPrecompile.@precompile_all_calls begin lu!([1.0 0.0; 0.0 1.0]) end end # module diff --git a/src/butterflies.jl b/src/butterflies.jl new file mode 100644 index 0000000..a43cdaa --- /dev/null +++ b/src/butterflies.jl @@ -0,0 +1,114 @@ + +using VectorizedRNG + +@inline exphalf(x) = exp(x) * oftype(x, 0.5) +function 🦋!(wv, ::Val{SEED} = Val(888)) where {SEED} + T = eltype(wv) + mrng = VectorizedRNG.MutableXoshift(SEED) + GC.@preserve mrng begin rand!(exphalf, VectorizedRNG.Xoshift(mrng), wv, static(0), + T(-0.05), T(0.1)) end +end + +function 🦋workspace(A, ::Val{SEED} = Val(888)) where {SEED} + Usz = 2size(A, 1) + Vsz = 2size(A, 2) + uv = similar(A, Usz + Vsz) + 🦋!(uv, Val(SEED)) + # (U=@view(wv[1:Usz]), V=@view(wv[1+Usz:end])) + (uv,) +end +const butterfly_workspace = 🦋workspace; + +function 🦋mul_level!(A, u, v) + # for now, assume... + M, N = size(A) + Ml = M >>> 1 + Nl = N >>> 1 + Mh = M - Ml + Nh = N - Nl + @turbo for n in 1:Nl + for m in 1:Ml + A11 = A[m, n] + A21 = A[m + Mh, n] + A12 = A[m, n + Nh] + A22 = A[m + Mh, n + Nh] + + T1 = A11 + A12 + T2 = A21 + A22 + T3 = A11 - A12 + T4 = A21 - A22 + C11 = T1 + T2 + C21 = T1 - T2 + C12 = T3 + T4 + C22 = T3 - T4 + + u1 = u[m] + u2 = u[m + Mh] + v1 = v[n] + v2 = v[n + Nh] + + A[m, n] = u1 * C11 * v1 + A[m + Mh, n] = u2 * C21 * v1 + A[m, n + Nh] = u1 * C12 * v2 + A[m + Mh, n + Nh] = u2 * C22 * v2 + end + end +end + +function 🦋mul!(A, (uv,)) + M, N = size(A) + @assert iszero(M & 3) & iszero(N & 3) + Mh = M >>> 1 + Nh = N >>> 1 + U₁ = @view(uv[1:Mh]) + U₂ = @view(uv[(1 + Mh + Nh):(2 * Mh + Nh)]) + V₁ = @view(uv[(Mh + 1):(Mh + Nh)]) + V₂ = @view(uv[(1 + 2 * Mh + Nh):(2 * Mh + 2 * Nh)]) + 🦋mul_level!(@view(A[1:Mh, 1:Nh]), U₁, V₁) + 🦋mul_level!(@view(A[(1 + Mh):M, 1:Nh]), U₂, V₁) + 🦋mul_level!(@view(A[1:Mh, (1 + Nh):N]), U₁, V₂) + 🦋mul_level!(@view(A[(1 + Mh):M, (1 + Nh):N]), U₂, V₂) + U = @view(uv[(1 + 2 * Mh + 2 * Nh):(2 * Mh + 2 * Nh + M)]) + V = @view(uv[(1 + 2 * Mh + 2 * Nh + M):(2 * Mh + 2 * Nh + M + N)]) + 🦋mul_level!(@view(A[1:M, 1:N]), U, V) + A +end + +function diagnegbottom(x) + N = length(x) + y = similar(x, N >>> 1) + z = similar(x, N >>> 1) + for n in 1:(N >>> 1) + y[n] = x[n] + end + for n in 1:(N >>> 1) + z[n] = x[n + (N >>> 1)] + end + Diagonal(y), Diagonal(z) +end +🦋(A, B) = [A B + A -B] + +function materializeUV(A, (uv,)) + M, N = size(A) + @assert iszero(M & 3) & iszero(N & 3) + Mh = M >>> 1 + Nh = N >>> 1 + + U₁u, U₁l = diagnegbottom(@view(uv[1:Mh])) + U₂u, U₂l = diagnegbottom(@view(uv[(1 + Mh + Nh):(2 * Mh + Nh)])) + V₁u, V₁l = diagnegbottom(@view(uv[(Mh + 1):(Mh + Nh)])) + V₂u, V₂l = diagnegbottom(@view(uv[(1 + 2 * Mh + Nh):(2 * Mh + 2 * Nh)])) + Uu, Ul = diagnegbottom(@view(uv[(1 + 2 * Mh + 2 * Nh):(2 * Mh + 2 * Nh + M)])) + Vu, Vl = diagnegbottom(@view(uv[(1 + 2 * Mh + 2 * Nh + M):(2 * Mh + 2 * Nh + M + N)])) + + Bu2 = [🦋(U₁u, U₁l) 0*I + 0*I 🦋(U₂u, U₂l)] + Bu1 = 🦋(Uu, Ul) + + Bv2 = [🦋(V₁u, V₁l) 0*I + 0*I 🦋(V₂u, V₂l)] + Bv1 = 🦋(Vu, Vl) + + (Bu2 * Bu1)', Bv2 * Bv1 +end diff --git a/test/runtests.jl b/test/runtests.jl index 1866de2..8eaad12 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -48,3 +48,25 @@ testlu(A::Adjoint, MF::Adjoint, BF) = testlu(parent(A), parent(MF), BF) testlu(A, mylu(A, p, Val(false), check = false), BF) end end end + +function wilkinson(N) + A = zeros(N, N) + A[diagind(A)] .= 1 + A[:, end] .= 1 + for n in 1:(N - 1) + for r in (n + 1):N + @inbounds A[r, n] = -1 + end + end + A +end +@testset "🦋" begin + ws800 = 🦋workspace(B800) + 🦋mul!(copyto!(B800, A800), ws800) + U800, V800 = materializeUV(B800, ws800) + F800 = RecursiveFactorization.lu!(B800, Val(false)) + + b = rand(800) + x = V800 * (F800 \ (U800 * b)) + @test norm(A800 * x .- b) <= 1e-13 +end From fd0b61165637d572c53d62a536fbf6836e13a440 Mon Sep 17 00:00:00 2001 From: Chris Elrod Date: Tue, 11 Oct 2022 15:01:39 -0400 Subject: [PATCH 2/2] fix tests --- src/butterflies.jl | 1 + test/runtests.jl | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/butterflies.jl b/src/butterflies.jl index a43cdaa..226f068 100644 --- a/src/butterflies.jl +++ b/src/butterflies.jl @@ -1,5 +1,6 @@ using VectorizedRNG +using LinearAlgebra: Diagonal, I @inline exphalf(x) = exp(x) * oftype(x, 0.5) function 🦋!(wv, ::Val{SEED} = Val(888)) where {SEED} diff --git a/test/runtests.jl b/test/runtests.jl index 8eaad12..3573dcb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -51,7 +51,7 @@ end end function wilkinson(N) A = zeros(N, N) - A[diagind(A)] .= 1 + A[1:(N+1):N*N] .= 1 A[:, end] .= 1 for n in 1:(N - 1) for r in (n + 1):N @@ -61,12 +61,14 @@ function wilkinson(N) A end @testset "🦋" begin - ws800 = 🦋workspace(B800) - 🦋mul!(copyto!(B800, A800), ws800) - U800, V800 = materializeUV(B800, ws800) + A800 = wilkinson(800); + B800 = similar(A800); + ws800 = RecursiveFactorization.🦋workspace(B800) + RecursiveFactorization.🦋mul!(copyto!(B800, A800), ws800) + U800, V800 = RecursiveFactorization.materializeUV(B800, ws800) F800 = RecursiveFactorization.lu!(B800, Val(false)) b = rand(800) x = V800 * (F800 \ (U800 * b)) - @test norm(A800 * x .- b) <= 1e-13 + @test norm(A800 * x .- b) <= 1e-12 end