Skip to content

Commit 9fca399

Browse files
Merge branch 'JuliaGPU:master' into QR_views
2 parents 0cf05d1 + 40fa8c0 commit 9fca399

File tree

8 files changed

+300
-44
lines changed

8 files changed

+300
-44
lines changed

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "GPUArrays"
22
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
3-
version = "10.1.0"
3+
version = "10.2.2"
44

55
[deps]
66
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
@@ -16,7 +16,7 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
1616
[compat]
1717
Adapt = "4.0"
1818
GPUArraysCore = "= 0.1.6"
19-
LLVM = "3.9, 4, 5, 6"
19+
LLVM = "3.9, 4, 5, 6, 7, 8"
2020
LinearAlgebra = "1"
2121
Printf = "1"
2222
Random = "1"

lib/JLArrays/Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "JLArrays"
22
uuid = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb"
33
authors = ["Tim Besard <tim.besard@gmail.com>"]
4-
version = "0.1.4"
4+
version = "0.1.5"
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"

lib/JLArrays/src/JLArrays.jl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,14 @@ end
372372
Base.copyto!(dest::DenseJLArray{T}, source::DenseJLArray{T}) where {T} =
373373
copyto!(dest, 1, source, 1, length(source))
374374

375+
# Resize `a` to `nl` elements: allocate a new vector of the requested length,
# copy over as many leading elements as fit, then make `a` adopt the new
# vector's storage. Returns `a`.
function Base.resize!(a::DenseJLVector{T}, nl::Integer) where {T}
    fresh = JLVector{T}(undef, nl)

    # only the overlapping prefix is carried over; anything beyond it in
    # `fresh` keeps whatever the `undef` allocation produced
    nkeep = min(length(a), nl)
    copyto!(fresh, 1, a, 1, nkeep)

    # mutate `a` itself so that the caller's object reflects the new size
    a.data = fresh.data
    a.offset = 0
    a.dims = size(fresh)
    return a
end
375383

376384
## random number generation
377385

src/host/abstractarray.jl

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,3 +318,19 @@ Base.deepcopy(x::AbstractGPUArray) = copy(x)
318318

319319
# revert of JuliaLang/julia#31929
320320
Base.filter(f, As::AbstractGPUArray) = As[map(f, As)::AbstractGPUArray{Bool}]
321+
322+
# appending
323+
324+
# Append all elements of `items` to the end of `a`, growing `a` in place.
# Returns `a`.
function Base.append!(a::AbstractGPUVector, items::AbstractVector)
    nitems = length(items)
    resize!(a, length(a) + nitems)

    # after the resize, the newly added slots are the last `nitems` positions
    dest_start = length(a) - nitems + 1
    copyto!(a, dest_start, items, firstindex(items), nitems)
    return a
end
330+
331+
# Tuples are first collected into a vector, because `copyto!` of most GPU
# arrays doesn't currently support Tuple sources.
function Base.append!(a::AbstractGPUVector, items::Tuple)
    staged = collect(items)
    append!(a, staged)
    return a
end

src/host/linalg.jl

Lines changed: 204 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -338,8 +338,10 @@ end
338338

339339

340340
## matrix multiplication
341-
342-
function generic_matmatmul!(C::AbstractArray{R}, A::AbstractArray{T}, B::AbstractArray{S}, a::Number, b::Number) where {T,S,R}
341+
# legacy method: takes the two scaling factors as plain numbers, wraps them in
# a `MulAddMul` and forwards to the `MulAddMul`-based method
function generic_matmatmul!(C::AbstractArray, A::AbstractArray, B::AbstractArray,
                            a::Number, b::Number)
    return generic_matmatmul!(C, A, B, MulAddMul(a, b))
end
344+
function generic_matmatmul!(C::AbstractArray{R}, A::AbstractArray{T}, B::AbstractArray{S}, add::MulAddMul) where {T,S,R}
343345
if size(A,2) != size(B,1)
344346
throw(DimensionMismatch("matrix A has dimensions $(size(A)), matrix B has dimensions $(size(B))"))
345347
end
@@ -350,20 +352,18 @@ function generic_matmatmul!(C::AbstractArray{R}, A::AbstractArray{T}, B::Abstrac
350352
return fill!(C, zero(R))
351353
end
352354

353-
add = MulAddMul(a, b)
354-
355355
gpu_call(C, A, B; name="matmatmul!") do ctx, C, A, B
356356
idx = @linearidx C
357357
assume.(size(C) .> 0)
358358
i, j = @inbounds Tuple(CartesianIndices(C)[idx])..., 1
359359

360360
@inbounds if i <= size(A,1) && j <= size(B,2)
361361
z2 = zero(A[i, 1]*B[1, j] + A[i, 1]*B[1, j])
362-
Ctmp = convert(promote_type(R, typeof(z2)), z2)
362+
Cij = convert(promote_type(R, typeof(z2)), z2)
363363
for k in 1:size(A,2)
364-
Ctmp += A[i, k]*B[k, j]
364+
Cij += A[i, k]*B[k, j]
365365
end
366-
C[i,j] = add(Ctmp, C[i,j])
366+
C[i,j] = add(Cij, C[i,j])
367367
end
368368

369369
return
@@ -372,42 +372,229 @@ function generic_matmatmul!(C::AbstractArray{R}, A::AbstractArray{T}, B::Abstrac
372372
C
373373
end
374374

375+
@static if VERSION < v"1.12.0-"
375376
function LinearAlgebra.generic_matvecmul!(C::AbstractGPUVector, tA::AbstractChar, A::AbstractGPUMatrix, B::AbstractGPUVector, _add::MulAddMul = MulAddMul())
376-
generic_matmatmul!(C, wrap(A, tA), B, _add.alpha, _add.beta)
377+
generic_matmatmul!(C, wrap(A, tA), B, _add)
377378
end
378379

379380
function LinearAlgebra.generic_matmatmul!(C::AbstractGPUVecOrMat, tA, tB, A::AbstractGPUVecOrMat, B::AbstractGPUVecOrMat, _add::MulAddMul=MulAddMul())
380-
generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add.alpha, _add.beta)
381+
generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add)
382+
end
383+
else
384+
function LinearAlgebra.generic_matvecmul!(C::AbstractGPUVector, tA::AbstractChar, A::AbstractGPUMatrix, B::AbstractGPUVector, a::Number, b::Number)
385+
LinearAlgebra.@stable_muladdmul generic_matmatmul!(C, wrap(A, tA), B, MulAddMul(a, b))
386+
end
387+
388+
function LinearAlgebra.generic_matmatmul!(C::AbstractGPUVecOrMat, tA, tB, A::AbstractGPUVecOrMat, B::AbstractGPUVecOrMat, a::Number, b::Number)
389+
LinearAlgebra.@stable_muladdmul generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), MulAddMul(a, b))
390+
end
391+
end
392+
393+
# Compute `C = tfun(tri(A)) * B`, where `tri(A)` is the triangular part of `A`
# and `tfun` is `identity`, `transpose` or `adjoint`.
#
# - `uploc`:   `'U'` if the upper triangle of `A` is the one being used; any
#              other value selects the lower triangle.
# - `isunitc`: `'U'` if the diagonal is taken to be one (the stored diagonal of
#              `A` is then not read); any other value uses the stored diagonal.
#
# Throws a `DimensionMismatch` if the sizes of `A`, `B` and `C` do not agree,
# and errors with "Not supported" for any other `tfun`. Returns `C`.
#
# `C` is overwritten with the product; its previous contents are not used.
# NOTE(review): passing the same array as `C` and `B` is presumably unsafe,
# since kernels read entries of `B` that other invocations write -- confirm
# before relying on in-place use.
function generic_trimatmul!(C::AbstractGPUVecOrMat{R}, uploc, isunitc, tfun::Function, A::AbstractGPUMatrix{T}, B::AbstractGPUVecOrMat{S}) where {T,S,R}
    if size(A,2) != size(B,1)
        throw(DimensionMismatch(lazy"matrix A has dimensions $(size(A)), matrix B has dimensions $(size(B))"))
    end
    if size(C,1) != size(A,1) || size(C,2) != size(B,2)
        throw(DimensionMismatch(lazy"result C has dimensions $(size(C)), needs $((size(A,1),size(B,2)))"))
    end
    if isempty(A) || isempty(B)
        return fill!(C, zero(R))
    end

    # `uploc` refers to the storage of `A`; applying transpose/adjoint flips
    # which triangle of the effective left operand is populated
    upper = tfun === identity ? uploc == 'U' : uploc != 'U'
    unit = isunitc == 'U'

    # kernel for `tfun === identity`
    function trimatmul(ctx, C, A, B)
        idx = @linearidx C
        assume.(size(C) .> 0)
        # the trailing `1` supplies the column index when `C` is a vector
        i, j = @inbounds Tuple(CartesianIndices(C)[idx])..., 1
        l, m, n = size(A, 1), size(B, 1), size(B, 2)

        @inbounds if i <= l && j <= n
            z2 = zero(A[i,1] * B[1,j] + A[i,1] * B[1,j])
            Cij = convert(promote_type(R, typeof(z2)), z2)
            # diagonal contribution
            Cij += (unit ? one(Cij) : A[i,i]) * B[i,j]
            # strictly off-diagonal part of row `i` inside the triangle
            for k in (upper ? (i + 1) : 1):(upper ? m : (i - 1))
                Cij += A[i,k] * B[k,j]
            end
            # overwrite instead of accumulating, so the result does not depend
            # on what `C` held before (matches the empty-input path above)
            C[i,j] = Cij
        end

        return
    end

    # kernel for `tfun === transpose`: reads `A[k,i]` in place of `A[i,k]`
    function trimatmul_t(ctx, C, A, B)
        idx = @linearidx C
        assume.(size(C) .> 0)
        i, j = @inbounds Tuple(CartesianIndices(C)[idx])..., 1
        l, m, n = size(A, 1), size(B, 1), size(B, 2)

        @inbounds if i <= l && j <= n
            z2 = zero(A[i,1] * B[1,j] + A[i,1] * B[1,j])
            Cij = convert(promote_type(R, typeof(z2)), z2)
            Cij += (unit ? one(Cij) : transpose(A[i,i])) * B[i,j]
            for k in (upper ? (i + 1) : 1):(upper ? m : (i - 1))
                Cij += transpose(A[k,i]) * B[k,j]
            end
            C[i,j] = Cij
        end

        return
    end

    # kernel for `tfun === adjoint`: as above, with `adjoint` on each element
    function trimatmul_a(ctx, C, A, B)
        idx = @linearidx C
        assume.(size(C) .> 0)
        i, j = @inbounds Tuple(CartesianIndices(C)[idx])..., 1
        l, m, n = size(A, 1), size(B, 1), size(B, 2)

        @inbounds if i <= l && j <= n
            z2 = zero(A[i,1] * B[1,j] + A[i,1] * B[1,j])
            Cij = convert(promote_type(R, typeof(z2)), z2)
            Cij += (unit ? one(Cij) : adjoint(A[i,i])) * B[i,j]
            for k in (upper ? (i + 1) : 1):(upper ? m : (i - 1))
                Cij += adjoint(A[k,i]) * B[k,j]
            end
            C[i,j] = Cij
        end

        return
    end

    if tfun === identity
        gpu_call(trimatmul, C, A, B; name="trimatmul")
    elseif tfun === transpose
        gpu_call(trimatmul_t, C, A, B; name="trimatmul_t")
    elseif tfun === adjoint
        gpu_call(trimatmul_a, C, A, B; name="trimatmul_a")
    else
        error("Not supported")
    end

    C
end
476+
477+
# Compute `C = A * tfun(tri(B))`, where `tri(B)` is the triangular part of `B`
# and `tfun` is `identity`, `transpose` or `adjoint`.
#
# - `uploc`:   `'U'` if the upper triangle of `B` is the one being used; any
#              other value selects the lower triangle.
# - `isunitc`: `'U'` if the diagonal is taken to be one (the stored diagonal of
#              `B` is then not read); any other value uses the stored diagonal.
#
# Throws a `DimensionMismatch` if the sizes of `A`, `B` and `C` do not agree,
# and errors with "Not supported" for any other `tfun`. Returns `C`.
#
# `C` is overwritten with the product; its previous contents are not used.
# NOTE(review): passing the same array as `C` and `A` is presumably unsafe,
# since kernels read entries of `A` that other invocations write -- confirm
# before relying on in-place use.
function generic_mattrimul!(C::AbstractGPUVecOrMat{R}, uploc, isunitc, tfun::Function, A::AbstractGPUMatrix{T}, B::AbstractGPUVecOrMat{S}) where {T,S,R}
    if size(A,2) != size(B,1)
        throw(DimensionMismatch(lazy"matrix A has dimensions $(size(A)), matrix B has dimensions $(size(B))"))
    end
    if size(C,1) != size(A,1) || size(C,2) != size(B,2)
        throw(DimensionMismatch(lazy"result C has dimensions $(size(C)), needs $((size(A,1),size(B,2)))"))
    end
    if isempty(A) || isempty(B)
        return fill!(C, zero(R))
    end

    # `uploc` refers to the storage of `B`; applying transpose/adjoint flips
    # which triangle of the effective right operand is populated
    upper = tfun === identity ? uploc == 'U' : uploc != 'U'
    unit = isunitc == 'U'

    # kernel for `tfun === identity`
    function mattrimul(ctx, C, A, B)
        idx = @linearidx C
        assume.(size(C) .> 0)
        # the trailing `1` supplies the column index when `C` is a vector
        i, j = @inbounds Tuple(CartesianIndices(C)[idx])..., 1
        l, m, n = size(A, 1), size(B, 1), size(B, 2)

        @inbounds if i <= l && j <= n
            z2 = zero(A[i,1] * B[1,j] + A[i,1] * B[1,j])
            Cij = convert(promote_type(R, typeof(z2)), z2)
            # diagonal contribution
            Cij += A[i,j] * (unit ? one(Cij) : B[j,j])
            # strictly off-diagonal part of column `j` inside the triangle
            for k in (upper ? 1 : (j + 1)):(upper ? (j - 1) : m)
                Cij += A[i,k] * B[k,j]
            end
            # overwrite instead of accumulating, so the result does not depend
            # on what `C` held before (matches the empty-input path above)
            C[i,j] = Cij
        end

        return
    end

    # kernel for `tfun === transpose`: reads `B[j,k]` in place of `B[k,j]`
    function mattrimul_t(ctx, C, A, B)
        idx = @linearidx C
        assume.(size(C) .> 0)
        i, j = @inbounds Tuple(CartesianIndices(C)[idx])..., 1
        l, m, n = size(A, 1), size(B, 1), size(B, 2)

        @inbounds if i <= l && j <= n
            z2 = zero(A[i,1] * B[1,j] + A[i,1] * B[1,j])
            Cij = convert(promote_type(R, typeof(z2)), z2)
            Cij += A[i,j] * (unit ? one(Cij) : transpose(B[j,j]))
            for k in (upper ? 1 : (j + 1)):(upper ? (j - 1) : m)
                Cij += A[i,k] * transpose(B[j,k])
            end
            C[i,j] = Cij
        end

        return
    end

    # kernel for `tfun === adjoint`: as above, with `adjoint` on each element
    function mattrimul_a(ctx, C, A, B)
        idx = @linearidx C
        assume.(size(C) .> 0)
        i, j = @inbounds Tuple(CartesianIndices(C)[idx])..., 1
        l, m, n = size(A, 1), size(B, 1), size(B, 2)

        @inbounds if i <= l && j <= n
            z2 = zero(A[i,1] * B[1,j] + A[i,1] * B[1,j])
            Cij = convert(promote_type(R, typeof(z2)), z2)
            Cij += A[i,j] * (unit ? one(Cij) : adjoint(B[j,j]))
            for k in (upper ? 1 : (j + 1)):(upper ? (j - 1) : m)
                Cij += A[i,k] * adjoint(B[j,k])
            end
            C[i,j] = Cij
        end

        return
    end

    if tfun === identity
        gpu_call(mattrimul, C, A, B; name="mattrimul")
    elseif tfun === transpose
        gpu_call(mattrimul_t, C, A, B; name="mattrimul_t")
    elseif tfun === adjoint
        gpu_call(mattrimul_a, C, A, B; name="mattrimul_a")
    else
        error("Not supported")
    end

    C
end
560+
561+
# On Julia 1.10 and later, add GPU-array methods to LinearAlgebra's
# `generic_trimatmul!` / `generic_mattrimul!` that forward, argument for
# argument, to this module's own functions of the same names.
if VERSION >= v"1.10-"
    LinearAlgebra.generic_trimatmul!(C::AbstractGPUVecOrMat, uploc, isunitc, tfun::Function,
                                     A::AbstractGPUMatrix, B::AbstractGPUVecOrMat) =
        generic_trimatmul!(C, uploc, isunitc, tfun, A, B)

    LinearAlgebra.generic_mattrimul!(C::AbstractGPUMatrix, uploc, isunitc, tfun::Function,
                                     A::AbstractGPUMatrix, B::AbstractGPUMatrix) =
        generic_mattrimul!(C, uploc, isunitc, tfun, A, B)
end
382569

383570
if VERSION < v"1.10.0-DEV.1365"
384571
# catch other functions that are called by LinearAlgebra's mul!
385572
function LinearAlgebra.gemv!(C::AbstractGPUVector, tA::AbstractChar, A::AbstractGPUMatrix, B::AbstractGPUVector, a::Number, b::Number)
386-
generic_matmatmul!(C, wrap(A, tA), B, a, b)
573+
generic_matmatmul!(C, wrap(A, tA), B, MulAddMul(a, b))
387574
end
388575
# disambiguation
389576
function LinearAlgebra.gemv!(C::AbstractGPUVector{T}, tA::AbstractChar, A::AbstractGPUMatrix{T}, B::AbstractGPUVector{T}, a::Number, b::Number) where {T<:LinearAlgebra.BlasFloat}
390-
generic_matmatmul!(C, wrap(A, tA), B, a, b)
577+
generic_matmatmul!(C, wrap(A, tA), B, MulAddMul(a, b))
391578
end
392579

393580
LinearAlgebra.gemm_wrapper!(C::AbstractGPUVecOrMat, tA::AbstractChar, tB::AbstractChar, A::AbstractGPUVecOrMat, B::AbstractGPUVecOrMat, _add::MulAddMul) =
394-
LinearAlgebra.generic_matmatmul!(C, tA, tB, A, B, _add)
581+
generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add)
395582
# disambiguation
396583
LinearAlgebra.gemm_wrapper!(C::AbstractGPUVecOrMat{T}, tA::AbstractChar, tB::AbstractChar, A::AbstractGPUVecOrMat{T}, B::AbstractGPUVecOrMat{T}, _add::MulAddMul) where {T<:LinearAlgebra.BlasFloat} =
397-
LinearAlgebra.generic_matmatmul!(C, tA, tB, A, B, _add)
584+
generic_matmatmul!(C, wrap(A, tA), wrap(B, tB), _add)
398585

399586
function LinearAlgebra.syrk_wrapper!(C::AbstractGPUMatrix, tA::AbstractChar, A::AbstractGPUVecOrMat, _add::MulAddMul = MulAddMul())
400587
if tA == 'T'
401-
LinearAlgebra.generic_matmatmul!(C, 'T', 'N', A, A, _add)
588+
generic_matmatmul!(C, wrap(A, 'T'), A, _add)
402589
else # tA == 'N'
403-
LinearAlgebra.generic_matmatmul!(C, 'N', 'T', A, A, _add)
590+
generic_matmatmul!(C, A, wrap(A, 'T'), _add)
404591
end
405592
end
406593
function LinearAlgebra.herk_wrapper!(C::AbstractGPUMatrix, tA::AbstractChar, A::AbstractGPUVecOrMat, _add::MulAddMul = MulAddMul())
407594
if tA == 'C'
408-
LinearAlgebra.generic_matmatmul!(C, 'C', 'N', A, A, _add)
595+
generic_matmatmul!(C, wrap(A, 'C'), A, _add)
409596
else # tA == 'N'
410-
LinearAlgebra.generic_matmatmul!(C, 'N', 'C', A, A, _add)
597+
generic_matmatmul!(C, A, wrap(A, 'C'), _add)
411598
end
412599
end
413600
end # VERSION

test/testsuite.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ include("testsuite/construction.jl")
8888
include("testsuite/gpuinterface.jl")
8989
include("testsuite/indexing.jl")
9090
include("testsuite/base.jl")
91-
#include("testsuite/vector.jl")
91+
include("testsuite/vector.jl")
9292
include("testsuite/reductions.jl")
9393
include("testsuite/broadcasting.jl")
9494
include("testsuite/linalg.jl")

test/testsuite/linalg.jl

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,39 @@
132132
@test istriu(A) == istriu(B)
133133
end
134134
end
135+
136+
# Triangular `mul!` tests, run only on Julia 1.10 and later. Each case
# computes the product once with the array type under test (`AT`) and once
# with plain CPU arrays, then compares the two results approximately.
if VERSION >= v"1.10-"
    @testset "mul! + Triangular" begin
        # triangular operand on the left, against both a vector and a matrix
        @testset "trimatmul! ($TR x $T, $f)" for T in (Float32, ComplexF32), TR in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular), f in (identity, transpose, adjoint)
            n = 128
            A = AT(rand(T, n,n))
            b = AT(rand(T, n))
            Ct = AT(zeros(T, n))
            C = zeros(T, n)
            mul!(Ct, f(TR(A)), b)
            mul!(C, f(TR(collect(A))), collect(b))
            @test collect(Ct) ≈ C

            B = AT(rand(T, n, n))
            Ct = AT(zeros(T, n, n))
            C = zeros(T, n, n)
            mul!(Ct, f(TR(A)), B)
            mul!(C, f(TR(collect(A))), collect(B))
            @test collect(Ct) ≈ C
        end

        # triangular operand on the right
        @testset "mattrimul ($TR x $T, $f)" for T in (Float32, ComplexF32), TR in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular), f in (identity, transpose, adjoint)
            n = 128
            A = AT(rand(T, n,n))
            B = AT(rand(T, n, n))
            Ct = AT(zeros(T, n, n))
            C = zeros(T, n, n)
            mul!(Ct, A, f(TR(B)))
            mul!(C, collect(A), f(TR(collect(B))))
            @test collect(Ct) ≈ C
        end
    end
end
135168
end
136169

137170
@testset "diagonal" begin

0 commit comments

Comments
 (0)