From 166d937045f44cbedbe4a2316dddf2b5ec49174e Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Wed, 24 Apr 2024 19:53:12 -0400 Subject: [PATCH 1/3] Input/output convention in naive_transpose example Putting output before input in example to match convention --- examples/naive_transpose.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/naive_transpose.jl b/examples/naive_transpose.jl index de346be54..bca184fa1 100644 --- a/examples/naive_transpose.jl +++ b/examples/naive_transpose.jl @@ -3,7 +3,7 @@ include(joinpath(dirname(pathof(KernelAbstractions)), "../examples/utils.jl")) # @kernel function naive_transpose_kernel!(a, b) i, j = @index(Global, NTuple) - @inbounds b[i, j] = a[j, i] + @inbounds a[i, j] = b[j, i] end # create wrapper function to check inputs @@ -24,8 +24,8 @@ end res = 1024 # creating initial arrays -a = rand!(allocate(backend, Float32, res, res)) -b = KernelAbstractions.zeros(backend, Float32, res, res) +b = rand!(allocate(backend, Float32, res, res)) +a = KernelAbstractions.zeros(backend, Float32, res, res) naive_transpose!(a,b) KernelAbstractions.synchronize(backend) From 2b91be5d70d34f361acf7064ecb9b4ddc58ec493 Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Wed, 24 Apr 2024 19:55:54 -0400 Subject: [PATCH 2/3] Output/input order convention in matmul example Putting output before input in example to match convention --- examples/matmul.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/examples/matmul.jl b/examples/matmul.jl index f28f91000..305436a7a 100644 --- a/examples/matmul.jl +++ b/examples/matmul.jl @@ -2,34 +2,34 @@ using KernelAbstractions, Test, Random include(joinpath(dirname(pathof(KernelAbstractions)), "../examples/utils.jl")) # Load backend # Simple kernel for matrix multiplication -@kernel function matmul_kernel!(a, b, c) +@kernel function matmul_kernel!(output, a, 
b) i, j = @index(Global, NTuple) # creating a temporary sum variable for matrix multiplication - tmp_sum = zero(eltype(c)) + tmp_sum = zero(eltype(output)) for k = 1:size(a)[2] tmp_sum += a[i,k] * b[k, j] end - c[i,j] = tmp_sum + output[i,j] = tmp_sum end # Creating a wrapper kernel for launching with error checks -function matmul!(a, b, c) +function matmul!(output, a, b) if size(a)[2] != size(b)[1] println("Matrix size mismatch!") return nothing end backend = KernelAbstractions.get_backend(a) kernel! = matmul_kernel!(backend) - kernel!(a, b, c, ndrange=size(c)) + kernel!(output, a, b, ndrange=size(c)) end a = rand!(allocate(backend, Float32, 256, 123)) b = rand!(allocate(backend, Float32, 123, 45)) -c = KernelAbstractions.zeros(backend, Float32, 256, 45) +output = KernelAbstractions.zeros(backend, Float32, 256, 45) -matmul!(a,b,c) +matmul!(output, a,b) KernelAbstractions.synchronize(backend) -@test isapprox(c, a*b) +@test isapprox(output, a*b) From 89fddbce498c5c1aaef6c66341c418549c5dac7e Mon Sep 17 00:00:00 2001 From: Evelyne <110474206+evelyne-ringoot@users.noreply.github.com> Date: Wed, 24 Apr 2024 20:50:48 -0400 Subject: [PATCH 3/3] Update matmul.jl: use size(output) for ndrange --- examples/matmul.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/matmul.jl b/examples/matmul.jl index 305436a7a..11d5c1520 100644 --- a/examples/matmul.jl +++ b/examples/matmul.jl @@ -22,7 +22,7 @@ function matmul!(output, a, b) end backend = KernelAbstractions.get_backend(a) kernel! = matmul_kernel!(backend) - kernel!(output, a, b, ndrange=size(c)) + kernel!(output, a, b, ndrange=size(output)) end a = rand!(allocate(backend, Float32, 256, 123))