Skip to content

Commit c4edd06

Browse files
committed
API changes
1 parent 9915cdd commit c4edd06

File tree

2 files changed

+19
-1
lines changed

2 files changed

+19
-1
lines changed

src/ROCKernels.jl

+1 -1
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ end
168 168

169 169
# Reduction.
170 170

171-
KA.supports_warp_reduction(::ROCBackend) = true
171+
@device_override @inline KA.__supports_warp_reduction() = true
172 172

173 173
@device_override @inline function KA.__shfl_down(val, offset)
174 174
AMDGPU.Device.shfl_down(val, offset)

t.jl

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
using AMDGPU
2+
using KernelAbstractions
3+
4+
@kernel cpu=false function groupreduce_1!(y, x, op, neutral)
5+
i = @index(Global)
6+
val = i > length(x) ? neutral : x[i]
7+
res = @groupreduce(op, val, neutral)
8+
i == 1 && (y[1] = res)
9+
end
10+
11+
function main()
12+
x = ROCArray(ones(Float32, 256))
13+
y = ROCArray(zeros(Float32, 1))
14+
groupreduce_1!(ROCBackend(), 256)(y, x, +, 0f0; ndrange=256)
15+
@show y
16+
return
17+
end
18+
main()

0 commit comments

Comments
 (0)