Skip to content

Commit 8b94cc6

Browse files
authored
Improve CPU launch heuristic (#500)
1 parent abf0bcf commit 8b94cc6

File tree

2 files changed

+34
-1
lines changed

2 files changed

+34
-1
lines changed

src/cpu.jl

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,29 @@ function (obj::Kernel{CPU})(args...; ndrange=nothing, workgroupsize=nothing, )
4646
__run(obj, ndrange, iterspace, args, dynamic, obj.backend.static)
4747
end
4848

49+
const CPU_GRAINSIZE = 1024 # Vectorization, 4x unrolling, minimal grain size

"""
    default_cpu_workgroupsize(ndrange)

Return a default workgroup size for `ndrange`, with one entry per dimension.

The result is chosen as follows:

- If `ndrange` is empty (`prod(ndrange) == 0`), return a size of one along every
  dimension so the workgroup size never has a zero extent.
- If the whole range has at most `CPU_GRAINSIZE` elements, return `ndrange` itself.
- Otherwise hand out a budget of `CPU_GRAINSIZE` elements dimension by dimension,
  starting with the first: a dimension that fits in the remaining budget is taken
  whole, the first one that does not fit receives what is left of the budget, and
  every later dimension gets size 1.

# Examples
    default_cpu_workgroupsize((64, 64))        == (64, 16)
    default_cpu_workgroupsize((5, 7, 13, 17))  == (5, 7, 13, 2)
    default_cpu_workgroupsize((0, 5))          == (1, 1)
"""
function default_cpu_workgroupsize(ndrange)
    total = prod(ndrange)
    if iszero(total)
        # An empty range would otherwise be returned as-is and yield a zero-sized
        # workgroup; fall back to the smallest valid size in every dimension.
        return map(one, ndrange)
    elseif total <= CPU_GRAINSIZE
        # if the total kernel is small, don't launch multiple tasks
        return ndrange
    end

    # Remaining element budget, carried from one dimension to the next.
    available = Ref(CPU_GRAINSIZE)
    return ntuple(length(ndrange)) do i
        dim = ndrange[i]
        remaining = available[]
        if remaining == 0
            # Budget already spent on earlier dimensions.
            return 1
        elseif remaining <= dim
            # This dimension uses up whatever is left of the budget.
            available[] = 0
            return remaining
        else
            # The whole dimension fits; shrink the budget for the ones after it.
            available[] = remaining ÷ dim
            return dim
        end
    end
end
71+
4972
@inline function launch_config(kernel::Kernel{CPU}, ndrange, workgroupsize)
5073
if ndrange isa Integer
5174
ndrange = (ndrange,)
@@ -55,7 +78,7 @@ end
5578
end
5679

5780
if KernelAbstractions.workgroupsize(kernel) <: DynamicSize && workgroupsize === nothing
58-
workgroupsize = (1024,) # Vectorization, 4x unrolling, minimal grain size
81+
workgroupsize = default_cpu_workgroupsize(ndrange)
5982
end
6083
iterspace, dynamic = partition(kernel, ndrange, workgroupsize)
6184
# partition checked that the ndrange's agreed

test/test.jl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,4 +282,14 @@ end
282282
@test KernelAbstractions.functional(Backend()) isa Union{Missing, Bool}
283283
end
284284

285+
@testset "CPU default workgroupsize" begin
    # Each pair maps an ndrange to the workgroup size the CPU heuristic must pick.
    cases = (
        (64,) => (64,),                     # small range: returned unchanged
        (1024,) => (1024,),                 # exactly one grain: returned unchanged
        (2056,) => (1024,),                 # 1D range above the grain size is capped
        (64, 64) => (64, 16),               # budget runs out in the second dimension
        (64, 64, 64, 4) => (64, 16, 1, 1),  # later dimensions collapse to 1
        (64, 15) => (64, 15),               # product below the grain size
        (5, 7, 13, 17) => (5, 7, 13, 2),    # non-power-of-two extents
    )
    for (ndrange, expected) in cases
        @test KernelAbstractions.default_cpu_workgroupsize(ndrange) == expected
    end
end
294+
285295
end

0 commit comments

Comments
 (0)