Skip to content

Commit 5e03ecf

Browse files
committed
Allow opt-out of implicit bounds-checking
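KernelAbstractions currently creates kernels that look like:
```
if __validindex(ctx)
    # Body
end
```
This is problematic due to the convergence requirement on `@synchronize`: work-items that fail the `__validindex` check skip the body and therefore never reach a `@synchronize` placed inside it. This commit adds an `implicit_validindex` option to `@kernel` so that the implicit check can be disabled.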
1 parent 31d5b44 commit 5e03ecf

File tree

2 files changed

+17
-7
lines changed

2 files changed

+17
-7
lines changed

src/KernelAbstractions.jl

+9-3
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ synchronize(backend)
5050
```
5151
"""
5252
macro kernel(expr)
53-
return __kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false)
53+
return __kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false, #=implicit_validindex=# true)
5454
end
5555

5656
"""
@@ -60,6 +60,7 @@ This allows for two different configurations:
6060
6161
1. `cpu={true, false}`: Disables code-generation of the CPU function. This relaxes semantics such that KernelAbstractions primitives can be used in non-kernel functions.
6262
2. `inbounds={false, true}`: Enables a forced `@inbounds` macro around the function definition in the case the user is using too many `@inbounds` already in their kernel. Note that this can lead to incorrect results, crashes, etc and is fundamentally unsafe. Be careful!
63+
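3. `implicit_validindex={true, false}`: Controls whether the GPU kernel body is implicitly wrapped in an `if __validindex(ctx)` check. Setting this to `false` opts out of the implicit bounds check and emits the body as written, which is needed when the kernel uses `@synchronize` and all work-items must reach it. The kernel is then presumably responsible for guarding its own out-of-range indices.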
6364
6465
- [`@context`](@ref)
6566
@@ -68,9 +69,10 @@ This allows for two different configurations:
6869
"""
6970
macro kernel(ex...)
7071
if length(ex) == 1
71-
return __kernel(ex[1], true, false)
72+
return __kernel(ex[1], true, false, true)
7273
else
7374
generate_cpu = true
75+
implicit_validindex = true
7476
force_inbounds = false
7577
for i in 1:(length(ex) - 1)
7678
if ex[i] isa Expr && ex[i].head == :(=) &&
@@ -79,16 +81,20 @@ macro kernel(ex...)
7981
elseif ex[i] isa Expr && ex[i].head == :(=) &&
8082
ex[i].args[1] == :inbounds && ex[i].args[2] isa Bool
8183
force_inbounds = ex[i].args[2]
84+
elseif ex[i] isa Expr && ex[i].head == :(=) &&
85+
ex[i].args[1] == :implicit_validindex && ex[i].args[2] isa Bool
86+
implicit_validindex = ex[i].args[2]
8287
else
8388
error(
8489
"Configuration should be of form:\n" *
8590
"* `cpu=true`\n" *
8691
"* `inbounds=false`\n" *
92+
"* `implicit_validindex=false`\n" *
8793
"got `", ex[i], "`",
8894
)
8995
end
9096
end
91-
return __kernel(ex[end], generate_cpu, force_inbounds)
97+
return __kernel(ex[end], generate_cpu, force_inbounds,implicit_validindex)
9298
end
9399
end
94100

src/macros.jl

+8-4
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ function find_return(stmt)
1010
end
1111

1212
# XXX: Proper errors
13-
function __kernel(expr, generate_cpu = true, force_inbounds = false)
13+
function __kernel(expr, generate_cpu = true, force_inbounds = false, implicit_validindex = true)
1414
def = splitdef(expr)
1515
name = def[:name]
1616
args = def[:args]
@@ -46,7 +46,7 @@ function __kernel(expr, generate_cpu = true, force_inbounds = false)
4646

4747
def_gpu = deepcopy(def)
4848
def_gpu[:name] = gpu_name = Symbol(:gpu_, name)
49-
transform_gpu!(def_gpu, constargs, force_inbounds)
49+
transform_gpu!(def_gpu, constargs, force_inbounds, implicit_validindex)
5050
gpu_function = combinedef(def_gpu)
5151

5252
# create constructor functions
@@ -78,7 +78,7 @@ end
7878

7979
# The easy case, transform the function for GPU execution
8080
# - mark constant arguments by applying `constify`.
81-
function transform_gpu!(def, constargs, force_inbounds)
81+
function transform_gpu!(def, constargs, force_inbounds, implicit_validindex)
8282
let_constargs = Expr[]
8383
for (i, arg) in enumerate(def[:args])
8484
if constargs[i]
@@ -94,7 +94,11 @@ function transform_gpu!(def, constargs, force_inbounds)
9494
if force_inbounds
9595
push!(new_stmts, Expr(:inbounds, true))
9696
end
97-
append!(new_stmts, split(emit_gpu, body.args))
97+
if implicit_validindex
98+
append!(new_stmts, split(emit_gpu, body.args))
99+
else
100+
push!(new_stmts, body)
101+
end
98102
if force_inbounds
99103
push!(new_stmts, Expr(:inbounds, :pop))
100104
end

0 commit comments

Comments
 (0)