Skip to content

Commit ba4eee7

Browse files
committed
Use POCL as a CPU backend
1 parent b435bb2 commit ba4eee7

22 files changed

+2511
-472
lines changed

.github/workflows/ci.yml

+1-5
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,8 @@ jobs:
2222
fail-fast: false
2323
matrix:
2424
version:
25-
- '1.6'
26-
- '1.7'
27-
- '1.8'
28-
- '1.9'
2925
- '1.10'
30-
- '~1.11.0-0'
26+
- '1.11'
3127
os:
3228
- ubuntu-latest
3329
- macOS-latest

Project.toml

+18-17
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,32 @@
11
name = "KernelAbstractions"
22
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
33
authors = ["Valentin Churavy <v.churavy@gmail.com> and contributors"]
4-
version = "0.9.33"
4+
version = "0.10.0-dev"
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
88
Atomix = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
9-
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
9+
GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
1010
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
11-
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
11+
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
1212
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
13+
OpenCL_jll = "6cb37087-e8b6-5417-8430-1f242f1e46e4"
1314
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
14-
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
15-
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
15+
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
16+
SPIRVIntrinsics = "71d1d633-e7e8-4a92-83a1-de8814b09ba8"
1617
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
1718
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
19+
pocl_jll = "627d6b7a-bbe6-5189-83e7-98cc0a5aeadd"
20+
21+
[weakdeps]
22+
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
23+
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
24+
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
25+
26+
[extensions]
27+
EnzymeExt = "EnzymeCore"
28+
LinearAlgebraExt = "LinearAlgebra"
29+
SparseArraysExt = "SparseArrays"
1830

1931
[compat]
2032
Adapt = "0.4, 1.0, 2.0, 3.0, 4"
@@ -24,23 +36,12 @@ InteractiveUtils = "1.6"
2436
LinearAlgebra = "1.6"
2537
MacroTools = "0.5"
2638
PrecompileTools = "1"
27-
Requires = "1.3"
2839
SparseArrays = "<0.0.1, 1.6"
2940
StaticArrays = "0.12, 1.0"
3041
UUIDs = "<0.0.1, 1.6"
31-
julia = "1.6"
32-
33-
[extensions]
34-
EnzymeExt = "EnzymeCore"
35-
LinearAlgebraExt = "LinearAlgebra"
36-
SparseArraysExt = "SparseArrays"
42+
julia = "1.10"
3743

3844
[extras]
3945
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
4046
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
4147
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
42-
43-
[weakdeps]
44-
EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869"
45-
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
46-
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

examples/histogram.jl

+24-28
Original file line numberDiff line numberDiff line change
@@ -74,32 +74,28 @@ function move(backend, input)
7474
end
7575

7676
@testset "histogram tests" begin
77-
if Base.VERSION < v"1.7.0" && !KernelAbstractions.isgpu(backend)
78-
@test_skip false
79-
else
80-
rand_input = [rand(1:128) for i in 1:1000]
81-
linear_input = [i for i in 1:1024]
82-
all_two = [2 for i in 1:512]
83-
84-
histogram_rand_baseline = create_histogram(rand_input)
85-
histogram_linear_baseline = create_histogram(linear_input)
86-
histogram_two_baseline = create_histogram(all_two)
87-
88-
rand_input = move(backend, rand_input)
89-
linear_input = move(backend, linear_input)
90-
all_two = move(backend, all_two)
91-
92-
rand_histogram = KernelAbstractions.zeros(backend, Int, 128)
93-
linear_histogram = KernelAbstractions.zeros(backend, Int, 1024)
94-
two_histogram = KernelAbstractions.zeros(backend, Int, 2)
95-
96-
histogram!(rand_histogram, rand_input)
97-
histogram!(linear_histogram, linear_input)
98-
histogram!(two_histogram, all_two)
99-
KernelAbstractions.synchronize(CPU())
100-
101-
@test isapprox(Array(rand_histogram), histogram_rand_baseline)
102-
@test isapprox(Array(linear_histogram), histogram_linear_baseline)
103-
@test isapprox(Array(two_histogram), histogram_two_baseline)
104-
end
77+
rand_input = [rand(1:128) for i in 1:1000]
78+
linear_input = [i for i in 1:1024]
79+
all_two = [2 for i in 1:512]
80+
81+
histogram_rand_baseline = create_histogram(rand_input)
82+
histogram_linear_baseline = create_histogram(linear_input)
83+
histogram_two_baseline = create_histogram(all_two)
84+
85+
rand_input = move(backend, rand_input)
86+
linear_input = move(backend, linear_input)
87+
all_two = move(backend, all_two)
88+
89+
rand_histogram = KernelAbstractions.zeros(backend, Int, 128)
90+
linear_histogram = KernelAbstractions.zeros(backend, Int, 1024)
91+
two_histogram = KernelAbstractions.zeros(backend, Int, 2)
92+
93+
histogram!(rand_histogram, rand_input)
94+
histogram!(linear_histogram, linear_input)
95+
histogram!(two_histogram, all_two)
96+
KernelAbstractions.synchronize(CPU())
97+
98+
@test isapprox(Array(rand_histogram), histogram_rand_baseline)
99+
@test isapprox(Array(linear_histogram), histogram_linear_baseline)
100+
@test isapprox(Array(two_histogram), histogram_two_baseline)
105101
end

src/KernelAbstractions.jl

+28-43
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ This allows for two different configurations:
6565
6666
!!! warning
6767
This is an experimental feature.
68+
69+
!!! note
70+
`cpu={true, false}` is deprecated for KernelAbstractions 1.0
6871
"""
6972
macro kernel(ex...)
7073
if length(ex) == 1
@@ -184,6 +187,8 @@ After releasing the memory of an array, it should no longer be accessed.
184187
"""
185188
function unsafe_free! end
186189

190+
unsafe_free!(::AbstractArray) = return
191+
187192
###
188193
# Kernel language
189194
# - @localmem
@@ -248,6 +253,9 @@ For storage that only persists between `@synchronize` statements, an `MArray` ca
248253
instead.
249254
250255
See also [`@uniform`](@ref).
256+
257+
!!! note
258+
`@private` is deprecated for KernelAbstractions 1.0
251259
"""
252260
macro private(T, dims)
253261
if dims isa Integer
@@ -263,6 +271,9 @@ end
263271
264272
Creates a private local of `mem` per item in the workgroup. This can be safely used
265273
across [`@synchronize`](@ref) statements.
274+
275+
!!! note
276+
`@private` is deprecated for KernelAbstractions 1.0
266277
"""
267278
macro private(expr)
268279
return esc(expr)
@@ -273,6 +284,9 @@ end
273284
274285
`expr` is evaluated outside the workitem scope. This is useful for variable declarations
275286
that span workitems, or are reused across `@synchronize` statements.
287+
288+
!!! note
289+
`@uniform` is deprecated for KernelAbstractions 1.0
276290
"""
277291
macro uniform(value)
278292
return esc(value)
@@ -316,6 +330,8 @@ Access the hidden context object used by KernelAbstractions.
316330
!!! warning
317331
Only valid to be used from a kernel with `cpu=false`.
318332
333+
!!! note
334+
`@context` will be supported on all backends in KernelAbstractions 1.0
319335
```
320336
function f(@context, a)
321337
I = @index(Global, Linear)
@@ -464,31 +480,11 @@ Abstract type for all GPU based KernelAbstractions backends.
464480
465481
!!! note
466482
New backend implementations **must** sub-type this abstract type.
467-
"""
468-
abstract type GPU <: Backend end
469-
470-
"""
471-
CPU(; static=false)
472-
473-
Instantiate a CPU (multi-threaded) backend.
474-
475-
## Options:
476-
- `static`: Uses a static thread assignment, this can be beneficial for NUMA aware code.
477-
Defaults to false.
478-
"""
479-
struct CPU <: Backend
480-
static::Bool
481-
CPU(; static::Bool = false) = new(static)
482-
end
483-
484-
"""
485-
isgpu(::Backend)::Bool
486483
487-
Returns true for all [`GPU`](@ref) backends.
484+
!!! note
485+
`GPU` will be removed in KernelAbstractions v1.0
488486
"""
489-
isgpu(::GPU) = true
490-
isgpu(::CPU) = false
491-
487+
abstract type GPU <: Backend end
492488

493489
"""
494490
get_backend(A::AbstractArray)::Backend
@@ -504,12 +500,9 @@ function get_backend end
504500
# Should cover SubArray, ReshapedArray, ReinterpretArray, Hermitian, AbstractTriangular, etc.:
505501
get_backend(A::AbstractArray) = get_backend(parent(A))
506502

507-
get_backend(::Array) = CPU()
508-
509503
# Define:
510504
# adapt_storage(::Backend, a::Array) = adapt(BackendArray, a)
511505
# adapt_storage(::Backend, a::BackendArray) = a
512-
Adapt.adapt_storage(::CPU, a::Array) = a
513506

514507
"""
515508
allocate(::Backend, Type, dims...)::AbstractArray
@@ -729,7 +722,7 @@ Partition a kernel for the given ndrange and workgroupsize.
729722
return iterspace, dynamic
730723
end
731724

732-
function construct(backend::Backend, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: Union{CPU, GPU}, S <: _Size, NDRange <: _Size, XPUName}
725+
function construct(backend::Backend, ::S, ::NDRange, xpu_name::XPUName) where {Backend <: GPU, S <: _Size, NDRange <: _Size, XPUName}
733726
return Kernel{Backend, S, NDRange, XPUName}(backend, xpu_name)
734727
end
735728

@@ -746,6 +739,10 @@ include("compiler.jl")
746739
function __workitems_iterspace end
747740
function __validindex end
748741

742+
# for reflection
743+
function mkcontext end
744+
function launch_config end
745+
749746
include("macros.jl")
750747

751748
###
@@ -815,8 +812,11 @@ end
815812
end
816813

817814
# CPU backend
815+
include("pocl/pocl.jl")
816+
using .POCL
817+
export POCLBackend
818818

819-
include("cpu.jl")
819+
const CPU = POCLBackend
820820

821821
# precompile
822822
PrecompileTools.@compile_workload begin
@@ -830,19 +830,4 @@ PrecompileTools.@compile_workload begin
830830
end
831831
end
832832

833-
if !isdefined(Base, :get_extension)
834-
using Requires
835-
end
836-
837-
@static if !isdefined(Base, :get_extension)
838-
function __init__()
839-
@require EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" include("../ext/EnzymeExt.jl")
840-
end
841-
end
842-
843-
if !isdefined(Base, :get_extension)
844-
include("../ext/LinearAlgebraExt.jl")
845-
include("../ext/SparseArraysExt.jl")
846-
end
847-
848833
end #module

src/cpu.jl

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
unsafe_free!(::AbstractArray) = return
21
synchronize(::CPU) = nothing
32

43
allocate(::CPU, ::Type{T}, dims::Tuple) where {T} = Array{T}(undef, dims)

0 commit comments

Comments
 (0)