Skip to content

Commit bafa3cc

Browse files
authored
Add runic as a formatter (#505)
1 parent ab87f45 commit bafa3cc

34 files changed

+819
-763
lines changed

.github/workflows/runic.yml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
name: Runic formatting
2+
on:
3+
push:
4+
branches:
5+
- 'master'
6+
- 'release-'
7+
tags:
8+
- '*'
9+
pull_request:
10+
jobs:
11+
runic:
12+
name: Runic
13+
runs-on: ubuntu-latest
14+
steps:
15+
- uses: actions/checkout@v4
16+
- uses: julia-actions/setup-julia@v2
17+
with:
18+
version: "nightly" # Only nightly builds have the -m flag currently
19+
- uses: julia-actions/cache@v2
20+
- name: Install Runic
21+
run: |
22+
julia --color=yes --project=@runic -e 'using Pkg; Pkg.add(url = "https://github.com/fredrikekre/Runic.jl")'
23+
- name: Run Runic
24+
run: |
25+
git ls-files -z -- '*.jl' | xargs -0 julia --project=@runic -m Runic --check --diff

benchmark/benchmarks.jl

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,12 @@ let static = BenchmarkGroup()
3636
for N in (64, 256, 512, 1024, 2048, 4096, 16384, 32768, 65536, 262144, 1048576)
3737
dtype[N] = @benchmarkable begin
3838
kernel = saxpy_kernel!($BACKEND, 1024)
39-
kernel(Z, convert($T, 2.0), X, Y, ndrange=size(Z))
40-
end setup=(
39+
kernel(Z, convert($T, 2.0), X, Y, ndrange = size(Z))
40+
end setup = (
4141
X = rand!(KernelAbstractions.zeros($BACKEND, $T, $N));
4242
Y = rand!(KernelAbstractions.zeros($BACKEND, $T, $N));
43-
Z = KernelAbstractions.zeros($BACKEND, $T, $N))
43+
Z = KernelAbstractions.zeros($BACKEND, $T, $N)
44+
)
4445
end
4546
static["$T"] = dtype
4647
end
@@ -53,11 +54,12 @@ let default = BenchmarkGroup()
5354
for N in (64, 256, 512, 1024, 2048, 4096, 16384, 32768, 65536, 262144, 1048576)
5455
dtype[N] = @benchmarkable begin
5556
kernel = saxpy_kernel!($BACKEND)
56-
kernel(Z, convert($T, 2.0), X, Y, ndrange=size(Z))
57-
end setup=(
57+
kernel(Z, convert($T, 2.0), X, Y, ndrange = size(Z))
58+
end setup = (
5859
X = rand!(KernelAbstractions.zeros($BACKEND, $T, $N));
5960
Y = rand!(KernelAbstractions.zeros($BACKEND, $T, $N));
60-
Z = KernelAbstractions.zeros($BACKEND, $T, $N))
61+
Z = KernelAbstractions.zeros($BACKEND, $T, $N)
62+
)
6163
end
6264
default["$T"] = dtype
6365
end

docs/make.jl

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,17 @@ function main()
77
ci = get(ENV, "CI", "") == "true"
88

99
makedocs(;
10-
modules=[KernelAbstractions],
11-
authors="JuliaGPU and contributors",
12-
repo="https://github.com/JuliaGPU/KernelAbstractions.jl/blob/{commit}{path}#L{line}",
13-
sitename="KernelAbstractions.jl",
14-
format=Documenter.HTML(;
15-
prettyurls=ci,
16-
canonical="https://juliagpu.github.io/KernelAbstractions.jl",
17-
assets=String[],
10+
modules = [KernelAbstractions],
11+
authors = "JuliaGPU and contributors",
12+
repo = "https://github.com/JuliaGPU/KernelAbstractions.jl/blob/{commit}{path}#L{line}",
13+
sitename = "KernelAbstractions.jl",
14+
format = Documenter.HTML(;
15+
prettyurls = ci,
16+
canonical = "https://juliagpu.github.io/KernelAbstractions.jl",
17+
assets = String[],
1818
),
19-
warnonly=[:missing_docs],
20-
pages=[
19+
warnonly = [:missing_docs],
20+
pages = [
2121
"Home" => "index.md",
2222
"Quickstart" => "quickstart.md",
2323
"Writing kernels" => "kernels.md",
@@ -34,13 +34,13 @@ function main()
3434
"Extras" => [
3535
"extras/unrolling.md",
3636
], # Extras
37-
"Notes for implementations" => "implementations.md"
37+
"Notes for implementations" => "implementations.md",
3838
], # pages
3939
)
4040

4141
if ci
4242
deploydocs(;
43-
repo="github.com/JuliaGPU/KernelAbstractions.jl",
43+
repo = "github.com/JuliaGPU/KernelAbstractions.jl",
4444
push_preview = true,
4545
)
4646
end

examples/histogram.jl

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,28 +30,28 @@ end
3030
# possible to get a value of 312, then we will have 2 separate shmem blocks,
3131
# one from 1->256, and another from 256->512
3232
@uniform max_element = 1
33-
for min_element = 1:gs:N
33+
for min_element in 1:gs:N
3434

3535
# Setting shared_histogram to 0
3636
@inbounds shared_histogram[lid] = 0
3737
@synchronize()
3838

3939
max_element = min_element + gs
4040
if max_element > N
41-
max_element = N+1
41+
max_element = N + 1
4242
end
4343

4444
# Defining bin on shared memory and writing to it if possible
4545
bin = input[tid]
4646
if bin >= min_element && bin < max_element
47-
bin -= min_element-1
47+
bin -= min_element - 1
4848
@atomic shared_histogram[bin] += 1
4949
end
5050

5151
@synchronize()
5252

53-
if ((lid+min_element-1) <= N)
54-
@atomic histogram_output[lid+min_element-1] += shared_histogram[lid]
53+
if ((lid + min_element - 1) <= N)
54+
@atomic histogram_output[lid + min_element - 1] += shared_histogram[lid]
5555
end
5656

5757
end
@@ -62,7 +62,7 @@ function histogram!(histogram_output, input)
6262
backend = get_backend(histogram_output)
6363
# Need static block size
6464
kernel! = histogram_kernel!(backend, (256,))
65-
kernel!(histogram_output, input, ndrange=size(input))
65+
kernel!(histogram_output, input, ndrange = size(input))
6666
end
6767

6868
function move(backend, input)
@@ -75,9 +75,9 @@ end
7575
if Base.VERSION < v"1.7.0" && !KernelAbstractions.isgpu(backend)
7676
@test_skip false
7777
else
78-
rand_input = [rand(1:128) for i = 1:1000]
79-
linear_input = [i for i = 1:1024]
80-
all_two = [2 for i = 1:512]
78+
rand_input = [rand(1:128) for i in 1:1000]
79+
linear_input = [i for i in 1:1024]
80+
all_two = [2 for i in 1:512]
8181

8282
histogram_rand_baseline = create_histogram(rand_input)
8383
histogram_linear_baseline = create_histogram(linear_input)

examples/matmul.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ include(joinpath(dirname(pathof(KernelAbstractions)), "../examples/utils.jl")) #
77

88
# creating a temporary sum variable for matrix multiplication
99
tmp_sum = zero(eltype(output))
10-
for k = 1:size(a)[2]
11-
tmp_sum += a[i,k] * b[k, j]
10+
for k in 1:size(a)[2]
11+
tmp_sum += a[i, k] * b[k, j]
1212
end
1313

14-
output[i,j] = tmp_sum
14+
output[i, j] = tmp_sum
1515
end
1616

1717
# Creating a wrapper kernel for launching with error checks
@@ -22,14 +22,14 @@ function matmul!(output, a, b)
2222
end
2323
backend = KernelAbstractions.get_backend(a)
2424
kernel! = matmul_kernel!(backend)
25-
kernel!(output, a, b, ndrange=size(output))
25+
kernel!(output, a, b, ndrange = size(output))
2626
end
2727

2828
a = rand!(allocate(backend, Float32, 256, 123))
2929
b = rand!(allocate(backend, Float32, 123, 45))
3030
output = KernelAbstractions.zeros(backend, Float32, 256, 45)
3131

32-
matmul!(output, a,b)
32+
matmul!(output, a, b)
3333
KernelAbstractions.synchronize(backend)
3434

35-
@test isapprox(output, a*b)
35+
@test isapprox(output, a * b)

examples/memcopy.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ function mycopy!(A, B)
1212
@assert get_backend(B) == backend
1313

1414
kernel = copy_kernel!(backend)
15-
kernel(A, B, ndrange=length(A))
15+
kernel(A, B, ndrange = length(A))
1616
end
1717

1818
A = KernelAbstractions.zeros(backend, Float64, 128, 128)

examples/memcopy_static.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ function mycopy_static!(A, B)
1212
@assert get_backend(B) == backend
1313

1414
kernel = copy_kernel!(backend, 32, size(A)) # if size(A) varies this will cause recompilation
15-
kernel(A, B, ndrange=size(A))
15+
kernel(A, B, ndrange = size(A))
1616
end
1717

1818
A = KernelAbstractions.zeros(backend, Float64, 128, 128)

examples/mpi.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ function main(backend)
4343
comm = MPI.COMM_WORLD
4444
MPI.Barrier(comm)
4545

46-
dst_rank = mod(MPI.Comm_rank(comm)+1, MPI.Comm_size(comm))
47-
src_rank = mod(MPI.Comm_rank(comm)-1, MPI.Comm_size(comm))
46+
dst_rank = mod(MPI.Comm_rank(comm) + 1, MPI.Comm_size(comm))
47+
src_rank = mod(MPI.Comm_rank(comm) - 1, MPI.Comm_size(comm))
4848

4949
T = Int64
5050
M = 10
@@ -59,8 +59,10 @@ function main(backend)
5959

6060
KernelAbstractions.synchronize(backend)
6161

62-
recv_task, send_task = exchange!(h_send_buf, d_recv_buf, h_recv_buf,
63-
src_rank, dst_rank, comm)
62+
recv_task, send_task = exchange!(
63+
h_send_buf, d_recv_buf, h_recv_buf,
64+
src_rank, dst_rank, comm,
65+
)
6466

6567
cooperative_wait(recv_task)
6668
cooperative_wait(send_task)

examples/naive_transpose.jl

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ function naive_transpose!(a, b)
1717
@assert get_backend(b) == backend
1818
groupsize = KernelAbstractions.isgpu(backend) ? 256 : 1024
1919
kernel! = naive_transpose_kernel!(backend, groupsize)
20-
kernel!(a, b, ndrange=size(a))
20+
kernel!(a, b, ndrange = size(a))
2121
end
2222

2323
# resolution of grid will be res*res
@@ -27,7 +27,6 @@ res = 1024
2727
b = rand!(allocate(backend, Float32, res, res))
2828
a = KernelAbstractions.zeros(backend, Float32, res, res)
2929

30-
naive_transpose!(a,b)
30+
naive_transpose!(a, b)
3131
KernelAbstractions.synchronize(backend)
3232
@test a == transpose(b)
33-

examples/numa_aware.jl

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ end
1818
Estimate the memory bandwidth (GB/s) by performing a time measurement of a
1919
SAXPY kernel. Returns the memory bandwidth (GB/s) and the compute (GFLOP/s).
2020
"""
21-
function measure_membw(backend = CPU(); verbose = true, N = 1024 * 500_000, dtype = Float32,
22-
init = :parallel)
21+
function measure_membw(
22+
backend = CPU(); verbose = true, N = 1024 * 500_000, dtype = Float32,
23+
init = :parallel,
24+
)
2325
bytes = 3 * sizeof(dtype) * N # num bytes transferred in SAXPY
2426
flops = 2 * N # num flops in SAXPY
2527

@@ -37,10 +39,10 @@ function measure_membw(backend = CPU(); verbose = true, N = 1024 * 500_000, dtyp
3739
kernel = saxpy_kernel($backend, $workgroup_size, $(size(Y)))
3840
kernel($a, $X, $Y, ndrange = $(size(Y)))
3941
KernelAbstractions.synchronize($backend)
40-
end evals=2 samples=10
42+
end evals = 2 samples = 10
4143

42-
mem_rate = bytes * 1e-9 / t # GB/s
43-
flop_rate = flops * 1e-9 / t # GFLOP/s
44+
mem_rate = bytes * 1.0e-9 / t # GB/s
45+
flop_rate = flops * 1.0e-9 / t # GFLOP/s
4446

4547
if verbose
4648
println("\tMemory Bandwidth (GB/s): ", round(mem_rate; digits = 2))
@@ -51,7 +53,7 @@ end
5153

5254
# Static should be much better (on a system with multiple NUMA domains)
5355
measure_membw(CPU());
54-
measure_membw(CPU(; static=true));
56+
measure_membw(CPU(; static = true));
5557

5658
# The following has significantly worse performance (even on systems with a single memory domain)!
5759
# measure_membw(CPU(); init=:serial);

0 commit comments

Comments
 (0)