Skip to content
This repository has been archived by the owner on Mar 1, 2023. It is now read-only.

Commit

Permalink
Merge #1077
Browse files Browse the repository at this point in the history
1077: Limit thread block size in Courant number kernels r=mwarusz a=mwarusz

# Description

This limits the number of threads in a thread block (to at most 1024) for the two kernels used in the Courant number calculation. This allows the use of high-order polynomials on the GPU, at least without hyperdiffusion. This change was missed in #1005.

CC
@vchuravy 

I have

- [ ] Written and run all necessary tests with CLIMA by including `tests/runtests.jl`
- [ ] Followed all necessary [style guidelines](https://CliMA.github.io/CLIMA/latest/CodingConventions.html) and run `julia .dev/format.jl`
- [ ] Updated the documentation to reflect changes from this PR.



Co-authored-by: Maciej Waruszewski <mwarusz@igf.fuw.edu.pl>
  • Loading branch information
bors[bot] and mwarusz authored May 11, 2020
2 parents 0196742 + b07b979 commit 47a0a67
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 10 deletions.
9 changes: 6 additions & 3 deletions src/Numerics/DGmethods/DGmodel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -796,17 +796,20 @@ function courant(
device = grid.vgeo isa Array ? CPU() : CUDA()
pointwise_courant = similar(grid.vgeo, Nq^dim, nrealelem)
event = Event(device)
event = Grids.kernel_min_neighbor_distance!(device, (Nq, Nq, Nqk))(
event = Grids.kernel_min_neighbor_distance!(
device,
min(Nq * Nq * Nqk, 1024),
)(
Val(N),
Val(dim),
direction,
pointwise_courant,
grid.vgeo,
topology.realelems;
ndrange = (nrealelem * Nq, Nq, Nqk),
ndrange = (nrealelem * Nq * Nq * Nqk),
dependencies = (event,),
)
event = kernel_local_courant!(device, Nq * Nq * Nqk)(
event = kernel_local_courant!(device, min(Nq * Nq * Nqk, 1024))(
m,
Val(dim),
Val(N),
Expand Down
6 changes: 4 additions & 2 deletions src/Numerics/DGmethods/DGmodel_kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2594,8 +2594,10 @@ end
MArray{Tuple{num_state_gradient_flux}, FT}(undef)
end

e = @index(Group, Linear)
n = @index(Local, Linear)
I = @index(Global, Linear)
e = (I - 1) ÷ Np + 1
n = (I - 1) % Np + 1

@inbounds begin
@unroll for s in 1:num_state_conservative
local_state_conservative[s] = state_conservative[n, s, e]
Expand Down
15 changes: 10 additions & 5 deletions src/Numerics/Mesh/Grids.jl
Original file line number Diff line number Diff line change
Expand Up @@ -307,14 +307,14 @@ function min_node_distance(
device = grid.vgeo isa Array ? CPU() : CUDA()
min_neighbor_distance = similar(grid.vgeo, Nq^dim, nrealelem)
event = Event(device)
event = kernel_min_neighbor_distance!(device, (Nq, Nq, Nqk))(
event = kernel_min_neighbor_distance!(device, min(Nq * Nq * Nqk, 1024))(
Val(N),
Val(dim),
direction,
min_neighbor_distance,
grid.vgeo,
topology.realelems;
ndrange = (Nq, Nq, Nqk, nrealelem),
ndrange = (Nq * Nq * Nqk * nrealelem),
dependencies = (event,),
)
wait(device, event)
Expand Down Expand Up @@ -613,6 +613,7 @@ neighbors.
FT = eltype(min_neighbor_distance)
Nq = N + 1
Nqk = dim == 2 ? 1 : Nq
Np = Nq * Nq * Nqk

if direction isa EveryDirection
mininξ = (true, true, true)
Expand All @@ -623,12 +624,16 @@ neighbors.
end
end

e = @index(Group, Linear)
i, j, k = @index(Local, NTuple)
I = @index(Global, Linear)
e = (I - 1) ÷ Np + 1
ijk = (I - 1) % Np + 1

i = (ijk - 1) % Nq + 1
j = (ijk - 1) ÷ Nq % Nq + 1
k = (ijk - 1) ÷ Nq^2 % Nqk + 1

md = typemax(FT)

ijk = i + Nq * (j - 1) + Nq * Nq * (k - 1)
x = SVector(vgeo[ijk, _x1, e], vgeo[ijk, _x2, e], vgeo[ijk, _x3, e])

if mininξ[1]
Expand Down

0 comments on commit 47a0a67

Please sign in to comment.