Skip to content

Commit

Permalink
Fix launch when no work while keeping old behavior
Browse files Browse the repository at this point in the history
  • Loading branch information
MrBurmark committed Feb 11, 2025
1 parent 01c41e0 commit ad29032
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 18 deletions.
10 changes: 7 additions & 3 deletions include/RAJA/policy/cuda/kernel/CudaKernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -549,9 +549,13 @@ struct StatementExecutor<


// Only launch kernel if we have something to iterate over
int num_blocks = launch_dims.num_blocks();
int num_threads = launch_dims.num_threads();
if (num_blocks > 0 || num_threads > 0)
bool active_threads = launch_dims.threads_are_active();
bool active_blocks = launch_dims.blocks_are_active();
int num_blocks = launch_dims.num_blocks();
int num_threads = launch_dims.num_threads();
if ((active_threads || active_blocks) &&
(!active_blocks || num_blocks > 0) &&
(!active_threads || num_threads > 0))
{

//
Expand Down
32 changes: 26 additions & 6 deletions include/RAJA/policy/cuda/kernel/internal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,20 +98,40 @@ struct LaunchDims
return result;
}

// Reports whether any of the x, y or z entries of active.blocks is set.
// Returns 1 if at least one entry is set, 0 otherwise.
RAJA_INLINE
int blocks_are_active() const
{
  if (active.blocks.x) {
    return 1;
  }
  if (active.blocks.y) {
    return 1;
  }
  return active.blocks.z ? 1 : 0;
}

// Reports whether any of the x, y or z entries of active.threads is set.
// Returns 1 if at least one entry is set, 0 otherwise.
RAJA_INLINE
int threads_are_active() const
{
  if (active.threads.x) {
    return 1;
  }
  if (active.threads.y) {
    return 1;
  }
  return active.threads.z ? 1 : 0;
}

// Total number of blocks across the active block dimensions.
//
// Returns 0 when no block dimension is active, so callers can tell
// "blocks are not used at all" apart from "one block". Otherwise returns
// the product of dims.blocks over the active dimensions; an inactive
// dimension contributes a factor of 1.
RAJA_INLINE
int num_blocks() const
{
  // The activity check must come first: an unconditional return ahead of it
  // would make the zero result unreachable.
  if (!blocks_are_active()) {
    return 0;
  }

  return (active.blocks.x ? dims.blocks.x : 1) *
         (active.blocks.y ? dims.blocks.y : 1) *
         (active.blocks.z ? dims.blocks.z : 1);
}

// Total number of threads across the active thread dimensions.
//
// Returns 0 when no thread dimension is active, so callers can tell
// "threads are not used at all" apart from "one thread". Otherwise returns
// the product of dims.threads over the active dimensions; an inactive
// dimension contributes a factor of 1.
RAJA_INLINE
int num_threads() const
{
  // The activity check must come first: an unconditional return ahead of it
  // would make the zero result unreachable.
  if (!threads_are_active()) {
    return 0;
  }

  return (active.threads.x ? dims.threads.x : 1) *
         (active.threads.y ? dims.threads.y : 1) *
         (active.threads.z ? dims.threads.z : 1);
}

RAJA_INLINE
Expand Down
10 changes: 7 additions & 3 deletions include/RAJA/policy/hip/kernel/HipKernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -509,9 +509,13 @@ struct StatementExecutor<


// Only launch kernel if we have something to iterate over
int num_blocks = launch_dims.num_blocks();
int num_threads = launch_dims.num_threads();
if (num_blocks > 0 || num_threads > 0)
bool active_threads = launch_dims.threads_are_active();
bool active_blocks = launch_dims.blocks_are_active();
int num_blocks = launch_dims.num_blocks();
int num_threads = launch_dims.num_threads();
if ((active_threads || active_blocks) &&
(!active_blocks || num_blocks > 0) &&
(!active_threads || num_threads > 0))
{

//
Expand Down
32 changes: 26 additions & 6 deletions include/RAJA/policy/hip/kernel/internal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,20 +98,40 @@ struct LaunchDims
return result;
}

// Reports whether any of the x, y or z entries of active.blocks is set.
// Returns 1 if at least one entry is set, 0 otherwise.
RAJA_INLINE
int blocks_are_active() const
{
  if (active.blocks.x) {
    return 1;
  }
  if (active.blocks.y) {
    return 1;
  }
  return active.blocks.z ? 1 : 0;
}

// Reports whether any of the x, y or z entries of active.threads is set.
// Returns 1 if at least one entry is set, 0 otherwise.
RAJA_INLINE
int threads_are_active() const
{
  if (active.threads.x) {
    return 1;
  }
  if (active.threads.y) {
    return 1;
  }
  return active.threads.z ? 1 : 0;
}

// Total number of blocks across the active block dimensions.
//
// Returns 0 when no block dimension is active, so callers can tell
// "blocks are not used at all" apart from "one block". Otherwise returns
// the product of dims.blocks over the active dimensions; an inactive
// dimension contributes a factor of 1.
RAJA_INLINE
int num_blocks() const
{
  // The activity check must come first: an unconditional return ahead of it
  // would make the zero result unreachable.
  if (!blocks_are_active()) {
    return 0;
  }

  return (active.blocks.x ? dims.blocks.x : 1) *
         (active.blocks.y ? dims.blocks.y : 1) *
         (active.blocks.z ? dims.blocks.z : 1);
}

// Total number of threads across the active thread dimensions.
//
// Returns 0 when no thread dimension is active, so callers can tell
// "threads are not used at all" apart from "one thread". Otherwise returns
// the product of dims.threads over the active dimensions; an inactive
// dimension contributes a factor of 1.
RAJA_INLINE
int num_threads() const
{
  // The activity check must come first: an unconditional return ahead of it
  // would make the zero result unreachable.
  if (!threads_are_active()) {
    return 0;
  }

  return (active.threads.x ? dims.threads.x : 1) *
         (active.threads.y ? dims.threads.y : 1) *
         (active.threads.z ? dims.threads.z : 1);
}

RAJA_INLINE
Expand Down

0 comments on commit ad29032

Please sign in to comment.