Skip to content

Commit 91984f8

Browse files
authored
feat(autoware_lidar_centerpoint): process front voxels first (#9608)
* feat: optimize voxel indexing in preprocess_kernel.cu Signed-off-by: Taekjin LEE <taekjin.lee@tier4.jp> * fix: remove redundant index check Signed-off-by: Taekjin LEE <taekjin.lee@tier4.jp> * fix: modify voxel index for better memory access Signed-off-by: Taekjin LEE <taekjin.lee@tier4.jp> --------- Signed-off-by: Taekjin LEE <taekjin.lee@tier4.jp>
1 parent 5ced41e commit 91984f8

File tree

1 file changed

+11
-8
lines changed

1 file changed

+11
-8
lines changed

perception/autoware_lidar_centerpoint/lib/preprocess/preprocess_kernel.cu

+11-8
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ __global__ void generateVoxels_random_kernel(
148148

149149
int voxel_idx = floorf((point.x - min_x_range) / pillar_x_size);
150150
int voxel_idy = floorf((point.y - min_y_range) / pillar_y_size);
151-
unsigned int voxel_index = voxel_idy * grid_x_size + voxel_idx;
151+
unsigned int voxel_index = (grid_x_size - 1 - voxel_idx) * grid_y_size + voxel_idy;
152152

153153
unsigned int point_id = atomicAdd(&(mask[voxel_index]), 1);
154154

@@ -185,12 +185,14 @@ __global__ void generateBaseFeatures_kernel(
185185
unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
186186
unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs)
187187
{
188-
unsigned int voxel_idx = blockIdx.x * blockDim.x + threadIdx.x;
189-
unsigned int voxel_idy = blockIdx.y * blockDim.y + threadIdx.y;
190-
191-
if (voxel_idx >= grid_x_size || voxel_idy >= grid_y_size) return;
192-
193-
unsigned int voxel_index = voxel_idy * grid_x_size + voxel_idx;
188+
// exchange x and y to process in a row-major order
189+
// flip x axis direction to process front to back
190+
unsigned int voxel_idx_inverted = blockIdx.y * blockDim.y + threadIdx.y;
191+
unsigned int voxel_idy = blockIdx.x * blockDim.x + threadIdx.x;
192+
if (voxel_idx_inverted >= grid_x_size || voxel_idy >= grid_y_size) return;
193+
unsigned int voxel_idx = grid_x_size - 1 - voxel_idx_inverted;
194+
195+
unsigned int voxel_index = voxel_idx_inverted * grid_y_size + voxel_idy;
194196
unsigned int count = mask[voxel_index];
195197
if (!(count > 0)) return;
196198
count = count < MAX_POINT_IN_VOXEL_SIZE ? count : MAX_POINT_IN_VOXEL_SIZE;
@@ -220,9 +222,10 @@ cudaError_t generateBaseFeatures_launch(
220222
unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs,
221223
cudaStream_t stream)
222224
{
225+
// exchange x and y to process in a row-major order
223226
dim3 threads = {32, 32};
224227
dim3 blocks = {
225-
(grid_x_size + threads.x - 1) / threads.x, (grid_y_size + threads.y - 1) / threads.y};
228+
(grid_y_size + threads.x - 1) / threads.x, (grid_x_size + threads.y - 1) / threads.y};
226229

227230
generateBaseFeatures_kernel<<<blocks, threads, 0, stream>>>(
228231
mask, voxels, grid_y_size, grid_x_size, max_voxel_size, pillar_num, voxel_features, voxel_num,

0 commit comments

Comments
 (0)