fix(lidar_centerpoint,image_projection_based_fusion): add guard to avoid exceeding max voxel size (#1065)

miursh · wep21 · web-flow · commit a1762ebc0733 · 2023-12-11T11:17:21.000+09:00
fix(lidar_centerpoint,image_projection_based_fusion): add guard to avoid exceeding max voxel size (autowarefoundation#5824) Co-authored-by: Daisuke Nishimatsu <42202095+wep21@users.noreply.github.com>
diff --git a/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/preprocess_kernel.hpp b/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/preprocess_kernel.hpp
@@ -27,8 +27,9 @@ cudaError_t generateVoxels_random_launch(
   cudaStream_t stream);
 
 cudaError_t generateBaseFeatures_launch(
-  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, unsigned int * pillar_num,
-  float * voxel_features, float * voxel_num, int * voxel_idxs, cudaStream_t stream);
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs,
+  cudaStream_t stream);
 
 cudaError_t generateFeatures_launch(
   const float * voxel_features, const float * voxel_num_points, const int * coords,
diff --git a/perception/image_projection_based_fusion/src/pointpainting_fusion/pointpainting_trt.cpp b/perception/image_projection_based_fusion/src/pointpainting_fusion/pointpainting_trt.cpp
@@ -69,8 +69,8 @@ bool PointPaintingTRT::preprocess(
 
   CHECK_CUDA_ERROR(image_projection_based_fusion::generateBaseFeatures_launch(
     mask_d_.get(), voxels_buffer_d_.get(), config_.grid_size_y_, config_.grid_size_x_,
-    num_voxels_d_.get(), voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(),
-    stream_));
+    config_.max_voxel_size_, num_voxels_d_.get(), voxels_d_.get(), num_points_per_voxel_d_.get(),
+    coordinates_d_.get(), stream_));
 
   CHECK_CUDA_ERROR(image_projection_based_fusion::generateFeatures_launch(
     voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_d_.get(),
diff --git a/perception/image_projection_based_fusion/src/pointpainting_fusion/preprocess_kernel.cu b/perception/image_projection_based_fusion/src/pointpainting_fusion/preprocess_kernel.cu
@@ -105,8 +105,8 @@ cudaError_t generateVoxels_random_launch(
 }
 
 __global__ void generateBaseFeatures_kernel(
-  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, unsigned int * pillar_num,
-  float * voxel_features, float * voxel_num, int * voxel_idxs)
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs)
 {
   unsigned int voxel_idx = blockIdx.x * blockDim.x + threadIdx.x;
   unsigned int voxel_idy = blockIdx.y * blockDim.y + threadIdx.y;
@@ -120,6 +120,7 @@ __global__ void generateBaseFeatures_kernel(
 
   unsigned int current_pillarId = 0;
   current_pillarId = atomicAdd(pillar_num, 1);
+  if (current_pillarId > max_voxel_size - 1) return;
 
   voxel_num[current_pillarId] = count;
 
@@ -140,15 +141,17 @@ __global__ void generateBaseFeatures_kernel(
 
 // create 4 channels
 cudaError_t generateBaseFeatures_launch(
-  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, unsigned int * pillar_num,
-  float * voxel_features, float * voxel_num, int * voxel_idxs, cudaStream_t stream)
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs,
+  cudaStream_t stream)
 {
   dim3 threads = {32, 32};
   dim3 blocks = {
     (grid_x_size + threads.x - 1) / threads.x, (grid_y_size + threads.y - 1) / threads.y};
 
   generateBaseFeatures_kernel<<<blocks, threads, 0, stream>>>(
-    mask, voxels, grid_y_size, grid_x_size, pillar_num, voxel_features, voxel_num, voxel_idxs);
+    mask, voxels, grid_y_size, grid_x_size, max_voxel_size, pillar_num, voxel_features, voxel_num,
+    voxel_idxs);
   cudaError_t err = cudaGetLastError();
   return err;
 }
diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/preprocess/preprocess_kernel.hpp
@@ -27,8 +27,9 @@ cudaError_t generateVoxels_random_launch(
   cudaStream_t stream);
 
 cudaError_t generateBaseFeatures_launch(
-  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, unsigned int * pillar_num,
-  float * voxel_features, float * voxel_num, int * voxel_idxs, cudaStream_t stream);
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs,
+  cudaStream_t stream);
 
 cudaError_t generateFeatures_launch(
   const float * voxel_features, const float * voxel_num_points, const int * coords,
diff --git a/perception/lidar_centerpoint/lib/centerpoint_trt.cpp b/perception/lidar_centerpoint/lib/centerpoint_trt.cpp
@@ -156,8 +156,8 @@ bool CenterPointTRT::preprocess(
 
   CHECK_CUDA_ERROR(generateBaseFeatures_launch(
     mask_d_.get(), voxels_buffer_d_.get(), config_.grid_size_y_, config_.grid_size_x_,
-    num_voxels_d_.get(), voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(),
-    stream_));
+    config_.max_voxel_size_, num_voxels_d_.get(), voxels_d_.get(), num_points_per_voxel_d_.get(),
+    coordinates_d_.get(), stream_));
 
   CHECK_CUDA_ERROR(generateFeatures_launch(
     voxels_d_.get(), num_points_per_voxel_d_.get(), coordinates_d_.get(), num_voxels_d_.get(),
diff --git a/perception/lidar_centerpoint/lib/preprocess/preprocess_kernel.cu b/perception/lidar_centerpoint/lib/preprocess/preprocess_kernel.cu
@@ -87,8 +87,8 @@ cudaError_t generateVoxels_random_launch(
 }
 
 __global__ void generateBaseFeatures_kernel(
-  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, unsigned int * pillar_num,
-  float * voxel_features, float * voxel_num, int * voxel_idxs)
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs)
 {
   unsigned int voxel_idx = blockIdx.x * blockDim.x + threadIdx.x;
   unsigned int voxel_idy = blockIdx.y * blockDim.y + threadIdx.y;
@@ -102,6 +102,7 @@ __global__ void generateBaseFeatures_kernel(
 
   unsigned int current_pillarId = 0;
   current_pillarId = atomicAdd(pillar_num, 1);
+  if (current_pillarId > max_voxel_size - 1) return;
 
   voxel_num[current_pillarId] = count;
 
@@ -120,15 +121,17 @@ __global__ void generateBaseFeatures_kernel(
 
 // create 4 channels
 cudaError_t generateBaseFeatures_launch(
-  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, unsigned int * pillar_num,
-  float * voxel_features, float * voxel_num, int * voxel_idxs, cudaStream_t stream)
+  unsigned int * mask, float * voxels, int grid_y_size, int grid_x_size, int max_voxel_size,
+  unsigned int * pillar_num, float * voxel_features, float * voxel_num, int * voxel_idxs,
+  cudaStream_t stream)
 {
   dim3 threads = {32, 32};
   dim3 blocks = {
     (grid_x_size + threads.x - 1) / threads.x, (grid_y_size + threads.y - 1) / threads.y};
 
   generateBaseFeatures_kernel<<<blocks, threads, 0, stream>>>(
-    mask, voxels, grid_y_size, grid_x_size, pillar_num, voxel_features, voxel_num, voxel_idxs);
+    mask, voxels, grid_y_size, grid_x_size, max_voxel_size, pillar_num, voxel_features, voxel_num,
+    voxel_idxs);
   cudaError_t err = cudaGetLastError();
   return err;
 }