feat(lidar_centerpoint): output the covariance of pose and twist (#6573)

tzhong518 · pre-commit-ci[bot] · YoshiRi · web-flow · commit cf5a2ed247cb · 2024-04-16T11:31:28.000+09:00
* feat: postprocess variance

Signed-off-by: tzhong518 &lt;sworgun@gmail.com&gt;

* feat: output variance

Signed-off-by: tzhong518 &lt;sworgun@gmail.com&gt;

* feat: add has_variance to config

Signed-off-by: tzhong518 &lt;sworgun@gmail.com&gt;

* fix: single_inference node

Signed-off-by: tzhong518 &lt;sworgun@gmail.com&gt;

* style(pre-commit): autofix

* fix: add to pointpainting param

Signed-off-by: tzhong518 &lt;sworgun@gmail.com&gt;

* Update perception/lidar_centerpoint/src/node.cpp

Co-authored-by: Yoshi Ri &lt;yoshiyoshidetteiu@gmail.com&gt;

* Update perception/image_projection_based_fusion/src/pointpainting_fusion/node.cpp

Co-authored-by: Yoshi Ri &lt;yoshiyoshidetteiu@gmail.com&gt;

* Update perception/lidar_centerpoint/src/node.cpp

Co-authored-by: Yoshi Ri &lt;yoshiyoshidetteiu@gmail.com&gt;

* fix: add options

Signed-off-by: tzhong518 &lt;sworgun@gmail.com&gt;

* fix: avoid powf

Signed-off-by: tzhong518 &lt;sworgun@gmail.com&gt;

* Update launch/tier4_perception_launch/launch/object_recognition/detection/detector/lidar_dnn_detector.launch.xml

Co-authored-by: Taekjin LEE &lt;technolojin@gmail.com&gt;

---------

Signed-off-by: tzhong518 &lt;sworgun@gmail.com&gt;
Co-authored-by: pre-commit-ci[bot] &lt;66853113+pre-commit-ci[bot]@users.noreply.github.com&gt;
Co-authored-by: Yoshi Ri &lt;yoshiyoshidetteiu@gmail.com&gt;
Co-authored-by: Taekjin LEE &lt;technolojin@gmail.com&gt;
diff --git a/launch/tier4_perception_launch/launch/object_recognition/detection/detector/lidar_dnn_detector.launch.xml b/launch/tier4_perception_launch/launch/object_recognition/detection/detector/lidar_dnn_detector.launch.xml
@@ -5,7 +5,7 @@
   <arg name="lidar_detection_model" default="centerpoint" description="options: `centerpoint`, `apollo`, `clustering`"/>
 
   <!-- Lidar detector centerpoint parameters -->
-  <arg name="centerpoint_model_name" default="centerpoint_tiny"/>
+  <arg name="centerpoint_model_name" default="centerpoint_tiny" description="options: `centerpoint`, `centerpoint_tiny` or `centerpoint_sigma`"/>
   <arg name="centerpoint_model_path" default="$(var data_path)/lidar_centerpoint"/>
   <arg name="lidar_model_param_path" default="$(find-pkg-share lidar_centerpoint)/config"/>
 
diff --git a/launch/tier4_perception_launch/launch/perception.launch.xml b/launch/tier4_perception_launch/launch/perception.launch.xml
@@ -36,7 +36,7 @@
   <arg name="traffic_light_arbiter_param_path"/>
 
   <!-- CenterPoint model parameters -->
-  <arg name="centerpoint_model_name" default="centerpoint_tiny" description="options: `centerpoint` or `centerpoint_tiny`"/>
+  <arg name="centerpoint_model_name" default="centerpoint_tiny" description="options: `centerpoint`, `centerpoint_tiny` or `centerpoint_sigma`"/>
   <arg name="centerpoint_model_path" default="$(var data_path)/lidar_centerpoint"/>
 
   <!-- PointPainting model parameters -->
diff --git a/perception/image_projection_based_fusion/config/pointpainting.param.yaml b/perception/image_projection_based_fusion/config/pointpainting.param.yaml
@@ -16,6 +16,7 @@
       downsample_factor: 1
       encoder_in_feature_size: 12
       yaw_norm_thresholds: [0.3, 0.3, 0.3, 0.3, 0.0]
+      has_variance: false
       has_twist: false
     densification_params:
       world_frame_id: "map"
diff --git a/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/node.hpp b/perception/image_projection_based_fusion/include/image_projection_based_fusion/pointpainting_fusion/node.hpp
@@ -60,6 +60,7 @@ class PointPaintingFusionNode
   std::map<std::string, float> class_index_;
   std::map<std::string, std::function<bool(int)>> isClassTable_;
   std::vector<double> pointcloud_range;
+  bool has_variance_{false};
   bool has_twist_{false};
 
   centerpoint::NonMaximumSuppression iou_bev_nms_;
diff --git a/perception/image_projection_based_fusion/schema/pointpainting.schema.json b/perception/image_projection_based_fusion/schema/pointpainting.schema.json
@@ -60,6 +60,11 @@
               "minimum": 0.0,
               "maximum": 1.0
             },
+            "has_variance": {
+              "type": "boolean",
+              "description": "Indicates whether the model outputs variance value.",
+              "default": false
+            },
             "has_twist": {
               "type": "boolean",
               "description": "Indicates whether the model outputs twist value.",
diff --git a/perception/image_projection_based_fusion/src/pointpainting_fusion/node.cpp b/perception/image_projection_based_fusion/src/pointpainting_fusion/node.cpp
@@ -140,6 +140,7 @@ PointPaintingFusionNode::PointPaintingFusionNode(const rclcpp::NodeOptions & opt
       isClassTable_.erase(cls);
     }
   }
+  has_variance_ = this->declare_parameter<bool>("has_variance");
   has_twist_ = this->declare_parameter<bool>("model_params.has_twist");
   const std::size_t point_feature_size = static_cast<std::size_t>(
     this->declare_parameter<std::int64_t>("model_params.point_feature_size"));
@@ -189,7 +190,7 @@ PointPaintingFusionNode::PointPaintingFusionNode(const rclcpp::NodeOptions & opt
   centerpoint::CenterPointConfig config(
     class_names_.size(), point_feature_size, max_voxel_size, pointcloud_range, voxel_size,
     downsample_factor, encoder_in_feature_size, score_threshold, circle_nms_dist_threshold,
-    yaw_norm_thresholds);
+    yaw_norm_thresholds, has_variance_);
 
   // create detector
   detector_ptr_ = std::make_unique<image_projection_based_fusion::PointPaintingTRT>(
@@ -401,7 +402,7 @@ void PointPaintingFusionNode::postprocess(sensor_msgs::msg::PointCloud2 & painte
   raw_objects.reserve(det_boxes3d.size());
   for (const auto & box3d : det_boxes3d) {
     autoware_auto_perception_msgs::msg::DetectedObject obj;
-    box3DToDetectedObject(box3d, class_names_, has_twist_, obj);
+    box3DToDetectedObject(box3d, class_names_, has_twist_, has_variance_, obj);
     raw_objects.emplace_back(obj);
   }
 
diff --git a/perception/lidar_centerpoint/config/centerpoint.param.yaml b/perception/lidar_centerpoint/config/centerpoint.param.yaml
@@ -14,6 +14,7 @@
     iou_nms_threshold: 0.1
     yaw_norm_thresholds: [0.3, 0.3, 0.3, 0.3, 0.0]
     score_threshold: 0.35
+    has_variance: false
     has_twist: false
     trt_precision: fp16
     densification_num_past_frames: 1
diff --git a/perception/lidar_centerpoint/config/centerpoint_sigma.param.yaml b/perception/lidar_centerpoint/config/centerpoint_sigma.param.yaml
@@ -0,0 +1,27 @@
+/**:
+  ros__parameters:
+    class_names: ["CAR", "TRUCK", "BUS", "BICYCLE", "PEDESTRIAN"]
+    point_feature_size: 4
+    max_voxel_size: 40000
+    point_cloud_range: [-76.8, -76.8, -4.0, 76.8, 76.8, 6.0]
+    voxel_size: [0.32, 0.32, 10.0]
+    downsample_factor: 1
+    encoder_in_feature_size: 9
+    # post-process params
+    circle_nms_dist_threshold: 0.5
+    iou_nms_target_class_names: ["CAR"]
+    iou_nms_search_distance_2d: 10.0
+    iou_nms_threshold: 0.1
+    yaw_norm_thresholds: [0.3, 0.3, 0.3, 0.3, 0.0]
+    score_threshold: 0.35
+    has_variance: true
+    has_twist: true
+    trt_precision: fp16
+    densification_num_past_frames: 1
+    densification_world_frame_id: map
+
+    # weight files
+    encoder_onnx_path: "$(var model_path)/pts_voxel_encoder_$(var model_name).onnx"
+    encoder_engine_path: "$(var model_path)/pts_voxel_encoder_$(var model_name).engine"
+    head_onnx_path: "$(var model_path)/pts_backbone_neck_head_$(var model_name).onnx"
+    head_engine_path: "$(var model_path)/pts_backbone_neck_head_$(var model_name).engine"
diff --git a/perception/lidar_centerpoint/config/centerpoint_tiny.param.yaml b/perception/lidar_centerpoint/config/centerpoint_tiny.param.yaml
@@ -14,6 +14,7 @@
     iou_nms_threshold: 0.1
     yaw_norm_thresholds: [0.3, 0.3, 0.3, 0.3, 0.0]
     score_threshold: 0.35
+    has_variance: false
     has_twist: false
     trt_precision: fp16
     densification_num_past_frames: 1
diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/centerpoint_config.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/centerpoint_config.hpp
@@ -28,7 +28,7 @@ class CenterPointConfig
     const std::vector<double> & point_cloud_range, const std::vector<double> & voxel_size,
     const std::size_t downsample_factor, const std::size_t encoder_in_feature_size,
     const float score_threshold, const float circle_nms_dist_threshold,
-    const std::vector<double> yaw_norm_thresholds)
+    const std::vector<double> yaw_norm_thresholds, const bool has_variance)
   {
     class_size_ = class_size;
     point_feature_size_ = point_feature_size;
@@ -49,6 +49,15 @@ class CenterPointConfig
     downsample_factor_ = downsample_factor;
     encoder_in_feature_size_ = encoder_in_feature_size;
 
+    if (has_variance) {
+      has_variance_ = true;
+      head_out_offset_size_ = 4;
+      head_out_z_size_ = 2;
+      head_out_dim_size_ = 6;
+      head_out_rot_size_ = 4;
+      head_out_vel_size_ = 4;
+    }
+
     if (score_threshold > 0 && score_threshold < 1) {
       score_threshold_ = score_threshold;
     }
@@ -97,11 +106,12 @@ class CenterPointConfig
   std::size_t encoder_in_feature_size_{9};
   const std::size_t encoder_out_feature_size_{32};
   const std::size_t head_out_size_{6};
-  const std::size_t head_out_offset_size_{2};
-  const std::size_t head_out_z_size_{1};
-  const std::size_t head_out_dim_size_{3};
-  const std::size_t head_out_rot_size_{2};
-  const std::size_t head_out_vel_size_{2};
+  bool has_variance_{false};
+  std::size_t head_out_offset_size_{2};
+  std::size_t head_out_z_size_{1};
+  std::size_t head_out_dim_size_{3};
+  std::size_t head_out_rot_size_{2};
+  std::size_t head_out_vel_size_{2};
 
   // post-process params
   float score_threshold_{0.35f};
diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/node.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/node.hpp
@@ -53,6 +53,7 @@ class LidarCenterPointNode : public rclcpp::Node
 
   float score_threshold_{0.0};
   std::vector<std::string> class_names_;
+  bool has_variance_{false};
   bool has_twist_{false};
 
   NonMaximumSuppression iou_bev_nms_;
diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/ros_utils.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/ros_utils.hpp
@@ -32,10 +32,14 @@ namespace centerpoint
 
 void box3DToDetectedObject(
   const Box3D & box3d, const std::vector<std::string> & class_names, const bool has_twist,
-  autoware_auto_perception_msgs::msg::DetectedObject & obj);
+  const bool has_variance, autoware_auto_perception_msgs::msg::DetectedObject & obj);
 
 uint8_t getSemanticType(const std::string & class_name);
 
+std::array<double, 36> convertPoseCovarianceMatrix(const Box3D & box3d);
+
+std::array<double, 36> convertTwistCovarianceMatrix(const Box3D & box3d);
+
 bool isCarLikeVehicleLabel(const uint8_t label);
 
 }  // namespace centerpoint
diff --git a/perception/lidar_centerpoint/include/lidar_centerpoint/utils.hpp b/perception/lidar_centerpoint/include/lidar_centerpoint/utils.hpp
@@ -33,6 +33,17 @@ struct Box3D
   float yaw;
   float vel_x;
   float vel_y;
+
+  // variance
+  float x_variance;
+  float y_variance;
+  float z_variance;
+  float length_variance;
+  float width_variance;
+  float height_variance;
+  float yaw_variance;
+  float vel_x_variance;
+  float vel_y_variance;
 };
 
 // cspell: ignore divup
diff --git a/perception/lidar_centerpoint/lib/postprocess/postprocess_kernel.cu b/perception/lidar_centerpoint/lib/postprocess/postprocess_kernel.cu
@@ -58,7 +58,7 @@ __global__ void generateBoxes3D_kernel(
   const float * out_rot, const float * out_vel, const float voxel_size_x, const float voxel_size_y,
   const float range_min_x, const float range_min_y, const std::size_t down_grid_size_x,
   const std::size_t down_grid_size_y, const std::size_t downsample_factor, const int class_size,
-  const float * yaw_norm_thresholds, Box3D * det_boxes3d)
+  const bool has_variance, const float * yaw_norm_thresholds, Box3D * det_boxes3d)
 {
   // generate boxes3d from the outputs of the network.
   // shape of out_*: (N, DOWN_GRID_SIZE_Y, DOWN_GRID_SIZE_X)
@@ -107,6 +107,34 @@ __global__ void generateBoxes3D_kernel(
   det_boxes3d[idx].yaw = atan2f(yaw_sin, yaw_cos);
   det_boxes3d[idx].vel_x = vel_x;
   det_boxes3d[idx].vel_y = vel_y;
+
+  if (has_variance) {
+    const float offset_x_variance = out_offset[down_grid_size * 2 + idx];
+    const float offset_y_variance = out_offset[down_grid_size * 3 + idx];
+    const float z_variance = out_z[down_grid_size * 1 + idx];
+    const float w_variance = out_dim[down_grid_size * 3 + idx];
+    const float l_variance = out_dim[down_grid_size * 4 + idx];
+    const float h_variance = out_dim[down_grid_size * 5 + idx];
+    const float yaw_sin_log_variance = out_rot[down_grid_size * 2 + idx];
+    const float yaw_cos_log_variance = out_rot[down_grid_size * 3 + idx];
+    const float vel_x_variance = out_vel[down_grid_size * 2 + idx];
+    const float vel_y_variance = out_vel[down_grid_size * 3 + idx];
+
+    det_boxes3d[idx].x_variance = voxel_size_x * downsample_factor * expf(offset_x_variance);
+    det_boxes3d[idx].y_variance = voxel_size_x * downsample_factor * expf(offset_y_variance);
+    det_boxes3d[idx].z_variance = expf(z_variance);
+    det_boxes3d[idx].length_variance = expf(l_variance);
+    det_boxes3d[idx].width_variance = expf(w_variance);
+    det_boxes3d[idx].height_variance = expf(h_variance);
+    const float yaw_sin_sq = yaw_sin * yaw_sin;
+    const float yaw_cos_sq = yaw_cos * yaw_cos;
+    const float yaw_norm_sq = (yaw_sin_sq + yaw_cos_sq) * (yaw_sin_sq + yaw_cos_sq);
+    det_boxes3d[idx].yaw_variance =
+      (yaw_cos_sq * expf(yaw_sin_log_variance) + yaw_sin_sq * expf(yaw_cos_log_variance)) /
+      yaw_norm_sq;
+    det_boxes3d[idx].vel_x_variance = expf(vel_x_variance);
+    det_boxes3d[idx].vel_y_variance = expf(vel_y_variance);
+  }
 }
 
 PostProcessCUDA::PostProcessCUDA(const CenterPointConfig & config) : config_(config)
@@ -131,7 +159,7 @@ cudaError_t PostProcessCUDA::generateDetectedBoxes3D_launch(
     out_heatmap, out_offset, out_z, out_dim, out_rot, out_vel, config_.voxel_size_x_,
     config_.voxel_size_y_, config_.range_min_x_, config_.range_min_y_, config_.down_grid_size_x_,
     config_.down_grid_size_y_, config_.downsample_factor_, config_.class_size_,
-    thrust::raw_pointer_cast(yaw_norm_thresholds_d_.data()),
+    config_.has_variance_, thrust::raw_pointer_cast(yaw_norm_thresholds_d_.data()),
     thrust::raw_pointer_cast(boxes3d_d_.data()));
 
   // suppress by score
diff --git a/perception/lidar_centerpoint/lib/ros_utils.cpp b/perception/lidar_centerpoint/lib/ros_utils.cpp
@@ -25,7 +25,7 @@ using Label = autoware_auto_perception_msgs::msg::ObjectClassification;
 
 void box3DToDetectedObject(
   const Box3D & box3d, const std::vector<std::string> & class_names, const bool has_twist,
-  autoware_auto_perception_msgs::msg::DetectedObject & obj)
+  const bool has_variance, autoware_auto_perception_msgs::msg::DetectedObject & obj)
 {
   // TODO(yukke42): the value of classification confidence of DNN, not probability.
   obj.existence_probability = box3d.score;
@@ -58,6 +58,10 @@ void box3DToDetectedObject(
   obj.shape.type = autoware_auto_perception_msgs::msg::Shape::BOUNDING_BOX;
   obj.shape.dimensions =
     tier4_autoware_utils::createTranslation(box3d.length, box3d.width, box3d.height);
+  if (has_variance) {
+    obj.kinematics.has_position_covariance = has_variance;
+    obj.kinematics.pose_with_covariance.covariance = convertPoseCovarianceMatrix(box3d);
+  }
 
   // twist
   if (has_twist) {
@@ -68,6 +72,10 @@ void box3DToDetectedObject(
     twist.angular.z = 2 * (std::atan2(vel_y, vel_x) - yaw);
     obj.kinematics.twist_with_covariance.twist = twist;
     obj.kinematics.has_twist = has_twist;
+    if (has_variance) {
+      obj.kinematics.has_twist_covariance = has_variance;
+      obj.kinematics.twist_with_covariance.covariance = convertTwistCovarianceMatrix(box3d);
+    }
   }
 }
 
@@ -92,4 +100,24 @@ uint8_t getSemanticType(const std::string & class_name)
   }
 }
 
+std::array<double, 36> convertPoseCovarianceMatrix(const Box3D & box3d)
+{
+  using POSE_IDX = tier4_autoware_utils::xyzrpy_covariance_index::XYZRPY_COV_IDX;
+  std::array<double, 36> pose_covariance{};
+  pose_covariance[POSE_IDX::X_X] = box3d.x_variance;
+  pose_covariance[POSE_IDX::Y_Y] = box3d.y_variance;
+  pose_covariance[POSE_IDX::Z_Z] = box3d.z_variance;
+  pose_covariance[POSE_IDX::YAW_YAW] = box3d.yaw_variance;
+  return pose_covariance;
+}
+
+std::array<double, 36> convertTwistCovarianceMatrix(const Box3D & box3d)
+{
+  using POSE_IDX = tier4_autoware_utils::xyzrpy_covariance_index::XYZRPY_COV_IDX;
+  std::array<double, 36> twist_covariance{};
+  twist_covariance[POSE_IDX::X_X] = box3d.vel_x_variance;
+  twist_covariance[POSE_IDX::Y_Y] = box3d.vel_y_variance;
+  return twist_covariance;
+}
+
 }  // namespace centerpoint
diff --git a/perception/lidar_centerpoint/src/node.cpp b/perception/lidar_centerpoint/src/node.cpp
@@ -55,7 +55,8 @@ LidarCenterPointNode::LidarCenterPointNode(const rclcpp::NodeOptions & node_opti
   const std::string head_onnx_path = this->declare_parameter<std::string>("head_onnx_path");
   const std::string head_engine_path = this->declare_parameter<std::string>("head_engine_path");
   class_names_ = this->declare_parameter<std::vector<std::string>>("class_names");
-  has_twist_ = this->declare_parameter("has_twist", false);
+  has_variance_ = this->declare_parameter<bool>("has_variance");
+  has_twist_ = this->declare_parameter<bool>("has_twist");
   const std::size_t point_feature_size =
     static_cast<std::size_t>(this->declare_parameter<std::int64_t>("point_feature_size"));
   const std::size_t max_voxel_size =
@@ -102,7 +103,7 @@ LidarCenterPointNode::LidarCenterPointNode(const rclcpp::NodeOptions & node_opti
   CenterPointConfig config(
     class_names_.size(), point_feature_size, max_voxel_size, point_cloud_range, voxel_size,
     downsample_factor, encoder_in_feature_size, score_threshold, circle_nms_dist_threshold,
-    yaw_norm_thresholds);
+    yaw_norm_thresholds, has_variance_);
   detector_ptr_ =
     std::make_unique<CenterPointTRT>(encoder_param, head_param, densification_param, config);
 
@@ -152,7 +153,7 @@ void LidarCenterPointNode::pointCloudCallback(
   raw_objects.reserve(det_boxes3d.size());
   for (const auto & box3d : det_boxes3d) {
     autoware_auto_perception_msgs::msg::DetectedObject obj;
-    box3DToDetectedObject(box3d, class_names_, has_twist_, obj);
+    box3DToDetectedObject(box3d, class_names_, has_twist_, has_variance_, obj);
     raw_objects.emplace_back(obj);
   }