autowarefoundation · knzo25 · Jan 27, 2025 · Feb 20, 2025 · Feb 21, 2025 · Feb 21, 2025
diff --git a/perception/autoware_lidar_bevfusion/CMakeLists.txt b/perception/autoware_lidar_bevfusion/CMakeLists.txt
@@ -0,0 +1,214 @@
+# Minimum CMake version and project name for this package.
+cmake_minimum_required(VERSION 3.14)
+project(autoware_lidar_bevfusion)
+
+# autoware_package() is presumably provided by the autoware_cmake package found here.
+find_package(autoware_cmake REQUIRED)
+autoware_package()
+
+# Directory-scoped option: applies to every target defined after this line.
+add_compile_options(-Wno-deprecated-declarations)
+
+# Opt-in switch for the extra diagnostic messages printed by the detection steps.
+option(CUDA_VERBOSE "Verbose output of CUDA modules" OFF)
+
+# CUDA detection: CUDA_AVAIL ends up ON only when find_package(CUDA) succeeds.
+option(CUDA_AVAIL "CUDA available" OFF)
+find_package(CUDA)
+if(NOT CUDA_FOUND)
+  message("CUDA NOT FOUND")
+  set(CUDA_AVAIL OFF)
+else()
+  # cuBLAS is looked up explicitly, hinting at the toolkit's library directories.
+  find_library(CUBLAS_LIBRARIES
+    NAMES cublas
+    HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib
+  )
+  # Note: cublas_device was deprecated in CUDA version 9.2
+  #       https://forums.developer.nvidia.com/t/where-can-i-find-libcublas-device-so-or-libcublas-device-a/67251/4
+  #       In LibTorch, CUDA_cublas_device_LIBRARY is used.
+  unset(CUDA_cublas_device_LIBRARY CACHE)
+  set(CUDA_AVAIL ON)
+  if(CUDA_VERBOSE)
+    message("CUDA is available!")
+    message("CUDA Libs: ${CUDA_LIBRARIES}")
+    message("CUDA Headers: ${CUDA_INCLUDE_DIRS}")
+  endif()
+endif()
+
+# TensorRT detection: TRT_AVAIL ends up ON only when both the nvinfer and the
+# nvonnxparser libraries are located.
+option(TRT_AVAIL "TensorRT available" OFF)
+find_library(NVINFER NAMES nvinfer)
+find_library(NVONNXPARSER NAMES nvonnxparser)
+if(NOT NVINFER OR NOT NVONNXPARSER)
+  message("TensorRT is NOT Available")
+  set(TRT_AVAIL OFF)
+else()
+  set(TRT_AVAIL ON)
+  if(CUDA_VERBOSE)
+    message("TensorRT is available!")
+    message("NVINFER: ${NVINFER}")
+    message("NVONNXPARSER: ${NVONNXPARSER}")
+  endif()
+endif()
+
+# cuDNN detection: CUDNN_AVAIL ends up ON only when the library is located.
+option(CUDNN_AVAIL "CUDNN available" OFF)
+# LD_LIBRARY_PATH is a native search path (':'-separated on Unix). Convert it to a CMake
+# list so every directory is searched individually; expanded as-is, a multi-entry value
+# would be handed to find_library() as one non-existent path.
+file(TO_CMAKE_PATH "$ENV{LD_LIBRARY_PATH}" cudnn_ld_library_path)
+# NOTE(review): __cudnn_ver_suffix, __cudnn_lib_win_name, __libpath_cudart and
+# PC_CUDNN_LIBRARY_DIRS are not set in this file; if undefined they expand to nothing.
+find_library(CUDNN_LIBRARY
+  NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name}
+  PATHS ${cudnn_ld_library_path} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX}
+  PATH_SUFFIXES lib lib64 bin
+  DOC "CUDNN library."
+)
+if(CUDNN_LIBRARY)
+  if(CUDA_VERBOSE)
+    message(STATUS "CUDNN is available!")
+    message(STATUS "CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
+  endif()
+  set(CUDNN_AVAIL ON)
+else()
+  message("CUDNN is NOT Available")
+  set(CUDNN_AVAIL OFF)
+endif()
+
+# spconv detection: SPCONV_AVAIL ends up ON only when both the cumm and the spconv
+# packages are found.
+option(SPCONV_AVAIL "spconv available" OFF)
+find_package(cumm)
+find_package(spconv)
+# Test the variable names, not their expansions: if() dereferences them itself, and an
+# empty or undefined value then evaluates to false instead of producing a malformed
+# condition such as `if( AND )`.
+if(cumm_FOUND AND spconv_FOUND)
+  message("spconv is available!")
+  set(SPCONV_AVAIL ON)
+else()
+  message("spconv is NOT Available")
+  set(SPCONV_AVAIL OFF)
+endif()
+
+# Build the detector only when TensorRT, CUDA, cuDNN and spconv were all detected above;
+# otherwise fall through to the else() branch, which only packages the launch files.
+if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL AND SPCONV_AVAIL)
+  find_package(ament_cmake_auto REQUIRED)
+  ament_auto_find_build_dependencies()
+
+  include_directories(
+    include
+    ${CUDA_INCLUDE_DIRS}
+  )
+
+  if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+    # Extend the flags as one space-separated string; passing the previous value and
+    # "-g -G" as separate arguments would build a ';'-separated list instead.
+    # NOTE(review): the .cu targets below are created with cuda_add_library() (FindCUDA),
+    # which takes its flags from CUDA_NVCC_FLAGS - confirm CMAKE_CUDA_FLAGS has an effect here.
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -g -G")
+  endif()
+
+  # Find Eigen3
+  find_package(Eigen3 3.3 REQUIRED NO_MODULE)
+
+  add_definitions("-DTV_CUDA")
+
+  # cSpell:ignore expt
+  list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -diag-suppress 1675 --extended-lambda")
+
+  # CUDA kernels linked into the TensorRT plugin library below.
+  cuda_add_library(bev_ops SHARED
+    src/bev_ops/bev_pool_cuda.cu
+  )
+
+  # TensorRT plugin library; installed under share/${PROJECT_NAME}/plugins further down.
+  add_library(autoware_tensorrt_plugins SHARED
+    src/tensorrt_plugins/plugin_utils.cpp
+    src/tensorrt_plugins/quick_cumsum_cuda_plugin.cpp
+    src/tensorrt_plugins/quick_cumsum_cuda_plugin_creator.cpp
+    src/tensorrt_plugins/get_indices_pairs_implicit_gemm_plugin_creator.cpp
+    src/tensorrt_plugins/get_indices_pairs_implicit_gemm_plugin.cpp
+    src/tensorrt_plugins/implicit_gemm_plugin_creator.cpp
+    src/tensorrt_plugins/implicit_gemm_plugin.cpp
+
+    src/tensorrt_plugins/plugin_registration.cpp
+  )
+
+  target_compile_definitions(autoware_tensorrt_plugins PRIVATE _GLIBCXX_USE_CXX11_ABI=1)
+
+  target_link_libraries(autoware_tensorrt_plugins PRIVATE
+      ${NVINFER}
+      CUDA::cudart
+      bev_ops
+      spconv::spconv
+  )
+
+  # Pre-/post-processing CUDA kernels used by the node library.
+  cuda_add_library(${PROJECT_NAME}_cuda_lib SHARED
+    lib/postprocess/circle_nms_kernel.cu
+    lib/postprocess/postprocess_kernel.cu
+    lib/preprocess/preprocess_kernel.cu
+  )
+
+  target_link_libraries(${PROJECT_NAME}_cuda_lib
+    spconv::spconv
+  )
+
+  target_include_directories(${PROJECT_NAME}_cuda_lib
+    SYSTEM PUBLIC
+    ${autoware_cuda_utils_INCLUDE_DIRS}
+  )
+
+  # Host-side library: pre/post-processing, ROS utilities and the TensorRT wrapper.
+  ament_auto_add_library(${PROJECT_NAME}_lib SHARED
+    lib/detection_class_remapper.cpp
+    lib/postprocess/non_maximum_suppression.cpp
+    lib/preprocess/voxel_generator.cpp
+    lib/preprocess/pointcloud_densification.cpp
+    lib/preprocess/precomputed_features.cpp
+    lib/ros_utils.cpp
+    lib/bevfusion_trt.cpp
+  )
+
+  # NOTE(review): TENSORRT_VERSION_MAJOR is not set in this file; presumably it is
+  # exported by one of the build dependencies - confirm.
+  target_compile_definitions(${PROJECT_NAME}_lib PRIVATE
+    TENSORRT_VERSION_MAJOR=${TENSORRT_VERSION_MAJOR}
+  )
+
+  target_link_libraries(${PROJECT_NAME}_lib
+    ${NVINFER}
+    ${NVONNXPARSER}
+    ${CUDA_LIBRARIES}
+    ${CUBLAS_LIBRARIES}
+    ${CUDA_curand_LIBRARY}
+    ${CUDNN_LIBRARY}
+    ${PROJECT_NAME}_cuda_lib
+  )
+
+  # To suppress unknown-pragmas error. The root-cause is CUB library in CUDA 11.6.
+  # This issue was fixed by https://github.com/NVIDIA/cub/commit/7d608bf1dc14553e2fb219eabeed80b76621b6fe
+  # Variables are referenced as ${VAR}; the make-style $(VAR) form is never expanded by
+  # CMake and would be added as a literal, non-existent include directory.
+  target_include_directories(${PROJECT_NAME}_lib
+    SYSTEM PUBLIC
+    ${CUDA_INCLUDE_DIRS}
+    ${autoware_point_types_INCLUDE_DIRS}
+  )
+
+  # Composable node component and its standalone executable.
+  ament_auto_add_library(${PROJECT_NAME}_component SHARED
+    src/lidar_bevfusion_node.cpp
+  )
+
+  target_link_libraries(${PROJECT_NAME}_component
+    ${PROJECT_NAME}_lib
+  )
+
+  rclcpp_components_register_node(${PROJECT_NAME}_component
+    PLUGIN "autoware::lidar_bevfusion::LidarBEVFusionNode"
+    EXECUTABLE ${PROJECT_NAME}_node
+  )
+
+  install(
+    TARGETS ${PROJECT_NAME}_cuda_lib
+    DESTINATION lib
+  )
+
+  # NOTE(review): bev_ops is a SHARED library linked by autoware_tensorrt_plugins but has
+  # no install() rule in this file - confirm the installed plugin can resolve it at runtime.
+  install(
+    TARGETS autoware_tensorrt_plugins
+    DESTINATION share/${PROJECT_NAME}/plugins
+  )
+
+  ament_auto_package(
+    INSTALL_TO_SHARE
+      launch
+      config
+  )
+
+else()
+  # At least one GPU dependency is missing: define no targets and package only the
+  # launch directory.
+  find_package(ament_cmake_auto REQUIRED)
+  ament_auto_find_build_dependencies()
+
+  ament_auto_package(
+    INSTALL_TO_SHARE
+      launch
+  )
+endif()
diff --git a/perception/autoware_lidar_bevfusion/README.md b/perception/autoware_lidar_bevfusion/README.md
@@ -0,0 +1,81 @@
+# autoware_lidar_bevfusion
+
+## Purpose
+
+The `autoware_lidar_bevfusion` package is used for 3D object detection based on camera-lidar fusion.
+
+## Inner-workings / Algorithms
+
+This package implements a TensorRT powered inference node for BEVFusion [1].
+The sparse convolution backend corresponds to [spconv](https://github.com/traveller59/spconv).
+Autoware installs it automatically in its setup script. If needed, the user can also build and install it by following [these instructions](https://github.com/autowarefoundation/spconv_cpp).
+
+## Inputs / Outputs
+
+### Input
+
+| Name                   | Type                            | Description               |
+| ---------------------- | ------------------------------- | ------------------------- |
+| `~/input/pointcloud`   | `sensor_msgs::msg::PointCloud2` | Input pointcloud topics.  |
+| `~/input/image*`       | `sensor_msgs::msg::Image`       | Input image topics.       |
+| `~/input/camera_info*` | `sensor_msgs::msg::CameraInfo`  | Input camera info topics. |
+
+### Output
+
+| Name                                   | Type                                             | Description                 |
+| -------------------------------------- | ------------------------------------------------ | --------------------------- |
+| `~/output/objects`                     | `autoware_perception_msgs::msg::DetectedObjects` | Detected objects.           |
+| `debug/cyclic_time_ms`                 | `tier4_debug_msgs::msg::Float64Stamped`          | Cyclic time (ms).           |
+| `debug/pipeline_latency_ms`            | `tier4_debug_msgs::msg::Float64Stamped`          | Pipeline latency time (ms). |
+| `debug/processing_time/preprocess_ms`  | `tier4_debug_msgs::msg::Float64Stamped`          | Preprocess (ms).            |
+| `debug/processing_time/inference_ms`   | `tier4_debug_msgs::msg::Float64Stamped`          | Inference time (ms).        |
+| `debug/processing_time/postprocess_ms` | `tier4_debug_msgs::msg::Float64Stamped`          | Postprocess time (ms).      |
+| `debug/processing_time/total_ms`       | `tier4_debug_msgs::msg::Float64Stamped`          | Total processing time (ms). |
+
+## Parameters
+
+### BEVFusion node
+
+{{ json_to_markdown("perception/autoware_lidar_bevfusion/schema/bevfusion.schema.dummy.json") }}
+
+### BEVFusion model
+
+{{ json_to_markdown("perception/autoware_lidar_bevfusion/schema/bevfusion_ml_package.schema.json") }}
+
+### Detection class remapper
+
+{{ json_to_markdown("perception/autoware_lidar_bevfusion/schema/detection_class_remapper.schema.json") }}
+
+### The `build_only` option
+
+The `autoware_lidar_bevfusion` node has a `build_only` option to build the TensorRT engine file from the specified ONNX file, after which the program exits.
+
+```bash
+ros2 launch autoware_lidar_bevfusion lidar_bevfusion.launch.xml build_only:=true
+```
+
+### The `log_level` option
+
+The default logging severity level for `autoware_lidar_bevfusion` is `info`. For debugging purposes, the developer may decrease the severity level using the `log_level` parameter:
+
+```bash
+ros2 launch autoware_lidar_bevfusion lidar_bevfusion.launch.xml log_level:=debug
+```
+
+## Assumptions / Known limits
+
+This node assumes that the input pointcloud follows the `PointXYZIRC` layout defined in `autoware_point_types`.
+
+## Trained Models
+
+TODO
+
+### Changelog
+
+## References/External links
+
+[1] Zhijian Liu, Haotian Tang, Alexander Amini, Xinyu Yang, Huizi Mao, Daniela Rus, and Song Han. "BEVFusion: Multi-Task Multi-Sensor Fusion with Unified Bird's-Eye View Representation." 2023 International Conference on Robotics and Automation. <!-- cspell:disable-line -->
+
+## (Optional) Future extensions / Unimplemented parts
+
+Although this node can perform camera-lidar fusion, it is the first method in Autoware to actually use both images and lidar point clouds for inference, so the package structure and its full integration into the Autoware pipeline are left for future work. In its current structure, it can be employed without any changes as a lidar-based detector.
diff --git a/perception/autoware_lidar_bevfusion/config/bevfusion.param.yaml b/perception/autoware_lidar_bevfusion/config/bevfusion.param.yaml
@@ -0,0 +1,23 @@
+/**:
+  ros__parameters:
+    # modality
+    sensor_fusion: true
+    # non-network params
+    max_camera_lidar_delay: 0.12
+    # plugins
+    plugins_path: $(find-pkg-share autoware_lidar_bevfusion)/plugins/libautoware_tensorrt_plugins.so
+    # network
+    trt_precision: fp16
+    cloud_capacity: 2000000
+    onnx_path: "$(var model_path)/bevfusion_camera_lidar_v2.onnx"
+    engine_path: "$(var model_path)/bevfusion_camera_lidar_v2.engine"
+    # pre-process params
+    densification_num_past_frames: 0
+    densification_world_frame_id: map
+    # post-process params
+    circle_nms_dist_threshold: 0.5
+    iou_nms_target_class_names: ["CAR"]
+    iou_nms_search_distance_2d: 10.0
+    iou_nms_threshold: 0.1
+    yaw_norm_thresholds: [0.3, 0.3, 0.3, 0.3, 0.0] # refers to the class_names
+    score_threshold: 0.1
diff --git a/perception/autoware_lidar_bevfusion/config/bevfusion_ml_package.param.yaml b/perception/autoware_lidar_bevfusion/config/bevfusion_ml_package.param.yaml
@@ -0,0 +1,24 @@
+/**:
+  ros__parameters:
+    class_names: ["CAR", "TRUCK", "BUS", "BICYCLE", "PEDESTRIAN"]
+    voxels_num: [1, 128000, 256000] # [min, opt, max]
+    point_cloud_range: [-122.4, -122.4, -3.0, 122.4, 122.4, 5.0] # [x_min, y_min, z_min, x_max, y_max, z_max]
+    voxel_size: [0.17, 0.17, 0.2] # [x, y, z]
+    num_proposals: 500
+    out_size_factor: 8
+    max_points_per_voxel: 10
+
+    d_bound: [1.0, 166.2, 1.4]
+    x_bound: [-122.4, 122.4, 0.68]
+    y_bound: [-122.4, 122.4, 0.68]
+    z_bound: [-10.0, 10.0, 20.0]
+    num_cameras: 6
+    raw_image_height: 1080
+    raw_image_width: 1440
+    img_aug_scale_x: 0.489
+    img_aug_scale_y: 0.489
+    roi_height: 384
+    roi_width: 704
+    features_height: 48
+    features_width: 88
+    num_depth_features: 118
diff --git a/perception/autoware_lidar_bevfusion/config/detection_class_remapper.param.yaml b/perception/autoware_lidar_bevfusion/config/detection_class_remapper.param.yaml
@@ -0,0 +1,38 @@
+/**:
+  ros__parameters:
+    allow_remapping_by_area_matrix:
+      # NOTE(knzo25): We turn all vehicles into trailers if their area exceeds 3.0 x 12.0 [m^2].
+      # NOTE(knzo25): We turn cars into trucks if they have an area between 2.2 x 5.5 and 3.0 x 12.0 [m^2].
+      # row: original class. column: class to remap to
+      #UNKNOWN, CAR, TRUCK, BUS,  TRAILER, MOTORBIKE, BICYCLE,PEDESTRIAN
+      [0,       0,   0,     0,    0,       0,         0,      0,         #UNKNOWN
+       0,       0,   1,     0,    1,       0,         0,      0,         #CAR
+       0,       0,   0,     0,    1,       0,         0,      0,         #TRUCK
+       0,       0,   0,     0,    1,       0,         0,      0,         #BUS
+       0,       0,   0,     0,    0,       0,         0,      0,         #TRAILER
+       0,       0,   0,     0,    0,       0,         0,      0,         #MOTORBIKE
+       0,       0,   0,     0,    0,       0,         0,      0,         #BICYCLE
+       0,       0,   0,     0,    0,       0,         0,      0]         #PEDESTRIAN
+
+    min_area_matrix:
+      #UNKNOWN, CAR,   TRUCK,   BUS,   TRAILER,  MOTORBIKE, BICYCLE, PEDESTRIAN
+      [ 0.000,  0.000,  0.000,  0.000,  0.000,   0.000,     0.000,   0.000,      #UNKNOWN
+        0.000,  0.000, 12.100,  0.000, 36.000,   0.000,     0.000,   0.000,      #CAR
+        0.000,  0.000,  0.000,  0.000, 36.000,   0.000,     0.000,   0.000,      #TRUCK
+        0.000,  0.000,  0.000,  0.000, 36.000,   0.000,     0.000,   0.000,      #BUS
+        0.000,  0.000,  0.000,  0.000,  0.000,   0.000,     0.000,   0.000,      #TRAILER
+        0.000,  0.000,  0.000,  0.000,  0.000,   0.000,     0.000,   0.000,      #MOTORBIKE
+        0.000,  0.000,  0.000,  0.000,  0.000,   0.000,     0.000,   0.000,      #BICYCLE
+        0.000,  0.000,  0.000,  0.000,  0.000,   0.000,     0.000,   0.000]      #PEDESTRIAN
+
+
+    max_area_matrix:
+      #UNKNOWN, CAR,   TRUCK,  BUS,    TRAILER,   MOTORBIKE, BICYCLE, PEDESTRIAN
+      [ 0.000,  0.000,  0.000,  0.000,   0.000,   0.000,     0.000,   0.000,      #UNKNOWN
+        0.000,  0.000, 36.000,  0.000, 999.999,   0.000,     0.000,   0.000,      #CAR
+        0.000,  0.000,  0.000,  0.000, 999.999,   0.000,     0.000,   0.000,      #TRUCK
+        0.000,  0.000,  0.000,  0.000, 999.999,   0.000,     0.000,   0.000,      #BUS
+        0.000,  0.000,  0.000,  0.000,   0.000,   0.000,     0.000,   0.000,      #TRAILER
+        0.000,  0.000,  0.000,  0.000,   0.000,   0.000,     0.000,   0.000,      #MOTORBIKE
+        0.000,  0.000,  0.000,  0.000,   0.000,   0.000,     0.000,   0.000,      #BICYCLE
+        0.000,  0.000,  0.000,  0.000,   0.000,   0.000,     0.000,   0.000]      #PEDESTRIAN