Skip to content

Commit c08e6b3

Browse files
authored
feat(autoware_probabilistic_occupancy_grid_map): improved data handling on the ogm (#10060)
feat: improved data handling in the ogm. When using the full concatenated pointcloud the processing time decreases from 8ms to 4ms Signed-off-by: Kenzo Lobos-Tsunekawa <kenzo.lobos@tier4.jp>
1 parent b59cd8d commit c08e6b3

File tree

8 files changed

+88
-77
lines changed

8 files changed

+88
-77
lines changed

Diff for: perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/costmap_2d/occupancy_grid_map_base.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ class OccupancyGridMapInterface : public nav2_costmap_2d::Costmap2D
102102

103103
bool isCudaEnabled() const;
104104

105+
void setCudaStream(const cudaStream_t & stream);
106+
105107
const autoware::cuda_utils::CudaUniquePtr<std::uint8_t[]> & getDeviceCostmap() const;
106108

107109
void copyDeviceCostmapToHost() const;

Diff for: perception/autoware_probabilistic_occupancy_grid_map/include/autoware/probabilistic_occupancy_grid_map/utils/cuda_pointcloud.hpp

+19-13
Original file line numberDiff line numberDiff line change
@@ -24,31 +24,37 @@
2424
class CudaPointCloud2 : public sensor_msgs::msg::PointCloud2
2525
{
2626
public:
27-
void fromROSMsgAsync(const sensor_msgs::msg::PointCloud2 & msg)
27+
void fromROSMsgAsync(const sensor_msgs::msg::PointCloud2::ConstSharedPtr & msg_ptr)
2828
{
2929
// cSpell: ignore knzo25
3030
// NOTE(knzo25): replace this with the cuda blackboard later
31-
header = msg.header;
32-
fields = msg.fields;
33-
height = msg.height;
34-
width = msg.width;
35-
is_bigendian = msg.is_bigendian;
36-
point_step = msg.point_step;
37-
row_step = msg.row_step;
38-
is_dense = msg.is_dense;
39-
40-
if (!data || capacity_ < msg.data.size()) {
41-
capacity_ = 1024 * (msg.data.size() / 1024 + 1);
31+
cudaStreamSynchronize(stream);
32+
internal_msg_ = msg_ptr;
33+
34+
header = msg_ptr->header;
35+
fields = msg_ptr->fields;
36+
height = msg_ptr->height;
37+
width = msg_ptr->width;
38+
is_bigendian = msg_ptr->is_bigendian;
39+
point_step = msg_ptr->point_step;
40+
row_step = msg_ptr->row_step;
41+
is_dense = msg_ptr->is_dense;
42+
43+
if (!data || capacity_ < msg_ptr->data.size()) {
44+
const int factor = 4096 * point_step;
45+
capacity_ = factor * (msg_ptr->data.size() / factor + 1);
4246
data = autoware::cuda_utils::make_unique<std::uint8_t[]>(capacity_);
4347
}
4448

45-
cudaMemcpyAsync(data.get(), msg.data.data(), msg.data.size(), cudaMemcpyHostToDevice, stream);
49+
cudaMemcpyAsync(
50+
data.get(), msg_ptr->data.data(), msg_ptr->data.size(), cudaMemcpyHostToDevice, stream);
4651
}
4752

4853
autoware::cuda_utils::CudaUniquePtr<std::uint8_t[]> data;
4954
cudaStream_t stream;
5055

5156
private:
57+
sensor_msgs::msg::PointCloud2::ConstSharedPtr internal_msg_;
5258
std::size_t capacity_{0};
5359
};
5460

Diff for: perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_base.cpp

+13-5
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,6 @@ OccupancyGridMapInterface::OccupancyGridMapInterface(
9090
const auto num_cells_x = this->getSizeInCellsX();
9191
const auto num_cells_y = this->getSizeInCellsY();
9292

93-
cudaStreamCreate(&stream_);
9493
device_costmap_ = autoware::cuda_utils::make_unique<std::uint8_t[]>(num_cells_x * num_cells_y);
9594
device_costmap_aux_ =
9695
autoware::cuda_utils::make_unique<std::uint8_t[]>(num_cells_x * num_cells_y);
@@ -136,8 +135,9 @@ void OccupancyGridMapInterface::updateOrigin(double new_origin_x, double new_ori
136135
device_costmap_.get(), lower_left_x, lower_left_y, size_x_, size_y_,
137136
device_costmap_aux_.get(), 0, 0, cell_size_x, cell_size_y, cell_size_x, cell_size_y, stream_);
138137

139-
cudaMemset(
140-
device_costmap_.get(), cost_value::NO_INFORMATION, size_x_ * size_y_ * sizeof(std::uint8_t));
138+
cudaMemsetAsync(
139+
device_costmap_.get(), cost_value::NO_INFORMATION, size_x_ * size_y_ * sizeof(std::uint8_t),
140+
stream_);
141141
} else {
142142
local_map = new unsigned char[cell_size_x * cell_size_y];
143143

@@ -207,6 +207,13 @@ bool OccupancyGridMapInterface::isCudaEnabled() const
207207
return use_cuda_;
208208
}
209209

210+
void OccupancyGridMapInterface::setCudaStream(const cudaStream_t & stream)
211+
{
212+
if (isCudaEnabled()) {
213+
stream_ = stream;
214+
}
215+
}
216+
210217
const autoware::cuda_utils::CudaUniquePtr<std::uint8_t[]> &
211218
OccupancyGridMapInterface::getDeviceCostmap() const
212219
{
@@ -215,9 +222,10 @@ OccupancyGridMapInterface::getDeviceCostmap() const
215222

216223
void OccupancyGridMapInterface::copyDeviceCostmapToHost() const
217224
{
218-
cudaMemcpy(
225+
cudaMemcpyAsync(
219226
costmap_, device_costmap_.get(), getSizeInCellsX() * getSizeInCellsY() * sizeof(std::uint8_t),
220-
cudaMemcpyDeviceToHost);
227+
cudaMemcpyDeviceToHost, stream_);
228+
cudaStreamSynchronize(stream_);
221229
}
222230

223231
} // namespace costmap_2d

Diff for: perception/autoware_probabilistic_occupancy_grid_map/lib/costmap_2d/occupancy_grid_map_fixed.cpp

+5-11
Original file line numberDiff line numberDiff line change
@@ -125,14 +125,12 @@ void OccupancyGridMapFixedBlindSpot::updateWithPointCloud(
125125
const std::size_t num_raw_points = raw_pointcloud.width * raw_pointcloud.height;
126126
float range_resolution_inv = 1.0 / map_res;
127127

128-
cudaStreamSynchronize(stream_);
129-
130128
map_fixed::prepareTensorLaunch(
131129
reinterpret_cast<const float *>(raw_pointcloud.data.get()), num_raw_points,
132130
raw_pointcloud.point_step / sizeof(float), angle_bin_size, range_bin_size, min_height_,
133131
max_height_, min_angle_, angle_increment_inv_, range_resolution_inv, device_rotation_map_.get(),
134132
device_translation_map_.get(), device_rotation_scan_.get(), device_translation_scan_.get(),
135-
raw_points_tensor_.get(), raw_pointcloud.stream);
133+
raw_points_tensor_.get(), stream_);
136134

137135
const std::size_t num_obstacle_points = obstacle_pointcloud.width * obstacle_pointcloud.height;
138136

@@ -141,27 +139,23 @@ void OccupancyGridMapFixedBlindSpot::updateWithPointCloud(
141139
obstacle_pointcloud.point_step / sizeof(float), angle_bin_size, range_bin_size, min_height_,
142140
max_height_, min_angle_, angle_increment_inv_, range_resolution_inv, device_rotation_map_.get(),
143141
device_translation_map_.get(), device_rotation_scan_.get(), device_translation_scan_.get(),
144-
obstacle_points_tensor_.get(), obstacle_pointcloud.stream);
142+
obstacle_points_tensor_.get(), stream_);
145143

146144
map_fixed::fillEmptySpaceLaunch(
147145
raw_points_tensor_.get(), angle_bin_size, range_bin_size, range_resolution_inv,
148146
scan_origin.position.x, scan_origin.position.y, origin_x_, origin_y_, num_cells_x, num_cells_y,
149-
cost_value::FREE_SPACE, device_costmap_.get(), raw_pointcloud.stream);
150-
151-
cudaStreamSynchronize(obstacle_pointcloud.stream);
147+
cost_value::FREE_SPACE, device_costmap_.get(), stream_);
152148

153149
map_fixed::fillUnknownSpaceLaunch(
154150
raw_points_tensor_.get(), obstacle_points_tensor_.get(), distance_margin_, angle_bin_size,
155151
range_bin_size, range_resolution_inv, scan_origin.position.x, scan_origin.position.y, origin_x_,
156152
origin_y_, num_cells_x, num_cells_y, cost_value::FREE_SPACE, cost_value::NO_INFORMATION,
157-
device_costmap_.get(), raw_pointcloud.stream);
153+
device_costmap_.get(), stream_);
158154

159155
map_fixed::fillObstaclesLaunch(
160156
obstacle_points_tensor_.get(), distance_margin_, angle_bin_size, range_bin_size,
161157
range_resolution_inv, origin_x_, origin_y_, num_cells_x, num_cells_y,
162-
cost_value::LETHAL_OBSTACLE, device_costmap_.get(), raw_pointcloud.stream);
163-
164-
cudaStreamSynchronize(raw_pointcloud.stream);
158+
cost_value::LETHAL_OBSTACLE, device_costmap_.get(), stream_);
165159
}
166160

167161
void OccupancyGridMapFixedBlindSpot::initRosParam(rclcpp::Node & node)

Diff for: perception/autoware_probabilistic_occupancy_grid_map/lib/updater/binary_bayes_filter_updater.cpp

+2-8
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,9 @@ void OccupancyGridMapBBFUpdater::initRosParam(rclcpp::Node & node)
5656
}
5757
}
5858

59-
cudaMemcpy(
59+
cudaMemcpyAsync(
6060
device_probability_matrix_.get(), probability_matrix_vector.data(),
61-
sizeof(float) * Index::NUM_STATES * Index::NUM_STATES, cudaMemcpyHostToDevice);
61+
sizeof(float) * Index::NUM_STATES * Index::NUM_STATES, cudaMemcpyHostToDevice, stream_);
6262
}
6363

6464
inline unsigned char OccupancyGridMapBBFUpdater::applyBBF(
@@ -103,12 +103,6 @@ bool OccupancyGridMapBBFUpdater::update(
103103
Index::NUM_STATES, Index::FREE, Index::OCCUPIED, cost_value::FREE_SPACE,
104104
cost_value::LETHAL_OBSTACLE, cost_value::NO_INFORMATION, v_ratio_,
105105
getSizeInCellsX() * getSizeInCellsY(), device_costmap_.get(), stream_);
106-
107-
cudaMemcpy(
108-
costmap_, device_costmap_.get(), getSizeInCellsX() * getSizeInCellsY() * sizeof(std::uint8_t),
109-
cudaMemcpyDeviceToHost);
110-
111-
cudaStreamSynchronize(stream_);
112106
} else {
113107
for (unsigned int x = 0; x < getSizeInCellsX(); x++) {
114108
for (unsigned int y = 0; y < getSizeInCellsY(); y++) {

Diff for: perception/autoware_probabilistic_occupancy_grid_map/lib/updater/log_odds_bayes_filter_updater.cpp

-6
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,6 @@ bool OccupancyGridMapLOBFUpdater::update(
7373
applyLOBFLaunch(
7474
single_frame_occupancy_grid_map.getDeviceCostmap().get(), cost_value::NO_INFORMATION,
7575
getSizeInCellsX() * getSizeInCellsY(), device_costmap_.get(), stream_);
76-
77-
cudaMemcpy(
78-
costmap_, device_costmap_.get(), getSizeInCellsX() * getSizeInCellsY() * sizeof(std::uint8_t),
79-
cudaMemcpyDeviceToHost);
80-
81-
cudaStreamSynchronize(stream_);
8276
} else {
8377
for (unsigned int x = 0; x < getSizeInCellsX(); x++) {
8478
for (unsigned int y = 0; y < getSizeInCellsY(); y++) {

Diff for: perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.cpp

+39-21
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,13 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode(
7171
const double map_resolution = this->declare_parameter<double>("map_resolution");
7272

7373
/* Subscriber and publisher */
74-
obstacle_pointcloud_sub_.subscribe(
75-
this, "~/input/obstacle_pointcloud",
76-
rclcpp::SensorDataQoS{}.keep_last(1).get_rmw_qos_profile());
77-
raw_pointcloud_sub_.subscribe(
78-
this, "~/input/raw_pointcloud", rclcpp::SensorDataQoS{}.keep_last(1).get_rmw_qos_profile());
79-
sync_ptr_ = std::make_shared<Sync>(SyncPolicy(5), obstacle_pointcloud_sub_, raw_pointcloud_sub_);
80-
81-
sync_ptr_->registerCallback(
82-
std::bind(&PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw, this, _1, _2));
74+
obstacle_pointcloud_sub_ptr_ = this->create_subscription<PointCloud2>(
75+
"~/input/obstacle_pointcloud", rclcpp::SensorDataQoS{}.keep_last(1),
76+
std::bind(&PointcloudBasedOccupancyGridMapNode::obstaclePointcloudCallback, this, _1));
77+
raw_pointcloud_sub_ptr_ = this->create_subscription<PointCloud2>(
78+
"~/input/raw_pointcloud", rclcpp::SensorDataQoS{}.keep_last(1),
79+
std::bind(&PointcloudBasedOccupancyGridMapNode::rawPointcloudCallback, this, _1));
80+
8381
occupancy_grid_map_pub_ = create_publisher<OccupancyGrid>("~/output/occupancy_grid_map", 1);
8482

8583
const std::string updater_type = this->declare_parameter<std::string>("updater_type");
@@ -94,7 +92,6 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode(
9492
occupancy_grid_map_updater_ptr_ = std::make_unique<OccupancyGridMapBBFUpdater>(
9593
true, map_length / map_resolution, map_length / map_resolution, map_resolution);
9694
}
97-
occupancy_grid_map_updater_ptr_->initRosParam(*this);
9895

9996
const std::string grid_map_type = this->declare_parameter<std::string>("grid_map_type");
10097

@@ -118,7 +115,15 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode(
118115
occupancy_grid_map_updater_ptr_->getSizeInCellsY(),
119116
occupancy_grid_map_updater_ptr_->getResolution());
120117
}
118+
119+
cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking);
120+
raw_pointcloud_.stream = stream_;
121+
obstacle_pointcloud_.stream = stream_;
122+
occupancy_grid_map_ptr_->setCudaStream(stream_);
123+
occupancy_grid_map_updater_ptr_->setCudaStream(stream_);
124+
121125
occupancy_grid_map_ptr_->initRosParam(*this);
126+
occupancy_grid_map_updater_ptr_->initRosParam(*this);
122127

123128
// initialize debug tool
124129
{
@@ -140,14 +145,29 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode(
140145
time_keeper_ = std::make_shared<autoware::universe_utils::TimeKeeper>(time_keeper);
141146
}
142147
}
148+
}
149+
150+
void PointcloudBasedOccupancyGridMapNode::obstaclePointcloudCallback(
151+
const PointCloud2::ConstSharedPtr & input_obstacle_msg)
152+
{
153+
obstacle_pointcloud_.fromROSMsgAsync(input_obstacle_msg);
143154

144-
cudaStreamCreate(&raw_pointcloud_.stream);
145-
cudaStreamCreate(&obstacle_pointcloud_.stream);
155+
if (obstacle_pointcloud_.header.stamp == raw_pointcloud_.header.stamp) {
156+
onPointcloudWithObstacleAndRaw();
157+
}
146158
}
147159

148-
void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(
149-
const PointCloud2::ConstSharedPtr & input_obstacle_msg,
160+
void PointcloudBasedOccupancyGridMapNode::rawPointcloudCallback(
150161
const PointCloud2::ConstSharedPtr & input_raw_msg)
162+
{
163+
raw_pointcloud_.fromROSMsgAsync(input_raw_msg);
164+
165+
if (obstacle_pointcloud_.header.stamp == raw_pointcloud_.header.stamp) {
166+
onPointcloudWithObstacleAndRaw();
167+
}
168+
}
169+
170+
void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw()
151171
{
152172
std::unique_ptr<ScopedTimeTrack> st_ptr;
153173
if (time_keeper_) st_ptr = std::make_unique<ScopedTimeTrack>(__func__, *time_keeper_);
@@ -156,9 +176,6 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(
156176
stop_watch_ptr_->toc("processing_time", true);
157177
}
158178

159-
raw_pointcloud_.fromROSMsgAsync(*input_raw_msg);
160-
obstacle_pointcloud_.fromROSMsgAsync(*input_obstacle_msg);
161-
162179
// if scan_origin_frame_ is "", replace it with raw_pointcloud_.header.frame_id
163180
if (scan_origin_frame_.empty()) {
164181
scan_origin_frame_ = raw_pointcloud_.header.frame_id;
@@ -172,7 +189,7 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(
172189
return;
173190
}
174191
}
175-
if (input_obstacle_msg->header.frame_id != base_link_frame_) {
192+
if (obstacle_pointcloud_.header.frame_id != base_link_frame_) {
176193
if (!utils::transformPointcloudAsync(obstacle_pointcloud_, *tf2_, base_link_frame_)) {
177194
return;
178195
}
@@ -221,7 +238,7 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(
221238

222239
// publish
223240
occupancy_grid_map_pub_->publish(OccupancyGridMapToMsgPtr(
224-
map_frame_, input_raw_msg->header.stamp, robot_pose.position.z,
241+
map_frame_, raw_pointcloud_.header.stamp, robot_pose.position.z,
225242
*occupancy_grid_map_ptr_)); // (todo) robot_pose may be altered with gridmap_origin
226243
} else {
227244
std::unique_ptr<ScopedTimeTrack> inner_st_ptr;
@@ -231,10 +248,11 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(
231248

232249
// Update with bayes filter
233250
occupancy_grid_map_updater_ptr_->update(*occupancy_grid_map_ptr_);
251+
occupancy_grid_map_updater_ptr_->copyDeviceCostmapToHost();
234252

235253
// publish
236254
occupancy_grid_map_pub_->publish(OccupancyGridMapToMsgPtr(
237-
map_frame_, input_raw_msg->header.stamp, robot_pose.position.z,
255+
map_frame_, raw_pointcloud_.header.stamp, robot_pose.position.z,
238256
*occupancy_grid_map_updater_ptr_));
239257
}
240258

@@ -244,7 +262,7 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(
244262
const double pipeline_latency_ms =
245263
std::chrono::duration<double, std::milli>(
246264
std::chrono::nanoseconds(
247-
(this->get_clock()->now() - input_raw_msg->header.stamp).nanoseconds()))
265+
(this->get_clock()->now() - raw_pointcloud_.header.stamp).nanoseconds()))
248266
.count();
249267
debug_publisher_ptr_->publish<autoware_internal_debug_msgs::msg::Float64Stamped>(
250268
"debug/cyclic_time_ms", cyclic_time_ms);

Diff for: perception/autoware_probabilistic_occupancy_grid_map/src/pointcloud_based_occupancy_grid_map/pointcloud_based_occupancy_grid_map_node.hpp

+8-13
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,7 @@
3030
#include <sensor_msgs/msg/laser_scan.hpp>
3131
#include <sensor_msgs/point_cloud2_iterator.hpp>
3232

33-
#include <message_filters/pass_through.h>
34-
#include <message_filters/subscriber.h>
35-
#include <message_filters/sync_policies/exact_time.h>
36-
#include <message_filters/synchronizer.h>
33+
#include <cuda_runtime.h>
3734
#include <tf2_ros/buffer.h>
3835
#include <tf2_ros/transform_listener.h>
3936

@@ -59,30 +56,28 @@ class PointcloudBasedOccupancyGridMapNode : public rclcpp::Node
5956
explicit PointcloudBasedOccupancyGridMapNode(const rclcpp::NodeOptions & node_options);
6057

6158
private:
62-
void onPointcloudWithObstacleAndRaw(
63-
const PointCloud2::ConstSharedPtr & input_obstacle_msg,
64-
const PointCloud2::ConstSharedPtr & input_raw_msg);
59+
void obstaclePointcloudCallback(const PointCloud2::ConstSharedPtr & input_obstacle_msg);
60+
void rawPointcloudCallback(const PointCloud2::ConstSharedPtr & input_raw_msg);
61+
void onPointcloudWithObstacleAndRaw();
62+
6563
OccupancyGrid::UniquePtr OccupancyGridMapToMsgPtr(
6664
const std::string & frame_id, const Time & stamp, const float & robot_pose_z,
6765
const Costmap2D & occupancy_grid_map);
6866

6967
private:
7068
rclcpp::Publisher<OccupancyGrid>::SharedPtr occupancy_grid_map_pub_;
71-
message_filters::Subscriber<PointCloud2> obstacle_pointcloud_sub_;
72-
message_filters::Subscriber<PointCloud2> raw_pointcloud_sub_;
69+
rclcpp::Subscription<PointCloud2>::SharedPtr obstacle_pointcloud_sub_ptr_;
70+
rclcpp::Subscription<PointCloud2>::SharedPtr raw_pointcloud_sub_ptr_;
7371
std::unique_ptr<autoware::universe_utils::StopWatch<std::chrono::milliseconds>> stop_watch_ptr_{};
7472
std::unique_ptr<autoware::universe_utils::DebugPublisher> debug_publisher_ptr_{};
7573

7674
std::shared_ptr<Buffer> tf2_{std::make_shared<Buffer>(get_clock())};
7775
std::shared_ptr<TransformListener> tf2_listener_{std::make_shared<TransformListener>(*tf2_)};
7876

79-
using SyncPolicy = message_filters::sync_policies::ExactTime<PointCloud2, PointCloud2>;
80-
using Sync = message_filters::Synchronizer<SyncPolicy>;
81-
std::shared_ptr<Sync> sync_ptr_;
82-
8377
std::unique_ptr<OccupancyGridMapInterface> occupancy_grid_map_ptr_;
8478
std::unique_ptr<OccupancyGridMapUpdaterInterface> occupancy_grid_map_updater_ptr_;
8579

80+
cudaStream_t stream_;
8681
CudaPointCloud2 raw_pointcloud_;
8782
CudaPointCloud2 obstacle_pointcloud_;
8883

0 commit comments

Comments
 (0)