Skip to content

Commit b4f3901

Browse files
author
M. Fatih Cırıt
committed
fix(tensorrt): update tensorrt code of traffic_light_ssd_fine_detector
Signed-off-by: M. Fatih Cırıt <mfc@leodrive.ai>
1 parent 7932302 commit b4f3901

File tree

3 files changed

+19
-12
lines changed

3 files changed

+19
-12
lines changed

perception/traffic_light_ssd_fine_detector/lib/include/trt_ssd.hpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ class Net
7272
void save(const std::string & path);
7373

7474
// Infer using pre-allocated GPU buffers {data, scores, boxes}
75-
void infer(std::vector<void *> & buffers, const int batch_size);
75+
void infer(const int batch_size);
7676

7777
// Get (c, h, w) size of the fixed input
7878
std::vector<int> getInputSize();
@@ -90,6 +90,8 @@ class Net
9090
unique_ptr<nvinfer1::IHostMemory> plan_ = nullptr;
9191
unique_ptr<nvinfer1::ICudaEngine> engine_ = nullptr;
9292
unique_ptr<nvinfer1::IExecutionContext> context_ = nullptr;
93+
std::string name_tensor_in_;
94+
std::string name_tensor_out_;
9395
cudaStream_t stream_ = nullptr;
9496

9597
void load(const std::string & path);

perception/traffic_light_ssd_fine_detector/lib/src/trt_ssd.cpp

+15-9
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ Net::Net(const std::string & path, bool verbose)
5555
runtime_ = unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(logger));
5656
load(path);
5757
prepare();
58+
name_tensor_in_ = engine_->getIOTensorName(0);
59+
name_tensor_out_ = engine_->getIOTensorName(engine_->getNbIOTensors() - 1);
5860
}
5961

6062
Net::~Net()
@@ -155,6 +157,8 @@ Net::Net(
155157
std::cout << "Fail to create context" << std::endl;
156158
return;
157159
}
160+
name_tensor_in_ = engine_->getIOTensorName(0);
161+
name_tensor_out_ = engine_->getIOTensorName(engine_->getNbIOTensors() - 1);
158162
}
159163

160164
void Net::save(const std::string & path)
@@ -164,35 +168,37 @@ void Net::save(const std::string & path)
164168
file.write(reinterpret_cast<const char *>(plan_->data()), plan_->size());
165169
}
166170

167-
void Net::infer(std::vector<void *> & buffers, const int batch_size)
171+
// Run inference for `batch_size` samples on stream_ and block until the stream is idle.
// Throws std::runtime_error when the execution context is missing, when the input shape
// is rejected, or when the work cannot be enqueued.
void Net::infer(const int batch_size)
168172
{
169173
if (!context_) {
170174
throw std::runtime_error("Fail to create context");
171175
}
172-
auto input_dims = engine_->getBindingDimensions(0);
173-
context_->setBindingDimensions(
174-
0, nvinfer1::Dims4(batch_size, input_dims.d[1], input_dims.d[2], input_dims.d[3]));
175-
context_->enqueueV2(buffers.data(), stream_, nullptr);
176+
const auto input_dims = engine_->getTensorShape(name_tensor_in_.c_str());
177+
// Only the batch dimension is overridden; the remaining dims come from the engine.
if (!context_->setInputShape(
178+
name_tensor_in_.c_str(),
179+
nvinfer1::Dims4(batch_size, input_dims.d[1], input_dims.d[2], input_dims.d[3]))) {
throw std::runtime_error("Fail to set input shape");
}
180+
// NOTE(review): enqueueV3 takes no buffer array; device addresses must have been
// registered per tensor via IExecutionContext::setTensorAddress beforehand. No such
// call is visible in this change - confirm the caller's buffers are bound somewhere.
if (!context_->enqueueV3(stream_)) {
throw std::runtime_error("Fail to enqueue inference");
}
176181
cudaStreamSynchronize(stream_);
177182
}
178183

179184
// Returns dimensions d[1], d[2], d[3] of the input tensor's shape as reported by the
// engine (d[0] is skipped). The header describes this as the (c, h, w) size of the
// fixed input.
std::vector<int> Net::getInputSize()
180185
{
181-
auto dims = engine_->getBindingDimensions(0);
186+
const auto dims = engine_->getTensorShape(name_tensor_in_.c_str());
182187
return {dims.d[1], dims.d[2], dims.d[3]};
183188
}
184189

185190
// Returns dimensions d[1], d[2] of the score output tensor's shape (d[0] is skipped).
// The score tensor is looked up as IO tensor index 1, matching the binding index used
// before the migration to the name-based API. name_tensor_out_ refers to the LAST IO
// tensor, which is not the score tensor when the engine exposes {data, scores, boxes}.
std::vector<int> Net::getOutputScoreSize()
186191
{
187-
auto dims = engine_->getBindingDimensions(1);
192+
const auto dims = engine_->getTensorShape(engine_->getIOTensorName(1));
188193
return {dims.d[1], dims.d[2]};
189194
}
190195

191196
// Returns d[0] of the kMAX shape registered for the input tensor in optimization
// profile 0.
int Net::getMaxBatchSize()
192197
{
193-
return engine_->getProfileDimensions(0, 0, nvinfer1::OptProfileSelector::kMAX).d[0];
198+
return engine_->getProfileShape(name_tensor_in_.c_str(), 0, nvinfer1::OptProfileSelector::kMAX)
199+
.d[0];
194200
}
195201

196-
int Net::getMaxDetections() { return engine_->getBindingDimensions(1).d[1]; }
202+
// Returns d[1] of IO tensor 1 (the former binding index 1). This must query an output
// tensor: querying the input tensor would return an input image dimension instead.
int Net::getMaxDetections() { return engine_->getTensorShape(engine_->getIOTensorName(1)).d[1]; }
197203

198204
} // namespace ssd

perception/traffic_light_ssd_fine_detector/src/nodelet.cpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,6 @@ void TrafficLightSSDFineDetectorNodelet::callback(
144144
auto data_d = cuda::make_unique<float[]>(num_infer * channel_ * width_ * height_);
145145
auto scores_d = cuda::make_unique<float[]>(num_infer * detection_per_class_ * class_num_);
146146
auto boxes_d = cuda::make_unique<float[]>(num_infer * detection_per_class_ * 4);
147-
std::vector<void *> buffers = {data_d.get(), scores_d.get(), boxes_d.get()};
148147
std::vector<cv::Point> lts, rbs;
149148
std::vector<cv::Mat> cropped_imgs;
150149

@@ -168,7 +167,7 @@ void TrafficLightSSDFineDetectorNodelet::callback(
168167
cudaMemcpy(data_d.get(), data.data(), data.size() * sizeof(float), cudaMemcpyHostToDevice);
169168

170169
try {
171-
net_ptr_->infer(buffers, num_infer);
170+
net_ptr_->infer(num_infer);
172171
} catch (std::exception & e) {
173172
RCLCPP_ERROR(this->get_logger(), "%s", e.what());
174173
return;

0 commit comments

Comments
 (0)