@@ -122,6 +122,8 @@ Net::Net(const std::string & path, bool verbose)
122
122
std::cout << " Fail to prepare engine" << std::endl;
123
123
return ;
124
124
}
125
+ name_tensor_in_ = engine_->getIOTensorName (0 );
126
+ name_tensor_out_ = engine_->getIOTensorName (engine_->getNbIOTensors () - 1 );
125
127
}
126
128
127
129
Net::~Net ()
@@ -267,6 +269,8 @@ Net::Net(
267
269
std::cout << " Fail to create engine" << std::endl;
268
270
return ;
269
271
}
272
+ name_tensor_in_ = engine_->getIOTensorName (0 );
273
+ name_tensor_out_ = engine_->getIOTensorName (engine_->getNbIOTensors () - 1 );
270
274
}
271
275
272
276
void Net::save (const std::string & path) const
@@ -276,15 +280,16 @@ void Net::save(const std::string & path) const
276
280
file.write (reinterpret_cast <const char *>(plan_->data ()), plan_->size ());
277
281
}
278
282
279
- void Net::infer (std::vector< void *> & buffers, const int batch_size)
283
+ void Net::infer (const int batch_size)
280
284
{
281
285
if (!context_) {
282
286
throw std::runtime_error (" Fail to create context" );
283
287
}
284
- auto input_dims = engine_->getBindingDimensions (0 );
285
- context_->setBindingDimensions (
286
- 0 , nvinfer1::Dims4 (batch_size, input_dims.d [1 ], input_dims.d [2 ], input_dims.d [3 ]));
287
- context_->enqueueV2 (buffers.data (), stream_, nullptr );
288
+ const auto input_dims = engine_->getTensorShape (name_tensor_in_.c_str ());
289
+ context_->setInputShape (
290
+ name_tensor_in_.c_str (),
291
+ nvinfer1::Dims4 (batch_size, input_dims.d [1 ], input_dims.d [2 ], input_dims.d [3 ]));
292
+ context_->enqueueV3 (stream_);
288
293
cudaStreamSynchronize (stream_);
289
294
}
290
295
@@ -294,10 +299,8 @@ bool Net::detect(const cv::Mat & in_img, float * out_scores, float * out_boxes,
294
299
const auto input = preprocess (in_img, input_dims.at (0 ), input_dims.at (2 ), input_dims.at (1 ));
295
300
CHECK_CUDA_ERROR (
296
301
cudaMemcpy (input_d_.get (), input.data (), input.size () * sizeof (float ), cudaMemcpyHostToDevice));
297
- std::vector<void *> buffers = {
298
- input_d_.get (), out_scores_d_.get (), out_boxes_d_.get (), out_classes_d_.get ()};
299
302
try {
300
- infer (buffers, 1 );
303
+ infer (1 );
301
304
} catch (const std::runtime_error & e) {
302
305
return false ;
303
306
}
@@ -316,13 +319,14 @@ bool Net::detect(const cv::Mat & in_img, float * out_scores, float * out_boxes,
316
319
317
320
std::vector<int > Net::getInputDims () const
318
321
{
319
- auto dims = engine_->getBindingDimensions ( 0 );
322
+ const auto dims = engine_->getTensorShape (name_tensor_in_. c_str () );
320
323
return {dims.d [1 ], dims.d [2 ], dims.d [3 ]};
321
324
}
322
325
323
326
int Net::getMaxBatchSize () const
324
327
{
325
- return engine_->getProfileDimensions (0 , 0 , nvinfer1::OptProfileSelector::kMAX ).d [0 ];
328
+ return engine_->getProfileShape (name_tensor_in_.c_str (), 0 , nvinfer1::OptProfileSelector::kMAX )
329
+ .d [0 ];
326
330
}
327
331
328
332
int Net::getInputSize () const
@@ -333,6 +337,6 @@ int Net::getInputSize() const
333
337
return input_size;
334
338
}
335
339
336
- int Net::getMaxDetections () const { return engine_->getBindingDimensions ( 1 ).d [1 ]; }
340
+ int Net::getMaxDetections () const { return engine_->getTensorShape (name_tensor_out_. c_str () ).d [1 ]; }
337
341
338
342
} // namespace yolo
0 commit comments