@@ -225,6 +225,16 @@ void TrtCommon::setup()
   is_initialized_ = true;
 }
 
+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH >= 8500
+void TrtCommon::setupBindings(std::vector<void *> & bindings)
+{
+  for (int32_t i = 0, e = engine_->getNbIOTensors(); i < e; i++) {
+    auto const name = engine_->getIOTensorName(i);
+    context_->setTensorAddress(name, bindings.at(i));
+  }
+}
+#endif
+
 bool TrtCommon::loadEngine(const std::string & engine_file_path)
 {
   std::ifstream engine_file(engine_file_path);
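Note on the version guard above: it keys on a flattened version number, NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + patch, so TensorRT 8.5.0 maps to 8500 and anything newer enables the name-based binding path. The guard spells the patch macro NV_TENSOR_PATCH, while the macro defined in NvInferVersion.h is NV_TENSORRT_PATCH; an identifier that is not defined evaluates to 0 inside #if, so the comparison still resolves the same way at the 8500 and 10000 boundaries used here. A minimal standalone sketch of the same arithmetic (not part of this commit, assumes only the TensorRT version header):

// Sketch: compute the same version key the #if guards use.
// NV_TENSORRT_MAJOR/MINOR/PATCH come from NvInferVersion.h.
#include <NvInferVersion.h>

#include <iostream>

int main()
{
  const int key = NV_TENSORRT_MAJOR * 1000 + NV_TENSORRT_MINOR * 100 + NV_TENSORRT_PATCH;
  std::cout << "version key: " << key << '\n';  // e.g. 8601 for TensorRT 8.6.1
  std::cout << "setupBindings/enqueueV3 compiled: " << (key >= 8500) << '\n';
  std::cout << "enqueueV2 still compiled: " << (key < 10000) << '\n';
  return 0;
}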
@@ -303,8 +313,7 @@ void TrtCommon::printNetworkInfo(const std::string & onnx_file_path)
     total_gflops += gflops;
     total_params += num_weights;
     std::cout << "L" << i << " [conv " << k_dims.d[0] << "x" << k_dims.d[1] << " (" << groups
-              << ") "
-              << "/" << s_dims.d[0] << "] " << dim_in.d[3] << "x" << dim_in.d[2] << "x"
+              << ") " << "/" << s_dims.d[0] << "] " << dim_in.d[3] << "x" << dim_in.d[2] << "x"
               << dim_in.d[1] << " -> " << dim_out.d[3] << "x" << dim_out.d[2] << "x"
               << dim_out.d[1];
     std::cout << " weights:" << num_weights;
@@ -369,8 +378,7 @@ bool TrtCommon::buildEngineFromOnnx(
     if (num_available_dla > 0) {
       std::cout << "###" << num_available_dla << " DLAs are supported! ###" << std::endl;
     } else {
-      std::cout << "###Warning : "
-                << "No DLA is supported! ###" << std::endl;
+      std::cout << "###Warning : " << "No DLA is supported! ###" << std::endl;
     }
     config->setDefaultDeviceType(nvinfer1::DeviceType::kDLA);
     config->setDLACore(build_config_->dla_core_id);
@@ -567,6 +575,24 @@ bool TrtCommon::setBindingDimensions(const int32_t index, const nvinfer1::Dims &
   return context_->setBindingDimensions(index, dimensions);
 }
 
+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH >= 8500
+bool TrtCommon::enqueueV3(cudaStream_t stream)
+{
+  if (build_config_->profile_per_layer) {
+    auto inference_start = std::chrono::high_resolution_clock::now();
+
+    bool ret = context_->enqueueV3(stream);
+
+    auto inference_end = std::chrono::high_resolution_clock::now();
+    host_profiler_.reportLayerTime(
+      "inference",
+      std::chrono::duration<float, std::milli>(inference_end - inference_start).count());
+    return ret;
+  }
+  return context_->enqueueV3(stream);
+}
+#endif
+#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH < 10000
 bool TrtCommon::enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed)
 {
   if (build_config_->profile_per_layer) {
@@ -583,6 +609,7 @@ bool TrtCommon::enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * i
     return context_->enqueueV2(bindings, stream, input_consumed);
   }
 }
+#endif
 
 void TrtCommon::printProfiling()
 {
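For context on how the two new members fit together on TensorRT >= 8.5, a minimal caller-side sketch follows. It is an illustration only, not part of this commit: trt_common, the device pointers input_d/output_d, and stream are hypothetical names, and it assumes the engine was already built or loaded through this class and that the bindings vector is ordered by the engine's I/O tensor index.

// Hypothetical caller-side flow (assumed names, not from this commit):
//  1. collect one device pointer per I/O tensor, in engine I/O index order,
//  2. setupBindings() registers each pointer by tensor name via setTensorAddress(),
//  3. enqueueV3() launches inference with only the CUDA stream.
std::vector<void *> bindings = {input_d, output_d};  // device buffers allocated elsewhere

#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8500
trt_common.setupBindings(bindings);
const bool ok = trt_common.enqueueV3(stream);
#else
const bool ok = trt_common.enqueueV2(bindings.data(), stream, nullptr);
#endif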