Skip to content

Commit 2be4572

Browse files
StepTurtle and amadeuszsz authored Oct 16, 2024
fix(tensorrt_common): add enqueueV3 support (autowarefoundation#9018)
* fix: add enqueueV3 support Signed-off-by: Barış Zeren <baris@leodrive.ai> * chore: update multiplier Signed-off-by: Barış Zeren <baris@leodrive.ai> * fix: wrong multiplier Signed-off-by: Barış Zeren <baris@leodrive.ai> * fix: macros Signed-off-by: Barış Zeren <baris@leodrive.ai> --------- Signed-off-by: Barış Zeren <baris@leodrive.ai> Co-authored-by: Amadeusz Szymko <amadeusz.szymko.2@tier4.jp>
1 parent 23f4c86 commit 2be4572

File tree

2 files changed

+40
-4
lines changed

2 files changed

+40
-4
lines changed
 

‎common/tensorrt_common/include/tensorrt_common/tensorrt_common.hpp

+9
Original file line numberDiff line numberDiff line change
@@ -184,12 +184,21 @@ class TrtCommon // NOLINT
184184
*/
185185
void setup();
186186

187+
// Version is encoded as major * 1000 + minor * 100 + patch, so 8500 means TensorRT 8.5.0.
#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8500
  /**
   * @brief register one device address per engine I/O tensor on the execution context
   * @param[in] bindings addresses indexed by I/O tensor index; must hold at least as many
   * entries as the engine has I/O tensors
   */
  void setupBindings(std::vector<void *> & bindings);
#endif
190+
187191
bool isInitialized();
188192

189193
nvinfer1::Dims getBindingDimensions(const int32_t index) const;
190194
int32_t getNbBindings();
191195
bool setBindingDimensions(const int32_t index, const nvinfer1::Dims & dimensions) const;
196+
// Version is encoded as major * 1000 + minor * 100 + patch, so 8500 means TensorRT 8.5.0.
#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8500
  /**
   * @brief enqueue inference on the execution context through its enqueueV3 API
   * @param[in] stream CUDA stream handed to the execution context
   * @return value returned by the execution context's enqueueV3
   * @note presumably tensor addresses must be registered beforehand (see setupBindings) -
   * confirm against the TensorRT documentation
   */
  bool enqueueV3(cudaStream_t stream);
#endif
199+
#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH < 10000
192200
bool enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed);
201+
#endif
193202

194203
/**
195204
* @brief output per-layer information

‎common/tensorrt_common/src/tensorrt_common.cpp

+31-4
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,16 @@ void TrtCommon::setup()
225225
is_initialized_ = true;
226226
}
227227

228+
// Version is encoded as major * 1000 + minor * 100 + patch, so 8500 means TensorRT 8.5.0.
#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8500
/**
 * @brief register one address per engine I/O tensor on the execution context
 *
 * For every I/O tensor index reported by the engine, the tensor name is looked up and the
 * entry of @p bindings at the same index is passed to the context's setTensorAddress.
 *
 * @param[in] bindings addresses indexed by I/O tensor index
 * @throws std::out_of_range (from std::vector::at) if @p bindings has fewer entries than the
 * engine has I/O tensors
 * @note whatever setTensorAddress returns is discarded here, so a rejected address is not
 * reported to the caller
 */
void TrtCommon::setupBindings(std::vector<void *> & bindings)
{
  const int32_t num_io_tensors = engine_->getNbIOTensors();
  for (int32_t i = 0; i < num_io_tensors; ++i) {
    auto const name = engine_->getIOTensorName(i);
    // at() is deliberate: a too-short bindings vector fails loudly instead of reading
    // past the end.
    context_->setTensorAddress(name, bindings.at(i));
  }
}
#endif
237+
228238
bool TrtCommon::loadEngine(const std::string & engine_file_path)
229239
{
230240
std::ifstream engine_file(engine_file_path);
@@ -303,8 +313,7 @@ void TrtCommon::printNetworkInfo(const std::string & onnx_file_path)
303313
total_gflops += gflops;
304314
total_params += num_weights;
305315
std::cout << "L" << i << " [conv " << k_dims.d[0] << "x" << k_dims.d[1] << " (" << groups
306-
<< ") "
307-
<< "/" << s_dims.d[0] << "] " << dim_in.d[3] << "x" << dim_in.d[2] << "x"
316+
<< ") " << "/" << s_dims.d[0] << "] " << dim_in.d[3] << "x" << dim_in.d[2] << "x"
308317
<< dim_in.d[1] << " -> " << dim_out.d[3] << "x" << dim_out.d[2] << "x"
309318
<< dim_out.d[1];
310319
std::cout << " weights:" << num_weights;
@@ -369,8 +378,7 @@ bool TrtCommon::buildEngineFromOnnx(
369378
if (num_available_dla > 0) {
370379
std::cout << "###" << num_available_dla << " DLAs are supported! ###" << std::endl;
371380
} else {
372-
std::cout << "###Warning : "
373-
<< "No DLA is supported! ###" << std::endl;
381+
std::cout << "###Warning : " << "No DLA is supported! ###" << std::endl;
374382
}
375383
config->setDefaultDeviceType(nvinfer1::DeviceType::kDLA);
376384
config->setDLACore(build_config_->dla_core_id);
@@ -567,6 +575,24 @@ bool TrtCommon::setBindingDimensions(const int32_t index, const nvinfer1::Dims &
567575
return context_->setBindingDimensions(index, dimensions);
568576
}
569577

578+
// Version is encoded as major * 1000 + minor * 100 + patch, so 8500 means TensorRT 8.5.0.
#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH >= 8500
/**
 * @brief enqueue inference on the execution context through its enqueueV3 API
 *
 * When build_config_->profile_per_layer is set, the host-side wall time spent inside the
 * enqueueV3 call is measured and reported to host_profiler_ under the name "inference".
 *
 * @param[in] stream CUDA stream handed to the execution context
 * @return value returned by the execution context's enqueueV3
 */
bool TrtCommon::enqueueV3(cudaStream_t stream)
{
  // Fast path: no profiling requested, just forward the call.
  if (!build_config_->profile_per_layer) {
    return context_->enqueueV3(stream);
  }

  const auto inference_start = std::chrono::high_resolution_clock::now();
  const bool ret = context_->enqueueV3(stream);
  const auto inference_end = std::chrono::high_resolution_clock::now();

  // Reported in milliseconds as a float.
  host_profiler_.reportLayerTime(
    "inference",
    std::chrono::duration<float, std::milli>(inference_end - inference_start).count());
  return ret;
}
#endif
595+
#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH < 10000
570596
bool TrtCommon::enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * input_consumed)
571597
{
572598
if (build_config_->profile_per_layer) {
@@ -583,6 +609,7 @@ bool TrtCommon::enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * i
583609
return context_->enqueueV2(bindings, stream, input_consumed);
584610
}
585611
}
612+
#endif
586613

587614
void TrtCommon::printProfiling()
588615
{

0 commit comments

Comments
 (0)