diff --git a/perception/autoware_mtr/.gitignore b/perception/autoware_mtr/.gitignore new file mode 100644 index 0000000000000..42bd33fbaa013 --- /dev/null +++ b/perception/autoware_mtr/.gitignore @@ -0,0 +1,51 @@ +devel/ +logs/ +build/ +bin/ +msg_gen/ +srv_gen/ +data/*onnx +msg/*Action.msg +msg/*ActionFeedback.msg +msg/*ActionGoal.msg +msg/*ActionResult.msg +msg/*Feedback.msg +msg/*Goal.msg +msg/*Result.msg +msg/_*.py +build_isolated/ +devel_isolated/ + +# Generated by dynamic reconfigure +*.cfgc +/cfg/cpp/ +/cfg/*.py + +# Ignore generated docs +*.dox +*.wikidoc + +# eclipse stuff +.project +.cproject + +# qcreator stuff +CMakeLists.txt.user + +srv/_*.py +*.pcd +*.pyc +qtcreator-* +*.user + +/planning/cfg +/planning/docs +/planning/src + +*~ + +# Emacs +.#* + +# Catkin custom files +CATKIN_IGNORE diff --git a/perception/autoware_mtr/.markdownlint.yaml b/perception/autoware_mtr/.markdownlint.yaml new file mode 100644 index 0000000000000..584154b2009de --- /dev/null +++ b/perception/autoware_mtr/.markdownlint.yaml @@ -0,0 +1,16 @@ +# This file is automatically synced from: +# https://github.com/autowarefoundation/sync-file-templates +# To make changes, update the source repository and follow the guidelines in its README. + +# See https://github.com/DavidAnson/markdownlint/blob/main/doc/Rules.md for all rules. +default: true +MD013: false +MD024: + siblings_only: true +MD029: + style: ordered +MD033: false +MD041: false +MD045: false +MD046: false +MD049: false diff --git a/perception/autoware_mtr/.pre-commit-config-optional.yaml b/perception/autoware_mtr/.pre-commit-config-optional.yaml new file mode 100644 index 0000000000000..8c9345e15f064 --- /dev/null +++ b/perception/autoware_mtr/.pre-commit-config-optional.yaml @@ -0,0 +1,6 @@ +repos: + - repo: https://github.com/tcort/markdown-link-check + rev: v3.12.2 + hooks: + - id: markdown-link-check + args: [--quiet, --config=.markdown-link-check.json] diff --git a/perception/autoware_mtr/.pre-commit-config.yaml b/perception/autoware_mtr/.pre-commit-config.yaml new file mode 100644 index 0000000000000..63dc504f61a2b --- /dev/null +++ b/perception/autoware_mtr/.pre-commit-config.yaml @@ -0,0 +1,95 @@ +ci: + autofix_commit_msg: "style(pre-commit): autofix" + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: check-json + - id: check-merge-conflict + - id: check-toml + - id: check-xml + - id: check-yaml + args: [--unsafe] + - id: detect-private-key + - id: end-of-file-fixer + - id: mixed-line-ending + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + + - repo: https://github.com/igorshubovych/markdownlint-cli + rev: v0.41.0 + hooks: + - id: markdownlint + args: [-c, .markdownlint.yaml, --fix] + + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v4.0.0-alpha.8 + hooks: + - id: prettier + + - repo: https://github.com/adrienverge/yamllint + rev: v1.35.1 + hooks: + - id: yamllint + + - repo: https://github.com/tier4/pre-commit-hooks-ros + rev: v0.10.0 + hooks: + - id: flake8-ros + - id: prettier-xacro + - id: prettier-launch-xml + - id: prettier-package-xml + - id: ros-include-guard + - id: sort-package-xml + + - repo: https://github.com/shellcheck-py/shellcheck-py + rev: v0.10.0.1 + hooks: + - id: shellcheck + + - repo: https://github.com/scop/pre-commit-shfmt + rev: v3.9.0-1 + hooks: + - id: shfmt + args: [-w, -s, -i=4] + + - repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + + - repo: https://github.com/psf/black + rev: 24.8.0 + hooks: + - id: black + args: [--line-length=100] + + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v18.1.8 + hooks: + - id: clang-format + types_or: [c++, c, cuda] + + - repo: https://github.com/cpplint/cpplint + rev: 1.6.1 + hooks: + - id: cpplint + args: [--quiet] + exclude: .cu + + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.29.2 + hooks: + - id: check-metaschema + files: ^.+/schema/.*schema\.json$ + + - repo: local + hooks: + - id: prettier-svg + name: prettier svg + description: Apply Prettier with plugin-xml to svg. + entry: prettier --write --list-different --ignore-unknown --print-width 200 --xml-self-closing-space false --xml-whitespace-sensitivity ignore + language: node + files: .svg$ + additional_dependencies: [prettier@2.7.1, "@prettier/plugin-xml@2.2.0"] diff --git a/perception/autoware_mtr/CMakeLists.txt b/perception/autoware_mtr/CMakeLists.txt new file mode 100644 index 0000000000000..5dc5c2fb25106 --- /dev/null +++ b/perception/autoware_mtr/CMakeLists.txt @@ -0,0 +1,104 @@ +cmake_minimum_required(VERSION 3.8) +project(autoware_mtr) + +if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + add_compile_options(-Wall -Wextra -Wpedantic) +endif() + +# find dependencies +find_package(autoware_cmake REQUIRED) +autoware_package() + +find_package(CUDA REQUIRED) +if (CUDA_FOUND) + include_directories(${CUDA_INCLUDE_DIRS}) + find_library(CUBLAS_LIBRARIES cublas HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib64 + ${CUDA_TOOLKIT_ROOT_DIR}/lib) + find_library( + CUDNN_LIBRARIES + NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib + ${__cudnn_lib_win_name} + PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} + ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} + PATH_SUFFIXES lib lib64 bin + DOC "CUDNN library.") +else() + message(FATAL_ERROR "Can not find CUDA") +endif() + +list(APPEND TRT_PLUGINS "nvinfer") +list(APPEND TRT_PLUGINS "nvonnxparser") +list(APPEND TRT_PLUGINS "nvparsers") +foreach(libName ${TRT_PLUGINS}) + find_library(${libName}_lib NAMES ${libName} "/usr" PATH_SUFFIXES lib) + list(APPEND TRT_PLUGINS ${${libName}_lib}) +endforeach() + +# TRT plugins and cuda kernels +cuda_add_library(${PROJECT_NAME}_cuda_lib SHARED + lib/src/attention/trt_attn_value_computation_kernel.cu + lib/src/attention/trt_attn_value_computation.cpp + lib/src/attention/trt_attn_weight_computation_kernel.cu + lib/src/attention/trt_attn_weight_computation.cpp + lib/src/knn/trt_knn_batch_kernel.cu + lib/src/knn/trt_knn_batch.cpp + lib/src/knn/trt_knn_batch_mlogk_kernel.cu + lib/src/knn/trt_knn_batch_mlogk.cpp + lib/src/preprocess/agent_preprocess_kernel.cu + lib/src/preprocess/polyline_preprocess_kernel.cu + lib/src/postprocess/postprocess_kernel.cu +) +target_include_directories(${PROJECT_NAME}_cuda_lib PUBLIC + lib/include +) + +ament_auto_add_library(${PROJECT_NAME}_lib SHARED + src/builder.cpp + src/trt_mtr.cpp + src/conversions/lanelet.cpp + src/intention_point.cpp + src/conversions/history.cpp +) +target_link_libraries(${PROJECT_NAME}_lib + ${TRT_PLUGINS} + ${TRT_PLUGIN_LIBS} + ${CUDA_LIBRARIES} + ${CUBLAS_LIBRARIES} + ${CUDNN_LIBRARIES} + ${PROJECT_NAME}_cuda_lib +) +target_include_directories(${PROJECT_NAME}_lib PUBLIC + include + lib/include +) + +target_compile_options(${PROJECT_NAME}_lib PRIVATE + -Wall -Wextra -Wpedantic -Werror -Wno-deprecated-declarations +) + +# ROS 2 node +ament_auto_add_library(${PROJECT_NAME}_node SHARED + src/node.cpp +) + +target_link_libraries(${PROJECT_NAME}_node + ${PROJECT_NAME}_lib +) + +rclcpp_components_register_node(${PROJECT_NAME}_node + PLUGIN "autoware::mtr::MTRNode" + EXECUTABLE ${PROJECT_NAME} +) + +if(BUILD_TESTING) + find_package(ament_lint_auto REQUIRED) + ament_lint_auto_find_test_dependencies() + ament_auto_add_gtest(test_fixed_queue test/test_fixed_queue.cpp) +endif() + +ament_auto_package( + INSTALL_TO_SHARE + launch + config + data +) diff --git a/perception/autoware_mtr/LICENSE b/perception/autoware_mtr/LICENSE new file mode 100644 index 0000000000000..261eeb9e9f8b2 --- /dev/null +++ b/perception/autoware_mtr/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/perception/autoware_mtr/README.md b/perception/autoware_mtr/README.md new file mode 100644 index 0000000000000..010fa8505dbdb --- /dev/null +++ b/perception/autoware_mtr/README.md @@ -0,0 +1,72 @@ +# autoware_mtr + +## Purpose + +The `autoware_mtr` package is used for 3D object motion prediction based on ML-based model called MTR. + +## Inner-workings / Algorithms + +The implementation bases on MTR [1] work. It uses TensorRT library for data process and network interface. + +## Inputs / Outputs + +### Input + +| Name | Type | Description | +| -------------------- | ----------------------------------------------- | ------------------------ | +| `~/input/objects` | `autoware_perception_msgs::msg::TrackedObjects` | Input agent state. | +| `~/input/vector_map` | `autoware_map_msgs::msg::LeneletMapBin` | Input vector map. | +| `~/input/ego` | `sensor_msgs::msg::Odometry` | Input ego vehicle state. | + +### Output + +| Name | Type | Description | +| ------------------ | ------------------------------------------------- | -------------------------- | +| `~/output/objects` | `autoware_perception_msgs::msg::PredictedObjects` | Predicted objects' motion. | + +## Parameters + +Following parameters can be specified in `.launch.xml` or command line. + +### `param_path` + +File path to the MTR configuration. (Default: `autoware_mtr/config/mtr.param.yaml`) + +### Configuration Parameters + +#### `model_params` + +| Name | type | Description | +| :---------------------------- | :-----: | :---------------------------------------------------------- | +| `model_path` | `str` | ONNX or engine file path. | +| `target_labels` | `str[]` | An array of label names to be predicted. | +| `num_past` | `int` | The number of history length. | +| `num_mode` | `int` | The number of predicted modes. | +| `num_future` | `int` | The number of predicted future length. | +| `max_num_polyline` | `int` | The maximum number of polylines to be contained in input. | +| `max_num_point` | `int` | The maximum number of points included in a single polyline. | +| `point_break_distance` | `float` | Distance threshold to separate points into two polylines. | +| `intention_point_filepath` | `str` | File path to intension points (.csv). | +| `num_intention_point_cluster` | `int` | The number of clusters of intention points. | + +#### `build_params` + +| Name | type | Description | +| :----------- | :----: | :---------------------------------------------------------- | +| `is_dynamic` | `bool` | Indicates whether the model allows dynamic shape inference. | +| `precision` | `str` | Precision mode. | +| `MINMAX` | `str` | Calibration mode. | + +### `data_path` + +Directory path to ONNX or TensorRT engine file. (Default: `autoware_mtr/data`) + +### `build_only` + +This option performs to build the TensorRT engine file from the ONNX file and exit after finishing it. (Default: `false`) + +You can execute with the following command: + +```bash +ros2 launch autoware_mtr mtr.launch.xml build_only:=true +``` diff --git a/perception/autoware_mtr/config/mtr.param.yaml b/perception/autoware_mtr/config/mtr.param.yaml new file mode 100644 index 0000000000000..932d544495d5b --- /dev/null +++ b/perception/autoware_mtr/config/mtr.param.yaml @@ -0,0 +1,17 @@ +/**: + ros__parameters: + model_params: + model_path: "$(var data_path)/mtr_static.onnx" + target_labels: ["VEHICLE", "PEDESTRIAN", "CYCLIST"] + num_past: 11 + num_mode: 6 + num_future: 80 + max_num_polyline: 768 + max_num_point: 20 + point_break_distance: 1.0 + intention_point_filepath: "$(var data_path)/intention_point.csv" + num_intention_point_cluster: 64 + build_params: + is_dynamic: false + precision: "FP32" + calibration: "MINMAX" diff --git a/perception/autoware_mtr/data/intention_point.csv b/perception/autoware_mtr/data/intention_point.csv new file mode 100644 index 0000000000000..4a1dd40d774df --- /dev/null +++ b/perception/autoware_mtr/data/intention_point.csv @@ -0,0 +1,192 @@ +83.31358111813775,-0.10072941747215425,VEHICLE +9.63708093098419,-0.21903979647725835,VEHICLE +177.02364184779515,0.14896461714506093,VEHICLE +30.202525932768452,39.770351748996276,VEHICLE +13.017453460143628,-47.65477812853313,VEHICLE +37.96551406831798,0.35882891080252355,VEHICLE +135.28288300240175,0.07318454776428718,VEHICLE +22.581299759917737,-11.499114037699696,VEHICLE +35.28419206368978,66.77646234434653,VEHICLE +102.21595906561359,0.03479736135528899,VEHICLE +53.018984885961935,-0.07070513548030011,VEHICLE +48.14500443347089,-63.176686454128905,VEHICLE +14.81040681866098,41.21025747789579,VEHICLE +70.98121857460572,48.76864840754326,VEHICLE +29.952205374359025,-24.491271384603404,VEHICLE +240.1121603513413,0.40876524870669395,VEHICLE +112.96998794807857,-0.6626782536634594,VEHICLE +11.535102863478023,24.6087503284935,VEHICLE +15.972999486515413,-22.402331618330425,VEHICLE +-29.749150635040934,19.365306554458183,VEHICLE +38.837490918969365,28.521719218554505,VEHICLE +75.12534715421508,0.45524707149479826,VEHICLE +22.1356009680794,-65.50042235168596,VEHICLE +147.667334683977,-0.15420460547298664,VEHICLE +32.92404725639907,13.425428408384349,VEHICLE +20.821241263140003,11.866509426621738,VEHICLE +16.421774854176704,61.40243322799172,VEHICLE +16.20521970358857,-0.05240557684667735,VEHICLE +30.39961023178619,0.02239890411088652,VEHICLE +71.3732282468828,-13.197252636743817,VEHICLE +160.46427534905007,-0.12450058691311172,VEHICLE +26.184995977566512,-50.72960305193536,VEHICLE +67.47509063983794,0.7088966461759214,VEHICLE +55.59341134160711,65.92540112801592,VEHICLE +110.19820491921735,13.36986405878499,VEHICLE +9.946459640959318,10.639932128723443,VEHICLE +96.45900409192926,38.745456793222075,VEHICLE +11.150642903256283,-11.065153981266088,VEHICLE +18.370803671188316,-35.208384113727355,VEHICLE +58.12332918930498,28.407400941348005,VEHICLE +122.46253938423963,-18.18162199541143,VEHICLE +-10.372362664539338,47.15608107630011,VEHICLE +123.88850325553135,0.8657285570358468,VEHICLE +23.14906846590817,0.14013577245075504,VEHICLE +36.72246632151757,-11.50791955554189,VEHICLE +62.17961723750621,-38.16113529166156,VEHICLE +60.255349648701284,0.3402476217172201,VEHICLE +24.676850823734142,26.15851575291674,VEHICLE +146.06185359624084,19.81777897597737,VEHICLE +87.37642530385885,-49.37120849166174,VEHICLE +29.217582632652448,52.466854930826635,VEHICLE +92.24255344207724,1.0269363594524006,VEHICLE +51.35773566968937,-16.858679079987603,VEHICLE +202.88969500859582,-0.6135434826913582,VEHICLE +44.54017657777002,47.644212807081004,VEHICLE +3.553274456564374,-0.050630482129089494,VEHICLE +48.341886681937126,14.266599993562439,VEHICLE +149.5579662788205,-24.15545279805254,VEHICLE +79.2199105091584,18.21919210874116,VEHICLE +-6.625598315633162,12.675144082377521,VEHICLE +94.47749147282539,-14.96227839803878,VEHICLE +45.51725648445333,-0.20613185836059866,VEHICLE +4.320320122401455,-25.861972448072912,VEHICLE +37.67711139073701,-38.236581219014134,VEHICLE +9.61012756292987,-1.626166760636742,PEDESTRIAN +1.6012943859685747,-0.5222880919927995,PEDESTRIAN +10.417423171793075,4.168765395082893,PEDESTRIAN +30.165877791012033,0.9443346726543759,PEDESTRIAN +11.275694770850642,0.602104658163488,PEDESTRIAN +7.575045215991116,2.229632545811262,PEDESTRIAN +5.440404657806666,-5.873988428286141,PEDESTRIAN +21.74637271118164,-0.20824338209629067,PEDESTRIAN +5.666046695576772,10.06275615427229,PEDESTRIAN +3.5434901504148772,0.3566361072142261,PEDESTRIAN +7.300691812980073,-1.4147541015839373,PEDESTRIAN +-7.239123311536066,5.278028183969957,PEDESTRIAN +12.838544084273805,0.27634052827289396,PEDESTRIAN +49.23597074809827,-1.083151083243521,PEDESTRIAN +10.232749005947733,-4.764095273689241,PEDESTRIAN +10.029744060560203,0.27960282964346456,PEDESTRIAN +20.097753611477934,12.677465005354446,PEDESTRIAN +-1.7234067196647338,6.012964824835459,PEDESTRIAN +-5.243654960241074,-4.54240223688957,PEDESTRIAN +4.227448060010606,4.672668618904917,PEDESTRIAN +10.924442376409258,-13.39580307688032,PEDESTRIAN +8.060251735871837,-5.231071510622576,PEDESTRIAN +9.363416959884319,6.717317995111996,PEDESTRIAN +14.50409186103127,-0.6558042344608993,PEDESTRIAN +9.079393733128594,3.082935659625415,PEDESTRIAN +12.050464391708372,2.326085873493335,PEDESTRIAN +18.95312432448069,-4.011247088511785,PEDESTRIAN +23.27299641548319,4.935875710020675,PEDESTRIAN +7.473063083456342,0.33913094917891407,PEDESTRIAN +2.1331759617569723,-8.755232001958268,PEDESTRIAN +1.6598805569020305,2.2264835146474247,PEDESTRIAN +25.060731768608093,-2.7297780113294716,PEDESTRIAN +6.24312299635352,-3.4591273970720238,PEDESTRIAN +12.771120711344025,-1.907544178221929,PEDESTRIAN +8.609554418619128,-7.480640052021412,PEDESTRIAN +11.839472696628974,-0.508671370756991,PEDESTRIAN +18.143608430845546,0.3659129395949102,PEDESTRIAN +38.62964275905064,-2.878807255199977,PEDESTRIAN +5.747641907439238,0.07273334688443972,PEDESTRIAN +12.750787362456322,5.3989472184330225,PEDESTRIAN +9.036868476563958,1.1392225010949337,PEDESTRIAN +8.900814069313066,-0.3074934092596276,PEDESTRIAN +5.77225670413436,-9.003250630102423,PEDESTRIAN +4.644974783536821,-1.7992167588678774,PEDESTRIAN +-2.734048414539984,1.3070609912082745,PEDESTRIAN +10.402230190194171,1.9045561768006598,PEDESTRIAN +7.672931251806371,4.772691267378191,PEDESTRIAN +2.8842629428003317,-4.572883587257534,PEDESTRIAN +34.02037658691406,13.169144248962402,PEDESTRIAN +-6.847214352000844,-0.384562531655485,PEDESTRIAN +8.649644048868026,-3.060604455724225,PEDESTRIAN +12.05244591361598,12.773428766350996,PEDESTRIAN +-0.9961213812009628,-6.185897613639262,PEDESTRIAN +18.33287762535943,6.177935240003799,PEDESTRIAN +2.5809229793501816,7.47769858790379,PEDESTRIAN +13.21153731608954,-5.18699638486847,PEDESTRIAN +14.148733378391636,1.7894855494592705,PEDESTRIAN +6.273506949164649,6.948122468861667,PEDESTRIAN +10.640743486095062,-0.6780669181702694,PEDESTRIAN +19.00134840878573,-11.189485073089601,PEDESTRIAN +11.03738529839831,-2.740356324669831,PEDESTRIAN +25.126273824207818,0.7949037749820683,PEDESTRIAN +-0.9026070001410034,-2.676681885078772,PEDESTRIAN +5.5613501533385215,2.4639029791278224,PEDESTRIAN +34.23404892454756,5.7000023380238956,CYCLIST +11.101248473572209,0.1878690882552445,CYCLIST +58.499493185397796,-0.12681177640910635,CYCLIST +24.58085452251553,2.471349286922017,CYCLIST +22.422780924829944,13.053710756630732,CYCLIST +48.74841719002559,0.516642472661775,CYCLIST +25.20883455276489,-20.044839859008786,CYCLIST +36.03024959564209,-2.68259093122823,CYCLIST +82.53688841599686,-0.20612470691020657,CYCLIST +46.24678649902344,21.0171902179718,CYCLIST +8.891835970037125,17.774530859554517,CYCLIST +20.25145509209431,-0.05082348066036646,CYCLIST +-10.578458561616781,0.3869281095616959,CYCLIST +21.098386228084564,34.59569489955903,CYCLIST +6.382585232908074,-16.506645029241387,CYCLIST +19.716625179563252,-5.445932748771849,CYCLIST +7.230897838592526,0.5130343590974809,CYCLIST +32.55335546794691,31.67215136477822,CYCLIST +72.18442005222127,1.009845858913357,CYCLIST +49.38804136003766,-28.37724140712193,CYCLIST +28.633797392177122,-0.3754431623335617,CYCLIST +15.30891491454325,4.449968129028509,CYCLIST +40.48208164932704,4.483412874807225,CYCLIST +18.588481744130455,27.931559165318806,CYCLIST +24.917308371407646,-9.43418445587158,CYCLIST +47.191020900920286,-5.356957855871168,CYCLIST +20.612224715096612,-39.57385090419225,CYCLIST +37.02404185703823,12.3643497467041,CYCLIST +26.742534319559734,21.288381364610455,CYCLIST +36.54223831027162,1.3074939094995166,CYCLIST +32.81819396373654,-0.6552566748020072,CYCLIST +44.87005353096204,0.5966045727236915,CYCLIST +8.368693745654564,27.36604765187139,CYCLIST +30.52261005200838,-6.0198339562667025,CYCLIST +57.868157704671226,32.555169423421226,CYCLIST +5.196329174814995,-7.779524571186786,CYCLIST +27.070191771895797,6.705634905232322,CYCLIST +47.03257816242722,8.60380427342541,CYCLIST +56.33857297897339,-7.308432310819625,CYCLIST +3.3166103733038277,-0.2503264608792952,CYCLIST +25.092793430426184,-1.8190701604653627,CYCLIST +52.86965798549964,-0.8058032429181647,CYCLIST +13.250703811645511,-26.973795970280964,CYCLIST +2.2024684443193365,10.044839859008789,CYCLIST +54.38298190081561,2.7558305506353027,CYCLIST +34.70839107737822,20.00758409500122,CYCLIST +64.3159403538966,-0.78287570637006,CYCLIST +15.62673833790947,-1.2094380376477014,CYCLIST +60.42839979088825,9.112664803214695,CYCLIST +33.95394577026367,-15.673711471557617,CYCLIST +12.455606278919038,-8.294502810826376,CYCLIST +-5.831538856029507,23.902103185653687,CYCLIST +39.58197693202807,-7.948247836983719,CYCLIST +40.74147271136849,-0.5924087476213362,CYCLIST +2.8944256986890515,-25.95755699702671,CYCLIST +28.955958038568497,12.641273751854897,CYCLIST +17.196987215677897,21.054426108466252,CYCLIST +31.001477972861448,2.74578192921504,CYCLIST +29.997188568115234,-29.97930075905539,CYCLIST +16.37813091278076,14.201883527967663,CYCLIST +20.38882696363661,6.6267309957080425,CYCLIST +42.03949113325639,36.66604232788086,CYCLIST +10.703137148981511,9.449819202008456,CYCLIST +16.409427689342962,-16.209246635437015,CYCLIST diff --git a/perception/autoware_mtr/include/autoware/mtr/agent.hpp b/perception/autoware_mtr/include/autoware/mtr/agent.hpp new file mode 100644 index 0000000000000..797ed0d8b5e07 --- /dev/null +++ b/perception/autoware_mtr/include/autoware/mtr/agent.hpp @@ -0,0 +1,405 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__MTR__AGENT_HPP_ +#define AUTOWARE__MTR__AGENT_HPP_ + +#include "autoware/mtr/fixed_queue.hpp" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace autoware::mtr +{ +constexpr size_t AgentStateDim = 12; + +enum AgentLabel { VEHICLE = 0, PEDESTRIAN = 1, CYCLIST = 2 }; + +enum AgentDimLabels { + X = 0, + Y = 1, + Z = 2, + L = 3, + W = 4, + H = 5, + YAW = 6, + VX = 7, + VY = 8, + AX = 9, + AY = 10, + VALIDITY = 11 +}; + +/** + * @brief A class to represent a single state of an agent. + */ +struct AgentState +{ + // Construct a new instance filling all elements by `0.0f`. + AgentState() {} + + /** + * @brief Construct a new instance with specified values. + * + * @param position 3D position [m]. + * @param dimension Box dimension [m]. + * @param yaw Heading yaw angle [rad]. + * @param velocity Velocity [m/s]. + * @param acceleration Acceleration [m/s^2]. + * @param is_valid `1.0f` if valid, otherwise `0.0f`. + */ + AgentState( + const geometry_msgs::msg::Point & position, const geometry_msgs::msg::Vector3 & dimension, + float yaw, const geometry_msgs::msg::Vector3 & velocity, + const geometry_msgs::msg::Vector3 & acceleration, float is_valid) + : position_(position), + dimension_(dimension), + yaw_(yaw), + velocity_(velocity), + acceleration_(acceleration), + is_valid_(is_valid) + { + } + + // Construct a new instance filling all elements by `0.0f`. + static AgentState empty() noexcept { return AgentState(); } + + // Return the agent state dimensions `D`. + static size_t dim() { return AgentStateDim; } + + // Return the x position. + float x() const { return position_.x; } + + // Return the y position. + float y() const { return position_.y; } + + // Return the z position. + float z() const { return position_.z; } + + // Return the length of object size. + float length() const { return dimension_.x; } + + // Return the width of object size. + float width() const { return dimension_.y; } + + // Return the height of object size. + float height() const { return dimension_.z; } + + // Return the yaw angle `[rad]`. + float yaw() const { return yaw_; } + + // Return the x velocity. + float vx() const { return velocity_.x; } + + // Return the y velocity. + float vy() const { return velocity_.y; } + + // Return the x acceleration. + float ax() const { return acceleration_.x; } + + // Return the y acceleration. + float ay() const { return acceleration_.y; } + + // Return `true` if the value is `1.0`. + bool is_valid() const { return is_valid_ == 1.0; } + + // Return the state attribute as an array. + std::array as_array() const noexcept + { + return {x(), y(), z(), length(), width(), height(), yaw(), vx(), vy(), ax(), ay(), is_valid_}; + } + +private: + geometry_msgs::msg::Point position_; + geometry_msgs::msg::Vector3 dimension_; + float yaw_{0.0f}; + geometry_msgs::msg::Vector3 velocity_; + geometry_msgs::msg::Vector3 acceleration_; + float is_valid_{0.0f}; +}; + +/** + * @brief A class to represent the state history of an agent. + */ +struct AgentHistory +{ + /** + * @brief Construct a new Agent History filling the latest state by input state. + * + * @param state Object current state. + * @param object_id Object ID. + * @param label_id Label ID. + * @param current_time Current timestamp. + * @param max_time_length History length. + */ + AgentHistory( + const AgentState & state, const std::string & object_id, const size_t label_id, + const double current_time, const size_t max_time_length) + : queue_(max_time_length), + object_id_(object_id), + label_id_(label_id), + latest_time_(current_time), + max_time_length_(max_time_length) + { + queue_.push_back(state); + } + + // Return the history time length `T`. + size_t length() const { return max_time_length_; } + + // Return the number of agent state dimensions `D`. + static size_t state_dim() { return AgentStateDim; } + + // Return the data size of history `T * D`. + size_t size() const { return max_time_length_ * state_dim(); } + + // Return the shape of history matrix ordering in `(T, D)`. + std::tuple shape() const { return {max_time_length_, state_dim()}; } + + // Return the object id. + const std::string & object_id() const { return object_id_; } + + // Return the label id. + size_t label_id() const { return label_id_; } + + /** + * @brief Return the last timestamp when non-empty state was pushed. + * + * @return double + */ + double latest_time() const { return latest_time_; } + + /** + * @brief Update history with input state and latest time. + * + * @param current_time The current timestamp. + * @param state The current agent state. + */ + void update(double current_time, const AgentState & state) noexcept + { + queue_.push_back(state); + latest_time_ = current_time; + } + + // Update history with all-zeros state, but latest time is not updated. + void update_empty() noexcept + { + const auto state = AgentState::empty(); + queue_.push_back(state); + } + + // Return a history states as an array. + std::vector as_array() const noexcept + { + std::vector output; + for (const auto & state : queue_) { + for (const auto & v : state.as_array()) { + output.push_back(v); + } + } + return output; + } + + /** + * @brief Check whether the latest valid state is too old or not. + * + * @param current_time Current timestamp. + * @param threshold Time difference threshold value. + * @return true If the difference is greater than threshold. + * @return false Otherwise + */ + bool is_ancient(double current_time, double threshold) const + { + /* TODO: Raise error if the current time is smaller than latest */ + return current_time - latest_time_ >= threshold; + } + + /** + * @brief Check whether the latest state data is valid. + * + * @return true If the end of element is 1.0f. + * @return false Otherwise. + */ + bool is_valid_latest() const { return get_latest_state().is_valid(); } + + // Get the latest agent state at `T`. + const AgentState & get_latest_state() const { return queue_.back(); } + + // Get the latest agent state at `T`. + std::optional get_latest_valid_state() const + { + auto latest_valid_state = std::find_if( + queue_.rbegin(), queue_.rend(), [](const auto & state) { return state.is_valid(); }); + return (latest_valid_state != queue_.rend()) ? std::make_optional(*latest_valid_state) + : std::nullopt; + } + +private: + FixedQueue queue_; + const std::string object_id_; + const size_t label_id_; + double latest_time_; + const size_t max_time_length_; +}; + +/** + * @brief A class containing whole state histories of all agent. + */ +struct AgentData +{ + /** + * @brief Construct a new instance. + * + * @param histories An array of histories for each object. + * @param ego_index An index of ego. + * @param target_indices Indices of target agents. + * @param label_ids An array of label indices for each object. + * @param timestamps An array of timestamps. + */ + AgentData( + const std::vector & histories, const size_t ego_index, + const std::vector & target_indices, const std::vector & label_ids, + const std::vector & timestamps) + : num_target_(target_indices.size()), + num_agent_(histories.size()), + time_length_(timestamps.size()), + ego_index_(ego_index), + target_indices_(target_indices), + label_ids_(label_ids), + timestamps_(timestamps) + { + data_.reserve(num_agent_ * time_length_ * state_dim()); + for (auto & history : histories) { + for (const auto & v : history.as_array()) { + data_.push_back(v); + } + } + + current_target_data_.reserve(num_target_ * state_dim()); // (B, D) + target_label_ids_.reserve(num_target_); + for (const auto & idx : target_indices) { + target_label_ids_.emplace_back(label_ids.at(idx)); + for (const auto & v : histories.at(idx).get_latest_state().as_array()) { + current_target_data_.push_back(v); + } + } + + ego_data_.reserve(time_length_ * state_dim()); + for (const auto & v : histories.at(ego_index).as_array()) { + ego_data_.push_back(v); + } + } + + // Return the number of classes `C`. + static size_t num_class() { return 3; } // TODO(ktro2828): Do not use magic number. + + // Return the number of target agents `B`. + size_t num_target() const { return num_target_; } + + // Return the number of agents `N`. + size_t num_agent() const { return num_agent_; } + + // Return the timestamp length `T`. + size_t time_length() const { return time_length_; } + + // Return the number of agent state dimensions `D`. + static size_t state_dim() { return AgentStateDim; } + + // Return the index of ego. + size_t ego_index() const { return ego_index_; } + + // Return the vector of indices of target agents, in shape `[B]`. + const std::vector & target_indices() const { return target_indices_; } + + // Return the vector of label ids of all agents, in shape `[N]`. + const std::vector & label_ids() const { return label_ids_; } + + // Return the vector of label ids of target agents, in shape `[B]`. + const std::vector & target_label_ids() const { return target_label_ids_; } + + // Return the vector of timestamps in shape `[T]`. + const std::vector & timestamps() const { return timestamps_; } + + // Return the number of all elements `N*T*D`. + size_t size() const { return num_agent_ * time_length_ * state_dim(); } + + // Return the number of state dimensions of MTR input `T+C+D+3`. + size_t input_dim() const { return time_length_ + state_dim() + num_class() + 3; } + + // Return the data shape ordering in (N, T, D). + std::tuple shape() const + { + return {num_agent_, time_length_, state_dim()}; + } + + // Return the address pointer of data array. + const float * data_ptr() const noexcept { return data_.data(); } + + // Return the address pointer of data array for target agents. + const float * current_target_data_ptr() const noexcept { return current_target_data_.data(); } + + // Return the address pointer of data array for ego vehicle. + const float * ego_data_ptr() const noexcept { return ego_data_.data(); } + +private: + size_t num_target_; + size_t num_agent_; + size_t time_length_; + size_t ego_index_; + std::vector target_indices_; + std::vector label_ids_; + std::vector target_label_ids_; + std::vector timestamps_; + std::vector data_; + std::vector current_target_data_; + std::vector ego_data_; +}; + +// Get label names from label indices. +inline std::vector getLabelNames(const std::vector & label_index) +{ + std::vector label_names; + label_names.reserve(label_index.size()); + for (const auto & idx : label_index) { + switch (idx) { + case 0: + label_names.emplace_back("VEHICLE"); + break; + case 1: + label_names.emplace_back("PEDESTRIAN"); + break; + case 2: + label_names.emplace_back("CYCLIST"); + break; + default: + std::ostringstream msg; + msg << "Error invalid label index: " << idx; + throw std::runtime_error(msg.str()); + break; + } + } + return label_names; +} + +} // namespace autoware::mtr +#endif // AUTOWARE__MTR__AGENT_HPP_ diff --git a/perception/autoware_mtr/include/autoware/mtr/builder.hpp b/perception/autoware_mtr/include/autoware/mtr/builder.hpp new file mode 100644 index 0000000000000..7dbe0895caa8d --- /dev/null +++ b/perception/autoware_mtr/include/autoware/mtr/builder.hpp @@ -0,0 +1,243 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__MTR__BUILDER_HPP_ +#define AUTOWARE__MTR__BUILDER_HPP_ + +#include +#include + +#include +namespace fs = ::std::filesystem; + +#include + +#include +#include +#include +#include +#include +#include + +namespace autoware::mtr +{ + +template +struct TrtDeleter +{ + void operator()(T * obj) const + { + if (obj) { +#if NV_TENSORRT_MAJOR >= 8 + delete obj; +#else + obj->destroy(); +#endif + } + } +}; // struct TrtDeleter + +template +using TrtUniquePtr = std::unique_ptr>; + +// Type names of precisions. +enum PrecisionType { FP32 = 0, FP16 = 1, INT8 = 2 }; + +// Return corresponding PrecisionType from string. +inline PrecisionType toPrecisionType(const std::string & name) +{ + if (name == "FP32") { + return PrecisionType::FP32; + } + if (name == "FP16") { + return PrecisionType::FP16; + } + if (name == "INT8") { + return PrecisionType::INT8; + } + throw std::invalid_argument("Invalid precision name."); +} + +// Type names of calibrations. +enum CalibrationType { ENTROPY = 0, LEGACY = 1, PERCENTILE = 2, MINMAX = 3 }; + +// Return corresponding CalibrationType from string. +inline CalibrationType toCalibrationType(const std::string & name) +{ + if (name == "ENTROPY") { + return CalibrationType::ENTROPY; + } + if (name == "LEGACY") { + return CalibrationType::LEGACY; + } + if (name == "PERCENTILE") { + return CalibrationType::PERCENTILE; + } + if (name == "MINMAX") { + return CalibrationType::MINMAX; + } + throw std::invalid_argument("Invalid calibration name."); +} + +struct BatchOptConfig +{ + /** + * @brief Construct a new OptimizationConfig for a static shape inference. + * + * @param value + */ + explicit BatchOptConfig(const int32_t value) : k_min(value), k_opt(value), k_max(value) {} + + /** + * @brief Construct a new OptimizationConfig for a dynamic shape inference. + * + * @param k_min + * @param k_opt + * @param k_max + */ + BatchOptConfig(const int32_t k_min, const int32_t k_opt, const int32_t k_max) + : k_min(k_min), k_opt(k_opt), k_max(k_max) + { + } + + int32_t k_min, k_opt, k_max; +}; // struct BatchOptConfig + +struct BuildConfig +{ + // type of precision + PrecisionType precision; + + // type for calibration + CalibrationType calibration; + + BatchOptConfig batch_target; + BatchOptConfig batch_agent; + + /** + * @brief Construct a new instance with default configurations. + */ + BuildConfig() + : precision(PrecisionType::FP32), + calibration(CalibrationType::MINMAX), + batch_target(1, 10, 20), + batch_agent(1, 30, 50), + is_dynamic_(false) + { + } + + /** + * @brief Construct a new build config. + * + * @param is_dynamic + * @param precision + * @param calibration + */ + BuildConfig( + const bool is_dynamic, const std::string & precision, const std::string & calibration, + const BatchOptConfig & batch_target = BatchOptConfig(1, 10, 20), + const BatchOptConfig & batch_agent = BatchOptConfig(1, 30, 50)) + : precision(toPrecisionType(precision)), + calibration(toCalibrationType(calibration)), + batch_target(batch_target), + batch_agent(batch_agent), + is_dynamic_(is_dynamic) + { + } + + bool is_dynamic() const { return is_dynamic_; } + +private: + bool is_dynamic_; +}; // struct BuildConfig + +class MTRBuilder +{ +public: + /** + * @brief Construct a new instance. + * + * @param model_path Path to engine or onnx file. + * @param build_config The configuration of build. + * @param max_workspace_size The max workspace size. + */ + MTRBuilder( + const std::string & model_path, const BuildConfig & build_config = BuildConfig(), + const size_t max_workspace_size = (1ULL << 63)); + + /** + * @brief Setup engine for inference. After finishing setup successfully, `isInitialized` must + * return `true`. + */ + void setup(); + + /** + * @brief Check whether engine was initialized successfully. + * + * @return True if plugins were initialized successfully. + */ + [[nodiscard]] bool isInitialized() const; + + // Return true if the model supports dynamic shape inference. + [[nodiscard]] bool isDynamic() const; + + // Set binding dimensions for specified for dynamic shape inference. + bool setBindingDimensions(int index, nvinfer1::Dims dimensions); + + /** + * @brief A wrapper of `nvinfer1::IExecuteContext::enqueueV2`. + * + * @param bindings An array of pointers to input and output buffers for the network. + * @param stream A cuda stream on which the inference kernels will be enqueued. + * @param inputConsumed An optional event which will be signaled when the input buffers can be + * refilled with new data. + * @return True If the kernels were enqueued successfully. + */ + bool enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * inputConsumed); + +private: + /** + * @brief Load engin file. + * + * @param filepath Engine file path. + * @return True if the engine were loaded successfully. + */ + bool loadEngine(const std::string & filepath); + + // Create a cache path of engine file. + [[nodiscard]] fs::path createEngineCachePath() const; + + /** + * @brief Build engine from onnx file. + * + * @param filepath Onnx file path. + * @param output_engine_filepath Output engine file path. + * @return True if the engine were built successfully. + */ + bool buildEngineFromOnnx( + const std::string & filepath, const std::string & output_engine_filepath); + + autoware::tensorrt_common::Logger logger_; + TrtUniquePtr runtime_; + TrtUniquePtr engine_; + TrtUniquePtr context_; + + fs::path model_filepath_; + size_t max_workspace_size_; + std::unique_ptr build_config_; + + bool is_initialized_{false}; +}; // class MTRBuilder +} // namespace autoware::mtr +#endif // AUTOWARE__MTR__BUILDER_HPP_ diff --git a/perception/autoware_mtr/include/autoware/mtr/conversions/history.hpp b/perception/autoware_mtr/include/autoware/mtr/conversions/history.hpp new file mode 100644 index 0000000000000..56d6bcb06a712 --- /dev/null +++ b/perception/autoware_mtr/include/autoware/mtr/conversions/history.hpp @@ -0,0 +1,62 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__MTR__CONVERSIONS__HISTORY_HPP_ +#define AUTOWARE__MTR__CONVERSIONS__HISTORY_HPP_ + +#include "autoware/mtr/agent.hpp" + +#include +#include +#include +#include +#include +#include + +namespace autoware::mtr +{ + +/** + * @brief Transform a 2D point with a reference point and rotation + * + * @param history history to modify. + * @param reference_state reference state used as reference frame. + * @return vector of histories, each in the reference frame of the last state. + */ +[[nodiscard]] std::pair transform2D( + const std::pair input_xy, const std::pair reference_xy, + const std::pair rotation_cos_sin); + +/** + * @brief Get histories with agent states in the reference frame of the last state + * + * @param history history to modify. + * @param reference_state reference state used as reference frame. + * @return vector of histories, each in the reference frame of the last state. + */ +[[nodiscard]] AgentHistory getAgentCentricHistory( + const AgentHistory & history, const AgentState & reference_state); + +/** + * @brief Get histories with agent states in the reference frame of the last state + * + * @param histories vector of histories to modify. + * @return vector of histories, each in the reference frame of the last state. + */ +[[nodiscard]] std::vector getAgentCentricHistories( + const std::vector & histories); + +} // namespace autoware::mtr + +#endif // AUTOWARE__MTR__CONVERSIONS__HISTORY_HPP_ diff --git a/perception/autoware_mtr/include/autoware/mtr/conversions/lanelet.hpp b/perception/autoware_mtr/include/autoware/mtr/conversions/lanelet.hpp new file mode 100644 index 0000000000000..17742d333acca --- /dev/null +++ b/perception/autoware_mtr/include/autoware/mtr/conversions/lanelet.hpp @@ -0,0 +1,234 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__MTR__CONVERSIONS__LANELET_HPP_ +#define AUTOWARE__MTR__CONVERSIONS__LANELET_HPP_ + +#include "autoware/mtr/polyline.hpp" + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace autoware::mtr +{ +/** + * @brief Insert lane points into the container from the end of it. + * + * @param points Sequence of points to be inserted. + * @param container Points container. + */ +inline void insertLanePoints( + const std::vector & points, std::vector & container) +{ + container.reserve(container.size() * 2); + container.insert(container.end(), points.begin(), points.end()); +} + +inline lanelet::Optional toTypeName(const lanelet::ConstLanelet & lanelet) +{ + return lanelet.hasAttribute("type") ? lanelet.attribute("type").as() + : lanelet::Optional(); +} + +inline lanelet::Optional toTypeName(const lanelet::ConstLineString3d & linestring) +{ + return linestring.hasAttribute("type") ? linestring.attribute("type").as() + : lanelet::Optional(); +} + +/** + * @brief Extract the subtype name from a lanelet. + * + * @param lanelet Lanelet instance. + * @return std::optional + */ +inline lanelet::Optional toSubtypeName(const lanelet::ConstLanelet & lanelet) noexcept +{ + return lanelet.hasAttribute("subtype") ? lanelet.attribute("subtype").as() + : lanelet::Optional(); +} + +/** + * @brief Extract the subtype name from a 3D linestring. + * + * @param linestring 3D linestring instance. + * @return lanelet::Optional + */ +inline lanelet::Optional toSubtypeName( + const lanelet::ConstLineString3d & linestring) noexcept +{ + return linestring.hasAttribute("subtype") ? linestring.attribute("subtype").as() + : lanelet::Optional(); +} + +/** + * @brief Check if the specified lanelet is the turnable intersection. + * + * @param lanelet Lanelet instance. + * @return true if the lanelet has the attribute named turn_direction. + */ +inline bool isTurnableIntersection(const lanelet::ConstLanelet & lanelet) noexcept +{ + return lanelet.hasAttribute("turn_direction"); +} + +/** + * @brief Check if the specified lanelet subtype is kind of lane. + * + * @param subtype + * @return True if the lanelet subtype is the one of the (road, highway, road_shoulder, + * pedestrian_lane, bicycle_lane, walkway). + */ +inline bool isLaneLike(const lanelet::Optional & subtype) +{ + if (!subtype) { + return false; + } + const auto & subtype_str = subtype.value(); + return ( + subtype_str == "road" || subtype_str == "highway" || subtype_str == "road_shoulder" || + subtype_str == "pedestrian_lane" || subtype_str == "bicycle_lane" || subtype_str == "walkway"); +} + +/** + * @brief Check if the specified lanelet subtype is kind of the roadway. + * + * @param subtype Subtype of the corresponding lanelet. + * @return True if the subtype is the one of the (road, highway, road_shoulder). + */ +inline bool isRoadwayLike(const lanelet::Optional & subtype) +{ + if (!subtype) { + return false; + } + const auto & subtype_str = subtype.value(); + return subtype_str == "road" || subtype_str == "highway" || subtype_str == "road_shoulder"; +} + +/** + * @brief Check if the specified linestring is kind of the boundary. + * + * @param linestring 3D linestring. + * @return True if the type is the one of the (line_thin, line_thick, road_boarder) and the subtype + * is not virtual. + */ +inline bool isBoundaryLike(const lanelet::ConstLineString3d & linestring) +{ + const auto type = toTypeName(linestring); + const auto subtype = toSubtypeName(linestring); + if (!type || !subtype) { + return false; + } + + const auto & type_str = type.value(); + const auto & subtype_str = subtype.value(); + return (type_str == "line_thin" || type_str == "line_thick" || type_str == "road_boarder") && + subtype_str != "virtual"; +} + +/** + * @brief Check if the specified linestring is the kind of crosswalk. + * + * @param subtype Subtype of the corresponding polygon. + * @return True if the lanelet subtype is the one of the (crosswalk,). + */ +inline bool isCrosswalkLike(const lanelet::Optional & subtype) +{ + if (!subtype) { + return false; + } + + const auto & subtype_str = subtype.value(); + return subtype_str == "crosswalk"; +} + +/** + * @brief A class to convert lanelet map to polyline. + */ +class LaneletConverter +{ +public: + /** + * @brief Construct a new Lanelet Converter object + * + * @param lanelet_map_ptr Pointer of loaded lanelet map. + * @param max_num_polyline The max number of polylines to be contained in the tensor. If the total + * number of polylines are less than this value, zero-filled polylines will be padded. + * @param max_num_point The max number of points to be contained in a single polyline. + * @param point_break_distance Distance threshold to separate two polylines. + */ + explicit LaneletConverter( + lanelet::LaneletMapConstPtr lanelet_map_ptr, size_t max_num_polyline, size_t max_num_point, + float point_break_distance) + : lanelet_map_ptr_(lanelet_map_ptr), + max_num_polyline_(max_num_polyline), + max_num_point_(max_num_point), + point_break_distance_(point_break_distance) + { + } + + /** + * @brief Convert a lanelet map to the polyline data except of points whose distance from the + * specified position is farther than the threshold. + * + * @param position Origin to check the distance from this. + * @param distance_threshold Distance threshold + * @return std::optional + */ + std::optional convert( + const geometry_msgs::msg::Point & position, double distance_threshold) const; + +private: + /** + * @brief Convert a linestring to the set of polylines. + * + * @param linestring Linestring instance. + * @param position Origin to check the distance from this. + * @param distance_threshold Distance threshold from the specified position. + * @return std::vector + */ + std::vector fromLinestring( + const lanelet::ConstLineString3d & linestring, const geometry_msgs::msg::Point & position, + double distance_threshold) const noexcept; + + /** + * @brief Convert a polygon to the set of polylines. + * + * @param polygon Polygon instance. + * @param position Origin to check the distance from this. + * @param distance_threshold Distance threshold from the specified position. + * @return std::vector + */ + std::vector fromPolygon( + const lanelet::CompoundPolygon3d & polygon, const geometry_msgs::msg::Point & position, + double distance_threshold) const noexcept; + + lanelet::LaneletMapConstPtr lanelet_map_ptr_; //!< Pointer of lanelet map. + size_t max_num_polyline_; //!< The max number of polylines. + size_t max_num_point_; //!< The max number of points. + float point_break_distance_; //!< Distance threshold to separate two polylines. +}; +} // namespace autoware::mtr + +#endif // AUTOWARE__MTR__CONVERSIONS__LANELET_HPP_ diff --git a/perception/autoware_mtr/include/autoware/mtr/cuda_helper.hpp b/perception/autoware_mtr/include/autoware/mtr/cuda_helper.hpp new file mode 100644 index 0000000000000..468b6b61d3352 --- /dev/null +++ b/perception/autoware_mtr/include/autoware/mtr/cuda_helper.hpp @@ -0,0 +1,138 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * This code is licensed under CC0 1.0 Universal (Public Domain). + * You can use this without any limitation. + * https://creativecommons.org/publicdomain/zero/1.0/deed.en + */ + +#ifndef AUTOWARE__MTR__CUDA_HELPER_HPP_ +#define AUTOWARE__MTR__CUDA_HELPER_HPP_ + +#include + +#include +#include +#include +#include + +#define CHECK_CUDA_ERROR(e) (cuda::check_error(e, __FILE__, __LINE__)) + +namespace cuda +{ +/** + * @brief Throw if the input cuda error is not `cudaSuccess`. + * + * @param e The cuda error types. + * @param f The file name. + * @param n The line number. + */ +inline void check_error(const ::cudaError_t e, const char * f, int n) +{ + if (e != ::cudaSuccess) { + ::std::stringstream s; + s << ::cudaGetErrorName(e) << " (" << e << ")@" << f << "#L" << n << ": " + << ::cudaGetErrorString(e); + throw ::std::runtime_error{s.str()}; + } +} + +struct deleter +{ + void operator()(void * p) const { CHECK_CUDA_ERROR(::cudaFree(p)); } +}; + +template +using unique_ptr = ::std::unique_ptr; + +template +typename ::std::enable_if<::std::is_array::value, cuda::unique_ptr>::type make_unique( + const ::std::size_t n) +{ + using U = typename ::std::remove_extent::type; + U * p; + CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast(&p), sizeof(U) * n)); + return cuda::unique_ptr{p}; +} + +template +cuda::unique_ptr make_unique() +{ + T * p; + CHECK_CUDA_ERROR(::cudaMalloc(reinterpret_cast(&p), sizeof(T))); + return cuda::unique_ptr{p}; +} + +constexpr size_t CUDA_ALIGN = 256; + +template +inline size_t get_size_aligned(size_t num_elem) +{ + size_t size = num_elem * sizeof(T); + size_t extra_align = 0; + if (size % CUDA_ALIGN != 0) { + extra_align = CUDA_ALIGN - size % CUDA_ALIGN; + } + return size + extra_align; +} + +template +inline T * get_next_ptr(size_t num_elem, void *& workspace, size_t & workspace_size) +{ + size_t size = get_size_aligned(num_elem); + if (size > workspace_size) { + throw ::std::runtime_error("Workspace is too small!"); + } + workspace_size -= size; + T * ptr = reinterpret_cast(workspace); + workspace = reinterpret_cast(reinterpret_cast(workspace) + size); + return ptr; +} + +class EventDebugger +{ +public: + void createEvent(cudaStream_t stream = 0) + { + CHECK_CUDA_ERROR(cudaEventCreate(&start_)); + CHECK_CUDA_ERROR(cudaEventCreate(&stop_)); + CHECK_CUDA_ERROR(cudaEventRecord(start_, stream)); + has_event_ = true; + } + + void printElapsedTime(cudaStream_t stream = 0) + { + if (!has_event_) { + std::cerr << "No event was found" << std::endl; + } else { + CHECK_CUDA_ERROR(cudaEventRecord(stop_, stream)); + CHECK_CUDA_ERROR(cudaEventSynchronize(stop_)); + float elapsed_time; + CHECK_CUDA_ERROR(cudaEventElapsedTime(&elapsed_time, start_, stop_)); + std::cout << "Processing time: " << elapsed_time << "ms" << std::endl; + cudaEventDestroy(start_); + cudaEventDestroy(stop_); + has_event_ = false; + } + } + +private: + cudaEvent_t start_, stop_; + bool has_event_{false}; +}; + +} // namespace cuda + +#endif // AUTOWARE__MTR__CUDA_HELPER_HPP_ diff --git a/perception/autoware_mtr/include/autoware/mtr/fixed_queue.hpp b/perception/autoware_mtr/include/autoware/mtr/fixed_queue.hpp new file mode 100644 index 0000000000000..8645628848792 --- /dev/null +++ b/perception/autoware_mtr/include/autoware/mtr/fixed_queue.hpp @@ -0,0 +1,87 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__MTR__FIXED_QUEUE_HPP_ +#define AUTOWARE__MTR__FIXED_QUEUE_HPP_ + +#include +#include +#include + +namespace autoware::mtr +{ + +template +class FixedQueue +{ +public: + using size_type = typename std::deque::size_type; + using reference = typename std::deque::reference; + using const_reference = typename std::deque::const_reference; + using iterator = typename std::deque::iterator; + using riterator = typename std::reverse_iterator; + using const_iterator = typename std::deque::const_iterator; + using rconst_iterator = typename std::reverse_iterator; + + explicit FixedQueue(size_type size) : queue_(size) {} + + void push_back(const T && t) noexcept + { + queue_.pop_front(); + queue_.push_back(t); + } + + void push_back(const T & t) noexcept + { + queue_.pop_front(); + queue_.push_back(t); + } + + void push_front(const T && t) noexcept + { + queue_.pop_back(); + queue_.push_front(t); + } + + void push_front(const T & t) noexcept + { + queue_.pop_back(); + queue_.push_front(t); + } + + reference front() noexcept { return queue_.front(); } + const_reference front() const noexcept { return queue_.front(); } + + reference back() noexcept { return queue_.back(); } + const_reference back() const noexcept { return queue_.back(); } + + iterator begin() noexcept { return queue_.begin(); } + const_iterator begin() const noexcept { return queue_.begin(); } + + iterator end() noexcept { return queue_.end(); } + const_iterator end() const noexcept { return queue_.end(); } + + riterator rbegin() noexcept { return queue_.rbegin(); } + rconst_iterator rbegin() const noexcept { return queue_.rbegin(); } + + riterator rend() noexcept { return queue_.rend(); } + rconst_iterator rend() const noexcept { return queue_.rend(); } + + size_type size() const noexcept { return queue_.size(); } + +private: + std::deque queue_; +}; +} // namespace autoware::mtr +#endif // AUTOWARE__MTR__FIXED_QUEUE_HPP_ diff --git a/perception/autoware_mtr/include/autoware/mtr/intention_point.hpp b/perception/autoware_mtr/include/autoware/mtr/intention_point.hpp new file mode 100644 index 0000000000000..0cdf1c739dc3d --- /dev/null +++ b/perception/autoware_mtr/include/autoware/mtr/intention_point.hpp @@ -0,0 +1,108 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__MTR__INTENTION_POINT_HPP_ +#define AUTOWARE__MTR__INTENTION_POINT_HPP_ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace autoware::mtr +{ +/** + * @brief A class to load and store intention points. + */ +struct IntentionPoint +{ +public: + /** + * @brief Construct a new instance from a csv file. + * + * @param csv_filepath Path to csv file. + * @param num_cluster The number of clusters. + */ + static IntentionPoint from_file(size_t num_cluster, const std::string csv_filepath) + { + std::ifstream file(csv_filepath); + if (!file.is_open()) { + std::ostringstream err_msg; + err_msg << "Error opening file: " << csv_filepath << ". Please check if the file exists."; + throw std::runtime_error(err_msg.str()); + } + + std::vector> buffer; + std::string line; + while (std::getline(file, line)) { + std::istringstream ss(line); + float x, y; + std::string label; + + ss >> x; + ss.ignore(); + ss >> y; + ss.ignore(); + std::getline(ss, label, ','); + + buffer.emplace_back(x, y, label); + } + file.close(); + + std::unordered_map> data_map; + for (const auto & [x, y, label] : buffer) { + data_map[label].emplace_back(x); + data_map[label].emplace_back(y); + } + + return {num_cluster, data_map}; + } + + // Returns the point state dimension, which is 2 (x, y). + static size_t state_dim() noexcept { return 2; } + + /** + * @brief Load intention points for specified label names. + * + * @param label_names Array of label names for all agents, in shape [N]. + * @return std::vector Array of all points in shape, [N * num_cluster * 2]. + */ + std::vector as_array(const std::vector & label_names) const; + + // Return the size of intension point `K*D`. + size_t size() const noexcept; + + size_t num_cluster; //!< The number of point clusters, which is K. + +private: + /** + * @brief Construct a new Intention. + * + * @param data_map Map of intention points hashed by label names. + * @param num_cluster The number of clusters. + */ + IntentionPoint( + size_t num_cluster, const std::unordered_map> data_map) + : num_cluster(num_cluster), data_map_(data_map) + { + } + + std::unordered_map> data_map_; //!< Map of label name and points. +}; +} // namespace autoware::mtr +#endif // AUTOWARE__MTR__INTENTION_POINT_HPP_ diff --git a/perception/autoware_mtr/include/autoware/mtr/node.hpp b/perception/autoware_mtr/include/autoware/mtr/node.hpp new file mode 100644 index 0000000000000..4feaf0fe534a0 --- /dev/null +++ b/perception/autoware_mtr/include/autoware/mtr/node.hpp @@ -0,0 +1,142 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__MTR__NODE_HPP_ +#define AUTOWARE__MTR__NODE_HPP_ + +#include "autoware/mtr/agent.hpp" +#include "autoware/mtr/conversions/lanelet.hpp" +#include "autoware/mtr/fixed_queue.hpp" +#include "autoware/mtr/polyline.hpp" +#include "autoware/mtr/trajectory.hpp" +#include "autoware/mtr/trt_mtr.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace autoware::mtr +{ +using HADMapBin = autoware_map_msgs::msg::LaneletMapBin; +using autoware::vehicle_info_utils::VehicleInfo; +using autoware_perception_msgs::msg::ObjectClassification; +using autoware_perception_msgs::msg::PredictedObject; +using autoware_perception_msgs::msg::PredictedObjectKinematics; +using autoware_perception_msgs::msg::PredictedObjects; +using autoware_perception_msgs::msg::PredictedPath; +using autoware_perception_msgs::msg::TrackedObject; +using autoware_perception_msgs::msg::TrackedObjects; +using nav_msgs::msg::Odometry; + +class MTRNode : public rclcpp::Node +{ +public: + explicit MTRNode(const rclcpp::NodeOptions & node_options); + + // Object ID of the ego vehicle + inline static const std::string EGO_ID{"EGO"}; + +private: + // Main callback being invoked when the tracked objects topic is subscribed. + void callback(const TrackedObjects::ConstSharedPtr object_msg); + + // Callback being invoked when the HD map topic is subscribed. + void onMap(const HADMapBin::ConstSharedPtr map_msg); + + // Fetch data of Ego's odometry topic. + std::optional getLatestEgo(); + + // Convert Lanelet to `PolylineData`. + bool convertLaneletToPolyline(); + + // Remove ancient agent histories. + void removeAncientAgentHistory( + const double current_time, const TrackedObjects::ConstSharedPtr objects_msg); + + // Appends new states to history. + void updateAgentHistory( + const double current_time, const TrackedObjects::ConstSharedPtr objects_msg, + const TrackedObject & ego_msg); + + // Extract target agents and return corresponding indices. + // NOTE: Extract targets in order of proximity, closest first. + std::vector extractTargetAgent(const std::vector & histories); + + // Return the timestamps relative from the first element.Return the timestamps relative from the + // first element. + std::vector getRelativeTimestamps() const; + + // Generate `PredictedObject` from `PredictedTrajectory`. + PredictedObject createPredictedObject( + const TrackedObject & object, const PredictedTrajectory & trajectory); + + // ROS Publisher and Subscriber + // TODO(ktro2828): add debug publisher + rclcpp::Publisher::SharedPtr pub_objects_; + rclcpp::Subscription::SharedPtr sub_objects_; + rclcpp::Subscription::SharedPtr sub_map_; + // polling subscriber + autoware::universe_utils::InterProcessPollingSubscriber sub_ego_{ + this, "/localization/kinematic_state"}; + + // Lanelet map pointers + std::shared_ptr lanelet_map_ptr_; + std::shared_ptr routing_graph_ptr_; + std::shared_ptr traffic_rules_ptr_; + std::unique_ptr lanelet_converter_ptr_; + + // Agent history + std::map agent_history_map_; + std::map object_msg_map_; + VehicleInfo vehicle_info_; + + // Pose transform listener + autoware::universe_utils::TransformListener transform_listener_; + + // MTR parameters + std::unique_ptr config_ptr_; + std::unique_ptr build_config_ptr_; + std::unique_ptr model_ptr_; + + std::unique_ptr>> ego_states_; + std::unique_ptr> timestamps_; +}; // class MTRNode +} // namespace autoware::mtr +#endif // AUTOWARE__MTR__NODE_HPP_ diff --git a/perception/autoware_mtr/include/autoware/mtr/polyline.hpp b/perception/autoware_mtr/include/autoware/mtr/polyline.hpp new file mode 100644 index 0000000000000..3b729cf617522 --- /dev/null +++ b/perception/autoware_mtr/include/autoware/mtr/polyline.hpp @@ -0,0 +1,235 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__MTR__POLYLINE_HPP_ +#define AUTOWARE__MTR__POLYLINE_HPP_ + +#include +#include +#include +#include +#include + +namespace autoware::mtr +{ +constexpr size_t PointStateDim = 7; + +enum PolylineLabel { LANE = 0, ROAD_LINE = 1, ROAD_EDGE = 2, CROSSWALK = 3 }; + +struct LanePoint +{ + // Construct a new instance filling all elements by `0.0f`. + LanePoint() : data_({0.0f}) {} + + /** + * @brief Construct a new instance with specified values. + * + * @param x X position. + * @param y Y position. + * @param z Z position. + * @param dx Normalized delta x. + * @param dy Normalized delta y. + * @param dz Normalized delta z. + * @param label Label. + */ + LanePoint( + const float x, const float y, const float z, const float dx, const float dy, const float dz, + const float label) + : data_({x, y, z, dx, dy, dz, label}), x_(x), y_(y), z_(z), label_(label) + { + } + + // Construct a new instance filling all elements by `0.0f`. + static LanePoint empty() noexcept { return LanePoint(); } + + // Return the point state dimensions `D`. + static size_t dim() { return PointStateDim; } + + // Return the x position of the point. + float x() const { return x_; } + + // Return the y position of the point. + float y() const { return y_; } + + // Return the z position of the point. + float z() const { return z_; } + + // Return the label of the point. + float label() const { return label_; } + + /** + * @brief Return the distance between myself and another one. + * + * @param other Another point. + * @return float Distance between myself and another one. + */ + float distance(const LanePoint & other) const + { + return std::hypot(x_ - other.x(), y_ - other.y(), z_ - other.z()); + } + + // Return the address pointer of data array. + const float * data_ptr() const noexcept { return data_.data(); } + +private: + std::array data_; + float x_{0.0f}, y_{0.0f}, z_{0.0f}, label_{0.0f}; +}; + +struct PolylineData +{ + /** + * @brief Construct a new PolylineData instance. + * + * @param points Source points vector. + * @param min_num_polyline The minimum number of polylines should be generated. If the number of + * polylines, resulting in separating input points, is less than this value, empty polylines will + * be added. + * @param max_num_point The maximum number of points that each polyline can include. If the + * polyline contains fewer points than this value, empty points will be added. + * @param distance_threshold The distance threshold to separate polylines. + */ + PolylineData( + const std::vector & points, const size_t min_num_polyline, + const size_t max_num_point, const float distance_threshold) + : num_polyline_(0), num_point_(max_num_point), distance_threshold_(distance_threshold) + { + std::size_t point_cnt = 0; + + // point_cnt > PointNum at a to a new polyline group + // distance > threshold -> add to a new polyline group + for (std::size_t i = 0; i < points.size(); ++i) { + auto & cur_point = points.at(i); + + if (i == 0) { + addNewPolyline(cur_point, point_cnt); + continue; + } + + if (point_cnt >= num_point_) { + addNewPolyline(cur_point, point_cnt); + } else if (const auto & prev_point = points.at(i - 1); + cur_point.distance(prev_point) >= distance_threshold_ || + cur_point.label() != prev_point.label()) { + if (point_cnt < num_point_) { + addEmptyPoints(point_cnt); + } + addNewPolyline(cur_point, point_cnt); + } else { + addPoint(cur_point, point_cnt); + } + } + addEmptyPoints(point_cnt); + + if (num_polyline_ < min_num_polyline) { + addEmptyPolyline(min_num_polyline - num_polyline_); + } + } + + // Return the number of polylines `K`. + size_t num_polyline() const { return num_polyline_; } + + // Return the number of points contained in each polyline `P`. + size_t num_point() const { return num_point_; } + + // Return the number of point dimensions `D`. + static size_t state_dim() { return PointStateDim; } + + // Return the number of all elements `K*P*D`. + size_t size() const { return num_polyline_ * num_point_ * state_dim(); } + + // Return the number of state dimensions of MTR input `D+2`. + size_t input_dim() const { return state_dim() + 2; } + + // Return the data shape ordering in `(K, P, D)`. + std::tuple shape() const + { + return {num_polyline_, num_point_, state_dim()}; + } + + // Return the address pointer of data array. + const float * data_ptr() const noexcept { return data_.data(); } + +private: + /** + * @brief Add a new polyline group filled by empty points. This member function increments + * `PolylineNum` by `num_polyline` internally. + * + * @param num_polyline The number of polylines to add. + */ + void addEmptyPolyline(size_t num_polyline) + { + for (size_t i = 0; i < num_polyline; ++i) { + size_t point_cnt = 0; + auto empty_point = LanePoint::empty(); + addNewPolyline(empty_point, point_cnt); + addEmptyPoints(point_cnt); + } + } + + /** + * @brief Add a new polyline group with the specified point. This member function increments + * `PolylineNum` by `1` internally. + * + * @param point LanePoint instance. + * @param point_cnt The current count of points, which will be reset to `1`. + */ + void addNewPolyline(const LanePoint & point, size_t & point_cnt) + { + const auto s = point.data_ptr(); + for (size_t d = 0; d < state_dim(); ++d) { + data_.push_back(*(s + d)); + } + ++num_polyline_; + point_cnt = 1; + } + + /** + * @brief Add `(PointNum - point_cnt)` empty points filled by `0.0`. + * + * @param point_cnt The number of current count of points, which will be reset to `PointNum`. + */ + void addEmptyPoints(size_t & point_cnt) + { + const auto s = LanePoint::empty().data_ptr(); + for (std::size_t n = point_cnt; n < num_point_; ++n) { + for (std::size_t d = 0; d < state_dim(); ++d) { + data_.push_back(*(s + d)); + } + } + point_cnt = num_point_; + } + + /** + * @brief Add the specified point and increment `point_cnt` by `1`. + * + * @param point + * @param point_cnt + */ + void addPoint(const LanePoint & point, std::size_t & point_cnt) + { + const auto s = point.data_ptr(); + for (size_t d = 0; d < state_dim(); ++d) { + data_.push_back(*(s + d)); + } + ++point_cnt; + } + + size_t num_polyline_; + size_t num_point_; + std::vector data_; + const float distance_threshold_; +}; +} // namespace autoware::mtr +#endif // AUTOWARE__MTR__POLYLINE_HPP_ diff --git a/perception/autoware_mtr/include/autoware/mtr/trajectory.hpp b/perception/autoware_mtr/include/autoware/mtr/trajectory.hpp new file mode 100644 index 0000000000000..24641d8d4999d --- /dev/null +++ b/perception/autoware_mtr/include/autoware/mtr/trajectory.hpp @@ -0,0 +1,170 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__MTR__TRAJECTORY_HPP_ +#define AUTOWARE__MTR__TRAJECTORY_HPP_ + +#include +#include +#include +#include + +namespace autoware::mtr +{ + +constexpr size_t PredictedStateDim = 7; + +/** + * @brief A class to represent a predicted state. Note that output elements are (x, y, stdX, + * stdY, rho, vx, vy). + */ +struct PredictedState +{ + explicit PredictedState(const std::array & state) + : x_(state.at(0)), + y_(state.at(1)), + std_x_(state.at(2)), + std_y_(state.at(3)), + rho_(state.at(4)), + vx_(state.at(5)), + vy_(state.at(6)) + { + } + + PredictedState( + const double x, const double y, const double std_x, const double std_y, const double rho, + const double vx, const double vy) + : x_(x), y_(y), std_x_(std_x), std_y_(std_y), rho_(rho), vx_(vx), vy_(vy) + { + } + + // Return the predicted state dimensions `D`. + static size_t dim() { return PredictedStateDim; } + + // Return the predicted x position. + double x() const { return x_; } + + // Return the predicted y position. + double y() const { return y_; } + + // Return the predicted std x. + double std_x() const { return std_x_; } + + // Return the predicted mean y. + double std_y() const { return std_y_; } + + // Return the predicted rho. + double rho() const { return rho_; } + + // Return the predicted x velocity. + double vx() const { return vx_; } + + // Return the predicted y velocity. + double vy() const { return vy_; } + +private: + double x_, y_, std_x_, std_y_, rho_, vx_, vy_; +}; // struct PredictedState + +/** + * @brief A class to represent waypoints of a single mode. + */ +struct PredictedMode +{ + PredictedMode(const double score, const std::vector & waypoints, const size_t num_future) + : score_(score), num_future_(num_future) + { + for (size_t t = 0; t < num_future_; ++t) { + const auto start_itr = waypoints.cbegin() + t * state_dim(); + std::array state; + std::copy_n(start_itr, PredictedStateDim, state.begin()); + waypoints_.emplace_back(state); + } + } + + // Return the number of predicted future timestamps `T`. + size_t num_future() const { return num_future_; } + + // Return the number of predicted state dimensions `D`. + static size_t state_dim() { return PredictedStateDim; } + + // Return the predicted score. + double score() const { return score_; } + + // Return the vector of waypoints. + const std::vector & get_waypoints() const { return waypoints_; } + +private: + double score_; + size_t num_future_; + std::vector waypoints_; +}; // struct PredictedMode + +/** + * @brief A class to represent predicted trajectory for a single target agent. + */ +struct PredictedTrajectory +{ + /** + * @brief Construct a new instance. + * + * @note Predicted trajectories are sorted with the smallest scores. + * + * @param scores Predicted cores for each target, in shape `[B*M]`. + * @param trajectories Predicted trajectories for each target. `[B*M*T*D]`. + * @param num_mode The number of predicted modes. + * @param num_future The number of predicted timestamps. + */ + PredictedTrajectory( + const std::vector & scores, const std::vector & modes, const size_t num_mode, + const size_t num_future) + : num_mode_(num_mode), num_future_(num_future) + { + for (size_t m = 0; m < num_mode_; ++m) { + const auto score = scores.at(m); + const auto wp_itr = modes.cbegin() + m * num_future_ * state_dim(); + std::vector waypoints(wp_itr, wp_itr + num_future_ * state_dim()); + modes_.emplace_back(score, waypoints, num_future_); + } + // sort by score + sort_by_score(); + } + + // Return the number of predicted modes `M`. + size_t num_mode() const { return num_mode_; } + + // Return the number of predicted future timestamps `T`. + size_t num_future() const { return num_future_; } + + // Return the number of predicted state dimensions `D`. + static size_t state_dim() { return PredictedStateDim; } + + // Return predicted modes. Modes are sorted in descending order based on their scores. + const std::vector & get_modes() const { return modes_; } + +private: + // Sort modes in descending order based on their scores. + void sort_by_score() + { + std::sort(modes_.begin(), modes_.end(), [](const auto & mode1, const auto & mode2) { + return mode1.score() > mode2.score(); + }); + } + + size_t num_mode_; + size_t num_future_; + std::vector modes_; +}; // struct PredictedTrajectory +} // namespace autoware::mtr +#endif // AUTOWARE__MTR__TRAJECTORY_HPP_ diff --git a/perception/autoware_mtr/include/autoware/mtr/trt_mtr.hpp b/perception/autoware_mtr/include/autoware/mtr/trt_mtr.hpp new file mode 100644 index 0000000000000..a22bf806fe152 --- /dev/null +++ b/perception/autoware_mtr/include/autoware/mtr/trt_mtr.hpp @@ -0,0 +1,195 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__MTR__TRT_MTR_HPP_ +#define AUTOWARE__MTR__TRT_MTR_HPP_ + +#include "attention/trt_attn_value_computation.hpp" +#include "attention/trt_attn_weight_computation.hpp" +#include "autoware/mtr/agent.hpp" +#include "autoware/mtr/builder.hpp" +#include "autoware/mtr/cuda_helper.hpp" +#include "autoware/mtr/intention_point.hpp" +#include "autoware/mtr/polyline.hpp" +#include "autoware/mtr/trajectory.hpp" +#include "knn/trt_knn_batch.hpp" +#include "knn/trt_knn_batch_mlogk_kernel.hpp" + +#include +#include +#include +#include +#include + +namespace autoware::mtr +{ +/** + * @brief A configuration of MTR. + */ +struct MTRConfig +{ + /** + * @brief Construct a new instance. + * + * @param target_labels An array of target label names. + * @param num_past The number of past timestamps. + * @param num_mode The number of modes. + * @param num_future The number of future time step length predicted by MTR. + * @param max_num_polyline The max number of polylines which can be contained in a single input. + * @param max_num_point The max number of points contained in each polyline. + * @param intention_point_filepath The path to intention points file. + * @param num_intention_point_cluster The number of clusters for intension points. + */ + explicit MTRConfig( + const std::vector & target_labels = {"VEHICLE", "PEDESTRIAN", "CYCLIST"}, + const size_t num_past = 11, const size_t num_mode = 6, const size_t num_future = 80, + const size_t max_num_polyline = 768, const size_t max_num_point = 20, + const float point_break_distance = 1.0f, + std::string intention_point_filepath = "./data/intention_point.csv", + const size_t num_intention_point_cluster = 64) + : target_labels(target_labels), + num_class(target_labels.size()), + num_past(num_past), + num_mode(num_mode), + num_future(num_future), + max_num_polyline(max_num_polyline), + max_num_point(max_num_point), + point_break_distance(point_break_distance), + intention_point_filepath(std::move(intention_point_filepath)), + num_intention_point_cluster(num_intention_point_cluster) + { + } + + std::vector target_labels; + size_t num_class; + size_t num_past; + size_t num_mode; + size_t num_future; + size_t max_num_polyline; + size_t max_num_point; + float point_break_distance; + std::string intention_point_filepath; + size_t num_intention_point_cluster; +}; // struct MTRConfig + +/** + * @brief A class to inference with MTR. + */ +class TrtMTR +{ +public: + /** + * @brief Construct a new instance. + * + * @param model_path The path to engine or onnx file. + * @param config The configuration of model. + * @param build_config The configuration of build. + * @param max_workspace_size The max size of workspace. + * @param build_config The configuration of build. + */ + explicit TrtMTR( + const std::string & model_path, const MTRConfig & config = MTRConfig(), + const BuildConfig & build_config = BuildConfig(), + const size_t max_workspace_size = (1ULL << 30)); + + /** + * @brief Execute inference. + * + * @param agent_data The agent data to input. + * @param polyline_data The polyline data to input. + * @param trajectories A container to store predicted trajectories. + * @return True if the inference finishes successfully. + */ + bool doInference( + const AgentData & agent_data, const PolylineData & polyline_data, + std::vector & trajectories); + + /** + * @brief Return the model configuration. + * + * @return const MtrConfig& The model configuration which can not be updated. + */ + const MTRConfig & config() const { return config_; } + +private: + /** + * @brief Initialize pointers of cuda containers. + * + * @param agent_data The input agent data. + * @param polyline_data The input polyline data. + */ + void initCudaPtr(const AgentData & agent_data, const PolylineData & polyline_data); + + /** + * @brief Execute pre-process. + * + * @param agent_data The input agent data. + * @param polyline_data The input polyline data. + * @return True if the pre-process finishes successfully. + */ + bool preProcess(const AgentData & agent_data, const PolylineData & polyline_data); + + /** + * @brief Execute post-process. + * + * @param agent_data The input agent data. + * @param trajectories A container to store predicted trajectories. + * @return True if the post-process finishes successfully. + */ + bool postProcess(const AgentData & agent_data, std::vector & trajectories); + + // model parameters + MTRConfig config_; + + std::unique_ptr builder_; + cudaStream_t stream_{nullptr}; + + IntentionPoint intention_point_; + + // data size + // load from input data + int32_t num_target_, num_agent_, num_timestamp_, num_agent_attr_; + int32_t num_polyline_, num_point_, num_point_dim_, num_point_attr_; + // load from config + int32_t max_num_polyline_, num_mode_, num_future_, num_intention_point_; + + // source data + cuda::unique_ptr d_target_index_{nullptr}; + cuda::unique_ptr d_label_index_{nullptr}; + cuda::unique_ptr d_timestamp_{nullptr}; + cuda::unique_ptr d_trajectory_{nullptr}; + cuda::unique_ptr d_target_state_{nullptr}; + cuda::unique_ptr d_intention_point_{nullptr}; + cuda::unique_ptr d_polyline_{nullptr}; + + // preprocessed inputs + cuda::unique_ptr d_in_trajectory_{nullptr}; + cuda::unique_ptr d_in_trajectory_mask_{nullptr}; + cuda::unique_ptr d_in_last_pos_{nullptr}; + cuda::unique_ptr d_in_polyline_{nullptr}; + cuda::unique_ptr d_in_polyline_mask_{nullptr}; + cuda::unique_ptr d_in_polyline_center_{nullptr}; + // only used for topk extraction + cuda::unique_ptr d_tmp_polyline_{nullptr}; + cuda::unique_ptr d_tmp_polyline_mask_{nullptr}; + cuda::unique_ptr d_tmp_distance_{nullptr}; + + // outputs + cuda::unique_ptr d_out_score_{nullptr}; + cuda::unique_ptr d_out_trajectory_{nullptr}; + std::vector h_out_score_; + std::vector h_out_trajectory_; +}; // class TrtMTR +} // namespace autoware::mtr +#endif // AUTOWARE__MTR__TRT_MTR_HPP_ diff --git a/perception/autoware_mtr/launch/mtr.launch.xml b/perception/autoware_mtr/launch/mtr.launch.xml new file mode 100644 index 0000000000000..11fca98298923 --- /dev/null +++ b/perception/autoware_mtr/launch/mtr.launch.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/perception/autoware_mtr/lib/include/attention/trt_attn_value_computation.hpp b/perception/autoware_mtr/lib/include/attention/trt_attn_value_computation.hpp new file mode 100644 index 0000000000000..70c958c0575ed --- /dev/null +++ b/perception/autoware_mtr/lib/include/attention/trt_attn_value_computation.hpp @@ -0,0 +1,101 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ATTENTION__TRT_ATTN_VALUE_COMPUTATION_HPP_ +#define ATTENTION__TRT_ATTN_VALUE_COMPUTATION_HPP_ + +#include "common/trt_plugin_base.hpp" + +#include + +namespace autoware::trt_mtr +{ +/** + * @brief Attention value computation plugin. + * + * @param queryBatchCnt + * @param keyBatchCnt + * @param indexPairBatch + * @param indexPair + * @param attnWeight + * @param valueFeature + * @return outputs + */ +class AttentionValueComputation : public TRTPluginBase +{ +public: + explicit AttentionValueComputation(const std::string & name); + AttentionValueComputation(const std::string & name, const void * data, size_t length); + ~AttentionValueComputation() TRT_NOEXCEPT override; + + /* IPluginV2DynamicExt methods */ + nvinfer1::IPluginV2DynamicExt * clone() const TRT_NOEXCEPT override; + + nvinfer1::DimsExprs getOutputDimensions( + int outputIndex, const nvinfer1::DimsExprs * inputs, int nbInputs, + nvinfer1::IExprBuilder & exprBuilder) TRT_NOEXCEPT override; + + bool supportsFormatCombination( + int pos, const nvinfer1::PluginTensorDesc * ioDesc, int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; + + void configurePlugin( + const nvinfer1::DynamicPluginTensorDesc * inDesc, int nbInputs, + const nvinfer1::DynamicPluginTensorDesc * outDesc, int nbOutputs) TRT_NOEXCEPT override; + + size_t getWorkspaceSize( + const nvinfer1::PluginTensorDesc * inDesc, int nbInputs, + const nvinfer1::PluginTensorDesc * outDesc, int nbOutputs) const TRT_NOEXCEPT override; + + int enqueue( + const nvinfer1::PluginTensorDesc * inDesc, const nvinfer1::PluginTensorDesc * outDesc, + const void * const * inputs, void * const * outputs, void * workspace, + cudaStream_t stream) TRT_NOEXCEPT override; + + void attachToContext( + cudnnContext * cudnnCtx, cublasContext * cublasCtx, + nvinfer1::IGpuAllocator * gpuAllocator) TRT_NOEXCEPT override; + + void detachFromContext() TRT_NOEXCEPT override; + + /* IPluginV2Ext methods */ + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType * inTypes, int nbInputs) const TRT_NOEXCEPT override; + + /* IPluginV2 methods */ + const char * getPluginType() const TRT_NOEXCEPT override; + const char * getPluginVersion() const TRT_NOEXCEPT override; + int getNbOutputs() const TRT_NOEXCEPT override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void * buffer) const TRT_NOEXCEPT override; +}; // class AttentionValueComputation + +class AttentionValueComputationCreator : public TRTPluginCreatorBase +{ +public: + AttentionValueComputationCreator(); + + const char * getPluginName() const TRT_NOEXCEPT override; + + const char * getPluginVersion() const TRT_NOEXCEPT override; + + nvinfer1::IPluginV2DynamicExt * createPlugin( + const char * name, const nvinfer1::PluginFieldCollection * fc) TRT_NOEXCEPT override; + + nvinfer1::IPluginV2DynamicExt * deserializePlugin( + const char * name, const void * serialData, size_t serialLength) TRT_NOEXCEPT override; +}; // class AttentionValueComputationCreator +REGISTER_TENSORRT_PLUGIN(AttentionValueComputationCreator); +} // namespace autoware::trt_mtr +#endif // ATTENTION__TRT_ATTN_VALUE_COMPUTATION_HPP_ diff --git a/perception/autoware_mtr/lib/include/attention/trt_attn_value_computation_kernel.hpp b/perception/autoware_mtr/lib/include/attention/trt_attn_value_computation_kernel.hpp new file mode 100644 index 0000000000000..fdcb20a4b448e --- /dev/null +++ b/perception/autoware_mtr/lib/include/attention/trt_attn_value_computation_kernel.hpp @@ -0,0 +1,48 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ATTENTION__TRT_ATTN_VALUE_COMPUTATION_KERNEL_HPP_ +#define ATTENTION__TRT_ATTN_VALUE_COMPUTATION_KERNEL_HPP_ + +#include + +#include + +/** + * @brief The launcher to invoke attention value computation kernel. + * + * @param B The size of batch. + * @param Q The size of query. + * @param L The size of local. + * @param K The size of key. + * @param numHead The number of heads. + * @param headDim The number of head dimensions. + * @param queryBatchCnt The number of queries for each batch, in shape [B]. + * @param keyBatchCnt The number of keys for each batch, in shape [B]. + * @param indexPairBatch The indices of batch for corresponding query, in shape [Q]. + * @param indexPair The indices of key for corresponding query, in shape [Q*L]. + * @param attnWeight Source attention weights, in shape [Q*L*numHead]. + * @param valueFeature Source value features, in shape [K*numHead*headDim]. + * @param output Output container, in shape [Q*numHead*headDim]. + * @param stream CUDA stream. + * + * @return cudaError_t CUDA error type. + */ +cudaError_t AttentionValueComputationLauncher( + const int32_t B, const int32_t Q, const int32_t L, const int32_t K, const int32_t numHead, + const int32_t headDim, const int * queryBatchCnt, const int * keyBatchCnt, + const int * indexPairBatch, const int * indexPair, const float * attnWeight, + const float * valueFeature, float * output, cudaStream_t stream); + +#endif // ATTENTION__TRT_ATTN_VALUE_COMPUTATION_KERNEL_HPP_ diff --git a/perception/autoware_mtr/lib/include/attention/trt_attn_weight_computation.hpp b/perception/autoware_mtr/lib/include/attention/trt_attn_weight_computation.hpp new file mode 100644 index 0000000000000..42aaf2ecfeba7 --- /dev/null +++ b/perception/autoware_mtr/lib/include/attention/trt_attn_weight_computation.hpp @@ -0,0 +1,101 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ATTENTION__TRT_ATTN_WEIGHT_COMPUTATION_HPP_ +#define ATTENTION__TRT_ATTN_WEIGHT_COMPUTATION_HPP_ + +#include "common/trt_plugin_base.hpp" + +#include + +namespace autoware::trt_mtr +{ +/** + * @brief The function to compute attention weight. + * + * @param queryBatchCnt + * @param keyBatchCnt + * @param indexPairBatch + * @param indexPair + * @param queryFeature + * @param keyFeature + * @param output + */ +class AttentionWeightComputation : public TRTPluginBase +{ +public: + explicit AttentionWeightComputation(const std::string & name); + AttentionWeightComputation(const std::string & name, const void * data, size_t length); + ~AttentionWeightComputation() TRT_NOEXCEPT override; + + /* IPluginV2DynamicExt methods */ + nvinfer1::IPluginV2DynamicExt * clone() const TRT_NOEXCEPT override; + + nvinfer1::DimsExprs getOutputDimensions( + int outputIndex, const nvinfer1::DimsExprs * inputs, int nbInputs, + nvinfer1::IExprBuilder & exprBuilder) TRT_NOEXCEPT override; + + bool supportsFormatCombination( + int pos, const nvinfer1::PluginTensorDesc * ioDesc, int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; + + void configurePlugin( + const nvinfer1::DynamicPluginTensorDesc * inDesc, int nbInputs, + const nvinfer1::DynamicPluginTensorDesc * outDesc, int nbOutputs) TRT_NOEXCEPT override; + + size_t getWorkspaceSize( + const nvinfer1::PluginTensorDesc * inDesc, int nbInputs, + const nvinfer1::PluginTensorDesc * outDesc, int nbOutputs) const TRT_NOEXCEPT override; + + int enqueue( + const nvinfer1::PluginTensorDesc * inDesc, const nvinfer1::PluginTensorDesc * outDesc, + const void * const * inputs, void * const * outputs, void * workspace, + cudaStream_t stream) TRT_NOEXCEPT override; + + void attachToContext( + cudnnContext * cudnnCtx, cublasContext * cublasCtx, + nvinfer1::IGpuAllocator * gpuAllocator) TRT_NOEXCEPT override; + + void detachFromContext() TRT_NOEXCEPT override; + + /* IPluginV2Ext methods */ + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType * inTypes, int nbInputs) const TRT_NOEXCEPT override; + + /* IPluginV2 methods */ + const char * getPluginType() const TRT_NOEXCEPT override; + const char * getPluginVersion() const TRT_NOEXCEPT override; + int getNbOutputs() const TRT_NOEXCEPT override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void * buffer) const TRT_NOEXCEPT override; +}; // class AttentionWeightComputation + +class AttentionWeightComputationCreator : public TRTPluginCreatorBase +{ +public: + AttentionWeightComputationCreator(); + + const char * getPluginName() const TRT_NOEXCEPT override; + + const char * getPluginVersion() const TRT_NOEXCEPT override; + + nvinfer1::IPluginV2DynamicExt * createPlugin( + const char * name, const nvinfer1::PluginFieldCollection * fc) TRT_NOEXCEPT override; + + nvinfer1::IPluginV2DynamicExt * deserializePlugin( + const char * name, const void * serialData, size_t serialLength) TRT_NOEXCEPT override; +}; // class AttentionWeightComputationCreator +REGISTER_TENSORRT_PLUGIN(AttentionWeightComputationCreator); +} // namespace autoware::trt_mtr +#endif // ATTENTION__TRT_ATTN_WEIGHT_COMPUTATION_HPP_ diff --git a/perception/autoware_mtr/lib/include/attention/trt_attn_weight_computation_kernel.hpp b/perception/autoware_mtr/lib/include/attention/trt_attn_weight_computation_kernel.hpp new file mode 100644 index 0000000000000..5ff72f4637efb --- /dev/null +++ b/perception/autoware_mtr/lib/include/attention/trt_attn_weight_computation_kernel.hpp @@ -0,0 +1,48 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ATTENTION__TRT_ATTN_WEIGHT_COMPUTATION_KERNEL_HPP_ +#define ATTENTION__TRT_ATTN_WEIGHT_COMPUTATION_KERNEL_HPP_ + +#include + +#include + +/** + * @brief The launcher to invoke attention weight computation kernel. + * + * @param B The size of batch. + * @param Q The size of query. + * @param L The size of local. + * @param K The size of key. + * @param numHead The number of heads. + * @param headDim The number of head dimensions. + * @param queryBatchCnt The number of queries for each batch, in shape [B]. + * @param keyBatchCnt The number of keys for each batch, in shape [B]. + * @param indexPairBatch The indices of batch for corresponding query, in shape [Q]. + * @param indexPair The indices of key for corresponding query, in shape [Q*L]. + * @param queryFeature Source query features, in shape [Q*numHead*headDim]. + * @param keyFeature Source key features, in shape [K*numHead*headDim]. + * @param output Output container, in shape [Q*L*numHead]. + * @param stream CUDA stream. + * + * @return cudaError_t CUDA error type. + */ +cudaError_t AttentionWeightComputationLauncher( + const int32_t B, const int32_t Q, const int32_t L, const int32_t K, const int32_t numHead, + const int32_t headDim, const int * queryBatchCnt, const int * keyBatchCnt, + const int * indexPairBatch, const int * indexPair, const float * queryFeature, + const float * keyFeature, float * output, cudaStream_t stream); + +#endif // ATTENTION__TRT_ATTN_WEIGHT_COMPUTATION_KERNEL_HPP_ diff --git a/perception/autoware_mtr/lib/include/common/trt_plugin_base.hpp b/perception/autoware_mtr/lib/include/common/trt_plugin_base.hpp new file mode 100644 index 0000000000000..685cf075c1e43 --- /dev/null +++ b/perception/autoware_mtr/lib/include/common/trt_plugin_base.hpp @@ -0,0 +1,173 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef COMMON__TRT_PLUGIN_BASE_HPP_ +#define COMMON__TRT_PLUGIN_BASE_HPP_ + +#include "trt_plugin_helper.hpp" + +#include +#include +#include +#include + +#include +#include + +namespace autoware::trt_mtr +{ +#if NV_TENSORRT_MAJOR > 7 +#define TRT_NOEXCEPT noexcept +#else +#define TRT_NOEXCEPT +#endif + +class TRTPluginBase : public nvinfer1::IPluginV2DynamicExt +{ +public: + explicit TRTPluginBase(const std::string & name) : mLayerName(name) {} + + /* IPluginV2 methods */ + + /** + * @brief Return the plugin version. Should match the plugin version returned by the corresponding + * plugin creator. + * + * @return const char* + */ + const char * getPluginVersion() const TRT_NOEXCEPT override { return "1"; } + + /** + * @brief Initialize the layer for execution. This is called when the engine is created. + * + * @return int 0 for success, else non-zero (which will cause engine termination). + */ + int initialize() TRT_NOEXCEPT override { return STATUS_SUCCESS; } + + /** + * @brief Release resources acquired during plugin layer initialization. This is called when the + * engine is destroyed. + * + */ + void terminate() TRT_NOEXCEPT override {} + + /** + * @brief Destroy the plugin objects. This will be called when the network, builder or engine is + * destroyed. + * + */ + void destroy() TRT_NOEXCEPT override { delete this; } + + /** + * @brief Set the Plugin Namespace object + * + * @param pluginNamespace + */ + void setPluginNamespace(const char * pluginNamespace) TRT_NOEXCEPT override + { + mNamespace = pluginNamespace; + } + + /** + * @brief Get the Plugin Namespace object + * + * @return const char* + */ + const char * getPluginNamespace() const TRT_NOEXCEPT override { return mNamespace.c_str(); } + + /** + * @brief + * + * @param inDesc + * @param nbInputs + * @param outDesc + * @param nbOutputs + */ + void configurePlugin( + const nvinfer1::DynamicPluginTensorDesc *, int, const nvinfer1::DynamicPluginTensorDesc *, + int) TRT_NOEXCEPT override + { + } + + /** + * @brief Get the Workspace Size object + * + * @param inDesc + * @param nbInputs + * @param outDesc + * @param nbOutputs + * @return size_t + */ + size_t getWorkspaceSize( + const nvinfer1::PluginTensorDesc *, int, const nvinfer1::PluginTensorDesc *, + int) const TRT_NOEXCEPT override + { + return 0; + } + + /** + * @brief + * + * @param cudnnCtx + * @param cublasCtx + * @param gpuAllocator + */ + void attachToContext(cudnnContext *, cublasContext *, nvinfer1::IGpuAllocator *) + TRT_NOEXCEPT override + { + } + + /** + * @brief + * + */ + void detachFromContext() TRT_NOEXCEPT override {} + +protected: + const std::string mLayerName; + std::string mNamespace; + +#if NV_TENSORRT_MAJOR < 8 +protected: + // To prevent compiler warnings. + using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch; + using nvinfer1::IPluginV2DynamicExt::enqueue; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch; + using nvinfer1::IPluginV2DynamicExt::supportsFormat; +#endif +}; + +class TRTPluginCreatorBase : public nvinfer1::IPluginCreator +{ +public: + const char * getPluginVersion() const TRT_NOEXCEPT override { return "1"; } + + const nvinfer1::PluginFieldCollection * getFieldNames() TRT_NOEXCEPT override { return &mFC; } + + void setPluginNamespace(const char * pluginNamespace) TRT_NOEXCEPT override + { + mNamespace = pluginNamespace; + } + + const char * getPluginNamespace() const TRT_NOEXCEPT override { return mNamespace.c_str(); } + +protected: + nvinfer1::PluginFieldCollection mFC; + std::vector mPluginAttributes; + std::string mNamespace; +}; +} // namespace autoware::trt_mtr + +#endif // COMMON__TRT_PLUGIN_BASE_HPP_ diff --git a/perception/autoware_mtr/lib/include/common/trt_plugin_helper.hpp b/perception/autoware_mtr/lib/include/common/trt_plugin_helper.hpp new file mode 100644 index 0000000000000..5fcb1c571ec75 --- /dev/null +++ b/perception/autoware_mtr/lib/include/common/trt_plugin_helper.hpp @@ -0,0 +1,41 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef COMMON__TRT_PLUGIN_HELPER_HPP_ +#define COMMON__TRT_PLUGIN_HELPER_HPP_ + +#include + +#include + +#define THREADS_PER_BLOCK 256 +#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0)) + +enum pluginStatus_t { + STATUS_SUCCESS = 0, + STATUS_FAILURE = 1, + STATUS_BAD_PARAM = 2, + STATUS_NOT_SUPPORTED = 3, + STATUS_NOT_INITIALIZED = 4 +}; // enum pluginStatus_t + +#define PLUGIN_ASSERT(assertion) \ + { \ + if (!(assertion)) { \ + std::cerr << "#assertion" << __FILE__ << "," << __LINE__ << std::endl; \ + abort(); \ + } \ + } + +#endif // COMMON__TRT_PLUGIN_HELPER_HPP_ diff --git a/perception/autoware_mtr/lib/include/common/trt_serialize.hpp b/perception/autoware_mtr/lib/include/common/trt_serialize.hpp new file mode 100644 index 0000000000000..279dc9a9695df --- /dev/null +++ b/perception/autoware_mtr/lib/include/common/trt_serialize.hpp @@ -0,0 +1,127 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef COMMON__TRT_SERIALIZE_HPP_ +#define COMMON__TRT_SERIALIZE_HPP_ + +#include +#include +#include +#include +#include + +template +inline void serialize_value(void ** buffer, T const & value); + +template +inline void deserialize_value(void const ** buffer, size_t * buffer_size, T * value); + +namespace +{ +template +struct Serializer +{ +}; + +template +struct Serializer< + T, typename std::enable_if< + std::is_arithmetic::value || std::is_enum::value || std::is_pod::value>::type> +{ + static size_t serialized_size(T const & value) { return sizeof(T); } + static void serialize(void ** buffer, T const & value) + { + ::memcpy(*buffer, &value, sizeof(T)); + reinterpret_cast(*buffer) += sizeof(T); + } + static void deserialize(void const ** buffer, size_t * buffer_size, T * value) + { + assert(*buffer_size >= sizeof(T)); + ::memcpy(value, *buffer, sizeof(T)); + reinterpret_cast(*buffer) += sizeof(T); + *buffer_size -= sizeof(T); + } +}; // struct struct Serializer + +template <> +struct Serializer +{ + static size_t serialized_size(const char * value) { return strlen(value) + 1; } + static void serialize(void ** buffer, const char * value) + { + constexpr size_t max_len = 10000; + auto value_len = ::snprintf(static_cast(*buffer), max_len, "%s", value); + reinterpret_cast(*buffer) += value_len + 1; + } + static void deserialize(void const ** buffer, size_t * buffer_size, const char ** value) + { + *value = static_cast(*buffer); + size_t data_size = strnlen(*value, *buffer_size) + 1; + assert(*buffer_size >= data_size); + reinterpret_cast(*buffer) += data_size; + *buffer_size -= data_size; + } +}; // struct Serializer + +template +struct Serializer< + std::vector, + typename std::enable_if< + std::is_arithmetic::value || std::is_enum::value || std::is_pod::value>::type> +{ + static size_t serialized_size(std::vector const & value) + { + return sizeof(value.size()) + value.size() * sizeof(T); + } + + static void serialize(void ** buffer, std::vector const & value) + { + serialize_value(buffer, value.size()); + size_t nbyte = value.size() * sizeof(T); + ::memcpy(*buffer, value.data(), nbyte); + reinterpret_cast(*buffer) += nbyte; + } + + static void deserialize(void const ** buffer, size_t * buffer_size, std::vector * value) + { + size_t size; + deserialize_value(buffer, buffer_size, &size); + value->resize(size); + size_t nbyte = value->size() * sizeof(T); + assert(*buffer_size >= nbyte); + ::memcpy(value->data(), *buffer, nbyte); + reinterpret_cast(*buffer) += nbyte; + *buffer_size -= nbyte; + } +}; // struct Serializer, ...> +} // namespace + +template +inline size_t serialized_size(T const & value) +{ + return Serializer::serialized_size(value); +} + +template +inline void serialize_value(void ** buffer, T const & value) +{ + return Serializer::serialize(buffer, value); +} + +template +inline void deserialize_value(void const ** buffer, size_t * buffer_size, T * value) +{ + return Serializer::deserialize(buffer, buffer_size, value); +} +#endif // COMMON__TRT_SERIALIZE_HPP_ diff --git a/perception/autoware_mtr/lib/include/knn/trt_knn_batch.hpp b/perception/autoware_mtr/lib/include/knn/trt_knn_batch.hpp new file mode 100644 index 0000000000000..28077ee6c7b3d --- /dev/null +++ b/perception/autoware_mtr/lib/include/knn/trt_knn_batch.hpp @@ -0,0 +1,103 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef KNN__TRT_KNN_BATCH_HPP_ +#define KNN__TRT_KNN_BATCH_HPP_ + +#include "common/trt_plugin_base.hpp" + +#include + +namespace autoware::trt_mtr +{ +/** + * @brief The function to compute KNN batch. + * + * @param xyz + * @param query_xyz + * @param batch_idxs + * @param query_batch_offsets + * @param top_k + * @return outputs + */ +class KnnBatch : public TRTPluginBase +{ +public: + KnnBatch(const std::string & name, const int32_t top_k); + KnnBatch(const std::string & name, const void * data, size_t length); + ~KnnBatch() TRT_NOEXCEPT override; + + /* IPluginV2DynamicExt methods */ + nvinfer1::IPluginV2DynamicExt * clone() const TRT_NOEXCEPT override; + + nvinfer1::DimsExprs getOutputDimensions( + int outputIndex, const nvinfer1::DimsExprs * inputs, int nbInputs, + nvinfer1::IExprBuilder & exprBuilder) TRT_NOEXCEPT override; + + bool supportsFormatCombination( + int pos, const nvinfer1::PluginTensorDesc * ioDesc, int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; + + void configurePlugin( + const nvinfer1::DynamicPluginTensorDesc * inDesc, int nbInputs, + const nvinfer1::DynamicPluginTensorDesc * outDesc, int nbOutputs) TRT_NOEXCEPT override; + + size_t getWorkspaceSize( + const nvinfer1::PluginTensorDesc * inDesc, int nbInputs, + const nvinfer1::PluginTensorDesc * outDesc, int nbOutputs) const TRT_NOEXCEPT override; + + int enqueue( + const nvinfer1::PluginTensorDesc * inDesc, const nvinfer1::PluginTensorDesc * outDesc, + const void * const * inputs, void * const * outputs, void * workspace, + cudaStream_t stream) TRT_NOEXCEPT override; + + void attachToContext( + cudnnContext * cudnnCtx, cublasContext * cublasCtx, + nvinfer1::IGpuAllocator * gpuAllocator) TRT_NOEXCEPT override; + + void detachFromContext() TRT_NOEXCEPT override; + + /* IPluginV2Ext methods */ + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType * inTypes, int nbInputs) const TRT_NOEXCEPT override; + + /* IPluginV2 methods */ + const char * getPluginType() const TRT_NOEXCEPT override; + const char * getPluginVersion() const TRT_NOEXCEPT override; + int getNbOutputs() const TRT_NOEXCEPT override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void * buffer) const TRT_NOEXCEPT override; + +private: + int32_t mTopK; +}; // class KnnBatch + +class KnnBatchCreator : public TRTPluginCreatorBase +{ +public: + KnnBatchCreator(); + + const char * getPluginName() const TRT_NOEXCEPT override; + + const char * getPluginVersion() const TRT_NOEXCEPT override; + + nvinfer1::IPluginV2DynamicExt * createPlugin( + const char * name, const nvinfer1::PluginFieldCollection * fc) TRT_NOEXCEPT override; + + nvinfer1::IPluginV2DynamicExt * deserializePlugin( + const char * name, const void * serialData, size_t serialLength) TRT_NOEXCEPT override; +}; // class KnnBatchCreator +REGISTER_TENSORRT_PLUGIN(KnnBatchCreator); +} // namespace autoware::trt_mtr +#endif // KNN__TRT_KNN_BATCH_HPP_ diff --git a/perception/autoware_mtr/lib/include/knn/trt_knn_batch_kernel.hpp b/perception/autoware_mtr/lib/include/knn/trt_knn_batch_kernel.hpp new file mode 100644 index 0000000000000..7997c196bf00e --- /dev/null +++ b/perception/autoware_mtr/lib/include/knn/trt_knn_batch_kernel.hpp @@ -0,0 +1,24 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef KNN__TRT_KNN_BATCH_KERNEL_HPP_ +#define KNN__TRT_KNN_BATCH_KERNEL_HPP_ + +#include + +cudaError_t KnnBatchLauncher( + const int32_t n, const int32_t m, const int32_t k, const float * xyz, const float * query_xyz, + const int * batch_idx, const int * query_batch_offsets, int * output, cudaStream_t stream); + +#endif // KNN__TRT_KNN_BATCH_KERNEL_HPP_ diff --git a/perception/autoware_mtr/lib/include/knn/trt_knn_batch_mlogk.hpp b/perception/autoware_mtr/lib/include/knn/trt_knn_batch_mlogk.hpp new file mode 100644 index 0000000000000..71725d79764be --- /dev/null +++ b/perception/autoware_mtr/lib/include/knn/trt_knn_batch_mlogk.hpp @@ -0,0 +1,103 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef KNN__TRT_KNN_BATCH_MLOGK_HPP_ +#define KNN__TRT_KNN_BATCH_MLOGK_HPP_ + +#include "common/trt_plugin_base.hpp" + +#include + +namespace autoware::trt_mtr +{ +/** + * @brief The function to compute KNN batch with MLogK. + * + * @param xyz + * @param query_xyz + * @param batch_idxs + * @param query_batch_offsets + * @param top_k + * @return outputs + */ +class KnnBatchMlogK : public TRTPluginBase +{ +public: + KnnBatchMlogK(const std::string & name, const int32_t top_k); + KnnBatchMlogK(const std::string & name, const void * data, size_t length); + ~KnnBatchMlogK() TRT_NOEXCEPT override; + + /* IPluginV2DynamicExt methods */ + nvinfer1::IPluginV2DynamicExt * clone() const TRT_NOEXCEPT override; + + nvinfer1::DimsExprs getOutputDimensions( + int outputIndex, const nvinfer1::DimsExprs * inputs, int nbInputs, + nvinfer1::IExprBuilder & exprBuilder) TRT_NOEXCEPT override; + + bool supportsFormatCombination( + int pos, const nvinfer1::PluginTensorDesc * ioDesc, int nbInputs, + int nbOutputs) TRT_NOEXCEPT override; + + void configurePlugin( + const nvinfer1::DynamicPluginTensorDesc * inDesc, int nbInputs, + const nvinfer1::DynamicPluginTensorDesc * outDesc, int nbOutputs) TRT_NOEXCEPT override; + + size_t getWorkspaceSize( + const nvinfer1::PluginTensorDesc * inDesc, int nbInputs, + const nvinfer1::PluginTensorDesc * outDesc, int nbOutputs) const TRT_NOEXCEPT override; + + int enqueue( + const nvinfer1::PluginTensorDesc * inDesc, const nvinfer1::PluginTensorDesc * outDesc, + const void * const * inputs, void * const * outputs, void * workspace, + cudaStream_t stream) TRT_NOEXCEPT override; + + void attachToContext( + cudnnContext * cudnnCtx, cublasContext * cublasCtx, + nvinfer1::IGpuAllocator * gpuAllocator) TRT_NOEXCEPT override; + + void detachFromContext() TRT_NOEXCEPT override; + + /* IPluginV2Ext methods */ + nvinfer1::DataType getOutputDataType( + int index, const nvinfer1::DataType * inTypes, int nbInputs) const TRT_NOEXCEPT override; + + /* IPluginV2 methods */ + const char * getPluginType() const TRT_NOEXCEPT override; + const char * getPluginVersion() const TRT_NOEXCEPT override; + int getNbOutputs() const TRT_NOEXCEPT override; + size_t getSerializationSize() const TRT_NOEXCEPT override; + void serialize(void * buffer) const TRT_NOEXCEPT override; + +private: + int32_t mTopK; +}; // class KnnBatchMlogK + +class KnnBatchMlogKCreator : public TRTPluginCreatorBase +{ +public: + KnnBatchMlogKCreator(); + + const char * getPluginName() const TRT_NOEXCEPT override; + + const char * getPluginVersion() const TRT_NOEXCEPT override; + + nvinfer1::IPluginV2DynamicExt * createPlugin( + const char * name, const nvinfer1::PluginFieldCollection * fc) TRT_NOEXCEPT override; + + nvinfer1::IPluginV2DynamicExt * deserializePlugin( + const char * name, const void * serialData, size_t serialLength) TRT_NOEXCEPT override; +}; // class KnnBatchMlogKCreator +REGISTER_TENSORRT_PLUGIN(KnnBatchMlogKCreator); +} // namespace autoware::trt_mtr +#endif // KNN__TRT_KNN_BATCH_MLOGK_HPP_ diff --git a/perception/autoware_mtr/lib/include/knn/trt_knn_batch_mlogk_kernel.hpp b/perception/autoware_mtr/lib/include/knn/trt_knn_batch_mlogk_kernel.hpp new file mode 100644 index 0000000000000..d5bbffcc69d9f --- /dev/null +++ b/perception/autoware_mtr/lib/include/knn/trt_knn_batch_mlogk_kernel.hpp @@ -0,0 +1,24 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef KNN__TRT_KNN_BATCH_MLOGK_KERNEL_HPP_ +#define KNN__TRT_KNN_BATCH_MLOGK_KERNEL_HPP_ + +#include + +cudaError_t KnnBatchMlogKLauncher( + const int32_t n, const int32_t m, const int32_t k, const float * xyz, const float * query_xyz, + const int * batch_idx, const int * query_batch_offsets, int * output, cudaStream_t stream); + +#endif // KNN__TRT_KNN_BATCH_MLOGK_KERNEL_HPP_ diff --git a/perception/autoware_mtr/lib/include/postprocess/postprocess_kernel.cuh b/perception/autoware_mtr/lib/include/postprocess/postprocess_kernel.cuh new file mode 100644 index 0000000000000..949eacc6abeca --- /dev/null +++ b/perception/autoware_mtr/lib/include/postprocess/postprocess_kernel.cuh @@ -0,0 +1,54 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef POSTPROCESS__POSTPROCESS_KERNEL_CUH__ +#define POSTPROCESS__POSTPROCESS_KERNEL_CUH__ + +#include + +/** + * @brief Transform predicted trajectory from target agent coords to world coords. + * + * @param B The number of target agents. + * @param M The number of modes. + * @param T The number of future timestamps. + * @param AgentDim The number of target agent state dimensions, expecting (x, y, z, length, width, + * height, yaw, vx, vy, ax, ay). + * @param targetState Source target agent state, in shape [B*AgentDim]. + * @param PredDim The number of predicted state dimension, expecting (x, y, ?, ?, ?, vx, vy). + * @param predTrajectory Predicted trajectory, in shape [B*M*T*PredDim]. + */ +__global__ void transformTrajectoryKernel( + const int B, const int M, const int T, const int AgentDim, const float * targetState, + const int PredDim, float * predTrajectory); + +/** + * @brief Execute postprocess to predicted score and trajectory. + * + * @param B The number of target agents. + * @param M The number of modes. + * @param T The number of future timestamps. + * @param AgentDim The number of target agent state dimensions, expecting (x, y, z, length, width, + * height, yaw, vx, vy, ax, ay). + * @param targetState Target agent states at the latest timestamp, in shape [B*inDim]. + * @param PredDim The number predicted state dimensions, expecting (x, y, ?, ?, ?, vx, vy). + * @param predTrajectory Predicted trajectory, in shape [B*M*T*PredDim]. + * @param stream CUDA stream. + * @return cudaError_t CUDA error type. + */ +cudaError_t postprocessLauncher( + const int B, const int M, const int T, const int AgentDim, const float * targetState, + const int PredDim, float * predTrajectory, cudaStream_t stream); + +#endif // POSTPROCESS__POSTPROCESS_KERNEL_CUH__ diff --git a/perception/autoware_mtr/lib/include/preprocess/agent_preprocess_kernel.cuh b/perception/autoware_mtr/lib/include/preprocess/agent_preprocess_kernel.cuh new file mode 100644 index 0000000000000..9dd59ce2331a8 --- /dev/null +++ b/perception/autoware_mtr/lib/include/preprocess/agent_preprocess_kernel.cuh @@ -0,0 +1,69 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef PREPROCESS__AGENT_PREPROCESS_KERNEL_CUH_ +#define PREPROCESS__AGENT_PREPROCESS_KERNEL_CUH_ + +#include + +/** + * @brief Preprocess kernel for agent data. + * + * @param B The number of target agents. + * @param N The number of all agents. + * @param T The number of timestamps. + * @param D The number of agent state dimensions. + * @param C The number of agent classes. + * @param sdc_index The index of ego. + * @param target_index The array of target indices, in shape [B]. + * @param object_type_index The array of agent class indices, in shape [N]. + * (e.g. 0: VEHICLE, 1:PEDESTRIAN, 2: CYCLIST). + * @param timestamps The array of timestamps, in shape [T]. + * @param in_trajectory The array of trajectory, in shape [N*T*D]. + * @param out_data Output trajectory data, in shape [B*N*T*(D+C+T+5)]. + * @param out_mask Output mask trajectory data, in shape [B*N*T]. + * @param out_last_pos Output last position, in shape [B*N*3]. + */ +__global__ void agentPreprocessKernel( + const int B, const int N, const int T, const int D, const int C, const int sdc_index, + const int * target_index, const int * object_type_index, const float * timestamps, + const float * in_trajectory, float * out_data, bool * out_mask, float * out_last_pos); + +/** + * @brief Preprocess kernel for agent data. + * + * @param B The number of target agents. + * @param N The number of all agents. + * @param T The number of timestamps. + * @param D The number of agent state dimensions. + * @param C The number of agent classes. + * @param sdc_index The index of ego. + * @param target_index The array of target indices, in shape [B]. + * @param object_type_index The array of agent class indices, in shape [N]. + * (e.g. 0: VEHICLE, 1:PEDESTRIAN, 2: CYCLIST). + * @param timestamps The array of timestamps, in shape [T]. + * @param in_trajectory The array of trajectory, in shape [N*T*D]. + * @param out_data Output trajectory data, in shape [B*N*T*(D+C+T+5)]. + * @param out_mask Output mask trajectory data, in shape [B*N*T]. + * @param out_last_pos Output last position, in shape [B*N*3]. + * @param stream CUDA stream. + * @return cudaError_t CUDA error code. + */ +cudaError_t agentPreprocessLauncher( + const int B, const int N, const int T, const int D, const int C, const int sdc_index, + const int * target_index, const int * object_type_index, const float * timestamps, + const float * in_trajectory, float * out_data, bool * out_mask, float * out_last_pos, + cudaStream_t stream); + +#endif // PREPROCESS__AGENT_PREPROCESS_KERNEL_HPP_ diff --git a/perception/autoware_mtr/lib/include/preprocess/polyline_preprocess_kernel.cuh b/perception/autoware_mtr/lib/include/preprocess/polyline_preprocess_kernel.cuh new file mode 100644 index 0000000000000..0ffec0e01daf6 --- /dev/null +++ b/perception/autoware_mtr/lib/include/preprocess/polyline_preprocess_kernel.cuh @@ -0,0 +1,162 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef PREPROCESS__POLYLINE_PREPROCESS_KERNEL_CUH_ +#define PREPROCESS__POLYLINE_PREPROCESS_KERNEL_CUH_ + +/** + * @brief Transform polylines to target agent coordinate system and extend its feature with previous + * x, y`(D->D+2)`. + * + * Points which all elements are 0.0 except of typeID are filled by 0.0 and the corresponding + * mask value is 0.0 too. + * + * @param K The number of polylines. + * @param P The number of points contained in each polyline. + * @param PointDim The number of point dimensions, expecting (x, y, z, dx, dy, dz, typeID). + * @param inPolyline Source polyline, in shape [K*P*D]. + * @param B The number of target agents. + * @param AgentDim The number of agent state dimensions, expecting (x, y, z, length, width, height, + * yaw, vx, vy, ax, ay). + * @param targetState Source target agent states, in shape [B*AgentDim]. + * @param outPolyline Output polyline, in shape [B*K*P*(PointDim+2)]. + * @param outPolylineMask Output polyline mask, in shape [B*K*P]. + */ +__global__ void transformPolylineKernel( + const int K, const int P, const int PointDim, const float * inPolyline, const int B, + const int AgentDim, const float * targetState, float * outPolyline, bool * outPolylineMask); + +/** + * @brief Set the Previous Position Kernel object + * + * @param B The number of target agents. + * @param K The number of polylines. + * @param P The number of points contained in each polyline. + * @param D The number of point dimensions, expecting (x, y, ..., preX, preY). + * @param polyline Source polyline, in shape [B*K*P*D]. + */ +__global__ void setPreviousPositionKernel( + const int B, const int K, const int P, const int D, float * polyline); + +/** + * @brief Calculate center distance from target agent to each polyline. + * + * @note Polyline must have been transformed to target agent coordinates system. + * + * @param B The number of target agents. + * @param K The number of polylines. + * @param P The number of points contained in each polyline. + * @param D The number of point dimensions, expecting [x, y, ...]. + * @param polyline Source polyline, in shape [B*K*P*D]. + * @param polylineMask Source polyline mask, in shape [B*K*P]. + * @param distance Output calculated distances, in shape [B*K]. + */ +__global__ void calculateCenterDistanceKernel( + const int B, const int K, const int P, const int D, const float * polyline, + const bool * polylineMask, float * distance); + +/** + * @brief Extract K polylines with the smallest distances. + * + * @note Because this kernel supposes to allocate shared memory dynamically it is necessary to + * specify `sizeof(float) * L` in the kernel execution configuration. + * + * @param K The number of polylines to be extracted. + * @param B The number of target agents. + * @param L The number of original polylines. + * @param P The number of points contained in each polyline. + * @param D The number of point dimensions. + * @param inPolyline Source polyline, in shape [B*L*P*D]. + * @param inPolylineMask Source polyline mask, in shape [B*L*P]. + * @param inDistance Source distances from target agents to the centers of each polyline, in shape + * [B*L]. + * @param outPolyline Output polyline, in shape [B*K*P*D]. + * @param outPolylineMask Output polyline mask, in shape [B*K*P]. + */ +__global__ void extractTopKPolylineKernel( + const int K, const int B, const int L, const int P, const int D, const float * inPolyline, + const bool * inPolylineMask, const float * inDistance, float * outPolyline, + bool * outPolylineMask); + +/** + * @brief Calculate center positions of each polyline with respect to target agent coordinates + * system. + * + * @note Polyline must have been transformed to target agent coordinates system. + * + * @param B The number of target agents. + * @param K The number of polylines. + * @param P The number of points contained in each polyline. + * @param D The number of point dimensions, expecting (x, y, z, ...). + * @param polyline Source polyline, in shape [B*K*P*D]. + * @param polylineMask Source polyline mask, in shape [B*K*P]. + * @param center Output centers, in shape [B*K*3]. + */ +__global__ void calculatePolylineCenterKernel( + const int B, const int K, const int P, const int D, const float * polyline, + const bool * polylineMask, float * center); + +/** + * @brief In cases of the number of batch polylines (L) is greater than K, + * extracts the topK elements. + * + * @param K The number of polylines to be extracted. + * @param L The number of original polylines. + * @param P The number of points contained in each polyline. + * @param PointDim The number of point state dimensions. + * @param inPolyline Source polylines, in shape [L*P*PointDim]. + * @param B The number of target agents. + * @param AgentDim The number of agent state dimensions. + * @param targetState Target agent state at the latest timestamp, in shape [B*AgentDim]. + * @param tmpPolyline A container to store transformed polyline temporary, in shape + * [B*L*P*(PointDim+2)]. + * @param tmpPolylineMask A container to store transformed polyline mask temporary, in shape + * [B*L*P]. + * @param tmpDistance A container to store distances temporary, in shape [B*L]. + * @param outPolyline Output polylines, in shape [B*K*P*(PointDim+2)]. + * @param outPolylineMask Output polyline masks, in shape [B*K*P]. + * @param outPolylineCenter Output magnitudes of each polyline with respect to target coords, + * in shape [B*K*3]. + * @param stream CUDA stream. + * @return cudaError_t + */ +cudaError_t polylinePreprocessWithTopkLauncher( + const int K, const int L, const int P, const int PointDim, const float * inPolyline, const int B, + const int AgentDim, const float * targetState, float * tmpPolyline, bool * tmpPolylineMask, + float * tmpDistance, float * outPolyline, bool * outPolylineMask, float * outPolylineCenter, + cudaStream_t stream); + +/** + * @brief Do preprocess for polyline if the number of batched polylines is K. + * + * @param K The number of polylines. + * @param P The number of points contained in each polyline. + * @param PointDim The number of point state dimensions. + * @param inPolyline Source polylines, in shape [K*P*PointDim]. + * @param B The number of target agents. + * @param AgentDim The number of agent state dimensions. + * @param targetState Target agent state at the latest timestamp, in shape [B*AgentDim]. + * @param outPolyline Output polylines, in shape [B*K*P*(PointDim+2)]. + * @param outPolylineMask Output polyline masks, in shape [B*K*P]. + * @param outPolylineCenter Output magnitudes of each polyline with respect to target coords, + * in shape [B*K*3]. + * @param stream CUDA stream. + * @return cudaError_t + */ +cudaError_t polylinePreprocessLauncher( + const int K, const int P, const int PointDim, const float * inPolyline, const int B, + const int AgentDim, const float * targetState, float * outPolyline, bool * outPolylineMask, + float * outPolylineCenter, cudaStream_t stream); + +#endif // PREPROCESS__POLYLINE_PREPROCESS_KERNEL_CUH_ diff --git a/perception/autoware_mtr/lib/src/attention/trt_attn_value_computation.cpp b/perception/autoware_mtr/lib/src/attention/trt_attn_value_computation.cpp new file mode 100644 index 0000000000000..ff8b81e0854d6 --- /dev/null +++ b/perception/autoware_mtr/lib/src/attention/trt_attn_value_computation.cpp @@ -0,0 +1,207 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "attention/trt_attn_value_computation.hpp" + +#include "attention/trt_attn_value_computation_kernel.hpp" + +#include + +namespace autoware::trt_mtr +{ +namespace +{ +static const char * PLUGIN_VERSION{"1"}; +static const char * PLUGIN_NAME{"TRTAttentionValueComputation"}; +} // namespace + +AttentionValueComputation::AttentionValueComputation(const std::string & name) : TRTPluginBase(name) +{ +} + +AttentionValueComputation::AttentionValueComputation(const std::string & name, const void *, size_t) +: TRTPluginBase(name) +{ +} + +AttentionValueComputation::~AttentionValueComputation() TRT_NOEXCEPT +{ +} + +nvinfer1::IPluginV2DynamicExt * AttentionValueComputation::clone() const TRT_NOEXCEPT +{ + auto * plugin = new AttentionValueComputation(mLayerName); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; +} + +nvinfer1::DimsExprs AttentionValueComputation::getOutputDimensions( + int, const nvinfer1::DimsExprs * inputs, int, nvinfer1::IExprBuilder &) TRT_NOEXCEPT +{ + nvinfer1::DimsExprs ret; + ret.nbDims = 3; + ret.d[0] = inputs[3].d[0]; + ret.d[1] = inputs[5].d[1]; + ret.d[2] = inputs[5].d[2]; + + return ret; +} + +bool AttentionValueComputation::supportsFormatCombination( + int pos, const nvinfer1::PluginTensorDesc * ioDesc, int, int) TRT_NOEXCEPT +{ + return ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR && + ioDesc[pos].type == (pos < 4 ? nvinfer1::DataType::kINT32 : nvinfer1::DataType::kFLOAT); +} + +void AttentionValueComputation::configurePlugin( + const nvinfer1::DynamicPluginTensorDesc * inDesc, int nbInputs, + const nvinfer1::DynamicPluginTensorDesc * outDesc, int nbOutputs) TRT_NOEXCEPT +{ + // Validate input arguments + PLUGIN_ASSERT(nbInputs == 6); + for (int pos = 0; pos < 6; ++pos) { + PLUGIN_ASSERT( + inDesc[pos].desc.format == nvinfer1::TensorFormat::kLINEAR && + inDesc[pos].desc.type == (pos < 4 ? nvinfer1::DataType::kINT32 : nvinfer1::DataType::kFLOAT)); + } + + PLUGIN_ASSERT(nbOutputs == 1); + PLUGIN_ASSERT( + outDesc[0].desc.format == nvinfer1::TensorFormat::kLINEAR && + outDesc[0].desc.type == nvinfer1::DataType::kFLOAT); +} + +size_t AttentionValueComputation::getWorkspaceSize( + const nvinfer1::PluginTensorDesc *, int, const nvinfer1::PluginTensorDesc *, + int) const TRT_NOEXCEPT +{ + return 0; +} + +int AttentionValueComputation::enqueue( + const nvinfer1::PluginTensorDesc * inDesc, const nvinfer1::PluginTensorDesc * outDesc, + const void * const * inputs, void * const * outputs, void *, cudaStream_t stream) TRT_NOEXCEPT +{ + // parse query_batch_cnt description + const int32_t B = inDesc[0].dims.d[0]; + + // parse index_pair description + const int32_t Q = inDesc[3].dims.d[0]; + const int32_t L = inDesc[3].dims.d[1]; + + // parse value_features description + const int32_t K = inDesc[5].dims.d[0]; + const int32_t numHead = inDesc[5].dims.d[1]; + const int32_t headDim = inDesc[5].dims.d[2]; + + const void * queryBatchCnt = inputs[0]; + const void * keyBatchCnt = inputs[1]; + const void * indexPairBatch = inputs[2]; + const void * indexPair = inputs[3]; + const void * attnWeight = inputs[4]; + const void * valueFeature = inputs[5]; + + void * output = outputs[0]; + + switch (outDesc[0].type) { + case nvinfer1::DataType::kFLOAT: + AttentionValueComputationLauncher( + B, Q, L, K, numHead, headDim, reinterpret_cast(queryBatchCnt), + reinterpret_cast(keyBatchCnt), reinterpret_cast(indexPairBatch), + reinterpret_cast(indexPair), reinterpret_cast(attnWeight), + reinterpret_cast(valueFeature), reinterpret_cast(output), stream); + break; + + default: + break; + } + + return 0; +} + +void AttentionValueComputation::attachToContext( + cudnnContext *, cublasContext *, nvinfer1::IGpuAllocator *) TRT_NOEXCEPT +{ +} + +void AttentionValueComputation::detachFromContext() TRT_NOEXCEPT +{ +} + +nvinfer1::DataType AttentionValueComputation::getOutputDataType( + int, const nvinfer1::DataType * inTypes, int) const TRT_NOEXCEPT +{ + return inTypes[4]; +} + +const char * AttentionValueComputation::getPluginType() const TRT_NOEXCEPT +{ + return PLUGIN_NAME; +} + +const char * AttentionValueComputation::getPluginVersion() const TRT_NOEXCEPT +{ + return PLUGIN_VERSION; +} + +int AttentionValueComputation::getNbOutputs() const TRT_NOEXCEPT +{ + return 1; +} + +size_t AttentionValueComputation::getSerializationSize() const TRT_NOEXCEPT +{ + return 0; +} + +void AttentionValueComputation::serialize(void *) const TRT_NOEXCEPT +{ +} + +/* ====================== creator ====================== */ +AttentionValueComputationCreator::AttentionValueComputationCreator() +{ + mPluginAttributes.clear(); + mFC.nbFields = mPluginAttributes.size(); + mFC.fields = mPluginAttributes.data(); +} + +const char * AttentionValueComputationCreator::getPluginName() const TRT_NOEXCEPT +{ + return PLUGIN_NAME; +} + +const char * AttentionValueComputationCreator::getPluginVersion() const TRT_NOEXCEPT +{ + return PLUGIN_VERSION; +} + +nvinfer1::IPluginV2DynamicExt * AttentionValueComputationCreator::createPlugin( + const char * name, const nvinfer1::PluginFieldCollection *) TRT_NOEXCEPT +{ + auto plugin = new AttentionValueComputation(name); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; +} + +nvinfer1::IPluginV2DynamicExt * AttentionValueComputationCreator::deserializePlugin( + const char * name, const void * serialData, size_t serialLength) TRT_NOEXCEPT +{ + auto plugin = new AttentionValueComputation(name, serialData, serialLength); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; +} + +} // namespace autoware::trt_mtr diff --git a/perception/autoware_mtr/lib/src/attention/trt_attn_value_computation_kernel.cu b/perception/autoware_mtr/lib/src/attention/trt_attn_value_computation_kernel.cu new file mode 100644 index 0000000000000..03871474d9f18 --- /dev/null +++ b/perception/autoware_mtr/lib/src/attention/trt_attn_value_computation_kernel.cu @@ -0,0 +1,139 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "attention/trt_attn_value_computation_kernel.hpp" + +#include + +/** + * @brief Attention value computation kernel. + * + * @tparam d The size of shared memory, which should be equal to `L`. + * @param B The size of batch. + * @param Q The size of query. + * @param L The size of local. + * @param K The size of key. + * @param numHead The number of heads. + * @param headDim The number of head dimensions. + * @param queryBatchCnt The number of queries for each batch, in shape [B]. + * @param keyBatchCnt The number of keys for each batch, in shape [B]. + * @param indexPairBatch The indices of batch for corresponding query, in shape [Q]. + * @param indexPair The indices of key for corresponding query, in shape [Q*L]. + * @param attnWeight Source attention weights, in shape [Q*L*numHead]. + * @param valueFeature Source value features, in shape [K*numHead*headDim]. + * @param output Output container, in shape [Q*numHead*headDim]. + */ +template +__global__ void attentionValueComputationKernel( + const int32_t B, const int32_t Q, const int32_t L, const int32_t K, const int32_t numHead, + const int32_t headDim, const int * queryBatchCnt, const int * keyBatchCnt, + const int * indexPairBatch, const int * indexPair, const float * attnWeight, + const float * valueFeature, float * output) +{ + const int query_idx = blockIdx.x; + const int head_idx = blockIdx.y; + const int hdim_idx = threadIdx.x; + + if (query_idx >= Q || head_idx >= numHead || hdim_idx >= headDim) { + return; + } + + // get key_start_idx + const int batch_idx = indexPairBatch[query_idx]; + if (batch_idx < 0) { + return; + } + + int key_start_idx = 0; + for (int i = 0; i < batch_idx; ++i) { + key_start_idx += keyBatchCnt[i]; + } + // get shared variables + __shared__ float sharedAttnWeight[d]; + __shared__ int sharedValueIdx[d]; + for (int i = 0; i < L; i += blockDim.x) { + sharedAttnWeight[i] = attnWeight[query_idx * L * numHead + i * numHead + head_idx]; + + const int cur_key_idx = indexPair[query_idx * L + i]; + sharedValueIdx[i] = cur_key_idx == -1 ? -1 : cur_key_idx + key_start_idx; + } + __syncthreads(); + + output += query_idx * numHead * headDim + head_idx * headDim + hdim_idx; + + float attn_result = 0.0f; + for (int i = 0; i < L; ++i) { + // TODO: fix bug (an illegal memory access was encountered) + // value_idx need to guard with value_idx >= 0 && value_idx < K + if (const int value_idx = sharedValueIdx[i]; value_idx >= 0 && value_idx < K) { + attn_result += sharedAttnWeight[i] * + valueFeature[value_idx * numHead * headDim + head_idx * headDim + hdim_idx]; + } + } + output[0] = attn_result; +} + +cudaError_t AttentionValueComputationLauncher( + const int32_t B, const int32_t Q, const int32_t L, const int32_t K, const int32_t numHead, + const int32_t headDim, const int * queryBatchCnt, const int * keyBatchCnt, + const int * indexPairBatch, const int * indexPair, const float * attnWeight, + const float * valueFeature, float * output, cudaStream_t stream) +{ + if (L > 512) { + return cudaError::cudaErrorInvalidValue; + } + + dim3 blocks(Q, numHead); + dim3 threads(headDim); + + switch (L) { + case 16: + attentionValueComputationKernel<16><<>>( + B, Q, L, K, numHead, headDim, queryBatchCnt, keyBatchCnt, indexPairBatch, indexPair, + attnWeight, valueFeature, output); + break; + case 32: + attentionValueComputationKernel<32><<>>( + B, Q, L, K, numHead, headDim, queryBatchCnt, keyBatchCnt, indexPairBatch, indexPair, + attnWeight, valueFeature, output); + break; + case 64: + attentionValueComputationKernel<64><<>>( + B, Q, L, K, numHead, headDim, queryBatchCnt, keyBatchCnt, indexPairBatch, indexPair, + attnWeight, valueFeature, output); + break; + case 128: + attentionValueComputationKernel<128><<>>( + B, Q, L, K, numHead, headDim, queryBatchCnt, keyBatchCnt, indexPairBatch, indexPair, + attnWeight, valueFeature, output); + break; + case 320: + attentionValueComputationKernel<320><<>>( + B, Q, L, K, numHead, headDim, queryBatchCnt, keyBatchCnt, indexPairBatch, indexPair, + attnWeight, valueFeature, output); + break; + case 384: + attentionValueComputationKernel<384><<>>( + B, Q, L, K, numHead, headDim, queryBatchCnt, keyBatchCnt, indexPairBatch, indexPair, + attnWeight, valueFeature, output); + break; + default: + attentionValueComputationKernel<512><<>>( + B, Q, L, K, numHead, headDim, queryBatchCnt, keyBatchCnt, indexPairBatch, indexPair, + attnWeight, valueFeature, output); + break; + } + + return cudaGetLastError(); +} diff --git a/perception/autoware_mtr/lib/src/attention/trt_attn_weight_computation.cpp b/perception/autoware_mtr/lib/src/attention/trt_attn_weight_computation.cpp new file mode 100644 index 0000000000000..39641f021c1f1 --- /dev/null +++ b/perception/autoware_mtr/lib/src/attention/trt_attn_weight_computation.cpp @@ -0,0 +1,209 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "attention/trt_attn_weight_computation.hpp" + +#include "attention/trt_attn_weight_computation_kernel.hpp" + +#include + +namespace autoware::trt_mtr +{ +namespace +{ +static const char * PLUGIN_VERSION{"1"}; +static const char * PLUGIN_NAME{"TRTAttentionWeightComputation"}; +} // namespace + +AttentionWeightComputation::AttentionWeightComputation(const std::string & name) +: TRTPluginBase(name) +{ +} + +AttentionWeightComputation::AttentionWeightComputation( + const std::string & name, const void *, size_t) +: TRTPluginBase(name) +{ +} + +AttentionWeightComputation::~AttentionWeightComputation() TRT_NOEXCEPT +{ +} + +nvinfer1::IPluginV2DynamicExt * AttentionWeightComputation::clone() const TRT_NOEXCEPT +{ + auto * plugin = new AttentionWeightComputation(mLayerName); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; +} + +nvinfer1::DimsExprs AttentionWeightComputation::getOutputDimensions( + int, const nvinfer1::DimsExprs * inputs, int, nvinfer1::IExprBuilder &) TRT_NOEXCEPT +{ + nvinfer1::DimsExprs ret; + ret.nbDims = 3; + ret.d[0] = inputs[3].d[0]; + ret.d[1] = inputs[3].d[1]; + ret.d[2] = inputs[5].d[1]; + + return ret; +} + +bool AttentionWeightComputation::supportsFormatCombination( + int pos, const nvinfer1::PluginTensorDesc * ioDesc, int, int) TRT_NOEXCEPT +{ + return ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR && + ioDesc[pos].type == (pos < 4 ? nvinfer1::DataType::kINT32 : nvinfer1::DataType::kFLOAT); +} + +void AttentionWeightComputation::configurePlugin( + const nvinfer1::DynamicPluginTensorDesc * inDesc, int nbInputs, + const nvinfer1::DynamicPluginTensorDesc * outDesc, int nbOutputs) TRT_NOEXCEPT +{ + // Validate input arguments + PLUGIN_ASSERT(nbInputs == 6); + for (int pos = 0; pos < 6; ++pos) { + PLUGIN_ASSERT( + inDesc[pos].desc.format == nvinfer1::TensorFormat::kLINEAR && + inDesc[pos].desc.type == (pos < 4 ? nvinfer1::DataType::kINT32 : nvinfer1::DataType::kFLOAT)); + } + + PLUGIN_ASSERT(nbOutputs == 1); + PLUGIN_ASSERT( + outDesc[0].desc.format == nvinfer1::TensorFormat::kLINEAR && + outDesc[0].desc.type == nvinfer1::DataType::kFLOAT); +} + +size_t AttentionWeightComputation::getWorkspaceSize( + const nvinfer1::PluginTensorDesc *, int, const nvinfer1::PluginTensorDesc *, + int) const TRT_NOEXCEPT +{ + return 0; +} + +int AttentionWeightComputation::enqueue( + const nvinfer1::PluginTensorDesc * inDesc, const nvinfer1::PluginTensorDesc * outDesc, + const void * const * inputs, void * const * outputs, void *, cudaStream_t stream) TRT_NOEXCEPT +{ + // parse query_batch_cnt description + const int32_t B = inDesc[0].dims.d[0]; + + // parse index_pair description + const int32_t Q = inDesc[3].dims.d[0]; + const int32_t L = inDesc[3].dims.d[1]; + + // parse key_features description + const int32_t K = inDesc[5].dims.d[0]; + const int32_t numHead = inDesc[5].dims.d[1]; + const int32_t headDim = inDesc[5].dims.d[2]; + + const void * queryBatchCnt = inputs[0]; + const void * keyBatchCnt = inputs[1]; + const void * indexPairBatch = inputs[2]; + const void * indexPair = inputs[3]; + const void * queryFeature = inputs[4]; + const void * keyFeature = inputs[5]; + + void * output = outputs[0]; + + switch (outDesc[0].type) { + case nvinfer1::DataType::kFLOAT: + AttentionWeightComputationLauncher( + B, Q, L, K, numHead, headDim, reinterpret_cast(queryBatchCnt), + reinterpret_cast(keyBatchCnt), reinterpret_cast(indexPairBatch), + reinterpret_cast(indexPair), reinterpret_cast(queryFeature), + reinterpret_cast(keyFeature), reinterpret_cast(output), stream); + break; + + default: + break; + } + + return 0; +} + +void AttentionWeightComputation::attachToContext( + cudnnContext *, cublasContext *, nvinfer1::IGpuAllocator *) TRT_NOEXCEPT +{ +} + +void AttentionWeightComputation::detachFromContext() TRT_NOEXCEPT +{ +} + +nvinfer1::DataType AttentionWeightComputation::getOutputDataType( + int, const nvinfer1::DataType * inTypes, int) const TRT_NOEXCEPT +{ + return inTypes[4]; +} + +const char * AttentionWeightComputation::getPluginType() const TRT_NOEXCEPT +{ + return PLUGIN_NAME; +} + +const char * AttentionWeightComputation::getPluginVersion() const TRT_NOEXCEPT +{ + return PLUGIN_VERSION; +} + +int AttentionWeightComputation::getNbOutputs() const TRT_NOEXCEPT +{ + return 1; +} + +size_t AttentionWeightComputation::getSerializationSize() const TRT_NOEXCEPT +{ + return 0; +} + +void AttentionWeightComputation::serialize(void *) const TRT_NOEXCEPT +{ +} + +/* ====================== creator ====================== */ +AttentionWeightComputationCreator::AttentionWeightComputationCreator() +{ + mPluginAttributes.clear(); + mFC.nbFields = mPluginAttributes.size(); + mFC.fields = mPluginAttributes.data(); +} + +const char * AttentionWeightComputationCreator::getPluginName() const TRT_NOEXCEPT +{ + return PLUGIN_NAME; +} + +const char * AttentionWeightComputationCreator::getPluginVersion() const TRT_NOEXCEPT +{ + return PLUGIN_VERSION; +} + +nvinfer1::IPluginV2DynamicExt * AttentionWeightComputationCreator::createPlugin( + const char * name, const nvinfer1::PluginFieldCollection *) TRT_NOEXCEPT +{ + auto plugin = new AttentionWeightComputation(name); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; +} + +nvinfer1::IPluginV2DynamicExt * AttentionWeightComputationCreator::deserializePlugin( + const char * name, const void * serialData, size_t serialLength) TRT_NOEXCEPT +{ + auto plugin = new AttentionWeightComputation(name, serialData, serialLength); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; +} + +} // namespace autoware::trt_mtr diff --git a/perception/autoware_mtr/lib/src/attention/trt_attn_weight_computation_kernel.cu b/perception/autoware_mtr/lib/src/attention/trt_attn_weight_computation_kernel.cu new file mode 100644 index 0000000000000..f0cfd57dbd5a5 --- /dev/null +++ b/perception/autoware_mtr/lib/src/attention/trt_attn_weight_computation_kernel.cu @@ -0,0 +1,139 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "attention/trt_attn_weight_computation_kernel.hpp" + +/** + * @brief Attention weight computation kernel. + * + * @tparam d The size of shared memory, which should be equal to `L`. + * @param B The size of batch. + * @param Q The size of query. + * @param L The size of local. + * @param K The size of key. + * @param numHead The number of heads. + * @param headDim The number of head dimensions. + * @param queryBatchCnt The number of queries for each batch, in shape [B]. + * @param keyBatchCnt The number of keys for each batch, in shape [B]. + * @param indexPairBatch The indices of batch for corresponding query, in shape [Q]. + * @param indexPair The indices of key for corresponding query, in shape [Q*L]. + * @param queryFeature Source query features, in shape [Q*numHead*headDim]. + * @param keyFeature Source key features, in shape [K*numHead*headDim]. + * @param output Output container, in shape [Q*L*numHead]. + */ +template +__global__ void attentionWeightComputationKernel( + const int32_t B, const int32_t Q, const int32_t L, const int32_t K, const int32_t numHead, + const int32_t headDim, const int * queryBatchCnt, const int * keyBatchCnt, + const int * indexPairBatch, const int * indexPair, const float * queryFeature, + const float * keyFeature, float * output) +{ + const int query_idx = blockIdx.x; + const int head_idx = blockIdx.y; + const int local_key_idx = threadIdx.x; + + const int index = query_idx * L + local_key_idx; + + if (query_idx >= Q || head_idx >= numHead || local_key_idx >= L) { + return; + } + + // build shared query features + __shared__ float sharedQueryFeature[d]; + for (int i = local_key_idx; i < headDim; i += blockDim.x) { + sharedQueryFeature[i] = queryFeature[query_idx * numHead * headDim + head_idx * headDim + i]; + } + __syncthreads(); + + if (indexPair[index] < 0) { + // ignore index + return; + } + + // get real key index + const int batch_idx = indexPairBatch[query_idx]; + if (batch_idx < 0) { + return; + } + + int key_start_idx = 0; + for (int i = 0; i < batch_idx; ++i) { + key_start_idx += keyBatchCnt[i]; + } + key_start_idx += indexPair[index]; + + // get key features + keyFeature += key_start_idx * numHead * headDim + head_idx * headDim; + output += index * numHead + head_idx; + + float attn_weight = 0.0f; + for (int i = 0; i < headDim; ++i) { + attn_weight += keyFeature[i] * sharedQueryFeature[i]; + } + output[0] = attn_weight; +} + +cudaError_t AttentionWeightComputationLauncher( + const int32_t B, const int32_t Q, const int32_t L, const int32_t K, const int32_t numHead, + const int32_t headDim, const int * queryBatchCnt, const int * keyBatchCnt, + const int * indexPairBatch, const int * indexPair, const float * queryFeature, + const float * keyFeature, float * output, cudaStream_t stream) +{ + if (headDim > 150) { + return cudaError::cudaErrorInvalidValue; + } + + dim3 blocks(Q, numHead); + dim3 threads(L); + + switch (headDim) { + case 16: + attentionWeightComputationKernel<16><<>>( + B, Q, L, K, numHead, headDim, queryBatchCnt, keyBatchCnt, indexPairBatch, indexPair, + queryFeature, keyFeature, output); + break; + case 24: + attentionWeightComputationKernel<24><<>>( + B, Q, L, K, numHead, headDim, queryBatchCnt, keyBatchCnt, indexPairBatch, indexPair, + queryFeature, keyFeature, output); + break; + case 32: + attentionWeightComputationKernel<32><<>>( + B, Q, L, K, numHead, headDim, queryBatchCnt, keyBatchCnt, indexPairBatch, indexPair, + queryFeature, keyFeature, output); + break; + case 48: + attentionWeightComputationKernel<48><<>>( + B, Q, L, K, numHead, headDim, queryBatchCnt, keyBatchCnt, indexPairBatch, indexPair, + queryFeature, keyFeature, output); + break; + case 64: + attentionWeightComputationKernel<64><<>>( + B, Q, L, K, numHead, headDim, queryBatchCnt, keyBatchCnt, indexPairBatch, indexPair, + queryFeature, keyFeature, output); + break; + case 128: + attentionWeightComputationKernel<128><<>>( + B, Q, L, K, numHead, headDim, queryBatchCnt, keyBatchCnt, indexPairBatch, indexPair, + queryFeature, keyFeature, output); + break; + default: + attentionWeightComputationKernel<150><<>>( + B, Q, L, K, numHead, headDim, queryBatchCnt, keyBatchCnt, indexPairBatch, indexPair, + queryFeature, keyFeature, output); + break; + } + + return cudaGetLastError(); +} diff --git a/perception/autoware_mtr/lib/src/knn/trt_knn_batch.cpp b/perception/autoware_mtr/lib/src/knn/trt_knn_batch.cpp new file mode 100644 index 0000000000000..86f6438ba9aa1 --- /dev/null +++ b/perception/autoware_mtr/lib/src/knn/trt_knn_batch.cpp @@ -0,0 +1,208 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "knn/trt_knn_batch.hpp" + +#include "common/trt_serialize.hpp" +#include "knn/trt_knn_batch_kernel.hpp" + +#include + +namespace autoware::trt_mtr +{ +namespace +{ +static const char * PLUGIN_VERSION{"1"}; +static const char * PLUGIN_NAME{"TRTKnnBatch"}; +} // namespace + +KnnBatch::KnnBatch(const std::string & name, const int32_t top_k) +: TRTPluginBase(name), mTopK(top_k) +{ +} + +KnnBatch::KnnBatch(const std::string & name, const void * data, size_t length) : TRTPluginBase(name) +{ + deserialize_value(&data, &length, &mTopK); +} + +KnnBatch::~KnnBatch() TRT_NOEXCEPT +{ +} + +nvinfer1::IPluginV2DynamicExt * KnnBatch::clone() const TRT_NOEXCEPT +{ + auto * plugin = new KnnBatch(mLayerName, mTopK); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; +} + +nvinfer1::DimsExprs KnnBatch::getOutputDimensions( + int, const nvinfer1::DimsExprs * inputs, int, nvinfer1::IExprBuilder & exprBuilder) TRT_NOEXCEPT +{ + nvinfer1::DimsExprs ret; + ret.nbDims = 2; + ret.d[0] = inputs[0].d[0]; + ret.d[1] = exprBuilder.constant(mTopK); + + return ret; +} + +bool KnnBatch::supportsFormatCombination( + int pos, const nvinfer1::PluginTensorDesc * ioDesc, int, int) TRT_NOEXCEPT +{ + return ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR && + ioDesc[pos].type == (pos < 2 ? nvinfer1::DataType::kFLOAT : nvinfer1::DataType::kINT32); +} + +void KnnBatch::configurePlugin( + const nvinfer1::DynamicPluginTensorDesc * inDesc, int nbInputs, + const nvinfer1::DynamicPluginTensorDesc * outDesc, int nbOutputs) TRT_NOEXCEPT +{ + // Validate input arguments + PLUGIN_ASSERT(nbInputs == 4); + for (int pos = 0; pos < 4; ++pos) { + PLUGIN_ASSERT( + inDesc[pos].desc.format == nvinfer1::TensorFormat::kLINEAR && + inDesc[pos].desc.type == (pos < 2 ? nvinfer1::DataType::kFLOAT : nvinfer1::DataType::kINT32)); + } + + PLUGIN_ASSERT(nbOutputs == 1); + PLUGIN_ASSERT( + outDesc[0].desc.format == nvinfer1::TensorFormat::kLINEAR && + outDesc[0].desc.type == nvinfer1::DataType::kINT32); +} + +size_t KnnBatch::getWorkspaceSize( + const nvinfer1::PluginTensorDesc *, int, const nvinfer1::PluginTensorDesc *, + int) const TRT_NOEXCEPT +{ + return 0; +} + +int KnnBatch::enqueue( + const nvinfer1::PluginTensorDesc * inDesc, const nvinfer1::PluginTensorDesc * outDesc, + const void * const * inputs, void * const * outputs, void *, cudaStream_t stream) TRT_NOEXCEPT +{ + const int32_t n = inDesc[0].dims.d[0]; + const int32_t m = inDesc[1].dims.d[0]; + + const void * xyz = inputs[0]; + const void * query_xyz = inputs[1]; + const void * batch_idxs = inputs[2]; + const void * query_batch_offsets = inputs[3]; + + void * output = outputs[0]; + + switch (outDesc[0].type) { + case nvinfer1::DataType::kINT32: + KnnBatchLauncher( + n, m, mTopK, reinterpret_cast(xyz), + reinterpret_cast(query_xyz), reinterpret_cast(batch_idxs), + reinterpret_cast(query_batch_offsets), reinterpret_cast(output), + stream); + break; + + default: + break; + } + + return 0; +} + +void KnnBatch::attachToContext(cudnnContext *, cublasContext *, nvinfer1::IGpuAllocator *) + TRT_NOEXCEPT +{ +} + +void KnnBatch::detachFromContext() TRT_NOEXCEPT +{ +} + +nvinfer1::DataType KnnBatch::getOutputDataType(int, const nvinfer1::DataType *, int) const + TRT_NOEXCEPT +{ + return nvinfer1::DataType::kINT32; +} + +const char * KnnBatch::getPluginType() const TRT_NOEXCEPT +{ + return PLUGIN_NAME; +} + +const char * KnnBatch::getPluginVersion() const TRT_NOEXCEPT +{ + return PLUGIN_VERSION; +} + +int KnnBatch::getNbOutputs() const TRT_NOEXCEPT +{ + return 1; +} + +size_t KnnBatch::getSerializationSize() const TRT_NOEXCEPT +{ + return sizeof(mTopK); +} + +void KnnBatch::serialize(void * buffer) const TRT_NOEXCEPT +{ + serialize_value(&buffer, mTopK); +} + +/* ====================== creator ====================== */ +KnnBatchCreator::KnnBatchCreator() +{ + mPluginAttributes.emplace_back( + nvinfer1::PluginField("top_k", nullptr, nvinfer1::PluginFieldType::kINT32, 1)); + mFC.nbFields = mPluginAttributes.size(); + mFC.fields = mPluginAttributes.data(); +} + +const char * KnnBatchCreator::getPluginName() const TRT_NOEXCEPT +{ + return PLUGIN_NAME; +} + +const char * KnnBatchCreator::getPluginVersion() const TRT_NOEXCEPT +{ + return PLUGIN_VERSION; +} + +nvinfer1::IPluginV2DynamicExt * KnnBatchCreator::createPlugin( + const char * name, const nvinfer1::PluginFieldCollection * fc) TRT_NOEXCEPT +{ + const nvinfer1::PluginField * fields = fc->fields; + int32_t top_k = 0; + for (int i = 0; i < fc->nbFields; ++i) { + const char * attrName = fields[i].name; + if (!strcmp(attrName, "top_k")) { + PLUGIN_ASSERT(fields[i].type == nvinfer1::PluginFieldType::kINT32); + top_k = *static_cast(fields[i].data); + } + } + auto plugin = new KnnBatch(name, top_k); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; +} + +nvinfer1::IPluginV2DynamicExt * KnnBatchCreator::deserializePlugin( + const char * name, const void * serialData, size_t serialLength) TRT_NOEXCEPT +{ + auto plugin = new KnnBatch(name, serialData, serialLength); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; +} + +} // namespace autoware::trt_mtr diff --git a/perception/autoware_mtr/lib/src/knn/trt_knn_batch_kernel.cu b/perception/autoware_mtr/lib/src/knn/trt_knn_batch_kernel.cu new file mode 100644 index 0000000000000..870e8d538a178 --- /dev/null +++ b/perception/autoware_mtr/lib/src/knn/trt_knn_batch_kernel.cu @@ -0,0 +1,82 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "common/trt_plugin_helper.hpp" +#include "knn/trt_knn_batch_kernel.hpp" + +__global__ void knn_batch_kernel( + const int32_t n, const int32_t m, const int32_t k, const float * xyz, const float * query_xyz, + const int * batch_idxs, const int * query_batch_offsets, int * output) +{ + const int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (pt_idx >= n) { + return; + } + + xyz += pt_idx * 3; + output += pt_idx * k; + + float ox = xyz[0]; + float oy = xyz[1]; + float oz = xyz[2]; + + float best[100]; + int best_idx[100]; + for (int i = 0; i < k; ++i) { + best[i] = 1e20; + best_idx[i] = -1; + } + + int batch_idx = batch_idxs[pt_idx]; + if (batch_idx < 0) { + return; + } + + int start_idx = query_batch_offsets[batch_idx]; + int end_idx = query_batch_offsets[batch_idx + 1]; + for (int i = start_idx; i < end_idx; ++i) { + float x = query_xyz[i * 3 + 0]; + float y = query_xyz[i * 3 + 1]; + float z = query_xyz[i * 3 + 2]; + float d2 = (ox - x) * (ox - x) + (oy - y) * (oy - y) + (oz - z) * (oz - z); + for (int32_t p = 0; p < k; ++p) { + if (d2 < best[p]) { + for (int32_t q = k - 1; q > p; --q) { + best[q] = best[q - 1]; + best_idx[q] = best_idx[q - 1]; + } + best[p] = d2; + best_idx[p] = i - start_idx; + break; + } + } + } + + for (int i = 0; i < k; ++i) { + output[i] = best_idx[i]; + } +} + +cudaError_t KnnBatchLauncher( + const int32_t n, const int32_t m, const int32_t k, const float * xyz, const float * query_xyz, + const int * batch_idxs, const int * query_batch_offsets, int * output, cudaStream_t stream) +{ + dim3 blocks(DIVUP(n, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + + knn_batch_kernel<<>>( + n, m, k, xyz, query_xyz, batch_idxs, query_batch_offsets, output); + + return cudaGetLastError(); +} diff --git a/perception/autoware_mtr/lib/src/knn/trt_knn_batch_mlogk.cpp b/perception/autoware_mtr/lib/src/knn/trt_knn_batch_mlogk.cpp new file mode 100644 index 0000000000000..618772b03262d --- /dev/null +++ b/perception/autoware_mtr/lib/src/knn/trt_knn_batch_mlogk.cpp @@ -0,0 +1,208 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "knn/trt_knn_batch_mlogk.hpp" + +#include "common/trt_serialize.hpp" +#include "knn/trt_knn_batch_mlogk_kernel.hpp" + +#include + +namespace autoware::trt_mtr +{ +namespace +{ +static const char * PLUGIN_VERSION{"1"}; +static const char * PLUGIN_NAME{"TRTKnnBatchMlogK"}; +} // namespace + +KnnBatchMlogK::KnnBatchMlogK(const std::string & name, const int32_t top_k) +: TRTPluginBase(name), mTopK(top_k) +{ +} + +KnnBatchMlogK::KnnBatchMlogK(const std::string & name, const void * data, size_t length) +: TRTPluginBase(name) +{ + deserialize_value(&data, &length, &mTopK); +} + +KnnBatchMlogK::~KnnBatchMlogK() TRT_NOEXCEPT +{ +} + +nvinfer1::IPluginV2DynamicExt * KnnBatchMlogK::clone() const TRT_NOEXCEPT +{ + auto * plugin = new KnnBatchMlogK(mLayerName, mTopK); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; +} + +nvinfer1::DimsExprs KnnBatchMlogK::getOutputDimensions( + int, const nvinfer1::DimsExprs * inputs, int, nvinfer1::IExprBuilder & exprBuilder) TRT_NOEXCEPT +{ + nvinfer1::DimsExprs ret; + ret.nbDims = 2; + ret.d[0] = inputs[0].d[0]; + ret.d[1] = exprBuilder.constant(mTopK); + + return ret; +} + +bool KnnBatchMlogK::supportsFormatCombination( + int pos, const nvinfer1::PluginTensorDesc * ioDesc, int, int) TRT_NOEXCEPT +{ + return ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR && + ioDesc[pos].type == (pos < 2 ? nvinfer1::DataType::kFLOAT : nvinfer1::DataType::kINT32); +} + +void KnnBatchMlogK::configurePlugin( + const nvinfer1::DynamicPluginTensorDesc * inDesc, int nbInputs, + const nvinfer1::DynamicPluginTensorDesc * outDesc, int nbOutputs) TRT_NOEXCEPT +{ + // Validate input arguments + PLUGIN_ASSERT(nbInputs == 4); + for (int pos = 0; pos < 4; ++pos) { + PLUGIN_ASSERT( + inDesc[pos].desc.format == nvinfer1::TensorFormat::kLINEAR && + inDesc[pos].desc.type == (pos < 2 ? nvinfer1::DataType::kFLOAT : nvinfer1::DataType::kINT32)); + } + + PLUGIN_ASSERT(nbOutputs == 1); + PLUGIN_ASSERT( + outDesc[0].desc.format == nvinfer1::TensorFormat::kLINEAR && + outDesc[0].desc.type == nvinfer1::DataType::kINT32); +} + +size_t KnnBatchMlogK::getWorkspaceSize( + const nvinfer1::PluginTensorDesc *, int, const nvinfer1::PluginTensorDesc *, + int) const TRT_NOEXCEPT +{ + return 0; +} + +int KnnBatchMlogK::enqueue( + const nvinfer1::PluginTensorDesc * inDesc, const nvinfer1::PluginTensorDesc * outDesc, + const void * const * inputs, void * const * outputs, void *, cudaStream_t stream) TRT_NOEXCEPT +{ + const int32_t n = inDesc[0].dims.d[0]; + const int32_t m = inDesc[1].dims.d[0]; + + const void * xyz = inputs[0]; + const void * query_xyz = inputs[1]; + const void * batch_idxs = inputs[2]; + const void * query_batch_offsets = inputs[3]; + + void * output = outputs[0]; + + switch (outDesc[0].type) { + case nvinfer1::DataType::kINT32: + KnnBatchMlogKLauncher( + n, m, mTopK, reinterpret_cast(xyz), + reinterpret_cast(query_xyz), reinterpret_cast(batch_idxs), + reinterpret_cast(query_batch_offsets), reinterpret_cast(output), + stream); + break; + + default: + break; + } + + return 0; +} + +void KnnBatchMlogK::attachToContext(cudnnContext *, cublasContext *, nvinfer1::IGpuAllocator *) + TRT_NOEXCEPT +{ +} + +void KnnBatchMlogK::detachFromContext() TRT_NOEXCEPT +{ +} + +nvinfer1::DataType KnnBatchMlogK::getOutputDataType(int, const nvinfer1::DataType *, int) const + TRT_NOEXCEPT +{ + return nvinfer1::DataType::kINT32; +} + +const char * KnnBatchMlogK::getPluginType() const TRT_NOEXCEPT +{ + return PLUGIN_NAME; +} + +const char * KnnBatchMlogK::getPluginVersion() const TRT_NOEXCEPT +{ + return PLUGIN_VERSION; +} + +int KnnBatchMlogK::getNbOutputs() const TRT_NOEXCEPT +{ + return 1; +} + +size_t KnnBatchMlogK::getSerializationSize() const TRT_NOEXCEPT +{ + return sizeof(mTopK); +} + +void KnnBatchMlogK::serialize(void * buffer) const TRT_NOEXCEPT +{ + serialize_value(&buffer, mTopK); +} + +/* ====================== creator ====================== */ +KnnBatchMlogKCreator::KnnBatchMlogKCreator() +{ + mPluginAttributes.emplace_back( + nvinfer1::PluginField("top_k", nullptr, nvinfer1::PluginFieldType::kINT32, 1)); + mFC.nbFields = mPluginAttributes.size(); + mFC.fields = mPluginAttributes.data(); +} + +const char * KnnBatchMlogKCreator::getPluginName() const TRT_NOEXCEPT +{ + return PLUGIN_NAME; +} + +const char * KnnBatchMlogKCreator::getPluginVersion() const TRT_NOEXCEPT +{ + return PLUGIN_VERSION; +} + +nvinfer1::IPluginV2DynamicExt * KnnBatchMlogKCreator::createPlugin( + const char * name, const nvinfer1::PluginFieldCollection * fc) TRT_NOEXCEPT +{ + const nvinfer1::PluginField * fields = fc->fields; + int32_t top_k = 0; + for (int i = 0; i < fc->nbFields; ++i) { + const char * attrName = fields[i].name; + if (!strcmp(attrName, "top_k")) { + PLUGIN_ASSERT(fields[i].type == nvinfer1::PluginFieldType::kINT32); + top_k = *static_cast(fields[i].data); + } + } + auto plugin = new KnnBatchMlogK(name, top_k); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; +} + +nvinfer1::IPluginV2DynamicExt * KnnBatchMlogKCreator::deserializePlugin( + const char * name, const void * serialData, size_t serialLength) TRT_NOEXCEPT +{ + auto plugin = new KnnBatchMlogK(name, serialData, serialLength); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; +} +} // namespace autoware::trt_mtr diff --git a/perception/autoware_mtr/lib/src/knn/trt_knn_batch_mlogk_kernel.cu b/perception/autoware_mtr/lib/src/knn/trt_knn_batch_mlogk_kernel.cu new file mode 100644 index 0000000000000..c0ccdc33cc1eb --- /dev/null +++ b/perception/autoware_mtr/lib/src/knn/trt_knn_batch_mlogk_kernel.cu @@ -0,0 +1,126 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "common/trt_plugin_helper.hpp" +#include "knn/trt_knn_batch_mlogk_kernel.hpp" + +__global__ void knn_batch_mlogk_kernel( + const int32_t n, const int32_t m, const int32_t k, const float * xyz, const float * query_xyz, + const int * batch_idxs, const int * query_batch_offsets, int * output) +{ + const int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (pt_idx >= n) { + return; + } + + xyz += pt_idx * 3; + output += pt_idx * k; + + float ox = xyz[0]; + float oy = xyz[1]; + float oz = xyz[2]; + + float best[150]; + int best_idx[150]; + + int32_t heap_len = 0; + + for (int i = 0; i <= k; ++i) { + best[i] = 1e20; + best_idx[i] = -1; + } + + int batch_idx = batch_idxs[pt_idx]; + if (batch_idx < 0) { + return; + } + + int start_idx = query_batch_offsets[batch_idx]; + int end_idx = query_batch_offsets[batch_idx + 1]; + + int tmp_idx; + float tmp_val; + for (int i = start_idx; i < end_idx; ++i) { + float x = query_xyz[i * 3 + 0]; + float y = query_xyz[i * 3 + 1]; + float z = query_xyz[i * 3 + 2]; + float d2 = (ox - x) * (ox - x) + (oy - y) * (oy - y) + (oz - z) * (oz - z); + + if (heap_len < k) { + ++heap_len; + best[heap_len] = d2; + best_idx[heap_len] = i - start_idx; + int cur_idx = heap_len, fa_idx = cur_idx >> 1; + while (fa_idx > 0) { + if (best[cur_idx] < best[fa_idx]) { + break; + } + tmp_idx = best_idx[cur_idx]; + best_idx[cur_idx] = best_idx[fa_idx]; + best_idx[fa_idx] = tmp_idx; + tmp_val = best[cur_idx]; + best[cur_idx] = best[fa_idx]; + best[fa_idx] = tmp_val; + cur_idx = fa_idx; + fa_idx = cur_idx >> 1; + } + } else { + if (d2 > best[1]) { + continue; + } + best[1] = d2; + best_idx[1] = i - start_idx; + + int32_t cur_idx = 1, son_idx; + while (cur_idx <= k) { + son_idx = cur_idx << 1; + if (son_idx > k) { + break; + } + if (son_idx + 1 <= k && best[son_idx] < best[son_idx + 1]) { + ++son_idx; + } + + if (son_idx <= k && best[cur_idx] < best[son_idx]) { + tmp_idx = best_idx[cur_idx]; + best_idx[cur_idx] = best_idx[son_idx]; + best_idx[son_idx] = tmp_idx; + tmp_val = best[cur_idx]; + best[cur_idx] = best[son_idx]; + best[son_idx] = tmp_val; + } else { + break; + } + cur_idx = son_idx; + } + } + } + + for (int i = 1; i <= k; ++i) { + output[i - 1] = best_idx[i]; + } +} + +cudaError_t KnnBatchMlogKLauncher( + const int32_t n, const int32_t m, const int32_t k, const float * xyz, const float * query_xyz, + const int * batch_idxs, const int * query_batch_offsets, int * output, cudaStream_t stream) +{ + dim3 blocks(DIVUP(n, THREADS_PER_BLOCK)); + dim3 threads(THREADS_PER_BLOCK); + + knn_batch_mlogk_kernel<<>>( + n, m, k, xyz, query_xyz, batch_idxs, query_batch_offsets, output); + + return cudaGetLastError(); +} diff --git a/perception/autoware_mtr/lib/src/postprocess/postprocess_kernel.cu b/perception/autoware_mtr/lib/src/postprocess/postprocess_kernel.cu new file mode 100644 index 0000000000000..8a6a8ddc96d19 --- /dev/null +++ b/perception/autoware_mtr/lib/src/postprocess/postprocess_kernel.cu @@ -0,0 +1,54 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +__global__ void transformTrajectoryKernel( + const int B, const int M, const int T, const int AgentDim, const float * targetState, + const int PredDim, float * predTrajectory) +{ + int b = blockIdx.x * blockDim.x + threadIdx.x; + int m = blockIdx.y * blockDim.y + threadIdx.y; + int t = blockIdx.z * blockDim.z + threadIdx.z; + + if (b >= B || m >= M || t >= T) { + return; + } + + const int predIdx = (b * M * T + m * T + t) * PredDim; + const float predX = predTrajectory[predIdx]; + const float predY = predTrajectory[predIdx + 1]; + + const int targetIdx = b * AgentDim; + const float targetX = targetState[targetIdx]; + const float targetY = targetState[targetIdx + 1]; + const float targetYaw = targetState[targetIdx + 6]; + const float targetCos = cosf(targetYaw); + const float targetSin = sinf(targetYaw); + + predTrajectory[predIdx] = targetCos * predX - targetSin * predY + targetX; + predTrajectory[predIdx + 1] = targetSin * predX + targetCos * predY + targetY; +} + +cudaError_t postprocessLauncher( + const int B, const int M, const int T, const int AgentDim, const float * targetState, + const int PredDim, float * predTrajectory, cudaStream_t stream) +{ + // TODO: update the number of blocks and threads to guard from `cudaErrorIllegalAccess` + constexpr int threadsPerBlock = 256; + dim3 blocks(B, M, T); + + transformTrajectoryKernel<<>>( + B, M, T, AgentDim, targetState, PredDim, predTrajectory); + + return cudaGetLastError(); +} diff --git a/perception/autoware_mtr/lib/src/preprocess/agent_preprocess_kernel.cu b/perception/autoware_mtr/lib/src/preprocess/agent_preprocess_kernel.cu new file mode 100644 index 0000000000000..608dd812bd247 --- /dev/null +++ b/perception/autoware_mtr/lib/src/preprocess/agent_preprocess_kernel.cu @@ -0,0 +1,135 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "preprocess/agent_preprocess_kernel.cuh" + +#include + +__global__ void agentPreprocessKernel( + const int B, const int N, const int T, const int D, const int C, const int sdc_index, + const int * target_index, const int * object_type_index, const float * timestamps, + const float * in_trajectory, float * out_data, bool * out_mask, float * out_last_pos) +{ + int b = blockIdx.x * blockDim.x + threadIdx.x; + int n = blockIdx.y * blockDim.y + threadIdx.y; + int t = blockIdx.z * blockDim.z + threadIdx.z; + + if (b >= B || n >= N || t >= T) { + return; + } + + const int out_idx = (b * N * T + n * T + t) * (D - 2 + C + 2 + T + 1 + 2); + + // === out data === + // --- transform trajectory to target centric coords --- + const int src_trajectory_idx = (n * T + t) * D; + const float x = in_trajectory[src_trajectory_idx]; + const float y = in_trajectory[src_trajectory_idx + 1]; + const float z = in_trajectory[src_trajectory_idx + 2]; + const float length = in_trajectory[src_trajectory_idx + 3]; + const float width = in_trajectory[src_trajectory_idx + 4]; + const float height = in_trajectory[src_trajectory_idx + 5]; + const float yaw = in_trajectory[src_trajectory_idx + 6]; + const float vx = in_trajectory[src_trajectory_idx + 7]; + const float vy = in_trajectory[src_trajectory_idx + 8]; + const float ax = in_trajectory[src_trajectory_idx + 9]; + const float ay = in_trajectory[src_trajectory_idx + 10]; + const float is_valid = in_trajectory[src_trajectory_idx + 11]; + + // extract targets trajectories + const int center_idx = (target_index[b] * T + T - 1) * D; + const float center_x = in_trajectory[center_idx]; + const float center_y = in_trajectory[center_idx + 1]; + const float center_z = in_trajectory[center_idx + 2]; + const float center_yaw = in_trajectory[center_idx + 6]; + const float center_cos = cos(center_yaw); + const float center_sin = sin(center_yaw); + + // do transform + const float trans_x = center_cos * (x - center_x) - center_sin * (y - center_y); + const float trans_y = center_sin * (x - center_x) + center_cos * (y - center_y); + const float trans_z = z - center_z; + const float trans_yaw = yaw - center_yaw; + const float trans_vx = center_cos * vx - center_sin * vy; + const float trans_vy = center_sin * vx + center_cos * vy; + const float trans_ax = center_cos * ax - center_sin * ay; + const float trans_ay = center_sin * ax + center_cos * ay; + + out_data[out_idx] = trans_x; + out_data[out_idx + 1] = trans_y; + out_data[out_idx + 2] = trans_z; + out_data[out_idx + 3] = length; + out_data[out_idx + 4] = width; + out_data[out_idx + 5] = height; + + // --- onehot --- + const int onehot_idx = out_idx + 6; + out_data[onehot_idx + object_type_index[n]] = 1.0f; + + if (target_index[b] == n) { + out_data[onehot_idx + C] = 1.0f; + } + + if (sdc_index == n) { + out_data[onehot_idx + C + 1] = 1.0f; + } + + // --- embedding --- + const int embed_idx = onehot_idx + C + 2; + // time embedding + out_data[embed_idx + t] = 1.0f; + out_data[embed_idx + T] = timestamps[t]; + // heading embedding + out_data[embed_idx + T + 1] = sin(trans_yaw); + out_data[embed_idx + T + 2] = cos(trans_yaw); + + const int other_idx = embed_idx + T + 3; + out_data[other_idx] = trans_vx; + out_data[other_idx + 1] = trans_vy; + out_data[other_idx + 2] = trans_ax; + out_data[other_idx + 3] = trans_ay; + + // === mask === + const int mask_idx = b * N * T + n * T + t; + out_mask[mask_idx] = is_valid == 1.0f; + + // === last pos === + if (t == T - 1) { + const int pos_idx = (b * N + n) * 3; + out_last_pos[pos_idx] = trans_x; + out_last_pos[pos_idx + 1] = trans_y; + out_last_pos[pos_idx + 2] = trans_z; + } +} + +cudaError_t agentPreprocessLauncher( + const int B, const int N, const int T, const int D, const int C, const int sdc_index, + const int * target_index, const int * object_type_index, const float * timestamps, + const float * in_trajectory, float * out_data, bool * out_mask, float * out_last_pos, + cudaStream_t stream) +{ + if (D != 12) { + std::cerr << "D must be 12, but got " << D << std::endl; + return cudaError::cudaErrorInvalidValue; + } + + // TODO: update the number of blocks and threads to guard from `cudaErrorIllegalAccess` + constexpr int threadsPerBlock = 256; + dim3 blocks(B, N, T); + agentPreprocessKernel<<>>( + B, N, T, D, C, sdc_index, target_index, object_type_index, timestamps, in_trajectory, out_data, + out_mask, out_last_pos); + + return cudaGetLastError(); +} diff --git a/perception/autoware_mtr/lib/src/preprocess/polyline_preprocess_kernel.cu b/perception/autoware_mtr/lib/src/preprocess/polyline_preprocess_kernel.cu new file mode 100644 index 0000000000000..0ffc9d3ae6f96 --- /dev/null +++ b/perception/autoware_mtr/lib/src/preprocess/polyline_preprocess_kernel.cu @@ -0,0 +1,284 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "preprocess/polyline_preprocess_kernel.cuh" + +#include + +#include + +#include + +__global__ void transformPolylineKernel( + const int K, const int P, const int PointDim, const float * inPolyline, const int B, + const int AgentDim, const float * targetState, float * outPolyline, bool * outPolylineMask) +{ + int b = blockIdx.x * blockDim.x + threadIdx.x; + int k = blockIdx.y * blockDim.y + threadIdx.y; + int p = blockIdx.z * blockDim.z + threadIdx.z; + + if (b >= B || k >= K || p >= P) { + return; + } + + const int inIdx = (k * P + p) * PointDim; + const int outIdx = b * K * P + k * P + p; + bool isValid = false; + for (int d = 0; d < PointDim - 1; ++d) { + if (inPolyline[inIdx + d] != 0.0f) { + isValid = true; + } + } + outPolylineMask[outIdx] = isValid; + + // initialize output polyline with 0.0 + for (int d = 0; d < PointDim + 2; ++d) { + outPolyline[outIdx * (PointDim + 2) + d] = 0.0f; + } + + // set transformed values if valid, otherwise all 0.0. + if (isValid) { + const float x = inPolyline[inIdx]; + const float y = inPolyline[inIdx + 1]; + const float z = inPolyline[inIdx + 2]; + const float dx = inPolyline[inIdx + 3]; + const float dy = inPolyline[inIdx + 4]; + const float dz = inPolyline[inIdx + 5]; + const float typeID = inPolyline[inIdx + 6]; + + const int centerIdx = b * AgentDim; + const float centerX = targetState[centerIdx]; + const float centerY = targetState[centerIdx + 1]; + const float centerZ = targetState[centerIdx + 2]; + const float centerYaw = targetState[centerIdx + 6]; + const float centerCos = cosf(centerYaw); + const float centerSin = sinf(centerYaw); + + // do transform + const float transX = centerCos * (x - centerX) - centerSin * (y - centerY); + const float transY = centerSin * (x - centerX) + centerCos * (y - centerY); + const float transZ = z - centerZ; + const float transDx = centerCos * dx - centerSin * dy; + const float transDy = centerSin * dx + centerCos * dy; + const float transDz = dz; + + outPolyline[outIdx * (PointDim + 2)] = transX; + outPolyline[outIdx * (PointDim + 2) + 1] = transY; + outPolyline[outIdx * (PointDim + 2) + 2] = transZ; + outPolyline[outIdx * (PointDim + 2) + 3] = transDx; + outPolyline[outIdx * (PointDim + 2) + 4] = transDy; + outPolyline[outIdx * (PointDim + 2) + 5] = transDz; + outPolyline[outIdx * (PointDim + 2) + 6] = typeID; + } +} + +__global__ void setPreviousPositionKernel( + const int B, const int K, const int P, const int D, float * polyline) +{ + int b = blockIdx.x * blockDim.x + threadIdx.x; + int k = blockIdx.y * blockDim.y + threadIdx.y; + int p = blockIdx.z * blockDim.z + threadIdx.z; + + if (b >= B || k >= K || p >= P) { + return; + } + + const int curIdx = (b * K * P + k * P + p) * D; + const int preIdx = p == 0 ? curIdx : (b * K * P + k * P + p - 1) * D; + + polyline[curIdx + D - 2] = polyline[preIdx]; // x + polyline[curIdx + D - 1] = polyline[preIdx + 1]; // y +} + +__global__ void calculateCenterDistanceKernel( + const int B, const int K, const int P, const int D, const float * polyline, + const bool * polylineMask, float * distance) +{ + int b = blockIdx.x * blockDim.x + threadIdx.x; + int k = blockIdx.y * blockDim.y + threadIdx.y; + if (b >= B || k >= K) { + return; + } + + // calculate polyline center + double sumX = 0.0, sumY = 0.0; + double numValid = 0.0; + for (int p = 0; p < P; ++p) { + int idx = b * K * P + k * P + p; + if (polylineMask[idx]) { + sumX += polyline[idx * D]; + sumY += polyline[idx * D + 1]; + ++numValid; + } + } + float centerX = static_cast(sumX / max(1.0, numValid)); + float centerY = static_cast(sumY / max(1.0, numValid)); + + distance[b * K + k] = hypot(centerX, centerY); +} + +template +__global__ void extractTopKPolylineKernel( + const int K, const int B, const int L, const int P, const int D, const float * inPolyline, + const bool * inPolylineMask, const float * inDistance, float * outPolyline, + bool * outPolylineMask) +{ + int b = blockIdx.x; // Batch index + int tid = threadIdx.x; // Thread local index in this CUDA block + int p = blockIdx.y * blockDim.y + threadIdx.y; // Point index + int d = blockIdx.z * blockDim.z + threadIdx.z; // Dim index + // Since all threads in a block expected to work wiht CUB, `return` shold not be called here + // if (b >= B || tid >= L || p >= P || d >= D) { + // return; + // } + + // Specialize BlockRadixSort type + using BlockRadixSortT = cub::BlockRadixSort; + using TempStorageT = typename BlockRadixSortT::TempStorage; + + __shared__ TempStorageT temp_storage; + + float distances[ITEMS_PER_THREAD] = {0}; + unsigned int distance_indices[ITEMS_PER_THREAD] = {0}; + for (unsigned int i = 0; i < ITEMS_PER_THREAD; i++) { + int polyline_idx = BLOCK_THREADS * i + tid; // index order don't need to care. + int distance_idx = b * L + polyline_idx; + distance_indices[i] = polyline_idx; + distances[i] = (polyline_idx < L && distance_idx < B * L) ? inDistance[distance_idx] : FLT_MAX; + } + + BlockRadixSortT(temp_storage).Sort(distances, distance_indices); + // Block-wide sync barrier necessary to refer the sort result + __syncthreads(); + + if (b >= B || tid >= L || p >= P || d >= D) { + return; + } + + for (unsigned int i = 0; i < ITEMS_PER_THREAD; i++) { + int consective_polyline_idx = + tid * ITEMS_PER_THREAD + i; // To keep sorted order, theads have to write consective region + int inIdx = b * L * P + distance_indices[i] * P + p; + int outIdx = b * K * P + consective_polyline_idx * P + p; + if (consective_polyline_idx >= K || fabsf(FLT_MAX - distances[i]) < FLT_EPSILON) { + continue; + } + outPolyline[outIdx * D + d] = inPolyline[inIdx * D + d]; + outPolylineMask[outIdx] = inPolylineMask[inIdx]; + } +} + +__global__ void calculatePolylineCenterKernel( + const int B, const int K, const int P, const int D, const float * polyline, + const bool * polylineMask, float * center) +{ + int b = blockIdx.x * blockDim.x + threadIdx.x; + int k = blockIdx.y * blockDim.y + threadIdx.y; + + if (b >= B || k >= K) { + return; + } + + // initialize with 0.0 + int centerIdx = (b * K + k) * 3; + for (int d = 0; d < 3; ++d) { + center[centerIdx + d] = 0.0f; + } + + // calculate polyline center + double sumX = 0.0, sumY = 0.0, sumZ = 0.0; + double numValid = 0.0; + for (int p = 0; p < P; ++p) { + int idx = b * K * P + k * P + p; + if (polylineMask[idx]) { + sumX += polyline[idx * D]; + sumY += polyline[idx * D + 1]; + sumZ += polyline[idx * D + 2]; + ++numValid; + } + } + + center[centerIdx] = static_cast(sumX / max(1.0, numValid)); + center[centerIdx + 1] = static_cast(sumY / max(1.0, numValid)); + center[centerIdx + 2] = static_cast(sumZ / max(1.0, numValid)); +} + +cudaError_t polylinePreprocessWithTopkLauncher( + const int K, const int L, const int P, const int PointDim, const float * inPolyline, const int B, + const int AgentDim, const float * targetState, float * tmpPolyline, bool * tmpPolylineMask, + float * tmpDistance, float * outPolyline, bool * outPolylineMask, float * outPolylineCenter, + cudaStream_t stream) +{ + if (L < K) { + std::cerr << "L must be greater than K, but got L: " << L << ", K: " << K << std::endl; + return cudaError_t::cudaErrorInvalidValue; + } + + const int outPointDim = PointDim + 2; + + // TODO: update the number of blocks and threads to guard from `cudaErrorIllegalAccess` + constexpr int threadsPerBlock = 256; + const dim3 blocks1(B, L, P); + transformPolylineKernel<<>>( + L, P, PointDim, inPolyline, B, AgentDim, targetState, tmpPolyline, tmpPolylineMask); + + const dim3 blocks2(B, L); + calculateCenterDistanceKernel<<>>( + B, L, P, outPointDim, tmpPolyline, tmpPolylineMask, tmpDistance); + + const dim3 blocks3(B, P, outPointDim); + constexpr unsigned int itemsPerThread = 24; + if (threadsPerBlock * itemsPerThread < L) { + std::cerr << "Larger L (" << L << ") than acceptable range (< " + << threadsPerBlock * itemsPerThread << ") detected." << std::endl; + return cudaError_t::cudaErrorInvalidValue; + } + extractTopKPolylineKernel + <<>>( + K, B, L, P, outPointDim, tmpPolyline, tmpPolylineMask, tmpDistance, outPolyline, + outPolylineMask); + + const dim3 blocks4(B, K, P); + setPreviousPositionKernel<<>>( + B, K, P, outPointDim, outPolyline); + + const dim3 blocks5(B, K); + calculatePolylineCenterKernel<<>>( + B, K, P, outPointDim, outPolyline, outPolylineMask, outPolylineCenter); + + return cudaGetLastError(); +} + +cudaError_t polylinePreprocessLauncher( + const int K, const int P, const int PointDim, const float * inPolyline, const int B, + const int AgentDim, const float * targetState, float * outPolyline, bool * outPolylineMask, + float * outPolylineCenter, cudaStream_t stream) +{ + const int outPointDim = PointDim + 2; + + // TODO: update the number of blocks and threads to guard from `cudaErrorIllegalAccess` + constexpr int threadsPerBlock = 256; + const dim3 block3d(B, K, P); + transformPolylineKernel<<>>( + K, P, PointDim, inPolyline, B, AgentDim, targetState, outPolyline, outPolylineMask); + + setPreviousPositionKernel<<>>( + B, K, P, outPointDim, outPolyline); + + const dim3 block2d(B, K); + calculatePolylineCenterKernel<<>>( + B, K, P, outPointDim, outPolyline, outPolylineMask, outPolylineCenter); + + return cudaGetLastError(); +} diff --git a/perception/autoware_mtr/package.xml b/perception/autoware_mtr/package.xml new file mode 100644 index 0000000000000..bcdb22fab2713 --- /dev/null +++ b/perception/autoware_mtr/package.xml @@ -0,0 +1,33 @@ + + + + autoware_mtr + 0.1.0 + ROS 2 Node of Motion Transfromer(a.k.a MTR). + kotarouetake + Apache-2.0 + + ament_cmake_auto + autoware_cmake + + autoware_map_msgs + autoware_object_recognition_utils + autoware_perception_msgs + autoware_tensorrt_common + autoware_universe_utils + autoware_vehicle_info_utils + lanelet2_core + lanelet2_extension + lanelet2_routing + lanelet2_traffic_rules + nav_msgs + rclcpp + rclcpp_components + + ament_lint_auto + autoware_lint_common + + + ament_cmake + + diff --git a/perception/autoware_mtr/schema/tensorrt_mtr.schema.json b/perception/autoware_mtr/schema/tensorrt_mtr.schema.json new file mode 100644 index 0000000000000..93dbf94ff72ce --- /dev/null +++ b/perception/autoware_mtr/schema/tensorrt_mtr.schema.json @@ -0,0 +1,18 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Parameters for Motion Transformer Node", + "type": "object", + "definitions": {}, + "properties": { + "/**": { + "type": "object", + "properties": { + "ros__parameters": { + "$ref": "#/definitions/" + } + }, + "required": ["ros__parameters"] + } + }, + "required": ["/**"] +} diff --git a/perception/autoware_mtr/src/builder.cpp b/perception/autoware_mtr/src/builder.cpp new file mode 100644 index 0000000000000..d5aedd4a6134d --- /dev/null +++ b/perception/autoware_mtr/src/builder.cpp @@ -0,0 +1,392 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/mtr/builder.hpp" + +#include +#include + +namespace autoware::mtr +{ +namespace +{ +/** + * @brief Get the name of precision in string. + * + * @param type + * @return std::string + */ +std::string getPrecisionName(const PrecisionType & type) +{ + switch (type) { + case PrecisionType::FP32: + return "FP32"; + case PrecisionType::FP16: + return "FP16"; + case PrecisionType::INT8: + return "INT8"; + default: + throw std::runtime_error("Unsupported precision type."); + } +} + +/** + * @brief Get the name of calibration in string. + * + * @param type + * @return std::string + */ +std::string getCalibrationName(const CalibrationType & type) +{ + switch (type) { + case CalibrationType::ENTROPY: + return "ENTROPY"; + case CalibrationType::LEGACY: + return "LEGACY"; + case CalibrationType::PERCENTILE: + return "PERCENTILE"; + case CalibrationType::MINMAX: + return "MINMAX"; + default: + throw std::runtime_error("Unsupported calibration type."); + } +} +} // namespace + +MTRBuilder::MTRBuilder( + const std::string & model_filepath, const BuildConfig & build_config, + const size_t max_workspace_size) +: model_filepath_(model_filepath), max_workspace_size_(max_workspace_size) +{ + build_config_ = std::make_unique(build_config); + runtime_ = TrtUniquePtr(nvinfer1::createInferRuntime(logger_)); +} + +void MTRBuilder::setup() +{ + if (!fs::exists(model_filepath_)) { + is_initialized_ = false; + return; + } + std::string engine_path = model_filepath_; + if (model_filepath_.extension() == ".engine") { + std::cout << "Loading... " << model_filepath_ << std::endl; + loadEngine(model_filepath_); + } else if (model_filepath_.extension() == ".onnx") { + const auto engine_cache_path = createEngineCachePath(); + if (fs::exists(engine_cache_path)) { + std::cout << "Loading cached engine... " << engine_cache_path << std::endl; + if (!loadEngine(engine_cache_path)) { + std::cerr << "Fail to load engine" << std::endl; + is_initialized_ = false; + return; + } + } else { + std::cout << "Building... " << engine_cache_path << std::endl; + logger_.log(nvinfer1::ILogger::Severity::kINFO, "start build engine"); + if (!buildEngineFromOnnx(model_filepath_, engine_cache_path)) { + std::cerr << "Fail to build engine from onnx" << std::endl; + is_initialized_ = false; + return; + } + logger_.log(nvinfer1::ILogger::Severity::kINFO, "End build engine"); + } + engine_path = engine_cache_path; + } else { + is_initialized_ = false; + return; + } + + context_ = TrtUniquePtr(engine_->createExecutionContext()); + if (!context_) { + logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create context"); + is_initialized_ = false; + return; + } + + is_initialized_ = true; +} + +bool MTRBuilder::loadEngine(const std::string & filepath) +{ + try { + std::ifstream engine_file(filepath); + std::stringstream buffer; + buffer << engine_file.rdbuf(); + std::string engine_str = buffer.str(); + engine_ = TrtUniquePtr(runtime_->deserializeCudaEngine( + reinterpret_cast(engine_str.data()), engine_str.size())); + return true; + } catch (std::exception & e) { + std::cerr << e.what() << std::endl; + return false; + } +} + +fs::path MTRBuilder::createEngineCachePath() const +{ + fs::path cache_engine_path{model_filepath_}; + auto precision_name = getPrecisionName(build_config_->precision); + auto calibration_name = build_config_->precision == PrecisionType::INT8 + ? getCalibrationName(build_config_->calibration) + : ""; + cache_engine_path.replace_extension(calibration_name + precision_name + ".engine"); + return cache_engine_path; +} + +bool MTRBuilder::buildEngineFromOnnx( + const std::string & filepath, const std::string & output_engine_filepath) +{ + auto builder = TrtUniquePtr(nvinfer1::createInferBuilder(logger_)); + if (!builder) { + logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create builder"); + return false; + } + + const auto explicit_batch = + 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); + + auto network = + TrtUniquePtr(builder->createNetworkV2(explicit_batch)); + if (!network) { + logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create network"); + return false; + } + + auto config = TrtUniquePtr(builder->createBuilderConfig()); + if (!config) { + logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create builder config"); + return false; + } + + if (build_config_->precision == PrecisionType::FP16) { + config->setFlag(nvinfer1::BuilderFlag::kFP16); + } else if (build_config_->precision == PrecisionType::INT8) { + config->setFlag(nvinfer1::BuilderFlag::kINT8); + } + +#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH >= 8400 + config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_size_); +#else + config->setMaxWorkspaceSize(max_workspace_size_); +#endif + + auto parser = TrtUniquePtr(nvonnxparser::createParser(*network, logger_)); + if (!parser->parseFromFile( + filepath.c_str(), static_cast(nvinfer1::ILogger::Severity::kERROR))) { + logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to parse onnx file"); + return false; + } + + if (isDynamic()) { + auto profile = builder->createOptimizationProfile(); + const auto input0 = network->getInput(0); + const auto input0_dims = input0->getDimensions(); + const auto num_past_frames = input0_dims.d[2]; + const auto num_agent_dims = input0_dims.d[3]; + + const auto input2 = network->getInput(2); + const auto input2_dims = input2->getDimensions(); + const auto num_polylines = input2_dims.d[1]; + const auto num_points = input2_dims.d[2]; + const auto num_polyline_dims = input2_dims.d[3]; + + const auto & batch_target = build_config_->batch_target; + const auto & batch_agent = build_config_->batch_agent; + { // trajectory + auto name = network->getInput(0)->getName(); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMIN, + nvinfer1::Dims4{batch_target.k_min, batch_agent.k_min, num_past_frames, num_agent_dims}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kOPT, + nvinfer1::Dims4{batch_target.k_opt, batch_agent.k_opt, num_past_frames, num_agent_dims}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMAX, + nvinfer1::Dims4{batch_target.k_max, batch_agent.k_max, num_past_frames, num_agent_dims}); + } + { // trajectory mask + auto name = network->getInput(1)->getName(); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMIN, + nvinfer1::Dims3{batch_target.k_min, batch_agent.k_min, num_past_frames}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kOPT, + nvinfer1::Dims3{batch_target.k_opt, batch_agent.k_opt, num_past_frames}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMAX, + nvinfer1::Dims3{batch_target.k_max, batch_agent.k_max, num_past_frames}); + } + { // polyline + auto name = network->getInput(2)->getName(); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMIN, + nvinfer1::Dims4{batch_target.k_min, num_polylines, num_points, num_polyline_dims}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kOPT, + nvinfer1::Dims4{batch_target.k_opt, num_polylines, num_points, num_polyline_dims}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMAX, + nvinfer1::Dims4{batch_target.k_max, num_polylines, num_points, num_polyline_dims}); + } + { // polyline mask + auto name = network->getInput(3)->getName(); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMIN, + nvinfer1::Dims3{batch_target.k_min, num_polylines, num_points}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kOPT, + nvinfer1::Dims3{batch_target.k_opt, num_polylines, num_points}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMAX, + nvinfer1::Dims3{batch_target.k_max, num_polylines, num_points}); + } + { // polyline center + auto name = network->getInput(4)->getName(); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMIN, + nvinfer1::Dims3{batch_target.k_min, num_polylines, 3}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kOPT, + nvinfer1::Dims3{batch_target.k_opt, num_polylines, 3}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMAX, + nvinfer1::Dims3{batch_target.k_max, num_polylines, 3}); + } + { // last pos + auto name = network->getInput(5)->getName(); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMIN, + nvinfer1::Dims3{batch_target.k_min, batch_agent.k_min, 3}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kOPT, + nvinfer1::Dims3{batch_target.k_opt, batch_agent.k_opt, 3}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMAX, + nvinfer1::Dims3{batch_target.k_max, batch_agent.k_max, 3}); + } + { // track index + auto name = network->getInput(6)->getName(); + nvinfer1::Dims minDim, optDim, maxDim; + minDim.nbDims = 1; + minDim.d[0] = batch_target.k_min; + profile->setDimensions(name, nvinfer1::OptProfileSelector::kMIN, minDim); + optDim.nbDims = 1; + optDim.d[0] = batch_target.k_opt; + profile->setDimensions(name, nvinfer1::OptProfileSelector::kOPT, optDim); + maxDim.nbDims = 1; + maxDim.d[0] = batch_target.k_max; + profile->setDimensions(name, nvinfer1::OptProfileSelector::kMAX, maxDim); + } + { + // intention points + auto name = network->getInput(7)->getName(); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims3{batch_target.k_min, 64, 2}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims3{batch_target.k_opt, 64, 2}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims3{batch_target.k_max, 64, 2}); + } + { // pred scores + auto name = network->getOutput(0)->getName(); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims2{batch_target.k_min, 6}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims2{batch_target.k_opt, 6}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims2{batch_target.k_max, 6}); + } + { // pred trajs + auto name = network->getOutput(1)->getName(); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4{batch_target.k_min, 6, 80, 7}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4{batch_target.k_opt, 6, 80, 7}); + profile->setDimensions( + name, nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4{batch_target.k_max, 6, 80, 7}); + } + config->addOptimizationProfile(profile); + } + + if (isDynamic()) { +#if (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSOR_PATCH >= 8200 + config->setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kDETAILED); +#else + config->setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kVERBOSE); +#endif + } + +#if NV_TENSORRT_MAJOR >= 8 + auto plan = + TrtUniquePtr(builder->buildSerializedNetwork(*network, *config)); + if (!plan) { + logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create host memory"); + return false; + } + engine_ = TrtUniquePtr( + runtime_->deserializeCudaEngine(plan->data(), plan->size())); +#else + engine_ = TrtUniquePtr(builder->buildEngineWithConfig(*network, *config)); +#endif + + if (!engine_) { + logger_.log(nvinfer1::ILogger::Severity::kERROR, "Fail to create engine"); + return false; + } + +// save engine +#if NV_TENSORRT_MAJOR < 8 + auto data = TrtUniquePtr(engine_->serialize()); +#endif + std::ofstream file; + file.open(output_engine_filepath, std::ios::binary | std::ios::out); + if (!file.is_open()) { + return false; + } +#if NV_TENSORRT_MAJOR < 8 + file.write(reinterpret_cast(data->data()), data->size()); +#else + file.write(reinterpret_cast(plan->data()), plan->size()); +#endif + + file.close(); + + return true; +} + +bool MTRBuilder::isInitialized() const +{ + return is_initialized_; +} + +bool MTRBuilder::isDynamic() const +{ + return build_config_->is_dynamic(); +} + +bool MTRBuilder::setBindingDimensions(int index, nvinfer1::Dims dimensions) +{ + if (isDynamic()) { + return context_->setBindingDimensions(index, dimensions); + } else { + return true; + } +} + +bool MTRBuilder::enqueueV2(void ** bindings, cudaStream_t stream, cudaEvent_t * inputConsumed) +{ + return context_->enqueueV2(bindings, stream, inputConsumed); +} +} // namespace autoware::mtr diff --git a/perception/autoware_mtr/src/conversions/history.cpp b/perception/autoware_mtr/src/conversions/history.cpp new file mode 100644 index 0000000000000..8175af8ba493e --- /dev/null +++ b/perception/autoware_mtr/src/conversions/history.cpp @@ -0,0 +1,111 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/mtr/conversions/history.hpp" + +#include +#include + +namespace autoware::mtr +{ +using adl = autoware::mtr::AgentDimLabels; + +/** + * @brief Transform a 2D point with a reference point and rotation + * + * @param history history to modify. + * @param reference_state reference state used as reference frame. + * @return vector of histories, each in the reference frame of the last state. + */ +[[nodiscard]] std::pair transform2D( + const std::pair input_xy, const std::pair reference_xy, + const std::pair rotation_cos_sin) +{ + auto [x, y] = input_xy; + auto [ref_x, ref_y] = reference_xy; + auto [cos, sin] = rotation_cos_sin; + x -= ref_x; + y -= ref_y; + return {x * cos - y * sin, x * sin + y * cos}; +} + +/** + * @brief Get histories with agent states in the reference frame of the last state + * + * @param history history to modify. + * @param reference_state reference state used as reference frame. + * @return vector of histories, each in the reference frame of the last state. + */ +[[nodiscard]] AgentHistory getAgentCentricHistory( + const AgentHistory & history, const AgentState & reference_state) +{ + const auto latest_valid_state = history.get_latest_valid_state(); + const auto & reference_state_dim = autoware::mtr::AgentState::dim(); + const auto & history_state_dim = autoware::mtr::AgentHistory::state_dim(); + + if (!latest_valid_state.has_value() || reference_state_dim != history_state_dim) { + return history; + } + AgentHistory agent_centric_history = history; + const auto reference_array = reference_state.as_array(); + auto agent_centric_states_array = agent_centric_history.as_array(); + const auto ref_x = reference_array.at(adl::X); + const auto ref_y = reference_array.at(adl::Y); + const auto ref_z = reference_array.at(adl::Z); + const auto ref_yaw = reference_array.at(adl::YAW); + const auto cos = std::cos(-ref_yaw); + const auto sin = std::sin(-ref_yaw); + + const auto & d = reference_state_dim; + // Use i as the index for each new state + for (size_t i = 0; i < agent_centric_states_array.size(); i += d) { + auto & x = agent_centric_states_array.at(i + adl::X); + auto & y = agent_centric_states_array.at(i + adl::Y); + auto & vx = agent_centric_states_array.at(i + adl::VX); + auto & vy = agent_centric_states_array.at(i + adl::VY); + auto & ax = agent_centric_states_array.at(i + adl::AX); + auto & ay = agent_centric_states_array.at(i + adl::AY); + auto & z = agent_centric_states_array.at(i + adl::Z); + auto & yaw = agent_centric_states_array.at(i + adl::YAW); + + std::tie(x, y) = transform2D({x, y}, {ref_x, ref_y}, {cos, sin}); + std::tie(vx, vy) = transform2D({vx, vy}, {0.0, 0.0}, {cos, sin}); + std::tie(ax, ay) = transform2D({x, y}, {0.0, 0.0}, {cos, sin}); + + z -= ref_z; + yaw -= ref_yaw; + } + return agent_centric_history; +}; + +/** + * @brief Get histories with agent states in the reference frame of the last state + * + * @param histories vector of histories to modify. + * @return vector of histories, each in the reference frame of the last state. + */ +[[nodiscard]] std::vector getAgentCentricHistories( + const std::vector & histories) +{ + std::vector agent_centric_histories; + agent_centric_histories.reserve(histories.size()); + for (const auto & history : histories) { + const auto & reference_state = + history.get_latest_state(); // Todo(Daniel): should it be the latest VALID state? + agent_centric_histories.push_back(getAgentCentricHistory(history, reference_state)); + } + return agent_centric_histories; +}; + +} // namespace autoware::mtr diff --git a/perception/autoware_mtr/src/conversions/lanelet.cpp b/perception/autoware_mtr/src/conversions/lanelet.cpp new file mode 100644 index 0000000000000..3512c4283f83b --- /dev/null +++ b/perception/autoware_mtr/src/conversions/lanelet.cpp @@ -0,0 +1,142 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/mtr/conversions/lanelet.hpp" + +#include "autoware/mtr/polyline.hpp" + +#include + +#include +#include +#include + +namespace autoware::mtr +{ +std::optional LaneletConverter::convert( + const geometry_msgs::msg::Point & position, double distance_threshold) const +{ + std::vector container; + // parse lanelet layers + for (const auto & lanelet : lanelet_map_ptr_->laneletLayer) { + const auto lanelet_subtype = toSubtypeName(lanelet); + if (isLaneLike(lanelet_subtype)) { + // convert centerlines + if (isRoadwayLike(lanelet_subtype)) { + auto points = fromLinestring(lanelet.centerline3d(), position, distance_threshold); + insertLanePoints(points, container); + } + // convert boundaries except of virtual lines + if (!isTurnableIntersection(lanelet)) { + const auto left_bound = lanelet.leftBound3d(); + if (isBoundaryLike(left_bound)) { + auto points = fromLinestring(left_bound, position, distance_threshold); + insertLanePoints(points, container); + } + const auto right_bound = lanelet.rightBound3d(); + if (isBoundaryLike(right_bound)) { + auto points = fromLinestring(right_bound, position, distance_threshold); + insertLanePoints(points, container); + } + } + } else if (isCrosswalkLike(lanelet_subtype)) { + auto points = fromPolygon(lanelet.polygon3d(), position, distance_threshold); + insertLanePoints(points, container); + } + } + + // parse linestring layers + for (const auto & linestring : lanelet_map_ptr_->lineStringLayer) { + if (isBoundaryLike(linestring)) { + auto points = fromLinestring(linestring, position, distance_threshold); + insertLanePoints(points, container); + } + } + + return container.size() == 0 + ? std::nullopt + : std::make_optional( + container, max_num_polyline_, max_num_point_, point_break_distance_); +} + +std::vector LaneletConverter::fromLinestring( + const lanelet::ConstLineString3d & linestring, const geometry_msgs::msg::Point & position, + double distance_threshold) const noexcept +{ + if (linestring.size() == 0) { + return {}; + } + + std::vector output; + for (auto itr = linestring.begin(); itr != linestring.end(); ++itr) { + if (auto distance = + std::hypot(itr->x() - position.x, itr->y() - position.y, itr->z() - position.z); + distance > distance_threshold) { + continue; + } + double dx, dy, dz; + constexpr double epsilon = 1e-6; + if (itr == linestring.begin()) { + dx = 0.0; + dy = 0.0; + dz = 0.0; + } else { + dx = itr->x() - (itr - 1)->x(); + dy = itr->y() - (itr - 1)->y(); + dz = itr->z() - (itr - 1)->z(); + const auto norm = std::hypot(dx, dy, dz); + dx /= (norm + epsilon); + dy /= (norm + epsilon); + dz /= (norm + epsilon); + } + output.emplace_back(itr->x(), itr->y(), itr->z(), dx, dy, dz, 0.0); // TODO(ktro2828): Label ID + } + return output; +} + +std::vector LaneletConverter::fromPolygon( + const lanelet::CompoundPolygon3d & polygon, const geometry_msgs::msg::Point & position, + double distance_threshold) const noexcept +{ + if (polygon.size() == 0) { + return {}; + } + + std::vector output; + for (auto itr = polygon.begin(); itr != polygon.end(); ++itr) { + if (auto distance = + std::hypot(itr->x() - position.x, itr->y() - position.y, itr->z() - position.z); + distance > distance_threshold) { + continue; + } + double dx, dy, dz; + constexpr double epsilon = 1e-6; + if (itr == polygon.begin()) { + dx = 0.0; + dy = 0.0; + dz = 0.0; + } else { + dx = itr->x() - (itr - 1)->x(); + dy = itr->y() - (itr - 1)->y(); + dz = itr->z() - (itr - 1)->z(); + const auto norm = std::hypot(dx, dy, dz); + dx /= (norm + epsilon); + dy /= (norm + epsilon); + dz /= (norm + epsilon); + } + output.emplace_back(itr->x(), itr->y(), itr->z(), dx, dy, dz, 0.0); // TODO(ktro2828): Label ID + } + return output; +} +} // namespace autoware::mtr diff --git a/perception/autoware_mtr/src/intention_point.cpp b/perception/autoware_mtr/src/intention_point.cpp new file mode 100644 index 0000000000000..9b4036c99e143 --- /dev/null +++ b/perception/autoware_mtr/src/intention_point.cpp @@ -0,0 +1,41 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/mtr/intention_point.hpp" + +#include +#include +#include +#include + +namespace autoware::mtr +{ +std::vector IntentionPoint::as_array(const std::vector & label_names) const +{ + std::vector points; + points.reserve(label_names.size() * num_cluster * state_dim()); + for (const auto & name : label_names) { + const auto & label_points = data_map_.at(name); + for (const auto & p : label_points) { + points.emplace_back(p); + } + } + return points; +} + +size_t IntentionPoint::size() const noexcept +{ + return num_cluster * state_dim(); +} +} // namespace autoware::mtr diff --git a/perception/autoware_mtr/src/node.cpp b/perception/autoware_mtr/src/node.cpp new file mode 100644 index 0000000000000..57ea7042e2d2c --- /dev/null +++ b/perception/autoware_mtr/src/node.cpp @@ -0,0 +1,450 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/mtr/node.hpp" + +#include "autoware/mtr/agent.hpp" +#include "autoware/mtr/conversions/history.hpp" +#include "autoware/mtr/conversions/lanelet.hpp" +#include "autoware/mtr/fixed_queue.hpp" + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace autoware::mtr +{ +// TODO(ktro2828): use a parameter +constexpr double TIME_THRESHOLD = 1.0; +constexpr size_t MAX_NUM_TARGET = 1; +constexpr double POLYLINE_DISTANCE_THRESHOLD = 100.0; +namespace +{ +// Convert `TrackedObject` to `AgentState`. +AgentState createAgentState(const TrackedObject & object, const bool is_valid) +{ + const auto & pose = object.kinematics.pose_with_covariance.pose; + const auto & twist = object.kinematics.twist_with_covariance.twist; + const auto & accel = object.kinematics.acceleration_with_covariance.accel; + const auto & dimension = object.shape.dimensions; + + const float yaw = tf2::getYaw(pose.orientation); + const float valid = is_valid ? 1.0f : 0.0f; + + return {pose.position, dimension, yaw, twist.linear, accel.linear, valid}; +} + +// Get the label index corresponding to AgentLabel. If the label of tracked object is not * defined +// in AgentLabel returns `-1`. +int getLabelIndex(const TrackedObject & object) +{ + const auto classification = + autoware::object_recognition_utils::getHighestProbLabel(object.classification); + if (autoware::object_recognition_utils::isCarLikeVehicle(classification)) { + return AgentLabel::VEHICLE; + } + if (classification == ObjectClassification::PEDESTRIAN) { + return AgentLabel::PEDESTRIAN; + } + if ( + classification == ObjectClassification::MOTORCYCLE || + classification == ObjectClassification::BICYCLE) { + return AgentLabel::CYCLIST; + } + return -1; // other labels +} +} // namespace + +MTRNode::MTRNode(const rclcpp::NodeOptions & node_options) +: rclcpp::Node("mtr", node_options), transform_listener_(this) +{ + { + // Build MTR + // Model config + const auto model_path = declare_parameter("model_params.model_path"); + const auto target_labels = + declare_parameter>("model_params.target_labels"); + const auto num_past = static_cast(declare_parameter("model_params.num_past")); + const auto num_mode = static_cast(declare_parameter("model_params.num_mode")); + const auto num_future = static_cast(declare_parameter("model_params.num_future")); + const auto max_num_polyline = + static_cast(declare_parameter("model_params.max_num_polyline")); + const auto max_num_point = + static_cast(declare_parameter("model_params.max_num_point")); + const auto point_break_distance = + static_cast(declare_parameter("model_params.point_break_distance")); + const auto intention_point_filepath = + declare_parameter("model_params.intention_point_filepath"); + const auto num_intention_point_cluster = + static_cast(declare_parameter("model_params.num_intention_point_cluster")); + config_ptr_ = std::make_unique( + target_labels, num_past, num_mode, num_future, max_num_polyline, max_num_point, + point_break_distance, intention_point_filepath, num_intention_point_cluster); + // Build config + const auto is_dynamic = declare_parameter("build_params.is_dynamic"); + const auto precision = declare_parameter("build_params.precision"); + const auto calibration = declare_parameter("build_params.calibration"); + build_config_ptr_ = std::make_unique(is_dynamic, precision, calibration); + model_ptr_ = std::make_unique(model_path, *config_ptr_, *build_config_ptr_); + } + + { + // Ego states and timestamps buffer + int ego_buffer_size = declare_parameter("ego_buffer_size", 100); + ego_states_ = std::make_unique>>(ego_buffer_size); + timestamps_ = std::make_unique>(config_ptr_->num_past); + } + + sub_objects_ = create_subscription( + "~/input/objects", rclcpp::QoS{1}, std::bind(&MTRNode::callback, this, std::placeholders::_1)); + sub_map_ = create_subscription( + "~/input/vector_map", rclcpp::QoS{1}.transient_local(), + std::bind(&MTRNode::onMap, this, std::placeholders::_1)); + + pub_objects_ = create_publisher("~/output/objects", rclcpp::QoS{1}); + + if (declare_parameter("build_only")) { + RCLCPP_INFO(get_logger(), "TensorRT engine file is built and exit."); + rclcpp::shutdown(); + } +} + +void MTRNode::callback(const TrackedObjects::ConstSharedPtr object_msg) +{ + const auto ego_msg = getLatestEgo(); + + if (!ego_msg) { + RCLCPP_WARN(get_logger(), "No ego data"); + return; + } + + const auto polyline_data = lanelet_converter_ptr_->convert( + ego_msg->kinematics.pose_with_covariance.pose.position, POLYLINE_DISTANCE_THRESHOLD); + + if (!polyline_data) { + RCLCPP_WARN(get_logger(), "No polyline"); + return; + } + + const auto current_time = rclcpp::Time(object_msg->header.stamp).seconds(); + timestamps_->push_back(current_time); + + removeAncientAgentHistory(current_time, object_msg); + updateAgentHistory(current_time, object_msg, ego_msg.value()); + + std::vector object_ids; + std::vector histories; + std::vector label_ids; + histories.reserve(agent_history_map_.size()); + object_ids.reserve(agent_history_map_.size()); + label_ids.reserve(agent_history_map_.size()); + int tmp_ego_index = -1; + for (const auto & [object_id, history] : agent_history_map_) { + object_ids.emplace_back(object_id); + histories.emplace_back(history); + label_ids.emplace_back(history.label_id()); + if (object_id == EGO_ID) { + tmp_ego_index = histories.size() - 1; + } + } + + if (tmp_ego_index == -1) { + RCLCPP_WARN(get_logger(), "No EGO"); + return; + } + + const auto target_indices = extractTargetAgent(histories); + if (target_indices.empty()) { + RCLCPP_WARN(get_logger(), "No target agents"); + return; + } + + const auto ego_index = static_cast(tmp_ego_index); + const auto relative_timestamps = getRelativeTimestamps(); + // For testing purposes, normally pre-processing is done in the cuda side. + // const auto agent_centric_histories = getAgentCentricHistories(histories); + AgentData agent_data(histories, ego_index, target_indices, label_ids, relative_timestamps); + + std::vector trajectories; + if (!model_ptr_->doInference(agent_data, *polyline_data, trajectories)) { + RCLCPP_WARN(get_logger(), "Inference failed"); + return; + } + + PredictedObjects output; + output.header = object_msg->header; + output.objects.reserve(target_indices.size()); + for (size_t i = 0; i < target_indices.size(); ++i) { + const auto & target_idx = target_indices.at(i); + const auto & object_id = object_ids.at(target_idx); + const auto & object = object_msg_map_.at(object_id); + const auto & trajectory = trajectories.at(i); + + auto predicted_object = createPredictedObject(object, trajectory); + output.objects.emplace_back(predicted_object); + } + + // Publish results + pub_objects_->publish(output); +} + +void MTRNode::onMap(const HADMapBin::ConstSharedPtr map_msg) +{ + lanelet_map_ptr_ = std::make_shared(); + lanelet::utils::conversion::fromBinMsg( + *map_msg, lanelet_map_ptr_, &traffic_rules_ptr_, &routing_graph_ptr_); + + lanelet_converter_ptr_ = std::make_unique( + lanelet_map_ptr_, config_ptr_->max_num_polyline, config_ptr_->max_num_point, + config_ptr_->point_break_distance); +} + +std::optional MTRNode::getLatestEgo() +{ + const Odometry::ConstSharedPtr odometry_msg = sub_ego_.takeData(); + if (!odometry_msg) { + return std::nullopt; + } + + auto createPoint32 = + [](const double x, const double y, const double z) -> geometry_msgs::msg::Point32 { + geometry_msgs::msg::Point32 p; + p.x = x; + p.y = y; + p.z = z; + return p; + }; + + TrackedObject output; + // Classification and probability + { + output.existence_probability = 1.0; + ObjectClassification classification; + classification.label = ObjectClassification::CAR; + output.classification = {classification}; + } + + // Kinematics + { + output.kinematics.pose_with_covariance = odometry_msg->pose; + output.kinematics.twist_with_covariance = odometry_msg->twist; + } + + // Shape + { + const auto & ego_max_long_offset = vehicle_info_.max_longitudinal_offset_m; + const auto & ego_rear_overhang = vehicle_info_.vehicle_height_m; + const auto & ego_length = vehicle_info_.vehicle_length_m; + const auto & ego_width = vehicle_info_.vehicle_width_m; + const auto & ego_height = vehicle_info_.vehicle_height_m; + + autoware_perception_msgs::msg::Shape shape; + shape.type = autoware_perception_msgs::msg::Shape::BOUNDING_BOX; + shape.dimensions.x = ego_length; + shape.dimensions.y = ego_width; + shape.dimensions.z = ego_height; + + // TODO(Daniel): Should use overhang and ego info utils + shape.footprint.points.emplace_back( + createPoint32(-ego_rear_overhang, -0.5 * ego_width, ego_height)); + shape.footprint.points.emplace_back( + createPoint32(-ego_rear_overhang, 0.5 * ego_width, ego_height)); + shape.footprint.points.emplace_back( + createPoint32(ego_max_long_offset, 0.5 * ego_width, ego_height)); + shape.footprint.points.emplace_back( + createPoint32(ego_max_long_offset, -0.5 * ego_width, ego_height)); + output.shape = shape; + } + return output; +} + +void MTRNode::removeAncientAgentHistory( + const double current_time, const TrackedObjects::ConstSharedPtr objects_msg) +{ + for (const auto & object : objects_msg->objects) { + const auto & object_id = autoware::universe_utils::toHexString(object.object_id); + if (agent_history_map_.count(object_id) == 0) { + continue; + } + + const auto & history = agent_history_map_.at(object_id); + if (history.is_ancient(current_time, TIME_THRESHOLD)) { + agent_history_map_.erase(object_id); + } + } + + if ( + agent_history_map_.count(EGO_ID) != 0 && + agent_history_map_.at(EGO_ID).is_ancient(current_time, TIME_THRESHOLD)) { + agent_history_map_.erase(EGO_ID); + } +} + +void MTRNode::updateAgentHistory( + const double current_time, const TrackedObjects::ConstSharedPtr objects_msg, + const TrackedObject & ego_msg) +{ + std::vector observed_ids; + // other agents + for (const auto & object : objects_msg->objects) { + auto label_index = getLabelIndex(object); + if (label_index == -1) { + continue; + } + + const auto object_id = autoware::universe_utils::toHexString(object.object_id); + observed_ids.emplace_back(object_id); + object_msg_map_.insert_or_assign(object_id, object); + + const auto state = createAgentState(object, true); + if (agent_history_map_.count(object_id) == 0) { + AgentHistory history(state, object_id, label_index, current_time, config_ptr_->num_past); + agent_history_map_.emplace(object_id, history); + } else { + agent_history_map_.at(object_id).update(current_time, state); + } + } + + // ego vehicle + observed_ids.emplace_back(EGO_ID); + object_msg_map_.insert_or_assign(EGO_ID, ego_msg); + + const auto ego = createAgentState(ego_msg, true); + if (agent_history_map_.count(EGO_ID) == 0) { + AgentHistory history(ego, EGO_ID, AgentLabel::VEHICLE, current_time, config_ptr_->num_past); + agent_history_map_.emplace(EGO_ID, history); + } else { + agent_history_map_.at(EGO_ID).update(current_time, ego); + } + + // update unobserved histories with empty + for (auto & [object_id, history] : agent_history_map_) { + if (std::find(observed_ids.cbegin(), observed_ids.cend(), object_id) != observed_ids.cend()) { + continue; + } + history.update_empty(); + } +} + +std::vector MTRNode::extractTargetAgent(const std::vector & histories) +{ + auto map2ego = transform_listener_.getTransform( + "base_link", "map", rclcpp::Time(), rclcpp::Duration::from_seconds(0.1)); + + if (!map2ego) { + RCLCPP_WARN(get_logger(), "Failed to get transform from map to base_link."); + return {}; + } + + std::vector> index_distances; + for (size_t idx = 0; idx < histories.size(); ++idx) { + const auto & history = histories.at(idx); + if (history.is_valid_latest() && history.object_id() != EGO_ID) { + const auto & state = history.get_latest_state(); + geometry_msgs::msg::PoseStamped pose_in_map; + pose_in_map.pose.position.x = state.x(); + pose_in_map.pose.position.y = state.y(); + pose_in_map.pose.position.z = state.z(); + pose_in_map.pose.orientation = autoware::universe_utils::createQuaternionFromYaw(state.yaw()); + + geometry_msgs::msg::PoseStamped pose_in_ego; + tf2::doTransform(pose_in_map, pose_in_ego, *map2ego); + + const auto distance = std::hypot(pose_in_ego.pose.position.x, pose_in_ego.pose.position.y); + index_distances.emplace_back(idx, distance); + } + } + + // sort by distance + std::sort( + index_distances.begin(), index_distances.end(), + [](const auto & item1, const auto & item2) { return item1.second < item2.second; }); + + std::vector target_indices; + for (const auto & [idx, _] : index_distances) { + target_indices.emplace_back(idx); + if (MAX_NUM_TARGET <= target_indices.size()) { + break; + } + } + + return target_indices; +} + +std::vector MTRNode::getRelativeTimestamps() const +{ + std::vector output; + output.reserve(timestamps_->size()); + for (const auto & t : *timestamps_) { + output.push_back(t - timestamps_->front()); + } + return output; +} + +PredictedObject MTRNode::createPredictedObject( + const TrackedObject & object, const PredictedTrajectory & trajectory) +{ + const auto & init_pose_with_cov = object.kinematics.pose_with_covariance; + const auto & init_twist_with_cov = object.kinematics.twist_with_covariance; + const auto & init_accel_with_cov = object.kinematics.acceleration_with_covariance; + + PredictedObject predicted_object; + predicted_object.kinematics.initial_pose_with_covariance = init_pose_with_cov; + predicted_object.kinematics.initial_twist_with_covariance = init_twist_with_cov; + predicted_object.kinematics.initial_acceleration_with_covariance = init_accel_with_cov; + predicted_object.classification = object.classification; + predicted_object.shape = object.shape; + predicted_object.object_id = object.object_id; + predicted_object.existence_probability = object.existence_probability; + + for (const auto & mode : trajectory.get_modes()) { + PredictedPath waypoints; + waypoints.confidence = mode.score(); + waypoints.time_step = rclcpp::Duration::from_seconds(0.1); // TODO(ktro282): use a parameter + waypoints.path.reserve(mode.num_future()); + + for (const auto & state : mode.get_waypoints()) { + geometry_msgs::msg::Pose predicted_pose; + predicted_pose.position.x = state.x(); + predicted_pose.position.y = state.y(); + predicted_pose.position.z = init_pose_with_cov.pose.position.z; + predicted_pose.orientation = init_pose_with_cov.pose.orientation; + waypoints.path.emplace_back(predicted_pose); + if (waypoints.path.size() >= waypoints.path.max_size()) { + break; + } + } + predicted_object.kinematics.predicted_paths.emplace_back(waypoints); + } + + return predicted_object; +} +} // namespace autoware::mtr + +#include +RCLCPP_COMPONENTS_REGISTER_NODE(autoware::mtr::MTRNode); diff --git a/perception/autoware_mtr/src/trt_mtr.cpp b/perception/autoware_mtr/src/trt_mtr.cpp new file mode 100644 index 0000000000000..1fd47e017ed8d --- /dev/null +++ b/perception/autoware_mtr/src/trt_mtr.cpp @@ -0,0 +1,233 @@ +// Copyright 2024 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/mtr/trt_mtr.hpp" + +#include "autoware/mtr/cuda_helper.hpp" +#include "autoware/mtr/intention_point.hpp" +#include "autoware/mtr/trajectory.hpp" +#include "postprocess/postprocess_kernel.cuh" +#include "preprocess/agent_preprocess_kernel.cuh" +#include "preprocess/polyline_preprocess_kernel.cuh" + +#include +#include +#include +#include + +namespace autoware::mtr +{ +TrtMTR::TrtMTR( + const std::string & model_path, const MTRConfig & config, const BuildConfig & build_config, + const size_t max_workspace_size) +: config_(config), + intention_point_(IntentionPoint::from_file( + config_.num_intention_point_cluster, config_.intention_point_filepath)) +{ + max_num_polyline_ = config_.max_num_polyline; + num_mode_ = config_.num_mode; + num_future_ = config_.num_future; + num_intention_point_ = config_.num_intention_point_cluster; + + // build engine + builder_ = std::make_unique(model_path, build_config, max_workspace_size); + builder_->setup(); + + if (!builder_->isInitialized()) { + return; + } + + CHECK_CUDA_ERROR(cudaStreamCreate(&stream_)); +} + +bool TrtMTR::doInference( + const AgentData & agent_data, const PolylineData & polyline_data, + std::vector & trajectories) +{ + initCudaPtr(agent_data, polyline_data); + + if (!preProcess(agent_data, polyline_data)) { + std::cerr << "Fail to preprocess" << std::endl; + return false; + } + + std::vector buffer = {d_in_trajectory_.get(), d_in_trajectory_mask_.get(), + d_in_polyline_.get(), d_in_polyline_mask_.get(), + d_in_polyline_center_.get(), d_in_last_pos_.get(), + d_target_index_.get(), d_intention_point_.get(), + d_out_trajectory_.get(), d_out_score_.get()}; + + if (!builder_->enqueueV2(buffer.data(), stream_, nullptr)) { + std::cerr << "Fail to do inference" << std::endl; + return false; + } + + if (!postProcess(agent_data, trajectories)) { + std::cerr << "Fail to postprocess" << std::endl; + return false; + } + + return true; +} + +void TrtMTR::initCudaPtr(const AgentData & agent_data, const PolylineData & polyline_data) +{ + num_target_ = agent_data.num_target(); + num_agent_ = agent_data.num_agent(); + num_timestamp_ = agent_data.time_length(); + num_agent_attr_ = agent_data.input_dim(); + num_polyline_ = polyline_data.num_polyline(); + num_point_ = polyline_data.num_point(); + num_point_attr_ = polyline_data.input_dim(); + + // source data + d_target_index_ = cuda::make_unique(num_target_); + d_label_index_ = cuda::make_unique(num_agent_); + d_timestamp_ = cuda::make_unique(num_timestamp_); + d_trajectory_ = cuda::make_unique(agent_data.size()); + d_target_state_ = cuda::make_unique(num_target_ * agent_data.state_dim()); + d_intention_point_ = cuda::make_unique(num_target_ * intention_point_.size()); + d_polyline_ = cuda::make_unique(polyline_data.size()); + + // preprocessed input + d_in_trajectory_ = + cuda::make_unique(num_target_ * num_agent_ * num_timestamp_ * num_agent_attr_); + d_in_trajectory_mask_ = cuda::make_unique(num_target_ * num_agent_ * num_timestamp_); + d_in_last_pos_ = cuda::make_unique(num_target_ * num_agent_ * 3); + d_in_polyline_ = + cuda::make_unique(num_target_ * max_num_polyline_ * num_point_ * num_point_attr_); + d_in_polyline_mask_ = cuda::make_unique(num_target_ * max_num_polyline_ * num_point_); + d_in_polyline_center_ = cuda::make_unique(num_target_ * max_num_polyline_ * 3); + + if (max_num_polyline_ < num_polyline_) { + d_tmp_polyline_ = + cuda::make_unique(num_target_ * num_polyline_ * num_point_ * num_point_attr_); + d_tmp_polyline_mask_ = cuda::make_unique(num_target_ * num_polyline_ * num_point_); + d_tmp_distance_ = cuda::make_unique(num_target_ * num_polyline_); + } + + // outputs + d_out_score_ = cuda::make_unique(num_target_ * num_mode_); + d_out_trajectory_ = + cuda::make_unique(num_target_ * num_mode_ * num_future_ * PredictedStateDim); + + if (builder_->isDynamic()) { + // trajectory: (B, N, T, Da) + builder_->setBindingDimensions( + 0, nvinfer1::Dims4{num_target_, num_agent_, num_timestamp_, num_agent_attr_}); + // trajectory mask: (B, N, T) + builder_->setBindingDimensions(1, nvinfer1::Dims3{num_target_, num_agent_, num_timestamp_}); + // polyline: (B, K, P, Dp) + builder_->setBindingDimensions( + 2, nvinfer1::Dims4{num_target_, max_num_polyline_, num_point_, num_point_attr_}); + // polyline mask: (B, K, P) + builder_->setBindingDimensions(3, nvinfer1::Dims3{num_target_, max_num_polyline_, num_point_}); + // polyline center: (B, K, 3) + builder_->setBindingDimensions(4, nvinfer1::Dims3{num_target_, max_num_polyline_, 3}); + // agent last position: (B, N, 3) + builder_->setBindingDimensions(5, nvinfer1::Dims3{num_target_, num_agent_, 3}); + // target indices: (B,) + nvinfer1::Dims targetIdxDim; + targetIdxDim.nbDims = 1; + targetIdxDim.d[0] = num_target_; + builder_->setBindingDimensions(6, targetIdxDim); + // intention points: (B, I, 2) + builder_->setBindingDimensions(7, nvinfer1::Dims3{num_target_, num_intention_point_, 2}); + } +} + +bool TrtMTR::preProcess(const AgentData & agent_data, const PolylineData & polyline_data) +{ + CHECK_CUDA_ERROR(cudaMemcpyAsync( + d_target_index_.get(), agent_data.target_indices().data(), sizeof(int) * num_target_, + cudaMemcpyHostToDevice, stream_)); + CHECK_CUDA_ERROR(cudaMemcpyAsync( + d_label_index_.get(), agent_data.label_ids().data(), sizeof(int) * num_agent_, + cudaMemcpyHostToDevice, stream_)); + CHECK_CUDA_ERROR(cudaMemcpyAsync( + d_timestamp_.get(), agent_data.timestamps().data(), sizeof(float) * num_timestamp_, + cudaMemcpyHostToDevice, stream_)); + CHECK_CUDA_ERROR(cudaMemcpyAsync( + d_trajectory_.get(), agent_data.data_ptr(), sizeof(float) * agent_data.size(), + cudaMemcpyHostToDevice, stream_)); + CHECK_CUDA_ERROR(cudaMemcpyAsync( + d_target_state_.get(), agent_data.current_target_data_ptr(), + sizeof(float) * num_target_ * agent_data.state_dim(), cudaMemcpyHostToDevice, stream_)); + + CHECK_CUDA_ERROR(cudaMemcpyAsync( + d_polyline_.get(), polyline_data.data_ptr(), sizeof(float) * polyline_data.size(), + cudaMemcpyHostToDevice, stream_)); + + const auto target_label_names = getLabelNames(agent_data.target_label_ids()); + const auto intention_point = intention_point_.as_array(target_label_names); + CHECK_CUDA_ERROR(cudaMemcpyAsync( + d_intention_point_.get(), intention_point.data(), + sizeof(float) * num_target_ * intention_point_.size(), cudaMemcpyHostToDevice, stream_)); + + CHECK_CUDA_ERROR(agentPreprocessLauncher( + num_target_, num_agent_, num_timestamp_, agent_data.state_dim(), agent_data.num_class(), + agent_data.ego_index(), d_target_index_.get(), d_label_index_.get(), d_timestamp_.get(), + d_trajectory_.get(), d_in_trajectory_.get(), d_in_trajectory_mask_.get(), d_in_last_pos_.get(), + stream_)); + + if (max_num_polyline_ < num_polyline_) { + CHECK_CUDA_ERROR(polylinePreprocessWithTopkLauncher( + max_num_polyline_, num_polyline_, num_point_, polyline_data.state_dim(), d_polyline_.get(), + num_target_, agent_data.state_dim(), d_target_state_.get(), d_tmp_polyline_.get(), + d_tmp_polyline_mask_.get(), d_tmp_distance_.get(), d_in_polyline_.get(), + d_in_polyline_mask_.get(), d_in_polyline_center_.get(), stream_)); + } else { + CHECK_CUDA_ERROR(polylinePreprocessLauncher( + num_polyline_, num_point_, polyline_data.state_dim(), d_polyline_.get(), num_target_, + agent_data.state_dim(), d_target_state_.get(), d_in_polyline_.get(), + d_in_polyline_mask_.get(), d_in_polyline_center_.get(), stream_)); + } + return true; +} + +bool TrtMTR::postProcess( + const AgentData & agent_data, std::vector & trajectories) +{ + CHECK_CUDA_ERROR(postprocessLauncher( + num_target_, num_mode_, num_future_, agent_data.state_dim(), d_target_state_.get(), + PredictedStateDim, d_out_trajectory_.get(), stream_)); + + CHECK_CUDA_ERROR(cudaStreamSynchronize(stream_)); + + h_out_score_.clear(); + h_out_trajectory_.clear(); + h_out_score_.resize(num_target_ * num_mode_); + h_out_trajectory_.resize(num_target_ * num_mode_ * num_future_ * PredictedStateDim); + + CHECK_CUDA_ERROR(cudaMemcpy( + h_out_score_.data(), d_out_score_.get(), sizeof(float) * num_target_ * num_mode_, + cudaMemcpyDeviceToHost)); + CHECK_CUDA_ERROR(cudaMemcpy( + h_out_trajectory_.data(), d_out_trajectory_.get(), + sizeof(float) * num_target_ * num_mode_ * num_future_ * PredictedStateDim, + cudaMemcpyDeviceToHost)); + + trajectories.clear(); + trajectories.reserve(num_target_); + for (auto b = 0; b < num_target_; ++b) { + const auto score_itr = h_out_score_.cbegin() + b * num_mode_; + const std::vector scores(score_itr, score_itr + num_mode_); + const auto mode_itr = + h_out_trajectory_.cbegin() + b * num_mode_ * num_future_ * PredictedStateDim; + std::vector modes(mode_itr, mode_itr + num_mode_ * num_future_ * PredictedStateDim); + trajectories.emplace_back(scores, modes, num_mode_, num_future_); + } + return true; +} +} // namespace autoware::mtr diff --git a/perception/autoware_mtr/test/test_fixed_queue.cpp b/perception/autoware_mtr/test/test_fixed_queue.cpp new file mode 100644 index 0000000000000..5ed4ebf5b2a28 --- /dev/null +++ b/perception/autoware_mtr/test/test_fixed_queue.cpp @@ -0,0 +1,42 @@ +// Copyright 2024 TIER IV, inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/mtr/fixed_queue.hpp" + +#include + +#include + +TEST(testFixedQueue, test_name) +{ + constexpr size_t n_size = 5; + autoware::mtr::FixedQueue queue(n_size); + EXPECT_DOUBLE_EQ(queue.front(), 0.0); + EXPECT_DOUBLE_EQ(queue.back(), 0.0); + EXPECT_EQ(queue.size(), n_size); + + for (size_t n = 0; n < n_size; ++n) { + queue.push_back(n); + } + + EXPECT_DOUBLE_EQ(queue.front(), 0.0); + EXPECT_DOUBLE_EQ(queue.back(), 4.0); + EXPECT_EQ(queue.size(), n_size); +} + +int main(int argc, char ** argv) +{ + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +}