From db93800bfb58c07d9283e1ebfeb0c418df5f95d3 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Wed, 17 Jul 2024 12:43:05 -0700 Subject: [PATCH 1/3] Add string input shape checks --- src/python_be.cc | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/src/python_be.cc b/src/python_be.cc index cd31e79e..eb8de5f3 100644 --- a/src/python_be.cc +++ b/src/python_be.cc @@ -34,6 +34,65 @@ #include "pb_log.h" namespace triton { namespace backend { namespace python { +namespace { + +TRITONSERVER_Error* +CheckInputStringShape( + std::shared_ptr<PbTensor> tensor, const char* name) +{ + size_t element_idx = 0; + + // For string data type, we always need to have the data on CPU so + // that we can read string length properly. + const char* content = reinterpret_cast<const char*>(tensor->DataPtr()); + size_t content_byte_size = tensor->ByteSize(); + const size_t request_element_cnt = GetElementCount(tensor->Dims()); + + // Each string in 'content' is a 4-byte length followed by the string itself with no + // null-terminator.
+ while (content_byte_size >= sizeof(uint32_t)) { + if (element_idx >= request_element_cnt) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INVALID_ARG, + std::string( + "unexpected number of string elements " + + std::to_string(element_idx + 1) + " for inference input '" + + name + "', expecting " + std::to_string(request_element_cnt)) + .c_str()); + } + + const uint32_t len = *(reinterpret_cast<const uint32_t*>(content)); + content += sizeof(uint32_t); + content_byte_size -= sizeof(uint32_t); + + if (content_byte_size < len) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INVALID_ARG, + std::string( + "incomplete string data for inference input '" + + std::string(name) + "', expecting string of length " + + std::to_string(len) + " but only " + + std::to_string(content_byte_size) + " bytes available") + .c_str()); + } + + content += len; + content_byte_size -= len; + element_idx++; + } + + if (element_idx != request_element_cnt) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + std::string( + "expected " + std::to_string(request_element_cnt) + + " strings for inference input '" + name + "', got " + + std::to_string(element_idx)) + .c_str()); + } + return nullptr; +} +} namespace bi = boost::interprocess; @@ -424,6 +483,10 @@ ModelInstanceState::GetInputTensor( RETURN_IF_ERROR(backend::ReadInputTensor( request, input_name, input_buffer, &byte_size)); } + + if (input_dtype == TRITONSERVER_TYPE_BYTES) { + RETURN_IF_ERROR(CheckInputStringShape(input_tensor, input_name)); + } } else { #ifdef TRITON_ENABLE_GPU // Attempt to use the cuda shared memory pool for GPU tensor.
From 496bad685d1d980899e88b39fe4bd7bc12568aff Mon Sep 17 00:00:00 2001 From: Yingge He Date: Thu, 18 Jul 2024 15:50:50 -0700 Subject: [PATCH 2/3] Refactor string input checks --- src/python_be.cc | 69 +++++++----------------------------------------- 1 file changed, 9 insertions(+), 60 deletions(-) diff --git a/src/python_be.cc b/src/python_be.cc index eb8de5f3..e19a5db6 100644 --- a/src/python_be.cc +++ b/src/python_be.cc @@ -34,65 +34,6 @@ #include "pb_log.h" namespace triton { namespace backend { namespace python { -namespace { - -TRITONSERVER_Error* -CheckInputStringShape( - std::shared_ptr<PbTensor> tensor, const char* name) -{ - size_t element_idx = 0; - - // For string data type, we always need to have the data on CPU so - // that we can read string length properly. - const char* content = reinterpret_cast<const char*>(tensor->DataPtr()); - size_t content_byte_size = tensor->ByteSize(); - const size_t request_element_cnt = GetElementCount(tensor->Dims()); - - // Each string in 'content' is a 4-byte length followed by the string itself with no - // null-terminator.
- while (content_byte_size >= sizeof(uint32_t)) { - if (element_idx >= request_element_cnt) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - std::string( - "unexpected number of string elements " + - std::to_string(element_idx + 1) + " for inference input '" + - name + "', expecting " + std::to_string(request_element_cnt)) - .c_str()); - } - - const uint32_t len = *(reinterpret_cast<const uint32_t*>(content)); - content += sizeof(uint32_t); - content_byte_size -= sizeof(uint32_t); - - if (content_byte_size < len) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - std::string( - "incomplete string data for inference input '" + - std::string(name) + "', expecting string of length " + - std::to_string(len) + " but only " + - std::to_string(content_byte_size) + " bytes available") - .c_str()); - } - - content += len; - content_byte_size -= len; - element_idx++; - } - - if (element_idx != request_element_cnt) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - std::string( - "expected " + std::to_string(request_element_cnt) + - " strings for inference input '" + name + "', got " + - std::to_string(element_idx)) - .c_str()); - } - return nullptr; -} -} namespace bi = boost::interprocess; @@ -485,7 +426,15 @@ ModelInstanceState::GetInputTensor( } if (input_dtype == TRITONSERVER_TYPE_BYTES) { - RETURN_IF_ERROR(CheckInputStringShape(input_tensor, input_name)); + const char* content = reinterpret_cast<const char*>(input_tensor->DataPtr()); + size_t content_byte_size = input_tensor->ByteSize(); + const size_t request_element_cnt = GetElementCount(input_tensor->Dims()); + size_t element_idx = 0; // placeholder + auto callback = [](const size_t, const char*, const uint32_t) { + }; // no-op + RETURN_IF_ERROR(ValidateStringBuffer( + content, content_byte_size, request_element_cnt, input_name, + &element_idx, callback)); } } else { #ifdef TRITON_ENABLE_GPU From cd1318e424934e9212a9edb352c0ef1026a0981a Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 19
Jul 2024 14:39:00 -0700 Subject: [PATCH 3/3] Improve readability --- src/python_be.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/python_be.cc b/src/python_be.cc index e19a5db6..2212176d 100644 --- a/src/python_be.cc +++ b/src/python_be.cc @@ -429,12 +429,9 @@ ModelInstanceState::GetInputTensor( const char* content = reinterpret_cast<const char*>(input_tensor->DataPtr()); size_t content_byte_size = input_tensor->ByteSize(); const size_t request_element_cnt = GetElementCount(input_tensor->Dims()); - size_t element_idx = 0; // placeholder - auto callback = [](const size_t, const char*, const uint32_t) { - }; // no-op RETURN_IF_ERROR(ValidateStringBuffer( content, content_byte_size, request_element_cnt, input_name, - &element_idx, callback)); + nullptr /* str_list */)); } } else { #ifdef TRITON_ENABLE_GPU