updating and separating allocators into separate module

triton-inference-server · Jan 8, 2024 · 2d3fb24 · 2d3fb24
1 parent c77978d
commit 2d3fb24
Show file tree

Hide file tree

Showing 3 changed files with 223 additions and 211 deletions.
diff --git a/python/tritonserver/_api/_allocators.py b/python/tritonserver/_api/_allocators.py
@@ -0,0 +1,222 @@
+# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""Default / Example Allocators for Tensor Memory"""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any
+
+import numpy
+from _datautils import DLPackObject
+from tritonserver._c.triton_bindings import (
+    InvalidArgumentError,
+    TRITONSERVER_BufferAttributes,
+)
+from tritonserver._c.triton_bindings import TRITONSERVER_MemoryType as MemoryType
+
+try:
+    import cupy
+except ImportError:
+    cupy = None
+
+
+# Registry of the allocator used by default for each memory type;
+# entries are added further down in this module.
+default_memory_allocators: dict[MemoryType, MemoryAllocator] = {}
+
+
+@dataclass
+class MemoryBuffer:
+    """Descriptor for a region of memory backing a Tensor.
+
+    A MemoryBuffer never owns the memory it describes; it keeps a
+    reference to the owning object.
+
+    Parameters
+    ----------
+    data_ptr : int
+        Address of the allocated memory.
+    memory_type : MemoryType
+        Kind of memory the buffer resides in.
+    memory_type_id : int
+        Memory type id (typically the same as the device id).
+    size : int
+        Number of bytes allocated.
+    owner : Any
+        Object that owns or manages the memory.  The memory must not
+        be freed while a reference to the owner is held.
+
+    Examples
+    --------
+    >>> buffer = MemoryBuffer.from_dlpack(numpy.array([100],dtype=numpy.uint8))
+
+    """
+
+    data_ptr: int
+    memory_type: MemoryType
+    memory_type_id: int
+    size: int
+    owner: Any
+
+    @staticmethod
+    def from_dlpack(owner: Any) -> MemoryBuffer:
+        """Build a MemoryBuffer describing a DLPack-capable object.
+
+        Parameters
+        ----------
+        owner : Any
+            Object exposing ``__dlpack__``; it is stored as the
+            buffer's owner.
+
+        Returns
+        -------
+        MemoryBuffer
+            Buffer describing the memory exposed by ``owner``.
+
+        Raises
+        ------
+        InvalidArgumentError
+            If ``owner`` lacks ``__dlpack__`` or its memory is not
+            contiguous.
+        """
+        if hasattr(owner, "__dlpack__"):
+            # The contiguity check and field extraction are shared with
+            # the variant that receives a ready-made DLPackObject.
+            return MemoryBuffer._from_dlpack_object(owner, DLPackObject(owner))
+        raise InvalidArgumentError("Object does not support DLpack protocol")
+
+    @staticmethod
+    def _from_dlpack_object(owner: Any, dlpack_object: DLPackObject) -> MemoryBuffer:
+        """Build a MemoryBuffer from an already constructed DLPackObject.
+
+        Parameters
+        ----------
+        owner : Any
+            Object stored as the buffer's owner.
+        dlpack_object : DLPackObject
+            Source of the pointer, memory type, memory type id and
+            byte size of the buffer.
+
+        Returns
+        -------
+        MemoryBuffer
+            Buffer populated from ``dlpack_object``.
+
+        Raises
+        ------
+        InvalidArgumentError
+            If ``dlpack_object`` does not report contiguous memory.
+        """
+        if dlpack_object.contiguous:
+            return MemoryBuffer(
+                data_ptr=int(dlpack_object.data_ptr),
+                memory_type=dlpack_object.memory_type,
+                memory_type_id=dlpack_object.memory_type_id,
+                size=dlpack_object.byte_size,
+                owner=owner,
+            )
+        raise InvalidArgumentError("Only contiguous memory is supported")
+
+    def _create_TRITONSERVER_BufferAttributes(self) -> TRITONSERVER_BufferAttributes:
+        """Return a new TRITONSERVER_BufferAttributes mirroring this buffer.
+
+        The memory type, memory type id and byte size are copied from
+        this buffer; ``cuda_ipc_handle`` is not populated here.
+        """
+        attributes = TRITONSERVER_BufferAttributes()
+        attributes.memory_type = self.memory_type
+        attributes.memory_type_id = self.memory_type_id
+        attributes.byte_size = self.size
+        return attributes
+
+
+class MemoryAllocator(ABC):
+    """Abstract base class for pluggable tensor memory allocators.
+
+    Implementations supply an ``allocate`` method that returns
+    MemoryBuffer objects.  The requested memory type and memory type
+    id are not binding: an implementation may return memory of a
+    different type or with a different memory type id.
+
+    Examples
+    --------
+
+    class TorchAllocator(tritonserver.MemoryAllocator):
+        def allocate(self,
+                     size,
+                     memory_type,
+                     memory_type_id,
+                     tensor_name):
+
+            device = "cpu"
+
+            if memory_type == tritonserver.MemoryType.GPU:
+                device = "cuda"
+
+            tensor = torch.zeros(size,dtype=torch.uint8,device=device)
+            print("torch allocator!")
+            return tritonserver.MemoryBuffer.from_dlpack(tensor)
+
+    """
+
+    @abstractmethod
+    def allocate(
+        self, size: int, memory_type: MemoryType, memory_type_id: int, tensor_name: str
+    ) -> MemoryBuffer:
+        """Provide a memory buffer for a tensor.
+
+        Note: implementations are free to ignore the requested memory
+        type and memory type id.
+
+        Parameters
+        ----------
+        size : int
+            Number of bytes requested.
+        memory_type : MemoryType
+            Type of memory requested (CPU, GPU, etc.).
+        memory_type_id : int
+            Memory type id requested (typically the device id).
+        tensor_name : str
+            Name of the tensor the buffer is for.
+
+        Returns
+        -------
+        MemoryBuffer
+            Memory buffer with the requested size.
+
+        Examples
+        --------
+        memory_buffer = allocator.allocate(100,MemoryType.CPU,0,"output")
+
+        """
+
+
+class NumpyAllocator(MemoryAllocator):
+    """Allocator that backs tensors with numpy byte arrays.
+
+    The requested memory type, memory type id and tensor name are
+    not consulted.
+    """
+
+    def allocate(
+        self, size: int, memory_type: MemoryType, memory_type_id: int, tensor_name: str
+    ) -> MemoryBuffer:
+        """Return a MemoryBuffer wrapping a new ``size``-byte numpy array."""
+        # numpy.empty leaves the contents uninitialized.
+        storage = numpy.empty(shape=size, dtype=numpy.byte)
+        return MemoryBuffer.from_dlpack(storage)
+
+
+# CPU memory is served by the numpy-backed allocator by default.
+default_memory_allocators[MemoryType.CPU] = NumpyAllocator()
+
+if cupy is not None:
+
+    class CupyAllocator(MemoryAllocator):
+        """Allocator that backs tensors with cupy byte arrays.
+
+        Only defined, and registered for GPU memory, when cupy could
+        be imported.  The requested memory type and tensor name are
+        not consulted; the memory type id selects the cupy device.
+        """
+
+        def allocate(
+            self,
+            size: int,
+            memory_type: MemoryType,
+            memory_type_id: int,
+            tensor_name: str,
+        ) -> MemoryBuffer:
+            """Return a MemoryBuffer wrapping a new ``size``-byte cupy array."""
+            # Create the array inside the context of the requested device.
+            with cupy.cuda.Device(memory_type_id):
+                device_array = cupy.empty(size, cupy.byte)
+
+            return MemoryBuffer.from_dlpack(device_array)
+
+    # GPU memory is served by the cupy-backed allocator by default.
+    default_memory_allocators[MemoryType.GPU] = CupyAllocator()
diff --git a/python/tritonserver/_api/_datautils.py b/python/tritonserver/_api/_datautils.py
@@ -204,77 +204,6 @@ def __getitem__(self, key):
 )
 
 
-@dataclass
-class MemoryBuffer:
-    """Memory allocated for a Tensor"""
-
-    data_ptr: int
-    memory_type: MemoryType
-    memory_type_id: int
-    size: int
-    owner: Any
-
-    @staticmethod
-    def from_dlpack(
-        owner: Any, dlpack_object: Optional[DLPackObject] = None
-    ) -> MemoryBuffer:
-        if dlpack_object is None:
-            if not hasattr(owner, "__dlpack__"):
-                raise InvalidArgumentError("Object does not support DLpack protocol")
-
-            dlpack_object = DLPackObject(owner)
-
-        if not dlpack_object.contiguous:
-            raise InvalidArgumentError("Only contiguous memory is supported")
-
-        return MemoryBuffer(
-            int(dlpack_object.data_ptr),
-            dlpack_object.memory_type,
-            dlpack_object.memory_type_id,
-            dlpack_object.byte_size,
-            owner,
-        )
-
-    def _create_TRITONSERVER_BufferAttributes(self) -> BufferAttributes:
-        buffer_attributes = BufferAttributes()
-        buffer_attributes.memory_type = self.memory_type
-        buffer_attributes.memory_type_id = self.memory_type_id
-        buffer_attributes.byte_size = self.size
-        #        buffer_attributes.cuda_ipc_handle = None
-        return buffer_attributes
-
-
-class MemoryAllocator(ABC):
-    @abstractmethod
-    def allocate(
-        self, size: int, memory_type: MemoryType, memory_type_id: int, tensor_name: str
-    ) -> MemoryBuffer:
-        """Allocate memory buffer for tensor.
-
-        Parameters
-        ----------
-        size : int
-            number of bytes requested
-        memory_type : MemoryType
-                type of memory requested (CPU, GPU, etc.)
-        memory_type_id : int
-            memory type id requested (typically device id)
-        tensor_name : str
-            name of tensor
-
-        Returns
-        -------
-        MemoryBuffer
-            memory buffer with requested size
-
-        Examples
-        --------
-        memory_buffer = allocator.allocate(100,MemoryType.CPU,0,"output")
-        """
-
-        pass
-
-
 class NumpyAllocator(MemoryAllocator):
     def __init__(self):
         pass