
Commit

updates for consistency
nnshah1 committed Jan 8, 2024
1 parent 2d3fb24 commit 6152fe2
Showing 3 changed files with 9 additions and 289 deletions.
1 change: 0 additions & 1 deletion python/tritonserver/_api/_allocators.py
@@ -149,7 +149,6 @@ def allocate(self,
device = "cuda"
tensor = torch.zeros(size,dtype=torch.uint8,device=device)
print("torch allocator!")
return tritonserver.MemoryBuffer.from_dlpack(tensor)
"""
296 changes: 9 additions & 287 deletions python/tritonserver/_api/_datautils.py
@@ -35,6 +35,7 @@
from typing import Any, Callable, ClassVar, Optional, Sequence, Type

import numpy
+from _allocators import MemoryAllocator, default_memory_allocators
from tritonserver._c import InvalidArgumentError
from tritonserver._c import TRITONSERVER_BufferAttributes as BufferAttributes
from tritonserver._c import TRITONSERVER_DataType as DataType
@@ -53,7 +54,7 @@
)


-class _CustomKeyErrorDict(dict):
+class CustomKeyErrorDict(dict):
def __init__(
self,
from_name: str,
@@ -76,7 +77,7 @@ def __getitem__(self, key)
) from None
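
The body of __getitem__ is collapsed in this view. Judging from the call
sites below (two label strings followed by a mapping) and the ") from None"
re-raise, a minimal sketch of the behavior might look like the following;
the exact message wording is an assumption, not taken from the collapsed body:

class CustomKeyErrorDict(dict):
    """Mapping that reports unsupported keys in terms of both type systems."""

    def __init__(self, from_name: str, to_name: str, mapping: dict) -> None:
        self._from_name = from_name
        self._to_name = to_name
        super().__init__(mapping)

    def __getitem__(self, key):
        try:
            return super().__getitem__(key)
        except KeyError:
            # Hypothetical wording; the real message is collapsed above.
            raise InvalidArgumentError(
                f"Unsupported {self._from_name}: {key!r}. "
                f"Can't convert to {self._to_name}."
            ) from None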


-STRING_TO_TRITON_MEMORY_TYPE: dict[str, MemoryType] = _CustomKeyErrorDict(
+STRING_TO_TRITON_MEMORY_TYPE: dict[str, MemoryType] = CustomKeyErrorDict(
"Memory Type String",
"Triton server memory type",
{"CPU": MemoryType.CPU, "CPU_PINNED": MemoryType.CPU_PINNED, "GPU": MemoryType.GPU},
@@ -85,7 +86,7 @@ def __getitem__(self, key)

DLPACK_DEVICE_TYPE_TO_TRITON_MEMORY_TYPE: dict[
_dlpack.DLDeviceType, MemoryType
-] = _CustomKeyErrorDict(
+] = CustomKeyErrorDict(
"DLPack device type",
"Triton server memory type",
{
@@ -96,7 +97,7 @@ def __getitem__(self, key)

TRITON_MEMORY_TYPE_TO_DLPACK_DEVICE_TYPE: dict[
MemoryType, _dlpack.DLDeviceType
-] = _CustomKeyErrorDict(
+] = CustomKeyErrorDict(
"Triton server memory type",
"DLPack device type",
{
@@ -110,7 +111,7 @@ def __getitem__(self, key)

DLPACK_TO_TRITON_DTYPE: dict[
tuple[_dlpack.DLDataTypeCode, int], DataType
-] = _CustomKeyErrorDict(
+] = CustomKeyErrorDict(
"DLPack data type",
"Triton server data type",
{
@@ -163,7 +164,7 @@ def __getitem__(self, key)
},
)

-TRITON_TO_DLPACK_DTYPE: dict[DataType, _dlpack.DLDataType] = _CustomKeyErrorDict(
+TRITON_TO_DLPACK_DTYPE: dict[DataType, _dlpack.DLDataType] = CustomKeyErrorDict(
"Triton server data type",
"DLPack data type",
{
@@ -172,7 +173,7 @@ def __getitem__(self, key)
},
)

-NUMPY_TO_TRITON_DTYPE: dict[type, DataType] = _CustomKeyErrorDict(
+NUMPY_TO_TRITON_DTYPE: dict[type, DataType] = CustomKeyErrorDict(
"Numpy data type",
"Triton server data type",
{
@@ -194,7 +195,7 @@ def __getitem__(self, key)
},
)

-TRITON_TO_NUMPY_DTYPE: dict[DataType, type] = _CustomKeyErrorDict(
+TRITON_TO_NUMPY_DTYPE: dict[DataType, type] = CustomKeyErrorDict(
"Triton data type",
"Numpy data type",
{
@@ -204,44 +205,6 @@ def __getitem__(self, key)
)
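
Together these renamed tables give symmetric lookups among the Triton,
DLPack, and NumPy type systems. A short usage sketch, assuming the collapsed
entries include the usual numpy.float32 <-> DataType.FP32 pairing:

# Round-trip a dtype through the lookup tables defined above.
triton_dtype = NUMPY_TO_TRITON_DTYPE[numpy.float32]   # DataType.FP32
dlpack_dtype = TRITON_TO_DLPACK_DTYPE[triton_dtype]   # DLPack fp32 descriptor
assert TRITON_TO_NUMPY_DTYPE[triton_dtype] is numpy.float32

# Unknown keys raise a descriptive error instead of a bare KeyError.
memory_type = STRING_TO_TRITON_MEMORY_TYPE["GPU"]     # MemoryType.GPU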


-class NumpyAllocator(MemoryAllocator):
-def __init__(self):
-pass
-
-def allocate(
-self, size: int, memory_type: MemoryType, memory_type_id: int, tensor_name: str
-) -> MemoryBuffer:
-ndarray = numpy.empty(size, numpy.byte)
-return MemoryBuffer.from_dlpack(ndarray)
-
-
-default_memory_allocators: dict[MemoryType, MemoryAllocator] = dict(
-{
-MemoryType.CPU: NumpyAllocator(),
-}
-)
-
-if cupy is not None:
-
-class CupyAllocator(MemoryAllocator):
-def __init__(self):
-pass
-
-def allocate(
-self,
-size: int,
-memory_type: MemoryType,
-memory_type_id: int,
-tensor_name: str,
-) -> MemoryBuffer:
-with cupy.cuda.Device(memory_type_id):
-ndarray = cupy.empty(size, cupy.byte)
-
-return MemoryBuffer.from_dlpack(ndarray)
-
-default_memory_allocators[MemoryType.GPU] = CupyAllocator()
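
The NumpyAllocator and CupyAllocator classes and the default_memory_allocators
registry deleted here are the ones now imported from _allocators at the top of
the file. A usage sketch of the allocator interface, with the size and tensor
name chosen arbitrarily for illustration:

# Pick the default allocator for host memory and request a raw buffer.
allocator = default_memory_allocators[MemoryType.CPU]
buffer = allocator.allocate(
    size=1024,                   # bytes
    memory_type=MemoryType.CPU,
    memory_type_id=0,
    tensor_name="output_0",      # hypothetical tensor name
)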


def _parse_device_or_memory_type(
device_or_memory_type: DeviceOrMemoryType,
) -> tuple[MemoryType, int]:
@@ -439,244 +402,3 @@ def contiguous(self) -> bool:
return _dlpack.is_contiguous_data(
self._tensor.ndim, self._tensor.shape, self._tensor.strides
)
-
-
-@dataclass
-class Tensor:
-data_type: DataType
-shape: Sequence[int]
-memory_buffer: MemoryBuffer
-
-@property
-def data_ptr(self) -> int:
-return self.memory_buffer.data_ptr
-
-@property
-def memory_type(self) -> MemoryType:
-return self.memory_buffer.memory_type
-
-@property
-def memory_type_id(self) -> int:
-return self.memory_buffer.memory_type_id
-
-@property
-def size(self) -> int:
-return self.memory_buffer.size
-
-def __dlpack__(self, *, stream=None):
-# if not (stream is None or (isinstance(stream, int) and stream == 0)):
-# raise UnsupportedError(
-# f"DLPack stream synchronization on {stream} not currently supported"
-# )
-
-dl_managed_tensor = Tensor._create_managed_tensor()
-dl_managed_tensor.dl_tensor.data = self.data_ptr
-dl_managed_tensor.dl_tensor.device = _dlpack.DLDevice(
-TRITON_MEMORY_TYPE_TO_DLPACK_DEVICE_TYPE[self.memory_type],
-self.memory_type_id,
-)
-
-dl_managed_tensor.dl_tensor.dtype = TRITON_TO_DLPACK_DTYPE[self.data_type]
-dl_managed_tensor.dl_tensor.ndim = len(self.shape)
-dl_managed_tensor.dl_tensor.shape = (ctypes.c_int64 * len(self.shape))(
-*self.shape
-)
-dl_managed_tensor.dl_tensor.strides = ctypes.POINTER(ctypes.c_int64)()
-dl_managed_tensor.dl_tensor.byte_offset = 0
-dl_managed_tensor.deleter = Tensor._managed_tensor_deleter
-
-self._set_dlpack_manager_ctx(dl_managed_tensor)
-pycapsule = ctypes.pythonapi.PyCapsule_New(
-ctypes.byref(dl_managed_tensor),
-_dlpack.c_str_dltensor,
-Tensor._pycapsule_deleter,
-)
-return pycapsule
-
-def __dlpack_device__(self):
-return (
-TRITON_MEMORY_TYPE_TO_DLPACK_DEVICE_TYPE[self.memory_type],
-self.memory_type_id,
-)
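
With __dlpack__ and __dlpack_device__ defined, any DLPack consumer can adopt
a Tensor's memory without copying. A sketch, assuming tensor is a
CPU-resident Tensor instance:

# numpy calls tensor.__dlpack__() under the hood and adopts the capsule;
# cupy.from_dlpack would play the same role for GPU-resident memory.
ndarray = numpy.from_dlpack(tensor)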
-
-def to_bytes_array(self) -> numpy.ndarray:
-if self.data_type != DataType.BYTES:
-raise InvalidArgumentError(
-f"Tensor has data type {self.data_type} not {DataType.BYTES}"
-)
-
-# Reshape into 1d array of bytes on host
-original_data_type = self.data_type
-original_shape = self.shape
-self.data_type = DataType.UINT8
-self.shape = [self.size]
-numpy_ndarray = self._to_numpy_on_host()
-
-# Deserialize bytes array and reshape
-self.shape = original_shape
-self.data_type = original_data_type
-return Tensor._deserialize_bytes_array(numpy_ndarray).reshape(self.shape)
-
-@staticmethod
-def from_memory_buffer(data_type, shape, memory_buffer):
-return Tensor(data_type, shape, memory_buffer)
-
-@staticmethod
-def from_object(obj: Any):
-if type(obj) in Tensor._from_converters:
-return Tensor._from_converters[type(obj)](obj)
-elif hasattr(obj, "__dlpack__"):
-return Tensor.from_dlpack(obj)
-else:
-raise InvalidArgumentError(
-f"Input type {type(obj)} not supported. Must be one of {list(Tensor._from_converters.keys())} or the type must support __dlpack__"
-)
-
-@staticmethod
-def from_dlpack(obj: Any) -> Tensor:
-dlpack_object = DLPackObject(obj)
-data_type = dlpack_object.triton_data_type
-shape = dlpack_object.shape
-memory_buffer = MemoryBuffer.from_dlpack(obj, dlpack_object=dlpack_object)
-return Tensor(data_type, shape, memory_buffer)
-
-def to_host(self) -> Tensor:
-return self.to_device("cpu")
-
-def to_device(self, device: DeviceOrMemoryType) -> Tensor:
-memory_type, memory_type_id = _parse_device_or_memory_type(device)
-if self.memory_type == memory_type and self.memory_type_id == memory_type_id:
-return self
-if self.memory_type == MemoryType.CPU_PINNED and memory_type == MemoryType.CPU:
-return self
-if cupy is not None:
-if self.memory_type in (MemoryType.CPU, MemoryType.CPU_PINNED):
-ndarray = numpy.from_dlpack(self)
-else:
-ndarray = cupy.from_dlpack(self)
-
-if memory_type == MemoryType.CPU:
-return Tensor.from_dlpack(cupy.asnumpy(ndarray))
-if memory_type == MemoryType.GPU:
-with cupy.cuda.Device(memory_type_id):
-return Tensor.from_dlpack(cupy.asarray(ndarray))
-
-raise UnsupportedError(
-f"Conversion from {(self.memory_type,self.memory_type_id)} to {(memory_type, memory_type_id)} not supported."
-)
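
A usage sketch for the movement helpers, assuming the string form "gpu:0" is
among the device specifications _parse_device_or_memory_type accepts:

cpu_tensor = Tensor.from_dlpack(numpy.zeros((2, 2), dtype=numpy.float32))
gpu_tensor = cpu_tensor.to_device("gpu:0")  # copies through cupy when present
host_copy = gpu_tensor.to_host()            # shorthand for to_device("cpu")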
-
-def _to_numpy_on_host(self) -> numpy.ndarray:
-if self.memory_type in (MemoryType.CPU, MemoryType.CPU_PINNED):
-return numpy.from_dlpack(self)
-
-if cupy is not None:
-return cupy.asnumpy(cupy.from_dlpack(self))
-
-raise UnsupportedError(
-f"Conversion from {self.memory_type} to numpy array not supported."
-)
-
-@staticmethod
-def _deserialize_bytes_array(numpy_ndarray: numpy.ndarray) -> numpy.ndarray:
-result = []
-_buffer = memoryview(numpy_ndarray)
-offset = 0
-while offset < len(_buffer):
-(item_length,) = struct.unpack_from("@I", _buffer, offset)
-offset += 4
-result.append(bytes(_buffer[offset : offset + item_length]))
-offset += item_length
-return numpy.array(result, dtype=numpy.object_)
-
-@staticmethod
-def _serialize_numpy_bytes_array(array: numpy.ndarray) -> numpy.ndarray:
-result = []
-for array_item in numpy.nditer(array, flags=["refs_ok"], order="C"):
-item = array_item.item()
-if not isinstance(item, bytes):
-item = str(item).encode("utf-8")
-result.append(struct.pack("@I", len(item)))
-result.append(item)
-return numpy.frombuffer(b"".join(result), dtype=numpy.byte)
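
The BYTES wire format used by the two helpers above is a flat sequence of
records, each a native-endian 4-byte length prefix followed by the raw bytes.
A worked example on a little-endian host:

# [b"abc", b"de"] packs to: 03 00 00 00 61 62 63 02 00 00 00 64 65
packed = Tensor._serialize_numpy_bytes_array(
    numpy.array([b"abc", b"de"], dtype=numpy.object_)
)
restored = Tensor._deserialize_bytes_array(packed)
assert restored.tolist() == [b"abc", b"de"]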
-
-@staticmethod
-def _from_list(obj: list[Any]) -> Tensor:
-try:
-return Tensor._from_numpy(numpy.array(obj))
-except Exception as e:
-raise InvalidArgumentError(
-f"Conversion from {obj} to tensor not supported."
-) from e
-
-@staticmethod
-def _from_numpy(obj: numpy.ndarray | numpy.generic) -> Tensor:
-data_type = NUMPY_TO_TRITON_DTYPE[obj.dtype.type]
-shape = obj.shape
-
-if isinstance(obj, numpy.generic):
-obj = numpy.asarray(obj)
-
-if data_type == DataType.BYTES:
-obj = Tensor._serialize_numpy_bytes_array(obj)
-
-memory_buffer = MemoryBuffer(
-data_ptr=obj.ctypes.data,
-memory_type=MemoryType.CPU,
-memory_type_id=0,
-size=obj.itemsize * obj.size,
-owner=obj,
-)
-
-return Tensor(data_type, shape, memory_buffer)
-
-@staticmethod
-def _create_managed_tensor():
-size = ctypes.c_size_t(ctypes.sizeof(_dlpack.DLManagedTensor))
-address = ctypes.pythonapi.PyMem_RawMalloc(size)
-return _dlpack.DLManagedTensor.from_address(address)
-
-@staticmethod
-@ctypes.CFUNCTYPE(None, ctypes.c_void_p)
-def _managed_tensor_deleter(handle: int) -> None:
-dl_managed_tensor = _dlpack.DLManagedTensor.from_address(handle)
-tensor_obj_ptr = ctypes.cast(
-dl_managed_tensor.manager_ctx, ctypes.POINTER(ctypes.py_object)
-)
-tensor_obj = tensor_obj_ptr.contents
-ctypes.pythonapi.Py_DecRef(tensor_obj)
-shape_obj = ctypes.py_object(dl_managed_tensor.dl_tensor.shape)
-ctypes.pythonapi.Py_DecRef(shape_obj)
-ctypes.pythonapi.PyMem_RawFree(handle)
-
-@staticmethod
-@ctypes.CFUNCTYPE(None, ctypes.c_void_p)
-def _pycapsule_deleter(handle: ctypes.c_void_p) -> None:
-try:
-pycapsule: ctypes.py_object = ctypes.cast(handle, ctypes.py_object)
-if ctypes.pythonapi.PyCapsule_IsValid(pycapsule, _dlpack.c_str_dltensor):
-dl_managed_tensor = ctypes.pythonapi.PyCapsule_GetPointer(
-pycapsule, _dlpack.c_str_dltensor
-)
-
-Tensor._managed_tensor_deleter(dl_managed_tensor)
-
-ctypes.pythonapi.PyCapsule_SetDestructor(pycapsule, None)
-except Exception as e:
-print(f"Exception occurred while deleting capsule: {e}")
-raise e
-
-def _set_dlpack_manager_ctx(self, dl_managed_tensor):
-tensor_obj = ctypes.py_object(self)
-tensor_obj_ptr = ctypes.pointer(tensor_obj)
-dl_managed_tensor.manager_ctx = ctypes.cast(tensor_obj_ptr, ctypes.c_void_p)
-shape_obj = ctypes.py_object(dl_managed_tensor.dl_tensor.shape)
-ctypes.pythonapi.Py_IncRef(tensor_obj)
-ctypes.pythonapi.Py_IncRef(shape_obj)
-
-_from_converters: ClassVar[dict[type, Callable[[Any], Tensor]]] = dict(
-{numpy.ndarray: _from_numpy, numpy.generic: _from_numpy, list: _from_list},
-)
-
-_array_modules: ClassVar[dict[MemoryType, ModuleType | None]] = dict(
-{MemoryType.CPU: numpy, MemoryType.GPU: cupy},
-)
1 change: 0 additions & 1 deletion python/tritonserver/_api/_model.py
@@ -443,7 +443,6 @@ def config(self, config_version: int = 1) -> dict[str, Any]:
Examples
--------
FIXME: Add docs.
>>> server.model("test").config()
server.model("test").config()
