diff --git a/CMakeLists.txt b/CMakeLists.txt
index bc5387ef..35c4e95c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -41,12 +41,6 @@ option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
 option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
 option(TRITON_ENABLE_NVTX "Include nvtx markers collection in backend." OFF)
 
-# FIXME: CI needs to enable the GPU flag. Python for window currently does not
-# support GPU tensors. For simplicity, we will override this option here.
-if(WIN32)
-  set(TRITON_ENABLE_GPU OFF CACHE BOOL "GPU disabled" FORCE)
-endif()
-
 set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")
 set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
 set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
diff --git a/src/pb_utils.cc b/src/pb_utils.cc
index 7bc17fa4..6f62fef5 100644
--- a/src/pb_utils.cc
+++ b/src/pb_utils.cc
@@ -286,6 +286,15 @@ IsUsingCUDAPool(
       reinterpret_cast<void*>(cuda_pool_address));
 }
 
+bool
+DeviceSupportsIPC(const int64_t device_id)
+{
+  int supports_ipc = 0;
+  THROW_IF_CUDA_ERROR(cudaDeviceGetAttribute(
+      &supports_ipc, cudaDevAttrIpcEventSupport, device_id));
+  return (supports_ipc == 1);
+}
+
 #endif  // TRITON_ENABLE_GPU
 
 // FIXME: [DLIS-6078]: We should not need this function. However, some paths are
diff --git a/src/pb_utils.h b/src/pb_utils.h
index 6d5f21ce..28ae6af6 100644
--- a/src/pb_utils.h
+++ b/src/pb_utils.h
@@ -324,6 +324,7 @@ bool IsUsingCUDAPool(
     std::unique_ptr<CUDAMemoryPoolManager>& cuda_pool, int64_t memory_type_id,
     void* data);
 
+bool DeviceSupportsIPC(const int64_t device_id);
 #endif  // TRITON_ENABLE_GPU
 
 // FIXME: [DLIS-6078]: We should not need this function. However, some paths are
diff --git a/src/python_be.cc b/src/python_be.cc
index 0fa318ff..838c6d7a 100644
--- a/src/python_be.cc
+++ b/src/python_be.cc
@@ -512,6 +512,25 @@ ModelInstanceState::GetInputTensor(
   cpu_only_tensors = true;
 #endif  // TRITON_ENABLE_GPU
 
+// For Windows, force CPU tensors if IPC is not supported on
+// the target GPU device
+#if defined(TRITON_ENABLE_GPU) && defined(_WIN32)
+  if (src_memory_type == TRITONSERVER_MEMORY_GPU) {
+    bool supports_ipc = DeviceSupportsIPC(src_memory_type_id);
+    if (!supports_ipc) {
+      LOG_MESSAGE(
+          TRITONSERVER_LOG_WARN,
+          (std::string(
+               "GPU memory storage requested, but GPU device " +
+               std::to_string(src_memory_type_id) +
+               " does not support IPC, which is necessary to support GPU "
+               "tensors. Forcing CPU only input tensors.")
+               .c_str()));
+      cpu_only_tensors = true;
+    }
+  }
+#endif  // TRITON_ENABLE_GPU && _WIN32
+
   if (cpu_only_tensors || src_memory_type != TRITONSERVER_MEMORY_GPU) {
     input_tensor = std::make_shared<PbTensor>(
         std::string(input_name),
@@ -611,9 +630,7 @@
         &cuda_used));
 
     if (cuda_used) {
-#ifdef TRITON_ENABLE_GPU
       cudaStreamSynchronize(stream_);
-#endif
     }
 
     input_tensor = std::make_shared<PbTensor>(
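
The `DeviceSupportsIPC()` helper this patch adds boils down to a single CUDA runtime attribute query. Below is a minimal standalone sketch of the same check, assuming only the CUDA runtime API; `THROW_IF_CUDA_ERROR` is a macro local to this backend, so the sketch uses plain error handling instead. The file name, `main()` driver, and output format are illustrative, not part of the patch.

```cpp
// Standalone sketch of the IPC-support check added in pb_utils.cc.
// Assumes a CUDA toolkit; build with e.g. `nvcc ipc_check.cu -o ipc_check`.
#include <cuda_runtime_api.h>

#include <cstdio>

int
main()
{
  int device_count = 0;
  cudaError_t err = cudaGetDeviceCount(&device_count);
  if (err != cudaSuccess) {
    std::fprintf(
        stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(err));
    return 1;
  }

  for (int device_id = 0; device_id < device_count; ++device_id) {
    // Same attribute the patch queries: 1 if the device supports IPC events,
    // 0 otherwise. On Windows this gates GPU tensor support in the backend.
    int supports_ipc = 0;
    err = cudaDeviceGetAttribute(
        &supports_ipc, cudaDevAttrIpcEventSupport, device_id);
    if (err != cudaSuccess) {
      std::fprintf(
          stderr, "cudaDeviceGetAttribute failed for device %d: %s\n",
          device_id, cudaGetErrorString(err));
      continue;
    }
    std::printf(
        "device %d: IPC event support = %s\n", device_id,
        supports_ipc == 1 ? "yes" : "no");
  }
  return 0;
}
```

A device that reports 0 here is what triggers the new warning path in `GetInputTensor()` above, forcing that request's input tensors onto the CPU instead of failing outright.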