diff --git a/CMakeLists.txt b/CMakeLists.txt index 2be987cd..bc5387ef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,12 @@ option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON) option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON) option(TRITON_ENABLE_NVTX "Include nvtx markers collection in backend." OFF) +# FIXME: CI needs to enable the GPU flag. Python for Windows currently does not +# support GPU tensors. For simplicity, we will override this option here. +if(WIN32) + set(TRITON_ENABLE_GPU OFF CACHE BOOL "GPU disabled" FORCE) +endif() + set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo") set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo") set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo") @@ -96,6 +102,9 @@ FetchContent_Declare( GIT_TAG "v0.8" GIT_SHALLOW ON ) +# BUILD_MOCK must be set OFF (with CMP0077 set to NEW so set() overrides the +# subproject option) or the WIN32 build breaks +set(CMAKE_POLICY_DEFAULT_CMP0077 NEW) +set(BUILD_MOCK OFF) FetchContent_MakeAvailable(dlpack) # @@ -129,7 +138,10 @@ if(${TRITON_ENABLE_NVTX}) endif() # TRITON_ENABLE_NVTX find_package(ZLIB REQUIRED) -find_package(Threads REQUIRED) + +if(NOT WIN32) + find_package(Threads REQUIRED) +endif() include_directories(${CMAKE_BINARY_DIR}) configure_file(src/libtriton_python.ldscript libtriton_python.ldscript COPYONLY) @@ -174,21 +186,21 @@ set( ) set( - PYTHON_BACKEND_SRCS - src/python_be.cc - src/python_be.h - src/pb_env.cc - src/pb_env.h - src/pb_metric_reporter.cc - src/pb_metric_reporter.h - src/memory_manager.cc - src/memory_manager.h - src/request_executor.cc - src/request_executor.h - src/stub_launcher.h - src/stub_launcher.cc - src/infer_payload.h - src/infer_payload.cc + PYTHON_BACKEND_SRCS + src/python_be.cc + src/python_be.h + src/pb_env.cc + src/pb_env.h + src/pb_metric_reporter.cc + src/pb_metric_reporter.h + src/memory_manager.cc + src/memory_manager.h + src/request_executor.cc + src/request_executor.h + src/stub_launcher.h + src/stub_launcher.cc + src/infer_payload.h + src/infer_payload.cc ) list(APPEND @@ -239,48 +251,82 @@ target_compile_options( triton-python-backend PRIVATE $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>: -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror> + $<$<CXX_COMPILER_ID:MSVC>:/Wall /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor> ) target_compile_features(triton-python-backend-stub PRIVATE cxx_std_${TRITON_MIN_CXX_STANDARD}) target_compile_options( triton-python-backend-stub PRIVATE $<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>: - -fvisibility=hidden -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror> + -fvisibility=hidden -Wall -Wextra -Wno-unused-parameter -Wno-type-limits -Werror> + $<$<CXX_COMPILER_ID:MSVC>:/Wall /D_WIN32_WINNT=0x0A00 /EHsc /Zc:preprocessor> ) target_compile_definitions(triton-python-backend-stub PRIVATE TRITON_PB_STUB) -target_link_libraries( - triton-python-backend - PRIVATE +# For WIN32 do not link Threads and DL_LIBS +if(WIN32) + target_link_libraries( + triton-python-backend + PRIVATE + dlpack + triton-backend-utils # from repo-backend + -lrt # shared memory + triton-core-serverstub # from repo-core + ZLIB::ZLIB + -larchive + ) + + target_link_libraries( + triton-python-backend-stub + PRIVATE + dlpack + triton-backend-utils # from repo-backend + pybind11::embed + -lrt # shared memory + -larchive # libarchive + ) +else() + target_link_libraries( + triton-python-backend + PRIVATE + dlpack + Threads::Threads + triton-backend-utils # from repo-backend + ${CMAKE_DL_LIBS} # dlopen and dlclose + -lrt # shared memory + triton-core-serverstub # from repo-core
+ ZLIB::ZLIB + -larchive + ) + + target_link_libraries( + triton-python-backend-stub + PRIVATE dlpack Threads::Threads - triton-backend-utils # from repo-backend - ${CMAKE_DL_LIBS} # dlopen and dlclose - -lrt # shared memory - triton-core-serverstub # from repo-core - ZLIB::ZLIB - -larchive -) - -target_link_libraries( - triton-python-backend-stub - PRIVATE - dlpack - Threads::Threads - triton-backend-utils # from repo-backend - ${CMAKE_DL_LIBS} # dlopen and dlclose - pybind11::embed - -lrt # shared memory - -larchive # libarchive -) + triton-backend-utils # from repo-backend + ${CMAKE_DL_LIBS} # dlopen and dlclose + pybind11::embed + -lrt # shared memory + -larchive # libarchive + ) +endif() -set_target_properties( - triton-python-backend PROPERTIES - POSITION_INDEPENDENT_CODE ON - OUTPUT_NAME triton_python - LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_python.ldscript - LINK_FLAGS "-Wl,--version-script libtriton_python.ldscript" -) +if(WIN32) + set_target_properties( + triton-python-backend PROPERTIES + POSITION_INDEPENDENT_CODE ON + OUTPUT_NAME triton_python + ) +else() + set_target_properties( + triton-python-backend PROPERTIES + POSITION_INDEPENDENT_CODE ON + OUTPUT_NAME triton_python + LINK_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libtriton_python.ldscript + LINK_FLAGS "-Wl,--version-script libtriton_python.ldscript" + ) +endif() add_subdirectory(./src/shm_monitor) diff --git a/src/infer_request.h b/src/infer_request.h index b72e3aef..d7ca118f 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -88,7 +88,7 @@ class InferRequest { const uint64_t timeout = 0, const intptr_t response_factory_address = 0, const intptr_t request_address = 0, const PreferredMemory& preferred_memory = - PreferredMemory(PreferredMemory::DEFAULT, 0), + PreferredMemory(PreferredMemory::kDefault, 0), const InferenceTrace& trace = InferenceTrace()); const std::vector<std::shared_ptr<PbTensor>>& Inputs(); diff --git a/src/metric_family.cc b/src/metric_family.cc index fb0fb93a..77e8aedf 100644 --- a/src/metric_family.cc +++ b/src/metric_family.cc @@ -201,9 +201,9 @@ TRITONSERVER_MetricKind MetricFamily::ToTritonServerMetricKind(const MetricKind& kind) { switch (kind) { - case COUNTER: + case kCounter: return TRITONSERVER_METRIC_KIND_COUNTER; - case GAUGE: + case kGauge: return TRITONSERVER_METRIC_KIND_GAUGE; default: throw PythonBackendException("Unknown metric kind"); diff --git a/src/pb_env.cc b/src/pb_env.cc index 0b6eb9ec..d9643a62 100644 --- a/src/pb_env.cc +++ b/src/pb_env.cc @@ -26,9 +26,11 @@ #include "pb_env.h" +#ifndef _WIN32 #include <archive.h> #include <archive_entry.h> #include <fts.h> +#endif #include <sys/stat.h> #include <sys/types.h> @@ -40,6 +42,29 @@ namespace triton { namespace backend { namespace python { +bool +FileExists(std::string& path) +{ + struct stat buffer; + return stat(path.c_str(), &buffer) == 0; +} + +void +LastModifiedTime(const std::string& path, time_t* last_modified_time) +{ + struct stat result; + if (stat(path.c_str(), &result) == 0) { + *last_modified_time = result.st_mtime; + } else { + throw PythonBackendException(std::string( + "LastModifiedTime() failed as file \'" + path + + std::string("\' does not exist."))); + } +} + +// FIXME: [DLIS-5969]: Develop platform-agnostic functions +// to support custom python environments.
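The FIXME above ([DLIS-5969]) asks for platform-agnostic replacements for these stat()-based helpers. As a rough sketch of what that could look like (not part of this patch), std::filesystem — which this change already leans on elsewhere for preferred_separator — covers both platforms; the function names below are hypothetical:

```cpp
#include <filesystem>
#include <string>

namespace fs = std::filesystem;

// Hypothetical portable equivalent of FileExists(); not part of this patch.
bool FileExistsPortable(const std::string& path)
{
  std::error_code ec;
  return fs::exists(fs::path(path), ec) && !ec;
}

// Hypothetical portable equivalent of LastModifiedTime(); throws
// std::filesystem::filesystem_error on failure instead of a
// PythonBackendException.
fs::file_time_type LastWriteTimePortable(const std::string& path)
{
  return fs::last_write_time(fs::path(path));
}
```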
+#ifndef _WIN32 void CopySingleArchiveEntry(archive* input_archive, archive* output_archive) { @@ -73,7 +98,6 @@ CopySingleArchiveEntry(archive* input_archive, archive* output_archive) } } - void ExtractTarFile(std::string& archive_path, std::string& dst_path) { @@ -153,27 +177,6 @@ ExtractTarFile(std::string& archive_path, std::string& dst_path) } } -bool -FileExists(std::string& path) -{ - struct stat buffer; - return stat(path.c_str(), &buffer) == 0; -} - -void -LastModifiedTime(const std::string& path, time_t* last_modified_time) -{ - struct stat result; - if (stat(path.c_str(), &result) == 0) { - *last_modified_time = result.st_mtime; - } else { - throw PythonBackendException(std::string( - "LastModifiedTime() failed as file \'" + path + - std::string("\' does not exists."))); - } -} - - void RecursiveDirectoryDelete(const char* dir) { @@ -326,5 +329,6 @@ EnvironmentManager::~EnvironmentManager() { RecursiveDirectoryDelete(base_path_); } +#endif }}} // namespace triton::backend::python diff --git a/src/pb_env.h b/src/pb_env.h index 09890ee8..04e01fa3 100644 --- a/src/pb_env.h +++ b/src/pb_env.h @@ -30,6 +30,11 @@ #include #include +#ifdef WIN32 +#include +#undef PATH_MAX +#define PATH_MAX MAX_PATH +#endif namespace triton { namespace backend { namespace python { void ExtractTarFile(std::string& archive_path, std::string& dst_path); @@ -39,6 +44,7 @@ bool FileExists(std::string& path); // // A class that manages Python environments // +#ifndef _WIN32 class EnvironmentManager { std::map> env_map_; char base_path_[PATH_MAX + 1]; @@ -52,5 +58,6 @@ class EnvironmentManager { std::string ExtractIfNotExtracted(std::string env_path); ~EnvironmentManager(); }; +#endif }}} // namespace triton::backend::python diff --git a/src/pb_preferred_memory.h b/src/pb_preferred_memory.h index 55f4db89..c28f1b87 100644 --- a/src/pb_preferred_memory.h +++ b/src/pb_preferred_memory.h @@ -30,10 +30,10 @@ namespace triton { namespace backend { namespace python { class PreferredMemory { public: - enum MemoryType { GPU, CPU, DEFAULT }; + enum MemoryType { kGPU, kCPU, kDefault }; PreferredMemory() - : preferred_memory_type_(MemoryType::DEFAULT), preferred_device_id_(0) + : preferred_memory_type_(MemoryType::kDefault), preferred_device_id_(0) { } diff --git a/src/pb_stub.cc b/src/pb_stub.cc index f692ae13..9a5ca713 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -28,7 +28,6 @@ #include #include -#include #include #include @@ -55,6 +54,13 @@ #include "shm_manager.h" #include "triton/common/nvtx.h" +#ifdef _WIN32 +#include // SIGINT & SIGTERM +#include +#else +#include +#endif + #ifdef TRITON_ENABLE_GPU #include #endif // TRITON_ENABLE_GPU @@ -148,6 +154,7 @@ Stub::Instantiate( // interfere with the shared library resolution of other executable and // binaries. 
if (ipc_control_->uses_env) { +#ifndef _WIN32 char* ld_library_path = std::getenv("LD_LIBRARY_PATH"); if (ld_library_path != nullptr) { @@ -173,6 +180,11 @@ Stub::Instantiate( "When using an execution environment, LD_LIBRARY_PATH variable " "cannot be empty."); } +#else + throw PythonBackendException( + "Custom execution environments are not currently supported on " + "Windows."); +#endif } } catch (const PythonBackendException& pb_exception) { @@ -1444,10 +1456,22 @@ Logger::Log( // and pass messages to cerr if (!BackendLoggingActive()) { std::string path(filename); - size_t pos = path.rfind('/'); + size_t pos = path.rfind(std::filesystem::path::preferred_separator); if (pos != std::string::npos) { path = path.substr(pos + 1, std::string::npos); } +#ifdef _WIN32 + std::stringstream ss; + SYSTEMTIME system_time; + GetSystemTime(&system_time); + ss << LeadingLogChar(level) << std::setfill('0') << std::setw(2) + << system_time.wMonth << std::setw(2) << system_time.wDay << ' ' + << std::setw(2) << system_time.wHour << ':' << std::setw(2) + << system_time.wMinute << ':' << std::setw(2) << system_time.wSecond + << '.' << std::setw(6) << system_time.wMilliseconds * 1000 << ' ' + << static_cast(GetCurrentProcessId()) << ' ' << path << ':' + << lineno << "] "; +#else std::stringstream ss; struct timeval tv; gettimeofday(&tv, NULL); @@ -1460,6 +1484,7 @@ Logger::Log( << std::setw(6) << tv.tv_usec << ' ' << static_cast(getpid()) << ' ' << path << ':' << lineno << "] "; std::cerr << ss.str() << " " << message << std::endl; +#endif } else { // Ensure we do not create a stub instance before it has initialized std::unique_ptr& stub = Stub::GetOrCreateInstance(); @@ -1471,37 +1496,37 @@ Logger::Log( void Logger::LogInfo(const std::string& message) { - Logger::Log(message, LogLevel::INFO); + Logger::Log(message, LogLevel::kInfo); } void Logger::LogWarn(const std::string& message) { - Logger::Log(message, LogLevel::WARNING); + Logger::Log(message, LogLevel::kWarning); } void Logger::LogError(const std::string& message) { - Logger::Log(message, LogLevel::ERROR); + Logger::Log(message, LogLevel::kError); } void Logger::LogVerbose(const std::string& message) { - Logger::Log(message, LogLevel::VERBOSE); + Logger::Log(message, LogLevel::kVerbose); } const std::string Logger::LeadingLogChar(const LogLevel& level) { switch (level) { - case LogLevel::WARNING: + case LogLevel::kWarning: return "W"; - case LogLevel::ERROR: + case LogLevel::kError: return "E"; - case LogLevel::INFO: - case LogLevel::VERBOSE: + case LogLevel::kInfo: + case LogLevel::kVerbose: default: return "I"; } @@ -1580,8 +1605,8 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) py::arg("preferred_device_id").none(false) = 0); py::enum_(module, "MemoryType") - .value("TRITONSERVER_MEMORY_GPU", PreferredMemory::MemoryType::GPU) - .value("TRITONSERVER_MEMORY_CPU", PreferredMemory::MemoryType::CPU) + .value("TRITONSERVER_MEMORY_GPU", PreferredMemory::MemoryType::kGPU) + .value("TRITONSERVER_MEMORY_CPU", PreferredMemory::MemoryType::kCPU) .export_values(); py::class_>( @@ -1651,7 +1676,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) py::arg("model_version").none(false) = -1, py::arg("flags").none(false) = 0, py::arg("timeout").none(false) = 0, py::arg("preferred_memory").none(false) = - PreferredMemory(PreferredMemory::DEFAULT, 0), + PreferredMemory(PreferredMemory::kDefault, 0), py::arg("trace").none(false) = InferenceTrace(), py::arg("parameters").none(true) = py::none()) .def( @@ -1773,14 +1798,14 @@ 
PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) py::class_ logger(module, "Logger"); py::enum_(logger, "LogLevel") - .value("INFO", LogLevel::INFO) - .value("WARNING", LogLevel::WARNING) - .value("ERROR", LogLevel::ERROR) - .value("VERBOSE", LogLevel::VERBOSE) + .value("INFO", LogLevel::kInfo) + .value("WARNING", LogLevel::kWarning) + .value("ERROR", LogLevel::kError) + .value("VERBOSE", LogLevel::kVerbose) .export_values(); logger.def_static( "log", py::overload_cast(&Logger::Log), - py::arg("message"), py::arg("level") = LogLevel::INFO); + py::arg("message"), py::arg("level") = LogLevel::kInfo); logger.def_static("log_info", &Logger::LogInfo, py::arg("message")); logger.def_static("log_warn", &Logger::LogWarn, py::arg("message")); logger.def_static("log_error", &Logger::LogError, py::arg("message")); @@ -1792,8 +1817,8 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) .def("value", &Metric::SendGetValueRequest); py::enum_(module, "MetricKind") - .value("COUNTER", MetricKind::COUNTER) - .value("GAUGE", MetricKind::GAUGE) + .value("COUNTER", MetricKind::kCounter) + .value("GAUGE", MetricKind::kGauge) .export_values(); py::class_>( @@ -1805,8 +1830,8 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) .def( "Metric", &MetricFamily::CreateMetric, py::arg("labels").none(true) = py::none()); - module.attr("MetricFamily").attr("COUNTER") = MetricKind::COUNTER; - module.attr("MetricFamily").attr("GAUGE") = MetricKind::GAUGE; + module.attr("MetricFamily").attr("COUNTER") = MetricKind::kCounter; + module.attr("MetricFamily").attr("GAUGE") = MetricKind::kGauge; module.def( "load_model", &LoadModel, py::arg("model_name").none(false), @@ -1834,12 +1859,13 @@ ModelContext::Init( const std::string& model_path, const std::string& runtime_modeldir, const std::string& triton_install_path, const std::string& model_version) { - type_ = ModelType::DEFAULT; + const char os_slash = std::filesystem::path::preferred_separator; + type_ = ModelType::kDefault; if (runtime_modeldir != "DEFAULT") { // For python based backends, existence of `model.py` in the corresponding // backend folder happens on the core side, so we can omit this check here. - python_model_path_ = runtime_modeldir + "/model.py"; - type_ = ModelType::BACKEND; + python_model_path_ = runtime_modeldir + os_slash + "model.py"; + type_ = ModelType::kBackend; } else { python_model_path_ = model_path; // Check if model file exists in this path. @@ -1850,7 +1876,7 @@ ModelContext::Init( } } - model_dir_ = model_path.substr(0, model_path.find_last_of("\\/")); + model_dir_ = model_path.substr(0, model_path.find_last_of(os_slash)); python_backend_folder_ = triton_install_path; model_version_ = model_version; runtime_modeldir_ = runtime_modeldir; @@ -1859,8 +1885,9 @@ ModelContext::Init( void ModelContext::StubSetup(py::module& sys) { + const char os_slash = std::filesystem::path::preferred_separator; std::string model_name = - python_model_path_.substr(python_model_path_.find_last_of("/") + 1); + python_model_path_.substr(python_model_path_.find_last_of(os_slash) + 1); // Model name without the .py extension auto dotpy_pos = model_name.find_last_of(".py"); @@ -1873,11 +1900,11 @@ ModelContext::StubSetup(py::module& sys) // returned by 'find_last_of'. Need to manually adjust the position. 
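A side note on the `.py` trimming here: `find_last_of(".py")` finds the last occurrence of any of the characters '.', 'p', or 'y', not the substring ".py", which is why the returned position needs the manual adjustment applied on the next line. A sketch of a more explicit C++17 alternative (hypothetical helper, not in the patch):

```cpp
#include <string>

// Returns the file name without a trailing ".py", if present.
std::string TrimDotPy(const std::string& model_name)
{
  const std::string ext = ".py";
  if (model_name.size() >= ext.size() &&
      model_name.compare(model_name.size() - ext.size(), ext.size(), ext) ==
          0) {
    return model_name.substr(0, model_name.size() - ext.size());
  }
  return model_name;
}
```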
std::string model_name_trimmed = model_name.substr(0, dotpy_pos - 2); - if (type_ == ModelType::DEFAULT) { + if (type_ == ModelType::kDefault) { std::string model_path_parent = - python_model_path_.substr(0, python_model_path_.find_last_of("/")); + python_model_path_.substr(0, python_model_path_.find_last_of(os_slash)); std::string model_path_parent_parent = - model_path_parent.substr(0, model_path_parent.find_last_of("/")); + model_path_parent.substr(0, model_path_parent.find_last_of(os_slash)); sys.attr("path").attr("append")(model_path_parent); sys.attr("path").attr("append")(model_path_parent_parent); sys.attr("path").attr("append")(python_backend_folder_); @@ -1885,7 +1912,7 @@ ModelContext::StubSetup(py::module& sys) (std::string(model_version_) + "." + model_name_trimmed).c_str()); } else { std::string model_path_parent = - python_model_path_.substr(0, python_model_path_.find_last_of("/")); + python_model_path_.substr(0, python_model_path_.find_last_of(os_slash)); std::string backend_model_dir(model_path_parent); sys.attr("path").attr("append")(backend_model_dir); sys.attr("path").attr("append")(python_backend_folder_); @@ -1893,6 +1920,22 @@ ModelContext::StubSetup(py::module& sys) } } +#ifdef _WIN32 +bool +ParentProcessActive(DWORD parent_id) +{ + HANDLE parent = OpenProcess(PROCESS_ALL_ACCESS, FALSE, parent_id); + DWORD exit_code; + GetExitCodeProcess(parent, &exit_code); + return (exit_code == STILL_ACTIVE); +} +#else +bool +ParentProcessActive(pid_t parent_id) +{ + return (kill(parent_id, 0) == 0); +} +#endif extern "C" { @@ -1917,8 +1960,9 @@ main(int argc, char** argv) // Find the package name from model path. size_t prev = 0, pos = 0; + const char os_slash = std::filesystem::path::preferred_separator; do { - pos = model_path.find("/", prev); + pos = model_path.find(os_slash, prev); if (pos == std::string::npos) pos = model_path.length(); std::string token = model_path.substr(prev, pos - prev); @@ -1953,8 +1997,11 @@ main(int argc, char** argv) // Start the Python Interpreter py::scoped_interpreter guard{}; +#ifdef _WIN32 + DWORD parent_pid = (DWORD)std::stoul(argv[5]); +#else pid_t parent_pid = std::stoi(argv[5]); - +#endif std::atomic background_thread_running = {true}; std::thread background_thread = std::thread([&parent_pid, &background_thread_running, &stub, &logger] { @@ -1973,7 +2020,7 @@ main(int argc, char** argv) stub->UpdateHealth(); - if (kill(parent_pid, 0) != 0) { + if (!ParentProcessActive(parent_pid)) { // When unhealthy, we should stop attempting to send // messages to the backend ASAP. if (stub->StubToParentServiceActive()) { diff --git a/src/pb_stub.h b/src/pb_stub.h index 74a66b95..a51f25f5 100644 --- a/src/pb_stub.h +++ b/src/pb_stub.h @@ -30,18 +30,7 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include "infer_request.h" #include "infer_response.h" @@ -81,17 +70,17 @@ namespace triton { namespace backend { namespace python { } while (false) /// Macros that use current filename and line number. 
-#define LOG_INFO LOG_FL(__FILE__, __LINE__, LogLevel::INFO) -#define LOG_WARN LOG_FL(__FILE__, __LINE__, LogLevel::WARNING) -#define LOG_ERROR LOG_FL(__FILE__, __LINE__, LogLevel::ERROR) -#define LOG_VERBOSE LOG_FL(__FILE__, __LINE__, LogLevel::VERBOSE) +#define LOG_INFO LOG_FL(__FILE__, __LINE__, LogLevel::kInfo) +#define LOG_WARN LOG_FL(__FILE__, __LINE__, LogLevel::kWarning) +#define LOG_ERROR LOG_FL(__FILE__, __LINE__, LogLevel::kError) +#define LOG_VERBOSE LOG_FL(__FILE__, __LINE__, LogLevel::kVerbose) class Logger { public: Logger() { backend_logging_active_ = false; }; ~Logger() { log_instance_.reset(); }; /// Python client log function - static void Log(const std::string& message, LogLevel level = LogLevel::INFO); + static void Log(const std::string& message, LogLevel level = LogLevel::kInfo); /// Python client log info function static void LogInfo(const std::string& message); @@ -138,7 +127,8 @@ class LogMessage { LogMessage(const char* file, int line, LogLevel level) : level_(level) { std::string path(file); - size_t pos = path.rfind('/'); + const char os_slash = std::filesystem::path::preferred_separator; + size_t pos = path.rfind(os_slash); if (pos != std::string::npos) { path = path.substr(pos + 1, std::string::npos); } @@ -185,10 +175,10 @@ class ModelContext { // Triton supports python-based backends, // i.e. backends that provide common `model.py`, that can be re-used // between different models. `ModelType` helps to differentiate - // between models running with c++ python backend (ModelType::DEFAULT) - // and models running with python-based backend (ModelType::BACKEND) + // between models running with c++ python backend (ModelType::kDefault) + // and models running with python-based backend (ModelType::kBackend) // at the time of ModelContext::StubSetup to properly set up paths. - enum ModelType { DEFAULT, BACKEND }; + enum ModelType { kDefault, kBackend }; ModelType type_; }; diff --git a/src/pb_utils.cc b/src/pb_utils.cc index 5aa95b8b..7bc17fa4 100644 --- a/src/pb_utils.cc +++ b/src/pb_utils.cc @@ -26,27 +26,14 @@ #include "pb_utils.h" -#include -#include +#ifdef _WIN32 +#include + +#include +#else #include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "scoped_defer.h" +#endif + #ifdef TRITON_ENABLE_GPU #include @@ -59,42 +46,43 @@ namespace triton { namespace backend { namespace python { CUDAHandler::CUDAHandler() { - dl_open_handle_ = dlopen("libcuda.so", RTLD_LAZY); + dl_open_handle_ = LoadSharedObject("libcuda.so"); // If libcuda.so is successfully opened, it must be able to find // "cuPointerGetAttribute", "cuGetErrorString", and // "cuDevicePrimaryCtxGetState" symbols. if (dl_open_handle_ != nullptr) { - void* cu_pointer_get_attribute_fn = - dlsym(dl_open_handle_, "cuPointerGetAttribute"); + void* cu_pointer_get_attribute_fn = LocateSymbol("cuPointerGetAttribute"); if (cu_pointer_get_attribute_fn == nullptr) { throw PythonBackendException( - std::string("Failed to dlsym 'cuPointerGetAttribute'. Error: ") + - dlerror()); + std::string("Failed to locate 'cuPointerGetAttribute'. Error: ") + + LocateSymbolError()); } *((void**)&cu_pointer_get_attribute_fn_) = cu_pointer_get_attribute_fn; - void* cu_get_error_string_fn = dlsym(dl_open_handle_, "cuGetErrorString"); + void* cu_get_error_string_fn = LocateSymbol("cuGetErrorString"); if (cu_get_error_string_fn == nullptr) { throw PythonBackendException( - std::string("Failed to dlsym 'cuGetErrorString'. 
Error: ") + - dlerror()); + std::string("Failed to locate 'cuGetErrorString'. Error: ") + + LocateSymbolError()); } *((void**)&cu_get_error_string_fn_) = cu_get_error_string_fn; - void* cu_init_fn = dlsym(dl_open_handle_, "cuInit"); + void* cu_init_fn = LocateSymbol("cuInit"); if (cu_init_fn == nullptr) { throw PythonBackendException( - std::string("Failed to dlsym 'cuInit'. Error: ") + dlerror()); + std::string("Failed to locate 'cuInit'. Error: ") + + LocateSymbolError()); } *((void**)&cu_init_fn_) = cu_init_fn; void* cu_device_primary_ctx_get_state_fn = - dlsym(dl_open_handle_, "cuDevicePrimaryCtxGetState"); + LocateSymbol("cuDevicePrimaryCtxGetState"); if (cu_device_primary_ctx_get_state_fn == nullptr) { throw PythonBackendException( - std::string("Failed to dlsym 'cuDevicePrimaryCtxGetState'. Error: ") + - dlerror()); + std::string( + "Failed to locate 'cuDevicePrimaryCtxGetState'. Error: ") + + LocateSymbolError()); } *((void**)&cu_device_primary_ctx_get_state_fn_) = cu_device_primary_ctx_get_state_fn; @@ -105,10 +93,7 @@ CUDAHandler::CUDAHandler() const char* error_string; (*cu_get_error_string_fn_)(cuda_err, &error_string); error_str_ = std::string("failed to call cuInit: ") + error_string; - int status = dlclose(dl_open_handle_); - if (status != 0) { - throw PythonBackendException("Failed to close the libcuda handle."); - } + CloseLibrary(); dl_open_handle_ = nullptr; } } @@ -215,13 +200,58 @@ CUDAHandler::MaybeSetDevice(int device) CUDAHandler::~CUDAHandler() noexcept(false) { if (dl_open_handle_ != nullptr) { - int status = dlclose(dl_open_handle_); - if (status != 0) { - throw PythonBackendException("Failed to close the libcuda handle."); - } + CloseLibrary(); + } +} + +void* +CUDAHandler::LoadSharedObject(const char* filename) +{ +#ifdef _WIN32 + // NOTE: 'nvcuda.dll' is a placeholder library. Apparently, this should be the + // equivalent library for Windows, but need to verify. + return LoadLibraryA("nvcuda.dll"); +#else + return dlopen("libcuda.so", RTLD_LAZY); +#endif +} + +void* +CUDAHandler::LocateSymbol(const char* symbol) +{ +#ifdef _WIN32 + return GetProcAddress(static_cast(dl_open_handle_), symbol); +#else + return dlsym(dl_open_handle_, symbol); +#endif +} + + +std::string +CUDAHandler::LocateSymbolError() +{ +#ifdef _WIN32 + return std::to_string(GetLastError()); +#else + return dlerror(); +#endif +} + +void +CUDAHandler::CloseLibrary() +{ + bool successful = true; +#ifdef _WIN32 + successful = (FreeLibrary(static_cast(dl_open_handle_)) != 0); +#else + successful = (dlclose(dl_open_handle_) == 0); +#endif + if (!successful) { + throw PythonBackendException("Failed to close the cuda library handle."); } } + ScopedSetDevice::ScopedSetDevice(int device) { device_ = device; @@ -258,6 +288,14 @@ IsUsingCUDAPool( #endif // TRITON_ENABLE_GPU +// FIXME: [DLIS-6078]: We should not need this function. However, some paths are +// being retrieved from core that are not platform-agnostic. 
+void +SanitizePath(std::string& path) +{ + std::replace(path.begin(), path.end(), '/', '\\'); +} + #ifndef TRITON_PB_STUB std::shared_ptr WrapTritonErrorInSharedPtr(TRITONSERVER_Error* error) diff --git a/src/pb_utils.h b/src/pb_utils.h index 0873eb03..6d5f21ce 100644 --- a/src/pb_utils.h +++ b/src/pb_utils.h @@ -29,7 +29,6 @@ #ifdef TRITON_ENABLE_GPU #include #endif // TRITON_ENABLE_GPU -#include #include #include @@ -167,9 +166,9 @@ struct ResponseBatch : SendMessageBase { uint32_t response_size; }; -enum LogLevel { INFO = 0, WARNING, ERROR, VERBOSE }; +enum LogLevel { kInfo = 0, kWarning, kError, kVerbose }; -enum MetricKind { COUNTER, GAUGE }; +enum MetricKind { kCounter = 0, kGauge }; struct LogSendMessage : SendMessageBase { bi::managed_external_buffer::handle_t filename; @@ -294,6 +293,10 @@ class CUDAHandler { int64_t memory_type_id, cudaIpcMemHandle_t* cuda_mem_handle, void** data_ptr); void CloseCudaHandle(int64_t memory_type_id, void* data_ptr); + void* LoadSharedObject(const char* filename); + void* LocateSymbol(const char* symbol); + std::string LocateSymbolError(); + void CloseLibrary(); /// Set the device only if the primary context has already been created for /// this device. Inspired from PyTorch's MaybeSetDevice. @@ -323,6 +326,10 @@ bool IsUsingCUDAPool( #endif // TRITON_ENABLE_GPU +// FIXME: [DLIS-6078]: We should not need this function. However, some paths are +// being retrieved from core that are not platform-agnostic. +void SanitizePath(std::string& path); + #ifndef TRITON_PB_STUB std::shared_ptr WrapTritonErrorInSharedPtr( TRITONSERVER_Error* error); diff --git a/src/python_be.cc b/src/python_be.cc index 758e3668..74d7c364 100644 --- a/src/python_be.cc +++ b/src/python_be.cc @@ -25,6 +25,8 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "python_be.h" +#include + #include "gpu_buffers.h" #include "infer_payload.h" #include "model_loader.h" @@ -372,12 +374,15 @@ ModelInstanceState::SaveRequestsToSharedMemory( uint32_t flags; RETURN_IF_ERROR(TRITONBACKEND_RequestFlags(request, &flags)); + // Do not return if error in this case, because Triton core + // will return an error if tracing is disabled (see PYBE PR#295). 
TRITONSERVER_InferenceTrace* triton_trace; auto err = TRITONBACKEND_RequestTrace(request, &triton_trace); if (err != nullptr) { triton_trace = nullptr; TRITONSERVER_ErrorDelete(err); } + InferenceTrace trace = InferenceTrace(triton_trace); uint64_t request_timeout; @@ -395,14 +400,14 @@ ModelInstanceState::SaveRequestsToSharedMemory( parameters_string, flags, request_timeout, reinterpret_cast(factory_ptr), reinterpret_cast(request), - PreferredMemory(PreferredMemory::DEFAULT, 0), trace); + PreferredMemory(PreferredMemory::kDefault, 0), trace); } else { infer_request = std::make_unique( - id, correlation_id, correlation_id_string, pb_input_tensors, - requested_output_names, model_state->Name(), model_state->Version(), - parameters_string, flags, request_timeout, - 0 /* response_factory_address */, reinterpret_cast(request), - PreferredMemory(PreferredMemory::DEFAULT, 0), trace); + id, correlation_id, correlation_id_string, pb_input_tensors, requested_output_names, + model_state->Name(), model_state->Version(), parameters_string, flags, + request_timeout, 0 /* response_factory_address */, + reinterpret_cast(request), + PreferredMemory(PreferredMemory::kDefault, 0), trace); } RETURN_IF_EXCEPTION(infer_request->SaveToSharedMemory(Stub()->ShmPool())); @@ -890,25 +895,25 @@ ModelInstanceState::ProcessLogRequest( LogLevel level = pb_log_message->Level(); switch (level) { - case LogLevel::INFO: { + case LogLevel::kInfo: { TRITONSERVER_LogMessage( TRITONSERVER_LOG_INFO, (filename.c_str()), line, (log_message.c_str())); break; } - case LogLevel::WARNING: { + case LogLevel::kWarning: { TRITONSERVER_LogMessage( TRITONSERVER_LOG_WARN, (filename.c_str()), line, (log_message.c_str())); break; } - case LogLevel::ERROR: { + case LogLevel::kError: { TRITONSERVER_LogMessage( TRITONSERVER_LOG_ERROR, (filename.c_str()), line, (log_message.c_str())); break; } - case LogLevel::VERBOSE: { + case LogLevel::kVerbose: { TRITONSERVER_LogMessage( TRITONSERVER_LOG_VERBOSE, (filename.c_str()), line, (log_message.c_str())); @@ -1428,7 +1433,7 @@ ModelInstanceState::ProcessRequests( // This means that the stub process has exited and Python // backend failed to restart the stub process. - if (Stub()->StubPid() == 0) { + if (!Stub()->StubActive()) { const char* error_message = "The stub process has exited unexpectedly."; RespondErrorToAllRequests( error_message, responses, requests, request_count); @@ -2062,7 +2067,7 @@ ModelState::SetModelConfig() extern "C" { -TRITONSERVER_Error* +TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend) { const char* cname; @@ -2245,27 +2250,33 @@ TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend) .c_str()); // Use BackendArtifacts to determine the location of Python files - const char* location; + const char* clocation; TRITONBACKEND_ArtifactType artifact_type; RETURN_IF_ERROR( - TRITONBACKEND_BackendArtifacts(backend, &artifact_type, &location)); - + TRITONBACKEND_BackendArtifacts(backend, &artifact_type, &clocation)); + + const char os_slash = std::filesystem::path::preferred_separator; + std::string location(clocation); +#ifdef _WIN32 + const std::string stub_executable_name = "triton_python_backend_stub.exe"; + SanitizePath(location); + SanitizePath(default_backend_dir_string); +#else + const std::string stub_executable_name = "triton_python_backend_stub"; +#endif // Check if `triton_python_backend_stub` and `triton_python_backend_utils.py` // are located under `location`. 
- // DLIS-5596: Add forward slash to be platform agnostic - // (i.e. For Windows, we need to use backward slash). std::string default_python_backend_dir = - default_backend_dir_string + "/python"; - std::string backend_stub_path = - std::string(location) + "/triton_python_backend_stub"; + default_backend_dir_string + os_slash + "python"; + std::string backend_stub_path = location + os_slash + stub_executable_name; std::string backend_utils = - std::string(location) + "/triton_python_backend_utils.py"; + location + os_slash + "triton_python_backend_utils.py"; // Both, stub and utils should be in the same location if (FileExists(backend_stub_path) && FileExists(backend_utils)) { backend_state->python_lib = location; // If `location` is default location of a python backend, // then we are using default python backend. - if (default_python_backend_dir == std::string(location)) { + if (default_python_backend_dir == location) { backend_state->runtime_modeldir = ""; } else { // If `location` is not default location of a python backend, @@ -2278,22 +2289,26 @@ TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend) // then we are using a python backend based backend and stub and utils are // stored in the default python backend location. if (!default_backend_dir_string.empty()) { - std::string backend_stub_path = - default_backend_dir_string + "/python/triton_python_backend_stub"; + std::string backend_stub_path = default_backend_dir_string + os_slash + + "python" + os_slash + + stub_executable_name; if (!FileExists(backend_stub_path)) { return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_NOT_FOUND, - (std::string("triton_python_backend_stub") + - " is not found. Searched paths: " + default_backend_dir_string + - "/python and" + std::string(location)) + (stub_executable_name + " is not found. Searched paths: " + + default_backend_dir_string + os_slash + "python and " + location) .c_str()); } } backend_state->runtime_modeldir = location; - backend_state->python_lib = default_backend_dir_string + "/python"; + backend_state->python_lib = + default_backend_dir_string + os_slash + "python"; } - +// FIXME [DLIS-5969]: Enable for Windows when custom execution environments +// are supported. 
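To make the `os_slash`/stub-name resolution above concrete, here is a small standalone sketch of how the stub path comes together on each platform; the backend directory is illustrative (the real value comes from TRITONBACKEND_BackendArtifacts()):

```cpp
#include <filesystem>
#include <iostream>
#include <string>

int main()
{
  const char os_slash = std::filesystem::path::preferred_separator;
#ifdef _WIN32
  const std::string stub_executable_name = "triton_python_backend_stub.exe";
#else
  const std::string stub_executable_name = "triton_python_backend_stub";
#endif
  std::string location = "/opt/tritonserver/backends/python";  // illustrative
  std::cout << location + os_slash + stub_executable_name << std::endl;
}
```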
+#ifndef _WIN32 backend_state->env_manager = std::make_unique(); +#endif RETURN_IF_ERROR(TRITONBACKEND_BackendSetState( backend, reinterpret_cast(backend_state.get()))); @@ -2302,7 +2317,7 @@ TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend) return nullptr; } -TRITONSERVER_Error* +TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_Finalize(TRITONBACKEND_Backend* backend) { LOG_MESSAGE(TRITONSERVER_LOG_VERBOSE, "TRITONBACKEND_Finalize: Start"); @@ -2314,7 +2329,7 @@ TRITONBACKEND_Finalize(TRITONBACKEND_Backend* backend) return nullptr; // success } -TRITONSERVER_Error* +TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model) { const char* cname; @@ -2341,7 +2356,7 @@ TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model) return nullptr; } -TRITONSERVER_Error* +TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model) { void* vstate; @@ -2357,7 +2372,7 @@ TRITONBACKEND_ModelFinalize(TRITONBACKEND_Model* model) return nullptr; } -TRITONSERVER_Error* +TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance) { const char* cname; @@ -2400,7 +2415,7 @@ TRITONBACKEND_ModelInstanceInitialize(TRITONBACKEND_ModelInstance* instance) return nullptr; } -TRITONSERVER_Error* +TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceExecute( TRITONBACKEND_ModelInstance* instance, TRITONBACKEND_Request** requests, const uint32_t request_count) @@ -2525,7 +2540,7 @@ TRITONBACKEND_ModelInstanceExecute( return nullptr; } -TRITONSERVER_Error* +TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance) { void* vstate; @@ -2542,7 +2557,7 @@ TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance) return nullptr; } -TRITONSERVER_Error* +TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_GetBackendAttribute( TRITONBACKEND_Backend* backend, TRITONBACKEND_BackendAttribute* backend_attributes) diff --git a/src/python_be.h b/src/python_be.h index f5620d07..4430767c 100644 --- a/src/python_be.h +++ b/src/python_be.h @@ -26,12 +26,8 @@ #pragma once -#include #include #include -#include -#include -#include #include #include @@ -84,6 +80,14 @@ #include "triton/core/tritonbackend.h" #include "triton/core/tritonserver.h" +#ifdef _WIN32 +#define NOMINMAX +#include +#else +#include +#include +#endif + #define LOG_IF_EXCEPTION(X) \ do { \ try { \ @@ -217,7 +221,12 @@ struct BackendState { std::atomic number_of_instance_inits; std::string shared_memory_region_prefix; int64_t thread_pool_size; + +// FIXME [DLIS-5969]: Enable for Windows when custom execution environments +// are supported. +#ifndef _WIN32 std::unique_ptr env_manager; +#endif std::string runtime_modeldir; }; @@ -299,7 +308,8 @@ class ModelInstanceState : public BackendModelInstance { // Launch stub process. 
TRITONSERVER_Error* LaunchStubProcess(); - TRITONSERVER_Error* SendMessageToStub(off_t message); + TRITONSERVER_Error* SendMessageToStub( + bi::managed_external_buffer::handle_t message); void ResponseSendDecoupled(std::shared_ptr response_send_message); // Checks whether the stub process is live @@ -307,7 +317,8 @@ class ModelInstanceState : public BackendModelInstance { // Get a message from the stub process void SendMessageAndReceiveResponse( - off_t message, off_t& response, bool& restart, + bi::managed_external_buffer::handle_t message, + bi::managed_external_buffer::handle_t& response, bool& restart, std::shared_ptr>& responses, TRITONBACKEND_Request** requests, const uint32_t request_count); diff --git a/src/request_executor.cc b/src/request_executor.cc index a462176e..f7cca1b4 100644 --- a/src/request_executor.cc +++ b/src/request_executor.cc @@ -48,10 +48,10 @@ MemoryTypeToTritonMemoryType( const PreferredMemory::MemoryType& memory_type) { switch (memory_type) { - case PreferredMemory::MemoryType::CPU: + case PreferredMemory::MemoryType::kCPU: *triton_memory_type = TRITONSERVER_MEMORY_CPU; break; - case PreferredMemory::MemoryType::GPU: + case PreferredMemory::MemoryType::kGPU: *triton_memory_type = TRITONSERVER_MEMORY_GPU; break; @@ -202,7 +202,7 @@ ResponseAlloc( ScopedDefer _([&shm_pool] { shm_pool.release(); }); if (p->preferred_memory.PreferredMemoryType() == - PreferredMemory::MemoryType::DEFAULT) { + PreferredMemory::MemoryType::kDefault) { *actual_memory_type = preferred_memory_type; *actual_memory_type_id = preferred_memory_type_id; } else { diff --git a/src/shm_manager.h b/src/shm_manager.h index 5063273b..25e04570 100644 --- a/src/shm_manager.h +++ b/src/shm_manager.h @@ -26,8 +26,6 @@ #pragma once -#include - #include #include #include @@ -92,9 +90,9 @@ struct AllocatedSharedMemory { // info is placed in the beginning and the actual object is placed after that // (i.e. 4 plus the aligned address is not 16-bytes aligned). The aligned memory // is required by semaphore otherwise it may lead to SIGBUS error on ARM. 
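The hunk that follows replaces the GCC-specific `__attribute__((aligned(16)))` with standard C++ `alignas(16)`, which MSVC also understands; a compile-time check one could add anywhere to confirm the two spellings are equivalent:

```cpp
#include <cstdint>

struct alignas(16) AlignedOwnership {
  uint32_t ref_count_;
};

static_assert(alignof(AlignedOwnership) == 16, "must stay 16-byte aligned");
static_assert(sizeof(AlignedOwnership) == 16, "padded out to the alignment");
```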
-struct AllocatedShmOwnership { +struct alignas(16) AllocatedShmOwnership { uint32_t ref_count_; -} __attribute__((aligned(16))); +}; class SharedMemoryManager { public: diff --git a/src/stub_launcher.cc b/src/stub_launcher.cc index b0627486..a9956b55 100644 --- a/src/stub_launcher.cc +++ b/src/stub_launcher.cc @@ -26,12 +26,18 @@ #include "stub_launcher.h" +#include + #include "python_be.h" +#ifdef _WIN32 +#include // getpid() +#endif + namespace triton { namespace backend { namespace python { StubLauncher::StubLauncher(const std::string stub_process_kind) - : parent_pid_(0), stub_pid_(0), is_initialized_(false), + : parent_pid_(0), is_initialized_(false), stub_process_kind_(stub_process_kind), model_instance_name_(""), device_id_(0), kind_("") { @@ -40,8 +46,7 @@ StubLauncher::StubLauncher(const std::string stub_process_kind) StubLauncher::StubLauncher( const std::string stub_process_kind, const std::string model_instance_name, const int32_t device_id, const std::string kind) - : parent_pid_(0), stub_pid_(0), is_initialized_(false), - stub_process_kind_(stub_process_kind), + : is_initialized_(false), stub_process_kind_(stub_process_kind), model_instance_name_(model_instance_name), device_id_(device_id), kind_(kind) { @@ -65,6 +70,13 @@ StubLauncher::Initialize(ModelState* model_state) if (runtime_modeldir_.empty()) { runtime_modeldir_ = "DEFAULT"; } +#ifdef _WIN32 + ZeroMemory(&startup_info_, sizeof(startup_info_)); + startup_info_.cb = sizeof(startup_info_); + ZeroMemory(&stub_pid_, sizeof(stub_pid_)); +#else + stub_pid_ = 0; +#endif // Atomically increase and read the stub process count to avoid shared memory // region name collision @@ -76,7 +88,8 @@ StubLauncher::Initialize(ModelState* model_state) model_version_ = model_state->Version(); std::stringstream ss; - ss << model_repository_path_ << "/" << model_version_ << "/"; + const char os_slash = std::filesystem::path::preferred_separator; + ss << model_repository_path_ << os_slash << model_version_ << os_slash; std::string artifact_name; RETURN_IF_ERROR(model_state->ModelConfig().MemberAsString( "default_model_filename", &artifact_name)); @@ -89,31 +102,20 @@ StubLauncher::Initialize(ModelState* model_state) model_path_ = ss.str(); - // Path to the extracted Python env - std::string python_execution_env = ""; + // FIXME [DLIS-5969]: Enable for Windows when custom execution environments + // are supported. if (python_execution_env_ != "") { - try { - python_execution_env = - model_state->StateForBackend()->env_manager->ExtractIfNotExtracted( - python_execution_env_); - } - catch (PythonBackendException& pb_exception) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, pb_exception.what()); - } - - path_to_activate_ = python_execution_env + "/bin/activate"; - path_to_libpython_ = python_execution_env + "/lib"; - if (python_execution_env.length() > 0 && !FileExists(path_to_activate_)) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - ("Path " + path_to_activate_ + - " does not exist. 
The Python environment should contain an " - "'activate' script.") - .c_str()); - } +#ifndef _WIN32 + RETURN_IF_ERROR(GetPythonEnvironment(model_state)); +#else + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_UNSUPPORTED, + "Custom execution environments are not currently supported on " + "Windows."); +#endif } + parent_pid_ = getpid(); return nullptr; @@ -195,6 +197,139 @@ StubLauncher::Setup() return nullptr; } +// FIXME: This should be merged with the Unix launch function once Windows +// CI and functionality are demonstrably stable. The goal of keeping the +// functions separate is to help debug Windows-specific issues without worrying +// about the impact to our Unix builds. +#ifdef _WIN32 +TRITONSERVER_Error* +StubLauncher::Launch() +{ + std::string stub_name; + if (stub_process_kind_ == "AUTOCOMPLETE_STUB") { + stub_name = model_name_; + } else { + stub_name = model_instance_name_; + } + + const char os_slash = std::filesystem::path::preferred_separator; + + const std::string stub_executable_name = "triton_python_backend_stub.exe"; + SanitizePath(model_path_); + SanitizePath(model_repository_path_); + + // Default Python backend stub + std::string python_backend_stub = + python_lib_ + os_slash + stub_executable_name; + + LOG_MESSAGE( + TRITONSERVER_LOG_INFO, + (std::string("Stub path ") + python_backend_stub).c_str()); + + // Path to alternative Python backend stub + std::string model_python_backend_stub = + std::string(model_repository_path_) + os_slash + stub_executable_name; + + LOG_MESSAGE( + TRITONSERVER_LOG_INFO, + (std::string("Alt path ") + model_python_backend_stub).c_str()); + + // Check if file exists + // TODO: Integrate win32 and pb_env + if (FileExists(model_python_backend_stub)) { + python_backend_stub = model_python_backend_stub; + } + + std::string launch_command; + + std::stringstream ss; + ss << python_backend_stub << " " << model_path_ << " " << shm_region_name_ + << " " << shm_default_byte_size_ << " " << shm_growth_byte_size_ << " " + << parent_pid_ << " " << python_lib_ << " " << ipc_control_handle_ << " " + << stub_name << " " << runtime_modeldir_; + launch_command = ss.str(); + + LOG_MESSAGE( + TRITONSERVER_LOG_INFO, + (std::string("Starting Python backend stub: ") + launch_command).c_str()); + + LPSTR launch_command_lpstr = const_cast<LPSTR>(launch_command.c_str()); + // Start the child process. Unlike fork(), the remainder of this + // function executes in the context of the parent only. + if (!CreateProcess( + NULL, // No module name (use command line) + launch_command_lpstr, // Command line + NULL, // Process handle not inheritable + NULL, // Thread handle not inheritable + FALSE, // Set handle inheritance to FALSE + 0, // No creation flags + NULL, // Use parent's environment block + NULL, // Use parent's starting directory + &startup_info_, // Pointer to STARTUPINFO structure + &stub_pid_) // Pointer to PROCESS_INFORMATION structure + ) { + std::stringstream ss; + ss << "Failed to run python backend stub. Error code = " << GetLastError() + << '\n' + << "Python backend stub path: " << python_backend_stub << '\n' + << "Shared Memory Region Name: " << shm_region_name_ << '\n' + << "Shared Memory Default Byte Size: " << shm_default_byte_size_ << '\n' + << "Shared Memory Growth Byte Size: " << shm_growth_byte_size_ << '\n'; + // Unlike the fork()-based Unix path below, a CreateProcess() failure + // occurs in the parent process, so report it as an error instead of + // exiting the server.
+ return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, ss.str().c_str()); + } + ScopedDefer _([&] { + // Push a dummy message to the message queue so that the stub + // process is notified that it can release the object stored in + // shared memory. + stub_message_queue_->Push(DUMMY_MESSAGE); + + // If the model is not initialized, wait for the stub process to exit. + if (!is_initialized_) { + stub_message_queue_.reset(); + parent_message_queue_.reset(); + memory_manager_.reset(); + WaitForStubProcess(); + } + }); + + // The stub process sends two messages to the parent process during + // initialization. + // 1. When the stub process's health monitoring thread has started. + // 2. When the initialization is fully completed and the Python model is + // loaded. + // + // The reason it is broken into two steps is that creation of the health + // monitoring thread may take longer, which can make the server process think + // that the stub process is unhealthy and return early. Waiting until the + // health thread has spawned prevents this issue. + parent_message_queue_->Pop(); + + if (stub_process_kind_ == "AUTOCOMPLETE_STUB") { + try { + AutocompleteStubProcess(); + } + catch (const PythonBackendException& ex) { + // Need to kill the stub process first + KillStubProcess(); + throw BackendModelException( + TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, ex.what())); + } + } else if (stub_process_kind_ == "MODEL_INSTANCE_STUB") { + RETURN_IF_ERROR(ModelInstanceStubProcess()); + } else { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + (std::string("Unknown stub_process_kind: ") + stub_process_kind_) + .c_str()); + } + + is_initialized_ = true; + + return nullptr; +} +#else TRITONSERVER_Error* StubLauncher::Launch() { @@ -307,11 +442,10 @@ StubLauncher::Launch() // If the model is not initialized, wait for the stub process to exit. if (!is_initialized_) { - int status; stub_message_queue_.reset(); parent_message_queue_.reset(); memory_manager_.reset(); - waitpid(stub_pid_, &status, 0); + WaitForStubProcess(); } }); @@ -335,10 +469,7 @@ } catch (const PythonBackendException& ex) { // Need to kill the stub process first - kill(stub_pid_, SIGKILL); - int status; - waitpid(stub_pid_, &status, 0); - stub_pid_ = 0; + KillStubProcess(); throw BackendModelException( TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, ex.what())); } @@ -357,6 +488,34 @@ return nullptr; } +TRITONSERVER_Error* +StubLauncher::GetPythonEnvironment(ModelState* model_state) +{ + std::string python_execution_env = ""; + try { + python_execution_env = + model_state->StateForBackend()->env_manager->ExtractIfNotExtracted( + python_execution_env_); + } + catch (PythonBackendException& pb_exception) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, pb_exception.what()); + } + + path_to_activate_ = python_execution_env + "/bin/activate"; + path_to_libpython_ = python_execution_env + "/lib"; + if (python_execution_env.length() > 0 && !FileExists(path_to_activate_)) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + ("Path " + path_to_activate_ + + " does not exist.
The Python environment should contain an " + "'activate' script.") + .c_str()); + } + return nullptr; +} +#endif + void StubLauncher::AutocompleteStubProcess() { @@ -473,6 +632,18 @@ StubLauncher::ModelInstanceStubProcess() return nullptr; } +bool +StubLauncher::StubActive() +{ +#ifdef _WIN32 + DWORD ec; + GetExitCodeProcess(stub_pid_.hProcess, &ec); + return (ec == STILL_ACTIVE); +#else + return (stub_pid_ != 0); +#endif +} + void StubLauncher::UpdateHealth() { @@ -483,9 +654,13 @@ StubLauncher::UpdateHealth() ipc_control_->stub_health = false; } - // Sleep 1 second so that the child process has a chance to change the - // health variable +// Sleep 1 second so that the child process has a chance to change the +// health variable +#ifdef _WIN32 + Sleep(1000); // Sleep() takes milliseconds +#else sleep(1); +#endif { bi::scoped_lock lock(*health_mutex_); @@ -515,11 +690,11 @@ StubLauncher::TerminateStub() force_kill = true; } - int status; if (force_kill) { - kill(stub_pid_, SIGKILL); + KillStubProcess(); + } else { + WaitForStubProcess(); } - waitpid(stub_pid_, &status, 0); } // First destroy the IPCControl. This makes sure that IPCControl is @@ -540,10 +715,16 @@ StubLauncher::ClearQueues() void StubLauncher::KillStubProcess() { +#ifdef _WIN32 + TerminateProcess(stub_pid_.hProcess, 1 /* exit code */); + CloseHandle(stub_pid_.hProcess); + CloseHandle(stub_pid_.hThread); +#else kill(stub_pid_, SIGKILL); - int status; - waitpid(stub_pid_, &status, 0); + WaitForStubProcess(); stub_pid_ = 0; +#endif } TRITONSERVER_Error* @@ -600,6 +781,19 @@ StubLauncher::ReceiveMessageFromStub( return nullptr; // success } +void +StubLauncher::WaitForStubProcess() +{ +#ifdef _WIN32 + WaitForSingleObject(stub_pid_.hProcess, INFINITE); + CloseHandle(stub_pid_.hProcess); + CloseHandle(stub_pid_.hThread); +#else + int status; + waitpid(stub_pid_, &status, 0); +#endif +} + #ifdef TRITON_ENABLE_GPU void StubLauncher::ShareCUDAMemoryPool( diff --git a/src/stub_launcher.h b/src/stub_launcher.h index fbbbdbad..6c8dd910 100644 --- a/src/stub_launcher.h +++ b/src/stub_launcher.h @@ -26,8 +26,6 @@ #pragma once -#include - #include #include #include @@ -79,8 +77,8 @@ class StubLauncher { // Model instance stub process TRITONSERVER_Error* ModelInstanceStubProcess(); - // Stub PID - pid_t StubPid() { return stub_pid_; } + // Check whether the stub process is still active + bool StubActive(); // Health mutex bi::interprocess_mutex* HealthMutex() { return health_mutex_; } @@ -151,6 +149,14 @@ TRITONSERVER_Error* ReceiveMessageFromStub( bi::managed_external_buffer::handle_t& message); + // Wait for stub process + void WaitForStubProcess(); + +#ifndef _WIN32 + // FIXME [DLIS-5969]: Enable for Windows when custom execution environments + // are supported. + TRITONSERVER_Error* GetPythonEnvironment(ModelState* model_state); +#endif #ifdef TRITON_ENABLE_GPU // Share CUDA memory pool with stub process void ShareCUDAMemoryPool( @@ -158,9 +164,14 @@ #endif // TRITON_ENABLE_GPU private: +#ifdef _WIN32 + STARTUPINFO startup_info_; + DWORD parent_pid_; + PROCESS_INFORMATION stub_pid_; +#else pid_t parent_pid_; pid_t stub_pid_; - +#endif bool is_initialized_; bool is_decoupled_; bool is_healthy_;
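Tying the two halves of the patch together: the launch command assembled in `StubLauncher::Launch()` is the argv consumed by the stub's `main()` (where `argv[5]`, the parent PID, is parsed as a `DWORD` on Windows and a `pid_t` elsewhere). A hypothetical struct, purely to document the ordering implied by the patch:

```cpp
#include <cstdint>
#include <string>

// Hypothetical documentation aid; field order mirrors the launch command
// built in StubLauncher::Launch().
struct StubArgs {
  std::string model_path;          // argv[1]
  std::string shm_region_name;     // argv[2]
  int64_t shm_default_byte_size;   // argv[3]
  int64_t shm_growth_byte_size;    // argv[4]
  uint64_t parent_pid;             // argv[5]
  std::string python_lib;          // argv[6]
  std::string ipc_control_handle;  // argv[7] (numeric shm handle in practice)
  std::string stub_name;           // argv[8]
  std::string runtime_modeldir;    // argv[9]
};

StubArgs ParseStubArgs(char** argv)
{
  return StubArgs{
      argv[1], argv[2], std::stoll(argv[3]), std::stoll(argv[4]),
      std::stoull(argv[5]), argv[6], argv[7], argv[8], argv[9]};
}
```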
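One caveat worth noting on the Windows liveness checks in this patch (`ParentProcessActive()` and `StubActive()`): `GetExitCodeProcess()` reports `STILL_ACTIVE` (259) even for a process that happened to exit with code 259, and the `OpenProcess` handle in `ParentProcessActive()` is never closed. A sketch of a leak-free check built on a zero-timeout wait instead, under the same `_WIN32` assumption:

```cpp
#ifdef _WIN32
#include <windows.h>

// Sketch only: a zero-timeout wait distinguishes "still running" from
// "exited" without inspecting exit codes.
bool ProcessActive(DWORD process_id)
{
  // SYNCHRONIZE is sufficient for waiting; PROCESS_ALL_ACCESS is not needed.
  HANDLE process = OpenProcess(SYNCHRONIZE, FALSE, process_id);
  if (process == NULL) {
    return false;  // Already gone (or inaccessible).
  }
  const bool active = (WaitForSingleObject(process, 0) == WAIT_TIMEOUT);
  CloseHandle(process);
  return active;
}
#endif  // _WIN32
```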