From 91fc000fe7f49c285df1c81733e511f645a26565 Mon Sep 17 00:00:00 2001
From: Wouter Doppenberg
Date: Fri, 5 Apr 2024 19:41:33 +0200
Subject: [PATCH] removed version constraints for candle deps

---
 Cargo.toml                        |  8 ++---
 src/bin/server.rs                 |  4 +--
 src/infer/handler.rs              |  2 +-
 src/infer/mod.rs                  |  1 +
 src/infer/pool.rs                 |  1 +
 src/lib.rs                        |  1 -
 src/model/device.rs               | 28 ++++++++++++++++
 src/model/embedder.rs             |  3 +-
 src/model/mod.rs                  |  2 ++
 src/model/utils.rs                |  5 +++
 src/server/utils.rs               | 19 +++++++++++
 src/utils.rs                      | 53 ------------------------------
 tests/test_async_openai_client.py |  2 +-
 13 files changed, 66 insertions(+), 63 deletions(-)
 create mode 100644 src/infer/pool.rs
 create mode 100644 src/model/device.rs
 create mode 100644 src/model/utils.rs
 delete mode 100644 src/utils.rs

diff --git a/Cargo.toml b/Cargo.toml
index aa78dbe..4aa9a19 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,9 +16,9 @@ tracing = "0.1.37"
 tracing-subscriber = "0.3.18"
 uuid = { version = "1.6.1", features = ["v4"] }
 serde_json = "1.0.111"
-candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.4.0" }
-candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.4.0" }
-candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.4.0" }
+candle-core = { git = "https://github.com/huggingface/candle.git" }
+candle-nn = { git = "https://github.com/huggingface/candle.git" }
+candle-transformers = { git = "https://github.com/huggingface/candle.git" }
 tokenizers = "0.15.0"
 hf-hub = { version = "0.3.2", features = ["tokio"] }
 anyhow = "1.0.79"
@@ -27,7 +27,7 @@ tracing-chrome = "0.7.1"
 tower-http = { version = "0.5.1", features = ["trace", "timeout"] }
 once_cell = "1.19.0"
 clap = { version = "4.4.18", features = ["derive"] }
-pin-project = "1.1.3"
+workerpool = "1.2.1"
 
 [features]
 default = []
diff --git a/src/bin/server.rs b/src/bin/server.rs
index f9bd404..42d5bd8 100644
--- a/src/bin/server.rs
+++ b/src/bin/server.rs
@@ -5,10 +5,10 @@ use clap::Parser;
 use tokio::net::TcpListener;
 use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
 
-use glowrs::utils::{device::print_device_info, port_in_range};
+use glowrs::model::device::print_device_info;
 use glowrs::server::utils;
 use glowrs::server::{init_router, RouterArgs};
-
+use glowrs::server::utils::port_in_range;
 
 
 #[derive(Debug, Parser)]
diff --git a/src/infer/handler.rs b/src/infer/handler.rs
index 5d28471..129a415 100644
--- a/src/infer/handler.rs
+++ b/src/infer/handler.rs
@@ -70,7 +70,7 @@ mod test {
     use super::*;
     use crate::infer::client::Client;
     use crate::infer::Queue;
-    use crate::utils::device::DEVICE;
+    use crate::model::device::DEVICE;
 
     fn append_str(s_in: String) -> String {
         format!("{}-processed", s_in)
diff --git a/src/infer/mod.rs b/src/infer/mod.rs
index b9e1fbf..0c9d5ae 100644
--- a/src/infer/mod.rs
+++ b/src/infer/mod.rs
@@ -3,6 +3,7 @@ mod client;
 pub mod embed;
 mod handler;
 pub mod batch;
+pub(crate) mod pool;
 
 use uuid::Uuid;
 pub use queue::Queue;
diff --git a/src/infer/pool.rs b/src/infer/pool.rs
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/infer/pool.rs
@@ -0,0 +1 @@
+
diff --git a/src/lib.rs b/src/lib.rs
index ee54fff..6fa6f39 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,3 @@
 pub mod model;
 pub mod infer;
-pub mod utils;
 pub mod server;
diff --git a/src/model/device.rs b/src/model/device.rs
new file mode 100644
index 0000000..d372f41
--- /dev/null
+++ b/src/model/device.rs
@@ -0,0 +1,28 @@
+use once_cell::sync::Lazy;
+use candle_core::Device;
+
+#[cfg(all(feature = "metal", feature = "cuda"))]
+compile_error!("feature \"metal\" and feature \"cuda\" cannot be enabled at the same time");
+
+#[cfg(feature = "metal")]
+pub static DEVICE: Lazy<Device> =
+    Lazy::new(|| Device::new_metal(0).expect("No Metal device found."));
+
+#[cfg(feature = "cuda")]
+pub static DEVICE: Lazy<Device> =
+    Lazy::new(|| Device::new_cuda(0).expect("No CUDA device found."));
+
+#[cfg(not(any(feature = "metal", feature = "cuda")))]
+pub static DEVICE: Lazy<Device> = Lazy::new(|| Device::Cpu);
+
+pub fn print_device_info() {
+
+    #[cfg(not(any(feature = "metal", feature = "cuda")))]
+    tracing::info!("Using CPU");
+
+    #[cfg(feature = "cuda")]
+    tracing::info!("Using CUDA");
+
+    #[cfg(feature = "metal")]
+    tracing::info!("Using Metal");
+}
diff --git a/src/model/embedder.rs b/src/model/embedder.rs
index 81cbe9d..c391213 100644
--- a/src/model/embedder.rs
+++ b/src/model/embedder.rs
@@ -10,9 +10,10 @@ use tokenizers::Tokenizer;
 
 // Re-exports
 pub use candle_transformers::models::{bert::BertModel, jina_bert::BertModel as JinaBertModel};
+use crate::model::utils::normalize_l2;
 
 use crate::server::data_models::{Sentences, Usage};
-use crate::utils::{normalize_l2, device::DEVICE};
+use crate::model::device::DEVICE;
 
 
 pub trait LoadableModel: Sized {
diff --git a/src/model/mod.rs b/src/model/mod.rs
index ad6180b..49ed823 100644
--- a/src/model/mod.rs
+++ b/src/model/mod.rs
@@ -1,2 +1,4 @@
 pub mod embedder;
 pub mod sentence_transformer;
+mod utils;
+pub mod device;
diff --git a/src/model/utils.rs b/src/model/utils.rs
new file mode 100644
index 0000000..fb3f4e5
--- /dev/null
+++ b/src/model/utils.rs
@@ -0,0 +1,5 @@
+use candle_core::Tensor;
+
+pub fn normalize_l2(v: &Tensor) -> candle_core::Result<Tensor> {
+    v.broadcast_div(&v.sqr()?.sum_keepdim(1)?.sqrt()?)
+}
diff --git a/src/server/utils.rs b/src/server/utils.rs
index e4146b2..a03564d 100644
--- a/src/server/utils.rs
+++ b/src/server/utils.rs
@@ -1,5 +1,7 @@
 use anyhow::Result;
 use tokio::signal;
+use std::result;
+use std::ops::RangeInclusive;
 
 type Nullary = fn() -> Result<()>;
 
@@ -32,3 +34,20 @@ pub async fn shutdown_signal(shutdown_fns_opt: Option<&[Nullary]>) {
         _ = terminate => {},
     }
 }
+
+const PORT_RANGE: RangeInclusive<u16> = 1..=65535;
+
+pub fn port_in_range(s: &str) -> result::Result<u16, String> {
+    let port: u16 = s
+        .parse()
+        .map_err(|_| format!("`{s}` isn't a port number"))?;
+    if PORT_RANGE.contains(&port) {
+        Ok(port)
+    } else {
+        Err(format!(
+            "port not in range {}-{}",
+            PORT_RANGE.start(),
+            PORT_RANGE.end()
+        ))
+    }
+}
diff --git a/src/utils.rs b/src/utils.rs
deleted file mode 100644
index 21661d6..0000000
--- a/src/utils.rs
+++ /dev/null
@@ -1,53 +0,0 @@
-use std::ops::RangeInclusive;
-use candle_core::Tensor;
-
-pub mod device {
-    use candle_core::Device;
-    use once_cell::sync::Lazy;
-
-    #[cfg(all(feature = "metal", feature = "cuda"))]
-    compile_error!("feature \"metal\" and feature \"cuda\" cannot be enabled at the same time");
-
-    #[cfg(feature = "metal")]
-    pub static DEVICE: Lazy<Device> =
-        Lazy::new(|| Device::new_metal(0).expect("No Metal device found."));
-
-    #[cfg(feature = "cuda")]
-    pub static DEVICE: Lazy<Device> =
-        Lazy::new(|| Device::new_cuda(0).expect("No CUDA device found."));
-
-    #[cfg(not(any(feature = "metal", feature = "cuda")))]
-    pub static DEVICE: Lazy<Device> = Lazy::new(|| Device::Cpu);
-
-    pub fn print_device_info() {
-
-        #[cfg(not(any(feature = "metal", feature = "cuda")))]
-        tracing::info!("Using CPU");
-
-        #[cfg(feature = "cuda")]
-        tracing::info!("Using CUDA");
-
-        #[cfg(feature = "metal")]
-        tracing::info!("Using Metal");
-    }
-}
-
-pub fn normalize_l2(v: &Tensor) -> candle_core::Result<Tensor> {
-    v.broadcast_div(&v.sqr()?.sum_keepdim(1)?.sqrt()?)
-}
-
-const PORT_RANGE: RangeInclusive<u16> = 1..=65535;
-pub fn port_in_range(s: &str) -> Result<u16, String> {
-    let port: u16 = s
-        .parse()
-        .map_err(|_| format!("`{s}` isn't a port number"))?;
-    if PORT_RANGE.contains(&port) {
-        Ok(port)
-    } else {
-        Err(format!(
-            "port not in range {}-{}",
-            PORT_RANGE.start(),
-            PORT_RANGE.end()
-        ))
-    }
-}
\ No newline at end of file
diff --git a/tests/test_async_openai_client.py b/tests/test_async_openai_client.py
index d166f62..6f0495f 100644
--- a/tests/test_async_openai_client.py
+++ b/tests/test_async_openai_client.py
@@ -21,7 +21,7 @@ async def create_embeddings() -> CreateEmbeddingResponse:
 
 
 async def call_health() -> None:
-    # Call the /health endpoint 10k times
+    # Call the /health endpoint 100 times
     async with AsyncClient() as client:
         for _ in range(100):
             response = await client.get("http://127.0.0.1:3000/health")
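
Dropping the `version` fields means the three candle crates now resolve to the latest commit on candle's default branch (whatever Cargo.lock captures at update time). If reproducible builds are wanted, a hedged sketch of the alternative is to pin the git dependencies with Cargo's `rev` (or `tag`) key; the hash below is a placeholder, not a real candle commit.

    # Hypothetical alternative for Cargo.toml: pin the git dependencies to a known commit.
    # "<commit-sha>" is a placeholder, not an actual candle revision.
    candle-core = { git = "https://github.com/huggingface/candle.git", rev = "<commit-sha>" }
    candle-nn = { git = "https://github.com/huggingface/candle.git", rev = "<commit-sha>" }
    candle-transformers = { git = "https://github.com/huggingface/candle.git", rev = "<commit-sha>" }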