From 91fc000fe7f49c285df1c81733e511f645a26565 Mon Sep 17 00:00:00 2001
From: Wouter Doppenberg
Date: Fri, 5 Apr 2024 19:41:33 +0200
Subject: [PATCH] removed version constraints for candle deps

---
 Cargo.toml                        |  8 ++---
 src/bin/server.rs                 |  4 +--
 src/infer/handler.rs              |  2 +-
 src/infer/mod.rs                  |  1 +
 src/infer/pool.rs                 |  1 +
 src/lib.rs                        |  1 -
 src/model/device.rs               | 28 ++++++++++++++++
 src/model/embedder.rs             |  3 +-
 src/model/mod.rs                  |  2 ++
 src/model/utils.rs                |  5 +++
 src/server/utils.rs               | 19 +++++++++++
 src/utils.rs                      | 53 ------------------------------
 tests/test_async_openai_client.py |  2 +-
 13 files changed, 66 insertions(+), 63 deletions(-)
 create mode 100644 src/infer/pool.rs
 create mode 100644 src/model/device.rs
 create mode 100644 src/model/utils.rs
 delete mode 100644 src/utils.rs

diff --git a/Cargo.toml b/Cargo.toml
index aa78dbe..4aa9a19 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -16,9 +16,9 @@ tracing = "0.1.37"
 tracing-subscriber = "0.3.18"
 uuid = { version = "1.6.1", features = ["v4"] }
 serde_json = "1.0.111"
-candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.4.0" }
-candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.4.0" }
-candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.4.0" }
+candle-core = { git = "https://github.com/huggingface/candle.git" }
+candle-nn = { git = "https://github.com/huggingface/candle.git" }
+candle-transformers = { git = "https://github.com/huggingface/candle.git" }
 tokenizers = "0.15.0"
 hf-hub = { version = "0.3.2", features = ["tokio"] }
 anyhow = "1.0.79"
@@ -27,7 +27,7 @@ tracing-chrome = "0.7.1"
 tower-http = { version = "0.5.1", features = ["trace", "timeout"] }
 once_cell = "1.19.0"
 clap = { version = "4.4.18", features = ["derive"] }
-pin-project = "1.1.3"
+workerpool = "1.2.1"
 
 [features]
 default = []
diff --git a/src/bin/server.rs b/src/bin/server.rs
index f9bd404..42d5bd8 100644
--- a/src/bin/server.rs
+++ b/src/bin/server.rs
@@ -5,10 +5,10 @@ use clap::Parser;
 use tokio::net::TcpListener;
 use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt};
 
-use glowrs::utils::{device::print_device_info, port_in_range};
+use glowrs::model::device::print_device_info;
 use glowrs::server::utils;
 use glowrs::server::{init_router, RouterArgs};
-
+use glowrs::server::utils::port_in_range;
 
 
 #[derive(Debug, Parser)]
diff --git a/src/infer/handler.rs b/src/infer/handler.rs
index 5d28471..129a415 100644
--- a/src/infer/handler.rs
+++ b/src/infer/handler.rs
@@ -70,7 +70,7 @@ mod test {
     use super::*;
     use crate::infer::client::Client;
     use crate::infer::Queue;
-    use crate::utils::device::DEVICE;
+    use crate::model::device::DEVICE;
 
     fn append_str(s_in: String) -> String {
         format!("{}-processed", s_in)
diff --git a/src/infer/mod.rs b/src/infer/mod.rs
index b9e1fbf..0c9d5ae 100644
--- a/src/infer/mod.rs
+++ b/src/infer/mod.rs
@@ -3,6 +3,7 @@ mod client;
 pub mod embed;
 mod handler;
 pub mod batch;
+pub(crate) mod pool;
 
 use uuid::Uuid;
 pub use queue::Queue;
diff --git a/src/infer/pool.rs b/src/infer/pool.rs
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/src/infer/pool.rs
@@ -0,0 +1 @@
+
diff --git a/src/lib.rs b/src/lib.rs
index ee54fff..6fa6f39 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,4 +1,3 @@
 pub mod model;
 pub mod infer;
-pub mod utils;
 pub mod server;
diff --git a/src/model/device.rs b/src/model/device.rs
new file mode 100644
index 0000000..d372f41
--- /dev/null
+++ b/src/model/device.rs
@@ -0,0 +1,28 @@
+use once_cell::sync::Lazy;
+use candle_core::Device;
+
+#[cfg(all(feature = "metal", feature = "cuda"))]
+compile_error!("feature \"metal\" and feature \"cuda\" cannot be enabled at the same time");
+
+#[cfg(feature = "metal")]
+pub static DEVICE: Lazy<Device> =
+    Lazy::new(|| Device::new_metal(0).expect("No Metal device found."));
+
+#[cfg(feature = "cuda")]
+pub static DEVICE: Lazy<Device> =
+    Lazy::new(|| Device::new_cuda(0).expect("No CUDA device found."));
+
+#[cfg(not(any(feature = "metal", feature = "cuda")))]
+pub static DEVICE: Lazy<Device> = Lazy::new(|| Device::Cpu);
+
+pub fn print_device_info() {
+
+    #[cfg(not(any(feature = "metal", feature = "cuda")))]
+    tracing::info!("Using CPU");
+
+    #[cfg(feature = "cuda")]
+    tracing::info!("Using CUDA");
+
+    #[cfg(feature = "metal")]
+    tracing::info!("Using Metal");
+}
diff --git a/src/model/embedder.rs b/src/model/embedder.rs
index 81cbe9d..c391213 100644
--- a/src/model/embedder.rs
+++ b/src/model/embedder.rs
@@ -10,9 +10,10 @@ use tokenizers::Tokenizer;
 
 // Re-exports
 pub use candle_transformers::models::{bert::BertModel, jina_bert::BertModel as JinaBertModel};
+use crate::model::utils::normalize_l2;
 
 use crate::server::data_models::{Sentences, Usage};
-use crate::utils::{normalize_l2, device::DEVICE};
+use crate::model::device::DEVICE;
 
 
 pub trait LoadableModel: Sized {
diff --git a/src/model/mod.rs b/src/model/mod.rs
index ad6180b..49ed823 100644
--- a/src/model/mod.rs
+++ b/src/model/mod.rs
@@ -1,2 +1,4 @@
 pub mod embedder;
 pub mod sentence_transformer;
+mod utils;
+pub mod device;
diff --git a/src/model/utils.rs b/src/model/utils.rs
new file mode 100644
index 0000000..fb3f4e5
--- /dev/null
+++ b/src/model/utils.rs
@@ -0,0 +1,5 @@
+use candle_core::Tensor;
+
+pub fn normalize_l2(v: &Tensor) -> candle_core::Result<Tensor> {
+    v.broadcast_div(&v.sqr()?.sum_keepdim(1)?.sqrt()?)
+}
diff --git a/src/server/utils.rs b/src/server/utils.rs
index e4146b2..a03564d 100644
--- a/src/server/utils.rs
+++ b/src/server/utils.rs
@@ -1,5 +1,7 @@
 use anyhow::Result;
 use tokio::signal;
+use std::result;
+use std::ops::RangeInclusive;
 
 type Nullary = fn() -> Result<()>;
 
@@ -32,3 +34,20 @@ pub async fn shutdown_signal(shutdown_fns_opt: Option<&[Nullary]>) {
         _ = terminate => {},
     }
 }
+
+const PORT_RANGE: RangeInclusive<u16> = 1..=65535;
+
+pub fn port_in_range(s: &str) -> result::Result<u16, String> {
+    let port: u16 = s
+        .parse()
+        .map_err(|_| format!("`{s}` isn't a port number"))?;
+    if PORT_RANGE.contains(&port) {
+        Ok(port)
+    } else {
+        Err(format!(
+            "port not in range {}-{}",
+            PORT_RANGE.start(),
+            PORT_RANGE.end()
+        ))
+    }
+}
diff --git a/src/utils.rs b/src/utils.rs
deleted file mode 100644
index 21661d6..0000000
--- a/src/utils.rs
+++ /dev/null
@@ -1,53 +0,0 @@
-use std::ops::RangeInclusive;
-use candle_core::Tensor;
-
-pub mod device {
-    use candle_core::Device;
-    use once_cell::sync::Lazy;
-
-    #[cfg(all(feature = "metal", feature = "cuda"))]
-    compile_error!("feature \"metal\" and feature \"cuda\" cannot be enabled at the same time");
-
-    #[cfg(feature = "metal")]
-    pub static DEVICE: Lazy<Device> =
-        Lazy::new(|| Device::new_metal(0).expect("No Metal device found."));
-
-    #[cfg(feature = "cuda")]
-    pub static DEVICE: Lazy<Device> =
-        Lazy::new(|| Device::new_cuda(0).expect("No CUDA device found."));
-
-    #[cfg(not(any(feature = "metal", feature = "cuda")))]
-    pub static DEVICE: Lazy<Device> = Lazy::new(|| Device::Cpu);
-
-    pub fn print_device_info() {
-
-        #[cfg(not(any(feature = "metal", feature = "cuda")))]
-        tracing::info!("Using CPU");
-
-        #[cfg(feature = "cuda")]
-        tracing::info!("Using CUDA");
-
-        #[cfg(feature = "metal")]
-        tracing::info!("Using Metal");
-    }
-}
-
-pub fn normalize_l2(v: &Tensor) -> candle_core::Result<Tensor> {
-    v.broadcast_div(&v.sqr()?.sum_keepdim(1)?.sqrt()?)
-}
-
-const PORT_RANGE: RangeInclusive<u16> = 1..=65535;
-pub fn port_in_range(s: &str) -> Result<u16, String> {
-    let port: u16 = s
-        .parse()
-        .map_err(|_| format!("`{s}` isn't a port number"))?;
-    if PORT_RANGE.contains(&port) {
-        Ok(port)
-    } else {
-        Err(format!(
-            "port not in range {}-{}",
-            PORT_RANGE.start(),
-            PORT_RANGE.end()
-        ))
-    }
-}
\ No newline at end of file
diff --git a/tests/test_async_openai_client.py b/tests/test_async_openai_client.py
index d166f62..6f0495f 100644
--- a/tests/test_async_openai_client.py
+++ b/tests/test_async_openai_client.py
@@ -21,7 +21,7 @@ async def create_embeddings() -> CreateEmbeddingResponse:
 
 
 async def call_health() -> None:
-    # Call the /health endpoint 10k times
+    # Call the /health endpoint 100 times
     async with AsyncClient() as client:
         for _ in range(100):
             response = await client.get("http://127.0.0.1:3000/health")
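
Dropping the `version` fields means the three candle crates now resolve to the latest commit on candle's default branch (whatever Cargo.lock captures at update time). If reproducible builds are wanted, a hedged sketch of the alternative is to pin the git dependencies with Cargo's `rev` (or `tag`) key; the hash below is a placeholder, not a real candle commit.

    # Hypothetical alternative for Cargo.toml: pin the git dependencies to a known commit.
    # "<commit-sha>" is a placeholder, not an actual candle revision.
    candle-core = { git = "https://github.com/huggingface/candle.git", rev = "<commit-sha>" }
    candle-nn = { git = "https://github.com/huggingface/candle.git", rev = "<commit-sha>" }
    candle-transformers = { git = "https://github.com/huggingface/candle.git", rev = "<commit-sha>" }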