From c42793f9172e72efde7de60aad34cc7dbba251b9 Mon Sep 17 00:00:00 2001 From: Jannik Straube Date: Thu, 6 Feb 2025 01:51:03 +0100 Subject: [PATCH 1/3] ability to automatically set shm size based on sys memory --- miner/src/cli/command.rs | 6 ++++++ miner/src/docker/docker_manager.rs | 2 ++ miner/src/docker/service.rs | 16 ++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/miner/src/cli/command.rs b/miner/src/cli/command.rs index b21cc79..f06f512 100644 --- a/miner/src/cli/command.rs +++ b/miner/src/cli/command.rs @@ -225,9 +225,15 @@ pub async fn execute_command( None => None, }; + let system_memory = node_config + .compute_specs + .as_ref() + .map(|specs| specs.ram_mb.unwrap_or(0)); + let docker_service = Arc::new(DockerService::new( cancellation_token.clone(), has_gpu, + system_memory, task_bridge.socket_path.clone(), docker_storage_path, )); diff --git a/miner/src/docker/docker_manager.rs b/miner/src/docker/docker_manager.rs index 01a472b..998378d 100644 --- a/miner/src/docker/docker_manager.rs +++ b/miner/src/docker/docker_manager.rs @@ -105,6 +105,7 @@ impl DockerManager { gpu_enabled: bool, // Simple Vec of (host_path, container_path, read_only) volumes: Option>, + shm_size: Option, ) -> Result { println!("Starting to pull image: {}", image); @@ -178,6 +179,7 @@ impl DockerManager { options: Some(HashMap::new()), }]), binds: volume_binds, + shm_size: shm_size.map(|s| s as i64), ..Default::default() }) } else { diff --git a/miner/src/docker/service.rs b/miner/src/docker/service.rs index 874d1f6..d6595bf 100644 --- a/miner/src/docker/service.rs +++ b/miner/src/docker/service.rs @@ -18,6 +18,7 @@ pub struct DockerService { cancellation_token: CancellationToken, pub state: Arc, has_gpu: bool, + system_memory_mb: Option, task_bridge_socket_path: String, } @@ -27,6 +28,7 @@ impl DockerService { pub fn new( cancellation_token: CancellationToken, has_gpu: bool, + system_memory_mb: Option, task_bridge_socket_path: String, storage_path: Option, ) -> Self { @@ -36,6 +38,7 @@ impl DockerService { cancellation_token, state: Arc::new(DockerState::new()), has_gpu, + system_memory_mb, task_bridge_socket_path, } } @@ -142,6 +145,7 @@ impl DockerService { let manager_clone = manager_clone.clone(); let state_clone = state.clone(); let has_gpu = self.has_gpu; + let system_memory_mb = self.system_memory_mb.clone(); let task_bridge_socket_path = self.task_bridge_socket_path.clone(); let handle = tokio::spawn(async move { let payload = task_clone.unwrap(); @@ -170,8 +174,14 @@ impl DockerService { false, ) ]; - - match manager_clone.start_container(&payload.image, &container_task_id, Some(env_vars), Some(cmd), has_gpu, Some(volumes)).await { + let shm_size = match system_memory_mb { + Some(mem_mb) => (mem_mb as u64) * 1024 * 1024 / 2, // Convert MB to bytes and divide by 2 + None => { + Console::warning("System memory not available, using default shm size"); + 67108864 // Default to 64MB in bytes + } + }; + match manager_clone.start_container(&payload.image, &container_task_id, Some(env_vars), Some(cmd), has_gpu, Some(volumes), Some(shm_size)).await { Ok(container_id) => { Console::info("DockerService", &format!("Container started with id: {}", container_id)); }, @@ -276,6 +286,7 @@ mod tests { let docker_service = DockerService::new( cancellation_token.clone(), false, + Some(1024), "/tmp/com.prime.miner/metrics.sock".to_string(), None, ); @@ -319,6 +330,7 @@ mod tests { let docker_service = DockerService::new( cancellation_token.clone(), false, + Some(1024), "/tmp/com.prime.miner/metrics.sock".to_string(), None, ); From f1f753801cf735c95042e05abe9583f5cac9d364 Mon Sep 17 00:00:00 2001 From: Jannik Straube Date: Thu, 6 Feb 2025 01:53:32 +0100 Subject: [PATCH 2/3] clippy --- miner/src/docker/docker_manager.rs | 1 + miner/src/docker/service.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/miner/src/docker/docker_manager.rs b/miner/src/docker/docker_manager.rs index 998378d..a4963e1 100644 --- a/miner/src/docker/docker_manager.rs +++ b/miner/src/docker/docker_manager.rs @@ -95,6 +95,7 @@ impl DockerManager { Ok(()) } + #[allow(clippy::too_many_arguments)] /// Start a new container with the given image and configuration pub async fn start_container( &self, diff --git a/miner/src/docker/service.rs b/miner/src/docker/service.rs index d6595bf..d1797f2 100644 --- a/miner/src/docker/service.rs +++ b/miner/src/docker/service.rs @@ -145,7 +145,7 @@ impl DockerService { let manager_clone = manager_clone.clone(); let state_clone = state.clone(); let has_gpu = self.has_gpu; - let system_memory_mb = self.system_memory_mb.clone(); + let system_memory_mb = self.system_memory_mb; let task_bridge_socket_path = self.task_bridge_socket_path.clone(); let handle = tokio::spawn(async move { let payload = task_clone.unwrap(); From ca57e744d639012cb0fdf601c2988b00d643bf55 Mon Sep 17 00:00:00 2001 From: Jannik Straube Date: Thu, 6 Feb 2025 11:30:06 +0100 Subject: [PATCH 3/3] bump version --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f9bdaf1..7e647f5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2211,7 +2211,7 @@ dependencies = [ [[package]] name = "discovery" -version = "0.1.2" +version = "0.1.3" dependencies = [ "actix-web", "alloy", @@ -3596,7 +3596,7 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "miner" -version = "0.1.2" +version = "0.1.3" dependencies = [ "actix-web", "alloy", @@ -4020,7 +4020,7 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "orchestrator" -version = "0.1.2" +version = "0.1.3" dependencies = [ "actix-web", "alloy", @@ -5868,7 +5868,7 @@ dependencies = [ [[package]] name = "validator" -version = "0.1.2" +version = "0.1.3" dependencies = [ "actix-web", "alloy", diff --git a/Cargo.toml b/Cargo.toml index a270743..41eb450 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = ["discovery", "miner", "validator", "shared", "orchestrator", "dev-uti resolver = "2" [workspace.package] -version = "0.1.2" +version = "0.1.3" edition = "2021" [workspace.features]