Skip to content

Commit cb0c1d0

Browse files
committed
monitor: gracefully stop runners when image gets too old (#6)
1 parent 0f09e84 commit cb0c1d0

File tree

4 files changed

+57
-3
lines changed

4 files changed

+57
-3
lines changed

monitor/monitor.toml.example

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# Prepend this to any internal URL in our own responses. Must end with a trailing slash.
22
external_base_url = "http://[::1]:8000/"
33

4+
# Maximum age of base images before they need to be rebuilt, in seconds.
5+
base_image_max_age = 86400
6+
47
[profiles.servo-windows10]
58
configuration_name = "windows10"
69
base_vm_name = "servo-windows10"

monitor/src/main.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,30 @@ fn monitor_thread() -> eyre::Result<()> {
413413
runner.log_info();
414414
}
415415

416+
// Determine whether any profiles need their images rebuilt.
417+
for (key, profile) in profiles.iter() {
418+
let needs_rebuild = profile.image_needs_rebuild();
419+
if needs_rebuild.unwrap_or(true) {
420+
let runner_count = profile.runners(&runners).count();
421+
if needs_rebuild.is_none() {
422+
info!(
423+
key,
424+
runner_count, "profile image may or may not need rebuild"
425+
);
426+
} else if runner_count > 0 {
427+
info!(
428+
key,
429+
runner_count, "profile image needs rebuild; waiting for runners"
430+
);
431+
} else {
432+
info!(
433+
key,
434+
runner_count, "profile image needs rebuild; TODO start image rebuild"
435+
);
436+
}
437+
}
438+
}
439+
416440
let mut unregister_and_destroy = |id, runner: &Runner| {
417441
if runner.registration().is_some() {
418442
if let Err(error) = runners.unregister_runner(id) {

monitor/src/profile.rs

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@ use std::{
55

66
use jane_eyre::eyre::{self, Context};
77
use serde::{Deserialize, Serialize};
8-
use tracing::{debug, info};
8+
use tracing::{debug, info, warn};
99

1010
use crate::{
1111
data::get_profile_data_path,
1212
libvirt::update_screenshot,
1313
runner::{Runner, Runners, Status},
1414
zfs::snapshot_creation_time_unix,
15-
DOTENV,
15+
DOTENV, TOML,
1616
};
1717

1818
#[derive(Clone, Debug, Deserialize)]
@@ -106,7 +106,7 @@ impl Profile {
106106
}
107107

108108
pub fn target_runner_count(&self) -> usize {
109-
if DOTENV.dont_create_runners {
109+
if DOTENV.dont_create_runners || self.image_needs_rebuild().unwrap_or(true) {
110110
0
111111
} else {
112112
self.target_count
@@ -172,6 +172,28 @@ impl Profile {
172172
}
173173
}
174174

175+
/// Returns `Some(true)` if the image needs to be rebuilt, `Some(false)` if it
176+
/// does not, or `None` if the image age could not be determined.
177+
pub fn image_needs_rebuild(&self) -> Option<bool> {
178+
if self.target_count == 0 {
179+
// Profiles with zero target_count may have been set to zero because
180+
// there is insufficient hugepages space to run them, so report that no rebuild is needed
181+
return Some(false);
182+
}
183+
184+
// If we fail to get the image age, log it and return None so callers can err on the side of caution
185+
let image_age = match self.image_age() {
186+
Ok(result) => result,
187+
Err(error) => {
188+
warn!(?error, "Failed to get image age");
189+
return None;
190+
}
191+
};
192+
193+
// If the profile has no image age, we may need to build its image for the first time; otherwise rebuild only once the age exceeds the configured maximum
194+
Some(image_age.map_or(true, |age| age > TOML.base_image_max_age()))
195+
}
196+
175197
pub fn image_age(&self) -> eyre::Result<Option<Duration>> {
176198
let now = SystemTime::now()
177199
.duration_since(UNIX_EPOCH)

monitor/src/settings.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ pub struct Dotenv {
3939
/// Monitor settings deserialized from the TOML configuration (see `monitor.toml.example`).
#[derive(Deserialize)]
4040
pub struct Toml {
4141
/// Prepended to any internal URL in our own responses; must end with a trailing slash.
pub external_base_url: String,
42+
/// Maximum age of base images before they need to be rebuilt, in seconds.
/// Exposed as a `Duration` via `base_image_max_age()`.
base_image_max_age: u64,
4243
/// Profiles keyed by the name used in the `[profiles.<key>]` tables.
profiles: BTreeMap<String, Profile>,
4344
}
4445

@@ -90,6 +91,10 @@ impl Toml {
9091
Ok(result)
9192
}
9293

94+
/// Returns the configured maximum base image age, converting the stored
/// number of seconds into a `Duration`.
pub fn base_image_max_age(&self) -> Duration {
95+
Duration::from_secs(self.base_image_max_age)
96+
}
97+
9398
/// Returns a clone of the profiles map held in this configuration.
pub fn initial_profiles(&self) -> BTreeMap<String, Profile> {
9499
self.profiles.clone()
95100
}

0 commit comments

Comments
 (0)