Skip to content

Commit 0ce6c21

Browse files
WIP Make daemon restart logic a bit more robust
1 parent 3de9523 commit 0ce6c21

File tree

4 files changed

+94
-15
lines changed

4 files changed

+94
-15
lines changed

test/test-manager/src/run_tests.rs

+18-9
Original file line numberDiff line numberDiff line change
@@ -102,15 +102,7 @@ pub async fn run(
102102
)
103103
.await;
104104

105-
if test.mullvad_client_version == MullvadClientVersion::New {
106-
// Try to reset the daemon state if the test failed OR if the test doesn't explicitly
107-
// disabled cleanup.
108-
if test.cleanup || matches!(test_result.result, Err(_) | Ok(Err(_))) {
109-
crate::tests::cleanup_after_test(client.clone(), &test_context.rpc_provider)
110-
.await?;
111-
}
112-
}
113-
105+
// Record test result
114106
if print_failed_tests_only {
115107
// Print results of failed test
116108
if matches!(test_result.result, Err(_) | Ok(Err(_))) {
@@ -137,6 +129,23 @@ pub async fn run(
137129
.await
138130
.context("Failed to log test result")?;
139131

132+
// Perform clean up between tests
133+
if test.mullvad_client_version == MullvadClientVersion::New {
134+
// Try to reset the daemon state if the test failed OR if the test doesn't explicitly
135+
// disabled cleanup.
136+
if test.cleanup || matches!(test_result.result, Err(_) | Ok(Err(_))) {
137+
if let Err(cleanup_error) =
138+
crate::tests::cleanup_after_test(client.clone(), &test_context.rpc_provider)
139+
.await
140+
{
141+
log::error!("Clean up between tests failed");
142+
log::error!("{cleanup_error}");
143+
log::error!("Exiting test run");
144+
break;
145+
}
146+
}
147+
}
148+
140149
match test_result.result {
141150
Err(panic) => {
142151
failed_tests.push(test.name);

test/test-manager/src/tests/helpers.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -581,14 +581,14 @@ pub fn unreachable_wireguard_tunnel() -> talpid_types::net::wireguard::Connectio
581581
/// # Note
582582
/// This is independent of the running daemon's environment.
583583
/// It is solely dependant on the current value of [`TEST_CONFIG`].
584-
pub async fn get_app_env() -> anyhow::Result<HashMap<String, String>> {
584+
pub async fn get_app_env() -> Result<HashMap<String, String>, Error> {
585585
use mullvad_api::env;
586586

587587
let api_host = format!("api.{}", TEST_CONFIG.mullvad_host);
588588
let api_host_with_port = format!("{api_host}:443");
589589
let api_addr = resolve_hostname_with_retries(api_host_with_port)
590590
.await
591-
.context("failed to resolve API host")?;
591+
.map_err(Error::DnsLookup)?;
592592

593593
Ok(HashMap::from_iter(vec![
594594
(env::API_HOST_VAR.to_string(), api_host),

test/test-manager/src/tests/mod.rs

+55-4
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ pub enum Error {
4444
#[error("geoip lookup failed")]
4545
GeoipLookup(#[source] test_rpc::Error),
4646

47+
#[error("DNS lookup failed")]
48+
DnsLookup(#[source] std::io::Error),
49+
4750
#[error("Found running daemon unexpectedly")]
4851
DaemonRunning,
4952

@@ -76,13 +79,61 @@ pub fn get_tests() -> Vec<&'static TestMetadata> {
7679

7780
/// Restore settings to the defaults.
7881
pub async fn cleanup_after_test(
79-
rpc: ServiceClient,
82+
mut rpc: ServiceClient,
8083
rpc_provider: &RpcClientProvider,
8184
) -> anyhow::Result<()> {
8285
log::debug!("Cleaning up daemon in test cleanup");
8386
// Check if daemon should be restarted.
84-
// TODO: The deamon needs to be up and running after this line.
85-
restart_daemon(rpc).await?;
87+
// TODO: Move this shizzle up one level?
88+
// TODO: The daemon needs to be up and running after this line.
89+
if let Err(daemon_restart_error) = restart_daemon(&rpc).await {
90+
match daemon_restart_error {
91+
// Something went wrong in the communication between test-manager <-> test-runner.
92+
Error::Rpc(rpc_error) => {
93+
log::warn!("Could not restart the daemon due to RPC-error: {rpc_error}");
94+
// TODO: Try to create a new gRPC client. Need to move this logic up one level to
95+
// do this.
96+
97+
// Restart the test-runner
98+
// HACK: Accomplish this by restarting the virtual machine. This should not be
99+
// necessary.
100+
rpc.reboot().await.inspect_err(|_| {
101+
log::error!("Failed to reboot test runner virtual machine!");
102+
})?;
103+
rpc.reset_daemon_environment().await.inspect_err(|daemon_restart_failure| {
104+
log::warn!("Rebooting the test runner virtual machine did not work: {daemon_restart_failure}")
105+
})?;
106+
}
107+
// Something wen't wrong in the daemon.
108+
daemon_error @ (Error::DaemonNotRunning
109+
| Error::Daemon(_)
110+
| Error::UnexpectedErrorState(_)
111+
| Error::ManagementInterface(_)) => {
112+
log::warn!("Could not restart daemon due to daemon error: {daemon_error}");
113+
log::warn!("Rebooting the test runner virtual machine");
114+
// First, reboot the test-runner VM.
115+
rpc.reboot().await.inspect_err(|_| {
116+
log::error!("Failed to reboot test runner virtual machine!");
117+
})?;
118+
if let Err(daemon_restart_failure) = rpc.reset_daemon_environment().await {
119+
log::warn!("Rebooting the test runner virtual machine did not work: {daemon_restart_failure}");
120+
log::warn!("Reinstalling the app");
121+
// TODO: If rebooting the VM did not work, try to re-install the app.
122+
}
123+
}
124+
// We don't really care about these errors in this context.
125+
non_fatal @ (Error::DaemonRunning
126+
| Error::GeoipLookup(_)
127+
| Error::DnsLookup(_)
128+
| Error::MissingGuiTest) => {
129+
log::warn!("Could not restart daemon due to non-fatal error: {non_fatal}");
130+
log::warn!("Restarting dameon one more time");
131+
rpc.reset_daemon_environment().await?;
132+
}
133+
#[cfg(target_os = "macos")]
134+
Other(_) => todo!("Remove this variant, we can't handle this error properly"),
135+
}
136+
}
86137
let mut mullvad_client = rpc_provider.new_client().await;
87138

88139
helpers::disconnect_and_wait(&mut mullvad_client).await?;
@@ -197,7 +248,7 @@ pub async fn cleanup_after_test(
197248
/// If the daemon was started with non-standard environment variables, subsequent tests may break
198249
/// due to assuming a default configuration. In that case, reset the environment variables and
199250
/// restart.
200-
async fn restart_daemon(rpc: ServiceClient) -> anyhow::Result<()> {
251+
async fn restart_daemon(rpc: &ServiceClient) -> Result<(), Error> {
201252
let current_env = rpc.get_daemon_environment().await?;
202253
let default_env = get_app_env().await?;
203254
if current_env != default_env {

test/test-rpc/src/client.rs

+19
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,24 @@ impl ServiceClient {
302302
Ok(())
303303
}
304304

305+
/// Restart the Mullvad daemon with default environment variables..
306+
///
307+
/// # Returns
308+
/// - `Result::Ok` if the daemon was successfully restarted.
309+
/// - `Result::Err(Error)` if the daemon could not be restarted and is thus no longer running.
310+
pub async fn reset_daemon_environment(&self) -> Result<(), Error> {
311+
let mut ctx = tarpc::context::current();
312+
ctx.deadline = SystemTime::now().checked_add(LOG_LEVEL_TIMEOUT).unwrap();
313+
self.client
314+
.set_daemon_environment(ctx, Default::default())
315+
.await??;
316+
317+
self.mullvad_daemon_wait_for_state(|state| state == ServiceStatus::Running)
318+
.await?;
319+
320+
Ok(())
321+
}
322+
305323
/// Set environment variables specified by `env` and restart the Mullvad daemon.
306324
///
307325
/// # Returns
@@ -370,6 +388,7 @@ impl ServiceClient {
370388
self.connection_handle.reset_connected_state().await;
371389
self.connection_handle.wait_for_server().await?;
372390

391+
// TODO: Get rid of this magic number.
373392
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
374393

375394
Ok(())

0 commit comments

Comments
 (0)