From 0a0b29be8bc62afdce1d6b47aadaa497e303a098 Mon Sep 17 00:00:00 2001 From: kpop-dfinity <125868903+kpop-dfinity@users.noreply.github.com> Date: Mon, 23 Dec 2024 15:25:25 +0100 Subject: [PATCH] test(consensus): add an option to start a Jaeger instance in consensus performance test (#3288) This will allow us, after adding enough instrumentation, to visualize traces like: (image: Screenshot 2024-12-23 at 14 08 01). This may be helpful in identifying bottlenecks. --- rs/tests/consensus/BUILD.bazel | 4 ++- rs/tests/consensus/consensus_performance.rs | 20 +++++++++++-- rs/tests/consensus/utils/src/performance.rs | 32 ++++++++++++++++++++- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/rs/tests/consensus/BUILD.bazel b/rs/tests/consensus/BUILD.bazel index 94cac3a2553..60400c6c4e5 100644 --- a/rs/tests/consensus/BUILD.bazel +++ b/rs/tests/consensus/BUILD.bazel @@ -328,7 +328,9 @@ system_test_nns( ], target_compatible_with = ["@platforms//os:linux"], # requires libssh that does not build on Mac OS test_timeout = "eternal", - runtime_deps = GUESTOS_RUNTIME_DEPS + GRAFANA_RUNTIME_DEPS + COUNTER_CANISTER_RUNTIME_DEPS, + runtime_deps = GUESTOS_RUNTIME_DEPS + GRAFANA_RUNTIME_DEPS + COUNTER_CANISTER_RUNTIME_DEPS + [ + "//rs/tests:jaeger_uvm_config_image", + ], deps = [ # Keep sorted. "//rs/registry/subnet_type", diff --git a/rs/tests/consensus/consensus_performance.rs b/rs/tests/consensus/consensus_performance.rs index 38910b37cc3..50aa75ab51f 100644 --- a/rs/tests/consensus/consensus_performance.rs +++ b/rs/tests/consensus/consensus_performance.rs @@ -45,7 +45,7 @@ // // Happy testing! 
-use ic_consensus_system_test_utils::performance::persist_metrics; +use ic_consensus_system_test_utils::performance::{persist_metrics, setup_jaeger_vm}; use ic_consensus_system_test_utils::rw_message::install_nns_with_customizations_and_check_progress; use ic_registry_subnet_type::SubnetType; use ic_system_test_driver::driver::group::SystemTestGroup; @@ -79,12 +79,28 @@ const NETWORK_SIMULATION: FixedNetworkSimulation = FixedNetworkSimulation::new() .with_latency(LATENCY) .with_bandwidth(BANDWIDTH_MBITS); +/// When set to `true` a [Jaeger](https://www.jaegertracing.io/) instance will be spawned. +/// Look for "Jaeger frontend available at: $URL" in the logs and follow the link to visualize & +/// analyze traces. +const SHOULD_SPAWN_JAEGER_VM: bool = false; + fn setup(env: TestEnv) { PrometheusVm::default() .with_required_host_features(vec![HostFeature::Performance]) .start(&env) .expect("Failed to start prometheus VM"); - InternetComputer::new() + + let mut ic_builder = InternetComputer::new(); + + if SHOULD_SPAWN_JAEGER_VM { + let jaeger_ipv6 = setup_jaeger_vm(&env); + ic_builder = ic_builder.with_jaeger_addr(std::net::SocketAddr::new( + std::net::IpAddr::V6(jaeger_ipv6), + 4317, + )); + } + + ic_builder .with_required_host_features(vec![HostFeature::Performance]) .add_subnet( Subnet::new(SubnetType::System) diff --git a/rs/tests/consensus/utils/src/performance.rs b/rs/tests/consensus/utils/src/performance.rs index fe53607761e..de2dae5179c 100644 --- a/rs/tests/consensus/utils/src/performance.rs +++ b/rs/tests/consensus/utils/src/performance.rs @@ -2,7 +2,10 @@ use ic_registry_subnet_type::SubnetType; use ic_system_test_driver::canister_agent::HasCanisterAgentCapability; use ic_system_test_driver::canister_api::{CallMode, GenericRequest}; use ic_system_test_driver::canister_requests; -use ic_system_test_driver::driver::test_env_api::IcNodeSnapshot; +use ic_system_test_driver::driver::farm::HostFeature; +use ic_system_test_driver::driver::ic::{AmountOfMemoryKiB, 
ImageSizeGiB, NrOfVCPUs, VmResources}; +use ic_system_test_driver::driver::test_env_api::{get_dependency_path, IcNodeSnapshot}; +use ic_system_test_driver::driver::universal_vm::{UniversalVm, UniversalVms}; use ic_system_test_driver::driver::{ test_env::TestEnv, test_env_api::{HasTopologySnapshot, IcNodeContainer}, @@ -425,3 +428,30 @@ fn average_f64(nums: &[f64]) -> f64 { nums.iter().sum::<f64>() / (nums.len() as f64) } + +pub fn setup_jaeger_vm(env: &TestEnv) -> std::net::Ipv6Addr { + const JAEGER_VM_NAME: &str = "jaeger-vm"; + + let path = get_dependency_path("rs/tests/jaeger_uvm_config_image.zst"); + UniversalVm::new(JAEGER_VM_NAME.to_string()) + .with_required_host_features(vec![HostFeature::Performance]) + .with_vm_resources(VmResources { + vcpus: Some(NrOfVCPUs::new(16)), + memory_kibibytes: Some(AmountOfMemoryKiB::new(33560000)), // 32GiB + boot_image_minimal_size_gibibytes: Some(ImageSizeGiB::new(1024)), + }) + .with_config_img(path) + .start(env) + .expect("failed to setup Jaeger Universal VM"); + + let deployed_jaeger_vm = env.get_deployed_universal_vm(JAEGER_VM_NAME).unwrap(); + let jaeger_vm = deployed_jaeger_vm.get_vm().unwrap(); + let jaeger_ipv6 = jaeger_vm.ipv6; + + info!( + env.logger(), + "Jaeger frontend available at: http://[{}]:16686", jaeger_ipv6 + ); + + jaeger_ipv6 +}