From 675a231b45efd7f1755e7a7cf1312e2cdb5c1fd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Oliveira?= Date: Thu, 8 Feb 2024 02:00:49 +0000 Subject: [PATCH 01/10] increment peer per topic count on graft messages (#5212) * increment peer per topic count on graft messages --- beacon_node/lighthouse_network/src/gossipsub/behaviour.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/beacon_node/lighthouse_network/src/gossipsub/behaviour.rs b/beacon_node/lighthouse_network/src/gossipsub/behaviour.rs index 8a89e5904a1..9769adca278 100644 --- a/beacon_node/lighthouse_network/src/gossipsub/behaviour.rs +++ b/beacon_node/lighthouse_network/src/gossipsub/behaviour.rs @@ -1380,7 +1380,11 @@ where tracing::error!(peer_id = %peer_id, "Peer non-existent when handling graft"); return; }; - connected_peer.topics.insert(topic.clone()); + if connected_peer.topics.insert(topic.clone()) { + if let Some(m) = self.metrics.as_mut() { + m.inc_topic_peers(topic); + } + } } // we don't GRAFT to/from explicit peers; complain loudly if this happens From 4db84de563eda826d4a7abc4d1f4ce8a7e043569 Mon Sep 17 00:00:00 2001 From: Age Manning Date: Thu, 8 Feb 2024 02:40:47 +0000 Subject: [PATCH 02/10] Improve network parameters (#5177) * Modify network parameters for current mainnet conditions --- beacon_node/lighthouse_network/src/config.rs | 2 +- beacon_node/lighthouse_network/src/discovery/mod.rs | 2 +- beacon_node/lighthouse_network/src/service/mod.rs | 2 +- beacon_node/src/config.rs | 2 -- 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/beacon_node/lighthouse_network/src/config.rs b/beacon_node/lighthouse_network/src/config.rs index 93ab3d2d81b..5b13730f971 100644 --- a/beacon_node/lighthouse_network/src/config.rs +++ b/beacon_node/lighthouse_network/src/config.rs @@ -350,7 +350,7 @@ impl Default for Config { enr_udp6_port: None, enr_quic6_port: None, enr_tcp6_port: None, - target_peers: 50, + target_peers: 100, gs_config, discv5_config, boot_nodes_enr: vec![], diff --git a/beacon_node/lighthouse_network/src/discovery/mod.rs b/beacon_node/lighthouse_network/src/discovery/mod.rs index 829124e1233..6659ba1d26f 100644 --- a/beacon_node/lighthouse_network/src/discovery/mod.rs +++ b/beacon_node/lighthouse_network/src/discovery/mod.rs @@ -59,7 +59,7 @@ const MAX_DISCOVERY_RETRY: usize = 3; /// Note: we always allow a single FindPeers query, so we would be /// running a maximum of `MAX_CONCURRENT_SUBNET_QUERIES + 1` /// discovery queries at a time. -const MAX_CONCURRENT_SUBNET_QUERIES: usize = 2; +const MAX_CONCURRENT_SUBNET_QUERIES: usize = 4; /// The max number of subnets to search for in a single subnet discovery query. const MAX_SUBNETS_IN_QUERY: usize = 3; /// The number of closest peers to search for when doing a regular peer search. diff --git a/beacon_node/lighthouse_network/src/service/mod.rs b/beacon_node/lighthouse_network/src/service/mod.rs index a66670e7faf..879f13ad7c8 100644 --- a/beacon_node/lighthouse_network/src/service/mod.rs +++ b/beacon_node/lighthouse_network/src/service/mod.rs @@ -51,7 +51,7 @@ mod gossip_cache; pub mod gossipsub_scoring_parameters; pub mod utils; /// The number of peers we target per subnet for discovery queries. -pub const TARGET_SUBNET_PEERS: usize = 6; +pub const TARGET_SUBNET_PEERS: usize = 3; const MAX_IDENTIFY_ADDRESSES: usize = 10; diff --git a/beacon_node/src/config.rs b/beacon_node/src/config.rs index b485775dbfe..1699c51784f 100644 --- a/beacon_node/src/config.rs +++ b/beacon_node/src/config.rs @@ -1122,8 +1122,6 @@ pub fn set_network_config( config.target_peers = target_peers_str .parse::() .map_err(|_| format!("Invalid number of target peers: {}", target_peers_str))?; - } else { - config.target_peers = 80; // default value } if let Some(value) = cli_args.value_of("network-load") { From 0b59d10ab6150c6d2ecb4fceb76ca7ef77d5c217 Mon Sep 17 00:00:00 2001 From: Pawan Dhananjay Date: Thu, 8 Feb 2024 08:10:51 +0530 Subject: [PATCH 03/10] Fix backfill stalling (#5192) * Prevent early short circuit in `peer_disconnected` * lint --- beacon_node/network/src/sync/backfill_sync/mod.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/beacon_node/network/src/sync/backfill_sync/mod.rs b/beacon_node/network/src/sync/backfill_sync/mod.rs index 0d7e7c16c36..4e24aca07ff 100644 --- a/beacon_node/network/src/sync/backfill_sync/mod.rs +++ b/beacon_node/network/src/sync/backfill_sync/mod.rs @@ -332,7 +332,16 @@ impl BackFillSync { } // If we have run out of peers in which to retry this batch, the backfill state // transitions to a paused state. - self.retry_batch_download(network, id)?; + // We still need to reset the state for all the affected batches, so we should not + // short circuit early + if self.retry_batch_download(network, id).is_err() { + debug!( + self.log, + "Batch could not be retried"; + "batch_id" => id, + "error" => "no synced peers" + ); + } } else { debug!(self.log, "Batch not found while removing peer"; "peer" => %peer_id, "batch" => id) From 0c3fef59b3c6697ee84e4f89a8537c097eacfc5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Oliveira?= Date: Thu, 8 Feb 2024 02:40:54 +0000 Subject: [PATCH 04/10] improve libp2p connected peer metrics (#4870) * improve libp2p connected peer metrics * separate discv5 port from libp2p for NAT open * use metric family for DISCOVERY_BYTES * Merge branch 'unstable' of https://github.com/sigp/lighthouse into improve-metrics --- beacon_node/http_api/src/lib.rs | 10 +-- .../lighthouse_network/src/discovery/mod.rs | 8 ++- beacon_node/lighthouse_network/src/metrics.rs | 68 +++---------------- .../src/peer_manager/mod.rs | 18 ----- .../src/peer_manager/network_behaviour.rs | 26 ++++--- common/lighthouse_metrics/src/lib.rs | 10 +++ common/system_health/src/lib.rs | 25 +++++-- 7 files changed, 65 insertions(+), 100 deletions(-) diff --git a/beacon_node/http_api/src/lib.rs b/beacon_node/http_api/src/lib.rs index f89bc3c29fb..ad841b20e3a 100644 --- a/beacon_node/http_api/src/lib.rs +++ b/beacon_node/http_api/src/lib.rs @@ -67,7 +67,7 @@ use std::path::PathBuf; use std::pin::Pin; use std::sync::Arc; use sysinfo::{System, SystemExt}; -use system_health::observe_system_health_bn; +use system_health::{observe_nat, observe_system_health_bn}; use task_spawner::{Priority, TaskSpawner}; use tokio::sync::{ mpsc::{Sender, UnboundedSender}, @@ -4071,13 +4071,7 @@ pub fn serve( .and(warp::path::end()) .then(|task_spawner: TaskSpawner| { task_spawner.blocking_json_task(Priority::P1, move || { - Ok(api_types::GenericResponse::from( - lighthouse_network::metrics::NAT_OPEN - .as_ref() - .map(|v| v.get()) - .unwrap_or(0) - != 0, - )) + Ok(api_types::GenericResponse::from(observe_nat())) }) }); diff --git a/beacon_node/lighthouse_network/src/discovery/mod.rs b/beacon_node/lighthouse_network/src/discovery/mod.rs index 6659ba1d26f..c1781c85832 100644 --- a/beacon_node/lighthouse_network/src/discovery/mod.rs +++ b/beacon_node/lighthouse_network/src/discovery/mod.rs @@ -1003,11 +1003,13 @@ impl NetworkBehaviour for Discovery { } discv5::Event::SocketUpdated(socket_addr) => { info!(self.log, "Address updated"; "ip" => %socket_addr.ip(), "udp_port" => %socket_addr.port()); - metrics::inc_counter(&metrics::ADDRESS_UPDATE_COUNT); - metrics::check_nat(); + // We have SOCKET_UPDATED messages. This occurs when discovery has a majority of + // users reporting an external port and our ENR gets updated. + // Which means we are able to do NAT traversal. + metrics::set_gauge_vec(&metrics::NAT_OPEN, &["discv5"], 1); + // Discv5 will have updated our local ENR. We save the updated version // to disk. - if (self.update_ports.tcp4 && socket_addr.is_ipv4()) || (self.update_ports.tcp6 && socket_addr.is_ipv6()) { diff --git a/beacon_node/lighthouse_network/src/metrics.rs b/beacon_node/lighthouse_network/src/metrics.rs index ae02b689d81..ada48d6ebd0 100644 --- a/beacon_node/lighthouse_network/src/metrics.rs +++ b/beacon_node/lighthouse_network/src/metrics.rs @@ -1,29 +1,17 @@ pub use lighthouse_metrics::*; lazy_static! { - pub static ref NAT_OPEN: Result = try_create_int_counter( + pub static ref NAT_OPEN: Result = try_create_int_gauge_vec( "nat_open", - "An estimate indicating if the local node is exposed to the internet." + "An estimate indicating if the local node is reachable from external nodes", + &["protocol"] ); pub static ref ADDRESS_UPDATE_COUNT: Result = try_create_int_counter( "libp2p_address_update_total", "Count of libp2p socked updated events (when our view of our IP address has changed)" ); - pub static ref PEERS_CONNECTED: Result = try_create_int_gauge( - "libp2p_peers", - "Count of libp2p peers currently connected" - ); - - pub static ref TCP_PEERS_CONNECTED: Result = try_create_int_gauge( - "libp2p_tcp_peers", - "Count of libp2p peers currently connected via TCP" - ); - - pub static ref QUIC_PEERS_CONNECTED: Result = try_create_int_gauge( - "libp2p_quic_peers", - "Count of libp2p peers currently connected via QUIC" - ); - + pub static ref PEERS_CONNECTED: Result = + try_create_int_gauge_vec("libp2p_peers", "Count of libp2p peers currently connected", &["direction", "transport"]); pub static ref PEER_CONNECT_EVENT_COUNT: Result = try_create_int_counter( "libp2p_peer_connect_event_total", "Count of libp2p peer connect events (not the current number of connected peers)" @@ -32,13 +20,10 @@ lazy_static! { "libp2p_peer_disconnect_event_total", "Count of libp2p peer disconnect events" ); - pub static ref DISCOVERY_SENT_BYTES: Result = try_create_int_gauge( - "discovery_sent_bytes", - "The number of bytes sent in discovery" - ); - pub static ref DISCOVERY_RECV_BYTES: Result = try_create_int_gauge( - "discovery_recv_bytes", - "The number of bytes received in discovery" + pub static ref DISCOVERY_BYTES: Result = try_create_int_gauge_vec( + "discovery_bytes", + "The number of bytes sent and received in discovery", + &["direction"] ); pub static ref DISCOVERY_QUEUE: Result = try_create_int_gauge( "discovery_queue_size", @@ -135,17 +120,6 @@ lazy_static! { &["type"] ); - /* - * Inbound/Outbound peers - */ - /// The number of peers that dialed us. - pub static ref NETWORK_INBOUND_PEERS: Result = - try_create_int_gauge("network_inbound_peers","The number of peers that are currently connected that have dialed us."); - - /// The number of peers that we dialed us. - pub static ref NETWORK_OUTBOUND_PEERS: Result = - try_create_int_gauge("network_outbound_peers","The number of peers that are currently connected that we dialed."); - /* * Peer Reporting */ @@ -156,31 +130,11 @@ lazy_static! { ); } -/// Checks if we consider the NAT open. -/// -/// Conditions for an open NAT: -/// 1. We have 1 or more SOCKET_UPDATED messages. This occurs when discovery has a majority of -/// users reporting an external port and our ENR gets updated. -/// 2. We have 0 SOCKET_UPDATED messages (can be true if the port was correct on boot), then we -/// rely on whether we have any inbound messages. If we have no socket update messages, but -/// manage to get at least one inbound peer, we are exposed correctly. -pub fn check_nat() { - // NAT is already deemed open. - if NAT_OPEN.as_ref().map(|v| v.get()).unwrap_or(0) != 0 { - return; - } - if ADDRESS_UPDATE_COUNT.as_ref().map(|v| v.get()).unwrap_or(0) != 0 - || NETWORK_INBOUND_PEERS.as_ref().map(|v| v.get()).unwrap_or(0) != 0_i64 - { - inc_counter(&NAT_OPEN); - } -} - pub fn scrape_discovery_metrics() { let metrics = discv5::metrics::Metrics::from(discv5::Discv5::::raw_metrics()); set_float_gauge(&DISCOVERY_REQS, metrics.unsolicited_requests_per_second); set_gauge(&DISCOVERY_SESSIONS, metrics.active_sessions as i64); - set_gauge(&DISCOVERY_SENT_BYTES, metrics.bytes_sent as i64); - set_gauge(&DISCOVERY_RECV_BYTES, metrics.bytes_recv as i64); + set_gauge_vec(&DISCOVERY_BYTES, &["inbound"], metrics.bytes_recv as i64); + set_gauge_vec(&DISCOVERY_BYTES, &["outbound"], metrics.bytes_sent as i64); } diff --git a/beacon_node/lighthouse_network/src/peer_manager/mod.rs b/beacon_node/lighthouse_network/src/peer_manager/mod.rs index e4976a0d374..8227c3420b2 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/mod.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/mod.rs @@ -726,29 +726,14 @@ impl PeerManager { return; } - let mut connected_peer_count = 0; - let mut inbound_connected_peers = 0; - let mut outbound_connected_peers = 0; let mut clients_per_peer = HashMap::new(); for (_peer, peer_info) in self.network_globals.peers.read().connected_peers() { - connected_peer_count += 1; - if let PeerConnectionStatus::Connected { n_in, .. } = peer_info.connection_status() { - if *n_in > 0 { - inbound_connected_peers += 1; - } else { - outbound_connected_peers += 1; - } - } *clients_per_peer .entry(peer_info.client().kind.to_string()) .or_default() += 1; } - metrics::set_gauge(&metrics::PEERS_CONNECTED, connected_peer_count); - metrics::set_gauge(&metrics::NETWORK_INBOUND_PEERS, inbound_connected_peers); - metrics::set_gauge(&metrics::NETWORK_OUTBOUND_PEERS, outbound_connected_peers); - for client_kind in ClientKind::iter() { let value = clients_per_peer.get(&client_kind.to_string()).unwrap_or(&0); metrics::set_gauge_vec( @@ -853,11 +838,8 @@ impl PeerManager { // start a ping and status timer for the peer self.status_peers.insert(*peer_id); - let connected_peers = self.network_globals.connected_peers() as i64; - // increment prometheus metrics metrics::inc_counter(&metrics::PEER_CONNECT_EVENT_COUNT); - metrics::set_gauge(&metrics::PEERS_CONNECTED, connected_peers); true } diff --git a/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs b/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs index cb60906f632..1c9274af233 100644 --- a/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs +++ b/beacon_node/lighthouse_network/src/peer_manager/network_behaviour.rs @@ -154,6 +154,8 @@ impl NetworkBehaviour for PeerManager { self.on_dial_failure(peer_id); } FromSwarm::ExternalAddrConfirmed(_) => { + // We have an external address confirmed, means we are able to do NAT traversal. + metrics::set_gauge_vec(&metrics::NAT_OPEN, &["libp2p"], 1); // TODO: we likely want to check this against our assumed external tcp // address } @@ -243,14 +245,14 @@ impl PeerManager { self.events.push(PeerManagerEvent::MetaData(peer_id)); } - // Check NAT if metrics are enabled - if self.network_globals.local_enr.read().udp4().is_some() { - metrics::check_nat(); - } - // increment prometheus metrics if self.metrics_enabled { let remote_addr = endpoint.get_remote_address(); + let direction = if endpoint.is_dialer() { + "outbound" + } else { + "inbound" + }; match remote_addr.iter().find(|proto| { matches!( proto, @@ -258,10 +260,10 @@ impl PeerManager { ) }) { Some(multiaddr::Protocol::QuicV1) => { - metrics::inc_gauge(&metrics::QUIC_PEERS_CONNECTED); + metrics::inc_gauge_vec(&metrics::PEERS_CONNECTED, &[direction, "quic"]); } Some(multiaddr::Protocol::Tcp(_)) => { - metrics::inc_gauge(&metrics::TCP_PEERS_CONNECTED); + metrics::inc_gauge_vec(&metrics::PEERS_CONNECTED, &[direction, "tcp"]); } Some(_) => unreachable!(), None => { @@ -339,6 +341,12 @@ impl PeerManager { let remote_addr = endpoint.get_remote_address(); // Update the prometheus metrics if self.metrics_enabled { + let direction = if endpoint.is_dialer() { + "outbound" + } else { + "inbound" + }; + match remote_addr.iter().find(|proto| { matches!( proto, @@ -346,10 +354,10 @@ impl PeerManager { ) }) { Some(multiaddr::Protocol::QuicV1) => { - metrics::dec_gauge(&metrics::QUIC_PEERS_CONNECTED); + metrics::dec_gauge_vec(&metrics::PEERS_CONNECTED, &[direction, "quic"]); } Some(multiaddr::Protocol::Tcp(_)) => { - metrics::dec_gauge(&metrics::TCP_PEERS_CONNECTED); + metrics::dec_gauge_vec(&metrics::PEERS_CONNECTED, &[direction, "tcp"]); } // If it's an unknown protocol we already logged when connection was established. _ => {} diff --git a/common/lighthouse_metrics/src/lib.rs b/common/lighthouse_metrics/src/lib.rs index 5d25bb313f6..ba9900cc507 100644 --- a/common/lighthouse_metrics/src/lib.rs +++ b/common/lighthouse_metrics/src/lib.rs @@ -244,6 +244,16 @@ pub fn inc_counter_vec(int_counter_vec: &Result, name: &[&str]) { } } +/// Sets the `int_counter_vec` with the given `name` to the `amount`, +/// should only be called with `ammount`s equal or above the current value +/// as per Prometheus spec, the `counter` type should only go up. +pub fn set_counter_vec_by(int_counter_vec: &Result, name: &[&str], amount: u64) { + if let Some(counter) = get_int_counter(int_counter_vec, name) { + counter.reset(); + counter.inc_by(amount); + } +} + pub fn inc_counter_vec_by(int_counter_vec: &Result, name: &[&str], amount: u64) { if let Some(counter) = get_int_counter(int_counter_vec, name) { counter.inc_by(amount); diff --git a/common/system_health/src/lib.rs b/common/system_health/src/lib.rs index d10540e506c..ec64ce31ad3 100644 --- a/common/system_health/src/lib.rs +++ b/common/system_health/src/lib.rs @@ -198,6 +198,25 @@ pub fn observe_system_health_vc( } } +/// Observes if NAT traversal is possible. +pub fn observe_nat() -> bool { + let discv5_nat = lighthouse_network::metrics::get_int_gauge( + &lighthouse_network::metrics::NAT_OPEN, + &["discv5"], + ) + .map(|g| g.get() == 1) + .unwrap_or_default(); + + let libp2p_nat = lighthouse_network::metrics::get_int_gauge( + &lighthouse_network::metrics::NAT_OPEN, + &["libp2p"], + ) + .map(|g| g.get() == 1) + .unwrap_or_default(); + + discv5_nat && libp2p_nat +} + /// Observes the Beacon Node system health. pub fn observe_system_health_bn( sysinfo: Arc>, @@ -223,11 +242,7 @@ pub fn observe_system_health_bn( .unwrap_or_else(|| (String::from("None"), 0, 0)); // Determine if the NAT is open or not. - let nat_open = lighthouse_network::metrics::NAT_OPEN - .as_ref() - .map(|v| v.get()) - .unwrap_or(0) - != 0; + let nat_open = observe_nat(); SystemHealthBN { system_health, From e4705967150bd589baa7cd8cd43a6ab8f330f039 Mon Sep 17 00:00:00 2001 From: zilayo <84344709+zilayo@users.noreply.github.com> Date: Thu, 8 Feb 2024 02:40:58 +0000 Subject: [PATCH 05/10] chore(docs): amend port guidance to enable QUIC support (#5029) * chore(docs): amend port guidance to enable QUIC support --- book/src/advanced_networking.md | 57 ++++++++++++++++++++++++-------- book/src/docker.md | 8 ++--- book/src/faq.md | 58 +++++++++++++++++---------------- 3 files changed, 77 insertions(+), 46 deletions(-) diff --git a/book/src/advanced_networking.md b/book/src/advanced_networking.md index b1f05450c48..5fabf57d568 100644 --- a/book/src/advanced_networking.md +++ b/book/src/advanced_networking.md @@ -40,7 +40,7 @@ drastically and use the (recommended) default. ### NAT Traversal (Port Forwarding) -Lighthouse, by default, uses port 9000 for both TCP and UDP. Lighthouse will +Lighthouse, by default, uses port 9000 for both TCP and UDP. Since v4.5.0, Lighthouse will also attempt to make QUIC connections via UDP port 9001 by default. Lighthouse will still function if it is behind a NAT without any port mappings. Although Lighthouse still functions, we recommend that some mechanism is used to ensure that your Lighthouse node is publicly accessible. This will typically improve @@ -50,8 +50,8 @@ peers for your node and overall improve the Ethereum consensus network. Lighthouse currently supports UPnP. If UPnP is enabled on your router, Lighthouse will automatically establish the port mappings for you (the beacon node will inform you of established routes in this case). If UPnP is not -enabled, we recommend you to manually set up port mappings to both of Lighthouse's -TCP and UDP ports (9000 by default). +enabled, we recommend you to manually set up port mappings to Lighthouse's +TCP and UDP ports (9000 TCP/UDP, and 9001 UDP by default). > Note: Lighthouse needs to advertise its publicly accessible ports in > order to inform its peers that it is contactable and how to connect to it. @@ -66,7 +66,7 @@ TCP and UDP ports (9000 by default). The steps to do port forwarding depends on the router, but the general steps are given below: 1. Determine the default gateway IP: -- On Linux: open a terminal and run `ip route | grep default`, the result should look something similar to `default via 192.168.50.1 dev wlp2s0 proto dhcp metric 600`. The `192.168.50.1` is your router management default gateway IP. +- On Linux: open a terminal and run `ip route | grep default`, the result should look something similar to `default via 192.168.50.1 dev wlp2s0 proto dhcp metric 600`. The `192.168.50.1` is your router management default gateway IP. - On MacOS: open a terminal and run `netstat -nr|grep default` and it should return the default gateway IP. - On Windows: open a command prompt and run `ipconfig` and look for the `Default Gateway` which will show you the gateway IP. @@ -74,16 +74,22 @@ The steps to do port forwarding depends on the router, but the general steps are 2. Login to the router management page. The login credentials are usually available in the manual or the router, or it can be found on a sticker underneath the router. You can also try the login credentials for some common router brands listed [here](https://www.noip.com/support/knowledgebase/general-port-forwarding-guide/). -3. Navigate to the port forward settings in your router. The exact step depends on the router, but typically it will fall under the "Advanced" section, under the name "port forwarding" or "virtual server". +3. Navigate to the port forward settings in your router. The exact step depends on the router, but typically it will fall under the "Advanced" section, under the name "port forwarding" or "virtual server". 4. Configure a port forwarding rule as below: - Protocol: select `TCP/UDP` or `BOTH` - External port: `9000` - Internal port: `9000` -- IP address: Usually there is a dropdown list for you to select the device. Choose the device that is running Lighthouse +- IP address: Usually there is a dropdown list for you to select the device. Choose the device that is running Lighthouse. -5. To check that you have successfully open the ports, go to [yougetsignal](https://www.yougetsignal.com/tools/open-ports/) and enter `9000` in the `port number`. If it shows "open", then you have successfully set up port forwarding. If it shows "closed", double check your settings, and also check that you have allowed firewall rules on port 9000. +Since V4.5.0 port 9001/UDP is also used for QUIC support. +- Protocol: select `UDP` +- External port: `9001` +- Internal port: `9001` +- IP address: Choose the device that is running Lighthouse. + +5. To check that you have successfully opened the ports, go to [yougetsignal](https://www.yougetsignal.com/tools/open-ports/) and enter `9000` in the `port number`. If it shows "open", then you have successfully set up port forwarding. If it shows "closed", double check your settings, and also check that you have allowed firewall rules on port 9000. Note: this will only confirm if port 9000/TCP is open. You will need to ensure you have correctly setup port forwarding for the UDP ports (`9000` and `9001` by default). ### ENR Configuration @@ -125,6 +131,9 @@ IPv4 only: TCP and UDP. - `--listen-address :: --port 9909 --discovery-port 9999` will listen over IPv6 using port `9909` for TCP and port `9999` for UDP. +- By default, QUIC listens for UDP connections using a port number that is one greater than the specified port. + If the specified port is 9909, QUIC will use port 9910 for IPv6 UDP connections. + This can be configured with `--quic-port`. To listen over both IPv4 and IPv6: - Set two listening addresses using the `--listen-address` flag twice ensuring @@ -133,18 +142,38 @@ To listen over both IPv4 and IPv6: that this behaviour differs from the Ipv6 only case described above. - If necessary, set the `--port6` flag to configure the port used for TCP and UDP over IPv6. This flag has no effect when listening over IPv6 only. -- If necessary, set the `--discovery-port6` flag to configure the IPv6 UDP +- If necessary, set the `--discovery-port6` flag to configure the IPv6 UDP port. This will default to the value given to `--port6` if not set. This flag has no effect when listening over IPv6 only. +- If necessary, set the `--quic-port6` flag to configure the port used by QUIC for + UDP over IPv6. This will default to the value given to `--port6` + 1. This flag + has no effect when listening over IPv6 only. ##### Configuration Examples -- `--listen-address :: --listen-address 0.0.0.0 --port 9909` will listen - over IPv4 using port `9909` for TCP and UDP. It will also listen over IPv6 but - using the default value for `--port6` for UDP and TCP (`9090`). -- `--listen-address :: --listen-address --port 9909 --discovery-port6 9999` - will have the same configuration as before except for the IPv6 UDP socket, - which will use port `9999`. +> When using `--listen-address :: --listen-address 0.0.0.0 --port 9909`, listening will be set up as follows: +> +> **IPv4**: +> +> It listens on port `9909` for both TCP and UDP. +> QUIC will use the next sequential port `9910` for UDP. +> +> **IPv6**: +> +> It listens on the default value of --port6 (`9090`) for both UDP and TCP. +> QUIC will use port `9091` for UDP, which is the default `--port6` value (`9090`) + 1. + +> When using `--listen-address :: --listen-address --port 9909 --discovery-port6 9999`, listening will be set up as follows: +> +> **IPv4**: +> +> It listens on port `9909` for both TCP and UDP. +> QUIC will use the next sequential port `9910` for UDP. +> +> **IPv6**: +> +> It listens on the default value of `--port6` (`9090`) for TCP, and port `9999` for UDP. +> QUIC will use port `9091` for UDP, which is the default `--port6` value (`9090`) + 1. #### Configuring Lighthouse to advertise IPv6 reachable addresses Lighthouse supports IPv6 to connect to other nodes both over IPv6 exclusively, diff --git a/book/src/docker.md b/book/src/docker.md index defa89517e1..c48c745a044 100644 --- a/book/src/docker.md +++ b/book/src/docker.md @@ -112,7 +112,7 @@ docker run lighthouse:local lighthouse --help You can run a Docker beacon node with the following command: ```bash -docker run -p 9000:9000/tcp -p 9000:9000/udp -p 127.0.0.1:5052:5052 -v $HOME/.lighthouse:/root/.lighthouse sigp/lighthouse lighthouse --network mainnet beacon --http --http-address 0.0.0.0 +docker run -p 9000:9000/tcp -p 9000:9000/udp -p 9001:9001/udp -p 127.0.0.1:5052:5052 -v $HOME/.lighthouse:/root/.lighthouse sigp/lighthouse lighthouse --network mainnet beacon --http --http-address 0.0.0.0 ``` > To join the Goerli testnet, use `--network goerli` instead. @@ -135,18 +135,18 @@ docker run -v $HOME/.lighthouse:/root/.lighthouse sigp/lighthouse lighthouse bea ### Ports -In order to be a good peer and serve other peers you should expose port `9000` for both TCP and UDP. +In order to be a good peer and serve other peers you should expose port `9000` for both TCP and UDP, and port `9001` for UDP. Use the `-p` flag to do this: ```bash -docker run -p 9000:9000/tcp -p 9000:9000/udp sigp/lighthouse lighthouse beacon +docker run -p 9000:9000/tcp -p 9000:9000/udp -p 9001:9001/udp sigp/lighthouse lighthouse beacon ``` If you use the `--http` flag you may also want to expose the HTTP port with `-p 127.0.0.1:5052:5052`. ```bash -docker run -p 9000:9000/tcp -p 9000:9000/udp -p 127.0.0.1:5052:5052 sigp/lighthouse lighthouse beacon --http --http-address 0.0.0.0 +docker run -p 9000:9000/tcp -p 9000:9000/udp -p 9001:9001/udp -p 127.0.0.1:5052:5052 sigp/lighthouse lighthouse beacon --http --http-address 0.0.0.0 ``` [docker_hub]: https://hub.docker.com/repository/docker/sigp/lighthouse/ diff --git a/book/src/faq.md b/book/src/faq.md index c1717f7fc88..b8b267f17c6 100644 --- a/book/src/faq.md +++ b/book/src/faq.md @@ -80,13 +80,13 @@ The `WARN Execution engine called failed` log is shown when the beacon node cann `error: Reqwest(reqwest::Error { kind: Request, url: Url { scheme: "http", cannot_be_a_base: false, username: "", password: None, host: Some(Ipv4(127.0.0.1)), port: Some(8551), path: "/", query: None, fragment: None }, source: TimedOut }), service: exec` which says `TimedOut` at the end of the message. This means that the execution engine has not responded in time to the beacon node. One option is to add the flags `--execution-timeout-multiplier 3` and `--disable-lock-timeouts` to the beacon node. However, if the error persists, it is worth digging further to find out the cause. There are a few reasons why this can occur: -1. The execution engine is not synced. Check the log of the execution engine to make sure that it is synced. If it is syncing, wait until it is synced and the error will disappear. You will see the beacon node logs `INFO Execution engine online` when it is synced. +1. The execution engine is not synced. Check the log of the execution engine to make sure that it is synced. If it is syncing, wait until it is synced and the error will disappear. You will see the beacon node logs `INFO Execution engine online` when it is synced. 1. The computer is overloaded. Check the CPU and RAM usage to see if it has overloaded. You can use `htop` to check for CPU and RAM usage. 1. Your SSD is slow. Check if your SSD is in "The Bad" list [here](https://gist.github.com/yorickdowne/f3a3e79a573bf35767cd002cc977b038). If your SSD is in "The Bad" list, it means it cannot keep in sync to the network and you may want to consider upgrading to a better SSD. If the reason for the error message is caused by no. 1 above, you may want to look further. If the execution engine is out of sync suddenly, it is usually caused by ungraceful shutdown. The common causes for ungraceful shutdown are: -- Power outage. If power outages are an issue at your place, consider getting a UPS to avoid ungraceful shutdown of services. -- The service file is not stopped properly. To overcome this, make sure that the process is stopped properly, e.g., during client updates. +- Power outage. If power outages are an issue at your place, consider getting a UPS to avoid ungraceful shutdown of services. +- The service file is not stopped properly. To overcome this, make sure that the process is stopped properly, e.g., during client updates. - Out of memory (oom) error. This can happen when the system memory usage has reached its maximum and causes the execution engine to be killed. When this occurs, the log file will show `Main process exited, code=killed, status=9/KILL`. You can also run `sudo journalctl -a --since "18 hours ago" | grep -i "killed process` to confirm that the execution client has been killed due to oom. If you are using geth as the execution client, a short term solution is to reduce the resources used. For example, you can reduce the cache by adding the flag `--cache 2048`. If the oom occurs rather frequently, a long term solution is to increase the memory capacity of the computer. ### My beacon node is stuck at downloading historical block using checkpoint sync. What should I do? @@ -98,8 +98,8 @@ INFO Downloading historical blocks est_time: --, distance: 4524545 slo ``` If the same log appears every minute and you do not see progress in downloading historical blocks, you can try one of the followings: - - - Check the number of peers you are connected to. If you have low peers (less than 50), try to do port forwarding on the port 9000 TCP/UDP to increase peer count. + + - Check the number of peers you are connected to. If you have low peers (less than 50), try to do port forwarding on the ports 9000 TCP/UDP and 9001 UDP to increase peer count. - Restart the beacon node. @@ -110,7 +110,7 @@ INFO Block from HTTP API already known` WARN Could not publish message error: Duplicate, service: libp2p ``` -This error usually happens when users are running mev-boost. The relay will publish the block on the network before returning it back to you. After the relay published the block on the network, it will propagate through nodes, and it happens quite often that your node will receive the block from your connected peers via gossip first, before getting the block from the relay, hence the message `duplicate`. +This error usually happens when users are running mev-boost. The relay will publish the block on the network before returning it back to you. After the relay published the block on the network, it will propagate through nodes, and it happens quite often that your node will receive the block from your connected peers via gossip first, before getting the block from the relay, hence the message `duplicate`. In short, it is nothing to worry about. @@ -124,7 +124,7 @@ WARN Head is optimistic execution_block_hash: 0x47e7555f1d4215d1ad409b1ac1 It means the beacon node will follow the chain, but it will not be able to attest or produce blocks. This is because the execution client is not synced, so the beacon chain cannot verify the authenticity of the chain head, hence the word `optimistic`. What you need to do is to make sure that the execution client is up and syncing. Once the execution client is synced, the error will disappear. -### My beacon node logs `CRIT Beacon block processing error error: ValidatorPubkeyCacheLockTimeout, service: beacon`, what should I do? +### My beacon node logs `CRIT Beacon block processing error error: ValidatorPubkeyCacheLockTimeout, service: beacon`, what should I do? An example of the log is shown below: @@ -133,7 +133,7 @@ CRIT Beacon block processing error error: ValidatorPubkeyCacheLockTime WARN BlockProcessingFailure outcome: ValidatorPubkeyCacheLockTimeout, msg: unexpected condition in processing block. ``` -A `Timeout` error suggests that the computer may be overloaded at the moment, for example, the execution client is still syncing. You may use the flag `--disable-lock-timeouts` to silence this error, although it will not fix the underlying slowness. Nevertheless, this is a relatively harmless log, and the error should go away once the resources used are back to normal. +A `Timeout` error suggests that the computer may be overloaded at the moment, for example, the execution client is still syncing. You may use the flag `--disable-lock-timeouts` to silence this error, although it will not fix the underlying slowness. Nevertheless, this is a relatively harmless log, and the error should go away once the resources used are back to normal. ### My beacon node logs `WARN BlockProcessingFailure outcome: MissingBeaconBlock`, what should I do? @@ -143,7 +143,7 @@ An example of the full log is shown below: WARN BlockProcessingFailure outcome: MissingBeaconBlock(0xbdba211f8d72029554e405d8e4906690dca807d1d7b1bc8c9b88d7970f1648bc), msg: unexpected condition in processing block. ``` -`MissingBeaconBlock` suggests that the database has corrupted. You should wipe the database and use [Checkpoint Sync](./checkpoint-sync.md) to resync the beacon chain. +`MissingBeaconBlock` suggests that the database has corrupted. You should wipe the database and use [Checkpoint Sync](./checkpoint-sync.md) to resync the beacon chain. ### After checkpoint sync, the progress of `downloading historical blocks` is slow. Why? @@ -173,7 +173,7 @@ The error is `503 Service Unavailable`. This means that the beacon node is still ERRO Failed to download attester duties err: FailedToDownloadAttesters("Some endpoints failed, num_failed: 2 http://localhost:5052/ => Unavailable(NotSynced), http://localhost:5052/ => RequestFailed(ServerMessage(ErrorMessage { code: 503, message: \"SERVICE_UNAVAILABLE: beacon node is syncing ``` -This means that the validator client is sending requests to the beacon node. However, as the beacon node is still syncing, it is therefore unable to fulfil the request. The error will disappear once the beacon node is synced. +This means that the validator client is sending requests to the beacon node. However, as the beacon node is still syncing, it is therefore unable to fulfil the request. The error will disappear once the beacon node is synced. ### My beacon node logs `WARN Error signalling fork choice waiter`, what should I do? @@ -268,9 +268,9 @@ repeats until the queue is cleared. The churn limit is summarised in the table b
-| Number of active validators | Validators activated per epoch | Validators activated per day | +| Number of active validators | Validators activated per epoch | Validators activated per day | |-------------------|--------------------------------------------|----| -| 327679 or less | 4 | 900 | +| 327679 or less | 4 | 900 | | 327680-393215 | 5 | 1125 | | 393216-458751 | 6 | 1350 | 458752-524287 | 7 | 1575 @@ -285,7 +285,7 @@ repeats until the queue is cleared. The churn limit is summarised in the table b
-For example, the number of active validators on Mainnet is about 574000 on May 2023. This means that 8 validators can be activated per epoch or 1800 per day (it is noted that the same applies to the exit queue). If, for example, there are 9000 validators waiting to be activated, this means that the waiting time can take up to 5 days. +For example, the number of active validators on Mainnet is about 574000 on May 2023. This means that 8 validators can be activated per epoch or 1800 per day (it is noted that the same applies to the exit queue). If, for example, there are 9000 validators waiting to be activated, this means that the waiting time can take up to 5 days. Once a validator has been activated, congratulations! It's time to produce blocks and attestations! @@ -298,14 +298,14 @@ duplicate your JSON keystores and don't run `lighthouse vc` twice). This will le However, there are some components which can be configured with redundancy. See the [Redundancy](./redundancy.md) guide for more information. -### I am missing attestations. Why? +### I am missing attestations. Why? The first thing is to ensure both consensus and execution clients are synced with the network. If they are synced, there may still be some issues with the node setup itself that is causing the missed attestations. Check the setup to ensure that: - the clock is synced - the computer has sufficient resources and is not overloaded - the internet is working well - you have sufficient peers -You can see more information on the [Ethstaker KB](https://ethstaker.gitbook.io/ethstaker-knowledge-base/help/missed-attestations). +You can see more information on the [Ethstaker KB](https://ethstaker.gitbook.io/ethstaker-knowledge-base/help/missed-attestations). Another cause for missing attestations is delays during block processing. When this happens, the debug logs will show (debug logs can be found under `$datadir/beacon/logs`): @@ -313,14 +313,14 @@ Another cause for missing attestations is delays during block processing. When t DEBG Delayed head block set_as_head_delay: Some(93.579425ms), imported_delay: Some(1.460405278s), observed_delay: Some(2.540811921s), block_delay: 4.094796624s, slot: 6837344, proposer_index: 211108, block_root: 0x2c52231c0a5a117401f5231585de8aa5dd963bc7cbc00c544e681342eedd1700, service: beacon ``` -The fields to look for are `imported_delay > 1s` and `observed_delay < 3s`. The `imported_delay` is how long the node took to process the block. The `imported_delay` of larger than 1 second suggests that there is slowness in processing the block. It could be due to high CPU usage, high I/O disk usage or the clients are doing some background maintenance processes. The `observed_delay` is determined mostly by the proposer and partly by your networking setup (e.g., how long it took for the node to receive the block). The `observed_delay` of less than 3 seconds means that the block is not arriving late from the block proposer. Combining the above, this implies that the validator should have been able to attest to the block, but failed due to slowness in the node processing the block. +The fields to look for are `imported_delay > 1s` and `observed_delay < 3s`. The `imported_delay` is how long the node took to process the block. The `imported_delay` of larger than 1 second suggests that there is slowness in processing the block. It could be due to high CPU usage, high I/O disk usage or the clients are doing some background maintenance processes. The `observed_delay` is determined mostly by the proposer and partly by your networking setup (e.g., how long it took for the node to receive the block). The `observed_delay` of less than 3 seconds means that the block is not arriving late from the block proposer. Combining the above, this implies that the validator should have been able to attest to the block, but failed due to slowness in the node processing the block. ### Sometimes I miss the attestation head vote, resulting in penalty. Is this normal? In general, it is unavoidable to have some penalties occasionally. This is particularly the case when you are assigned to attest on the first slot of an epoch and if the proposer of that slot releases the block late, then you will get penalised for missing the target and head votes. Your attestation performance does not only depend on your own setup, but also on everyone elses performance. -You could also check for the sync aggregate participation percentage on block explorers such as [beaconcha.in](https://beaconcha.in/). A low sync aggregate participation percentage (e.g., 60-70%) indicates that the block that you are assigned to attest to may be published late. As a result, your validator fails to correctly attest to the block. +You could also check for the sync aggregate participation percentage on block explorers such as [beaconcha.in](https://beaconcha.in/). A low sync aggregate participation percentage (e.g., 60-70%) indicates that the block that you are assigned to attest to may be published late. As a result, your validator fails to correctly attest to the block. Another possible reason for missing the head vote is due to a chain "reorg". A reorg can happen if the proposer publishes block `n` late, and the proposer of block `n+1` builds upon block `n-1` instead of `n`. This is called a "reorg". Due to the reorg, block `n` was never included in the chain. If you are assigned to attest at slot `n`, it is possible you may still attest to block `n` despite most of the network recognizing the block as being late. In this case you will miss the head reward. @@ -388,7 +388,7 @@ If the ports are open, you should have incoming peers. To check that you have in If you have incoming peers, it should return a lot of data containing information of peers. If the response is empty, it means that you have no incoming peers and there the ports are not open. You may want to double check if the port forward was correctly set up. -2. Check that you do not lower the number of peers using the flag `--target-peers`. The default is 80. A lower value set will lower the maximum number of peers your node can connect to, which may potentially interrupt the validator performance. We recommend users to leave the `--target peers` untouched to keep a diverse set of peers. +2. Check that you do not lower the number of peers using the flag `--target-peers`. The default is 80. A lower value set will lower the maximum number of peers your node can connect to, which may potentially interrupt the validator performance. We recommend users to leave the `--target peers` untouched to keep a diverse set of peers. 3. Ensure that you have a quality router for the internet connection. For example, if you connect the router to many devices including the node, it may be possible that the router cannot handle all routing tasks, hence struggling to keep up the number of peers. Therefore, using a quality router for the node is important to keep a healthy number of peers. @@ -435,8 +435,8 @@ For these reasons, we recommend that you make your node publicly accessible. Lighthouse supports UPnP. If you are behind a NAT with a router that supports UPnP, you can simply ensure UPnP is enabled (Lighthouse will inform you in its initial logs if a route has been established). You can also manually [set up port mappings/port forwarding](./advanced_networking.md#how-to-open-ports) in your router to your local Lighthouse instance. By default, -Lighthouse uses port 9000 for both TCP and UDP. Opening both these ports will -make your Lighthouse node maximally contactable. +Lighthouse uses port 9000 for both TCP and UDP, and optionally 9001 UDP for QUIC support. +Opening these ports will make your Lighthouse node maximally contactable. ### How can I monitor my validators? @@ -449,7 +449,7 @@ Monitoring](./validator-monitoring.md) for more information. Lighthouse has also The setting on the beacon node is the same for both cases below. In the beacon node, specify `lighthouse bn --http-address local_IP` so that the beacon node is listening on the local network rather than `localhost`. You can find the `local_IP` by running the command `hostname -I | awk '{print $1}'` on the server running the beacon node. 1. If the beacon node and validator clients are on different servers *in the same network*, the setting in the validator client is as follows: - + Use the flag `--beacon-nodes` to point to the beacon node. For example, `lighthouse vc --beacon-nodes http://local_IP:5052` where `local_IP` is the local IP address of the beacon node and `5052` is the default `http-port` of the beacon node. If you have firewall setup, e.g., `ufw`, you will need to allow port 5052 (assuming that the default port is used) with `sudo ufw allow 5052`. Note: this will allow all IP addresses to access the HTTP API of the beacon node. If you are on an untrusted network (e.g., a university or public WiFi) or the host is exposed to the internet, use apply IP-address filtering as described later in this section. @@ -472,7 +472,7 @@ The setting on the beacon node is the same for both cases below. In the beacon n If you have firewall setup, e.g., `ufw`, you will need to allow connections to port 5052 (assuming that the default port is used). Since the beacon node HTTP/HTTPS API is public-facing (i.e., the 5052 port is now exposed to the internet due to port forwarding), we strongly recommend users to apply IP-address filtering to the BN/VC connection from malicious actors. This can be done using the command: - + ``` sudo ufw allow from vc_IP_address proto tcp to any port 5052 ``` @@ -485,14 +485,16 @@ It is also worth noting that the `--beacon-nodes` flag can also be used for redu No. Lighthouse will auto-detect the change and update your Ethereum Node Record (ENR). You just need to make sure you are not manually setting the ENR with `--enr-address` (which, for common use cases, this flag is not used). ### How to change the TCP/UDP port 9000 that Lighthouse listens on? -Use the flag ```--port ``` in the beacon node. This flag can be useful when you are running two beacon nodes at the same time. You can leave one beacon node as the default port 9000, and configure the second beacon node to listen on, e.g., ```--port 9001```. +Use the flag `--port ` in the beacon node. This flag can be useful when you are running two beacon nodes at the same time. You can leave one beacon node as the default port 9000, and configure the second beacon node to listen on, e.g., `--port 9100`. +Since V4.5.0, Lighthouse supports QUIC and by default will use the value of `--port` + 1 to listen via UDP (default `9001`). +This can be configured by using the flag `--quic-port`. Refer to [Advanced Networking](./advanced_networking.md#nat-traversal-port-forwarding) for more information. ### Lighthouse `v4.3.0` introduces a change where a node will subscribe to only 2 subnets in total. I am worried that this will impact my validators return. -Previously, having more validators means subscribing to more subnets. Since the change, a node will now only subscribe to 2 subnets in total. This will bring about significant reductions in bandwidth for nodes with multiple validators. +Previously, having more validators means subscribing to more subnets. Since the change, a node will now only subscribe to 2 subnets in total. This will bring about significant reductions in bandwidth for nodes with multiple validators. + +While subscribing to more subnets can ensure you have peers on a wider range of subnets, these subscriptions consume resources and bandwidth. This does not significantly increase the performance of the node, however it does benefit other nodes on the network. -While subscribing to more subnets can ensure you have peers on a wider range of subnets, these subscriptions consume resources and bandwidth. This does not significantly increase the performance of the node, however it does benefit other nodes on the network. - If you would still like to subscribe to all subnets, you can use the flag `subscribe-all-subnets`. This may improve the block rewards by 1-5%, though it comes at the cost of a much higher bandwidth requirement. ### How to know how many of my peers are connected via QUIC? @@ -549,7 +551,7 @@ which says that the version is v4.1.0. ### Does Lighthouse have pruning function like the execution client to save disk space? -There is no pruning of Lighthouse database for now. However, since v4.2.0, a feature to only sync back to the weak subjectivity point (approximately 5 months) when syncing via a checkpoint sync was added. This will help to save disk space since the previous behaviour will sync back to the genesis by default. +There is no pruning of Lighthouse database for now. However, since v4.2.0, a feature to only sync back to the weak subjectivity point (approximately 5 months) when syncing via a checkpoint sync was added. This will help to save disk space since the previous behaviour will sync back to the genesis by default. ### Can I use a HDD for the freezer database and only have the hot db on SSD? @@ -557,7 +559,7 @@ Yes, you can do so by using the flag `--freezer-dir /path/to/freezer_db` in the ### Can Lighthouse log in local timestamp instead of UTC? -The reason why Lighthouse logs in UTC is due to the dependency on an upstream library that is [yet to be resolved](https://github.com/sigp/lighthouse/issues/3130). Alternatively, using the flag `disable-log-timestamp` in combination with systemd will suppress the UTC timestamps and print the logs in local timestamps. +The reason why Lighthouse logs in UTC is due to the dependency on an upstream library that is [yet to be resolved](https://github.com/sigp/lighthouse/issues/3130). Alternatively, using the flag `disable-log-timestamp` in combination with systemd will suppress the UTC timestamps and print the logs in local timestamps. ### My hard disk is full and my validator is down. What should I do? From 6f442f2bb8a6ea976943b5fd0c620d04d25f8245 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Thu, 8 Feb 2024 21:05:08 +1100 Subject: [PATCH 06/10] Improve database compaction and `prune-states` (#5142) * Fix no-op state prune check * Compact freezer DB after pruning * Refine DB compaction * Add blobs-db options to inspect/compact * Better key size * Fix compaction end key --- beacon_node/store/src/hot_cold_store.rs | 3 + beacon_node/store/src/leveldb_store.rs | 28 ++---- beacon_node/store/src/lib.rs | 18 +++- beacon_node/store/src/memory_store.rs | 2 +- database_manager/src/lib.rs | 117 ++++++++++++++++++++---- 5 files changed, 127 insertions(+), 41 deletions(-) diff --git a/beacon_node/store/src/hot_cold_store.rs b/beacon_node/store/src/hot_cold_store.rs index 49bc75393c1..4bdb0deca33 100644 --- a/beacon_node/store/src/hot_cold_store.rs +++ b/beacon_node/store/src/hot_cold_store.rs @@ -2376,6 +2376,9 @@ impl, Cold: ItemStore> HotColdDB self.cold_db.do_atomically(cold_ops)?; } + // In order to reclaim space, we need to compact the freezer DB as well. + self.cold_db.compact()?; + Ok(()) } } diff --git a/beacon_node/store/src/leveldb_store.rs b/beacon_node/store/src/leveldb_store.rs index 62619dd2c74..d799bdedd3b 100644 --- a/beacon_node/store/src/leveldb_store.rs +++ b/beacon_node/store/src/leveldb_store.rs @@ -154,25 +154,15 @@ impl KeyValueStore for LevelDB { self.transaction_mutex.lock() } - /// Compact all values in the states and states flag columns. - fn compact(&self) -> Result<(), Error> { - let endpoints = |column: DBColumn| { - ( - BytesKey::from_vec(get_key_for_col(column.as_str(), Hash256::zero().as_bytes())), - BytesKey::from_vec(get_key_for_col( - column.as_str(), - Hash256::repeat_byte(0xff).as_bytes(), - )), - ) - }; - - for (start_key, end_key) in [ - endpoints(DBColumn::BeaconStateTemporary), - endpoints(DBColumn::BeaconState), - endpoints(DBColumn::BeaconStateSummary), - ] { - self.db.compact(&start_key, &end_key); - } + fn compact_column(&self, column: DBColumn) -> Result<(), Error> { + // Use key-size-agnostic keys [] and 0xff..ff with a minimum of 32 bytes to account for + // columns that may change size between sub-databases or schema versions. + let start_key = BytesKey::from_vec(get_key_for_col(column.as_str(), &[])); + let end_key = BytesKey::from_vec(get_key_for_col( + column.as_str(), + &vec![0xff; std::cmp::max(column.key_size(), 32)], + )); + self.db.compact(&start_key, &end_key); Ok(()) } diff --git a/beacon_node/store/src/lib.rs b/beacon_node/store/src/lib.rs index eacd28d2db6..e86689b0cf1 100644 --- a/beacon_node/store/src/lib.rs +++ b/beacon_node/store/src/lib.rs @@ -80,8 +80,22 @@ pub trait KeyValueStore: Sync + Send + Sized + 'static { /// this method. In future we may implement a safer mandatory locking scheme. fn begin_rw_transaction(&self) -> MutexGuard<()>; - /// Compact the database, freeing space used by deleted items. - fn compact(&self) -> Result<(), Error>; + /// Compact a single column in the database, freeing space used by deleted items. + fn compact_column(&self, column: DBColumn) -> Result<(), Error>; + + /// Compact a default set of columns that are likely to free substantial space. + fn compact(&self) -> Result<(), Error> { + // Compact state and block related columns as they are likely to have the most churn, + // i.e. entries being created and deleted. + for column in [ + DBColumn::BeaconState, + DBColumn::BeaconStateSummary, + DBColumn::BeaconBlock, + ] { + self.compact_column(column)?; + } + Ok(()) + } /// Iterate through all keys and values in a particular column. fn iter_column(&self, column: DBColumn) -> ColumnIter { diff --git a/beacon_node/store/src/memory_store.rs b/beacon_node/store/src/memory_store.rs index c2e494dce6a..302d2c2add2 100644 --- a/beacon_node/store/src/memory_store.rs +++ b/beacon_node/store/src/memory_store.rs @@ -108,7 +108,7 @@ impl KeyValueStore for MemoryStore { self.transaction_mutex.lock() } - fn compact(&self) -> Result<(), Error> { + fn compact_column(&self, _column: DBColumn) -> Result<(), Error> { Ok(()) } } diff --git a/database_manager/src/lib.rs b/database_manager/src/lib.rs index 87a1a8bd149..3583d9e2792 100644 --- a/database_manager/src/lib.rs +++ b/database_manager/src/lib.rs @@ -77,7 +77,15 @@ pub fn inspect_cli_app<'a, 'b>() -> App<'a, 'b> { Arg::with_name("freezer") .long("freezer") .help("Inspect the freezer DB rather than the hot DB") - .takes_value(false), + .takes_value(false) + .conflicts_with("blobs-db"), + ) + .arg( + Arg::with_name("blobs-db") + .long("blobs-db") + .help("Inspect the blobs DB rather than the hot DB") + .takes_value(false) + .conflicts_with("freezer"), ) .arg( Arg::with_name("output-dir") @@ -88,6 +96,34 @@ pub fn inspect_cli_app<'a, 'b>() -> App<'a, 'b> { ) } +pub fn compact_cli_app<'a, 'b>() -> App<'a, 'b> { + App::new("compact") + .setting(clap::AppSettings::ColoredHelp) + .about("Compact database manually") + .arg( + Arg::with_name("column") + .long("column") + .value_name("TAG") + .help("3-byte column ID (see `DBColumn`)") + .takes_value(true) + .required(true), + ) + .arg( + Arg::with_name("freezer") + .long("freezer") + .help("Inspect the freezer DB rather than the hot DB") + .takes_value(false) + .conflicts_with("blobs-db"), + ) + .arg( + Arg::with_name("blobs-db") + .long("blobs-db") + .help("Inspect the blobs DB rather than the hot DB") + .takes_value(false) + .conflicts_with("freezer"), + ) +} + pub fn prune_payloads_app<'a, 'b>() -> App<'a, 'b> { App::new("prune-payloads") .alias("prune_payloads") @@ -162,6 +198,7 @@ pub fn cli_app<'a, 'b>() -> App<'a, 'b> { .subcommand(migrate_cli_app()) .subcommand(version_cli_app()) .subcommand(inspect_cli_app()) + .subcommand(compact_cli_app()) .subcommand(prune_payloads_app()) .subcommand(prune_blobs_app()) .subcommand(prune_states_app()) @@ -251,6 +288,7 @@ pub struct InspectConfig { skip: Option, limit: Option, freezer: bool, + blobs_db: bool, /// Configures where the inspect output should be stored. output_dir: PathBuf, } @@ -261,6 +299,7 @@ fn parse_inspect_config(cli_args: &ArgMatches) -> Result let skip = clap_utils::parse_optional(cli_args, "skip")?; let limit = clap_utils::parse_optional(cli_args, "limit")?; let freezer = cli_args.is_present("freezer"); + let blobs_db = cli_args.is_present("blobs-db"); let output_dir: PathBuf = clap_utils::parse_optional(cli_args, "output-dir")?.unwrap_or_else(PathBuf::new); @@ -270,6 +309,7 @@ fn parse_inspect_config(cli_args: &ArgMatches) -> Result skip, limit, freezer, + blobs_db, output_dir, }) } @@ -277,32 +317,20 @@ fn parse_inspect_config(cli_args: &ArgMatches) -> Result pub fn inspect_db( inspect_config: InspectConfig, client_config: ClientConfig, - runtime_context: &RuntimeContext, - log: Logger, ) -> Result<(), String> { - let spec = runtime_context.eth2_config.spec.clone(); let hot_path = client_config.get_db_path(); let cold_path = client_config.get_freezer_db_path(); let blobs_path = client_config.get_blobs_db_path(); - let db = HotColdDB::, LevelDB>::open( - &hot_path, - &cold_path, - &blobs_path, - |_, _, _| Ok(()), - client_config.store, - spec, - log, - ) - .map_err(|e| format!("{:?}", e))?; - let mut total = 0; let mut num_keys = 0; let sub_db = if inspect_config.freezer { - &db.cold_db + LevelDB::::open(&cold_path).map_err(|e| format!("Unable to open freezer DB: {e:?}"))? + } else if inspect_config.blobs_db { + LevelDB::::open(&blobs_path).map_err(|e| format!("Unable to open blobs DB: {e:?}"))? } else { - &db.hot_db + LevelDB::::open(&hot_path).map_err(|e| format!("Unable to open hot DB: {e:?}"))? }; let skip = inspect_config.skip.unwrap_or(0); @@ -385,6 +413,50 @@ pub fn inspect_db( Ok(()) } +pub struct CompactConfig { + column: DBColumn, + freezer: bool, + blobs_db: bool, +} + +fn parse_compact_config(cli_args: &ArgMatches) -> Result { + let column = clap_utils::parse_required(cli_args, "column")?; + let freezer = cli_args.is_present("freezer"); + let blobs_db = cli_args.is_present("blobs-db"); + Ok(CompactConfig { + column, + freezer, + blobs_db, + }) +} + +pub fn compact_db( + compact_config: CompactConfig, + client_config: ClientConfig, + log: Logger, +) -> Result<(), Error> { + let hot_path = client_config.get_db_path(); + let cold_path = client_config.get_freezer_db_path(); + let blobs_path = client_config.get_blobs_db_path(); + let column = compact_config.column; + + let (sub_db, db_name) = if compact_config.freezer { + (LevelDB::::open(&cold_path)?, "freezer_db") + } else if compact_config.blobs_db { + (LevelDB::::open(&blobs_path)?, "blobs_db") + } else { + (LevelDB::::open(&hot_path)?, "hot_db") + }; + info!( + log, + "Compacting database"; + "db" => db_name, + "column" => ?column + ); + sub_db.compact_column(column)?; + Ok(()) +} + pub struct MigrateConfig { to: SchemaVersion, } @@ -538,7 +610,10 @@ pub fn prune_states( // Check that the user has confirmed they want to proceed. if !prune_config.confirm { match db.get_anchor_info() { - Some(anchor_info) if anchor_info.state_upper_limit == STATE_UPPER_LIMIT_NO_RETAIN => { + Some(anchor_info) + if anchor_info.state_lower_limit == 0 + && anchor_info.state_upper_limit == STATE_UPPER_LIMIT_NO_RETAIN => + { info!(log, "States have already been pruned"); return Ok(()); } @@ -586,7 +661,11 @@ pub fn run(cli_args: &ArgMatches<'_>, env: Environment) -> Result } ("inspect", Some(cli_args)) => { let inspect_config = parse_inspect_config(cli_args)?; - inspect_db(inspect_config, client_config, &context, log) + inspect_db::(inspect_config, client_config) + } + ("compact", Some(cli_args)) => { + let compact_config = parse_compact_config(cli_args)?; + compact_db::(compact_config, client_config, log).map_err(format_err) } ("prune-payloads", Some(_)) => { prune_payloads(client_config, &context, log).map_err(format_err) From 4b19eac8cef6c93feb0c86363cf06c4b683a4478 Mon Sep 17 00:00:00 2001 From: Jimmy Chen Date: Fri, 9 Feb 2024 03:57:19 +1100 Subject: [PATCH 07/10] Remove `curve25519-dalek` patch (#5214) * Remove patch dependencies --- Cargo.lock | 8 +++++--- Cargo.toml | 3 --- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4fc8ef8fe97..1082c2fd449 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1583,8 +1583,9 @@ dependencies = [ [[package]] name = "curve25519-dalek" -version = "4.1.1" -source = "git+https://github.com/jimmygchen/curve25519-dalek.git?rev=24019783e9bb9dc1464e7e503732f273a69969c6#24019783e9bb9dc1464e7e503732f273a69969c6" +version = "4.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a677b8922c94e01bdbb12126b0bc852f00447528dee1782229af9c720c3f348" dependencies = [ "cfg-if", "cpufeatures", @@ -1600,7 +1601,8 @@ dependencies = [ [[package]] name = "curve25519-dalek-derive" version = "0.1.1" -source = "git+https://github.com/jimmygchen/curve25519-dalek.git?rev=24019783e9bb9dc1464e7e503732f273a69969c6#24019783e9bb9dc1464e7e503732f273a69969c6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index ce3b47012c8..12a7474212a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -235,6 +235,3 @@ inherits = "release" lto = "fat" codegen-units = 1 incremental = false - -[patch.crates-io] -curve25519-dalek = { git = "https://github.com/jimmygchen/curve25519-dalek.git", rev = "24019783e9bb9dc1464e7e503732f273a69969c6" } From e3533584845462de3b2b6e41476fd3d41f2ae053 Mon Sep 17 00:00:00 2001 From: Akihito Nakano Date: Fri, 9 Feb 2024 01:57:26 +0900 Subject: [PATCH 08/10] Update to warp v0.3.6 (#5172) * Update to warp 0.3.6 * Update Cargo.lock --- Cargo.lock | 88 ++++++++++++------------------------------------------ Cargo.toml | 3 +- 2 files changed, 20 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1082c2fd449..83673d239c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3112,7 +3112,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35bd3cf68c183738046838e300353e4716c674dc5e56890de4826801a6622a28" dependencies = [ "futures-io", - "rustls 0.21.10", + "rustls", ] [[package]] @@ -3735,7 +3735,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.5.5", + "socket2 0.4.10", "tokio", "tower-service", "tracing", @@ -3770,9 +3770,9 @@ dependencies = [ "futures-util", "http 0.2.11", "hyper 0.14.28", - "rustls 0.21.10", + "rustls", "tokio", - "tokio-rustls 0.24.1", + "tokio-rustls", ] [[package]] @@ -4624,7 +4624,7 @@ dependencies = [ "quinn", "rand", "ring 0.16.20", - "rustls 0.21.10", + "rustls", "socket2 0.5.5", "thiserror", "tokio", @@ -4695,8 +4695,8 @@ dependencies = [ "libp2p-identity", "rcgen", "ring 0.16.20", - "rustls 0.21.10", - "rustls-webpki 0.101.7", + "rustls", + "rustls-webpki", "thiserror", "x509-parser", "yasna", @@ -6486,7 +6486,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls 0.21.10", + "rustls", "thiserror", "tokio", "tracing", @@ -6502,7 +6502,7 @@ dependencies = [ "rand", "ring 0.16.20", "rustc-hash", - "rustls 0.21.10", + "rustls", "slab", "thiserror", "tinyvec", @@ -6733,15 +6733,15 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.21.10", - "rustls-pemfile 1.0.4", + "rustls", + "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", "system-configuration", "tokio", "tokio-native-tls", - "tokio-rustls 0.24.1", + "tokio-rustls", "tokio-util 0.7.10", "tower-service", "url", @@ -6964,24 +6964,10 @@ checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" dependencies = [ "log", "ring 0.17.7", - "rustls-webpki 0.101.7", + "rustls-webpki", "sct", ] -[[package]] -name = "rustls" -version = "0.22.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e87c9956bd9807afa1f77e0f7594af32566e830e088a5576d27c5b6f30f49d41" -dependencies = [ - "log", - "ring 0.17.7", - "rustls-pki-types", - "rustls-webpki 0.102.1", - "subtle", - "zeroize", -] - [[package]] name = "rustls-pemfile" version = "1.0.4" @@ -6991,22 +6977,6 @@ dependencies = [ "base64 0.21.7", ] -[[package]] -name = "rustls-pemfile" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35e4980fa29e4c4b212ffb3db068a564cbf560e51d3944b7c88bd8bf5bec64f4" -dependencies = [ - "base64 0.21.7", - "rustls-pki-types", -] - -[[package]] -name = "rustls-pki-types" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e9d979b3ce68192e42760c7810125eb6cf2ea10efae545a156063e61f314e2a" - [[package]] name = "rustls-webpki" version = "0.101.7" @@ -7017,17 +6987,6 @@ dependencies = [ "untrusted 0.9.0", ] -[[package]] -name = "rustls-webpki" -version = "0.102.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef4ca26037c909dedb327b48c3327d0ba91d3dd3c4e05dad328f210ffb68e95b" -dependencies = [ - "ring 0.17.7", - "rustls-pki-types", - "untrusted 0.9.0", -] - [[package]] name = "rustversion" version = "1.0.14" @@ -8334,18 +8293,7 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls 0.21.10", - "tokio", -] - -[[package]] -name = "tokio-rustls" -version = "0.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" -dependencies = [ - "rustls 0.22.2", - "rustls-pki-types", + "rustls", "tokio", ] @@ -8991,7 +8939,8 @@ dependencies = [ [[package]] name = "warp" version = "0.3.6" -source = "git+https://github.com/seanmonstar/warp.git#7b07043cee0ca24e912155db4e8f6d9ab7c049ed" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1e92e22e03ff1230c03a1a8ee37d2f89cd489e2e541b7550d6afad96faed169" dependencies = [ "bytes", "futures-channel", @@ -9004,13 +8953,14 @@ dependencies = [ "mime_guess", "percent-encoding", "pin-project", - "rustls-pemfile 2.0.0", + "rustls-pemfile", "scoped-tls", "serde", "serde_json", "serde_urlencoded", "tokio", - "tokio-rustls 0.25.0", + "tokio-rustls", + "tokio-stream", "tokio-util 0.7.10", "tower-service", "tracing", diff --git a/Cargo.toml b/Cargo.toml index 12a7474212a..a7f44551ee8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -170,8 +170,7 @@ tree_hash = "0.5" tree_hash_derive = "0.5" url = "2" uuid = { version = "0.8", features = ["serde", "v4"] } -# TODO update to warp 0.3.6 after released. -warp = { git = "https://github.com/seanmonstar/warp.git", default-features = false, features = ["tls"] } +warp = { version = "0.3.6", default-features = false, features = ["tls"] } zeroize = { version = "1", features = ["zeroize_derive"] } zip = "0.6" From 4172d9f75c052f963d440ee2dad00f3512542117 Mon Sep 17 00:00:00 2001 From: realbigsean Date: Thu, 8 Feb 2024 13:08:21 -0500 Subject: [PATCH 09/10] Update to consensus spec v1.4.0-beta.6 (#5094) * get latest ef tests passing * fix tests * Fix invalid payload recovery tests * Merge branch 'unstable' into update-to-spec-v1.4.0-beta.6 * Revert "fix tests" This reverts commit 0c875b02e032ebd9bebd822183a1c3d10d333a4c. * Fix fork choice def. tests * Update beacon_node/beacon_chain/tests/payload_invalidation.rs --- .../tests/payload_invalidation.rs | 107 ++++++++++-------- .../ffg_updates.rs | 53 +++++++-- .../fork_choice_test_definition/no_votes.rs | 14 ++- consensus/proto_array/src/proto_array.rs | 13 +-- testing/ef_tests/Makefile | 2 +- testing/ef_tests/check_all_files_accessed.py | 3 +- testing/ef_tests/tests/tests.rs | 3 +- 7 files changed, 119 insertions(+), 76 deletions(-) diff --git a/beacon_node/beacon_chain/tests/payload_invalidation.rs b/beacon_node/beacon_chain/tests/payload_invalidation.rs index a0b7fbd365a..597d53fddd2 100644 --- a/beacon_node/beacon_chain/tests/payload_invalidation.rs +++ b/beacon_node/beacon_chain/tests/payload_invalidation.rs @@ -1820,81 +1820,94 @@ struct InvalidHeadSetup { } impl InvalidHeadSetup { + /// This function aims to produce two things: + /// + /// 1. A chain where the only viable head block has an invalid execution payload. + /// 2. A block (`fork_block`) which will become the head of the chain when + /// it is imported. async fn new() -> InvalidHeadSetup { + let slots_per_epoch = E::slots_per_epoch(); let mut rig = InvalidPayloadRig::new().enable_attestations(); rig.move_to_terminal_block(); rig.import_block(Payload::Valid).await; // Import a valid transition block. - // Import blocks until the first time the chain finalizes. + // Import blocks until the first time the chain finalizes. This avoids + // some edge-cases around genesis. while rig.cached_head().finalized_checkpoint().epoch == 0 { rig.import_block(Payload::Syncing).await; } - let slots_per_epoch = E::slots_per_epoch(); - let start_slot = rig.cached_head().head_slot() + 1; - let mut opt_fork_block = None; - - assert_eq!(start_slot % slots_per_epoch, 1); - for i in 0..slots_per_epoch - 1 { - let slot = start_slot + i; - let slot_offset = slot.as_u64() % slots_per_epoch; - - rig.harness.set_current_slot(slot); - - if slot_offset == slots_per_epoch - 1 { - // Optimistic head block right before epoch boundary. - let is_valid = Payload::Syncing; - rig.import_block_parametric(is_valid, is_valid, Some(slot), |error| { - matches!( - error, - BlockError::ExecutionPayloadError( - ExecutionPayloadError::RejectedByExecutionEngine { .. } - ) - ) - }) - .await; - } else if 3 * slot_offset < 2 * slots_per_epoch { - // Valid block in previous epoch. - rig.import_block(Payload::Valid).await; - } else if slot_offset == slots_per_epoch - 2 { - // Fork block one slot prior to invalid head, not applied immediately. - let parent_state = rig - .harness - .chain - .state_at_slot(slot - 1, StateSkipConfig::WithStateRoots) - .unwrap(); - let (fork_block_tuple, _) = rig.harness.make_block(parent_state, slot).await; - opt_fork_block = Some(fork_block_tuple.0); - } else { - // Skipped slot. - }; + // Define a helper function. + let chain = rig.harness.chain.clone(); + let get_unrealized_justified_epoch = move || { + chain + .canonical_head + .fork_choice_read_lock() + .unrealized_justified_checkpoint() + .epoch + }; + + // Import more blocks until there is a new and higher unrealized + // justified checkpoint. + // + // The result will be a single chain where the head block has a higher + // unrealized justified checkpoint than all other blocks in the chain. + let initial_unrealized_justified = get_unrealized_justified_epoch(); + while get_unrealized_justified_epoch() == initial_unrealized_justified { + rig.import_block(Payload::Syncing).await; } + // Create a forked block that competes with the head block. Both the + // head block and this fork block will share the same parent. + // + // The fork block and head block will both have an unrealized justified + // checkpoint at epoch `N` whilst their parent is at `N - 1`. + let head_slot = rig.cached_head().head_slot(); + let parent_slot = head_slot - 1; + let fork_block_slot = head_slot + 1; + let parent_state = rig + .harness + .chain + .state_at_slot(parent_slot, StateSkipConfig::WithStateRoots) + .unwrap(); + let (fork_block_tuple, _) = rig.harness.make_block(parent_state, fork_block_slot).await; + let fork_block = fork_block_tuple.0; + let invalid_head = rig.cached_head(); - assert_eq!( - invalid_head.head_slot() % slots_per_epoch, - slots_per_epoch - 1 - ); - // Advance clock to new epoch to realize the justification of soon-to-be-invalid head block. - rig.harness.set_current_slot(invalid_head.head_slot() + 1); + // Advance the chain forward two epochs past the current head block. + // + // This ensures that `voting_source.epoch + 2 >= current_epoch` is + // `false` in the `node_is_viable_for_head` function. In effect, this + // ensures that no other block but the current head block is viable as a + // head block. + let invalid_head_epoch = invalid_head.head_slot().epoch(slots_per_epoch); + let new_wall_clock_epoch = invalid_head_epoch + 2; + rig.harness + .set_current_slot(new_wall_clock_epoch.start_slot(slots_per_epoch)); // Invalidate the head block. rig.invalidate_manually(invalid_head.head_block_root()) .await; + // Since our setup ensures that there is only a single, invalid block + // that's viable for head (according to FFG filtering), setting the + // head block as invalid should not result in another head being chosen. + // Rather, it should fail to run fork choice and leave the invalid block as + // the head. assert!(rig .canonical_head() .head_execution_status() .unwrap() .is_invalid()); - // Finding a new head should fail since the only possible head is not valid. + // Ensure that we're getting the correct error when trying to find a new + // head. rig.assert_get_head_error_contains("InvalidBestNode"); Self { rig, - fork_block: opt_fork_block.unwrap(), + fork_block, invalid_head, } } diff --git a/consensus/proto_array/src/fork_choice_test_definition/ffg_updates.rs b/consensus/proto_array/src/fork_choice_test_definition/ffg_updates.rs index 77211a86a7d..3b31616145d 100644 --- a/consensus/proto_array/src/fork_choice_test_definition/ffg_updates.rs +++ b/consensus/proto_array/src/fork_choice_test_definition/ffg_updates.rs @@ -59,7 +59,7 @@ pub fn get_ffg_case_01_test_definition() -> ForkChoiceTestDefinition { expected_head: get_root(3), }); - // Ensure that with justified epoch 1 we find 2 + // Ensure that with justified epoch 1 we find 3 // // 0 // | @@ -68,11 +68,15 @@ pub fn get_ffg_case_01_test_definition() -> ForkChoiceTestDefinition { // 2 <- start // | // 3 <- head + // + // Since https://github.com/ethereum/consensus-specs/pull/3431 it is valid + // to elect head blocks that have a higher justified checkpoint than the + // store. ops.push(Operation::FindHead { justified_checkpoint: get_checkpoint(1), finalized_checkpoint: get_checkpoint(0), justified_state_balances: balances.clone(), - expected_head: get_root(2), + expected_head: get_root(3), }); // Ensure that with justified epoch 2 we find 3 @@ -247,14 +251,19 @@ pub fn get_ffg_case_02_test_definition() -> ForkChoiceTestDefinition { justified_state_balances: balances.clone(), expected_head: get_root(10), }); - // Same as above, but with justified epoch 3 (should be invalid). - ops.push(Operation::InvalidFindHead { + // Same as above, but with justified epoch 3. + // + // Since https://github.com/ethereum/consensus-specs/pull/3431 it is valid + // to elect head blocks that have a higher justified checkpoint than the + // store. + ops.push(Operation::FindHead { justified_checkpoint: Checkpoint { epoch: Epoch::new(3), root: get_root(6), }, finalized_checkpoint: get_checkpoint(0), justified_state_balances: balances.clone(), + expected_head: get_root(10), }); // Add a vote to 1. @@ -305,14 +314,19 @@ pub fn get_ffg_case_02_test_definition() -> ForkChoiceTestDefinition { justified_state_balances: balances.clone(), expected_head: get_root(9), }); - // Save as above but justified epoch 3 (should fail). - ops.push(Operation::InvalidFindHead { + // Save as above but justified epoch 3. + // + // Since https://github.com/ethereum/consensus-specs/pull/3431 it is valid + // to elect head blocks that have a higher justified checkpoint than the + // store. + ops.push(Operation::FindHead { justified_checkpoint: Checkpoint { epoch: Epoch::new(3), root: get_root(5), }, finalized_checkpoint: get_checkpoint(0), justified_state_balances: balances.clone(), + expected_head: get_root(9), }); // Add a vote to 2. @@ -363,14 +377,19 @@ pub fn get_ffg_case_02_test_definition() -> ForkChoiceTestDefinition { justified_state_balances: balances.clone(), expected_head: get_root(10), }); - // Same as above but justified epoch 3 (should fail). - ops.push(Operation::InvalidFindHead { + // Same as above but justified epoch 3. + // + // Since https://github.com/ethereum/consensus-specs/pull/3431 it is valid + // to elect head blocks that have a higher justified checkpoint than the + // store. + ops.push(Operation::FindHead { justified_checkpoint: Checkpoint { epoch: Epoch::new(3), root: get_root(6), }, finalized_checkpoint: get_checkpoint(0), justified_state_balances: balances.clone(), + expected_head: get_root(10), }); // Ensure that if we start at 1 we find 9 (just: 0, fin: 0). @@ -405,14 +424,19 @@ pub fn get_ffg_case_02_test_definition() -> ForkChoiceTestDefinition { justified_state_balances: balances.clone(), expected_head: get_root(9), }); - // Same as above but justified epoch 3 (should fail). - ops.push(Operation::InvalidFindHead { + // Same as above but justified epoch 3. + // + // Since https://github.com/ethereum/consensus-specs/pull/3431 it is valid + // to elect head blocks that have a higher justified checkpoint than the + // store. + ops.push(Operation::FindHead { justified_checkpoint: Checkpoint { epoch: Epoch::new(3), root: get_root(5), }, finalized_checkpoint: get_checkpoint(0), justified_state_balances: balances.clone(), + expected_head: get_root(9), }); // Ensure that if we start at 2 we find 10 (just: 0, fin: 0). @@ -444,14 +468,19 @@ pub fn get_ffg_case_02_test_definition() -> ForkChoiceTestDefinition { justified_state_balances: balances.clone(), expected_head: get_root(10), }); - // Same as above but justified epoch 3 (should fail). - ops.push(Operation::InvalidFindHead { + // Same as above but justified epoch 3. + // + // Since https://github.com/ethereum/consensus-specs/pull/3431 it is valid + // to elect head blocks that have a higher justified checkpoint than the + // store. + ops.push(Operation::FindHead { justified_checkpoint: Checkpoint { epoch: Epoch::new(3), root: get_root(6), }, finalized_checkpoint: get_checkpoint(0), justified_state_balances: balances, + expected_head: get_root(10), }); // END OF TESTS diff --git a/consensus/proto_array/src/fork_choice_test_definition/no_votes.rs b/consensus/proto_array/src/fork_choice_test_definition/no_votes.rs index a60b3e6b368..27a7969e49b 100644 --- a/consensus/proto_array/src/fork_choice_test_definition/no_votes.rs +++ b/consensus/proto_array/src/fork_choice_test_definition/no_votes.rs @@ -184,7 +184,7 @@ pub fn get_no_votes_test_definition() -> ForkChoiceTestDefinition { root: Hash256::zero(), }, }, - // Ensure the head is still 4 whilst the justified epoch is 0. + // Ensure the head is now 5 whilst the justified epoch is 0. // // 0 // / \ @@ -203,9 +203,10 @@ pub fn get_no_votes_test_definition() -> ForkChoiceTestDefinition { root: Hash256::zero(), }, justified_state_balances: balances.clone(), - expected_head: get_root(4), + expected_head: get_root(5), }, - // Ensure there is an error when starting from a block that has the wrong justified epoch. + // Ensure there is no error when starting from a block that has the + // wrong justified epoch. // // 0 // / \ @@ -214,7 +215,11 @@ pub fn get_no_votes_test_definition() -> ForkChoiceTestDefinition { // 4 3 // | // 5 <- starting from 5 with justified epoch 0 should error. - Operation::InvalidFindHead { + // + // Since https://github.com/ethereum/consensus-specs/pull/3431 it is valid + // to elect head blocks that have a higher justified checkpoint than the + // store. + Operation::FindHead { justified_checkpoint: Checkpoint { epoch: Epoch::new(1), root: get_root(5), @@ -224,6 +229,7 @@ pub fn get_no_votes_test_definition() -> ForkChoiceTestDefinition { root: Hash256::zero(), }, justified_state_balances: balances.clone(), + expected_head: get_root(5), }, // Set the justified epoch to 2 and the start block to 5 and ensure 5 is the head. // diff --git a/consensus/proto_array/src/proto_array.rs b/consensus/proto_array/src/proto_array.rs index 4726715a162..7c2ecfe26a6 100644 --- a/consensus/proto_array/src/proto_array.rs +++ b/consensus/proto_array/src/proto_array.rs @@ -961,16 +961,9 @@ impl ProtoArray { node_justified_checkpoint }; - let mut correct_justified = self.justified_checkpoint.epoch == genesis_epoch - || voting_source.epoch == self.justified_checkpoint.epoch; - - if let Some(node_unrealized_justified_checkpoint) = node.unrealized_justified_checkpoint { - if !correct_justified && self.justified_checkpoint.epoch + 1 == current_epoch { - correct_justified = node_unrealized_justified_checkpoint.epoch - >= self.justified_checkpoint.epoch - && voting_source.epoch + 2 >= current_epoch; - } - } + let correct_justified = self.justified_checkpoint.epoch == genesis_epoch + || voting_source.epoch == self.justified_checkpoint.epoch + || voting_source.epoch + 2 >= current_epoch; let correct_finalized = self.finalized_checkpoint.epoch == genesis_epoch || self.is_finalized_checkpoint_or_descendant::(node.root); diff --git a/testing/ef_tests/Makefile b/testing/ef_tests/Makefile index e42db1801df..508c284275a 100644 --- a/testing/ef_tests/Makefile +++ b/testing/ef_tests/Makefile @@ -1,4 +1,4 @@ -TESTS_TAG := v1.4.0-beta.4 +TESTS_TAG := v1.4.0-beta.6 TESTS = general minimal mainnet TARBALLS = $(patsubst %,%-$(TESTS_TAG).tar.gz,$(TESTS)) diff --git a/testing/ef_tests/check_all_files_accessed.py b/testing/ef_tests/check_all_files_accessed.py index a5ab897c33f..1d1f2fa49a0 100755 --- a/testing/ef_tests/check_all_files_accessed.py +++ b/testing/ef_tests/check_all_files_accessed.py @@ -51,7 +51,8 @@ "bls12-381-tests/deserialization_G1", "bls12-381-tests/deserialization_G2", "bls12-381-tests/hash_to_G2", - "tests/.*/eip6110" + "tests/.*/eip6110", + "tests/.*/whisk" ] diff --git a/testing/ef_tests/tests/tests.rs b/testing/ef_tests/tests/tests.rs index dd25dba8b60..5ed657c6522 100644 --- a/testing/ef_tests/tests/tests.rs +++ b/testing/ef_tests/tests/tests.rs @@ -1,6 +1,6 @@ #![cfg(feature = "ef_tests")] -use ef_tests::{KzgInclusionMerkleProofValidityHandler, *}; +use ef_tests::*; use types::{MainnetEthSpec, MinimalEthSpec, *}; // Check that the hand-computed multiplications on EthSpec are correctly computed. @@ -605,6 +605,7 @@ fn merkle_proof_validity() { } #[test] +#[cfg(feature = "fake_crypto")] fn kzg_inclusion_merkle_proof_validity() { KzgInclusionMerkleProofValidityHandler::::default().run(); KzgInclusionMerkleProofValidityHandler::::default().run(); From e7ef2a3a54a871bac30ca1ce8a1382dd567eab8d Mon Sep 17 00:00:00 2001 From: Eitan Seri-Levi Date: Fri, 9 Feb 2024 06:59:39 +0200 Subject: [PATCH 10/10] validator liveness endpoint should accept string encoded indices (#5184) * deserialize string indices as u64 * client should send quoted indices --- beacon_node/http_api/src/lib.rs | 5 +++-- common/eth2/src/lib.rs | 10 +++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/beacon_node/http_api/src/lib.rs b/beacon_node/http_api/src/lib.rs index ad841b20e3a..ec3fd494d49 100644 --- a/beacon_node/http_api/src/lib.rs +++ b/beacon_node/http_api/src/lib.rs @@ -3864,7 +3864,7 @@ pub fn serve( }, ); - // POST vaidator/liveness/{epoch} + // POST validator/liveness/{epoch} let post_validator_liveness_epoch = eth_v1 .and(warp::path("validator")) .and(warp::path("liveness")) @@ -3875,7 +3875,7 @@ pub fn serve( .and(chain_filter.clone()) .then( |epoch: Epoch, - indices: Vec, + indices: api_types::ValidatorIndexData, task_spawner: TaskSpawner, chain: Arc>| { task_spawner.blocking_json_task(Priority::P0, move || { @@ -3894,6 +3894,7 @@ pub fn serve( } let liveness: Vec = indices + .0 .iter() .cloned() .map(|index| { diff --git a/common/eth2/src/lib.rs b/common/eth2/src/lib.rs index 6687788d528..3c22c822b8a 100644 --- a/common/eth2/src/lib.rs +++ b/common/eth2/src/lib.rs @@ -2258,7 +2258,7 @@ impl BeaconNodeHttpClient { pub async fn post_validator_liveness_epoch( &self, epoch: Epoch, - indices: &Vec, + indices: &[u64], ) -> Result>, Error> { let mut path = self.eth_path(V1)?; @@ -2268,8 +2268,12 @@ impl BeaconNodeHttpClient { .push("liveness") .push(&epoch.to_string()); - self.post_with_timeout_and_response(path, indices, self.timeouts.liveness) - .await + self.post_with_timeout_and_response( + path, + &ValidatorIndexDataRef(indices), + self.timeouts.liveness, + ) + .await } /// `POST validator/duties/attester/{epoch}`