Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(recovery): [CON-1407] Add CLI option to use existing binaries for local recoveries #3301

Merged
merged 3 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ic-os/guestos/defs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def image_deps(mode, malicious = False):
"//publish/binaries:ic-boundary-tls": "/opt/ic/bin/ic-boundary:0755", # API boundary node binary, required by the IC protocol. The same GuestOS is used both for the replica and API boundary nodes.
"//publish/binaries:ic-consensus-pool-util": "/opt/ic/bin/ic-consensus-pool-util:0755", # May be used during recoveries to export/import consensus pool artifacts.
"//publish/binaries:ic-recovery": "/opt/ic/bin/ic-recovery:0755", # Required for performing subnet recoveries on the node directly.
"//publish/binaries:ic-admin": "/opt/ic/bin/ic-admin:0755", # Required for issuing recovery proposals directly from the node (primarily used for system tests).
"//publish/binaries:state-tool": "/opt/ic/bin/state-tool:0755", # May be used during recoveries for calculating the state hash and inspecting the state more generally.
"//publish/binaries:ic-regedit": "/opt/ic/bin/ic-regedit:0755", # May be used for inspecting and recovering the registry.
# Required by the GuestOS
Expand Down
3 changes: 3 additions & 0 deletions rs/recovery/src/args_merger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ mod tests {
key_file: Some(PathBuf::from("/dir1/key_file")),
test_mode: true,
skip_prompts: true,
use_local_binaries: false,
};
let args2 = RecoveryArgs {
dir: PathBuf::from("/dir2/"),
Expand All @@ -74,6 +75,7 @@ mod tests {
key_file: None,
test_mode: false,
skip_prompts: true,
use_local_binaries: false,
};

let expected = RecoveryArgs {
Expand All @@ -83,6 +85,7 @@ mod tests {
key_file: args1.key_file.clone(),
test_mode: args2.test_mode,
skip_prompts: true,
use_local_binaries: false,
};

assert_eq!(expected, merge(&logger, "test", &args1, &args2).unwrap());
Expand Down
6 changes: 6 additions & 0 deletions rs/recovery/src/cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ pub struct RecoveryToolArgs {
#[clap(long)]
pub skip_prompts: bool,

/// Flag to indicate we're running recovery directly on a node, and should use
/// the locally available binaries. If this option is not set, missing binaries
/// will be downloaded.
#[clap(long)]
pub use_local_binaries: bool,

#[clap(subcommand)]
pub subcmd: Option<SubCommand>,
}
16 changes: 13 additions & 3 deletions rs/recovery/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ pub struct RecoveryArgs {
pub key_file: Option<PathBuf>,
pub test_mode: bool,
pub skip_prompts: bool,
pub use_local_binaries: bool,
}

/// The recovery struct comprises working directories for the recovery of a
Expand Down Expand Up @@ -145,13 +146,22 @@ impl Recovery {
) -> RecoveryResult<Self> {
let ssh_confirmation = !args.test_mode;
let recovery_dir = args.dir.join(RECOVERY_DIRECTORY_NAME);
let binary_dir = recovery_dir.join("binaries");
let binary_dir = if args.use_local_binaries {
PathBuf::from_str("/opt/ic/bin/").expect("bad file path string")
} else {
recovery_dir.join("binaries")
};
let data_dir = recovery_dir.join("original_data");
let work_dir = recovery_dir.join("working_dir");
let local_store_path = work_dir.join("data").join(IC_REGISTRY_LOCAL_STORE);
let nns_pem = recovery_dir.join("nns.pem");

match Recovery::create_dirs(&[&binary_dir, &data_dir, &work_dir, &local_store_path]) {
let mut to_create: Vec<&Path> = vec![&data_dir, &work_dir, &local_store_path];
if !args.use_local_binaries {
to_create.push(&binary_dir);
}

match Recovery::create_dirs(&to_create) {
Err(RecoveryError::IoError(s, err)) => match err.kind() {
ErrorKind::PermissionDenied => Err(RecoveryError::IoError(
format!(
Expand Down Expand Up @@ -180,7 +190,7 @@ impl Recovery {
wait_for_confirmation(&logger);
}

if !binary_dir.join("ic-admin").exists() {
if !args.use_local_binaries && !binary_dir.join("ic-admin").exists() {
if let Some(version) = args.replica_version {
block_on(download_binary(
&logger,
Expand Down
1 change: 1 addition & 0 deletions rs/recovery/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ fn main() {
key_file: args.key_file,
test_mode: args.test,
skip_prompts: args.skip_prompts,
use_local_binaries: args.use_local_binaries,
};

let recovery_state = cli::read_and_maybe_update_state(&logger, recovery_args, args.subcmd);
Expand Down
1 change: 1 addition & 0 deletions rs/recovery/src/recovery_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ mod tests {
key_file: Some(PathBuf::from(dir)),
test_mode: true,
skip_prompts: true,
use_local_binaries: false,
},
subcommand_args: SubCommand::AppSubnetRecovery(AppSubnetRecoveryArgs {
subnet_id: fake_subnet_id(),
Expand Down
7 changes: 7 additions & 0 deletions rs/recovery/subnet_splitting/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ struct SplitArgs {
#[clap(long)]
pub skip_prompts: bool,

/// Flag to indicate we're running recovery directly on a node, and should use
/// the locally available binaries. If this option is not set, missing binaries
/// will be downloaded.
#[clap(long)]
pub use_local_binaries: bool,

#[clap(flatten)]
subnet_splitting_args: SubnetSplittingArgs,
}
Expand Down Expand Up @@ -145,6 +151,7 @@ fn do_split(args: SplitArgs, logger: Logger) -> RecoveryResult<()> {
key_file: args.key_file,
test_mode: args.test,
skip_prompts: args.skip_prompts,
use_local_binaries: args.use_local_binaries,
};

let subnet_splitting_state =
Expand Down
1 change: 1 addition & 0 deletions rs/tests/consensus/subnet_recovery/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,7 @@ fn app_subnet_recovery_test(env: TestEnv, cfg: Config) {
key_file: Some(ssh_authorized_priv_keys_dir.join(SSH_USERNAME)),
test_mode: true,
skip_prompts: true,
use_local_binaries: false,
};

let mut unassigned_nodes = env.topology_snapshot().unassigned_nodes();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ pub fn test(env: TestEnv) {
key_file: Some(ssh_authorized_priv_keys_dir.join(SSH_USERNAME)),
test_mode: true,
skip_prompts: true,
use_local_binaries: false,
};
let subnet_args = NNSRecoveryFailoverNodesArgs {
subnet_id: topo_broken_ic.root_subnet_id(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ pub fn test(env: TestEnv) {
key_file: Some(ssh_authorized_priv_keys_dir.join(SSH_USERNAME)),
test_mode: true,
skip_prompts: true,
use_local_binaries: false,
};

// unlike during a production recovery using the CLI, here we already know all of parameters
Expand Down
1 change: 1 addition & 0 deletions rs/tests/consensus/subnet_splitting_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ fn subnet_splitting_test(env: TestEnv) {
),
test_mode: true,
skip_prompts: true,
use_local_binaries: false,
};

let subnet_splitting_args = SubnetSplittingArgs {
Expand Down
Loading