Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: [NODE-1528] Check HSM during bare metal tests #2781

Merged
merged 3 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows-source/schedule-daily.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ jobs:
--file_share_ssh_key "$(realpath file2)" \
--inject_image_pub_key "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIK3gjE/2K5nxIBbk3ohgs8J5LW+XiObwA+kGtSaF5+4c" \
--file_share_username ci_interim \
--hsm \
--ci_mode
# Run bare metal node performance benchmarks
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/schedule-daily.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ jobs:
--file_share_ssh_key "$(realpath file2)" \
--inject_image_pub_key "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIK3gjE/2K5nxIBbk3ohgs8J5LW+XiObwA+kGtSaF5+4c" \
--file_share_username ci_interim \
--hsm \
--ci_mode

# Run bare metal node performance benchmarks
Expand Down
55 changes: 48 additions & 7 deletions ic-os/dev-tools/bare_metal_deployment/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ class Args:
# Run benchmarks if True
benchmark: bool = flag(default=False)

# Check HSM capability if True
hsm: bool = flag(default=False)

# Path to the benchmark_driver script.
benchmark_driver_script: Optional[str] = "./benchmark_driver.sh"

Expand Down Expand Up @@ -266,6 +269,21 @@ def check_guestos_metrics_version(ip_address: IPv6Address, timeout_secs: int) ->
return True


def check_guestos_hsm_capability(ip_address: IPv6Address, ssh_key_file: Optional[str] = None) -> bool:
    """Check over SSH that the node at *ip_address* can see a Nitrokey HSM.

    Logs in as ``admin``, runs ``vsock_guest --attach-hsm``, sleeps five
    seconds, and then greps the ``pkcs11-tool --list-slots`` output for
    "Nitrokey HSM".

    Args:
        ip_address: Address of the node to connect to.
        ssh_key_file: Optional path to the private key passed to ``ssh -i``.
            When falsy, no ``-i`` option is given.

    Returns:
        True when the remote command chain succeeds (``result.ok``),
        False otherwise.
    """
    # Local import so this block does not depend on the file's import header.
    import shlex

    # Quote the key path: it is spliced into a shell command line, and an
    # unquoted path containing spaces or metacharacters would break the call.
    ssh_key_arg = f"-i {shlex.quote(ssh_key_file)}" if ssh_key_file else ""
    # Host key verification is disabled and nothing is written to known_hosts.
    # NOTE(review): presumably because the node is freshly deployed - confirm.
    ssh_opts = "-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null"
    # NOTE(review): the `sleep 5` presumably gives the HSM time to attach
    # before the slots are listed - confirm.
    remote_cmd = '/opt/ic/bin/vsock_guest --attach-hsm && sleep 5 && pkcs11-tool --list-slots | grep "Nitrokey HSM"'
    # warn=True: a failing command is reported through the result object
    # (see the invoke documentation) and handled by the check below.
    result = invoke.run(
        f"ssh {ssh_opts} {ssh_key_arg} admin@{ip_address} '{remote_cmd}'",
        warn=True,
    )
    if not result or not result.ok:
        return False

    log.info("HSM check success.")
    return True


def wait(wait_secs: int) -> bool:
    """Sleep for *wait_secs* seconds and then return ``False``.

    The return value is always ``False``.
    NOTE(review): presumably so it can stand in for a polling check that
    has not succeeded yet - confirm against the callers.
    """
    finished = False
    time.sleep(wait_secs)
    return finished
Expand Down Expand Up @@ -323,7 +341,13 @@ def configure_process_local_log(server_id: str):
log.add(sys.stderr, format=logger_format)


def deploy_server(bmc_info: BMCInfo, wait_time_mins: int, idrac_script_dir: Path):
def deploy_server(
bmc_info: BMCInfo,
wait_time_mins: int,
idrac_script_dir: Path,
file_share_ssh_key: Optional[str] = None,
check_hsm: bool = False,
):
# Partially applied function for brevity
run_func = functools.partial(run_script, idrac_script_dir, bmc_info)

Expand Down Expand Up @@ -382,9 +406,14 @@ def wait_func() -> bool:

def check_connectivity_func() -> bool:
assert bmc_info.guestos_ipv6_address is not None, "Logic error"
return check_guestos_ping_connectivity(
bmc_info.guestos_ipv6_address, timeout_secs
) and check_guestos_metrics_version(bmc_info.guestos_ipv6_address, timeout_secs)

result = check_guestos_ping_connectivity(bmc_info.guestos_ipv6_address, timeout_secs)
result = result and check_guestos_metrics_version(bmc_info.guestos_ipv6_address, timeout_secs)

if check_hsm:
result = result and check_guestos_hsm_capability(bmc_info.guestos_ipv6_address, file_share_ssh_key)

return result

iterate_func = check_connectivity_func if bmc_info.guestos_ipv6_address else wait_func

Expand Down Expand Up @@ -420,10 +449,17 @@ def check_connectivity_func() -> bool:
return e.args[0]


def boot_images(bmc_infos: List[BMCInfo], parallelism: int, wait_time_mins: int, idrac_script_dir: Path):
def boot_images(
bmc_infos: List[BMCInfo],
parallelism: int,
wait_time_mins: int,
idrac_script_dir: Path,
file_share_ssh_key: Optional[str] = None,
check_hsm: bool = False,
):
results: List[OperationResult] = []

arg_tuples = ((bmc_info, wait_time_mins, idrac_script_dir) for bmc_info in bmc_infos)
arg_tuples = ((bmc_info, wait_time_mins, idrac_script_dir, file_share_ssh_key, check_hsm) for bmc_info in bmc_infos)

with Pool(parallelism) as p:
results = p.starmap(deploy_server, arg_tuples)
Expand Down Expand Up @@ -682,7 +718,12 @@ def main():
wait_time_mins = args.wait_time
parallelism = args.parallel
success = boot_images(
bmc_infos=bmc_infos, parallelism=parallelism, wait_time_mins=wait_time_mins, idrac_script_dir=idrac_script_dir
bmc_infos=bmc_infos,
parallelism=parallelism,
wait_time_mins=wait_time_mins,
idrac_script_dir=idrac_script_dir,
file_share_ssh_key=args.file_share_ssh_key,
check_hsm=args.hsm,
)

if not success:
Expand Down