diff --git a/.gitignore b/.gitignore index 015dc671..65b4cb73 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,9 @@ svsm.bin bzImage coconut-qemu.igvm +# Ansible inventories +ansible/inventory + # Kubernetes stuff .config diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg new file mode 100644 index 00000000..14c80651 --- /dev/null +++ b/ansible/ansible.cfg @@ -0,0 +1,2 @@ +[defaults] +host_key_checking = False diff --git a/ansible/tasks/apt.yaml b/ansible/tasks/apt.yaml new file mode 100644 index 00000000..2328e35f --- /dev/null +++ b/ansible/tasks/apt.yaml @@ -0,0 +1,25 @@ +--- + +# Currently the provisioned VM image for CoCo on Azure VMs uses Ubuntu 20.04 +# so we need to add an APT repository to install python3.10 +# - name: "Add deadsnakes APT repository" +# become: yes +# apt_repository: +# repo: "ppa:deadsnakes/ppa" + +- name: "Install APT dependencies" + become: yes + apt: + name: + - apt-transport-https + - ca-certificates + - curl + - gnupg2 + - libssl-dev + - pkg-config + - software-properties-common + - python3.10-dev + - python3-pip + - python3.10-venv + - unzip + update_cache: yes diff --git a/ansible/tasks/code.yaml b/ansible/tasks/code.yaml new file mode 100644 index 00000000..592b8df7 --- /dev/null +++ b/ansible/tasks/code.yaml @@ -0,0 +1,34 @@ +--- + +- name: "Create code dir" + file: + path: "/home/{{ ansible_user }}/git" + state: directory + +- name: "Clone SC2 repos" + git: + repo: "https://www.github.com/sc2-sys/{{ item }}.git" + dest: "/home/{{ ansible_user }}/git/sc2-sys/{{ item }}" + depth: 1 + update: yes + recursive: no + clone: yes + force: yes + accept_hostkey: yes + with_items: + - "applications" + - "deploy" + - "experiments" + +- name: "Clone virtee repos" + git: + repo: "https://www.github.com/virtee/{{ item }}.git" + dest: "/home/{{ ansible_user }}/git/virtee/{{ item }}" + depth: 1 + update: yes + recursive: no + clone: yes + force: yes + accept_hostkey: yes + with_items: + - "snphost" diff --git 
a/ansible/tasks/docker.yaml b/ansible/tasks/docker.yaml new file mode 100644 index 00000000..45317966 --- /dev/null +++ b/ansible/tasks/docker.yaml @@ -0,0 +1,27 @@ +--- + +- name: "Add Docker GPG key" + become: yes + apt_key: url=https://download.docker.com/linux/ubuntu/gpg + +- name: "Add Docker APT repository" + become: yes + apt_repository: + repo: "deb [arch=amd64] https://download.docker.com/linux/{{ ansible_distribution|lower }} {{ ansible_distribution_release }} stable" + +- name: "Install Docker" + become: yes + apt: + name: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-compose-plugin + update_cache: yes + +- name: "Add user to docker group" + become: yes + user: + name: "{{ ansible_user }}" + groups: docker + append: yes diff --git a/ansible/tasks/qemu.yaml b/ansible/tasks/qemu.yaml new file mode 100644 index 00000000..1e9ae600 --- /dev/null +++ b/ansible/tasks/qemu.yaml @@ -0,0 +1,14 @@ +--- + +- name: "Download the QEMU package with Azure's SNP patches" + get_url: + url: "https://github.com/jepio/AMDSEV/releases/download/v2024.02.24/snp-qemu_2024.10.28-0_amd64.deb" + dest: "/tmp/snp-qemu.deb" + mode: '0644' + +- name: "Install the new QEMU package" + become: true + apt: + deb: "/tmp/snp-qemu.deb" + state: present + diff --git a/ansible/tasks/rust.yaml b/ansible/tasks/rust.yaml new file mode 100644 index 00000000..d8f22807 --- /dev/null +++ b/ansible/tasks/rust.yaml @@ -0,0 +1,17 @@ +--- + +- name: "Install Rust using rustup" + shell: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + args: + creates: ~/.cargo/bin/rustc + +- name: "Add Cargo bin directory to PATH" + lineinfile: + path: ~/.bashrc + line: 'export PATH="$HOME/.cargo/bin:$PATH"' + create: yes + +- name: "Reload shell profile" + shell: source ~/.bashrc + args: + executable: /bin/bash diff --git a/ansible/tasks/update_host_kernel.yaml b/ansible/tasks/update_host_kernel.yaml new file mode 100644 index 00000000..54ec5ac3 --- /dev/null +++ 
b/ansible/tasks/update_host_kernel.yaml @@ -0,0 +1,41 @@ +--- + +- name: "Download the kernel with Azure's patches" + get_url: + url: "https://github.com/jepio/AMDSEV/releases/download/v2024.02.24/linux-image-6.8.0-rc5-next-20240221-snp-host-2cfe07293708_6.8.0-rc5-g2cfe07293708-2_amd64.deb" + dest: "/tmp/linux-image.deb" + mode: '0644' + +- name: "Install the new kernel package" + become: true + apt: + deb: "/tmp/linux-image.deb" + state: present + +- name: "Update GRUB to pick up the newly installed kernel" + become: true + command: update-grub + +- name: "List all installed kernels" + shell: dpkg --list | grep linux-image + register: installed_kernels + +# You can see the different options by running `dpkg --list | grep linux-image` +- name: "Set the default kernel in GRUB" + become: true + lineinfile: + path: /etc/default/grub + regexp: '^GRUB_DEFAULT=' + line: 'GRUB_DEFAULT="Advanced options for Ubuntu>Ubuntu, with Linux 6.8.0-rc5-next-20240221-snp-host-2cfe07293708"' + +- name: "Update GRUB configuration again to pick up new changes" + become: true + command: update-grub + when: installed_kernels is changed + +- name: "Reboot the system to apply the new kernel" + become: true + reboot: + reboot_timeout: 600 + test_command: uname -r + register: reboot_result diff --git a/ansible/vm.yaml b/ansible/vm.yaml new file mode 100644 index 00000000..37ffe655 --- /dev/null +++ b/ansible/vm.yaml @@ -0,0 +1,16 @@ +--- + +- hosts: all + gather_facts: yes + tasks: + - include_tasks: tasks/apt.yaml + # Install docker before updating the host kernel so that the reboot in the + # latter also means we can use docker without sudo (which also requires a + # reboot) + - include_tasks: tasks/docker.yaml + # TODO: manually cherry-pick patches on top of svsm/linux + - include_tasks: tasks/update_host_kernel.yaml + - include_tasks: tasks/qemu.yaml + - include_tasks: tasks/rust.yaml + - include_tasks: tasks/code.yaml + # - include_tasks: tasks/sc2.yml diff --git a/bin/workon.sh 
b/bin/workon.sh index 994bbdf5..27053ae0 100644 --- a/bin/workon.sh +++ b/bin/workon.sh @@ -49,6 +49,12 @@ else echo "sc2-deploy: WARN: neither SNP nor TDX is enabled" fi +if [ "$(sudo dmidecode -s system-manufacturer 2>/dev/null)" == "Microsoft Corporation" ]; then + export SC2_ON_AZURE="yes" +else + export SC2_ON_AZURE="no" +fi + # ---------------------------------- # VM cache config # ---------------------------------- @@ -71,11 +77,17 @@ export PS1="(sc2-deploy) $PS1" # Splash # ----------------------------- +if [ "$SC2_ON_AZURE" == "yes" ]; then + tee_str="${SC2_TEE}-azure" +else + tee_str="${SC2_TEE}" +fi + echo "" echo "----------------------------------" echo "CLI for SC2 Deployment Scripts" echo "CoCo Version: ${COCO_VERSION}" -echo "TEE: ${SC2_TEE}" +echo "TEE: ${tee_str}" echo "----------------------------------" echo "" diff --git a/docker/ovmf.dockerfile b/docker/ovmf.dockerfile index eead9d7d..31fc32f9 100644 --- a/docker/ovmf.dockerfile +++ b/docker/ovmf.dockerfile @@ -28,3 +28,23 @@ RUN mkdir -p ${CODE_DIR} \ && build -a X64 -b RELEASE -t GCC5 -p OvmfPkg/OvmfPkgX64.dsc \ && touch OvmfPkg/AmdSev/Grub/grub.efi \ && build -a X64 -b RELEASE -t GCC5 -p OvmfPkg/AmdSev/AmdSevX64.dsc + +ARG OVMF_VERSION_AZURE +ARG CODE_DIR=/git/sc2-sys/edk2-azure +RUN mkdir -p ${CODE_DIR} \ + && git clone \ + --branch ${OVMF_VERSION_AZURE} \ + --depth 1 \ + https://github.com/tianocore/edk2.git \ + ${CODE_DIR} \ + && cd ${CODE_DIR} \ + && sed -i \ + 's#https://github\.com/Zeex/subhook\.git#https://github.com/tianocore/edk2-subhook.git#g' \ + .gitmodules \ + && git submodule update --init \ + && export PYTHON3_ENABLE=TRUE \ + && export PYTHON_COMMAND=python3 \ + && make -j $(nproc) -C BaseTools/ \ + && . 
./edksetup.sh --reconfig \ + && touch OvmfPkg/AmdSev/Grub/grub.efi \ + && build -a X64 -b RELEASE -t GCC5 -p OvmfPkg/AmdSev/AmdSevX64.dsc diff --git a/requirements.txt b/requirements.txt index be532c1b..8274934b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ ansible>=8.4.0 +azure-cli>=2.68.0 black>=23.9.1 flake8>=7.1.1 invoke>=2.1.0 diff --git a/tasks/__init__.py b/tasks/__init__.py index b15c1ab5..743d1c19 100644 --- a/tasks/__init__.py +++ b/tasks/__init__.py @@ -1,5 +1,6 @@ from invoke import Collection +from . import azure from . import coco from . import containerd from . import cosign @@ -26,6 +27,7 @@ from . import svsm ns = Collection( + azure, coco, containerd, cosign, diff --git a/tasks/azure.py b/tasks/azure.py new file mode 100644 index 00000000..493142bb --- /dev/null +++ b/tasks/azure.py @@ -0,0 +1,228 @@ +from invoke import task +from json import loads as json_loads +from os import makedirs +from os.path import join +from tasks.util.env import PROJ_ROOT +from subprocess import run + +ANSIBLE_ROOT = join(PROJ_ROOT, "ansible") +ANSIBLE_INVENTORY_DIR = join(ANSIBLE_ROOT, "inventory") +ANSIBLE_INVENTORY_FILE = join(ANSIBLE_INVENTORY_DIR, "vms.ini") + +# TODO: request creating a new resource group named sc2 +AZURE_RESOURCE_GROUP = "faasm" + +AZURE_SNP_VM_ADMIN = "sc2" +AZURE_SNP_VM_IMAGE = ( + "/CommunityGalleries/cocopreview-91c44057-c3ab-4652-bf00-9242d5a90170/" + "Images/ubu2204-snp-host-upm/Versions/latest" +) +AZURE_SNP_VM_LOCATION = "eastus" +AZURE_SNP_VM_OS_DISK_SIZE = 64 +AZURE_SNP_VM_SSH_PRIV_KEY = "~/.ssh/id_rsa" +AZURE_SNP_VM_SSH_PUB_KEY = "~/.ssh/id_rsa.pub" +AZURE_SNP_VM_SKU = "Standard_DC8as_cc_v5" + +# Specifies order in which to delete resource types +RESOURCE_TYPE_PRECEDENCE = [ + "Microsoft.Network/networkInterfaces", + "Microsoft.Network/networkSecurityGroups", + "Microsoft.Network/virtualNetworks", + "Microsoft.Network/publicIpAddresses", +] + +# 
----------------------------------------------------------------------------- +# Azure Functions +# ----------------------------------------------------------------------------- + + +def build_ssh_command(ip_addr): + return f"ssh -A -i {AZURE_SNP_VM_SSH_PRIV_KEY} {AZURE_SNP_VM_ADMIN}@{ip_addr}" + + +def get_ip(name): + cmd = [ + "az vm list-ip-addresses", + "-n {}".format(name), + "-g {}".format(AZURE_RESOURCE_GROUP), + ] + + cmd = " ".join(cmd) + res = run(cmd, shell=True, capture_output=True) + + res = json_loads(res.stdout.decode("utf-8")) + vm_info = res[0]["virtualMachine"] + return vm_info["network"]["publicIpAddresses"][0]["ipAddress"] + + +def vm_op(op, name, extra_args=None, capture=False): + print("Performing {} on {}".format(op, name)) + + cmd = [ + "az vm {}".format(op), + "--resource-group {}".format(AZURE_RESOURCE_GROUP), + "--name {}".format(name), + ] + + if extra_args: + cmd.extend(extra_args) + + cmd = " ".join(cmd) + print(cmd) + + if capture: + res = run(cmd, shell=True, capture_output=True) + return res.stdout.decode("utf-8") + else: + run(cmd, shell=True, check=True) + + +def delete_resource(name, resource_type): + print(f"Deleting resource {name}") + + cmd = ( + f"az resource delete --resource-group {AZURE_RESOURCE_GROUP} " + f"--name {name} --resource-type {resource_type}" + ) + run(cmd, check=True, shell=True) + + +def delete_resources(resources): + print("Deleting {} resources".format(len(resources))) + + deleted_resources = list() + + # Prioritise certain types + for t in RESOURCE_TYPE_PRECEDENCE: + to_delete = [r for r in resources if r["type"] == t] + + if to_delete: + print("Prioritising {} resources of type {}".format(len(to_delete), t)) + + for r in to_delete: + delete_resource(r["name"], r["type"]) + deleted_resources.append(r["id"]) + + remaining = [r for r in resources if r["id"] not in deleted_resources] + for r in remaining: + delete_resource(r["name"], r["type"]) + + +def list_all(azure_cmd, prefix=None): + cmd = f"az 
{azure_cmd} list --resource-group {AZURE_RESOURCE_GROUP}" + res = run(cmd, shell=True, capture_output=True) + res = json_loads(res.stdout.decode("utf-8")) + + if prefix: + res = [v for v in res if v["name"].startswith(prefix)] + + return res + + +# ----------------------------------------------------------------------------- +# Ansible functions +# ----------------------------------------------------------------------------- + + +def ansible_prepare_inventory(prefix): + """ + Create ansible inventory for VMs + """ + all_vms = list_all("vm", prefix) + + if len(all_vms) == 0: + print(f"Did not find any VMs matching prefix {prefix}") + raise RuntimeError("No VMs found with prefix") + + print("Generating inventory for {} VMs".format(len(all_vms))) + + # Sort VMs based on name to ensure consistent choice of main + all_vms = sorted(all_vms, key=lambda d: d["name"]) + + # Get all IPs + for vm in all_vms: + vm["public_ip"] = get_ip(vm["name"]) + + makedirs(ANSIBLE_INVENTORY_DIR, exist_ok=True) + + # One group for all VMs, one for main, one for workers + lines = ["[all]"] + for v in all_vms: + # Include VM name for debugging purposes + lines.append( + "{} ansible_host={} ansible_user={}".format( + v["name"], v["public_ip"], AZURE_SNP_VM_ADMIN + ) + ) + + file_content = "\n".join(lines) + + print("Contents:\n") + print(file_content) + + with open(ANSIBLE_INVENTORY_FILE, "w") as fh: + fh.write(file_content) + fh.write("\n") + + +# ----------------------------------------------------------------------------- +# Entrypoint tasks +# ----------------------------------------------------------------------------- + + +@task +def deploy(ctx): + """ + Deploy SC2 on an SNP-enabled VM on Azure + """ + vm_name = "sc2-snp-test" + az_cmd = ( + f"az vm create -g {AZURE_RESOURCE_GROUP} -n {vm_name} " + f"--location {AZURE_SNP_VM_LOCATION} --admin-username {AZURE_SNP_VM_ADMIN} " + f"--image {AZURE_SNP_VM_IMAGE} --accept-term --size {AZURE_SNP_VM_SKU} " + f"--ssh-key-value 
{AZURE_SNP_VM_SSH_PUB_KEY} --accelerated-networking true " + f"--os-disk-size-gb {AZURE_SNP_VM_OS_DISK_SIZE}" + ) + run(az_cmd, shell=True, check=True) + + ansible_prepare_inventory(vm_name) + + vm_playbook = join(ANSIBLE_ROOT, "vm.yaml") + run( + f"ansible-playbook -i {ANSIBLE_INVENTORY_FILE} {vm_playbook}", + shell=True, + check=True, + ) + + +@task +def destroy(ctx, vm_name="sc2-snp-test"): + # First delete the VM + vm_op("delete", vm_name, extra_args=["--yes"]) + + # Delete all other resources associated with it that may be left + all_resources = list_all("resource", prefix=vm_name) + delete_resources(all_resources) + + +@task +def ssh(ctx, name="sc2-snp-test"): + """ + Prints SSH information for given VM + """ + ip_addr = get_ip(name) + print("--- SSH command ---\n") + print(build_ssh_command(ip_addr)) + + print("\n--- SSH config ---") + print( + """ +# SC2 Azure SNP VM +Host {} +HostName {} +User {} +ForwardAgent yes + """.format( + name, ip_addr, AZURE_SNP_VM_ADMIN + ) + ) diff --git a/tasks/ovmf.py b/tasks/ovmf.py index dce8e609..a6f0a3e8 100644 --- a/tasks/ovmf.py +++ b/tasks/ovmf.py @@ -1,17 +1,28 @@ from invoke import task from os.path import join +from tasks.util.azure import on_azure from tasks.util.docker import copy_from_ctr_image -from tasks.util.env import KATA_ROOT -from tasks.util.ovmf import OVMF_IMAGE_TAG, build_ovmf_image +from tasks.util.env import KATA_ROOT, print_dotted_line +from tasks.util.ovmf import ( + OVMF_IMAGE_TAG, + OVMF_VERSION, + OVMF_VERSION_AZURE, + build_ovmf_image +) def install(): """ Copy a custom build of OVMF into the destination path """ - ctr_paths = ["/git/sc2-sys/edk2/Build/AmdSev/RELEASE_GCC5/FV/OVMF.fd"] + repo = "edk2-azure" if on_azure() else "edk2" + ovmf_version = OVMF_VERSION_AZURE if on_azure() else OVMF_VERSION + + print_dotted_line(f"Installing OVMF ({ovmf_version})") + ctr_paths = [f"/git/sc2-sys/{repo}/Build/AmdSev/RELEASE_GCC5/FV/OVMF.fd"] host_paths = [join(KATA_ROOT, "share", "ovmf", "AMDSEV.fd")] 
copy_from_ctr_image(OVMF_IMAGE_TAG, ctr_paths, host_paths, requires_sudo=True) + print("Success!") @task diff --git a/tasks/sc2.py b/tasks/sc2.py index 6fd32b89..e9af68e1 100644 --- a/tasks/sc2.py +++ b/tasks/sc2.py @@ -27,6 +27,7 @@ install_cc_runtime as operator_install_cc_runtime, ) from tasks.ovmf import install as ovmf_install +from tasks.util.azure import on_azure from tasks.util.containerd import restart_containerd from tasks.util.docker import pull_artifact_images from tasks.util.env import ( @@ -309,9 +310,7 @@ def deploy(ctx, debug=False, clean=False): # Install an up-to-date version of OVMF (the one currently shipped with # CoCo is not enough to run on 6.11 and QEMU 9.1) - print_dotted_line(f"Installing OVMF ({OVMF_VERSION})") ovmf_install() - print("Success!") # Update SNP class to use default QEMU (we use host kernel 6.11, so we # can use upstream QEMU 9.1). We do this update before generating the SC2 @@ -331,6 +330,23 @@ def deploy(ctx, debug=False, clean=False): requires_root=True, ) + # If running on Azure, point QEMU to the system-wide qemu + if on_azure(): + qemu_path = "/usr/local/bin/qemu-system-x86_64" + updated_toml_str = """ + [hypervisor.qemu] + path = "{qemu_path}" + valid_hypervisor_paths = [ "{qemu_path}" ] + disable_nesting_checks = true + """.format( + qemu_path=qemu_path + ) + update_toml( + join(KATA_CONFIG_DIR, "configuration-qemu-snp.toml"), + updated_toml_str, + requires_root=True, + ) + # Apply general patches to the Kata runtime replace_kata_shim( dst_shim_binary=join(KATA_ROOT, "bin", "containerd-shim-kata-v2"), diff --git a/tasks/util/azure.py b/tasks/util/azure.py new file mode 100644 index 00000000..b8f4c8c3 --- /dev/null +++ b/tasks/util/azure.py @@ -0,0 +1,8 @@ +from os import environ + + +def on_azure(): + if "SC2_ON_AZURE" not in environ: + return False + + return environ["SC2_ON_AZURE"] == "yes" diff --git a/tasks/util/kernel.py b/tasks/util/kernel.py index 42dc3b90..70207d32 100644 --- a/tasks/util/kernel.py +++ 
b/tasks/util/kernel.py @@ -1,11 +1,19 @@ from os import environ from subprocess import run -from tasks.util.versions import HOST_KERNEL_VERSION_SNP, HOST_KERNEL_VERSION_TDX +from tasks.util.azure import on_azure +from tasks.util.versions import ( + HOST_KERNEL_VERSION_SNP, + HOST_KERNEL_VERSION_SNP_AZURE, + HOST_KERNEL_VERSION_TDX, +) def get_host_kernel_expected_prefix(): sc2_runtime_class = environ["SC2_RUNTIME_CLASS"] if "snp" in sc2_runtime_class: + if on_azure(): + return HOST_KERNEL_VERSION_SNP_AZURE + return HOST_KERNEL_VERSION_SNP if "tdx" in sc2_runtime_class: diff --git a/tasks/util/ovmf.py b/tasks/util/ovmf.py index e4b18fb6..aa7ef515 100644 --- a/tasks/util/ovmf.py +++ b/tasks/util/ovmf.py @@ -2,7 +2,7 @@ from os.path import join from tasks.util.env import GHCR_URL, GITHUB_ORG, PROJ_ROOT from tasks.util.docker import build_image -from tasks.util.versions import OVMF_VERSION +from tasks.util.versions import OVMF_VERSION, OVMF_VERSION_AZURE OVMF_IMAGE_TAG = join(GHCR_URL, GITHUB_ORG, f"ovmf:{OVMF_VERSION}") @@ -16,7 +16,10 @@ def build_ovmf_image(nocache, push, debug=True): build_image( OVMF_IMAGE_TAG, join(PROJ_ROOT, "docker", "ovmf.dockerfile"), - build_args={"OVMF_VERSION": OVMF_VERSION}, + build_args={ + "OVMF_VERSION": OVMF_VERSION, + "OVMF_VERSION_AZURE": OVMF_VERSION_AZURE, + }, nocache=nocache, push=push, debug=debug, diff --git a/tasks/util/versions.py b/tasks/util/versions.py index 6c479992..7ebf6aa9 100644 --- a/tasks/util/versions.py +++ b/tasks/util/versions.py @@ -31,6 +31,7 @@ # WARNING: if we update the host kernel version, make sure to update it in the # table in ./docs/host_kernel.md HOST_KERNEL_VERSION_SNP = "6.11.0-snp-host-cc2568386" +HOST_KERNEL_VERSION_SNP_AZURE = "6.8.0-rc5-next-20240221-snp-host-2cfe07293708" HOST_KERNEL_VERSION_TDX = "6.8.0-1013-intel" GUEST_KERNEL_VERSION = "6.12.13" @@ -39,3 +40,4 @@ # Firmware OVMF_VERSION = "edk2-stable202411" +OVMF_VERSION_AZURE = "edk2-stable202402"