Skip to content

Commit

Permalink
Merge pull request kata-containers#10812 from zvonkok/fix-arch-build-gpu
Browse files Browse the repository at this point in the history
gpu: Fix arm64 build
  • Loading branch information
zvonkok authored Feb 4, 2025
2 parents 3fc1707 + eeacd8f commit 429b265
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 58 deletions.
49 changes: 22 additions & 27 deletions tools/osbuilder/rootfs-builder/nvidia/nvidia_chroot.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ nvidia_gpu_stack="$5"
driver_version=""
driver_type="-open"
supported_gpu_devids="/supported-gpu.devids"
base_os="jammy"

APT_INSTALL="apt -o Dpkg::Options::='--force-confdef' -o Dpkg::Options::='--force-confold' -yqq --no-install-recommends install"

Expand Down Expand Up @@ -153,7 +154,6 @@ prepare_run_file_drivers() {
echo "chroot: Resetting driver version not supported with run-file"
fi


echo "chroot: Prepare NVIDIA run file drivers"
pushd / >> /dev/null
chmod +x "${run_file_name}"
Expand Down Expand Up @@ -239,21 +239,19 @@ setup_apt_repositories() {
touch /var/lib/dpkg/status
rm -f /etc/apt/sources.list.d/*

if [ "${arch_target}" == "aarch64" ]; then
cat <<-'CHROOT_EOF' > /etc/apt/sources.list.d/jammy.list
deb http://ports.ubuntu.com/ubuntu-ports/ jammy main restricted universe multiverse
deb http://ports.ubuntu.com/ubuntu-ports/ jammy-updates main restricted universe multiverse
deb http://ports.ubuntu.com/ubuntu-ports/ jammy-security main restricted universe multiverse
deb http://ports.ubuntu.com/ubuntu-ports/ jammy-backports main restricted universe multiverse
CHROOT_EOF
else
cat <<-'CHROOT_EOF' > /etc/apt/sources.list.d/noble.list
deb http://us.archive.ubuntu.com/ubuntu/ jammy main restricted universe multiverse
deb http://us.archive.ubuntu.com/ubuntu/ jammy-updates main restricted universe multiverse
deb http://us.archive.ubuntu.com/ubuntu/ jammy-security main restricted universe multiverse
deb http://us.archive.ubuntu.com/ubuntu/ jammy-backports main restricted universe multiverse
CHROOT_EOF
fi
# Changing base_os here also requires matching changes to the cuda-keyring
# and CUDA apt repository references; see install_nvidia_dcgm for details.
cat <<-CHROOT_EOF > /etc/apt/sources.list.d/${base_os}.list
deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu ${base_os} main restricted universe multiverse
deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu ${base_os}-updates main restricted universe multiverse
deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu ${base_os}-security main restricted universe multiverse
deb [arch=amd64] http://us.archive.ubuntu.com/ubuntu ${base_os}-backports main restricted universe multiverse
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports ${base_os} main restricted universe multiverse
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports ${base_os}-updates main restricted universe multiverse
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports ${base_os}-security main restricted universe multiverse
deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports ${base_os}-backports main restricted universe multiverse
CHROOT_EOF

apt update

Expand Down Expand Up @@ -298,18 +296,15 @@ install_nvidia_dcgm() {
return
}

curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb
dpkg -i cuda-keyring_1.0-1_all.deb && rm -f cuda-keyring_1.0-1_all.deb
arch="x86_64"
[[ ${arch_target} == "aarch64" ]] && arch="sbsa"
# shellcheck disable=SC2015
[[ ${base_os} == "jammy" ]] && osver="ubuntu2204" || die "Unknown base_os ${base_os} used"

keyring="cuda-keyring_1.1-1_all.deb"
curl -O https://developer.download.nvidia.com/compute/cuda/repos/${osver}/${arch}/${keyring}
dpkg -i ${keyring} && rm -f ${keyring}

if [ "${arch_target}" == "aarch64" ]; then
cat <<-'CHROOT_EOF' > /etc/apt/sources.list.d/cuda.list
deb [signed-by=/usr/share/keyrings/cuda-archive-keyring.gpg] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/arm64/ /
CHROOT_EOF
else
cat <<-'CHROOT_EOF' > /etc/apt/sources.list.d/cuda.list
deb [signed-by=/usr/share/keyrings/cuda-archive-keyring.gpg] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /
CHROOT_EOF
fi
apt update
eval "${APT_INSTALL}" datacenter-gpu-manager
}
Expand Down
75 changes: 45 additions & 30 deletions tools/osbuilder/rootfs-builder/nvidia/nvidia_rootfs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@ readonly SCRIPT_DIR="${script_dir}/nvidia"
# This will control how much output the initrd/image will produce
DEBUG=""

# Resolve the target architecture once, up front.
#   machine_arch - copy of ARCH as provided by the caller (aarch64 / x86_64)
#   distro_arch  - the arm64 / amd64 spelling of the same architecture
# Any other value of ARCH is rejected through die (defined outside this
# section of the script).
machine_arch=${ARCH}

case "${machine_arch}" in
	aarch64)
		distro_arch="arm64"
		;;
	x86_64)
		distro_arch="amd64"
		;;
	*)
		die "Unsupported architecture: ${machine_arch}"
		;;
esac

setup_nvidia-nvrc() {
local TARGET="nvidia-nvrc"
local TARGET_VERSION="main"
Expand All @@ -33,8 +43,8 @@ setup_nvidia-nvrc() {

pushd "${PROJECT}" > /dev/null || exit 1

cargo build --release --target=x86_64-unknown-linux-musl
cp target/x86_64-unknown-linux-musl/release/NVRC ../../destdir/bin/.
cargo build --release --target="${machine_arch}"-unknown-linux-gnu
cp target/"${machine_arch}"-unknown-linux-gnu/release/NVRC ../../destdir/bin/.

popd > /dev/null || exit 1

Expand Down Expand Up @@ -155,7 +165,7 @@ setup_nvidia_gpu_rootfs_stage_one() {
mount -t proc /proc ./proc

chroot . /bin/bash -c "/nvidia_chroot.sh $(uname -r) ${run_file_name} \
${run_fm_file_name} ${ARCH} ${NVIDIA_GPU_STACK}"
${run_fm_file_name} ${machine_arch} ${NVIDIA_GPU_STACK}"

umount -R ./dev
umount ./proc
Expand All @@ -168,8 +178,8 @@ setup_nvidia_gpu_rootfs_stage_one() {
popd >> /dev/null

pushd "${BUILD_DIR}" >> /dev/null
curl -LO https://github.com/upx/upx/releases/download/v4.2.4/upx-4.2.4-amd64_linux.tar.xz
tar xvf upx-4.2.4-amd64_linux.tar.xz
curl -LO "https://github.com/upx/upx/releases/download/v4.2.4/upx-4.2.4-${distro_arch}_linux.tar.xz"
tar xvf "upx-4.2.4-${distro_arch}_linux.tar.xz"
popd >> /dev/null
}

Expand All @@ -180,12 +190,12 @@ chisseled_iptables() {
ln -s ../sbin/xtables-nft-multi sbin/iptables-restore
ln -s ../sbin/xtables-nft-multi sbin/iptables-save

libdir="lib/x86_64-linux-gnu"
cp -a "${stage_one}"/${libdir}/libmnl.so.0* lib/.
libdir=lib/"${machine_arch}"-linux-gnu
cp -a "${stage_one}/${libdir}"/libmnl.so.0* lib/.

libdir="usr/lib/x86_64-linux-gnu"
cp -a "${stage_one}"/${libdir}/libnftnl.so.11* lib/.
cp -a "${stage_one}"/${libdir}/libxtables.so.12* lib/.
libdir=usr/lib/"${machine_arch}"-linux-gnu
cp -a "${stage_one}/${libdir}"/libnftnl.so.11* lib/.
cp -a "${stage_one}/${libdir}"/libxtables.so.12* lib/.
}

chisseled_nvswitch() {
Expand All @@ -198,10 +208,10 @@ chisseled_dcgm() {
echo "nvidia: chisseling DCGM"

mkdir -p etc/dcgm-exporter
libdir="lib/x86_64-linux-gnu"
libdir=lib/"${machine_arch}"-linux-gnu

cp -a "${stage_one}"/usr/${libdir}/libdcgm.* ${libdir}/.
cp -a "${stage_one}"/${libdir}/libgcc_s.so.1* ${libdir}/.
cp -a "${stage_one}"/usr/"${libdir}"/libdcgm.* "${libdir}"/.
cp -a "${stage_one}"/"${libdir}"/libgcc_s.so.1* "${libdir}"/.
cp -a "${stage_one}"/usr/bin/nv-hostengine bin/.

tar xvf "${BUILD_DIR}"/kata-static-nvidia-dcgm-exporter.tar.zst -C .
Expand All @@ -217,22 +227,24 @@ chisseled_compute() {

cp -a "${stage_one}"/lib/modules/* lib/modules/.

libdir="lib/x86_64-linux-gnu"
cp -a "${stage_one}"/${libdir}/libdl.so.2* lib/x86_64-linux-gnu/.
cp -a "${stage_one}"/${libdir}/libz.so.1* lib/x86_64-linux-gnu/.
cp -a "${stage_one}"/${libdir}/libpthread.so.0* lib/x86_64-linux-gnu/.
cp -a "${stage_one}"/${libdir}/libresolv.so.2* lib/x86_64-linux-gnu/.
cp -a "${stage_one}"/${libdir}/libc.so.6* lib/x86_64-linux-gnu/.
cp -a "${stage_one}"/${libdir}/libm.so.6* lib/x86_64-linux-gnu/.
cp -a "${stage_one}"/${libdir}/librt.so.1* lib/x86_64-linux-gnu/.
libdir="lib/${machine_arch}-linux-gnu"
cp -a "${stage_one}/${libdir}"/libdl.so.2* "${libdir}"/.
cp -a "${stage_one}/${libdir}"/libz.so.1* "${libdir}"/.
cp -a "${stage_one}/${libdir}"/libpthread.so.0* "${libdir}"/.
cp -a "${stage_one}/${libdir}"/libresolv.so.2* "${libdir}"/.
cp -a "${stage_one}/${libdir}"/libc.so.6* "${libdir}"/.
cp -a "${stage_one}/${libdir}"/libm.so.6* "${libdir}"/.
cp -a "${stage_one}/${libdir}"/librt.so.1* "${libdir}"/.

libdir="lib64"
cp -aL "${stage_one}"/${libdir}/ld-linux-x86-64.so.* lib64/.
[[ ${machine_arch} == "aarch64" ]] && libdir="lib"
[[ ${machine_arch} == "x86_64" ]] && libdir="lib64"

libdir="usr/lib/x86_64-linux-gnu"
cp -a "${stage_one}"/${libdir}/libnvidia-ml.so.* lib/x86_64-linux-gnu/.
cp -a "${stage_one}"/${libdir}/libcuda.so.* lib/x86_64-linux-gnu/.
cp -a "${stage_one}"/${libdir}/libnvidia-cfg.so.* lib/x86_64-linux-gnu/.
cp -aL "${stage_one}/${libdir}"/ld-linux-* "${libdir}"/.

libdir=usr/lib/"${machine_arch}"-linux-gnu
cp -a "${stage_one}/${libdir}"/libnvidia-ml.so.* lib/"${machine_arch}"-linux-gnu/.
cp -a "${stage_one}/${libdir}"/libcuda.so.* lib/"${machine_arch}"-linux-gnu/.
cp -a "${stage_one}/${libdir}"/libnvidia-cfg.so.* lib/"${machine_arch}"-linux-gnu/.

# basic GPU admin tools
cp -a "${stage_one}"/usr/bin/nvidia-persistenced bin/.
Expand All @@ -253,7 +265,7 @@ chisseled_init() {
tar xvf "${BUILD_DIR}"/kata-static-busybox.tar.xz -C .

mkdir -p dev etc proc run/cdi sys tmp usr var lib/modules lib/firmware \
usr/share/nvidia lib/x86_64-linux-gnu lib64 usr/bin
usr/share/nvidia lib/"${machine_arch}"-linux-gnu lib64 usr/bin

ln -sf ../run var/run

Expand Down Expand Up @@ -282,14 +294,17 @@ compress_rootfs() {

find . -type f -executable | while IFS= read -r file; do
strip "${file}"
${BUILD_DIR}/upx-4.2.4-amd64_linux/upx --best --lzma "${file}"
"${BUILD_DIR}"/upx-4.2.4-"${distro_arch}"_linux/upx --best --lzma "${file}"
done

# While I was playing with compression the executable flag on
# /lib64/ld-linux-x86-64.so.2 was lost...
# Since this is the program interpreter, it needs to be executable
# as well.. sigh
chmod +x lib64/ld-linux-x86-64.so.2
[[ ${machine_arch} == "aarch64" ]] && libdir="lib"
[[ ${machine_arch} == "x86_64" ]] && libdir="lib64"

chmod +x "${libdir}"/ld-linux-*

}

Expand Down
2 changes: 1 addition & 1 deletion tools/osbuilder/rootfs-builder/rootfs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ EXTRA_PKGS=${EXTRA_PKGS:-""}

NVIDIA_GPU_STACK=${NVIDIA_GPU_STACK:-""}
nvidia_rootfs="${script_dir}/nvidia/nvidia_rootfs.sh"
source "$nvidia_rootfs"
[ "${ARCH}" == "x86_64" ] || [ "${ARCH}" == "aarch64" ] && source "$nvidia_rootfs"

#For cross build
CROSS_BUILD=${CROSS_BUILD:-false}
Expand Down

0 comments on commit 429b265

Please sign in to comment.