diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml
new file mode 100644
index 00000000..ad4241b0
--- /dev/null
+++ b/.github/workflows/e2e.yaml
@@ -0,0 +1,102 @@
+# Copyright 2025 NVIDIA CORPORATION
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: End-to-end Tests
+
+# Runs once the "image" workflow has completed for a matching branch.
+on:
+  workflow_run:
+    workflows: [image]
+    types:
+      - completed
+    branches:
+      - "pull-request/[0-9]+"
+      - main
+      - release-*
+
+jobs:
+  e2e-tests:
+    runs-on: linux-amd64-cpu4
+    # Both conditions live in ONE expression: text outside ${{ }} would turn
+    # the value into a non-empty string, which always evaluates as true.
+    if: ${{ github.event.workflow_run.conclusion == 'success' && github.event.workflow_run.event == 'push' }}
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+        with:
+          # workflow_run executes against the default branch; check out the
+          # commit that the triggering image build actually used.
+          ref: ${{ github.event.workflow_run.head_sha }}
+
+      - name: Calculate build vars
+        id: vars
+        env:
+          # GITHUB_SHA is the default-branch tip for workflow_run events, so
+          # derive the short SHA from the triggering run instead.
+          HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
+        run: |
+          echo "COMMIT_SHORT_SHA=${HEAD_SHA:0:8}" >> $GITHUB_ENV
+          echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
+          GOLANG_VERSION=$(./hack/golang-version.sh)
+          echo "GOLANG_VERSION=${GOLANG_VERSION##GOLANG_VERSION := }" >> $GITHUB_ENV
+
+      - name: Install Go
+        uses: actions/setup-go@v5
+        with:
+          go-version: ${{ env.GOLANG_VERSION }}
+
+      - name: Set up Holodeck
+        uses: NVIDIA/holodeck@v0.2.5
+        with:
+          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws_ssh_key: ${{ secrets.AWS_SSH_KEY }}
+          holodeck_config: "tests/e2e/infra/aws.yaml"
+
+      - name: Get public dns name
+        id: holodeck_public_dns_name
+        uses: mikefarah/yq@master
+        with:
+          cmd: yq '.status.properties[] | select(.name == "public-dns-name") | .value' /github/workspace/.cache/holodeck.yaml
+
+      - name: Run e2e tests
+        env:
+          # Workflow-level env values are not shell-expanded, so use the
+          # expression syntax to read the variables exported above.
+          IMAGE_NAME: ghcr.io/${{ env.LOWERCASE_REPO_OWNER }}/container-toolkit
+          VERSION: ${{ env.COMMIT_SHORT_SHA }}
+          SSH_KEY: ${{ secrets.AWS_SSH_KEY }}
+          E2E_SSH_USER: ${{ secrets.E2E_SSH_USER }}
+        run: |
+          e2e_ssh_key=$(mktemp)
+          echo "$SSH_KEY" > "$e2e_ssh_key"
+          chmod 600 "$e2e_ssh_key"
+          export E2E_SSH_KEY="$e2e_ssh_key"
+          export E2E_SSH_HOST="${{ steps.holodeck_public_dns_name.outputs.result }}"
+
+          make -f tests/e2e/Makefile test
+
+      - name: Send Slack alert notification
+        id: slack
+        if: ${{ failure() }}
+        uses: slackapi/slack-github-action@v1.27.0
+        env:
+          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
+          SUMMARY_URL: https://github.com/${{github.repository}}/actions/runs/${{github.run_id}}
+        with:
+          channel-id: ${{ secrets.SLACK_CHANNEL_ID }}
+          slack-message: |
+            :x: On repository ${{ github.repository }} the Workflow *${{ github.workflow }}* has failed.
+
+            Details: ${{ env.SUMMARY_URL }}
diff --git a/.github/workflows/image.yaml b/.github/workflows/image.yaml
index 693239f6..81434839 100644
--- a/.github/workflows/image.yaml
+++ b/.github/workflows/image.yaml
@@ -16,21 +16,15 @@
 name: image
 
 on:
-  pull_request:
-    types:
-      - opened
-      - synchronize
-    branches:
-      - main
-      - release-*
   push:
     branches:
+      - "pull-request/[0-9]+"
       - main
       - release-*
 
 jobs:
   packages:
-    runs-on: ubuntu-latest
+    runs-on: linux-amd64-cpu4
     strategy:
       matrix:
         target:
@@ -74,7 +68,7 @@ jobs:
           path: ${{ github.workspace }}/dist/*
 
   image:
-    runs-on: ubuntu-latest
+    runs-on: linux-amd64-cpu4
     strategy:
       matrix:
         dist:
@@ -96,21 +90,13 @@ jobs:
           echo "COMMIT_SHORT_SHA=${GITHUB_SHA:0:8}" >> $GITHUB_ENV
           echo "LOWERCASE_REPO_OWNER=$(echo "${GITHUB_REPOSITORY_OWNER}" | awk '{print tolower($0)}')" >> $GITHUB_ENV
 
-          PUSH_ON_BUILD="false"
-          BUILD_MULTI_ARCH_IMAGES="false"
-          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
-            if [[ "${{ github.actor }}" != "dependabot[bot]" && "${{ github.event.pull_request.head.repo.full_name }}" == "${{ github.repository }}" ]]; then
-              # For non-fork PRs that are not created by dependabot we do push images
-              PUSH_ON_BUILD="true"
-            fi
-          elif [[ "${{ github.event_name }}" == "push" ]]; then
-            # On push events we do generate images and enable muilti-arch builds
-            PUSH_ON_BUILD="true"
-            BUILD_MULTI_ARCH_IMAGES="true"
+          BUILD_MULTI_ARCH_IMAGES="true"
+          if [[ "${{ matrix.ispr }}" == "true" ]]; then
+            BUILD_MULTI_ARCH_IMAGES="false"
           fi
-          echo "PUSH_ON_BUILD=${PUSH_ON_BUILD}" >> $GITHUB_ENV
+          # This workflow only runs on push events, so images are always pushed.
+          echo "PUSH_ON_BUILD=true" >> $GITHUB_ENV
           echo "BUILD_MULTI_ARCH_IMAGES=${BUILD_MULTI_ARCH_IMAGES}" >> $GITHUB_ENV
-
       - name: Set up QEMU
         uses: docker/setup-qemu-action@v3
         with:
diff --git a/deployments/container/Dockerfile.packaging b/deployments/container/Dockerfile.packaging
index 5185e7e3..b8fd9cfc 100644
--- a/deployments/container/Dockerfile.packaging
+++ b/deployments/container/Dockerfile.packaging
@@ -14,7 +14,7 @@
 
 ARG GOLANG_VERSION=x.x.x
 
-FROM nvidia/cuda:12.8.0-base-ubuntu20.04
+FROM nvcr.io/nvidia/cuda:12.8.0-base-ubuntu20.04
 
 ARG ARTIFACTS_ROOT
 COPY ${ARTIFACTS_ROOT} /artifacts/packages/
diff --git a/deployments/container/Dockerfile.ubi8 b/deployments/container/Dockerfile.ubi8
index c69158d3..c6b3be14 100644
--- a/deployments/container/Dockerfile.ubi8
+++ b/deployments/container/Dockerfile.ubi8
@@ -15,7 +15,7 @@
 ARG GOLANG_VERSION=x.x.x
 ARG VERSION="N/A"
 
-FROM nvidia/cuda:12.8.0-base-ubi8 AS build
+FROM nvcr.io/nvidia/cuda:12.8.0-base-ubi8 AS build
 
 RUN yum install -y \
     wget make git gcc \
@@ -47,7 +47,7 @@ ARG VERSION="N/A"
 ARG GIT_COMMIT="unknown"
 RUN make PREFIX=/artifacts cmd-nvidia-ctk-installer
 
-FROM nvidia/cuda:12.8.0-base-ubi8
+FROM nvcr.io/nvidia/cuda:12.8.0-base-ubi8
 
 ENV NVIDIA_DISABLE_REQUIRE="true"
 ENV NVIDIA_VISIBLE_DEVICES=void
diff --git a/deployments/container/Dockerfile.ubuntu b/deployments/container/Dockerfile.ubuntu
index 4285429b..b23614d7 100644
--- a/deployments/container/Dockerfile.ubuntu
+++ b/deployments/container/Dockerfile.ubuntu
@@ -15,7 +15,7 @@
 ARG GOLANG_VERSION=x.x.x
 ARG VERSION="N/A"
 
-FROM nvidia/cuda:12.8.0-base-ubuntu20.04 AS build
+FROM nvcr.io/nvidia/cuda:12.8.0-base-ubuntu20.04 AS build
 
 RUN apt-get update && \
     apt-get install -y wget make git gcc \
diff --git a/deployments/container/Makefile b/deployments/container/Makefile
index 3d838551..3d37051e 100644
--- a/deployments/container/Makefile
+++ b/deployments/container/Makefile
@@ -27,12 +27,6 @@ DIST_DIR ?= $(CURDIR)/dist
 ##### Global variables #####
 include $(CURDIR)/versions.mk
 
-ifeq ($(IMAGE_NAME),)
-REGISTRY ?= nvidia
-IMAGE_NAME := $(REGISTRY)/container-toolkit
-endif
-
-VERSION ?= $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
 IMAGE_VERSION := $(VERSION)
 
 IMAGE_TAG ?= $(VERSION)-$(DIST)
diff --git a/tests/e2e/infra/aws.yaml b/tests/e2e/infra/aws.yaml
new file mode 100644
index 00000000..d73bf7f5
--- /dev/null
+++ b/tests/e2e/infra/aws.yaml
@@ -0,0 +1,31 @@
+# Holodeck environment passed as holodeck_config by .github/workflows/e2e.yaml.
+apiVersion: holodeck.nvidia.com/v1alpha1
+kind: Environment
+metadata:
+  name: HOLODECK_NAME
+  description: "end-to-end test infrastructure"
+spec:
+  provider: aws
+  auth:
+    keyName: cnt-ci
+    privateKey: HOLODECK_PRIVATE_KEY
+  instance:
+    type: g4dn.xlarge
+    region: us-west-1
+    ingressIpRanges:
+      - 18.190.12.32/32
+      - 3.143.46.93/32
+      - 44.230.241.223/32
+      - 44.235.4.62/32
+      - 52.15.119.136/32
+      - 52.24.205.48/32
+    image:
+      architecture: amd64
+      imageId: ami-0ce2cb35386fc22e9
+  containerRuntime:
+    install: true
+    name: docker
+  nvidiaContainerToolkit:
+    install: false
+  nvidiaDriver:
+    install: true
diff --git a/versions.mk b/versions.mk
index f2d8f71c..a5893af9 100644
--- a/versions.mk
+++ b/versions.mk
@@ -30,3 +30,10 @@ GIT_COMMIT ?= $(shell git describe --match="" --dirty --long --always --abbrev=4
 GIT_COMMIT_SHORT ?= $(shell git rev-parse --short HEAD 2> /dev/null || echo "")
 GIT_BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD 2> /dev/null || echo "${GIT_COMMIT}")
 SOURCE_DATE_EPOCH ?= $(shell git log -1 --format=%ct 2> /dev/null || echo "")
+
+ifeq ($(IMAGE_NAME),)
+REGISTRY ?= nvidia
+IMAGE_NAME := $(REGISTRY)/container-toolkit
+endif
+
+VERSION ?= $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))