Skip to content

Commit 0dfb440

Browse files
authored
All CI/CD support matrix of targets (#402)
The rest of the workflows (`portable_linux_package_matrix.yml` and `publish_pytorch_dev_docker.yml`) now support the matrix. Also did some reorganization. Closes #221. Builds are passing here for [pytorch docker](https://github.com/ROCm/TheRock/actions/runs/14406918426) and [portable linux packages](https://github.com/ROCm/TheRock/actions/runs/14406921335).
1 parent 636bcdf commit 0dfb440

10 files changed

+146
-62
lines changed

.github/workflows/ci.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ jobs:
5757
families: ${{ fromJSON(needs.setup.outputs.build_linux_amdgpu_families) }}
5858
uses: ./.github/workflows/build_linux_packages.yml
5959
with:
60-
amdgpu_families: ${{ matrix.families.target }}
60+
amdgpu_families: ${{ matrix.families.family }}
6161
permissions:
6262
id-token: write
6363

@@ -71,7 +71,7 @@ jobs:
7171
families: ${{ fromJSON(needs.setup.outputs.build_windows_amdgpu_families) }}
7272
uses: ./.github/workflows/build_windows_packages.yml
7373
with:
74-
amdgpu_families: ${{ matrix.families.target }}
74+
amdgpu_families: ${{ matrix.families.family }}
7575

7676
test_linux_packages:
7777
needs: [setup, build_linux_packages]
@@ -89,7 +89,7 @@ jobs:
8989
families: ${{ fromJSON(needs.setup.outputs.test_linux_amdgpu_families) }}
9090
uses: ./.github/workflows/test_linux_packages.yml
9191
with:
92-
amdgpu_families: ${{ matrix.families.target }}
92+
amdgpu_families: ${{ matrix.families.family }}
9393
test_runs_on: ${{ matrix.families.test-runs-on }}
9494
artifact_run_id: ${{ inputs.artifact_run_id }}
9595

.github/workflows/portable_linux_package_matrix.yml

+8-5
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ jobs:
3333
runs-on: ubuntu-24.04
3434
outputs:
3535
version: ${{ steps.version.outputs.version }}
36+
package_targets: ${{ steps.configure.outputs.package_targets }}
3637
steps:
3738
- name: Checkout repository
3839
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -60,6 +61,12 @@ jobs:
6061
base_version=$(jq -r '.["rocm-version"]' version.json)
6162
echo "version=${base_version}${version_suffix}" >> $GITHUB_OUTPUT
6263
64+
- name: Generating package target matrix
65+
id: configure
66+
env:
67+
PYTORCH_DEV_DOCKER: "false"
68+
run: python ./build_tools/github_action/fetch_package_targets.py
69+
6370
portable_linux_packages:
6471
name: ${{ matrix.target_bundle.amdgpu_family }}::Build Portable Linux
6572
runs-on: ${{ github.repository_owner == 'ROCm' && 'azure-linux-scale-rocm' || 'ubuntu-24.04' }}
@@ -76,11 +83,7 @@ jobs:
7683
strategy:
7784
fail-fast: false
7885
matrix:
79-
target_bundle:
80-
- amdgpu_family: "gfx94X-dcgpu"
81-
- amdgpu_family: "gfx110X-dgpu"
82-
- amdgpu_family: "gfx1151"
83-
- amdgpu_family: "gfx1201"
86+
target_bundle: ${{ fromJSON(needs.setup_metadata.outputs.package_targets) }}
8487

8588
steps:
8689
- name: "Checking out repository"

.github/workflows/publish_pytorch_dev_docker.yml

+21-8
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,34 @@ on:
33
workflow_dispatch:
44
schedule:
55
- cron: "0 2 * * *" # Runs nightly at 2 AM UTC
6+
67
jobs:
8+
setup_metadata:
9+
runs-on: ubuntu-24.04
10+
outputs:
11+
package_targets: ${{ steps.configure.outputs.package_targets }}
12+
steps:
13+
- name: Checkout repository
14+
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
15+
16+
- name: Generating package target matrix
17+
id: configure
18+
env:
19+
PYTORCH_DEV_DOCKER: "true"
20+
run: python ./build_tools/github_action/fetch_package_targets.py
21+
722
build-and-push-image:
823
strategy:
924
fail-fast: false
1025
matrix:
11-
targets:
12-
- amdgpu_target: "gfx942"
13-
- amdgpu_target: "gfx1100"
14-
- amdgpu_target: "gfx1201"
26+
targets: ${{ fromJSON(needs.setup_metadata.outputs.package_targets) }}
1527

16-
name: ${{ matrix.targets.amdgpu_target }}::Build
28+
name: ${{ matrix.targets.amdgpu_family }}::Build
1729
runs-on: azure-linux-scale-rocm
30+
needs: [setup_metadata]
1831
env:
1932
REGISTRY: ghcr.io
20-
IMAGE_NAME: ROCm/therock_pytorch_dev_ubuntu_24_04_${{ matrix.targets.amdgpu_target }}
33+
IMAGE_NAME: ROCm/therock_pytorch_dev_ubuntu_24_04_${{ matrix.targets.amdgpu_family }}
2134
# Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
2235
permissions:
2336
contents: read
@@ -56,7 +69,7 @@ jobs:
5669
# It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step.
5770
labels: |
5871
org.opencontainers.image.title=PyTorch ROCm Dev Image
59-
org.opencontainers.image.description=Nightly ROCm PyTorch Dev Docker for target ${{ matrix.targets.amdgpu_target }}
72+
org.opencontainers.image.description=Nightly ROCm PyTorch Dev Docker for target ${{ matrix.targets.amdgpu_family }}
6073
org.opencontainers.image.version=${{ env.VERSION }}
6174
org.opencontainers.image.created=${{ steps.meta.outputs.created }}
6275
org.opencontainers.image.revision=${{ github.sha }}
@@ -66,7 +79,7 @@ jobs:
6679
context: .
6780
file: dockerfiles/pytorch-dev/pytorch_dev_ubuntu_24.04.Dockerfile
6881
build-args: |
69-
AMDGPU_TARGETS=${{ matrix.targets.amdgpu_target }}
82+
AMDGPU_TARGETS=${{ matrix.targets.amdgpu_family }}
7083
push: true
7184
tags: ${{ steps.meta.outputs.tags }}
7285
labels: ${{ steps.meta.outputs.labels }}

.github/workflows/setup.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,4 +57,4 @@ jobs:
5757
INPUT_BUILD_WINDOWS_AMDGPU_FAMILIES: ${{ github.event.inputs.build_windows_amdgpu_families }}
5858
INPUT_TEST_LINUX_AMDGPU_FAMILIES: ${{ github.event.inputs.test_linux_amdgpu_families }}
5959
INPUT_TEST_WINDOWS_AMDGPU_FAMILIES: ${{ github.event.inputs.test_windows_amdgpu_families }}
60-
run: ./build_tools/configure_ci.py
60+
run: ./build_tools/github_action/configure_ci.py

.github/workflows/test_release_packages.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
id: configure
2626
env:
2727
TARGET: ${{ inputs.target }}
28-
run: python ./build_tools/configure_target_run.py
28+
run: python ./build_tools/github_action/configure_target_run.py
2929

3030

3131
test_release_packages:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
"""
2+
This AMD GPU Family Matrix is the "source of truth" for GitHub workflows, indicating which families and test runners are available to use
3+
"""
4+
5+
amdgpu_family_info_matrix = {
6+
"gfx94x": {
7+
"linux": {
8+
"test-runs-on": "linux-mi300-1gpu-ossci-rocm",
9+
"family": "gfx94X-dcgpu",
10+
"pytorch-target": "gfx942",
11+
}
12+
},
13+
"gfx110x": {
14+
"linux": {
15+
"test-runs-on": "",
16+
"family": "gfx110X-dgpu",
17+
"pytorch-target": "gfx1100",
18+
},
19+
"windows": {
20+
"test-runs-on": "",
21+
"family": "gfx110X-dgpu",
22+
},
23+
},
24+
"gfx115x": {
25+
"linux": {
26+
"test-runs-on": "",
27+
"family": "gfx1151",
28+
}
29+
},
30+
"gfx120x": {
31+
"linux": {
32+
"test-runs-on": "",
33+
"family": "gfx120X-all",
34+
"pytorch-target": "gfx1201",
35+
}
36+
},
37+
}

build_tools/configure_ci.py renamed to build_tools/github_action/configure_ci.py

+5-23
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import sys
4747
from typing import Iterable, List, Mapping, Optional
4848
import string
49+
from amdgpu_family_matrix import amdgpu_family_info_matrix
4950

5051
# --------------------------------------------------------------------------- #
5152
# General utilities
@@ -186,25 +187,6 @@ def should_ci_run_given_modified_paths(paths: Optional[Iterable[str]]) -> bool:
186187
# Matrix creation logic based on PR, push or workflow_dispatch
187188
# --------------------------------------------------------------------------- #
188189

189-
amdgpu_family_info_matrix = {
190-
"gfx94x": {
191-
"linux": {
192-
"test-runs-on": "linux-mi300-1gpu-ossci-rocm",
193-
"target": "gfx94X-dcgpu",
194-
}
195-
},
196-
"gfx110x": {
197-
"linux": {
198-
"test-runs-on": "",
199-
"target": "gfx110X-dgpu",
200-
},
201-
"windows": {
202-
"test-runs-on": "",
203-
"target": "gfx110X-dgpu",
204-
},
205-
},
206-
}
207-
208190
DEFAULT_LINUX_CONFIGURATIONS = ["gfx94X", "gfx110X"]
209191
DEFAULT_WINDOWS_CONFIGURATIONS = ["gfx110X"]
210192

@@ -363,10 +345,10 @@ def main(base_args, build_families, test_families):
363345
write_job_summary(
364346
f"""## Workflow configure results
365347
366-
* `build_linux_amdgpu_families`: {str([item.get("target") for item in build_linux_target_output])}
367-
* `build_windows_amdgpu_families`: {str([item.get("target") for item in build_windows_target_output])}
368-
* `test_linux_amdgpu_families`: {str([item.get("target") for item in test_linux_target_output])}
369-
* `test_windows_amdgpu_families`: {str([item.get("target") for item in test_windows_target_output])}
348+
* `build_linux_amdgpu_families`: {str([item.get("family") for item in build_linux_target_output])}
349+
* `build_windows_amdgpu_families`: {str([item.get("family") for item in build_windows_target_output])}
350+
* `test_linux_amdgpu_families`: {str([item.get("family") for item in test_linux_target_output])}
351+
* `test_windows_amdgpu_families`: {str([item.get("family") for item in test_windows_target_output])}
370352
"""
371353
)
372354

build_tools/configure_ci_test.py renamed to build_tools/github_action/configure_ci_test.py

+36-18
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
from unittest import TestCase, main
2-
import os
3-
42
import configure_ci
53

64

@@ -39,7 +37,11 @@ def test_valid_workflow_dispatch_matrix_generator(self):
3937
False, True, False, {}, build_families, False
4038
)
4139
linux_target_to_compare = [
42-
{"target": "gfx94X-dcgpu", "test-runs-on": "linux-mi300-1gpu-ossci-rocm"}
40+
{
41+
"test-runs-on": "linux-mi300-1gpu-ossci-rocm",
42+
"family": "gfx94X-dcgpu",
43+
"pytorch-target": "gfx942",
44+
}
4345
]
4446
self.assertEqual(linux_target_output, linux_target_to_compare)
4547
self.assertEqual(windows_target_output, [])
@@ -64,10 +66,14 @@ def test_valid_pull_request_matrix_generator(self):
6466
)
6567

6668
linux_target_to_compare = [
67-
{"test-runs-on": "", "target": "gfx110X-dgpu"},
68-
{"test-runs-on": "linux-mi300-1gpu-ossci-rocm", "target": "gfx94X-dcgpu"},
69+
{"test-runs-on": "", "family": "gfx110X-dgpu", "pytorch-target": "gfx1100"},
70+
{
71+
"test-runs-on": "linux-mi300-1gpu-ossci-rocm",
72+
"family": "gfx94X-dcgpu",
73+
"pytorch-target": "gfx942",
74+
},
6975
]
70-
windows_target_to_compare = [{"test-runs-on": "", "target": "gfx110X-dgpu"}]
76+
windows_target_to_compare = [{"test-runs-on": "", "family": "gfx110X-dgpu"}]
7177
self.assertEqual(linux_target_output, linux_target_to_compare)
7278
self.assertEqual(windows_target_output, windows_target_to_compare)
7379

@@ -79,10 +85,14 @@ def test_duplicate_pull_request_matrix_generator(self):
7985
True, False, False, base_args, {}, False
8086
)
8187
linux_target_to_compare = [
82-
{"test-runs-on": "", "target": "gfx110X-dgpu"},
83-
{"test-runs-on": "linux-mi300-1gpu-ossci-rocm", "target": "gfx94X-dcgpu"},
88+
{"test-runs-on": "", "family": "gfx110X-dgpu", "pytorch-target": "gfx1100"},
89+
{
90+
"test-runs-on": "linux-mi300-1gpu-ossci-rocm",
91+
"family": "gfx94X-dcgpu",
92+
"pytorch-target": "gfx942",
93+
},
8494
]
85-
windows_target_to_compare = [{"test-runs-on": "", "target": "gfx110X-dgpu"}]
95+
windows_target_to_compare = [{"test-runs-on": "", "family": "gfx110X-dgpu"}]
8696
self.assertEqual(linux_target_output, linux_target_to_compare)
8797
self.assertEqual(windows_target_output, windows_target_to_compare)
8898

@@ -94,10 +104,14 @@ def test_invalid_pull_request_matrix_generator(self):
94104
True, False, False, base_args, {}, False
95105
)
96106
linux_target_to_compare = [
97-
{"test-runs-on": "", "target": "gfx110X-dgpu"},
98-
{"test-runs-on": "linux-mi300-1gpu-ossci-rocm", "target": "gfx94X-dcgpu"},
107+
{"test-runs-on": "", "family": "gfx110X-dgpu", "pytorch-target": "gfx1100"},
108+
{
109+
"test-runs-on": "linux-mi300-1gpu-ossci-rocm",
110+
"family": "gfx94X-dcgpu",
111+
"pytorch-target": "gfx942",
112+
},
99113
]
100-
windows_target_to_compare = [{"test-runs-on": "", "target": "gfx110X-dgpu"}]
114+
windows_target_to_compare = [{"test-runs-on": "", "family": "gfx110X-dgpu"}]
101115
self.assertEqual(linux_target_output, linux_target_to_compare)
102116
self.assertEqual(windows_target_output, windows_target_to_compare)
103117

@@ -107,10 +121,14 @@ def test_empty_pull_request_matrix_generator(self):
107121
True, False, False, base_args, {}, False
108122
)
109123
linux_target_to_compare = [
110-
{"test-runs-on": "", "target": "gfx110X-dgpu"},
111-
{"test-runs-on": "linux-mi300-1gpu-ossci-rocm", "target": "gfx94X-dcgpu"},
124+
{"test-runs-on": "", "family": "gfx110X-dgpu", "pytorch-target": "gfx1100"},
125+
{
126+
"test-runs-on": "linux-mi300-1gpu-ossci-rocm",
127+
"family": "gfx94X-dcgpu",
128+
"pytorch-target": "gfx942",
129+
},
112130
]
113-
windows_target_to_compare = [{"test-runs-on": "", "target": "gfx110X-dgpu"}]
131+
windows_target_to_compare = [{"test-runs-on": "", "family": "gfx110X-dgpu"}]
114132
self.assertEqual(linux_target_output, linux_target_to_compare)
115133
self.assertEqual(windows_target_output, windows_target_to_compare)
116134

@@ -120,10 +138,10 @@ def test_main_branch_push_matrix_generator(self):
120138
False, False, True, base_args, {}, False
121139
)
122140
linux_target_to_compare = [
123-
{"test-runs-on": "linux-mi300-1gpu-ossci-rocm", "target": "gfx94X-dcgpu"},
124-
{"test-runs-on": "", "target": "gfx110X-dgpu"},
141+
{"test-runs-on": "linux-mi300-1gpu-ossci-rocm", "family": "gfx94X-dcgpu"},
142+
{"test-runs-on": "", "family": "gfx110X-dgpu"},
125143
]
126-
windows_target_to_compare = [{"test-runs-on": "", "target": "gfx110X-dgpu"}]
144+
windows_target_to_compare = [{"test-runs-on": "", "family": "gfx110X-dgpu"}]
127145
self.assertEqual(linux_target_output, linux_target_to_compare)
128146
self.assertEqual(windows_target_output, windows_target_to_compare)
129147

build_tools/configure_target_run.py renamed to build_tools/github_action/configure_target_run.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
2-
import json
3-
from configure_ci import set_github_output, amdgpu_family_info_matrix
2+
from configure_ci import set_github_output
3+
from amdgpu_family_matrix import amdgpu_family_info_matrix
44

55
# This file helps configure which target to run
66

@@ -9,7 +9,7 @@
99

1010

1111
def main(args):
12-
target = args.get("target")
12+
target = args.get("target").lower()
1313
for key in amdgpu_family_info_matrix.keys():
1414
# If the amdgpu_family matrix key is inside the lowercased target (ex: gfx94x in gfx94x-dcgpu)
1515
if key in target:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import os
2+
import json
3+
from configure_ci import set_github_output
4+
from amdgpu_family_matrix import amdgpu_family_info_matrix
5+
6+
# This file helps generate a package target matrix for portable_linux_package_matrix.yml and publish_pytorch_dev_docker.yml
7+
8+
9+
def main(args):
    """Build the package target matrix and publish it as `package_targets`.

    Walks every family in `amdgpu_family_info_matrix` and collects one
    `{"amdgpu_family": <value>}` entry per family from its "linux" section.
    The resulting list is JSON-encoded and handed to `set_github_output`
    under the key `package_targets`.

    Args:
        args: Mapping with the key "PYTORCH_DEV_DOCKER". When its value is
            the string "true", the "pytorch-target" field of each Linux
            entry is used; otherwise the "family" field is used.

    Families that have no "linux" section, or whose Linux section lacks the
    selected field, are skipped so that the emitted matrix never contains a
    null entry.
    """
    pytorch_dev_docker = args.get("PYTORCH_DEV_DOCKER") == "true"
    # Which field of the Linux entry provides the value for this workflow.
    target_key = "pytorch-target" if pytorch_dev_docker else "family"

    package_targets = []
    for platform_info in amdgpu_family_info_matrix.values():
        # Not every family is guaranteed to define a Linux entry.
        linux_info = platform_info.get("linux")
        if not linux_info:
            continue

        # If there is not a target specified for the family, leave it out
        # of the matrix instead of emitting a null value.
        target = linux_info.get(target_key)
        if not target:
            continue

        package_targets.append({"amdgpu_family": target})

    set_github_output({"package_targets": json.dumps(package_targets)})
26+
27+
28+
if __name__ == "__main__":
29+
args = {}
30+
args["PYTORCH_DEV_DOCKER"] = os.getenv("PYTORCH_DEV_DOCKER")
31+
main(args)

0 commit comments

Comments
 (0)