Skip to content

Commit 4250be8

Browse files
add detection for zen 5 (#56967)
ref llvm/llvm-project@149a150 --------- Co-authored-by: gbaraldi <baraldigabriel@gmail.com>
1 parent 11ce171 commit 4250be8

File tree

2 files changed

+59
-4
lines changed

2 files changed

+59
-4
lines changed

src/features_x86.h

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,13 @@
55
#else
66
#define JL_X86_64ONLY_VER(x) x
77
#endif
8+
// The feature detection code mirrors LLVM's Host.cpp (linked below), so LLVM's bit assignments can be used as a reference
9+
// https://github.com/llvm/llvm-project/blob/3f7905733820851bc4f65cb4af693c3101cbf20d/llvm/lib/TargetParser/Host.cpp#L1257
10+
11+
// Each feature's bit number below is a flat index into the features array, which is treated as a bit array.
12+
// The index works as follows:
13+
// 32*i + j where i is the index into the array and j is the bit in the array.
14+
// There is a reference to what each index corresponds to in _get_host_cpu
815

916
// X86 features definition
1017
// EAX=1: ECX
@@ -79,6 +86,7 @@ JL_FEATURE_DEF(avx512vp2intersect, 32 * 4 + 8, 0)
7986
JL_FEATURE_DEF(serialize, 32 * 4 + 14, 110000)
8087
JL_FEATURE_DEF(tsxldtrk, 32 * 4 + 16, 110000)
8188
JL_FEATURE_DEF(pconfig, 32 * 4 + 18, 0)
89+
// JL_FEATURE_DEF(ibt, 32 * 4 + 20, 0)
8290
JL_FEATURE_DEF_NAME(amx_bf16, 32 * 4 + 22, 110000, "amx-bf16")
8391
JL_FEATURE_DEF(avx512fp16, 32 * 4 + 23, 140000)
8492
JL_FEATURE_DEF_NAME(amx_tile, 32 * 4 + 24, 110000, "amx-tile")
@@ -110,10 +118,28 @@ JL_FEATURE_DEF(clzero, 32 * 8 + 0, 0)
110118
JL_FEATURE_DEF(wbnoinvd, 32 * 8 + 9, 0)
111119

112120
// EAX=7,ECX=1: EAX
121+
JL_FEATURE_DEF(sha512, 32 * 9 + 0, 170000)
122+
JL_FEATURE_DEF(sm3, 32 * 9 + 1, 170000)
123+
JL_FEATURE_DEF(sm4, 32 * 9 + 2, 170000)
124+
JL_FEATURE_DEF(raoint, 32 * 9 + 3, 170000)
113125
JL_FEATURE_DEF(avxvnni, 32 * 9 + 4, 120000)
114126
JL_FEATURE_DEF(avx512bf16, 32 * 9 + 5, 0)
127+
JL_FEATURE_DEF(cmpccxadd, 32 * 9 + 7, 160000)
128+
JL_FEATURE_DEF_NAME(amx_fp16, 32 * 9 + 21, 160000, "amx-fp16")
129+
JL_FEATURE_DEF(hreset, 32 * 9 + 22, 160000)
130+
JL_FEATURE_DEF(avxifma, 32 * 9 + 23, 160000)
131+
132+
// EAX=7,ECX=1: EBX
133+
JL_FEATURE_DEF(avxvnniint8, 32 * 10 + 4, 160000)
134+
JL_FEATURE_DEF(avxneconvert, 32 * 10 + 5, 160000)
135+
JL_FEATURE_DEF_NAME(amx_complex, 32 * 10 + 8, 170000, "amx-complex")
136+
JL_FEATURE_DEF(avxvnniint16, 32 * 10 + 10, 170000)
137+
JL_FEATURE_DEF(prefetchi, 32 * 10 + 14, 160000)
138+
JL_FEATURE_DEF(usermsr, 32 * 10 + 15, 170000)
139+
// JL_FEATURE_DEF(avx10, 32 * 10 + 19, 170000) // TODO: What to do about avx10 and its mess?
140+
// JL_FEATURE_DEF(apxf, 32 * 10 + 21, 190000)
115141

116142
// EAX=0x14,ECX=0: EBX
117-
JL_FEATURE_DEF(ptwrite, 32 * 10 + 4, 0)
143+
JL_FEATURE_DEF(ptwrite, 32 * 11 + 4, 0)
118144

119145
#undef JL_X86_64ONLY_VER

src/processor_x86.cpp

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,10 @@ enum class CPU : uint32_t {
9696
amd_znver2,
9797
amd_znver3,
9898
amd_znver4,
99+
amd_znver5,
99100
};
100101

101-
static constexpr size_t feature_sz = 11;
102+
static constexpr size_t feature_sz = 12;
102103
static constexpr FeatureName feature_names[] = {
103104
#define JL_FEATURE_DEF(name, bit, llvmver) {#name, bit, llvmver},
104105
#define JL_FEATURE_DEF_NAME(name, bit, llvmver, str) {str, bit, llvmver},
@@ -141,6 +142,10 @@ static constexpr FeatureDep deps[] = {
141142
{vpclmulqdq, avx},
142143
{vpclmulqdq, pclmul},
143144
{avxvnni, avx2},
145+
{avxvnniint8, avx2},
146+
{avxvnniint16, avx2},
147+
{avxifma, avx2},
148+
{avxneconvert, avx2},
144149
{avx512f, avx2},
145150
{avx512dq, avx512f},
146151
{avx512ifma, avx512f},
@@ -159,13 +164,18 @@ static constexpr FeatureDep deps[] = {
159164
{avx512fp16, avx512vl},
160165
{amx_int8, amx_tile},
161166
{amx_bf16, amx_tile},
167+
{amx_fp16, amx_tile},
168+
{amx_complex, amx_tile},
162169
{sse4a, sse3},
163170
{xop, fma4},
164171
{fma4, avx},
165172
{fma4, sse4a},
166173
{xsaveopt, xsave},
167174
{xsavec, xsave},
168175
{xsaves, xsave},
176+
{sha512, avx2},
177+
{sm3, avx},
178+
{sm4, avx2},
169179
};
170180

171181
// We require cx16 on 64bit by default. This can be overwritten with `-cx16`
@@ -236,6 +246,7 @@ constexpr auto znver2 = znver1 | get_feature_masks(clwb, rdpid, wbnoinvd);
236246
constexpr auto znver3 = znver2 | get_feature_masks(shstk, pku, vaes, vpclmulqdq);
237247
constexpr auto znver4 = znver3 | get_feature_masks(avx512f, avx512cd, avx512dq, avx512bw, avx512vl, avx512ifma, avx512vbmi,
238248
avx512vbmi2, avx512vnni, avx512bitalg, avx512vpopcntdq, avx512bf16, gfni, shstk, xsaves);
249+
// Zen 5 feature set: everything in znver4 plus the features listed here.
// (avxvnni was previously listed twice; the duplicate was redundant.)
constexpr auto znver5 = znver4 | get_feature_masks(avxvnni, movdiri, movdir64b, avx512vp2intersect, prefetchi);
239250

240251
}
241252

@@ -298,6 +309,7 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
298309
{"znver2", CPU::amd_znver2, CPU::generic, 0, Feature::znver2},
299310
{"znver3", CPU::amd_znver3, CPU::amd_znver2, 120000, Feature::znver3},
300311
{"znver4", CPU::amd_znver4, CPU::amd_znver3, 160000, Feature::znver4},
312+
{"znver5", CPU::amd_znver5, CPU::amd_znver4, 190000, Feature::znver5},
301313
};
302314
static constexpr size_t ncpu_names = sizeof(cpus) / sizeof(cpus[0]);
303315

@@ -575,6 +587,9 @@ static CPU get_amd_processor_name(uint32_t family, uint32_t model, const uint32_
575587
return CPU::amd_znver4;
576588
}
577589
return CPU::amd_znver3; // fallback
590+
case 26:
591+
// if (model <= 0x77)
592+
return CPU::amd_znver5;
578593
}
579594
}
580595

@@ -660,11 +675,12 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
660675
int32_t info7[4];
661676
jl_cpuidex(info7, 7, 1);
662677
features[9] = info7[0];
678+
features[10] = info7[1];
663679
}
664680
if (maxleaf >= 0x14) {
665681
int32_t info14[4];
666682
jl_cpuidex(info14, 0x14, 0);
667-
features[10] = info14[1];
683+
features[11] = info14[1];
668684
}
669685

670686
// Fix up AVX bits to account for OS support and match LLVM model
@@ -705,7 +721,20 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
705721
else {
706722
cpu = uint32_t(CPU::generic);
707723
}
708-
724+
/* Feature bits to register map
725+
feature[0] = leaf 1 ecx
726+
feature[1] = leaf 1 edx
727+
feature[2] = leaf 7 ebx
728+
feature[3] = leaf 7 ecx
729+
feature[4] = leaf 7 edx
730+
feature[5] = leaf 0x80000001 ecx
731+
feature[6] = leaf 0x80000001 edx
732+
feature[7] = leaf 0xd subleaf 1 eax
733+
feature[8] = leaf 0x80000008 ebx
734+
feature[9] = leaf 7 subleaf 1 eax
735+
feature[10] = leaf 7 subleaf 1 ebx
736+
feature[11] = leaf 0x14 ebx
737+
*/
709738
return std::make_pair(cpu, features);
710739
}
711740

0 commit comments

Comments
 (0)