@@ -96,9 +96,10 @@ enum class CPU : uint32_t {
96
96
amd_znver2,
97
97
amd_znver3,
98
98
amd_znver4,
99
+ amd_znver5,
99
100
};
100
101
101
- static constexpr size_t feature_sz = 11 ;
102
+ static constexpr size_t feature_sz = 12 ;
102
103
static constexpr FeatureName feature_names[] = {
103
104
#define JL_FEATURE_DEF (name, bit, llvmver ) {#name, bit, llvmver},
104
105
#define JL_FEATURE_DEF_NAME (name, bit, llvmver, str ) {str, bit, llvmver},
@@ -141,6 +142,10 @@ static constexpr FeatureDep deps[] = {
141
142
{vpclmulqdq, avx},
142
143
{vpclmulqdq, pclmul},
143
144
{avxvnni, avx2},
145
+ {avxvnniint8, avx2},
146
+ {avxvnniint16, avx2},
147
+ {avxifma, avx2},
148
+ {avxneconvert, avx2},
144
149
{avx512f, avx2},
145
150
{avx512dq, avx512f},
146
151
{avx512ifma, avx512f},
@@ -159,13 +164,18 @@ static constexpr FeatureDep deps[] = {
159
164
{avx512fp16, avx512vl},
160
165
{amx_int8, amx_tile},
161
166
{amx_bf16, amx_tile},
167
+ {amx_fp16, amx_tile},
168
+ {amx_complex, amx_tile},
162
169
{sse4a, sse3},
163
170
{xop, fma4},
164
171
{fma4, avx},
165
172
{fma4, sse4a},
166
173
{xsaveopt, xsave},
167
174
{xsavec, xsave},
168
175
{xsaves, xsave},
176
+ {sha512, avx2},
177
+ {sm3, avx},
178
+ {sm4, avx2},
169
179
};
170
180
171
181
// We require cx16 on 64bit by default. This can be overwritten with `-cx16`
@@ -236,6 +246,7 @@ constexpr auto znver2 = znver1 | get_feature_masks(clwb, rdpid, wbnoinvd);
236
246
constexpr auto znver3 = znver2 | get_feature_masks(shstk, pku, vaes, vpclmulqdq);
237
247
constexpr auto znver4 = znver3 | get_feature_masks(avx512f, avx512cd, avx512dq, avx512bw, avx512vl, avx512ifma, avx512vbmi,
238
248
avx512vbmi2, avx512vnni, avx512bitalg, avx512vpopcntdq, avx512bf16, gfni, shstk, xsaves);
249
+ // znver5: everything in znver4 plus the features below (each named once).
+ constexpr auto znver5 = znver4 | get_feature_masks(avxvnni, movdiri, movdir64b, avx512vp2intersect, prefetchi);
239
250
240
251
}
241
252
@@ -298,6 +309,7 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
298
309
{" znver2" , CPU::amd_znver2, CPU::generic, 0 , Feature::znver2},
299
310
{" znver3" , CPU::amd_znver3, CPU::amd_znver2, 120000 , Feature::znver3},
300
311
{" znver4" , CPU::amd_znver4, CPU::amd_znver3, 160000 , Feature::znver4},
312
+ {" znver5" , CPU::amd_znver5, CPU::amd_znver4, 190000 , Feature::znver5},
301
313
};
302
314
static constexpr size_t ncpu_names = sizeof (cpus) / sizeof (cpus[0 ]);
303
315
@@ -575,6 +587,9 @@ static CPU get_amd_processor_name(uint32_t family, uint32_t model, const uint32_
575
587
return CPU::amd_znver4;
576
588
}
577
589
return CPU::amd_znver3; // fallback
590
+ case 26 :
591
+ // if (model <= 0x77)
592
+ return CPU::amd_znver5;
578
593
}
579
594
}
580
595
@@ -660,11 +675,12 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
660
675
int32_t info7[4 ];
661
676
jl_cpuidex (info7, 7 , 1 );
662
677
features[9 ] = info7[0 ];
678
+ features[10 ] = info7[1 ];
663
679
}
664
680
if (maxleaf >= 0x14 ) {
665
681
int32_t info14[4 ];
666
682
jl_cpuidex (info14, 0x14 , 0 );
667
- features[10 ] = info14[1 ];
683
+ features[11 ] = info14[1 ];
668
684
}
669
685
670
686
// Fix up AVX bits to account for OS support and match LLVM model
@@ -705,7 +721,20 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
705
721
else {
706
722
cpu = uint32_t (CPU::generic);
707
723
}
708
-
724
+ /* Feature bits to register map
725
+ feature[0] = ecx
726
+ feature[1] = edx
727
+ feature[2] = leaf 7 ebx
728
+ feature[3] = leaf 7 ecx
729
+ feature[4] = leaf 7 edx
730
+ feature[5] = leaf 0x80000001 ecx
731
+ feature[6] = leaf 0x80000001 edx
732
+ feature[7] = leaf 0xd subleaf 1 eax
733
+ feature[8] = leaf 0x80000008 ebx
734
+ feature[9] = leaf 7 subleaf 1 eax
735
+ feature[10] = leaf 7 subleaf 1 ebx
736
+ feature[11] = leaf 0x14 ebx
737
+ */
709
738
return std::make_pair (cpu, features);
710
739
}
711
740
0 commit comments