@@ -24,6 +24,10 @@ function addOptimizationPasses!(pm, opt_level=2)
24
24
constant_merge! (pm)
25
25
26
26
if opt_level < 2
27
+ cpu_features! (pm)
28
+ if opt_level == 1
29
+ instruction_simplify! (pm)
30
+ end
27
31
if LLVM. version () >= v " 12"
28
32
cfgsimplification! (pm; hoist_common_insts= true )
29
33
else
@@ -72,6 +76,7 @@ function addOptimizationPasses!(pm, opt_level=2)
72
76
else
73
77
cfgsimplification! (pm)
74
78
end
79
+ cpu_features! (pm)
75
80
scalar_repl_aggregates! (pm)
76
81
instruction_simplify! (pm)
77
82
jump_threading! (pm)
237
242
238
243
239
244
# # lowering intrinsics
245
"""
    cpu_features!(pm::PassManager)

Add a module pass named `LowerCPUFeatures` to `pm`; the pass runs the module-level
`cpu_features!` method.
"""
function cpu_features!(pm::PassManager)
    pass = ModulePass("LowerCPUFeatures", cpu_features!)
    return add!(pm, pass)
end
246
"""
    cpu_features!(mod::LLVM.Module)

Lower the `julia.cpu.have_fma.*` intrinsics in `mod` to constants.

For every function in `mod` whose name starts with `julia.cpu.have_fma.`, the name
suffix (`f32` or `f64`) selects the Julia type that is passed, together with the target
of the current compiler job, to `have_fma`. Each user of the intrinsic has its own uses
replaced by a `ConstantInt` (of the intrinsic's return type) holding that answer; the
users and the intrinsic itself are then deleted from the module. Unrecognised suffixes
are lowered to `false`.

Returns `true` if the module was modified, `false` otherwise.
"""
function cpu_features!(mod::LLVM.Module)
    job = current_job::CompilerJob
    changed = false

    argtyps = Dict(
        "f32" => Float32,
        "f64" => Float64,
    )

    # have_fma
    prefix = "julia.cpu.have_fma."
    # iterate over a snapshot: matching functions are deleted from the module below
    for f in collect(functions(mod))
        fn = LLVM.name(f)
        startswith(fn, prefix) || continue
        typnam = fn[(ncodeunits(prefix) + 1):end]
        ft = eltype(llvmtype(f))

        # determine whether this back-end supports FMA on this type
        supported = if haskey(argtyps, typnam)
            have_fma(job.target, argtyps[typnam])
        else
            # warn?
            false
        end
        replacement = ConstantInt(return_type(ft), supported)

        # substitute all uses of the intrinsic with a constant
        materialized = LLVM.Value[]
        for use in uses(f)
            val = user(use)
            replace_uses!(val, replacement)
            push!(materialized, val)
        end

        # remove the intrinsic and its uses
        for val in materialized
            @assert isempty(uses(val))
            unsafe_delete!(LLVM.parent(val), val)
        end
        @assert isempty(uses(f))
        unsafe_delete!(mod, f)

        # the declaration (and possibly its users) was removed, so report a modification
        changed = true
    end

    return changed
end
240
292
241
293
# lower object allocations to PTX malloc
242
294
#
0 commit comments