Skip to content

Commit b8cf0a4

Browse files
committed Sep 23, 2021
Delay the GCN triple hack to right before optimization.
1 parent 6e4cfc9 commit b8cf0a4

File tree

2 files changed

+32
-37
lines changed

2 files changed

+32
-37
lines changed
 

‎src/driver.jl

Lines changed: 14 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -234,23 +234,6 @@ const __llvm_initialized = Ref(false)
234234
unsafe_delete!(LLVM.parent(call), call)
235235
end
236236
end
237-
238-
# clean-up
239-
ModulePassManager() do pm
240-
# inline and optimize the call to the deferred code. in particular we want to
241-
# remove unnecessary alloca's that are created by pass-by-ref semantics.
242-
instruction_combining!(pm)
243-
always_inliner!(pm)
244-
scalar_repl_aggregates_ssa!(pm)
245-
promote_memory_to_register!(pm)
246-
gvn!(pm)
247-
248-
# merge duplicate functions, since each compilation invocation emits everything
249-
# XXX: ideally we want to avoid emitting these in the first place
250-
merge_functions!(pm)
251-
252-
run!(pm, ir)
253-
end
254237
end
255238

256239
# all deferred compilations should have been resolved
@@ -300,6 +283,20 @@ const __llvm_initialized = Ref(false)
300283
# merge constants (such as exception messages)
301284
constant_merge!(pm)
302285

286+
if do_deferred_codegen
287+
# inline and optimize the call to the deferred code. in particular we want to
288+
# remove unnecessary alloca's that are created by pass-by-ref semantics.
289+
instruction_combining!(pm)
290+
always_inliner!(pm)
291+
scalar_repl_aggregates_ssa!(pm)
292+
promote_memory_to_register!(pm)
293+
gvn!(pm)
294+
295+
# merge duplicate functions, since each compilation invocation emits everything
296+
# XXX: ideally we want to avoid emitting these in the first place
297+
merge_functions!(pm)
298+
end
299+
303300
run!(pm, ir)
304301
end
305302
end

‎src/gcn.jl

Lines changed: 18 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -35,14 +35,6 @@ runtime_slug(job::CompilerJob{GCNCompilerTarget}) = "gcn-$(job.target.dev_isa)$(
3535
const gcn_intrinsics = () # TODO: ("vprintf", "__assertfail", "malloc", "free")
3636
isintrinsic(::CompilerJob{GCNCompilerTarget}, fn::String) = in(fn, gcn_intrinsics)
3737

38-
# we have to fake our target early in the pipeline because Julia's optimization passes
39-
# weren't designed for a non-0 stack addrspace, and the AMDGPU target is very strict
40-
# about which addrspaces are permitted for various code patterns
41-
function process_module!(job::CompilerJob{GCNCompilerTarget}, mod::LLVM.Module)
42-
triple!(mod, llvm_triple(NativeCompilerTarget()))
43-
datalayout!(mod, julia_datalayout(NativeCompilerTarget()))
44-
end
45-
4638
function process_entry!(job::CompilerJob{GCNCompilerTarget}, mod::LLVM.Module, entry::LLVM.Function)
4739
entry = invoke(process_entry!, Tuple{CompilerJob, LLVM.Module, LLVM.Function}, job, mod, entry)
4840

@@ -58,6 +50,24 @@ function add_lowering_passes!(job::CompilerJob{GCNCompilerTarget}, pm::LLVM.Pass
5850
add!(pm, ModulePass("LowerThrowExtra", lower_throw_extra!))
5951
end
6052

53+
function finish_module!(@nospecialize(job::CompilerJob{GCNCompilerTarget}),
54+
mod::LLVM.Module, entry::LLVM.Function)
55+
# we have to fake our target early in the pipeline because Julia's optimization passes
56+
# weren't designed for a non-0 stack addrspace, and the AMDGPU target is very strict
57+
# about which addrspaces are permitted for various code patterns
58+
triple!(mod, llvm_triple(NativeCompilerTarget()))
59+
datalayout!(mod, julia_datalayout(NativeCompilerTarget()))
60+
61+
entry = invoke(finish_module!, Tuple{CompilerJob, LLVM.Module, LLVM.Function}, job, mod, entry)
62+
63+
if job.source.kernel
64+
# work around bad byval codegen (JuliaGPU/GPUCompiler.jl#92)
65+
entry = lower_byval(job, mod, entry)
66+
end
67+
68+
return entry
69+
end
70+
6171
# We need to do alloca rewriting (from 0 to 5) after Julia's optimization
6272
# passes because of two reasons:
6373
# 1. Debug builds call the target verifier first, which would trip if AMDGPU
@@ -80,18 +90,6 @@ function optimize_module!(job::CompilerJob{GCNCompilerTarget}, mod::LLVM.Module)
8090
end
8191
end
8292

83-
function finish_module!(@nospecialize(job::CompilerJob{GCNCompilerTarget}),
84-
mod::LLVM.Module, entry::LLVM.Function)
85-
entry = invoke(finish_module!, Tuple{CompilerJob, LLVM.Module, LLVM.Function}, job, mod, entry)
86-
87-
if job.source.kernel
88-
# work around bad byval codegen (JuliaGPU/GPUCompiler.jl#92)
89-
entry = lower_byval(job, mod, entry)
90-
end
91-
92-
return entry
93-
end
94-
9593

9694
## LLVM passes
9795

0 commit comments

Comments
 (0)