@@ -35,14 +35,6 @@ runtime_slug(job::CompilerJob{GCNCompilerTarget}) = "gcn-$(job.target.dev_isa)$(
35
35
const gcn_intrinsics = () # TODO : ("vprintf", "__assertfail", "malloc", "free")
36
36
isintrinsic (:: CompilerJob{GCNCompilerTarget} , fn:: String ) = in (fn, gcn_intrinsics)
37
37
38
- # we have to fake our target early in the pipeline because Julia's optimization passes
39
- # weren't designed for a non-0 stack addrspace, and the AMDGPU target is very strict
40
- # about which addrspaces are permitted for various code patterns
41
- function process_module! (job:: CompilerJob{GCNCompilerTarget} , mod:: LLVM.Module )
42
- triple! (mod, llvm_triple (NativeCompilerTarget ()))
43
- datalayout! (mod, julia_datalayout (NativeCompilerTarget ()))
44
- end
45
-
46
38
function process_entry! (job:: CompilerJob{GCNCompilerTarget} , mod:: LLVM.Module , entry:: LLVM.Function )
47
39
entry = invoke (process_entry!, Tuple{CompilerJob, LLVM. Module, LLVM. Function}, job, mod, entry)
48
40
@@ -58,6 +50,24 @@ function add_lowering_passes!(job::CompilerJob{GCNCompilerTarget}, pm::LLVM.Pass
58
50
add! (pm, ModulePass (" LowerThrowExtra" , lower_throw_extra!))
59
51
end
60
52
53
+ function finish_module! (@nospecialize (job:: CompilerJob{GCNCompilerTarget} ),
54
+ mod:: LLVM.Module , entry:: LLVM.Function )
55
+ # we have to fake our target early in the pipeline because Julia's optimization passes
56
+ # weren't designed for a non-0 stack addrspace, and the AMDGPU target is very strict
57
+ # about which addrspaces are permitted for various code patterns
58
+ triple! (mod, llvm_triple (NativeCompilerTarget ()))
59
+ datalayout! (mod, julia_datalayout (NativeCompilerTarget ()))
60
+
61
+ entry = invoke (finish_module!, Tuple{CompilerJob, LLVM. Module, LLVM. Function}, job, mod, entry)
62
+
63
+ if job. source. kernel
64
+ # work around bad byval codegen (JuliaGPU/GPUCompiler.jl#92)
65
+ entry = lower_byval (job, mod, entry)
66
+ end
67
+
68
+ return entry
69
+ end
70
+
61
71
# We need to do alloca rewriting (from 0 to 5) after Julia's optimization
62
72
# passes because of two reasons:
63
73
# 1. Debug builds call the target verifier first, which would trip if AMDGPU
@@ -80,18 +90,6 @@ function optimize_module!(job::CompilerJob{GCNCompilerTarget}, mod::LLVM.Module)
80
90
end
81
91
end
82
92
83
- function finish_module! (@nospecialize (job:: CompilerJob{GCNCompilerTarget} ),
84
- mod:: LLVM.Module , entry:: LLVM.Function )
85
- entry = invoke (finish_module!, Tuple{CompilerJob, LLVM. Module, LLVM. Function}, job, mod, entry)
86
-
87
- if job. source. kernel
88
- # work around bad byval codegen (JuliaGPU/GPUCompiler.jl#92)
89
- entry = lower_byval (job, mod, entry)
90
- end
91
-
92
- return entry
93
- end
94
-
95
93
96
94
# # LLVM passes
97
95
0 commit comments