@@ -171,6 +171,11 @@ const __llvm_initialized = Ref(false)
171
171
# target-specific libraries
172
172
undefined_fns = LLVM. name .(decls (ir))
173
173
@timeit_debug to " target libraries" link_libraries! (job, ir, undefined_fns)
174
+
175
+ # GPU run-time library
176
+ if any (fn -> fn in runtime_fns, undefined_fns)
177
+ @timeit_debug to " runtime library" link_library! (ir, runtime)
178
+ end
174
179
end
175
180
end
176
181
@@ -263,20 +268,6 @@ const __llvm_initialized = Ref(false)
263
268
entry = functions (ir)[entry_fn]
264
269
end
265
270
266
- if libraries
267
- # GPU run-time library
268
- #
269
- # we do this late for multiple reasons:
270
- # - the runtime library is already optimized, so we don't want to re-optimize
271
- # - if `malloc(...) = 0`, the consequent stores are reduced to a trap, which
272
- # results in e.g. every `box` function just trapping. this breaks our test
273
- # suite, which runs without malloc, but expects actual code being generated.
274
- undefined_fns = LLVM. name .(decls (ir))
275
- if any (fn -> fn in runtime_fns, undefined_fns)
276
- @timeit_debug to " runtime library" link_library! (ir, runtime)
277
- end
278
- end
279
-
280
271
if ccall (:jl_is_debugbuild , Cint, ()) == 1
281
272
@timeit_debug to " verification" verify (ir)
282
273
end
0 commit comments