Skip to content

Commit ed5f35d

Browse files
gbaraldi authored and
KristofferC committed
Fix late gc lowering pass for vector intrinsics (#55864)
Fixes #55844 (cherry picked from commit 9986d97)
1 parent 440c0d3 commit ed5f35d

File tree

3 files changed

+62
-5
lines changed

3 files changed

+62
-5
lines changed

src/llvm-late-gc-lowering.cpp

Lines changed: 46 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
// This file is a part of Julia. License is MIT: https://julialang.org/license
22

33
#include "llvm-gc-interface-passes.h"
4+
#include "llvm/IR/Intrinsics.h"
5+
#include "llvm/Support/Casting.h"
46

57
#define DEBUG_TYPE "late_lower_gcroot"
68

@@ -171,12 +173,12 @@ static std::pair<Value*,int> FindBaseValue(const State &S, Value *V, bool UseCac
171173
(void)LI;
172174
break;
173175
}
174-
else if (auto II = dyn_cast<IntrinsicInst>(CurrentV)) {
175-
// Some intrinsics behave like LoadInst followed by a SelectInst
176-
// This should never happen in a derived addrspace (since those cannot be stored to memory)
177-
// so we don't need to lift these operations, but we do need to check if it's loaded and continue walking the base pointer
176+
else if (auto *II = dyn_cast<IntrinsicInst>(CurrentV)) {
178177
if (II->getIntrinsicID() == Intrinsic::masked_load ||
179178
II->getIntrinsicID() == Intrinsic::masked_gather) {
179+
// Some intrinsics behave like LoadInst followed by a SelectInst
180+
// This should never happen in a derived addrspace (since those cannot be stored to memory)
181+
// so we don't need to lift these operations, but we do need to check if it's loaded and continue walking the base pointer
180182
if (auto VTy = dyn_cast<VectorType>(II->getType())) {
181183
if (hasLoadedTy(VTy->getElementType())) {
182184
Value *Mask = II->getOperand(2);
@@ -205,16 +207,36 @@ static std::pair<Value*,int> FindBaseValue(const State &S, Value *V, bool UseCac
205207
// In general a load terminates a walk
206208
break;
207209
}
210+
else if (II->getIntrinsicID() == Intrinsic::vector_extract) {
211+
if (auto VTy = dyn_cast<VectorType>(II->getType())) {
212+
if (hasLoadedTy(VTy->getElementType())) {
213+
Value *Idx = II->getOperand(1);
214+
if (!isa<ConstantInt>(Idx)) {
215+
assert(isa<UndefValue>(Idx) && "unimplemented");
216+
(void)Idx;
217+
}
218+
CurrentV = II->getOperand(0);
219+
fld_idx = -1;
220+
continue;
221+
}
222+
}
223+
break;
224+
} else {
225+
// Unknown Intrinsic
226+
break;
227+
}
208228
}
209229
else if (auto CI = dyn_cast<CallInst>(CurrentV)) {
210230
auto callee = CI->getCalledFunction();
211231
if (callee && callee->getName() == "julia.gc_loaded") {
212232
CurrentV = CI->getArgOperand(0);
213233
continue;
214234
}
235+
// Unknown Call
215236
break;
216237
}
217238
else {
239+
// Unknown Instruction
218240
break;
219241
}
220242
}
@@ -518,6 +540,22 @@ SmallVector<int, 0> LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) {
518540
Numbers = NumberAll(S, IEI->getOperand(0));
519541
int ElNumber = Number(S, IEI->getOperand(1));
520542
Numbers[idx] = ElNumber;
543+
// C++17
544+
// } else if (auto *II = dyn_cast<IntrinsicInst>(CurrentV); II && II->getIntrinsicID() == Intrinsic::vector_insert) {
545+
} else if (isa<IntrinsicInst>(CurrentV) && cast<IntrinsicInst>(CurrentV)->getIntrinsicID() == Intrinsic::vector_insert) {
546+
auto *II = dyn_cast<IntrinsicInst>(CurrentV);
547+
// Vector insert is a bit like a shuffle so use the same approach
548+
SmallVector<int, 0> Numbers1 = NumberAll(S, II->getOperand(0));
549+
SmallVector<int, 0> Numbers2 = NumberAll(S, II->getOperand(1));
550+
unsigned first_idx = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
551+
for (unsigned i = 0; i < Numbers1.size(); ++i) {
552+
if (i < first_idx)
553+
Numbers.push_back(Numbers1[i]);
554+
else if (i - first_idx < Numbers2.size())
555+
Numbers.push_back(Numbers2[i - first_idx]);
556+
else
557+
Numbers.push_back(Numbers1[i]);
558+
}
521559
} else if (auto *IVI = dyn_cast<InsertValueInst>(CurrentV)) {
522560
Numbers = NumberAll(S, IVI->getAggregateOperand());
523561
auto Tracked = TrackCompositeType(IVI->getType());
@@ -1150,6 +1188,10 @@ State LateLowerGCFrame::LocalScan(Function &F) {
11501188
}
11511189
}
11521190
}
1191+
if (II->getIntrinsicID() == Intrinsic::vector_extract || II->getIntrinsicID() == Intrinsic::vector_insert) {
1192+
// These are not real defs
1193+
continue;
1194+
}
11531195
}
11541196
auto callee = CI->getCalledFunction();
11551197
if (callee && callee == typeof_func) {

test/llvmpasses/image-codegen.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
# RUN: export JULIA_LLVM_ARGS="--print-before=loop-vectorize --print-module-scope"
33
# RUN: rm -rf %t
44
# RUN: mkdir %t
5-
# RUN: julia --image-codegen --startup-file=no %s 2> %t/output.txt
5+
# RUN: julia --image-codegen -t1,0 --startup-file=no %s 2> %t/output.txt
66
# RUN: FileCheck %s < %t/output.txt
77

88
# COM: checks that global variables compiled in imaging codegen

test/llvmpasses/late-lower-gc.ll

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -164,6 +164,21 @@ define {} addrspace(10)* @gclift_switch({} addrspace(13)* addrspace(10)* %input,
164164
ret {} addrspace(10)* %ret
165165
}
166166

167+
; Shouldn't hang
168+
define void @vector_insert(<4 x {} addrspace(10)* > %0, <2 x {} addrspace(10)* > %1) {
169+
top:
170+
%pgcstack = call {}*** @julia.get_pgcstack()
171+
%2 = call <4 x {} addrspace(10)*> @llvm.vector.insert.v4p10.v2p10(<4 x {} addrspace(10)*> %0, <2 x {} addrspace(10)*> %1, i64 2)
172+
ret void
173+
}
174+
175+
define void @vector_extract(<4 x {} addrspace(10)* > %0, <2 x {} addrspace(10)* > %1) {
176+
top:
177+
%pgcstack = call {}*** @julia.get_pgcstack()
178+
%2 = call <2 x {} addrspace(10)*> @llvm.vector.extract.v2p10.v4p10(<4 x {} addrspace(10)* > %0, i64 2)
179+
ret void
180+
}
181+
167182
define void @decayar([2 x {} addrspace(10)* addrspace(11)*] %ar) {
168183
%v2 = call {}*** @julia.get_pgcstack()
169184
%e0 = extractvalue [2 x {} addrspace(10)* addrspace(11)*] %ar, 0

0 commit comments

Comments
 (0)