Skip to content

Commit 33123de

Browse files
committed
[JDK-8353240] VectorRearrangeTest failures with 64-bit-element 128-bit vectors
PullRequest: graal/20645
2 parents 1b6cda4 + a36995e commit 33123de

File tree

1 file changed

+33
-17
lines changed

1 file changed

+33
-17
lines changed

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/vector/AMD64VectorShuffle.java

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,12 @@ yield switch (avxSize) {
150150
}
151151
case DWORD, SINGLE -> new PermuteOp(result, source, indices, encoding);
152152
case QWORD, DOUBLE -> {
153-
if (encoding == AMD64SIMDInstructionEncoding.EVEX || avxSize != YMM) {
153+
if (encoding == AMD64SIMDInstructionEncoding.EVEX && avxSize != XMM) {
154154
yield new PermuteOp(result, source, indices, encoding);
155-
} else {
155+
} else if (avxSize == YMM) {
156156
yield new PermuteOpWithTemps(gen, result, source, indices, encoding, 2, false);
157+
} else {
158+
yield new PermuteOpWithTemps(gen, result, source, indices, encoding, 1, false);
157159
}
158160
}
159161
default -> throw GraalError.shouldNotReachHereUnexpectedValue(eKind);
@@ -255,21 +257,35 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
255257
}
256258
case DWORD, SINGLE -> throw GraalError.shouldNotReachHere("should be PermuteOp");
257259
case QWORD, DOUBLE -> {
258-
GraalError.guarantee(encoding == AMD64SIMDInstructionEncoding.VEX && avxSize == YMM, "should be PermuteOp");
259-
Register indexReg = asRegister(indices);
260-
Register xtmp1Reg = asRegister(xtmps[0]);
261-
Register xtmp2Reg = asRegister(xtmps[1]);
262-
263-
// Transform into an int permute by transforming a 64-bit index with value v
264-
// into a pair of 32-bit indices v * 2, v * 2 + 1
265-
VexShiftOp.VPSLLQ.encoding(encoding).emit(masm, YMM, xtmp1Reg, indexReg, Integer.SIZE + 1);
266-
AMD64Address inc = (AMD64Address) crb.asLongConstRef(JavaConstant.forLong(1L << Integer.SIZE));
267-
VexRMOp.VPBROADCASTQ.encoding(encoding).emit(masm, YMM, xtmp2Reg, inc);
268-
VexRVMOp.VPOR.encoding(encoding).emit(masm, YMM, xtmp2Reg, xtmp1Reg, xtmp2Reg);
269-
VexShiftOp.VPSLLQ.encoding(encoding).emit(masm, YMM, xtmp1Reg, indexReg, 1);
270-
VexRVMOp.VPOR.encoding(encoding).emit(masm, YMM, xtmp1Reg, xtmp1Reg, xtmp2Reg);
271-
VexRVMOp op = eKind == AMD64Kind.QWORD ? VexRVMOp.VPERMD : VexRVMOp.VPERMPS;
272-
op.encoding(encoding).emit(masm, YMM, asRegister(result), xtmp1Reg, asRegister(source));
260+
if (avxSize == YMM) {
261+
GraalError.guarantee(encoding == AMD64SIMDInstructionEncoding.VEX, "should be PermuteOp");
262+
Register indexReg = asRegister(indices);
263+
Register xtmp1Reg = asRegister(xtmps[0]);
264+
Register xtmp2Reg = asRegister(xtmps[1]);
265+
266+
// Transform into an int permute by transforming a 64-bit index with value v
267+
// into a pair of 32-bit indices v * 2, v * 2 + 1
268+
VexShiftOp.VPSLLQ.encoding(encoding).emit(masm, YMM, xtmp1Reg, indexReg, Integer.SIZE + 1);
269+
AMD64Address inc = (AMD64Address) crb.asLongConstRef(JavaConstant.forLong(1L << Integer.SIZE));
270+
VexRMOp.VPBROADCASTQ.encoding(encoding).emit(masm, YMM, xtmp2Reg, inc);
271+
VexRVMOp.VPOR.encoding(encoding).emit(masm, YMM, xtmp2Reg, xtmp1Reg, xtmp2Reg);
272+
VexShiftOp.VPSLLQ.encoding(encoding).emit(masm, YMM, xtmp1Reg, indexReg, 1);
273+
VexRVMOp.VPOR.encoding(encoding).emit(masm, YMM, xtmp1Reg, xtmp1Reg, xtmp2Reg);
274+
VexRVMOp op = eKind == AMD64Kind.QWORD ? VexRVMOp.VPERMD : VexRVMOp.VPERMPS;
275+
op.encoding(encoding).emit(masm, YMM, asRegister(result), xtmp1Reg, asRegister(source));
276+
} else {
277+
GraalError.guarantee(avxSize == XMM, "should be PermuteOp");
278+
Register xtmpReg = asRegister(xtmps[0]);
279+
/*
280+
* VPERMILPD uses the SECOND bit in each element as the index. Note that
281+
* although the textual description of the instruction in the Intel SDM
282+
* (March 2025) says that "The control bits are located at bit 0 of each
283+
* quadword element", the pseudocode in the same manual as well as
284+
* experiments show that it is actually bit 1 that is the control bit.
285+
*/
286+
VexShiftOp.VPSLLQ.encoding(encoding).emit(masm, XMM, xtmpReg, asRegister(indices), 1);
287+
VexRVMOp.VPERMILPD.encoding(encoding).emit(masm, XMM, asRegister(result), asRegister(source), xtmpReg);
288+
}
273289
}
274290
default -> throw GraalError.shouldNotReachHereUnexpectedValue(eKind);
275291
}

0 commit comments

Comments
 (0)