Skip to content

Commit 5f68ad3

Browse files
committed
[GR-49418] Fix AArch64AtomicMove.
PullRequest: graal/17601
2 parents c9c290b + b6e870a commit 5f68ad3

File tree

1 file changed

+54
-51
lines changed

1 file changed

+54
-51
lines changed

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/aarch64/AArch64AtomicMove.java

Lines changed: 54 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ public CompareAndSwapOp(AArch64Kind accessKind, MemoryOrderMode memoryOrder, boo
111111
}
112112

113113
/**
114-
* Both cas and ld(a)xr produce a zero-extended value. Since comparisons must be at minimum
114+
* Both cas and ldxr produce a zero-extended value. Since comparisons must be at minimum
115115
* 32-bits, the expected value must also be zero-extended to produce an accurate comparison.
116116
*/
117117
private static void emitCompare(AArch64MacroAssembler masm, int memAccessSize, Register result, Register expected) {
@@ -168,53 +168,64 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
168168
throw GraalError.shouldNotReachHereUnexpectedValue(memoryOrder); // ExcludeFromJacocoGeneratedReport
169169
}
170170

171+
int moveSize = Math.max(memAccessSize, 32);
171172
if (AArch64LIRFlags.useLSE(masm)) {
172-
masm.mov(Math.max(memAccessSize, 32), result, expected);
173+
masm.mov(moveSize, result, expected);
173174
moveSPAndEmitCode(masm, asRegister(newValue), newVal -> {
174175
masm.cas(memAccessSize, result, newVal, address, acquire, release);
175176
});
176177
if (setConditionFlags) {
177178
emitCompare(masm, memAccessSize, result, expected);
178179
}
179180
} else {
181+
182+
try (ScratchRegister scratchRegister1 = masm.getScratchRegister(); ScratchRegister scratchRegister2 = masm.getScratchRegister()) {
183+
Label retry = new Label();
184+
masm.bind(retry);
185+
Register scratch2 = scratchRegister2.getRegister();
186+
Register newValueReg = asRegister(newValue);
187+
if (newValueReg.equals(sp)) {
188+
/*
189+
* SP cannot be used in csel or stl(x)r.
190+
*
191+
* Since csel overwrites scratch2, newValue must be newly loaded each loop
192+
* iteration. However, unless under heavy contention, the storeExclusive
193+
* should rarely fail.
194+
*/
195+
masm.mov(moveSize, scratch2, newValueReg);
196+
newValueReg = scratch2;
197+
}
198+
masm.loadExclusive(memAccessSize, result, address, false);
199+
200+
emitCompare(masm, memAccessSize, result, expected);
201+
masm.csel(moveSize, scratch2, newValueReg, result, AArch64Assembler.ConditionFlag.EQ);
202+
203+
/*
204+
* STLXR must be used also if acquire is set to ensure prior ldaxr/stlxr
205+
* instructions are not reordered after it.
206+
*/
207+
Register scratch1 = scratchRegister1.getRegister();
208+
masm.storeExclusive(memAccessSize, scratch1, scratch2, address, acquire || release);
209+
// if scratch1 == 0 then write successful, else retry.
210+
masm.cbnz(32, scratch1, retry);
211+
}
212+
180213
/*
181-
* Because the store is only conditionally emitted, a dmb is needed for performing a
182-
* release.
183-
*
184-
* Furthermore, even if the stlxr is emitted, if both acquire and release semantics
185-
* are required, then a dmb is anyways needed to ensure that the instruction
186-
* sequence:
214+
* From the Java perspective, the (ldxr, cmp, csel, stl(x)r) is a single atomic
215+
* operation which must abide by all requested semantics. Therefore, when acquire
216+
* semantics are needed, we use a full barrier after the store to guarantee that
217+
* instructions following the store cannot execute before it and violate acquire
218+
* semantics.
187219
*
188-
* A -> ldaxr -> stlxr -> B
189-
*
190-
* cannot be executed as:
191-
*
192-
* ldaxr -> B -> A -> stlxr
220+
* Note we could instead perform a conditional branch and when the comparison fails
221+
* skip the store, but this introduces an opportunity for branch mispredictions, and
222+
* also, when release semantics are needed, requires a barrier to be inserted before
223+
* the operation.
193224
*/
194-
if (release) {
225+
226+
if (acquire) {
195227
masm.dmb(AArch64Assembler.BarrierKind.ANY_ANY);
196228
}
197-
198-
moveSPAndEmitCode(masm, asRegister(newValue), newVal -> {
199-
try (ScratchRegister scratchRegister = masm.getScratchRegister()) {
200-
Register scratch = scratchRegister.getRegister();
201-
Label retry = new Label();
202-
Label fail = new Label();
203-
masm.bind(retry);
204-
masm.loadExclusive(memAccessSize, result, address, acquire);
205-
emitCompare(masm, memAccessSize, result, expected);
206-
masm.branchConditionally(AArch64Assembler.ConditionFlag.NE, fail);
207-
/*
208-
* Even with the prior dmb, for releases it is still necessary to use stlxr
209-
* instead of stxr to guarantee subsequent lda(x)r/stl(x)r cannot be hoisted
210-
* above this instruction and thereby violate volatile semantics.
211-
*/
212-
masm.storeExclusive(memAccessSize, scratch, newVal, address, release);
213-
// if scratch == 0 then write successful, else retry.
214-
masm.cbnz(32, scratch, retry);
215-
masm.bind(fail);
216-
}
217-
});
218229
}
219230
}
220231
}
@@ -290,14 +301,10 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
290301
}
291302
}
292303
/*
293-
* Use a full barrier for the acquire semantics instead of ldaxr to guarantee that the
294-
* instruction sequence:
295-
*
296-
* A -> ldaxr -> stlxr -> B
297-
*
298-
* cannot be executed as:
299-
*
300-
* ldaxr -> B -> A -> stlxr
304+
* From the Java perspective, the (ldxr, add, stlxr) is a single atomic operation which
305+
* must abide by both acquire and release semantics. Therefore, we use a full barrier
306+
* after the store to guarantee that instructions following the store cannot execute
307+
* before it and violate acquire semantics.
301308
*/
302309
masm.dmb(AArch64Assembler.BarrierKind.ANY_ANY);
303310
}
@@ -405,14 +412,10 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
405412
// if scratch == 0 then write successful, else retry
406413
masm.cbnz(32, scratch, retry);
407414
/*
408-
* Use a full barrier for the acquire semantics instead of ldaxr to
409-
* guarantee that the instruction sequence:
410-
*
411-
* A -> ldaxr -> stlxr -> B
412-
*
413-
* cannot be executed as:
414-
*
415-
* ldaxr -> B -> A -> stlxr
415+
* From the Java perspective, the (ldxr, stlxr) is a single atomic operation
416+
* which must abide by both acquire and release semantics. Therefore, we use
417+
* a full barrier after the store to guarantee that instructions following
418+
* the store cannot execute before it and violate acquire semantics.
416419
*/
417420
masm.dmb(AArch64Assembler.BarrierKind.ANY_ANY);
418421
}

0 commit comments

Comments
 (0)