Skip to content

Adding 'sb' instruction to spin_delay() for ARM v8.5 onward #611

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion extra/gperftools/gperftools-2.15/src/base/spinlock.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@
#include "base/spinlock_internal.h"
#include "base/sysinfo.h" /* for GetSystemCPUsCount() */

#if defined(__GNUC__) && defined(__aarch64__)
#include <sys/auxv.h>
#endif // end __aarch64__

// NOTE on the Lock-state values:
//
// kSpinLockFree represents the unlocked state
Expand Down Expand Up @@ -68,7 +72,20 @@ inline void SpinlockPause(void) {
#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
__asm__ __volatile__("rep; nop" : : );
#elif defined(__GNUC__) && defined(__aarch64__)
__asm__ __volatile__("isb" : : );
// Cached result of the SB capability probe:
//   -1 = not probed yet, 0 = SB not reported (use ISB), 1 = SB reported.
// NOTE(review): this is a plain static int read and written with no
// synchronization visible here; concurrent first calls would all store the
// same probe result -- confirm this is acceptable for the callers.
static int use_spin_delay_sb = -1;

// Use the SB instruction if available, otherwise ISB.
// The SB path is marked as the expected one via __builtin_expect.
if (__builtin_expect(use_spin_delay_sb == 1, 1)) {
// SB is emitted through its raw encoding rather than its mnemonic.
__asm__ __volatile__(".inst 0xd50330ff \n"); // SB instruction encoding
} else if (use_spin_delay_sb == 0) {
__asm__ __volatile__(" isb; \n");
} else {
// First call: initialize the cached flag from the AT_HWCAP auxiliary-vector
// entry. No barrier instruction is issued on this path; the getauxval()
// function call itself is used as the delay.
if (getauxval(AT_HWCAP) & HWCAP_SB)
use_spin_delay_sb = 1;
else
use_spin_delay_sb = 0;
}
#endif
}

Expand Down
37 changes: 31 additions & 6 deletions storage/innobase/include/ut0ut.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include <thread>
#include <type_traits>

#if defined(__GNUC__) && defined(__aarch64__)
#include <sys/auxv.h>
#endif // end __aarch64__

#ifdef UNIV_DEBUG
#include <limits>
#include <random>
Expand Down Expand Up @@ -101,12 +105,33 @@ independent way by using YieldProcessor. */
#define UT_RELAX_CPU() YieldProcessor()
#elif defined(__aarch64__)
/* A "yield" instruction in aarch64 is essentially a nop, and does not cause
enough delay to help backoff. "isb" is a barrier that, especially inside a
loop, creates a small delay without consuming ALU resources.
Experiments shown that adding the isb instruction improves stability and reduces
result jitter. Adding more delay to the UT_RELAX_CPU than a single isb reduces
performance. */
#define UT_RELAX_CPU() __asm__ __volatile__("isb" ::: "memory")
enough delay to help backoff. On CPUs that implement an architecture version
older than Armv8.5, an "isb" can be used in a loop. It creates a small delay
without consuming ALU resources, by forcing a CPU flush. Experiments have shown
that adding the isb instruction improves stability and reduces result jitter.

For CPUs supporting AArch64 >=v8.5 an "sb" is a better choice. It also creates
a small delay, but instead of flushing the CPU it does so by serializing older instructions
to be non-speculative before it completes. This is less disruptive than an "isb" to high
performance CPUs.
*/
#define UT_RELAX_CPU() spin_delay()
/** Briefly stalls the calling CPU; intended for the body of a spin-wait loop.

Issues an "sb" instruction when the kernel reports HWCAP_SB in AT_HWCAP, and
an "isb" otherwise. The capability is probed once, on the first call, and the
result is cached in a function-local static.

Every path carries a "memory" clobber so that the function is also a compiler
barrier, exactly like the other UT_RELAX_CPU() implementations: without it the
compiler may keep a spun-on variable cached in a register across the delay.

NOTE(review): the cached flag is a plain int with no synchronization; threads
racing on the first call all store the same probe result, but confirm this is
acceptable under the project's data-race policy. */
static __inline__ void spin_delay(void) {
  /* -1: not probed yet, 0: SB not reported (use ISB), 1: SB reported. */
  static int use_spin_delay_sb = -1;

  if (__builtin_expect(use_spin_delay_sb == 1, 1)) {
    /* SB instruction, emitted through its raw encoding rather than its
    mnemonic. */
    __asm__ __volatile__(".inst 0xd50330ff \n" ::: "memory");
  } else if (use_spin_delay_sb == 0) {
    __asm__ __volatile__(" isb; \n" ::: "memory");
  } else {
    /* First call: probe the hardware capability. No barrier instruction is
    issued here; the getauxval() function call itself serves as the delay. */
    use_spin_delay_sb = (getauxval(AT_HWCAP) & HWCAP_SB) ? 1 : 0;
    /* Keep the compiler-barrier guarantee on this path as well. */
    __asm__ __volatile__("" ::: "memory");
  }
}
#else
#define UT_RELAX_CPU() __asm__ __volatile__("" ::: "memory")
#endif
Expand Down