add CONFIG_BKL_SWAP_MANUAL
Signed-off-by: Axel Heider <axelheider@gmx.de>
axel-h committed Nov 10, 2024
1 parent 3a79a19 commit 4ea3b9f
Showing 5 changed files with 53 additions and 60 deletions.
65 changes: 35 additions & 30 deletions include/arch/arm/arch/model/smp.h
@@ -16,47 +16,52 @@ static inline cpu_id_t cpuIndexToID(word_t index)
return BIT(index);
}

static inline bool_t try_arch_atomic_exchange_rlx(void *ptr, void *new_val, void **prev)
#ifdef CONFIG_BKL_SWAP_MANUAL
static inline void *arch_atomic_exchange(void *head, void *node)
{
/* The ARM architecture had an atomic swap instruction in ARMv6, which got
* deprecated and was replaced by exclusive load and store instructions in
* ARMv7. The compiler builtin __atomic_exchange_n() executes them in a loop
* for as long as the exclusive store fails. Unfortunately, this approach can
* result in an undefined delay of up to seconds in the worst case if other
* cores touch memory in the same exclusivity reservation granule. The
* actual granule size is implementation specific. The explicit load and
* store instructions below, which report back success or failure, are the
* only way to work around this: the caller can loop over calling this
* function and perform other maintenance operations or wait on failure.
/* ARMv6 had an atomic swap instruction. ARMv7 deprecated it and recommended
* using exclusive load and store instructions instead, which the compiler
* builtin __atomic_exchange_n() executes in a loop for as long as the
* exclusive store fails. Unfortunately, this approach can result in an
* undefined delay of up to seconds in the worst case if other cores touch
* memory in the same exclusivity reservation granule. The actual granule
* size is implementation specific; for modern SMP-optimized cores it is
* expected to be small.
* ARMv8.1 eventually brought the atomic swap instruction back as part of
* LSE (Large System Extensions), and GCC support was added in v9.4. Thus,
* using __atomic_exchange_n() becomes an option when "-march=armv8.1-a" is
* passed for platforms whose cores implement ARMv8.1 or higher. Details
* remain to be analyzed.
*/
uint32_t atomic_status;
void *temp;

asm volatile(
LD_EX "%[prev_output], [%[ptr_val]] \n\t" /* ret = *ptr */
ST_EX "%" OP_WIDTH "[atomic_var], %[new_val] , [%[ptr_val]] \n\t" /* *ptr = new */
: [atomic_var] "=&r"(atomic_status), [prev_output]"=&r"(temp) /* output */
: [ptr_val] "r"(ptr), [new_val] "r"(new_val) /* input */
:
);

if (0 != atomic_status) {
/* Specs say 0 indicates success and 1 an exclusivity failure; any other
* value is not defined. If the atomic operation has failed, prev is
* left untouched. There seems to be no gain in updating it with the value
* obtained, as it might no longer be valid anyway.
__atomic_thread_fence(__ATOMIC_RELEASE); /* all writes must finish */

for (;;) {
void *prev;
uint32_t atomic_status;
asm volatile(
LD_EX "%[prev_output], [%[ptr_val]] \n\t" /* prev = *head */
ST_EX "%" OP_WIDTH "[atomic_var], %[new_val] , [%[ptr_val]] \n\t" /* *head = node */
: /* output */ [atomic_var] "=&r"(atomic_status), [prev_output]"=&r"(prev)
: /* input */ [ptr_val] "r"(head), [new_val] "r"(node)
: /* no clobber */
);

/* Specs say 0 indicates success and 1 an exclusivity failure; any other
* value is not defined.
*/
return false;
if (0 == atomic_status) {
/* The write updating the queue head has finished anyway; prevent the
* pipeline from starting any reads before passing this point.
*/
__atomic_thread_fence(__ATOMIC_ACQUIRE);
return prev;
}

/* keep spinning */
}

*prev = temp;
return true;
}
#endif /* CONFIG_BKL_SWAP_MANUAL */
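
For comparison with the ARMv8.1 LSE option mentioned in the comment above, here is a minimal sketch, not part of this commit, of what a builtin-based exchange could look like; the guard macro __ARM_FEATURE_ATOMICS (the ACLE feature macro compilers define for LSE-capable AArch64 targets) and the function name arch_atomic_exchange_lse are assumptions for illustration only.

/* Hypothetical sketch: with LSE the builtin can lower to a single swap
 * instruction, so no exclusive-monitor loop is involved. ACQ_REL mirrors
 * the release fence before and the acquire fence after the manual variant.
 */
#if defined(__ARM_FEATURE_ATOMICS)
static inline void *arch_atomic_exchange_lse(void *head, void *node)
{
    return __atomic_exchange_n((void **)head, node, __ATOMIC_ACQ_REL);
}
#endif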

#endif /* ENABLE_SMP_SUPPORT */

6 changes: 0 additions & 6 deletions include/arch/riscv/arch/model/smp.h
@@ -43,12 +43,6 @@ static inline void add_hart_to_core_map(word_t hart_id, word_t core_id)
coreMap.map[core_id] = hart_id;
}

static inline bool_t try_arch_atomic_exchange_rlx(void *ptr, void *new_val, void **prev)
{
*prev = __atomic_exchange_n((void **)ptr, new_val, __ATOMIC_RELAXED);
return true;
}

static inline CONST cpu_id_t getCurrentCPUIndex(void)
{
word_t sp;
6 changes: 0 additions & 6 deletions include/arch/x86/arch/model/smp.h
@@ -36,10 +36,4 @@ static inline PURE word_t getCurrentCPUID(void)
return cpu_mapping.index_to_cpu_id[getCurrentCPUIndex()];
}

static inline bool_t try_arch_atomic_exchange_rlx(void *ptr, void *new_val, void **prev)
{
*prev = __atomic_exchange_n((void **) ptr, new_val, __ATOMIC_RELAXED);
return true;
}

#endif /* ENABLE_SMP_SUPPORT */
9 changes: 9 additions & 0 deletions include/config.h
@@ -18,3 +18,12 @@
#define AARCH64_VSPACE_S2_START_L1
#endif
#endif

#ifdef CONFIG_ARCH_ARM
/* ToDo: Check if we can disable this by default and only enable it for
 * platforms where __atomic_exchange_n() cannot be used. Document the
 * reason in that case, as it might just be a compiler limitation that
 * gets lifted in a future release.
 */
#define CONFIG_BKL_SWAP_MANUAL
#endif
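
One possible direction for the ToDo above, sketched purely as an assumption and not part of this commit: gate the manual swap on the absence of native atomic swap support, e.g. via the ACLE feature macro __ARM_FEATURE_ATOMICS that compilers define when an ARMv8.1-A (or newer) target with LSE is selected.

#ifdef CONFIG_ARCH_ARM
/* Hypothetical refinement: fall back to the manual exclusive load/store
 * swap only when the compiler cannot emit a native atomic swap.
 */
#ifndef __ARM_FEATURE_ATOMICS
#define CONFIG_BKL_SWAP_MANUAL
#endif
#endif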
27 changes: 9 additions & 18 deletions include/smp/lock.h
@@ -61,24 +61,15 @@ static inline void FORCE_INLINE clh_lock_acquire(word_t cpu, bool_t irqPath)
{
clh_qnode_p_t volatile *node_owner = &big_kernel_lock.node_owners[cpu];
node_owner->node->value = CLHState_Pending;

__atomic_thread_fence(__ATOMIC_RELEASE); /* writes must finish */
/* Unfortunately, the compiler builtin __atomic_exchange_n() cannot be used
* here, because some architectures lack an actual atomic swap instruction
* and spin in a tight loop instead. The spinning duration is undefined;
* tests have shown it could be seconds in the worst case.
* Performance evaluation has also shown that it is best to have fences just
* before and after a loop over one relaxed atomic exchange attempt.
* The content of big_kernel_lock.node_owners[cpu].next will only be
* modified if the atomic exchange was successful; otherwise it is left
* untouched.
*/
while (!try_arch_atomic_exchange_rlx(&big_kernel_lock.head,
node_owner->node,
&node_owner->next)) {
/* busy waiting */
}
__atomic_thread_fence(__ATOMIC_ACQUIRE); /* prevent reads before passing here */

#ifdef CONFIG_BKL_SWAP_MANUAL
node_owner->next = arch_atomic_exchange(&big_kernel_lock.head,
node_owner->node);
#else
node_owner->next = __atomic_exchange_n(&big_kernel_lock.head,
node_owner->node,
__ATOMIC_ACQ_REL);
#endif

/* We do not have an __atomic_thread_fence here as this is already handled by the
* atomic_exchange just above */
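
For readers unfamiliar with the CLH queue lock used here, a generic, textbook-style sketch of how the value returned by the exchange is used: after swapping its own node in as the new queue tail, a CPU spins on its predecessor's node until that CPU releases the lock. Names and types below are illustrative and are not seL4's actual data structures or code.

/* Generic CLH acquire sketch, independent of seL4's big_kernel_lock. */
typedef struct clh_node {
    int locked; /* 1 while the owning CPU holds or is waiting for the lock */
} clh_node_t;

static clh_node_t *clh_tail; /* queue tail, updated with an atomic swap */

static inline clh_node_t *clh_acquire(clh_node_t *my_node)
{
    __atomic_store_n(&my_node->locked, 1, __ATOMIC_RELAXED);
    /* Enqueue ourselves; the previous tail becomes our predecessor. */
    clh_node_t *prev = __atomic_exchange_n(&clh_tail, my_node, __ATOMIC_ACQ_REL);
    /* Spin until the predecessor releases the lock. */
    while (__atomic_load_n(&prev->locked, __ATOMIC_ACQUIRE)) {
        /* busy waiting */
    }
    return prev; /* in CLH, the predecessor's node is recycled for the next acquire */
}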
