diff --git a/include/api/debug.h b/include/api/debug.h
index 72075fb2b86..f4e4019031b 100644
--- a/include/api/debug.h
+++ b/include/api/debug.h
@@ -4,10 +4,11 @@
  * SPDX-License-Identifier: GPL-2.0-only
  */
 
+#pragma once
+
 #include
 
 #ifdef CONFIG_DEBUG_BUILD
-#pragma once
 
 #include
 #include
diff --git a/include/api/syscall.h b/include/api/syscall.h
index 42cb8fb5e85..9fb345fadc0 100644
--- a/include/api/syscall.h
+++ b/include/api/syscall.h
@@ -29,11 +29,11 @@
 }
 #endif
 
-exception_t handleSyscall(syscall_t syscall);
-exception_t handleInterruptEntry(void);
-exception_t handleUnknownSyscall(word_t w);
-exception_t handleUserLevelFault(word_t w_a, word_t w_b);
-exception_t handleVMFaultEvent(vm_fault_type_t vm_faultType);
+void handleSyscall(syscall_t syscall);
+void handleInterruptEntry(void);
+void handleUnknownSyscall(syscall_t syscall);
+void handleUserLevelFault(word_t w_a, word_t w_b);
+void handleVMFaultEvent(vm_fault_type_t vm_faultType);
 
 static inline word_t PURE getSyscallArg(word_t i, word_t *ipc_buffer)
 {
diff --git a/include/arch/arm/arch/machine.h b/include/arch/arm/arch/machine.h
index 5df92793556..4db5ea60386 100644
--- a/include/arch/arm/arch/machine.h
+++ b/include/arch/arm/arch/machine.h
@@ -63,6 +63,17 @@ static inline void arch_pause(void)
 {
     /* TODO */
 }
+
+static inline void ipi_mem_barrier(void)
+{
+    /* For GICv2 systems a dmb() is sufficient, but it is not enough with
+     * GICv3 due to the way IPIs are triggered (memory-mapped or via MSR
+     * instruction). A dmb() does not prevent re-ordering between memory
+     * accesses and instructions; that guarantee requires a dsb().
+     */
+    dsb_ishst();
+}
+
 #endif /* ENABLE_SMP_SUPPORT */
 
 /* Update the value of the actual regsiter to hold the expected value */
diff --git a/include/arch/arm/arch/machine/hardware.h b/include/arch/arm/arch/machine/hardware.h
index 029ea51c0ed..931e2d591b5 100644
--- a/include/arch/arm/arch/machine/hardware.h
+++ b/include/arch/arm/arch/machine/hardware.h
@@ -19,17 +19,7 @@ typedef word_t vm_fault_type_t;
 #define PAGE_BASE(_p, _s) ((_p) & ~MASK(pageBitsForSize((_s))))
 #define PAGE_OFFSET(_p, _s) ((_p) & MASK(pageBitsForSize((_s))))
 
-#define IPI_MEM_BARRIER \
-    do { \
-        /* This can be relaxed for GICv2 but for GICv3 dmb() no longer works */ \
-        /* since the way IPI is triggered is different (memory-mapped or MSR inst.) */ \
-        /* and dmb() is not able to avoid re-ordering between memory accesses and */ \
-        /* instructions. In order to support both GICv2 and v3 dsb() is required. */ \
-        dsb_ishst(); \
-    } while (0)
-
 #endif /* __ASSEMBLER__ */
 
 #define L1_CACHE_LINE_SIZE_BITS CONFIG_L1_CACHE_LINE_SIZE_BITS
 #define L1_CACHE_LINE_SIZE BIT(L1_CACHE_LINE_SIZE_BITS)
-
diff --git a/include/arch/riscv/arch/machine.h b/include/arch/riscv/arch/machine.h
index 568d35e36ad..1e69e99c216 100644
--- a/include/arch/riscv/arch/machine.h
+++ b/include/arch/riscv/arch/machine.h
@@ -71,6 +71,11 @@ static inline void fence_rw_rw(void)
     asm volatile("fence rw, rw" ::: "memory");
 }
 
+static inline void ipi_mem_barrier(void)
+{
+    fence_rw_rw();
+}
+
 static inline void fence_w_rw(void)
 {
     asm volatile("fence w, rw" ::: "memory");
diff --git a/include/arch/riscv/arch/machine/hardware.h b/include/arch/riscv/arch/machine/hardware.h
index 1c8506fb931..1e8759094db 100644
--- a/include/arch/riscv/arch/machine/hardware.h
+++ b/include/arch/riscv/arch/machine/hardware.h
@@ -122,9 +122,3 @@ static inline void arch_clean_invalidate_caches(void)
 
 #define LOAD_S STRINGIFY(LOAD)
 #define STORE_S STRINGIFY(STORE)
-
-#define IPI_MEM_BARRIER \
-    do { \
-        asm volatile("fence rw,rw" ::: "memory"); \
-    } while (0)
-
diff --git a/include/arch/x86/arch/kernel/x2apic.h b/include/arch/x86/arch/kernel/x2apic.h
index 3d517c2e73e..1a7a41b1fd8 100644
--- a/include/arch/x86/arch/kernel/x2apic.h
+++ b/include/arch/x86/arch/kernel/x2apic.h
@@ -65,6 +65,14 @@ static inline void apic_write_icr(word_t high, word_t low)
     x86_wrmsr(APIC_ICR, icr);
 }
 
-#define IPI_ICR_BARRIER asm volatile("mfence" ::: "memory")
-#define IPI_MEM_BARRIER IPI_ICR_BARRIER
+static inline void ipi_icr_barrier(void)
+{
+    asm volatile("mfence" ::: "memory");
+}
+
+static inline void ipi_mem_barrier(void)
+{
+    ipi_icr_barrier();
+}
+
 #endif /* CONFIG_X2APIC */
diff --git a/include/arch/x86/arch/kernel/xapic.h b/include/arch/x86/arch/kernel/xapic.h
index 2be88896e63..b3fbc0f9c01 100644
--- a/include/arch/x86/arch/kernel/xapic.h
+++ b/include/arch/x86/arch/kernel/xapic.h
@@ -66,7 +66,14 @@ static inline void apic_write_icr(word_t high, word_t low)
     apic_write_reg(APIC_ICR1, low);
 }
 
-#define IPI_ICR_BARRIER asm volatile("" ::: "memory")
-#define IPI_MEM_BARRIER IPI_ICR_BARRIER
-#endif /* CONFIG_XAPIC */
+static inline void ipi_icr_barrier(void)
+{
+    asm volatile("" ::: "memory");
+}
+
+static inline void ipi_mem_barrier(void)
+{
+    ipi_icr_barrier();
+}
+
+#endif /* CONFIG_XAPIC */
diff --git a/include/assert.h b/include/assert.h
index 579b8810bfb..931720662a2 100644
--- a/include/assert.h
+++ b/include/assert.h
@@ -12,13 +12,14 @@
 #ifdef CONFIG_DEBUG_BUILD
 
 void _fail(
-    const char *str,
     const char *file,
     unsigned int line,
-    const char *function
+    const char *function,
+    const char *str,
+    ...
 ) NORETURN;
 
-#define fail(s) _fail(s, __FILE__, __LINE__, __func__)
+#define fail(...) _fail(__FILE__, __LINE__, __func__, __VA_ARGS__)
 
 void _assert_fail(
     const char *assertion,
@@ -36,7 +37,7 @@ void _assert_fail(
 
 #else /* !DEBUG */
 
-#define fail(s) halt()
+#define fail(...) halt()
 
 #define assert(expr)
 
diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h
index f20561cdf5f..3a380a75b77 100644
--- a/include/benchmark/benchmark.h
+++ b/include/benchmark/benchmark.h
@@ -15,18 +15,18 @@
 #include
 
 #ifdef CONFIG_ENABLE_BENCHMARKS
-exception_t handle_SysBenchmarkFlushCaches(void);
-exception_t handle_SysBenchmarkResetLog(void);
-exception_t handle_SysBenchmarkFinalizeLog(void);
+void handle_SysBenchmarkFlushCaches(void);
+void handle_SysBenchmarkResetLog(void);
+void handle_SysBenchmarkFinalizeLog(void);
 #ifdef CONFIG_KERNEL_LOG_BUFFER
-exception_t handle_SysBenchmarkSetLogBuffer(void);
+void handle_SysBenchmarkSetLogBuffer(void);
 #endif /* CONFIG_KERNEL_LOG_BUFFER */
 #ifdef CONFIG_BENCHMARK_TRACK_UTILISATION
-exception_t handle_SysBenchmarkGetThreadUtilisation(void);
-exception_t handle_SysBenchmarkResetThreadUtilisation(void);
+void handle_SysBenchmarkGetThreadUtilisation(void);
+void handle_SysBenchmarkResetThreadUtilisation(void);
 #ifdef CONFIG_DEBUG_BUILD
-exception_t handle_SysBenchmarkDumpAllThreadsUtilisation(void);
-exception_t handle_SysBenchmarkResetAllThreadsUtilisation(void);
+void handle_SysBenchmarkDumpAllThreadsUtilisation(void);
+void handle_SysBenchmarkResetAllThreadsUtilisation(void);
 #endif /* CONFIG_DEBUG_BUILD */
 #endif /* CONFIG_BENCHMARK_TRACK_UTILISATION */
 #endif /* CONFIG_ENABLE_BENCHMARKS */
diff --git a/include/smp/ipi.h b/include/smp/ipi.h
index 9bf2c0e2d91..6e31844d1d5 100644
--- a/include/smp/ipi.h
+++ b/include/smp/ipi.h
@@ -157,7 +157,7 @@ void doMaskReschedule(word_t mask);
 
 #ifdef CONFIG_DEBUG_BUILD
-exception_t handle_SysDebugSendIPI(void);
+void handle_SysDebugSendIPI(void);
 #endif
 
 #endif /* ENABLE_SMP_SUPPORT */
diff --git a/libsel4/arch_include/arm/sel4/arch/syscalls.h b/libsel4/arch_include/arm/sel4/arch/syscalls.h
index d7ab6e7633e..3c807778250 100644
--- a/libsel4/arch_include/arm/sel4/arch/syscalls.h
+++ b/libsel4/arch_include/arm/sel4/arch/syscalls.h
@@ -629,10 +629,14 @@ LIBSEL4_INLINE_FUNC seL4_Uint32 seL4_DebugCapIdentify(seL4_CPtr cap)
     return (seL4_Uint32)cap;
 }
 
-char *strcpy(char *, const char *);
+char *strncpy(char *, const char *, seL4_Word);
 LIBSEL4_INLINE_FUNC void seL4_DebugNameThread(seL4_CPtr tcb, const char *name)
 {
-    strcpy((char *)seL4_GetIPCBuffer()->msg, name);
+    /* If name is already in the IPC buffer it can be used in place; otherwise
+     * the buffers must not overlap. */
+    char *ipc_buf = (char *)seL4_GetIPCBuffer()->msg;
+    if (name != ipc_buf) {
+        strncpy(ipc_buf, name, seL4_MsgMaxLength);
+    }
 
     seL4_Word unused0 = 0;
     seL4_Word unused1 = 0;
diff --git a/libsel4/arch_include/riscv/sel4/arch/syscalls.h b/libsel4/arch_include/riscv/sel4/arch/syscalls.h
index e8a48e1a18a..2ca3fe467e4 100644
--- a/libsel4/arch_include/riscv/sel4/arch/syscalls.h
+++ b/libsel4/arch_include/riscv/sel4/arch/syscalls.h
@@ -834,10 +834,14 @@ LIBSEL4_INLINE_FUNC seL4_Uint32 seL4_DebugCapIdentify(seL4_CPtr cap)
     return (seL4_Uint32)cap;
 }
 
-char *strcpy(char *, const char *);
+char *strncpy(char *, const char *, seL4_Word);
 LIBSEL4_INLINE_FUNC void seL4_DebugNameThread(seL4_CPtr tcb, const char *name)
 {
-    strcpy((char *)seL4_GetIPCBuffer()->msg, name);
+    /* If name is already in the IPC buffer it can be used in place; otherwise
+     * the buffers must not overlap. */
+    char *ipc_buf = (char *)seL4_GetIPCBuffer()->msg;
+    if (name != ipc_buf) {
+        strncpy(ipc_buf, name, seL4_MsgMaxLength);
+    }
 
     seL4_Word unused0 = 0;
     seL4_Word unused1 = 0;
diff --git a/libsel4/include/sel4/functions.h b/libsel4/include/sel4/functions.h
index cd6c1445297..63e07a9e1f4 100644
--- a/libsel4/include/sel4/functions.h
+++ b/libsel4/include/sel4/functions.h
@@ -103,3 +103,24 @@ LIBSEL4_INLINE_FUNC void seL4_SetCapReceivePath(seL4_CPtr receiveCNode, seL4_CPt
     ipcbuffer->receiveDepth = receiveDepth;
 }
 
+#ifdef CONFIG_DEBUG_BUILD
+
+/* Making this a macro avoids requiring stdarg.h to get va_start() */
+#define seL4_DebugNameThreadFmt(tcb, fmt, ...) \
+    do { \
+        char *ipc_buf = (char *)seL4_GetIPCBuffer()->msg; \
+        snprintf(ipc_buf, seL4_MsgMaxLength, fmt, __VA_ARGS__); \
+        seL4_DebugNameThread(tcb, ipc_buf); \
+    } while (0)
+
+// LIBSEL4_INLINE_FUNC void seL4_DebugNameThreadFmt(seL4_CPtr tcb, const char *fmt, ...)
+// {
+//     char *ipc_buf = (char *)seL4_GetIPCBuffer()->msg;
+//     va_list args;
+//     va_start(args, fmt);
+//     vsnprintf(ipc_buf, seL4_MsgMaxLength, fmt, args);
+//     va_end(args);
+//     seL4_DebugNameThread(tcb, ipc_buf);
+// }
+
+#endif /* CONFIG_DEBUG_BUILD */
diff --git a/libsel4/sel4_arch_include/ia32/sel4/sel4_arch/syscalls.h b/libsel4/sel4_arch_include/ia32/sel4/sel4_arch/syscalls.h
index 3d3a8f2dbff..577c7a6bcc4 100644
--- a/libsel4/sel4_arch_include/ia32/sel4/sel4_arch/syscalls.h
+++ b/libsel4/sel4_arch_include/ia32/sel4/sel4_arch/syscalls.h
@@ -862,10 +862,14 @@ LIBSEL4_INLINE_FUNC seL4_Uint32 seL4_DebugCapIdentify(seL4_CPtr cap)
     return (seL4_Uint32)cap;
 }
 
-char *strcpy(char *, const char *);
+char *strncpy(char *, const char *, seL4_Word);
 LIBSEL4_INLINE_FUNC void seL4_DebugNameThread(seL4_CPtr tcb, const char *name)
 {
-    strcpy((char *)seL4_GetIPCBuffer()->msg, name);
+    /* If name is already in the IPC buffer it can be used in place; otherwise
+     * the buffers must not overlap. */
+    char *ipc_buf = (char *)seL4_GetIPCBuffer()->msg;
+    if (name != ipc_buf) {
+        strncpy(ipc_buf, name, seL4_MsgMaxLength);
+    }
 
     seL4_Word unused0 = 0;
     seL4_Word unused1 = 0;
diff --git a/libsel4/sel4_arch_include/x86_64/sel4/sel4_arch/syscalls.h b/libsel4/sel4_arch_include/x86_64/sel4/sel4_arch/syscalls.h
index 2752cfc30b5..916fd3b8c27 100644
--- a/libsel4/sel4_arch_include/x86_64/sel4/sel4_arch/syscalls.h
+++ b/libsel4/sel4_arch_include/x86_64/sel4/sel4_arch/syscalls.h
@@ -658,11 +658,14 @@ LIBSEL4_INLINE_FUNC seL4_Uint32 seL4_DebugCapIdentify(seL4_CPtr cap)
 #endif
 
 #ifdef CONFIG_DEBUG_BUILD
-char *strcpy(char *, const char *);
+char *strncpy(char *, const char *, seL4_Word);
 LIBSEL4_INLINE_FUNC void seL4_DebugNameThread(seL4_CPtr tcb, const char *name)
 {
-
-    strcpy((char *)seL4_GetIPCBuffer()->msg, name);
+    /* If name is already in the IPC buffer it can be used in place; otherwise
+     * the buffers must not overlap. */
+    char *ipc_buf = (char *)seL4_GetIPCBuffer()->msg;
+    if (name != ipc_buf) {
+        strncpy(ipc_buf, name, seL4_MsgMaxLength);
+    }
 
     seL4_Word unused0 = 0;
     seL4_Word unused1 = 0;
diff --git a/src/api/syscall.c b/src/api/syscall.c
index 305f712b3d1..1b82da04112 100644
--- a/src/api/syscall.c
+++ b/src/api/syscall.c
@@ -34,18 +34,15 @@
 /* The haskell function 'handleEvent' is split into 'handleXXX' variants
  * for each event causing a kernel entry */
 
-exception_t handleInterruptEntry(void)
+void handleInterruptEntry(void)
 {
-    irq_t irq;
-
 #ifdef CONFIG_KERNEL_MCS
     if (SMP_TERNARY(clh_is_self_in_queue(), 1)) {
         updateTimestamp();
         checkBudget();
     }
 #endif
-
-    irq = getActiveIRQ();
+    irq_t irq = getActiveIRQ();
     if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) {
         handleInterrupt(irq);
     } else {
@@ -63,103 +60,126 @@ exception_t handleInterruptEntry(void)
 #ifdef CONFIG_KERNEL_MCS
     }
 #endif
-
-    return EXCEPTION_NONE;
 }
 
-exception_t handleUnknownSyscall(word_t w)
+void handleUnknownSyscall(syscall_t syscall)
 {
+    switch (syscall) {
+
 #ifdef CONFIG_PRINTING
-    if (w == SysDebugPutChar) {
-        kernel_putchar(getRegister(NODE_STATE(ksCurThread), capRegister));
-        return EXCEPTION_NONE;
+    case SysDebugPutChar: {
+        /* Debug printing is not coupled to CONFIG_DEBUG_BUILD, thus non-debug
+         * builds can also show status and error messages.
+         */
+        char c = getRegister(NODE_STATE(ksCurThread), capRegister);
+        kernel_putchar(c);
+        return;
     }
-    if (w == SysDebugDumpScheduler) {
+#endif /* CONFIG_PRINTING */
+
 #ifdef CONFIG_DEBUG_BUILD
+
+    case SysDebugDumpScheduler:
+        /* Without CONFIG_PRINTING this syscall still exists, but it does
+         * nothing.
+         */
+#ifdef CONFIG_PRINTING
         debug_dumpScheduler();
-#endif
-        return EXCEPTION_NONE;
-    }
-#endif
-#ifdef CONFIG_DEBUG_BUILD
-    if (w == SysDebugHalt) {
+#endif /* CONFIG_PRINTING */
+        return;
+
+    case SysDebugHalt: {
         tcb_t *UNUSED tptr = NODE_STATE(ksCurThread);
-        printf("Debug halt syscall from user thread %p \"%s\"\n", tptr, TCB_PTR_DEBUG_PTR(tptr)->tcbName);
+        printf("Debug halt syscall from user thread %p \"%s\"\n",
+               tptr, TCB_PTR_DEBUG_PTR(tptr)->tcbName);
         halt();
+        UNREACHABLE();
     }
-    if (w == SysDebugSnapshot) {
+
+    case SysDebugSnapshot: {
         tcb_t *UNUSED tptr = NODE_STATE(ksCurThread);
         printf("Debug snapshot syscall from user thread %p \"%s\"\n",
                tptr, TCB_PTR_DEBUG_PTR(tptr)->tcbName);
         debug_capDL();
-        return EXCEPTION_NONE;
+        return;
     }
-    if (w == SysDebugCapIdentify) {
+
+    case SysDebugCapIdentify: {
         word_t cptr = getRegister(NODE_STATE(ksCurThread), capRegister);
         lookupCapAndSlot_ret_t lu_ret = lookupCapAndSlot(NODE_STATE(ksCurThread), cptr);
         word_t cap_type = cap_get_capType(lu_ret.cap);
         setRegister(NODE_STATE(ksCurThread), capRegister, cap_type);
-        return EXCEPTION_NONE;
+        return;
     }
-    if (w == SysDebugNameThread) {
+
+    case SysDebugNameThread: {
         /* This is a syscall meant to aid debugging, so if anything goes wrong
-         * then assume the system is completely misconfigured and halt */
-        const char *name;
-        word_t len;
+         * then assume the system is completely misconfigured and halt
+         */
         word_t cptr = getRegister(NODE_STATE(ksCurThread), capRegister);
         lookupCapAndSlot_ret_t lu_ret = lookupCapAndSlot(NODE_STATE(ksCurThread), cptr);
-        /* ensure we got a TCB cap */
         word_t cap_type = cap_get_capType(lu_ret.cap);
+
+        /* ensure we got a TCB cap */
         if (cap_type != cap_thread_cap) {
             userError("SysDebugNameThread: cap is not a TCB, halting");
             halt();
+            UNREACHABLE();
        }
-        /* Add 1 to the IPC buffer to skip the message info word */
-        name = (const char *)(lookupIPCBuffer(true, NODE_STATE(ksCurThread)) + 1);
-        if (!name) {
+
+        seL4_IPCBuffer *ipcBuffer = (seL4_IPCBuffer *)lookupIPCBuffer(true, NODE_STATE(ksCurThread));
+        if (!ipcBuffer) {
             userError("SysDebugNameThread: Failed to lookup IPC buffer, halting");
             halt();
+            UNREACHABLE();
         }
-        /* ensure the name isn't too long */
-        len = strnlen(name, seL4_MsgMaxLength * sizeof(word_t));
-        if (len == seL4_MsgMaxLength * sizeof(word_t)) {
-            userError("SysDebugNameThread: Name too long, halting");
+
+        const char *name = (const char *)(&ipcBuffer->msg);
+        const word_t max_len = seL4_MsgMaxLength * sizeof(word_t);
+        assert(max_len == sizeof(ipcBuffer->msg));
+        word_t len = strnlen(name, max_len);
+        if (len == max_len) {
+            userError("SysDebugNameThread: Name exceeds %"SEL4_PRIu_word" chars, halting",
+                      max_len - 1);
             halt();
+            UNREACHABLE();
         }
         setThreadName(TCB_PTR(cap_thread_cap_get_capTCBPtr(lu_ret.cap)), name);
-        return EXCEPTION_NONE;
+        return;
     }
+
 #ifdef ENABLE_SMP_SUPPORT
-    if (w == SysDebugSendIPI) {
-        return handle_SysDebugSendIPI();
+    case SysDebugSendIPI: {
+        handle_SysDebugSendIPI();
+        return;
     }
 #endif /* ENABLE_SMP_SUPPORT */
 #endif /* CONFIG_DEBUG_BUILD */
 
 #ifdef CONFIG_DANGEROUS_CODE_INJECTION
-    if (w == SysDebugRun) {
-        ((void (*)(void *))getRegister(NODE_STATE(ksCurThread), capRegister))((void *)getRegister(NODE_STATE(ksCurThread),
-                                                                                                  msgInfoRegister));
-        return EXCEPTION_NONE;
+    case SysDebugRun: {
+        /* This syscall can be enabled even on non-debug builds. */
+        typedef void (*func_ptr)(void *ctx);
+        func_ptr injected_func = (func_ptr)getRegister(NODE_STATE(ksCurThread), capRegister);
+        void *ctx = (void *)getRegister(NODE_STATE(ksCurThread), msgInfoRegister);
+        injected_func(ctx);
+        return;
     }
-#endif
+#endif /* CONFIG_DANGEROUS_CODE_INJECTION */
 
 #ifdef CONFIG_KERNEL_X86_DANGEROUS_MSR
-    if (w == SysX86DangerousWRMSR) {
-        uint64_t val;
+
+    case SysX86DangerousWRMSR: {
         uint32_t reg = getRegister(NODE_STATE(ksCurThread), capRegister);
-        if (CONFIG_WORD_SIZE == 32) {
-            val = (uint64_t)getSyscallArg(0, NULL) | ((uint64_t)getSyscallArg(1, NULL) << 32);
-        } else {
-            val = getSyscallArg(0, NULL);
+        uint64_t val = getSyscallArg(0, NULL);
+        if (CONFIG_WORD_SIZE == 32) {
+            val |= (uint64_t)getSyscallArg(1, NULL) << 32;
         }
         x86_wrmsr(reg, val);
-        return EXCEPTION_NONE;
-    } else if (w == SysX86DangerousRDMSR) {
-        uint64_t val;
+        return;
+    }
+
+    case SysX86DangerousRDMSR: {
         uint32_t reg = getRegister(NODE_STATE(ksCurThread), capRegister);
-        val = x86_rdmsr(reg);
+        uint64_t val = x86_rdmsr(reg);
         int num = 1;
         if (CONFIG_WORD_SIZE == 32) {
             setMR(NODE_STATE(ksCurThread), NULL, 0, val & 0xffffffff);
@@ -168,66 +188,89 @@ exception_t handleUnknownSyscall(word_t w)
             setMR(NODE_STATE(ksCurThread), NULL, 1, val >> 32);
             num++;
         } else {
             setMR(NODE_STATE(ksCurThread), NULL, 0, val);
         }
-        setRegister(NODE_STATE(ksCurThread), msgInfoRegister, wordFromMessageInfo(seL4_MessageInfo_new(0, 0, 0, num)));
-        return EXCEPTION_NONE;
+        setRegister(NODE_STATE(ksCurThread), msgInfoRegister,
+                    wordFromMessageInfo(seL4_MessageInfo_new(0, 0, 0, num)));
+        return;
     }
-#endif
+
+#endif /* CONFIG_KERNEL_X86_DANGEROUS_MSR */
 
 #ifdef CONFIG_ENABLE_BENCHMARKS
-    switch (w) {
+
     case SysBenchmarkFlushCaches:
-        return handle_SysBenchmarkFlushCaches();
+        handle_SysBenchmarkFlushCaches();
+        return;
     case SysBenchmarkResetLog:
-        return handle_SysBenchmarkResetLog();
+        handle_SysBenchmarkResetLog();
+        return;
     case SysBenchmarkFinalizeLog:
-        return handle_SysBenchmarkFinalizeLog();
+        handle_SysBenchmarkFinalizeLog();
+        return;
 #ifdef CONFIG_KERNEL_LOG_BUFFER
     case SysBenchmarkSetLogBuffer:
-        return handle_SysBenchmarkSetLogBuffer();
+        handle_SysBenchmarkSetLogBuffer();
+        return;
 #endif /* CONFIG_KERNEL_LOG_BUFFER */
 #ifdef CONFIG_BENCHMARK_TRACK_UTILISATION
     case SysBenchmarkGetThreadUtilisation:
-        return handle_SysBenchmarkGetThreadUtilisation();
+        handle_SysBenchmarkGetThreadUtilisation();
+        return;
     case SysBenchmarkResetThreadUtilisation:
-        return handle_SysBenchmarkResetThreadUtilisation();
+        handle_SysBenchmarkResetThreadUtilisation();
+        return;
 #ifdef CONFIG_DEBUG_BUILD
     case SysBenchmarkDumpAllThreadsUtilisation:
-        return handle_SysBenchmarkDumpAllThreadsUtilisation();
+        handle_SysBenchmarkDumpAllThreadsUtilisation();
+        return;
    case SysBenchmarkResetAllThreadsUtilisation:
-        return handle_SysBenchmarkResetAllThreadsUtilisation();
+        handle_SysBenchmarkResetAllThreadsUtilisation();
+        return;
 #endif /* CONFIG_DEBUG_BUILD */
 #endif /* CONFIG_BENCHMARK_TRACK_UTILISATION */
     case SysBenchmarkNullSyscall:
-        return EXCEPTION_NONE;
-    default:
-        break; /* syscall is not for benchmarking */
-    } /* end switch(w) */
+        return;
+
 #endif /* CONFIG_ENABLE_BENCHMARKS */
 
+    default:
+        /* No matching handler so far, continue below. */
+        break;
+
+    } /* end switch (syscall) */
+
     MCS_DO_IF_BUDGET({
-#ifdef CONFIG_SET_TLS_BASE_SELF
-        if (w == SysSetTLSBase) {
+        switch (syscall) {
+
+#ifdef CONFIG_SET_TLS_BASE_SELF
+        case SysSetTLSBase: {
             word_t tls_base = getRegister(NODE_STATE(ksCurThread), capRegister);
-            /*
-             * This updates the real register as opposed to the thread state
+            /* This updates the real register as opposed to the thread state
              * value. For many architectures, the TLS variables only get
             * updated on a thread switch.
             */
-            return Arch_setTLSRegister(tls_base);
+            exception_t ret = Arch_setTLSRegister(tls_base);
+            if (unlikely(ret != EXCEPTION_NONE)) {
+                userError("could not set TLS register");
+                halt();
+                UNREACHABLE();
+            }
+            return;
        }
-#endif
-        current_fault = seL4_Fault_UnknownSyscall_new(w);
-        handleFault(NODE_STATE(ksCurThread));
+#endif /* CONFIG_SET_TLS_BASE_SELF */
+
+        default:
+            current_fault = seL4_Fault_UnknownSyscall_new(syscall);
+            handleFault(NODE_STATE(ksCurThread));
+
+        } /* end switch (syscall) */
     })
 
     schedule();
     activateThread();
-
-    return EXCEPTION_NONE;
 }
 
-exception_t handleUserLevelFault(word_t w_a, word_t w_b)
+void handleUserLevelFault(word_t w_a, word_t w_b)
 {
     MCS_DO_IF_BUDGET({
         current_fault = seL4_Fault_UserException_new(w_a, w_b);
@@ -235,11 +278,9 @@ exception_t handleUserLevelFault(word_t w_a, word_t w_b)
     })
     schedule();
     activateThread();
-
-    return EXCEPTION_NONE;
 }
 
-exception_t handleVMFaultEvent(vm_fault_type_t vm_faultType)
+void handleVMFaultEvent(vm_fault_type_t vm_faultType)
 {
     MCS_DO_IF_BUDGET({
@@ -252,8 +293,6 @@ exception_t handleVMFaultEvent(vm_fault_type_t vm_faultType)
 
     schedule();
     activateThread();
-
-    return EXCEPTION_NONE;
 }
 
 #ifdef CONFIG_KERNEL_MCS
@@ -262,38 +301,26 @@ static exception_t handleInvocation(bool_t isCall, bool_t isBlocking, bool_t can
 static exception_t handleInvocation(bool_t isCall, bool_t isBlocking)
 #endif
 {
-    seL4_MessageInfo_t info;
-    lookupCapAndSlot_ret_t lu_ret;
-    word_t *buffer;
     exception_t status;
-    word_t length;
-    tcb_t *thread;
-
-    thread = NODE_STATE(ksCurThread);
-
-    info = messageInfoFromWord(getRegister(thread, msgInfoRegister));
+    tcb_t *thread = NODE_STATE(ksCurThread);
 #ifndef CONFIG_KERNEL_MCS
     cptr_t cptr = getRegister(thread, capRegister);
 #endif
 
     /* faulting section */
-    lu_ret = lookupCapAndSlot(thread, cptr);
-
+    lookupCapAndSlot_ret_t lu_ret = lookupCapAndSlot(thread, cptr);
     if (unlikely(lu_ret.status != EXCEPTION_NONE)) {
-        userError("Invocation of invalid cap #%lu.", cptr);
+        userError("Invocation of invalid cap #%"SEL4_PRIu_word, cptr);
         current_fault = seL4_Fault_CapFault_new(cptr, false);
-
         if (isBlocking) {
             handleFault(thread);
         }
-
         return EXCEPTION_NONE;
     }
 
-    buffer = lookupIPCBuffer(false, thread);
-
+    word_t *buffer = lookupIPCBuffer(false, thread);
+    seL4_MessageInfo_t info = messageInfoFromWord(getRegister(thread, msgInfoRegister));
     status = lookupExtraCaps(thread, buffer, info);
-
     if (unlikely(status != EXCEPTION_NONE)) {
         userError("Lookup of extra caps failed.");
         if (isBlocking) {
@@ -303,21 +330,18 @@ static exception_t handleInvocation(bool_t isCall, bool_t isBlocking)
     }
 
     /* Syscall error/Preemptible section */
-    length = seL4_MessageInfo_get_length(info);
+    word_t length = seL4_MessageInfo_get_length(info);
     if (unlikely(length > n_msgRegisters && !buffer)) {
         length = n_msgRegisters;
     }
-#ifdef CONFIG_KERNEL_MCS
+
     status = decodeInvocation(seL4_MessageInfo_get_label(info), length,
                               cptr, lu_ret.slot, lu_ret.cap,
                               isBlocking, isCall,
-                              canDonate, firstPhase, buffer);
-#else
-    status = decodeInvocation(seL4_MessageInfo_get_label(info), length,
-                              cptr, lu_ret.slot, lu_ret.cap,
-                              isBlocking, isCall, buffer);
+#ifdef CONFIG_KERNEL_MCS
+                              canDonate, firstPhase,
 #endif
-
+                              buffer);
     if (unlikely(status == EXCEPTION_PREEMPTED)) {
         return status;
     }
@@ -470,9 +494,9 @@ static void handleRecv(bool_t isBlocking)
     }
 }
 
-#ifdef CONFIG_KERNEL_MCS
-static inline void mcsPreemptionPoint(void)
+static void checkPreemption(void)
 {
+#ifdef CONFIG_KERNEL_MCS
     /* at this point we could be handling a timer interrupt which actually ends the current
      * threads timeslice. However, preemption is possible on revoke, which could have deleted
      * the current thread and/or the current scheduling context, rendering them invalid. */
@@ -489,12 +513,13 @@ static inline void mcsPreemptionPoint(void)
      * then having cleared the SC. */
         NODE_STATE(ksConsumed) = 0;
     }
+#endif /* CONFIG_KERNEL_MCS */
+
+    irq_t irq = getActiveIRQ();
+    if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) {
+        handleInterrupt(irq);
+    }
 }
-#else
-#define handleRecv(isBlocking, canReply) handleRecv(isBlocking)
-#define mcsPreemptionPoint()
-#define handleInvocation(isCall, isBlocking, canDonate, firstPhase, cptr) handleInvocation(isCall, isBlocking)
-#endif
 
 static void handleYield(void)
 {
@@ -511,21 +536,21 @@ static void handleYield(void)
 #endif
 }
 
-exception_t handleSyscall(syscall_t syscall)
+#ifndef CONFIG_KERNEL_MCS
+#define handleRecv(isBlocking, canReply) handleRecv(isBlocking)
+#define handleInvocation(isCall, isBlocking, canDonate, firstPhase, cptr) handleInvocation(isCall, isBlocking)
+#endif
+
+void handleSyscall(syscall_t syscall)
 {
     exception_t ret;
-    irq_t irq;
 
     MCS_DO_IF_BUDGET({
         switch (syscall) {
         case SysSend:
             ret = handleInvocation(false, true, false, false, getRegister(NODE_STATE(ksCurThread), capRegister));
             if (unlikely(ret != EXCEPTION_NONE)) {
-                mcsPreemptionPoint();
-                irq = getActiveIRQ();
-                if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) {
-                    handleInterrupt(irq);
-                }
+                checkPreemption();
             }
             break;
 
@@ -533,22 +558,14 @@ exception_t handleSyscall(syscall_t syscall)
         case SysNBSend:
             ret = handleInvocation(false, false, false, false, getRegister(NODE_STATE(ksCurThread), capRegister));
             if (unlikely(ret != EXCEPTION_NONE)) {
-                mcsPreemptionPoint();
-                irq = getActiveIRQ();
-                if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) {
-                    handleInterrupt(irq);
-                }
+                checkPreemption();
             }
             break;
 
         case SysCall:
             ret = handleInvocation(true, true, true, false, getRegister(NODE_STATE(ksCurThread), capRegister));
             if (unlikely(ret != EXCEPTION_NONE)) {
-                mcsPreemptionPoint();
-                irq = getActiveIRQ();
-                if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) {
-                    handleInterrupt(irq);
-                }
+                checkPreemption();
             }
             break;
 
@@ -573,6 +590,7 @@ exception_t handleSyscall(syscall_t syscall)
         case SysNBWait:
             handleRecv(false, false);
             break;
+
         case SysReplyRecv: {
             cptr_t reply = getRegister(NODE_STATE(ksCurThread), replyRegister);
             ret = handleInvocation(false, false, true, true, reply);
@@ -586,29 +604,23 @@ exception_t handleSyscall(syscall_t syscall)
             cptr_t dest = getNBSendRecvDest();
             ret = handleInvocation(false, false, true, true, dest);
             if (unlikely(ret != EXCEPTION_NONE)) {
-                mcsPreemptionPoint();
-                irq = getActiveIRQ();
-                if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) {
-                    handleInterrupt(irq);
-                }
+                checkPreemption();
                 break;
             }
             handleRecv(true, true);
             break;
         }
 
-        case SysNBSendWait:
-            ret = handleInvocation(false, false, true, true, getRegister(NODE_STATE(ksCurThread), replyRegister));
+        case SysNBSendWait: {
+            cptr_t dest = getRegister(NODE_STATE(ksCurThread), replyRegister);
+            ret = handleInvocation(false, false, true, true, dest);
             if (unlikely(ret != EXCEPTION_NONE)) {
-                mcsPreemptionPoint();
-                irq = getActiveIRQ();
-                if (IRQT_TO_IRQ(irq) != IRQT_TO_IRQ(irqInvalid)) {
-                    handleInterrupt(irq);
-                }
+                checkPreemption();
                 break;
             }
             handleRecv(true, false);
             break;
+        }
 #endif
 
         case SysNBRecv:
             handleRecv(false, true);
             break;
 
         default:
-            fail("Invalid syscall");
+            fail("Invalid syscall %"SEL4_PRIu_word, syscall);
         }
     })
 
     schedule();
     activateThread();
-
-    return EXCEPTION_NONE;
 }
diff --git a/src/arch/arm/32/kernel/vspace.c b/src/arch/arm/32/kernel/vspace.c
index 4f57027c0fb..aaa197713bb 100644
--- a/src/arch/arm/32/kernel/vspace.c
+++ b/src/arch/arm/32/kernel/vspace.c
@@ -540,54 +540,44 @@ BOOT_CODE cap_t create_mapped_it_frame_cap(cap_t pd_cap, pptr_t pptr, vptr_t vpt
     return cap;
 }
 
-#ifndef CONFIG_ARM_HYPERVISOR_SUPPORT
-
-BOOT_CODE void activate_kernel_vspace(void)
-{
-    /* Ensure that there's nothing stale in newly-mapped regions, and
-       that everything we've written (particularly the kernel page tables)
-       is committed. */
-    cleanInvalidateL1Caches();
-    setCurrentPD(addrFromKPPtr(armKSGlobalPD));
-    invalidateLocalTLB();
-    lockTLBEntry(PPTR_BASE);
-    lockTLBEntry(PPTR_VECTOR_TABLE);
-}
-#else
 BOOT_CODE void activate_kernel_vspace(void)
 {
-    uint32_t r;
     /* Ensure that there's nothing stale in newly-mapped regions, and
        that everything we've written (particularly the kernel page tables)
       is committed. */
     cleanInvalidateL1Caches();
+
+#ifdef CONFIG_ARM_HYPERVISOR_SUPPORT
     /* Setup the memory attributes: We use 2 indicies (cachable/non-cachable) */
     setHMAIR((ATTRINDX_NONCACHEABLE << 0) | (ATTRINDX_CACHEABLE << 8), 0);
     setCurrentHypPD(addrFromKPPtr(armHSGlobalPGD));
     invalidateHypTLB();
-#if 0 /* Can't lock entries on A15 */
+    /* TODO find a better place to init the VMMU */
+    setVTCR(
+        /* [0:3]: T0SZ = 0 to use the full 2^32 input range
+         * [4]:   sign extension bit, must be set to T0SZ[3]
+         * [5]:   reserved
+         * [6:7]: SL0 = b00 (start at second level)
+         */
+        BIT(8)      /* Inner write-back, write-allocate */
+        | BIT(10)   /* Outer write-back, write-allocate */
+        | BIT(31)   /* Long descriptor format (not that we have a choice) */
+    );
+#else /* not CONFIG_ARM_HYPERVISOR_SUPPORT */
+    setCurrentPD(addrFromKPPtr(armKSGlobalPD));
+    invalidateLocalTLB();
+
+    /* TODO: clean up this TLB entry lockdown hack? It is not supported on
+     * ARMv8, and on ARMv7 we only do it when not in HYP mode, because the
+     * Cortex-A15 does not support it.
+     */
     lockTLBEntry(PPTR_BASE);
     lockTLBEntry(PPTR_VECTOR_TABLE);
-#endif
-    /* TODO find a better place to init the VMMU */
-    r = 0;
-    /* Translation range */
-    r |= (0x0 << 0);     /* 2^(32 -(0)) input range. */
-    r |= (r & 0x8) << 1; /* Sign bit */
-    /* starting level */
-    r |= (0x0 << 6);     /* Start at second level */
-    /* Sharability of tables */
-    r |= BIT(8);         /* Inner write-back, write-allocate */
-    r |= BIT(10);        /* Outer write-back, write-allocate */
-    /* Long descriptor format (not that we have a choice) */
-    r |= BIT(31);
-    setVTCR(r);
+#endif /* [not] CONFIG_ARM_HYPERVISOR_SUPPORT */
 }
 
-#endif /* CONFIG_ARM_HYPERVISOR_SUPPORT */
-
 BOOT_CODE void write_it_asid_pool(cap_t it_ap_cap, cap_t it_pd_cap)
 {
     asid_pool_t *ap = ASID_POOL_PTR(pptr_of_cap(it_ap_cap));
diff --git a/src/arch/arm/kernel/boot.c b/src/arch/arm/kernel/boot.c
index 1c8decc70c9..301ce0aec6f 100644
--- a/src/arch/arm/kernel/boot.c
+++ b/src/arch/arm/kernel/boot.c
@@ -34,7 +34,8 @@
  * spinning until the primary core has initialized all kernel structures and
  * then set it to 1.
  */
-BOOT_BSS static volatile int node_boot_lock;
+BOOT_BSS static int node_boot_lock;
+BOOT_BSS static word_t node_boot_mask;
 #endif /* ENABLE_SMP_SUPPORT */
 
 BOOT_BSS static region_t reserved[NUM_RESERVED_REGIONS];
@@ -261,10 +262,28 @@ BOOT_CODE static void init_plat(void)
 }
 
 #ifdef ENABLE_SMP_SUPPORT
-BOOT_CODE static bool_t try_init_kernel_secondary_core(void)
+
+BOOT_CODE static void update_smp_lock(word_t new_val)
+{
+    assert(node_boot_lock < new_val);
+    __atomic_store_n(&node_boot_lock, new_val, __ATOMIC_RELEASE);
+}
+
+BOOT_CODE static void wait_for_smp_lock_update(word_t new_val)
 {
-    /* need to first wait until some kernel init has been done */
-    while (!node_boot_lock);
+    for (;;) {
+        word_t v = __atomic_load_n(&node_boot_lock, __ATOMIC_ACQUIRE);
+        if (v == new_val) {
+            return;
+        }
+        assert(v < new_val);
+    }
+}
+
+BOOT_CODE static bool_t try_init_kernel_secondary_core(word_t core_id)
+{
+    /* Busy wait for the primary node to release the secondary nodes. */
+    wait_for_smp_lock_update(1);
 
     /* Perform cpu init */
     init_cpu();
@@ -280,21 +299,31 @@ BOOT_CODE static bool_t try_init_kernel_secondary_core(void)
     setIRQState(IRQReserved, CORE_IRQ_TO_IRQT(getCurrentCPUIndex(), INTERRUPT_VGIC_MAINTENANCE));
     setIRQState(IRQReserved, CORE_IRQ_TO_IRQT(getCurrentCPUIndex(), INTERRUPT_VTIMER_EVENT));
 #endif /* CONFIG_ARM_HYPERVISOR_SUPPORT */
-    NODE_LOCK_SYS;
 
+    /* initialize kernel on secondary nodes */
+    NODE_LOCK_SYS;
+    printf("core #%d init\n", (int)core_id);
     clock_sync_test();
-    ksNumCPUs++;
     init_core_state(SchedulerAction_ResumeCurrentThread);
+    NODE_UNLOCK;
 
+    /* Set BIT(core_id - 1) in node_boot_mask to tell the primary node that
+     * init on this node is done.
+     */
+    (void)__atomic_fetch_or(&node_boot_mask, BIT(core_id - 1), __ATOMIC_ACQ_REL);
+
+    /* Busy wait (again) for the primary node to ack SMP init and release the
+     * secondary nodes. */
+    wait_for_smp_lock_update(2);
 
     return true;
 }
 
 BOOT_CODE static void release_secondary_cpus(void)
 {
-    /* release the cpus at the same time */
-    assert(0 == node_boot_lock); /* Sanity check for a proper lock state. */
-    node_boot_lock = 1;
+    /* Release all nodes at the same time. Update the lock in a way that
+     * ensures the result is visible everywhere.
+     */
+    update_smp_lock(1);
 
     /*
      * At this point in time the primary core (executing this code) already uses
@@ -313,15 +342,46 @@ BOOT_CODE static void release_secondary_cpus(void)
     plat_cleanInvalidateL2Cache();
 #endif
 
-    /* Wait until all the secondary cores are done initialising */
-    while (ksNumCPUs != CONFIG_MAX_NUM_NODES) {
+    /* Wait until all the secondary cores are done initialising, i.e. the bit
+     * for each core is set. Each core has a bit, so missing bits in the mask
+     * indicate which core failed to start.
+     */
+    word_t missing_nodes = BIT(CONFIG_MAX_NUM_NODES - 1) - 1;
+    word_t ready_order[CONFIG_MAX_NUM_NODES - 1][2] = {0};
+    int idx = 0;
+    do {
 #ifdef ENABLE_SMP_CLOCK_SYNC_TEST_ON_BOOT
+        /* Secondary cores compare their time with our primary core's time, so
+         * keep updating the timestamp while spinning.
+         */
         NODE_STATE(ksCurTime) = getCurrentTime();
+        __atomic_thread_fence(__ATOMIC_RELEASE); /* ensure the write propagates */
 #endif
-        /* perform a memory acquire to get new values of ksNumCPUs, release for ksCurTime */
-        __atomic_thread_fence(__ATOMIC_ACQ_REL);
+        word_t mask = __atomic_load_n(&node_boot_mask, __ATOMIC_ACQUIRE);
+        /* Record newly ready cores, e.g. missing_nodes = 0b1110 and
+         * mask = 0b0100 gives new_cores_ready = 0b0100.
+         */
+        word_t new_cores_ready = missing_nodes & mask;
+        while (new_cores_ready > 0) {
+            unsigned int core_bit = wordBits - 1 - clzl(new_cores_ready);
+            new_cores_ready &= ~BIT(core_bit);
+            ready_order[idx][0] = timestamp();
+            ready_order[idx][1] = core_bit;
+            idx++;
+        }
+        missing_nodes &= ~mask;
+    } while (missing_nodes > 0);
+
+    for (int i = 0; i < ARRAY_SIZE(ready_order); i++) {
+        printf("[%"SEL4_PRIu_word"] core #%d up\n",
+               ready_order[i][0], (int)ready_order[i][1] + 1);
     }
 }
+
+BOOT_CODE static void release_secondary_cores_to_userland(void)
+{
+    update_smp_lock(2);
+}
+
 #endif /* ENABLE_SMP_SUPPORT */
 
 /* Main kernel initialisation function. */
@@ -607,22 +667,27 @@ static BOOT_CODE bool_t try_init_kernel(
         invalidateHypTLB();
     }
 
-    ksNumCPUs = 1;
+    /* primary node is available */
+    __atomic_store_n(&ksNumCPUs, 1, __ATOMIC_RELEASE);
 
     /* initialize BKL before booting up other cores */
     SMP_COND_STATEMENT(clh_lock_init());
     SMP_COND_STATEMENT(release_secondary_cpus());
 
-    /* All cores are up now, so there can be concurrency. The kernel booting is
+    /* All cores have finished booting now and wait to be released again to
+     * exit to userland. Grabbing the BKL is not really needed here, as there
+     * is no concurrency anymore. The kernel booting is
      * supposed to be finished before the secondary cores are released, all the
     * primary has to do now is schedule the initial thread. Currently there is
     * nothing that touches any global data structures, nevertheless we grab the
     * BKL here to play safe. It is released when the kernel is left.
     */
     NODE_LOCK_SYS;
-
+    ksNumCPUs = CONFIG_MAX_NUM_NODES;
     printf("Booting all finished, dropped to user space\n");
+    NODE_UNLOCK;
+
+    SMP_COND_STATEMENT(release_secondary_cores_to_userland());
 
-    /* kernel successfully initialized */
     return true;
 }
@@ -639,14 +704,15 @@ BOOT_CODE VISIBLE void init_kernel(
 
 #ifdef ENABLE_SMP_SUPPORT
     /* we assume there exists a cpu with id 0 and will use it for bootstrapping */
-    if (getCurrentCPUIndex() == 0) {
+    word_t core_id = getCurrentCPUIndex();
+    if (core_id == 0) {
         result = try_init_kernel(ui_p_reg_start,
                                  ui_p_reg_end,
                                  pv_offset,
                                  v_entry,
                                  dtb_addr_p,
                                  dtb_size);
     } else {
-        result = try_init_kernel_secondary_core();
+        result = try_init_kernel_secondary_core(core_id);
     }
 
 #else
diff --git a/src/arch/riscv/kernel/boot.c b/src/arch/riscv/kernel/boot.c
index 34d6f6db61a..12e26b9fa61 100644
--- a/src/arch/riscv/kernel/boot.c
+++ b/src/arch/riscv/kernel/boot.c
@@ -27,8 +27,9 @@
  * spinning until the primary core has initialized all kernel structures and
  * then set it to 1.
 */
-BOOT_BSS static volatile word_t node_boot_lock;
-#endif
+BOOT_BSS static int node_boot_lock;
+BOOT_BSS static word_t node_boot_mask;
+#endif /* ENABLE_SMP_SUPPORT */
 
 BOOT_BSS static region_t res_reg[NUM_RESERVED_REGIONS];
@@ -151,41 +152,117 @@ BOOT_CODE static void init_plat(void)
 
 #ifdef ENABLE_SMP_SUPPORT
 
+BOOT_CODE static void wait_for_lock_update(word_t new_val)
+{
+    for (;;) {
+        word_t v = __atomic_load_n(&node_boot_lock, __ATOMIC_ACQUIRE);
+        if (v == new_val) {
+            return;
+        }
+        assert(v < new_val);
+    }
+}
+
 BOOT_CODE static bool_t try_init_kernel_secondary_core(word_t hart_id, word_t core_id)
 {
-    while (!node_boot_lock);
+    /* Busy wait for the primary node to release the secondary nodes. Using
+     * C11 atomics guarantees proper visibility across threads and cores.
+     */
+    wait_for_lock_update(1);
 
-    fence_r_rw();
+    fence_r_rw(); /* ToDo: is this still necessary? */
 
     init_cpu();
+
+    /* initialize kernel on secondary nodes */
     NODE_LOCK_SYS;
 
     clock_sync_test();
-    ksNumCPUs++;
+    printf("core #%d (hart_id #%d) init\n", (int)core_id, (int)hart_id);
     init_core_state(SchedulerAction_ResumeCurrentThread);
+    NODE_UNLOCK;
 
+    /* Set BIT(core_id - 1) in node_boot_mask to tell the primary node that
+     * init on this node is done.
+     */
+    (void)__atomic_fetch_or(&node_boot_mask, BIT(core_id - 1), __ATOMIC_ACQ_REL);
+    ifence_local();
+
+    /* Busy wait (again) for the primary node to ack SMP init and release the
+     * secondary nodes.
+     */
+    wait_for_lock_update(2);
 
     return true;
 }
 
 BOOT_CODE static void release_secondary_cores(void)
 {
+    /* Release all nodes at the same time. Update the lock in a way that
+     * ensures the result is visible everywhere.
+     */
     assert(0 == node_boot_lock); /* Sanity check for a proper lock state. */
-    node_boot_lock = 1;
+    __atomic_store_n(&node_boot_lock, 1, __ATOMIC_RELEASE);
+
     /* At this point in time the primary core (executing this code) already uses
      * the seL4 MMU/cache setup. However, the secondary cores are still using
      * the elfloader's MMU/cache setup, and thus the update of node_boot_lock
      * may not be visible there if the setups differ. Currently, the mappings
-     * match, so a barrier is all that is needed.
+     * match, so __atomic_store_n() should be sufficient. It generates the
+     * assembler sequence
+     *     fence iorw,ow
+     *     amoswap.w zero,a5,(a3)   // a5 = 1, a3 = &node_boot_lock
+     * that ensures the write really happens and becomes globally visible, so
+     * the secondary harts boot before we start the polling loop that checks
+     * node_boot_mask to determine when all nodes are up. Having another
+     * explicit barrier should no longer be necessary.
      */
     fence_rw_rw();
 
-    while (ksNumCPUs != CONFIG_MAX_NUM_NODES) {
+    /* Wait until all the secondary cores are done initialising, i.e. the bit
+     * for each core is set. Each core has a bit, so missing bits in the mask
+     * indicate which core failed to start.
+     */
+    word_t missing_nodes = BIT(CONFIG_MAX_NUM_NODES - 1) - 1;
+    word_t ready_order[CONFIG_MAX_NUM_NODES - 1][2] = {0};
+    int idx = 0;
+    do {
 #ifdef ENABLE_SMP_CLOCK_SYNC_TEST_ON_BOOT
+        /* Secondary cores compare their time with our primary core's time, so
+         * keep updating the timestamp while spinning.
+         */
         NODE_STATE(ksCurTime) = getCurrentTime();
+        __atomic_thread_fence(__ATOMIC_RELEASE); /* ensure the write propagates */
 #endif
-        __atomic_thread_fence(__ATOMIC_ACQ_REL);
+
+        word_t mask = __atomic_load_n(&node_boot_mask, __ATOMIC_ACQUIRE);
+        /* Record newly ready cores, e.g. missing_nodes = 0b1110 and
+         * mask = 0b0100 gives new_cores_ready = 0b0100.
+         */
+        word_t new_cores_ready = missing_nodes & mask;
+        while (new_cores_ready > 0) {
+            unsigned int core_bit = wordBits - 1 - clzl(new_cores_ready);
+            new_cores_ready &= ~BIT(core_bit);
+            ready_order[idx][0] = riscv_read_cycle();
+            ready_order[idx][1] = core_bit;
+            idx++;
+        }
+        missing_nodes &= ~mask;
+    } while (missing_nodes > 0);
+
+    for (int i = 0; i < ARRAY_SIZE(ready_order); i++) {
+        printf("[%"SEL4_PRIu_word"] core #%d up\n",
+               ready_order[i][0], (int)ready_order[i][1] + 1);
     }
 }
+
+
+BOOT_CODE static void release_secondary_cores_to_userland(void)
+{
+    assert(1 == node_boot_lock);
+    __atomic_store_n(&node_boot_lock, 2, __ATOMIC_RELEASE);
+}
+
 #endif /* ENABLE_SMP_SUPPORT */
 
 /* Main kernel initialisation function. */
@@ -437,19 +514,26 @@ static BOOT_CODE bool_t try_init_kernel(
     /* finalise the bootinfo frame */
     bi_finalise();
 
-    ksNumCPUs = 1;
+    /* primary node is available */
+    __atomic_store_n(&ksNumCPUs, 1, __ATOMIC_RELEASE);
 
     SMP_COND_STATEMENT(clh_lock_init());
     SMP_COND_STATEMENT(release_secondary_cores());
 
-    /* All cores are up now, so there can be concurrency. The kernel booting is
+    /* All cores have finished booting now and wait to be released again to
+     * exit to userland. Grabbing the BKL is not really needed here, as there
+     * is no concurrency anymore. The kernel booting is
     * supposed to be finished before the secondary cores are released, all the
     * primary has to do now is schedule the initial thread. Currently there is
     * nothing that touches any global data structures, nevertheless we grab the
     * BKL here to play safe. It is released when the kernel is left.
     */
     NODE_LOCK_SYS;
-
+    ksNumCPUs = CONFIG_MAX_NUM_NODES;
     printf("Booting all finished, dropped to user space\n");
+    NODE_UNLOCK;
+
+    SMP_COND_STATEMENT(release_secondary_cores_to_userland());
+
     return true;
 }
diff --git a/src/arch/x86/smp/ipi.c b/src/arch/x86/smp/ipi.c
index 198148594b8..5c0dfeefd7e 100644
--- a/src/arch/x86/smp/ipi.c
+++ b/src/arch/x86/smp/ipi.c
@@ -110,7 +110,7 @@ static void x86_ipi_send_mask(interrupt_t ipi, word_t mask, bool_t isBlocking)
     } while (mask != 0);
 
     /* broadcast IPIs to clusters... */
-    IPI_ICR_BARRIER;
+    ipi_icr_barrier();
     for (int i = 0; i < nr_target_clusters; i++) {
         apic_send_ipi_cluster(ipi, target_clusters[i]);
     }
diff --git a/src/assert.c b/src/assert.c
index 5fa3f35b739..801feb14374 100644
--- a/src/assert.c
+++ b/src/assert.c
@@ -10,18 +10,19 @@
 #ifdef CONFIG_DEBUG_BUILD
 
 void _fail(
-    const char *s,
     const char *file,
     unsigned int line,
-    const char *function)
+    const char *function,
+    const char *s,
+    ...)
 {
-    printf(
-        "seL4 called fail at %s:%u in function %s, saying \"%s\"\n",
-        file,
-        line,
-        function,
-        s
-    );
+    va_list args;
+    printf("seL4 called fail at %s:%u in function %s, saying \"",
+           file, line, function);
+    va_start(args, s);
+    vprintf(s, args);
+    va_end(args);
+    printf("\"\n");
     halt();
 }
diff --git a/src/benchmark/benchmark.c b/src/benchmark/benchmark.c
index c4ba2fef32e..f974263e365 100644
--- a/src/benchmark/benchmark.c
+++ b/src/benchmark/benchmark.c
@@ -14,7 +14,7 @@
 
 #include
 
-exception_t handle_SysBenchmarkFlushCaches(void)
+void handle_SysBenchmarkFlushCaches(void)
 {
 #ifdef CONFIG_ARCH_ARM
     tcb_t *thread = NODE_STATE(ksCurThread);
@@ -26,17 +26,17 @@ exception_t handle_SysBenchmarkFlushCaches(void)
 #else
     arch_clean_invalidate_caches();
 #endif
-    return EXCEPTION_NONE;
+    return;
 }
 
-exception_t handle_SysBenchmarkResetLog(void)
+void handle_SysBenchmarkResetLog(void)
 {
 #ifdef CONFIG_KERNEL_LOG_BUFFER
     if (ksUserLogBuffer == 0) {
         userError("A user-level buffer has to be set before resetting benchmark.\
 Use seL4_BenchmarkSetLogBuffer\n");
         setRegister(NODE_STATE(ksCurThread), capRegister, seL4_IllegalOperation);
-        return EXCEPTION_SYSCALL_ERROR;
+        return;
     }
 
     ksLogIndex = 0;
@@ -55,10 +55,10 @@ exception_t handle_SysBenchmarkResetLog(void)
 #endif /* CONFIG_BENCHMARK_TRACK_UTILISATION */
 
     setRegister(NODE_STATE(ksCurThread), capRegister, seL4_NoError);
-    return EXCEPTION_NONE;
+    return;
 }
 
-exception_t handle_SysBenchmarkFinalizeLog(void)
+void handle_SysBenchmarkFinalizeLog(void)
 {
 #ifdef CONFIG_KERNEL_LOG_BUFFER
     ksLogIndexFinalized = ksLogIndex;
@@ -69,32 +69,32 @@ exception_t handle_SysBenchmarkFinalizeLog(void)
     benchmark_utilisation_finalise();
 #endif /* CONFIG_BENCHMARK_TRACK_UTILISATION */
 
-    return EXCEPTION_NONE;
+    return;
 }
 
 #ifdef CONFIG_KERNEL_LOG_BUFFER
-exception_t handle_SysBenchmarkSetLogBuffer(void)
+void handle_SysBenchmarkSetLogBuffer(void)
 {
     word_t cptr_userFrame = getRegister(NODE_STATE(ksCurThread), capRegister);
 
     if (benchmark_arch_map_logBuffer(cptr_userFrame) != EXCEPTION_NONE) {
         setRegister(NODE_STATE(ksCurThread), capRegister, seL4_IllegalOperation);
-        return EXCEPTION_SYSCALL_ERROR;
+        return;
     }
 
     setRegister(NODE_STATE(ksCurThread), capRegister, seL4_NoError);
-    return EXCEPTION_NONE;
+    return;
 }
 #endif /* CONFIG_KERNEL_LOG_BUFFER */
 
 #ifdef CONFIG_BENCHMARK_TRACK_UTILISATION
 
-exception_t handle_SysBenchmarkGetThreadUtilisation(void)
+void handle_SysBenchmarkGetThreadUtilisation(void)
 {
     benchmark_track_utilisation_dump();
-    return EXCEPTION_NONE;
+    return;
 }
 
-exception_t handle_SysBenchmarkResetThreadUtilisation(void)
+void handle_SysBenchmarkResetThreadUtilisation(void)
 {
     word_t tcb_cptr = getRegister(NODE_STATE(ksCurThread), capRegister);
     lookupCap_ret_t lu_ret;
@@ -105,18 +105,18 @@ exception_t handle_SysBenchmarkResetThreadUtilisation(void)
     cap_type = cap_get_capType(lu_ret.cap);
 
     if (cap_type != cap_thread_cap) {
         userError("SysBenchmarkResetThreadUtilisation: cap is not a TCB, halting");
-        return EXCEPTION_NONE;
+        return;
     }
 
     tcb_t *tcb = TCB_PTR(cap_thread_cap_get_capTCBPtr(lu_ret.cap));
 
     benchmark_track_reset_utilisation(tcb);
-    return EXCEPTION_NONE;
+    return;
 }
 
 #ifdef CONFIG_DEBUG_BUILD
 
-exception_t handle_SysBenchmarkDumpAllThreadsUtilisation(void)
+void handle_SysBenchmarkDumpAllThreadsUtilisation(void)
 {
     printf("{\n");
     printf("  \"BENCHMARK_TOTAL_UTILISATION\":%lu,\n",
@@ -140,15 +140,15 @@ exception_t handle_SysBenchmarkDumpAllThreadsUtilisation(void)
         }
     }
     printf("  ]\n}\n");
-    return EXCEPTION_NONE;
+    return;
 }
 
-exception_t handle_SysBenchmarkResetAllThreadsUtilisation(void)
+void handle_SysBenchmarkResetAllThreadsUtilisation(void)
 {
     for (tcb_t *curr = NODE_STATE(ksDebugTCBs); curr != NULL; curr = TCB_PTR_DEBUG_PTR(curr)->tcbDebugNext) {
         benchmark_track_reset_utilisation(curr);
     }
-    return EXCEPTION_NONE;
+    return;
 }
 
 #endif /* CONFIG_DEBUG_BUILD */
diff --git a/src/smp/ipi.c b/src/smp/ipi.c
index ccc52a2b192..3bbe6fbbf37 100644
--- a/src/smp/ipi.c
+++ b/src/smp/ipi.c
@@ -139,7 +139,7 @@ void generic_ipi_send_mask(irq_t ipi, word_t mask, bool_t isBlocking)
 
     if (nr_target_cores > 0) {
         /* sending IPIs... */
-        IPI_MEM_BARRIER;
+        ipi_mem_barrier();
         for (int i = 0; i < nr_target_cores; i++) {
             ipi_send_target(ipi, cpuIndexToID(target_cores[i]));
         }
@@ -147,7 +147,7 @@
 }
 
 #ifdef CONFIG_DEBUG_BUILD
-exception_t handle_SysDebugSendIPI(void)
+void handle_SysDebugSendIPI(void)
 {
 #ifdef CONFIG_ARCH_ARM
     word_t target = getRegister(NODE_STATE(ksCurThread), capRegister);
@@ -155,16 +155,19 @@ exception_t handle_SysDebugSendIPI(void)
     if (target > CONFIG_MAX_NUM_NODES) {
         userError("SysDebugSendIPI: Invalid target, halting");
         halt();
+        UNREACHABLE();
     }
     if (irq > 15) {
         userError("SysDebugSendIPI: Invalid IRQ, not a SGI, halting");
         halt();
+        UNREACHABLE();
     }
 
     ipi_send_target(CORE_IRQ_TO_IRQT(0, irq), BIT(target));
-    return EXCEPTION_NONE;
+    return;
 #else /* not CONFIG_ARCH_ARM */
     userError("SysDebugSendIPI: not supported on this architecture");
     halt();
+    UNREACHABLE();
 #endif /* [not] CONFIG_ARCH_ARM */
 }
 #endif /* CONFIG_DEBUG_BUILD */
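
Note on the seL4_DebugNameThread() overlap guard: it exists precisely so a caller may format a name directly into its own IPC buffer and pass that same pointer in, which is what seL4_DebugNameThreadFmt() does. A minimal user-level sketch of the intended use, assuming a debug kernel and that snprintf() is provided by the client's C library; the function, array and count below are hypothetical, for illustration only:

    #include <stdio.h>      /* snprintf(), used by the macro expansion */
    #include <sel4/sel4.h>

    /* Name a set of worker TCBs; tcbs[] and n are made-up example inputs. */
    static void name_workers(const seL4_CPtr *tcbs, int n)
    {
        for (int i = 0; i < n; i++) {
            /* Expands to snprintf() into the calling thread's IPC buffer,
             * then seL4_DebugNameThread() on that same buffer; the library
             * skips its strncpy() because name == ipc_buf. */
            seL4_DebugNameThreadFmt(tcbs[i], "worker-%d", i);
        }
    }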
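
The two-phase release protocol introduced in the ARM and RISC-V boot changes can be exercised outside the kernel. Below is a host-side model of the handshake, a sketch only: pthreads stand in for cores, the same GCC/Clang __atomic builtins are used as in the patch, and node_boot_lock, node_boot_mask and NODES mirror the kernel names, but nothing here is kernel code:

    #include <pthread.h>
    #include <stdio.h>

    #define NODES 4   /* stand-in for CONFIG_MAX_NUM_NODES */

    static int node_boot_lock;            /* 0 -> 1 (init) -> 2 (userland) */
    static unsigned long node_boot_mask;  /* BIT(core_id - 1) per secondary */

    static void update_smp_lock(int new_val)
    {
        __atomic_store_n(&node_boot_lock, new_val, __ATOMIC_RELEASE);
    }

    static void wait_for_smp_lock_update(int new_val)
    {
        while (__atomic_load_n(&node_boot_lock, __ATOMIC_ACQUIRE) != new_val) {
            /* spin, like the secondary cores during boot */
        }
    }

    static void *secondary_core(void *arg)
    {
        unsigned long core_id = (unsigned long)arg;
        wait_for_smp_lock_update(1);   /* phase 1: released for per-core init */
        /* ... per-core init would happen here ... */
        __atomic_fetch_or(&node_boot_mask, 1UL << (core_id - 1), __ATOMIC_ACQ_REL);
        wait_for_smp_lock_update(2);   /* phase 2: released to "userland" */
        return NULL;
    }

    int main(void)
    {
        pthread_t threads[NODES - 1];
        for (unsigned long id = 1; id < NODES; id++) {
            pthread_create(&threads[id - 1], NULL, secondary_core, (void *)id);
        }
        update_smp_lock(1);            /* release all secondaries for init */
        unsigned long all = (1UL << (NODES - 1)) - 1;
        while (__atomic_load_n(&node_boot_mask, __ATOMIC_ACQUIRE) != all) {
            /* poll, like release_secondary_cpus() */
        }
        update_smp_lock(2);            /* ack init, release to userland */
        for (int i = 0; i < NODES - 1; i++) {
            pthread_join(threads[i], NULL);
        }
        printf("all %d cores completed both phases\n", NODES);
        return 0;
    }

Compile with cc -pthread. What the model makes explicit is the design choice: the mask tells the primary which cores have reported in (a missing bit identifies a core that failed to start), while the lock value gates the two phases of the release.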