Skip to content

Commit e238eb2

Browse files
aykevl authored and deadprogram committed
rp2040: add multicore support
1 parent 5625f68 commit e238eb2

File tree

7 files changed

+304
-7
lines changed

7 files changed

+304
-7
lines changed

src/internal/task/task_stack_cortexm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build scheduler.tasks && cortexm
1+
//go:build (scheduler.tasks || scheduler.cores) && cortexm
22
#include <stdint.h>
33

44
uintptr_t SystemStack() {

src/internal/task/task_stack_cortexm.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//go:build scheduler.tasks && cortexm
1+
//go:build (scheduler.tasks || scheduler.cores) && cortexm
22

33
package task
44

src/runtime/gc_stack_cores.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ func gcMarkReachable() {
5656
// Busy-wait until all the other cores are ready. They certainly should be,
5757
// after the scanning we did above.
5858
for gcScanState.Load() != numCPU {
59-
spinLoopHint()
59+
spinLoopWait()
6060
}
6161
gcScanState.Store(0)
6262

@@ -71,7 +71,7 @@ func gcMarkReachable() {
7171

7272
// Busy-wait until this core finished scanning.
7373
for gcScanState.Load() == 0 {
74-
spinLoopHint()
74+
spinLoopWait()
7575
}
7676
gcScanState.Store(0)
7777
}
@@ -118,7 +118,7 @@ func gcResumeWorld() {
118118
// Busy-wait until the core acknowledges the signal (and is going to return
119119
// from the interrupt handler).
120120
for gcScanState.Load() != numCPU-1 {
121-
spinLoopHint()
121+
spinLoopWait()
122122
}
123123
gcScanState.Store(0)
124124
}

src/runtime/runtime_rp2040.go

Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,17 @@ package runtime
44

55
import (
66
"device/arm"
7+
"device/rp"
8+
"internal/task"
79
"machine"
810
"machine/usb/cdc"
11+
"runtime/interrupt"
12+
"runtime/volatile"
13+
"unsafe"
914
)
1015

16+
const numCPU = 2
17+
1118
// machineTicks is provided by package machine.
1219
func machineTicks() uint64
1320

@@ -43,6 +50,284 @@ func sleepTicks(d timeUnit) {
4350
}
4451
}
4552

53+
// Currently sleeping core, or 0xff.
54+
// Must only be accessed with the scheduler lock held.
55+
var sleepingCore uint8 = 0xff
56+
57+
// Return whether another core is sleeping.
58+
// May only be called with the scheduler lock held.
59+
func hasSleepingCore() bool {
60+
return sleepingCore != 0xff
61+
}
62+
63+
// Almost identical to sleepTicks, except that it will unlock/lock the scheduler
64+
// while sleeping and is interruptible by interruptSleepTicksMulticore.
65+
// This may only be called with the scheduler lock held.
66+
func sleepTicksMulticore(d timeUnit) {
67+
sleepingCore = uint8(currentCPU())
68+
69+
// Note: interruptSleepTicksMulticore will be able to interrupt this, since
70+
// it executes the "sev" instruction which would make sleepTicks return
71+
// immediately without sleeping. Even if it happens while configuring the
72+
// sleep operation.
73+
74+
schedulerLock.Unlock()
75+
sleepTicks(d)
76+
schedulerLock.Lock()
77+
78+
sleepingCore = 0xff
79+
}
80+
81+
// Interrupt an ongoing call to sleepTicksMulticore on another core.
82+
func interruptSleepTicksMulticore(wakeup timeUnit) {
83+
arm.Asm("sev")
84+
}
85+
86+
// Number of cores that are currently in schedulerUnlockAndWait.
87+
// It is possible for both cores to be sleeping, if the program is waiting for
88+
// an interrupt (or is deadlocked).
89+
var waitingCore uint8
90+
91+
// Put the scheduler to sleep, since there are no tasks to run.
92+
// This will unlock the scheduler lock, and must be called with the scheduler
93+
// lock held.
94+
func schedulerUnlockAndWait() {
95+
waitingCore++
96+
schedulerLock.Unlock()
97+
arm.Asm("wfe")
98+
schedulerLock.Lock()
99+
waitingCore--
100+
}
101+
102+
// Wake another core, if one is sleeping. Must be called with the scheduler lock
103+
// held.
104+
func schedulerWake() {
105+
if waitingCore != 0 {
106+
arm.Asm("sev")
107+
}
108+
}
109+
110+
// Return the current core number: 0 or 1.
111+
func currentCPU() uint32 {
112+
return rp.SIO.CPUID.Get()
113+
}
114+
115+
// Start the secondary cores for this chip.
116+
// On the RP2040, there is only one other core to start.
117+
func startSecondaryCores() {
118+
// Start the second core of the RP2040.
119+
// See section 2.8.2 in the datasheet.
120+
seq := 0
121+
for {
122+
cmd := core1StartSequence[seq]
123+
if cmd == 0 {
124+
multicore_fifo_drain()
125+
arm.Asm("sev")
126+
}
127+
multicore_fifo_push_blocking(cmd)
128+
response := multicore_fifo_pop_blocking()
129+
if cmd != response {
130+
seq = 0
131+
continue
132+
}
133+
seq = seq + 1
134+
if seq >= len(core1StartSequence) {
135+
break
136+
}
137+
}
138+
139+
// Enable the FIFO interrupt for the GC stop the world phase.
140+
// We can only do this after we don't need the FIFO anymore for starting the
141+
// second core.
142+
intr := interrupt.New(rp.IRQ_SIO_IRQ_PROC0, func(intr interrupt.Interrupt) {
143+
switch rp.SIO.FIFO_RD.Get() {
144+
case 1:
145+
gcInterruptHandler(0)
146+
}
147+
})
148+
intr.Enable()
149+
intr.SetPriority(0xff)
150+
}
151+
152+
var core1StartSequence = [...]uint32{
153+
0, 0, 1,
154+
uint32(uintptr(unsafe.Pointer(&__isr_vector))),
155+
uint32(uintptr(unsafe.Pointer(&stack1TopSymbol))),
156+
uint32(exportedFuncPtr(runCore1)),
157+
}
158+
159+
//go:extern __isr_vector
160+
var __isr_vector [0]uint32
161+
162+
//go:extern _stack1_top
163+
var stack1TopSymbol [0]uint32
164+
165+
// The function that is started on the second core.
166+
//
167+
//export tinygo_runCore1
168+
func runCore1() {
169+
// Clear sticky bit that seems to have been set while starting this core.
170+
rp.SIO.FIFO_ST.Set(rp.SIO_FIFO_ST_ROE)
171+
172+
// Enable the FIFO interrupt, mainly used for the stop-the-world phase of
173+
// the GC.
174+
// Use the lowest possible priority (highest priority value), so that other
175+
// interrupts can still happen while the GC is running.
176+
intr := interrupt.New(rp.IRQ_SIO_IRQ_PROC1, func(intr interrupt.Interrupt) {
177+
switch rp.SIO.FIFO_RD.Get() {
178+
case 1:
179+
gcInterruptHandler(1)
180+
}
181+
})
182+
intr.Enable()
183+
intr.SetPriority(0xff)
184+
185+
// Now start running the scheduler on this core.
186+
schedulerLock.Lock()
187+
scheduler(false)
188+
schedulerLock.Unlock()
189+
190+
// The main function returned.
191+
exit(0)
192+
}
193+
194+
// The below multicore_fifo_* functions have been translated from the Raspberry
195+
// Pi Pico SDK.
196+
197+
func multicore_fifo_rvalid() bool {
198+
return rp.SIO.FIFO_ST.Get()&rp.SIO_FIFO_ST_VLD != 0
199+
}
200+
201+
func multicore_fifo_wready() bool {
202+
return rp.SIO.FIFO_ST.Get()&rp.SIO_FIFO_ST_RDY != 0
203+
}
204+
205+
func multicore_fifo_drain() {
206+
for multicore_fifo_rvalid() {
207+
rp.SIO.FIFO_RD.Get()
208+
}
209+
}
210+
211+
func multicore_fifo_push_blocking(data uint32) {
212+
for !multicore_fifo_wready() {
213+
}
214+
rp.SIO.FIFO_WR.Set(data)
215+
arm.Asm("sev")
216+
}
217+
218+
func multicore_fifo_pop_blocking() uint32 {
219+
for !multicore_fifo_rvalid() {
220+
arm.Asm("wfe")
221+
}
222+
223+
return rp.SIO.FIFO_RD.Get()
224+
}
225+
226+
// Value used to communicate between the GC core and the other (paused) cores.
227+
var gcSignalWait volatile.Register8
228+
229+
// The GC interrupted this core for the stop-the-world phase.
230+
// This function handles that, and only returns after the stop-the-world phase
231+
// ended.
232+
func gcInterruptHandler(hartID uint32) {
233+
// Let the GC know we're ready.
234+
gcScanState.Add(1)
235+
arm.Asm("sev")
236+
237+
// Wait until we get a signal to start scanning.
238+
for gcSignalWait.Get() == 0 {
239+
arm.Asm("wfe")
240+
}
241+
gcSignalWait.Set(0)
242+
243+
// Scan the stack(s) of this core.
244+
scanCurrentStack()
245+
if !task.OnSystemStack() {
246+
// Mark system stack.
247+
markRoots(task.SystemStack(), coreStackTop(hartID))
248+
}
249+
250+
// Signal we've finished scanning.
251+
gcScanState.Store(1)
252+
arm.Asm("sev")
253+
254+
// Wait until we get a signal that the stop-the-world phase has ended.
255+
for gcSignalWait.Get() == 0 {
256+
arm.Asm("wfe")
257+
}
258+
gcSignalWait.Set(0)
259+
260+
// Signal we received the signal and are going to exit the interrupt.
261+
gcScanState.Add(1)
262+
arm.Asm("sev")
263+
}
264+
265+
// Pause the given core by sending it an interrupt.
266+
func gcPauseCore(core uint32) {
267+
rp.SIO.FIFO_WR.Set(1)
268+
}
269+
270+
// Signal the given core that it can resume one step.
271+
// This is called twice after gcPauseCore: the first time to scan the stack of
272+
// the core, and the second time to end the stop-the-world phase.
273+
func gcSignalCore(core uint32) {
274+
gcSignalWait.Set(1)
275+
arm.Asm("sev")
276+
}
277+
278+
// Returns the stack top (highest address) of the system stack of the given
279+
// core.
280+
func coreStackTop(core uint32) uintptr {
281+
switch core {
282+
case 0:
283+
return uintptr(unsafe.Pointer(&stackTopSymbol))
284+
case 1:
285+
return uintptr(unsafe.Pointer(&stack1TopSymbol))
286+
default:
287+
runtimePanic("unexpected core")
288+
return 0
289+
}
290+
}
291+
292+
// These spinlocks are needed by the runtime.
293+
var (
294+
printLock = spinLock{id: 0}
295+
schedulerLock = spinLock{id: 1}
296+
atomicsLock = spinLock{id: 2}
297+
futexLock = spinLock{id: 3}
298+
)
299+
300+
// A hardware spinlock, one of the 32 spinlocks defined in the SIO peripheral.
301+
type spinLock struct {
302+
id uint8
303+
}
304+
305+
// Return the spinlock register: rp.SIO.SPINLOCKx
306+
func (l *spinLock) spinlock() *volatile.Register32 {
307+
return (*volatile.Register32)(unsafe.Add(unsafe.Pointer(&rp.SIO.SPINLOCK0), l.id*4))
308+
}
309+
310+
func (l *spinLock) Lock() {
311+
// Wait for the lock to be available.
312+
spinlock := l.spinlock()
313+
for spinlock.Get() == 0 {
314+
// TODO: use wfe and send an event when unlocking so the CPU can go to
315+
// sleep while waiting for the lock.
316+
// Unfortunately when doing that, time.Sleep() seems to hang somewhere.
317+
// This needs some debugging to figure out.
318+
}
319+
}
320+
321+
func (l *spinLock) Unlock() {
322+
l.spinlock().Set(0)
323+
}
324+
325+
// spinLoopWait is called from the body of a busy-wait loop. It executes the
326+
// ARM "wfe" (wait for event) instruction; the caller re-checks its loop
// condition once this returns.
327+
func spinLoopWait() {
328+
arm.Asm("wfe")
329+
}
330+
46331
// waitForEvents executes the ARM "wfe" (wait for event) instruction.
func waitForEvents() {
47332
arm.Asm("wfe")
48333
}

src/runtime/runtime_tinygoriscv_qemu.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,7 @@ type spinLock struct {
360360
func (l *spinLock) Lock() {
361361
// Try to replace 0 with 1. Once we succeed, the lock has been acquired.
362362
for !l.Uint32.CompareAndSwap(0, 1) {
363-
spinLoopHint()
363+
spinLoopWait()
364364
}
365365
}
366366

@@ -376,7 +376,7 @@ func (l *spinLock) Unlock() {
376376

377377
// Hint to the CPU that this core is just waiting, and the core can go into a
378378
// lower energy state.
379-
func spinLoopHint() {
379+
func spinLoopWait() {
380380
// This is a no-op in QEMU TCG (but added here for completeness):
381381
// https://github.com/qemu/qemu/blob/v9.2.3/target/riscv/insn_trans/trans_rvi.c.inc#L856
382382
riscv.Asm("pause")

targets/arm.ld

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,14 @@ SECTIONS
3232
_stack_top = .;
3333
} >RAM
3434

35+
/* Stack for second core (core 1), if there is one. */
36+
.stack1 (NOLOAD) :
37+
{
38+
. = ALIGN(4);
39+
. += DEFINED(__num_stacks) && __num_stacks >= 2 ? _stack_size : 0;
40+
_stack1_top = .;
41+
} >RAM
42+
3543
/* Start address (in flash) of .data, used by startup code. */
3644
_sidata = LOADADDR(.data);
3745

targets/rp2040.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"inherits": ["cortex-m0plus"],
33
"build-tags": ["rp2040", "rp"],
4+
"scheduler": "cores",
45
"flash-1200-bps-reset": "true",
56
"flash-method": "msd",
67
"serial": "usb",
@@ -12,6 +13,9 @@
1213
"extra-files": [
1314
"src/device/rp/rp2040.s"
1415
],
16+
"ldflags": [
17+
"--defsym=__num_stacks=2"
18+
],
1519
"linkerscript": "targets/rp2040.ld",
1620
"openocd-interface": "picoprobe",
1721
"openocd-transport": "swd",

0 commit comments

Comments
 (0)