Skip to content

Commit 6b7b8c3

Browse files
authored
Try to fix the "deadlock" in batch receive (#48)
* Try to fix the "deadlock" in batch receive, somehow it's also 5% faster (to be checked in Travis, deadlock not reproduced locally) * Ignore nimble during runs nim-lang/nimble#696 * don't auto fail due to getTicks on ARM * Negative count is non-blocking, can be investigated later (#49) * When using C++ we need C++11 * typo * Make travis Arch visible * fix arch
1 parent 418f22b commit 6b7b8c3

File tree

5 files changed

+42
-19
lines changed

5 files changed

+42
-19
lines changed

.travis.yml

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,32 @@ matrix:
1212
# Build and test using both gcc and clang
1313
# Build and test on both x86-64 and ARM64
1414
- os: linux
15-
env: CHANNEL=devel
15+
arch: amd64
16+
env:
17+
- ARCH=amd64
18+
- CHANNEL=devel
1619
compiler: gcc
1720

1821
- os: linux
1922
arch: arm64
20-
env: CHANNEL=devel
23+
env:
24+
- ARCH=arm64
25+
- CHANNEL=devel
2126
compiler: gcc
2227

2328
- os: linux
24-
env: CHANNEL=devel
29+
arch: amd64
30+
env:
31+
- ARCH=amd64
32+
- CHANNEL=devel
2533
compiler: clang
2634

2735
# On OSX we only test against clang (gcc is mapped to clang by default)
2836
- os: osx
29-
env: CHANNEL=devel
37+
arch: amd64
38+
env:
39+
- ARCH=amd64
40+
- CHANNEL=devel
3041
compiler: clang
3142
fast_finish: true
3243

@@ -66,7 +77,7 @@ before_script:
6677
- export PATH="nim-${CHANNEL}/bin${PATH:+:$PATH}"
6778
script:
6879
- nimble refresh
69-
- nimble install cligen
80+
- nimble install cligen || true
7081
- nimble test
7182
branches:
7283
except:

azure-pipelines.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ steps:
147147
148148
- bash: |
149149
nimble refresh
150-
nimble install cligen
150+
nimble install cligen || true
151151
displayName: 'Building the package dependencies'
152152
153153
- bash: |

weave/channels/channels_mpsc_unbounded_batch.nim

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ proc tryRecvBatch*[T](chan: var ChannelMpscUnboundedBatch[T], bFirst, bLast: var
147147
## Returns the number of items received
148148
##
149149
## If no items are returned bFirst and bLast are undefined
150+
## and should not be used.
150151
##
151152
## ⚠️ This leaks the next item
152153
## nil or overwrite it for further use in linked lists
@@ -172,9 +173,10 @@ proc tryRecvBatch*[T](chan: var ChannelMpscUnboundedBatch[T], bFirst, bLast: var
172173
# We lose the competition, bail out
173174
chan.front.next.store(front, moRelaxed)
174175
discard chan.count.fetchSub(result, moRelaxed)
175-
postCondition: chan.count.load(moRelaxed) >= 0
176+
postCondition: chan.count.load(moRelaxed) >= 0 # TODO: somehow it can be negative
176177
return
177178

179+
# front == last
178180
chan.front.next.store(nil, moRelaxed)
179181
if compareExchange(chan.back, last, chan.front.addr, moAcquireRelease):
180182
# We won and replaced the last node with the channel front
@@ -190,19 +192,22 @@ proc tryRecvBatch*[T](chan: var ChannelMpscUnboundedBatch[T], bFirst, bLast: var
190192
# we assume that consumer has plenty of work to do with the
191193
# already retrived batch
192194
next = cast[T](front.next.load(moRelaxed))
193-
if not next.isNil:
194-
# Extra node after this one, no competition with producers
195-
prefetch(front)
196-
result += 1
197-
discard chan.count.fetchSub(result, moRelaxed)
198-
chan.front.next.store(next, moRelaxed)
199-
fence(moAcquire)
200-
bLast = front
201-
postCondition: chan.count.load(moRelaxed) >= 0
202-
return
195+
while next.isNil:
196+
# We spinlock, unfortunately there seems to be a livelock potential
197+
# or contention issue if we don't use cpuRelax
198+
# at least twice or just bail out if next is nil.
199+
# Replace this spinlock by "if not next.isNil" and run the "memory pool" bench
200+
# or fibonacci and the program will get stuck.
201+
# The queue should probably be model checked and/or run through Relacy
202+
cpuRelax()
203+
next = cast[T](front.next.load(moRelaxed))
203204

204-
# The last item wasn't linked to the list yet, bail out
205+
prefetch(front)
206+
result += 1
205207
discard chan.count.fetchSub(result, moRelaxed)
208+
chan.front.next.store(next, moRelaxed)
209+
fence(moAcquire)
210+
bLast = front
206211
postCondition: chan.count.load(moRelaxed) >= 0
207212

208213
func peek*(chan: var ChannelMpscUnboundedBatch): int32 {.inline.} =

weave/config.nim

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@
77

88
import strutils
99

10+
# With C++ we need C++11
11+
# ----------------------------------------------------------------------------------
12+
13+
when defined(cpp):
14+
{.passC:"-std=c++11".}
15+
1016
# Static configuration & compile-time options
1117
# ----------------------------------------------------------------------------------
1218

weave/instrumentation/timers.nim

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ when defined(i386) or defined(amd64):
7171
lfence()
7272
return rdtsc()
7373
else:
74-
{.error: "getticks is not supported on this CPU architecture".}
74+
when defined(WV_profile):
75+
{.error: "getticks is not supported on this CPU architecture".}
7576

7677
template timer_new*(timer: var Timer, ghzClock: float64) =
7778
timer.elapsed = 0

0 commit comments

Comments
 (0)