Skip to content

Commit 10bed1a

Browse files
sasha0552garg-amit
authored andcommitted
[Bugfix][Kernel] Implement acquire/release polyfill for Pascal (vllm-project#8776)
Signed-off-by: Amit Garg <mitgarg17495@gmail.com>
1 parent 7bf6f1b commit 10bed1a

File tree

2 files changed

+18
-0
lines changed

2 files changed

+18
-0
lines changed

csrc/custom_all_reduce.cuh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,15 +131,26 @@ DINLINE O downcast(array_t<float, O::size> val) {
131131
}
132132

133133
static DINLINE void st_flag_release(FlagType* flag_addr, FlagType flag) {
134+
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
134135
asm volatile("st.release.sys.global.u32 [%1], %0;" ::"r"(flag),
135136
"l"(flag_addr));
137+
#else
138+
asm volatile("membar.sys; st.volatile.global.u32 [%1], %0;" ::"r"(flag),
139+
"l"(flag_addr));
140+
#endif
136141
}
137142

138143
static DINLINE FlagType ld_flag_acquire(FlagType* flag_addr) {
139144
FlagType flag;
145+
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
140146
asm volatile("ld.acquire.sys.global.u32 %0, [%1];"
141147
: "=r"(flag)
142148
: "l"(flag_addr));
149+
#else
150+
asm volatile("ld.volatile.global.u32 %0, [%1]; membar.gl;"
151+
: "=r"(flag)
152+
: "l"(flag_addr));
153+
#endif
143154
return flag;
144155
}
145156

csrc/custom_all_reduce_test.cu

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,14 @@
4444
} while (0)
4545

4646
__global__ void dummy_kernel() {
47+
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
4748
for (int i = 0; i < 100; i++) __nanosleep(1000000); // 100ms
49+
#else
50+
for (int i = 0; i < 100; i++) {
51+
long long int start = clock64();
52+
while (clock64() - start < 150000000); // approximately 98.4ms on P40
53+
}
54+
#endif
4855
}
4956

5057
template <typename T>

0 commit comments

Comments
 (0)