Skip to content

Commit b6fe5ab

Browse files
authored
fix: correct gfx10 ctl stack size (tinygrad#9384)
1 parent 456697d commit b6fe5ab

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

tinygrad/runtime/ops_amd.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -584,7 +584,8 @@ def __init__(self, device:str=""):
584584
sgrp_size_per_cu, lds_size_per_cu, hwreg_size_per_cu = 0x4000, 0x10000, 0x1000
585585
vgpr_size_per_cu = 0x60000 if self.target in {110000, 110001, 120000, 120001} else 0x40000
586586
wg_data_size = round_up((vgpr_size_per_cu + sgrp_size_per_cu + lds_size_per_cu + hwreg_size_per_cu) * (self.max_cu_id + 1), mmap.PAGESIZE)
587-
ctl_stack_size = round_up(12 * (self.max_cu_id + 1) * (self.max_wave_id + 1) + 8 + 40, mmap.PAGESIZE) if self.target//10000 != 10 else 0x7000
587+
ctl_stack_size = round_up(12 * (self.max_cu_id + 1) * (self.max_wave_id + 1) + 8 + 40, mmap.PAGESIZE)
588+
if self.target//10000 == 10: ctl_stack_size = min(ctl_stack_size, 0x7000)
588589
debug_memory_size = round_up((self.max_cu_id + 1) * (self.max_wave_id + 1) * 32, 64)
589590

590591
self.compute_queue = self.create_queue(kfd.KFD_IOC_QUEUE_TYPE_COMPUTE, 0x800000, ctx_save_restore_size=wg_data_size + ctl_stack_size,

0 commit comments

Comments
 (0)