Skip to content

Commit b3fa764

Browse files
authored
am: move queues to gpus (tinygrad#8848)
* am: fix * add flag for this * do not depend on host parameter
1 parent 42d7c80 commit b3fa764

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

tinygrad/runtime/ops_amd.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,8 @@ def alloc(self, size:int, host=False, uncached=False, cpu_access=False) -> HCQBu
353353

354354
if cpu_access or host: flags |= kfd.KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC
355355

356-
if host: buf = addr = HWInterface.anon_mmap(0, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
356+
if flags & kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR:
357+
buf = addr = HWInterface.anon_mmap(0, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 0)
357358
else: buf, addr = 0, HWInterface.anon_mmap(0, size, 0, mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS | MAP_NORESERVE, 0)
358359
assert addr != 0xffffffffffffffff
359360

@@ -365,7 +366,7 @@ def alloc(self, size:int, host=False, uncached=False, cpu_access=False) -> HCQBu
365366
if e.errno == errno.ENOMEM: raise MemoryError("Cannot allocate memory: no memory is available.") from e
366367
raise
367368

368-
if not host:
369+
if not (flags & kfd.KFD_IOC_ALLOC_MEM_FLAGS_USERPTR):
369370
buf = self.drm_fd.mmap(mem.va_addr, mem.size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | MAP_FIXED, mem.mmap_offset)
370371
assert addr == buf == mem.va_addr
371372

@@ -502,7 +503,7 @@ def _map_pci_range(self, bar, off=0, addr=0, size=None):
502503
return to_mv(fd.mmap(addr, sz, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | (MAP_FIXED if addr else 0), off), sz)
503504

504505
def alloc(self, size:int, host=False, uncached=False, cpu_access=False):
505-
if host or (uncached and cpu_access): # host or gtt-like memory.
506+
if host or (not getenv("AMD_ALLOC_QUEUE_DEV_MEM", 1) and uncached and cpu_access): # host or gtt-like memory.
506507
vaddr = self.adev.mm.alloc_vaddr(size:=round_up(size, mmap.PAGESIZE), align=mmap.PAGESIZE)
507508
va = HWInterface.anon_mmap(vaddr, size, mmap.PROT_READ | mmap.PROT_WRITE, mmap.MAP_SHARED | mmap.MAP_ANONYMOUS | MAP_LOCKED | MAP_FIXED, 0)
508509

@@ -564,7 +565,7 @@ def __init__(self, device:str=""):
564565
if self.target < 100300 or self.target >= 120000: raise RuntimeError(f"Unsupported arch: {self.arch}")
565566

566567
if AMDDevice.signals_page is None:
567-
AMDDevice.signals_page = self.dev_iface.alloc(16 * 65536, uncached=True, cpu_access=True)
568+
AMDDevice.signals_page = self.dev_iface.alloc(16 * 65536, host=True, uncached=True, cpu_access=True)
568569
AMDDevice.signals_pool = [AMDDevice.signals_page.va_addr + off for off in range(0, AMDDevice.signals_page.size, 16)]
569570
else: self.dev_iface.map(AMDDevice.signals_page)
570571

0 commit comments

Comments
 (0)