@@ -353,7 +353,8 @@ def alloc(self, size:int, host=False, uncached=False, cpu_access=False) -> HCQBu
353
353
354
354
if cpu_access or host : flags |= kfd .KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC
355
355
356
- if host : buf = addr = HWInterface .anon_mmap (0 , size , mmap .PROT_READ | mmap .PROT_WRITE , mmap .MAP_SHARED | mmap .MAP_ANONYMOUS , 0 )
356
+ if flags & kfd .KFD_IOC_ALLOC_MEM_FLAGS_USERPTR :
357
+ buf = addr = HWInterface .anon_mmap (0 , size , mmap .PROT_READ | mmap .PROT_WRITE , mmap .MAP_SHARED | mmap .MAP_ANONYMOUS , 0 )
357
358
else : buf , addr = 0 , HWInterface .anon_mmap (0 , size , 0 , mmap .MAP_PRIVATE | mmap .MAP_ANONYMOUS | MAP_NORESERVE , 0 )
358
359
assert addr != 0xffffffffffffffff
359
360
@@ -365,7 +366,7 @@ def alloc(self, size:int, host=False, uncached=False, cpu_access=False) -> HCQBu
365
366
if e .errno == errno .ENOMEM : raise MemoryError ("Cannot allocate memory: no memory is available." ) from e
366
367
raise
367
368
368
- if not host :
369
+ if not ( flags & kfd . KFD_IOC_ALLOC_MEM_FLAGS_USERPTR ) :
369
370
buf = self .drm_fd .mmap (mem .va_addr , mem .size , mmap .PROT_READ | mmap .PROT_WRITE , mmap .MAP_SHARED | MAP_FIXED , mem .mmap_offset )
370
371
assert addr == buf == mem .va_addr
371
372
@@ -502,7 +503,7 @@ def _map_pci_range(self, bar, off=0, addr=0, size=None):
502
503
return to_mv (fd .mmap (addr , sz , mmap .PROT_READ | mmap .PROT_WRITE , mmap .MAP_SHARED | (MAP_FIXED if addr else 0 ), off ), sz )
503
504
504
505
def alloc (self , size :int , host = False , uncached = False , cpu_access = False ):
505
- if host or (uncached and cpu_access ): # host or gtt-like memory.
506
+ if host or (not getenv ( "AMD_ALLOC_QUEUE_DEV_MEM" , 1 ) and uncached and cpu_access ): # host or gtt-like memory.
506
507
vaddr = self .adev .mm .alloc_vaddr (size := round_up (size , mmap .PAGESIZE ), align = mmap .PAGESIZE )
507
508
va = HWInterface .anon_mmap (vaddr , size , mmap .PROT_READ | mmap .PROT_WRITE , mmap .MAP_SHARED | mmap .MAP_ANONYMOUS | MAP_LOCKED | MAP_FIXED , 0 )
508
509
@@ -564,7 +565,7 @@ def __init__(self, device:str=""):
564
565
if self .target < 100300 or self .target >= 120000 : raise RuntimeError (f"Unsupported arch: { self .arch } " )
565
566
566
567
if AMDDevice .signals_page is None :
567
- AMDDevice .signals_page = self .dev_iface .alloc (16 * 65536 , uncached = True , cpu_access = True )
568
+ AMDDevice .signals_page = self .dev_iface .alloc (16 * 65536 , host = True , uncached = True , cpu_access = True )
568
569
AMDDevice .signals_pool = [AMDDevice .signals_page .va_addr + off for off in range (0 , AMDDevice .signals_page .size , 16 )]
569
570
else : self .dev_iface .map (AMDDevice .signals_page )
570
571
0 commit comments