Skip to content

Commit a5c971f

Browse files
authored
am: prereqs for rdna4 1/n (tinygrad#9495)
* am: ip_ver rename for acc
* am: refactor this
* fix version
* ugh
1 parent 1e5d9ad commit a5c971f

File tree

3 files changed

+16
-13
lines changed

3 files changed

+16
-13
lines changed

tinygrad/runtime/ops_amd.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -613,9 +613,10 @@ def __init__(self, dev, dev_id):
613613
pci_cmd = int.from_bytes(self.cfg_fd.read(2, binary=True, offset=pci.PCI_COMMAND), byteorder='little') | pci.PCI_COMMAND_MASTER
614614
self.cfg_fd.write(pci_cmd.to_bytes(2, byteorder='little'), binary=True, offset=pci.PCI_COMMAND)
615615

616+
gfxver = int(f"{self.adev.ip_ver[am.GC_HWIP][0]:02d}{self.adev.ip_ver[am.GC_HWIP][1]:02d}{self.adev.ip_ver[am.GC_HWIP][2]:02d}")
616617
array_count = self.adev.gc_info.gc_num_sa_per_se * self.adev.gc_info.gc_num_se
617618
simd_count = 2 * array_count * (self.adev.gc_info.gc_num_wgp0_per_sa + self.adev.gc_info.gc_num_wgp1_per_sa)
618-
self.props = {'simd_count': 2 * simd_count, 'simd_per_cu': 2, 'array_count': array_count, 'gfx_target_version': self.adev.ip_versions[am.GC_HWIP],
619+
self.props = {'simd_count': 2 * simd_count, 'simd_per_cu': 2, 'array_count': array_count, 'gfx_target_version': gfxver,
619620
'max_slots_scratch_cu': self.adev.gc_info.gc_max_scratch_slots_per_cu, 'max_waves_per_simd': self.adev.gc_info.gc_max_waves_per_simd,
620621
'simd_arrays_per_engine': self.adev.gc_info.gc_num_sa_per_se, 'lds_size_in_kb': self.adev.gc_info.gc_lds_size}
621622

tinygrad/runtime/support/am/amdev.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ def read(self, **kwargs): return self.adev.rreg(self.reg_off) & self._parse_kwar
3333

3434
class AMFirmware:
3535
def __init__(self, adev):
36-
def fmt_ver(hwip): return f"{adev.ip_versions[hwip]//10000}_{(adev.ip_versions[hwip]//100)%100}_{adev.ip_versions[hwip]%100}"
36+
def fmt_ver(hwip): return '_'.join(map(str, adev.ip_ver[hwip]))
3737

3838
# Load SOS firmware
3939
self.sos_fw = {}
@@ -372,7 +372,7 @@ def _run_discovery(self):
372372
# Mapping of HW IP to Discovery HW IP
373373
hw_id_map = {am.__dict__[x]: int(y) for x,y in am.hw_id_map}
374374
self.regs_offset:dict[int, dict[int, list]] = collections.defaultdict(dict)
375-
self.ip_versions:dict[int, int] = {}
375+
self.ip_ver:dict[int, tuple[int, int, int]] = {}
376376

377377
for num_die in range(ihdr.num_dies):
378378
dhdr = am.struct_die_header.from_address(ctypes.addressof(bhdr) + ihdr.die_info[num_die].die_offset)
@@ -384,19 +384,18 @@ def _run_discovery(self):
384384
for hw_ip in range(1, am.MAX_HWIP):
385385
if hw_ip in hw_id_map and hw_id_map[hw_ip] == ip.hw_id:
386386
self.regs_offset[hw_ip][ip.instance_number] = list(ba)
387-
self.ip_versions[hw_ip] = int(f"{ip.major:02d}{ip.minor:02d}{ip.revision:02d}")
387+
self.ip_ver[hw_ip] = (ip.major, ip.minor, ip.revision)
388388

389389
ip_offset += 8 + (8 if ihdr.base_addr_64_bit else 4) * ip.num_base_address
390390

391391
gc_info = am.struct_gc_info_v1_0.from_address(gc_addr:=ctypes.addressof(bhdr) + bhdr.table_list[am.GC].offset)
392392
self.gc_info = getattr(am, f"struct_gc_info_v{gc_info.header.version_major}_{gc_info.header.version_minor}").from_address(gc_addr)
393393

394394
def _ip_module(self, prefix:str, hwip, prever_prefix:str=""):
395-
version = [self.ip_versions[hwip]//10000, (self.ip_versions[hwip]//100)%100, self.ip_versions[hwip]%100]
396-
for ver in [version, version[:2]+[0], version[:1]+[0, 0]]:
395+
for ver in [self.ip_ver[hwip], self.ip_ver[hwip][:2]+(0,), self.ip_ver[hwip][:1]+(0, 0)]:
397396
try: return importlib.import_module(f"tinygrad.runtime.autogen.am.{prefix}_{prever_prefix}{ver[0]}_{ver[1]}_{ver[2]}")
398397
except ImportError: pass
399-
raise ImportError(f"am {self.devfmt}: failed to load {prefix} module with version {version}")
398+
raise ImportError(f"am {self.devfmt}: failed to load {prefix} module with version {self.ip_ver[hwip]}")
400399

401400
def _build_regs(self):
402401
mods = [("MP0", self._ip_module("mp", am.MP0_HWIP)), ("NBIO", self._ip_module("nbio", am.NBIO_HWIP)), ("GC", self._ip_module("gc", am.GC_HWIP)),

tinygrad/runtime/support/am/ip.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ def init(self):
1515
self.adev.regRCC_DEV0_EPF0_RCC_DOORBELL_APER_EN.write(0x1)
1616
def set_clockgating_state(self): self.adev.regHDP_MEM_POWER_CTRL.update(atomic_mem_power_ctrl_en=1, atomic_mem_power_ds_en=1)
1717

18+
def doorbell_enable(self, port, awid=0, awaddr_31_28_value=0, offset=0, size=0):
19+
self.adev.reg(f"regS2A_DOORBELL_ENTRY_{port}_CTRL").update(**{f"s2a_doorbell_port{port}_enable": 1, f"s2a_doorbell_port{port}_awid": awid,
20+
f"s2a_doorbell_port{port}_awaddr_31_28_value": awaddr_31_28_value, f"s2a_doorbell_port{port}_range_offset": offset,
21+
f"s2a_doorbell_port{port}_range_size": size})
22+
1823
class AM_GMC(AM_IP):
1924
def __init__(self, adev):
2025
super().__init__(adev)
@@ -171,8 +176,8 @@ def init(self):
171176
self.adev.regTCP_CNTL.write(self.adev.regTCP_CNTL.read() | 0x20000000)
172177
self.adev.regRLC_SRM_CNTL.update(srm_enable=1, auto_incr_addr=1)
173178

174-
self.adev.regS2A_DOORBELL_ENTRY_0_CTRL.write(s2a_doorbell_port0_enable=1, s2a_doorbell_port0_awid=0x3, s2a_doorbell_port0_awaddr_31_28_value=0x3)
175-
self.adev.regS2A_DOORBELL_ENTRY_3_CTRL.write(s2a_doorbell_port3_enable=1, s2a_doorbell_port3_awid=0x6, s2a_doorbell_port3_awaddr_31_28_value=0x3)
179+
self.adev.soc.doorbell_enable(port=0, awid=0x3, awaddr_31_28_value=0x3)
180+
self.adev.soc.doorbell_enable(port=3, awid=0x6, awaddr_31_28_value=0x3)
176181

177182
self.adev.regGRBM_CNTL.update(read_timeout=0xff)
178183
for i in range(0, 16):
@@ -308,8 +313,7 @@ def init(self):
308313
for _, rwptr_vm, suf, ring_id in self.rings:
309314
self.adev.reg(f"regIH_RB_CNTL{suf}").update(rb_enable=1, **({'enable_intr': 1} if ring_id == 0 else {}))
310315

311-
self.adev.regS2A_DOORBELL_ENTRY_1_CTRL.update(s2a_doorbell_port1_enable=1, s2a_doorbell_port1_awid=0x0, s2a_doorbell_port1_awaddr_31_28_value=0x0,
312-
s2a_doorbell_port1_range_offset=am.AMDGPU_NAVI10_DOORBELL_IH*2, s2a_doorbell_port1_range_size=2)
316+
self.adev.soc.doorbell_enable(port=1, awid=0x0, awaddr_31_28_value=0x0, offset=am.AMDGPU_NAVI10_DOORBELL_IH*2, size=2)
313317

314318
class AM_SDMA(AM_IP):
315319
def setup_ring(self, ring_addr:int, ring_size:int, rptr_addr:int, wptr_addr:int, doorbell:int, pipe:int, queue:int):
@@ -334,8 +338,7 @@ def init(self):
334338
self.adev.reg(f"regSDMA{pipe}_UTCL1_PAGE").update(rd_l2_policy=0x2, wr_l2_policy=0x3, llc_noalloc=1) # rd=noa, wr=bypass
335339
self.adev.reg(f"regSDMA{pipe}_F32_CNTL").update(halt=0, th1_reset=0)
336340
self.adev.reg(f"regSDMA{pipe}_CNTL").update(ctxempty_int_enable=1, trap_enable=1)
337-
self.adev.regS2A_DOORBELL_ENTRY_2_CTRL.update(s2a_doorbell_port2_enable=1, s2a_doorbell_port2_awid=0xe, s2a_doorbell_port2_awaddr_31_28_value=0x3,
338-
s2a_doorbell_port2_range_offset=am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0*2, s2a_doorbell_port2_range_size=4)
341+
self.adev.soc.doorbell_enable(port=2, awid=0xe, awaddr_31_28_value=0x3, offset=am.AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0*2, size=4)
339342

340343
def fini(self):
341344
self.adev.regSDMA0_QUEUE0_RB_CNTL.update(rb_enable=0)

0 commit comments

Comments
 (0)