@@ -113,7 +113,7 @@ def FindBB(bbs, ins): # find BB of an instruction
113
113
114
114
return None , None # never reaches here
115
115
116
- def AnalyzeSbidStalls (kernel , args , df , report ):
116
+ def AnalyzeStalls (kernel , args , stalldf , report ):
117
117
shaders = os .listdir (args .shaderdump )
118
118
files = [] # .asm files
119
119
@@ -172,9 +172,6 @@ def AnalyzeSbidStalls(kernel, args, df, report):
172
172
173
173
asm = asm + ".ip"
174
174
175
- df = df [["IP[Address]" , "SbidStall[Events]" ]]
176
- df = df [df ["SbidStall[Events]" ] > 0 ] # drop 0s
177
- df = df .sort_values (by = ["SbidStall[Events]" ], ascending = False )
178
175
source_available = False
179
176
instructions = []
180
177
with open (asm , "r" ) as inf :
@@ -189,9 +186,14 @@ def AnalyzeSbidStalls(kernel, args, df, report):
189
186
190
187
bbs = ConstructCFG (instructions )
191
188
189
+ df = stalldf [["IP[Address]" , "SbidStall[Events]" ]]
190
+ df = df [df ["SbidStall[Events]" ] > 0 ] # drop 0s
191
+ df = df .sort_values (by = ["SbidStall[Events]" ], ascending = False )
192
+
192
193
print ("Kernel: " + kernel , file = report )
193
194
print ("Assembly with instruction addresses: " + asm , file = report )
194
- print ("SBID stalls: " , file = report )
195
+ print ("***********************************************************************************************" , file = report )
196
+ print ("Sbid Stalls: " , file = report )
195
197
196
198
for index , row in df .iterrows ():
197
199
ip = row ["IP[Address]" ]
@@ -200,15 +202,24 @@ def AnalyzeSbidStalls(kernel, args, df, report):
200
202
if ((ins .startswith ("//" ) == False ) and ("//" in ins )):
201
203
if (ins .startswith ("/* [" + str ('{:08X}' .format (pc ))+ "] */ " ) == True ): # found stalled instruction
202
204
words = ins .split ("{" )
203
- if (len (words ) < 2 ):
204
- break # invalid
205
-
206
205
sbids_stalled = []
207
- for token in words [1 ].split ("}" )[0 ].split ("," ):
208
- if (token .startswith ("$" ) == True ):
209
- subtokens = token .split ("." )
210
- if (len (subtokens ) > 1 ):
211
- sbids_stalled .append (subtokens [0 ])
206
+ if (len (words ) >= 2 ):
207
+ for token in words [1 ].split ("}" )[0 ].split ("," ):
208
+ if (token .startswith ("$" ) == True ):
209
+ sbids_stalled .append (token )
210
+ if (len (sbids_stalled ) == 0 ):
211
+ words = ins .split ("(" ) # check if SBID tokens are in (...)
212
+ if (len (words ) >= 2 ):
213
+ i = 1
214
+ done = False
215
+ while (done == False ):
216
+ for token in words [i ].split (")" )[0 ].split ("," ):
217
+ if (token .startswith ("$" ) == True ):
218
+ sbids_stalled .append (token )
219
+ done = True
220
+ i = i + 1
221
+ if (i == len (words )): # all words are inspected
222
+ break
212
223
213
224
ins_stalled_not_line_resolved = addr
214
225
ins_stalled_not_file_resolved = addr
@@ -229,18 +240,26 @@ def AnalyzeSbidStalls(kernel, args, df, report):
229
240
bid , start , end = bbs_to_check [j ]
230
241
for addr2 , ins2 in enumerate (reversed (instructions [start : end + 1 ])):
231
242
if (len (sbids_stalled ) > 0 ):
232
- if ((ins2 .startswith ("//" ) == False ) and ("//" in ins2 )):
243
+ if ((ins2 .startswith ("//" ) == False ) and ("//" in ins2 ) and ( re . match ( "/\* *\[" , ins2 ) is not None ) ):
233
244
tokens = ins2 .split ("{" )
234
245
if (len (tokens ) > 1 ):
235
- sbids = []
246
+ sbids_stall = []
236
247
for token in tokens [1 ].split ("}" )[0 ].split ("," ):
237
248
if (token .startswith ("$" ) == True ):
238
- sbids .append (token )
249
+ sbids_stall .append (token )
239
250
for sbid in sbids_stalled :
240
- if (sbid in sbids ): # instruction stalled depends on ins2
241
- sbids_stalled .remove (sbid ) # remove sbid from the sbids of the instruction stalled
242
- ins_stall_not_line_resolved .append (end - addr2 ) # source lines/files to be resolved
243
- ins_stall_not_file_resolved .append (end - addr2 )
251
+ for sbid2 in sbids_stall :
252
+ if (len (sbid .split ("." )) > 1 ): # .dst or .src in bid
253
+ if (sbid2 == sbid .split ("." )[0 ]):
254
+ sbids_stalled .remove (sbid ) # remove sbid from the sbids of the instruction stalled
255
+ ins_stall_not_line_resolved .append (end - addr2 ) # source lines/files to be resolved
256
+ ins_stall_not_file_resolved .append (end - addr2 )
257
+ else :
258
+ if (sbid == sbid2 .split ("." )[0 ]): # stalled ins depends on ins2 or dependency already resolved
259
+ sbids_stalled .remove (sbid ) # remove sbid from the sbids of the instruction stalled
260
+ if ((sbid == sbid2 ) and ("sync." not in ins2 )): # ins2 not a sync. ins depends on ins2
261
+ ins_stall_not_line_resolved .append (end - addr2 ) # source lines/files to be resolved
262
+ ins_stall_not_file_resolved .append (end - addr2 )
244
263
245
264
if (re .match ("// *Line" , ins2 ) is not None ):
246
265
if (ins_stalled_not_line_resolved != None ): # source line of stalled instruction
@@ -300,6 +319,58 @@ def AnalyzeSbidStalls(kernel, args, df, report):
300
319
print ("is stalled" , file = report )
301
320
302
321
break
322
+
323
+
324
+ # analyze stalls of other types
325
+
326
+ type = ["ControlStall[Events]" , "PipeStall[Events]" , "SendStall[Events]" , "DistStall[Events]" , "SyncStall[Events]" , "InstrFetchStall[Events]" , "OtherStall[Events]" ]
327
+ for t in type :
328
+ df = stalldf [["IP[Address]" , t ]]
329
+ df = df [df [t ] > 0 ] # drop 0s
330
+ if (df .shape [0 ] == 0 ): # zero stalls. move to the next type
331
+ continue
332
+ df = df .sort_values (by = [t ], ascending = False )
333
+
334
+ print ("***********************************************************************************************" , file = report )
335
+ print (t .split ("Stall" )[0 ] + " Stalls: " , file = report )
336
+
337
+ for index , row in df .iterrows ():
338
+ ip = row ["IP[Address]" ]
339
+ pc = int (ip , 16 )
340
+ for addr , ins in enumerate (instructions ):
341
+ if ((ins .startswith ("//" ) == False ) and ("//" in ins )):
342
+ if (ins .startswith ("/* [" + str ('{:08X}' .format (pc ))+ "] */ " ) == True ): # found stalled instruction
343
+ if (source_available == True ):
344
+ ins_stalled_not_line_resolved = addr
345
+ ins_stalled_not_file_resolved = addr
346
+ source_line_stalled = None
347
+ source_file_stalled = None
348
+
349
+ for addr2 , ins2 in enumerate (reversed (instructions [0 : addr ])):
350
+ if (re .match ("// *Line" , ins2 ) is not None ):
351
+ if (ins_stalled_not_line_resolved != None ): # source line of stalled instruction
352
+ source_line_stalled = addr - 1 - addr2
353
+ ins_stalled_not_line_resolved = None
354
+
355
+ if (re .match ("// *File" , ins2 ) is not None ):
356
+ if (ins_stalled_not_file_resolved != None ): # source file of stalled instruction
357
+ source_file_stalled = addr - 1 - addr2
358
+ ins_stalled_not_file_resolved = None
359
+
360
+ if ((ins_stalled_not_line_resolved == None ) and (ins_stalled_not_file_resolved == None )):
361
+ break # we are done
362
+
363
+ print ("\n Instruction" , file = report )
364
+ print (" " + ins , file = report )
365
+ if (source_line_stalled != None ):
366
+ print (" " + instructions [source_line_stalled ][3 :], file = report )
367
+ if (source_file_stalled != None ):
368
+ print (" " + instructions [source_file_stalled ][3 :], file = report )
369
+
370
+ print ("is stalled" , file = report )
371
+
372
+ break
373
+
303
374
print ("===============================================================================================" , file = report )
304
375
305
376
def AnalyzeStallMetrics (args , header , last ):
@@ -357,7 +428,7 @@ def AnalyzeStallMetrics(args, header, last):
357
428
plt .close (fig ) # close figure to save memory
358
429
359
430
if (args .shaderdump is not None ):
360
- AnalyzeSbidStalls (kernel , args , df2 , report_out )
431
+ AnalyzeStalls (kernel , args , df2 , report_out )
361
432
362
433
print ("\n Analyzed kernel " + kernel )
363
434
@@ -396,7 +467,7 @@ def AnalyzeStallMetrics(args, header, last):
396
467
plt .close (fig ) # close figure to save memory
397
468
398
469
if (args .shaderdump is not None ):
399
- AnalyzeSbidStalls (kernel , args , df2 , report_out )
470
+ AnalyzeStalls (kernel , args , df2 , report_out )
400
471
401
472
print ("\n Analyzed kernel " + kernel )
402
473
if (p != None ):
@@ -405,7 +476,7 @@ def AnalyzeStallMetrics(args, header, last):
405
476
406
477
if ((args .shaderdump is not None ) and (args .report is not None )):
407
478
report_out .close ()
408
- print ("SBID stall report is in file " + args .report )
479
+ print ("Stall report is in file " + args .report )
409
480
410
481
else :
411
482
counting = True
@@ -455,12 +526,12 @@ def AnalyzeStallMetrics(args, header, last):
455
526
plt .savefig (args .output )
456
527
457
528
if (args .shaderdump is not None ):
458
- AnalyzeSbidStalls (kernel , args , df2 , report_out )
529
+ AnalyzeStalls (kernel , args , df2 , report_out )
459
530
460
531
print ("\n Stall metric chart in file " + args .output + " has been successfully generated." )
461
532
if ((args .shaderdump is not None ) and (args .report is not None )):
462
533
report_out .close ()
463
- print ("SBID stall report is in file " + args .report )
534
+ print ("Stall report is in file " + args .report )
464
535
465
536
def PlotKernelInstancePerfMetrics (args , kernel , df , metrics ):
466
537
k = 0
@@ -536,7 +607,7 @@ def AnalyzePerfMetrics(args, header, last):
536
607
ax = df3 .plot (y = metrics_cleansed , kind = 'line' , xlabel = args .xlabel , ylabel = args .ylabel )
537
608
else :
538
609
ax = df3 .plot (y = metrics_cleansed , kind = 'bar' , xlabel = args .xlabel , ylabel = args .ylabel )
539
-
610
+
540
611
plt .grid (visible = True , which = 'both' , axis = 'y' )
541
612
plt .legend (loc = 'best' , fontsize = 4 )
542
613
plt .title (label = args .title + "\n (" + kernel + ")" , loc = 'center' , fontsize = 8 , wrap = True )
0 commit comments