@@ -366,19 +366,19 @@ function generic_matmatmul!(C::AbstractGPUMatrix{R}, A::AbstractGPUMatrix{T}, B:
366
366
# number of tiles depends on inner dimension
367
367
@uniform NUM_TILES = div (Q + TILE_DIM - 1 , TILE_DIM)
368
368
369
- I = (grow - 1 ) * TILE_DIM + tile_row
370
- J = (gcol - 1 ) * TILE_DIM + tile_col
369
+ glob_I = (grow - 1 ) * TILE_DIM + tile_row
370
+ glob_J = (gcol - 1 ) * TILE_DIM + tile_col
371
371
372
372
# loop over all tiles needed for this calculation
373
373
for t in 0 : (NUM_TILES - 1 )
374
374
# load inputs into tiles, zero-filling out-of-range entries when a dimension is not a multiple of TILE_DIM
375
- if I <= N && t * TILE_DIM + tile_col <= Q
376
- @inbounds tile1[tile_row, tile_col] = input1[I , t * TILE_DIM + tile_col]
375
+ if glob_I <= N && t * TILE_DIM + tile_col <= Q
376
+ @inbounds tile1[tile_row, tile_col] = input1[glob_I , t * TILE_DIM + tile_col]
377
377
else
378
378
@inbounds tile1[tile_row, tile_col] = zero (R)
379
379
end
380
- if J <= M && t * TILE_DIM + tile_row <= Q
381
- @inbounds tile2[tile_row, tile_col] = input2[t * TILE_DIM + tile_row, J ]
380
+ if glob_J <= M && t * TILE_DIM + tile_row <= Q
381
+ @inbounds tile2[tile_row, tile_col] = input2[t * TILE_DIM + tile_row, glob_J ]
382
382
else
383
383
@inbounds tile2[tile_row, tile_col] = zero (R)
384
384
end
@@ -397,8 +397,8 @@ function generic_matmatmul!(C::AbstractGPUMatrix{R}, A::AbstractGPUMatrix{T}, B:
397
397
end
398
398
399
399
# save the result only if the global indices are in bounds
400
- if I <= N && J <= M
401
- @inbounds output[I, J ] = add (outval[1 ], output[I, J ])
400
+ if glob_I <= N && glob_J <= M
401
+ @inbounds output[glob_I, glob_J ] = add (outval[1 ], output[glob_I, glob_J ])
402
402
end
403
403
end
404
404
0 commit comments