Skip to content

Commit 12df7b4

Browse files
committed Mar 25, 2025
fix: const offset.
1 parent 1221564 commit 12df7b4

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed
 

‎src/nn/vulkan/matmul-forward-q80-q40-f32.comp

+2 -1
Original file line number | Diff line number | Diff line change
@@ -63,10 +63,11 @@ void main() {
63 63

64 64
for (uint d = sharedStart + threadIndex; d < end; d += N_THREADS) {
65 65
float16_t sum = float16_t(0.0f);
66+
const uint wOffset = d * inputSizeX;
66 67

67 68
for (uint i = 0; i < inputSizeX; i++) {
68 69
const BlockQ80 xi = x[inputOffset + i];
69-
const BlockQ40 wi = weight[d * inputSizeX + i];
70+
const BlockQ40 wi = weight[wOffset + i];
70 71

71 72
float16_t s = float16_t(0.0f);
72 73
[[unroll]] for (uint j = 0; j < Q40_BLOCK_SIZE / 2; j++) {

0 commit comments

Comments
 (0)
Failed to load comments.