From 9a02dc9bff1ae4b175685b9f7d184f6731e2a1b0 Mon Sep 17 00:00:00 2001
From: Roger Ferrer Ibanez
Date: Mon, 20 May 2024 13:06:06 +0000
Subject: [PATCH] Fix wrong scalar strided access in matrix C of matmul

Where I accidentally used "n" I wanted to use "m". The vector access was correct already.
---
 doc/rvv-intrinsic-examples.adoc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/rvv-intrinsic-examples.adoc b/doc/rvv-intrinsic-examples.adoc
index d69650130..6de62ca69 100644
--- a/doc/rvv-intrinsic-examples.adoc
+++ b/doc/rvv-intrinsic-examples.adoc
@@ -77,16 +77,16 @@ Consider the following function that implements a naive matrix multiplication.
 void matmul_reference(double *a, double *b, double *c, int n, int m, int p) {
   for (int i = 0; i < n; ++i)
     for (int j = 0; j < m; ++j) {
-      c[i * n + j] = 0;
+      c[i * m + j] = 0;
       for (int k = 0; k < p; ++k) {
-        c[i * n + j] += a[i * p + k] * b[k * m + j];
+        c[i * m + j] += a[i * p + k] * b[k * m + j];
       }
     }
 }
 ----
 
 The following example is a version of the matrix multiplication. The
-accumulation on `c[i * n + j]` is implemented using partial accumulations
+accumulation on `c[i * m + j]` is implemented using partial accumulations
 followed by a single final accumulation.
 
 .An implementation of a naive matrix multiplication using RVV intrinsics.
@@ -121,7 +121,7 @@ void matmul_rvv(double *a, double *b, double *c, int n, int m, int p) {
       vfloat64m1_t vec_sum =
           __riscv_vfredusum_vs_f64m1_f64m1(vec_s, vec_zero, vlmax);
       double sum = __riscv_vfmv_f_s_f64m1_f64(vec_sum);
-      c[i * n + j] = sum;
+      c[i * m + j] = sum;
     }
 }
 ----