@@ -458,12 +458,12 @@ class tinyBLAS {
458
458
// QUANT ZERO MATRIX MULTIPLICATION
459
459
460
460
#if defined(__ARM_FEATURE_DOTPROD)
461
- template <typename TA>
461
+ template <typename TA, typename TB >
462
462
class tinyBLAS_Q0_ARM {
463
463
public:
464
464
tinyBLAS_Q0_ARM (int64_t k,
465
465
const TA *A, int64_t lda,
466
- const BlockQ80 *B, int64_t ldb,
466
+ const TB *B, int64_t ldb,
467
467
float *C, int64_t ldc,
468
468
int ith, int nth)
469
469
: A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) {
@@ -584,7 +584,7 @@ class tinyBLAS_Q0_ARM {
584
584
}
585
585
586
586
const TA *const A;
587
- const BlockQ80 *const B;
587
+ const TB *const B;
588
588
float *const C;
589
589
const int64_t k;
590
590
const int64_t lda;
@@ -936,6 +936,17 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
936
936
}
937
937
938
938
case Q80: {
939
+ #if defined(__ARM_FEATURE_DOTPROD)
940
+ if (Btype == Q40) {
941
+ tinyBLAS_Q0_ARM<BlockQ80, BlockQ40> tb{
942
+ k, (const BlockQ80 *)A, lda,
943
+ (const BlockQ40 *)B, ldb,
944
+ (float *)C, ldc,
945
+ ith, nth};
946
+ tb.matmul (m, n, task);
947
+ return true ;
948
+ }
949
+ #endif
939
950
if (Btype != Q80)
940
951
return false ;
941
952
#if defined(__AVX2__) || defined(__AVX512F__)
@@ -947,7 +958,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
947
958
tb.matmul (m, n, task);
948
959
return true ;
949
960
#elif defined(__ARM_FEATURE_DOTPROD)
950
- tinyBLAS_Q0_ARM<BlockQ80> tb{
961
+ tinyBLAS_Q0_ARM<BlockQ80, BlockQ80 > tb{
951
962
k, (const BlockQ80 *)A, lda,
952
963
(const BlockQ80 *)B, ldb,
953
964
(float *)C, ldc,
@@ -971,7 +982,7 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
971
982
tb.matmul (m, n, task);
972
983
return true ;
973
984
#elif defined(__ARM_FEATURE_DOTPROD)
974
- tinyBLAS_Q0_ARM<BlockQ40> tb{
985
+ tinyBLAS_Q0_ARM<BlockQ40, BlockQ80 > tb{
975
986
k, (const BlockQ40 *)A, lda,
976
987
(const BlockQ80 *)B, ldb,
977
988
(float *)C, ldc,
0 commit comments