@@ -222,12 +222,12 @@ static inline __m256i mul_sum_i8_pairs_int32x8(const __m256i x, const __m256i y)
 
 static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
 
-static void quantize_q8_0_4x4(const float * x, void * vy, int64_t k) {
+static void quantize_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
     assert(QK8_0 == 32);
     assert(k % QK8_0 == 0);
     const int nb = k / QK8_0;
 
-    block_q8_0x4 * y = (block_q8_0x4 *) vy;
+    block_q8_0x4 * GGML_RESTRICT y = (block_q8_0x4 *) vy;
 
 #if defined(__ARM_NEON)
     float32x4_t srcv[4][8];
@@ -316,12 +316,12 @@ static void quantize_q8_0_4x4(const float * x, void * vy, int64_t k) {
 #endif
 }
 
-static void quantize_q8_0_4x8(const float * x, void * vy, int64_t k) {
+static void quantize_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
     assert(QK8_0 == 32);
     assert(k % QK8_0 == 0);
     const int nb = k / QK8_0;
 
-    block_q8_0x4 * y = (block_q8_0x4 *) vy;
+    block_q8_0x4 * GGML_RESTRICT y = (block_q8_0x4 *) vy;
 
 #if defined(__ARM_NEON)
     float32x4_t srcv[4][8];
@@ -531,7 +531,7 @@ static void quantize_q8_0_4x8(const float * x, void * vy, int64_t k) {
 #endif
 }
 
-static void quantize_mat_q8_0(const float * x, void * vy, int64_t nrow, int64_t n_per_row, int64_t blck_size_interleave) {
+static void quantize_mat_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t nrow, int64_t n_per_row, int64_t blck_size_interleave) {
     assert(nrow == 4);
     UNUSED(nrow);
     if (blck_size_interleave == 4) {
@@ -543,7 +543,7 @@ static void quantize_mat_q8_0(const float * x, void * vy, int64_t nrow, int64_t
     }
 }
 
-static void ggml_gemv_q4_0_4x4_q8_0(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
+static void ggml_gemv_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
     const int qk = QK8_0;
     const int nb = n / qk;
     const int ncols_interleaved = 4;
@@ -628,7 +628,7 @@ static void ggml_gemv_q4_0_4x4_q8_0(int n, float * s, size_t bs, const void * vx
     }
 }
 
-static void ggml_gemv_q4_0_4x8_q8_0(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
+static void ggml_gemv_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
     const int qk = QK8_0;
     const int nb = n / qk;
     const int ncols_interleaved = 4;
@@ -738,7 +738,7 @@ static void ggml_gemv_q4_0_4x8_q8_0(int n, float * s, size_t bs, const void * vx
     }
 }
 
-static void ggml_gemv_q4_0_8x8_q8_0(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
+static void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
     const int qk = QK8_0;
     const int nb = n / qk;
     const int ncols_interleaved = 8;
@@ -1011,7 +1011,7 @@ static void ggml_gemv_q4_0_8x8_q8_0(int n, float * s, size_t bs, const void * vx
     }
 }
 
-static void ggml_gemv_iq4_nl_4x4_q8_0(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
+static void ggml_gemv_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
     const int qk = QK8_0;
     const int nb = n / qk;
     const int ncols_interleaved = 4;
@@ -1107,7 +1107,7 @@ static void ggml_gemv_iq4_nl_4x4_q8_0(int n, float * s, size_t bs, const void *
     }
 }
 
-static void ggml_gemm_q4_0_4x4_q8_0(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
+static void ggml_gemm_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
     const int qk = QK8_0;
     const int nb = n / qk;
     const int ncols_interleaved = 4;
@@ -1623,7 +1623,7 @@ static void ggml_gemm_q4_0_4x4_q8_0(int n, float * s, size_t bs, const void * vx
     }
 }
 
-static void ggml_gemm_q4_0_4x8_q8_0(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
+static void ggml_gemm_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
     const int qk = QK8_0;
     const int nb = n / qk;
     const int ncols_interleaved = 4;
@@ -2077,7 +2077,7 @@ static void ggml_gemm_q4_0_4x8_q8_0(int n, float * s, size_t bs, const void * vx
     }
 }
 
-static void ggml_gemm_q4_0_8x8_q8_0(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
+static void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
     const int qk = QK8_0;
     const int nb = n / qk;
     const int ncols_interleaved = 8;
@@ -3497,7 +3497,7 @@ static void ggml_gemm_q4_0_8x8_q8_0(int n, float * s, size_t bs, const void * vx
     }
 }
 
-static void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * s, size_t bs, const void * vx, const void * vy, int nr, int nc) {
+static void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) {
     const int qk = QK8_0;
     const int nb = n / qk;
     const int ncols_interleaved = 4;
@@ -3677,7 +3677,7 @@ static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int blck_size_in
     return out;
 }
 
-static int repack_q4_0_to_q4_0_4_bl(struct ggml_tensor * t, int interleave_block, const void * data, size_t data_size) {
+static int repack_q4_0_to_q4_0_4_bl(struct ggml_tensor * t, int interleave_block, const void * GGML_RESTRICT data, size_t data_size) {
     GGML_ASSERT(t->type == GGML_TYPE_Q4_0);
     GGML_ASSERT(interleave_block == 4 || interleave_block == 8);
 
@@ -3708,7 +3708,7 @@ static int repack_q4_0_to_q4_0_4_bl(struct ggml_tensor * t, int interleave_block
     GGML_UNUSED(data_size);
 }
 
-static int repack_q4_0_to_q4_0_8_bl(struct ggml_tensor *t, int interleave_block, const void * data, size_t data_size) {
+static int repack_q4_0_to_q4_0_8_bl(struct ggml_tensor *t, int interleave_block, const void * GGML_RESTRICT data, size_t data_size) {
     GGML_ASSERT(t->type == GGML_TYPE_Q4_0);
     GGML_ASSERT(interleave_block == 8);
 
@@ -3772,7 +3772,7 @@ static block_iq4_nlx4 make_block_iq4_nlx4(block_iq4_nl * in, unsigned int blck_s
     return out;
 }
 
-static int repack_iq4_nl_to_iq4_nl_4_bl(struct ggml_tensor * t, int interleave_block, const void * data, size_t data_size) {
+static int repack_iq4_nl_to_iq4_nl_4_bl(struct ggml_tensor * t, int interleave_block, const void * GGML_RESTRICT data, size_t data_size) {
     GGML_ASSERT(t->type == GGML_TYPE_IQ4_NL);
     GGML_ASSERT(interleave_block == 4 || interleave_block == 8);
 
@@ -3971,8 +3971,7 @@ static const tensor_traits<block_iq4_nl, 4, 4> iq4_nl_4x4_q8_0;
 
 static const ggml::cpu::tensor_traits * ggml_aarch64_get_optimal_repack_type(const struct ggml_tensor * cur) {
     if (cur->type == GGML_TYPE_Q4_0) {
-        // TODO: enable for AVX2 - currently disabled due to bad gemv performance
-        if (/* ggml_cpu_has_avx2() || */ (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
+        if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
             return &ggml::cpu::aarch64::q4_0_8x8_q8_0;
         }
         if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
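Note on the change: the diff threads GGML_RESTRICT through the quantize, gemv/gemm, and repack signatures, and lets AVX2 machines select the q4_0_8x8 repack path that was previously gated to SVE. GGML_RESTRICT is presumably ggml's portability wrapper around the C99 restrict qualifier (its exact definition lives in ggml's headers and is not part of this diff; restrict is not a standard C++ keyword, which is presumably why a macro is used in this .cpp file at all). A minimal standalone C sketch of what the no-aliasing promise buys in kernels like these — MY_RESTRICT and scale_row are made-up names, not ggml API:

    /* sketch only: illustrates the restrict qualifier, not the library's code */
    #include <stddef.h>

    #ifdef _MSC_VER
    #define MY_RESTRICT __restrict   /* MSVC spelling */
    #else
    #define MY_RESTRICT restrict     /* C99 keyword */
    #endif

    /* Promising that dst and src never alias lets the compiler vectorize the
     * loop and hoist loads of src, instead of assuming every store through
     * dst might have modified *src and reloading it each iteration. */
    static void scale_row(float * MY_RESTRICT dst, const float * MY_RESTRICT src, float d, size_t n) {
        for (size_t i = 0; i < n; ++i) {
            dst[i] = src[i] * d;
        }
    }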