@@ -3680,17 +3680,17 @@ static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int blck_size_in
3680
3680
static int repack_q4_0_to_q4_0_4_bl (struct ggml_tensor * t, int interleave_block, const void * GGML_RESTRICT data, size_t data_size) {
3681
3681
GGML_ASSERT (t->type == GGML_TYPE_Q4_0);
3682
3682
GGML_ASSERT (interleave_block == 4 || interleave_block == 8 );
3683
+ constexpr int nrows_interleaved = 4 ;
3683
3684
3684
3685
block_q4_0x4 * dst = (block_q4_0x4 *)t->data ;
3685
3686
const block_q4_0 * src = (const block_q4_0 *)data;
3686
3687
block_q4_0 dst_tmp[4 ];
3687
- int nrow = t->ne [1 ]*t->ne [2 ]*t->ne [3 ]; // Number of rows
3688
- int nrows_interleaved = 4 ;
3688
+ int nrow = ggml_nrows (t);
3689
3689
int nblocks = t->ne [0 ] / QK4_0;
3690
3690
3691
3691
GGML_ASSERT (data_size == nrow * nblocks * sizeof (block_q4_0));
3692
3692
3693
- if (nrow % nrows_interleaved != 0 || t->ne [0 ] % 8 != 0 ) {
3693
+ if (t-> ne [ 1 ] % nrows_interleaved != 0 || t->ne [0 ] % 8 != 0 ) {
3694
3694
return -1 ;
3695
3695
}
3696
3696
@@ -3711,17 +3711,17 @@ static int repack_q4_0_to_q4_0_4_bl(struct ggml_tensor * t, int interleave_block
3711
3711
static int repack_q4_0_to_q4_0_8_bl (struct ggml_tensor *t, int interleave_block, const void * GGML_RESTRICT data, size_t data_size) {
3712
3712
GGML_ASSERT (t->type == GGML_TYPE_Q4_0);
3713
3713
GGML_ASSERT (interleave_block == 8 );
3714
+ constexpr int nrows_interleaved = 8 ;
3714
3715
3715
3716
block_q4_0x8 * dst = (block_q4_0x8*)t->data ;
3716
3717
const block_q4_0 * src = (const block_q4_0*) data;
3717
3718
block_q4_0 dst_tmp[8 ];
3718
- int nrow = t->ne [1 ]*t->ne [2 ]*t->ne [3 ]; // Number of rows
3719
- int nrows_interleaved = 8 ;
3719
+ int nrow = ggml_nrows (t);
3720
3720
int nblocks = t->ne [0 ] / QK4_0;
3721
3721
3722
3722
GGML_ASSERT (data_size == nrow * nblocks * sizeof (block_q4_0));
3723
3723
3724
- if (nrow % nrows_interleaved != 0 || t->ne [0 ] % 8 != 0 ) {
3724
+ if (t-> ne [ 1 ] % nrows_interleaved != 0 || t->ne [0 ] % 8 != 0 ) {
3725
3725
return -1 ;
3726
3726
}
3727
3727
@@ -3779,13 +3779,13 @@ static int repack_iq4_nl_to_iq4_nl_4_bl(struct ggml_tensor * t, int interleave_b
3779
3779
block_iq4_nlx4 * dst = (block_iq4_nlx4 *)t->data ;
3780
3780
const block_iq4_nl * src = (const block_iq4_nl *)data;
3781
3781
block_iq4_nl dst_tmp[4 ];
3782
- int nrow = t-> ne [ 1 ]*t-> ne [ 2 ]*t-> ne [ 3 ]; // Number of rows
3782
+ int nrow = ggml_nrows (t);
3783
3783
int nrows_interleaved = 4 ;
3784
3784
int nblocks = t->ne [0 ] / QK4_0;
3785
3785
3786
3786
GGML_ASSERT (data_size == nrow * nblocks * sizeof (block_iq4_nl));
3787
3787
3788
- if (nrow % nrows_interleaved != 0 || t->ne [0 ] % 8 != 0 ) {
3788
+ if (t-> ne [ 1 ] % nrows_interleaved != 0 || t->ne [0 ] % 8 != 0 ) {
3789
3789
return -1 ;
3790
3790
}
3791
3791
@@ -4121,17 +4121,25 @@ static const tensor_traits<block_iq4_nl, 4, 4> iq4_nl_4x4_q8_0;
4121
4121
static const ggml::cpu::tensor_traits * ggml_aarch64_get_optimal_repack_type (const struct ggml_tensor * cur) {
4122
4122
if (cur->type == GGML_TYPE_Q4_0) {
4123
4123
if (ggml_cpu_has_avx2 () || (ggml_cpu_has_sve () && ggml_cpu_has_matmul_int8 () && ggml_cpu_get_sve_cnt () == QK8_0)) {
4124
- return &ggml::cpu::aarch64::q4_0_8x8_q8_0;
4124
+ if (cur->ne [1 ] % 8 ==0 ) {
4125
+ return &ggml::cpu::aarch64::q4_0_8x8_q8_0;
4126
+ }
4125
4127
}
4126
4128
if (ggml_cpu_has_neon () && ggml_cpu_has_matmul_int8 ()) {
4127
- return &ggml::cpu::aarch64::q4_0_4x8_q8_0;
4129
+ if (cur->ne [1 ] % 4 == 0 ) {
4130
+ return &ggml::cpu::aarch64::q4_0_4x8_q8_0;
4131
+ }
4128
4132
}
4129
4133
if (ggml_cpu_has_neon () && ggml_cpu_has_dotprod ()) {
4130
- return &ggml::cpu::aarch64::q4_0_4x4_q8_0;
4134
+ if (cur->ne [1 ] % 4 == 0 ) {
4135
+ return &ggml::cpu::aarch64::q4_0_4x4_q8_0;
4136
+ }
4131
4137
}
4132
4138
} else if (cur->type == GGML_TYPE_IQ4_NL) {
4133
4139
if (ggml_cpu_has_neon () && ggml_cpu_has_dotprod ()) {
4134
- return &ggml::cpu::aarch64::iq4_nl_4x4_q8_0;
4140
+ if (cur->ne [1 ] % 4 == 0 ) {
4141
+ return &ggml::cpu::aarch64::iq4_nl_4x4_q8_0;
4142
+ }
4135
4143
}
4136
4144
}
4137
4145
@@ -4184,9 +4192,12 @@ static size_t ggml_backend_cpu_aarch64_buffer_type_get_alignment(ggml_backend_bu
4184
4192
namespace ggml ::cpu::aarch64 {
4185
4193
class extra_buffer_type : ggml::cpu::extra_buffer_type {
4186
4194
bool supports_op (ggml_backend_dev_t , const struct ggml_tensor * op) override {
4187
- if (op->op == GGML_OP_MUL_MAT && op->src [0 ]->buffer && (ggml_n_dims (op->src [0 ]) == 2 ) &&
4188
- op->src [0 ]->buffer ->buft == ggml_backend_cpu_aarch64_buffer_type () &&
4189
- ggml_aarch64_get_optimal_repack_type (op->src [0 ])) {
4195
+ if ( op->op == GGML_OP_MUL_MAT &&
4196
+ op->src [0 ]->buffer &&
4197
+ (ggml_n_dims (op->src [0 ]) == 2 ) &&
4198
+ op->src [0 ]->buffer ->buft == ggml_backend_cpu_aarch64_buffer_type () &&
4199
+ ggml_aarch64_get_optimal_repack_type (op->src [0 ])
4200
+ ) {
4190
4201
if (op->src [1 ]->buffer && !ggml_backend_buft_is_host (op->src [1 ]->buffer ->buft )) {
4191
4202
return false ;
4192
4203
}
@@ -4197,9 +4208,12 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
4197
4208
// return true;
4198
4209
// }
4199
4210
// may be possible if Q8_0 packed...
4200
- } else if (op->op == GGML_OP_MUL_MAT_ID && op->src [0 ]->buffer && (ggml_n_dims (op->src [0 ]) == 3 ) &&
4201
- op->src [0 ]->buffer ->buft == ggml_backend_cpu_aarch64_buffer_type () &&
4202
- ggml_aarch64_get_optimal_repack_type (op->src [0 ])) {
4211
+ } else if (op->op == GGML_OP_MUL_MAT_ID
4212
+ && op->src [0 ]->buffer
4213
+ && (ggml_n_dims (op->src [0 ]) == 3 )
4214
+ && op->src [0 ]->buffer ->buft == ggml_backend_cpu_aarch64_buffer_type ()
4215
+ && ggml_aarch64_get_optimal_repack_type (op->src [0 ])
4216
+ ) {
4203
4217
if (op->src [1 ]->buffer && !ggml_backend_buft_is_host (op->src [1 ]->buffer ->buft )) {
4204
4218
return false ;
4205
4219
}
0 commit comments