Skip to content

Commit b14b471

Browse files
committed
Added/corrected the tensor-size check for Q4 repacking: ne[1] must be a multiple of the number of interleaved rows.
1 parent 0a2be72 commit b14b471

File tree

1 file changed

+32
-18
lines changed

1 file changed

+32
-18
lines changed

ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp

+32-18
Original file line numberDiff line numberDiff line change
@@ -3680,17 +3680,17 @@ static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int blck_size_in
36803680
static int repack_q4_0_to_q4_0_4_bl(struct ggml_tensor * t, int interleave_block, const void * GGML_RESTRICT data, size_t data_size) {
36813681
GGML_ASSERT(t->type == GGML_TYPE_Q4_0);
36823682
GGML_ASSERT(interleave_block == 4 || interleave_block == 8);
3683+
constexpr int nrows_interleaved = 4;
36833684

36843685
block_q4_0x4 * dst = (block_q4_0x4 *)t->data;
36853686
const block_q4_0 * src = (const block_q4_0 *)data;
36863687
block_q4_0 dst_tmp[4];
3687-
int nrow = t->ne[1]*t->ne[2]*t->ne[3]; // Number of rows
3688-
int nrows_interleaved = 4;
3688+
int nrow = ggml_nrows(t);
36893689
int nblocks = t->ne[0] / QK4_0;
36903690

36913691
GGML_ASSERT(data_size == nrow * nblocks * sizeof(block_q4_0));
36923692

3693-
if (nrow % nrows_interleaved != 0 || t->ne[0] % 8 != 0) {
3693+
if (t->ne[1] % nrows_interleaved != 0 || t->ne[0] % 8 != 0) {
36943694
return -1;
36953695
}
36963696

@@ -3711,17 +3711,17 @@ static int repack_q4_0_to_q4_0_4_bl(struct ggml_tensor * t, int interleave_block
37113711
static int repack_q4_0_to_q4_0_8_bl(struct ggml_tensor *t, int interleave_block, const void * GGML_RESTRICT data, size_t data_size) {
37123712
GGML_ASSERT(t->type == GGML_TYPE_Q4_0);
37133713
GGML_ASSERT(interleave_block == 8);
3714+
constexpr int nrows_interleaved = 8;
37143715

37153716
block_q4_0x8 * dst = (block_q4_0x8*)t->data;
37163717
const block_q4_0 * src = (const block_q4_0*) data;
37173718
block_q4_0 dst_tmp[8];
3718-
int nrow = t->ne[1]*t->ne[2]*t->ne[3]; // Number of rows
3719-
int nrows_interleaved = 8;
3719+
int nrow = ggml_nrows(t);
37203720
int nblocks = t->ne[0] / QK4_0;
37213721

37223722
GGML_ASSERT(data_size == nrow * nblocks * sizeof(block_q4_0));
37233723

3724-
if (nrow % nrows_interleaved != 0 || t->ne[0] % 8 != 0) {
3724+
if (t->ne[1] % nrows_interleaved != 0 || t->ne[0] % 8 != 0) {
37253725
return -1;
37263726
}
37273727

@@ -3779,13 +3779,13 @@ static int repack_iq4_nl_to_iq4_nl_4_bl(struct ggml_tensor * t, int interleave_b
37793779
block_iq4_nlx4 * dst = (block_iq4_nlx4 *)t->data;
37803780
const block_iq4_nl * src = (const block_iq4_nl *)data;
37813781
block_iq4_nl dst_tmp[4];
3782-
int nrow = t->ne[1]*t->ne[2]*t->ne[3]; // Number of rows
3782+
int nrow = ggml_nrows(t);
37833783
int nrows_interleaved = 4;
37843784
int nblocks = t->ne[0] / QK4_0;
37853785

37863786
GGML_ASSERT(data_size == nrow * nblocks * sizeof(block_iq4_nl));
37873787

3788-
if (nrow % nrows_interleaved != 0 || t->ne[0] % 8 != 0) {
3788+
if (t->ne[1] % nrows_interleaved != 0 || t->ne[0] % 8 != 0) {
37893789
return -1;
37903790
}
37913791

@@ -4121,17 +4121,25 @@ static const tensor_traits<block_iq4_nl, 4, 4> iq4_nl_4x4_q8_0;
41214121
static const ggml::cpu::tensor_traits * ggml_aarch64_get_optimal_repack_type(const struct ggml_tensor * cur) {
41224122
if (cur->type == GGML_TYPE_Q4_0) {
41234123
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
4124-
return &ggml::cpu::aarch64::q4_0_8x8_q8_0;
4124+
if (cur->ne[1] % 8==0) {
4125+
return &ggml::cpu::aarch64::q4_0_8x8_q8_0;
4126+
}
41254127
}
41264128
if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
4127-
return &ggml::cpu::aarch64::q4_0_4x8_q8_0;
4129+
if (cur->ne[1] % 4 == 0) {
4130+
return &ggml::cpu::aarch64::q4_0_4x8_q8_0;
4131+
}
41284132
}
41294133
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
4130-
return &ggml::cpu::aarch64::q4_0_4x4_q8_0;
4134+
if (cur->ne[1] % 4 == 0) {
4135+
return &ggml::cpu::aarch64::q4_0_4x4_q8_0;
4136+
}
41314137
}
41324138
} else if (cur->type == GGML_TYPE_IQ4_NL) {
41334139
if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
4134-
return &ggml::cpu::aarch64::iq4_nl_4x4_q8_0;
4140+
if (cur->ne[1] % 4 == 0) {
4141+
return &ggml::cpu::aarch64::iq4_nl_4x4_q8_0;
4142+
}
41354143
}
41364144
}
41374145

@@ -4184,9 +4192,12 @@ static size_t ggml_backend_cpu_aarch64_buffer_type_get_alignment(ggml_backend_bu
41844192
namespace ggml::cpu::aarch64 {
41854193
class extra_buffer_type : ggml::cpu::extra_buffer_type {
41864194
bool supports_op(ggml_backend_dev_t, const struct ggml_tensor * op) override {
4187-
if (op->op == GGML_OP_MUL_MAT && op->src[0]->buffer && (ggml_n_dims(op->src[0]) == 2) &&
4188-
op->src[0]->buffer->buft == ggml_backend_cpu_aarch64_buffer_type() &&
4189-
ggml_aarch64_get_optimal_repack_type(op->src[0])) {
4195+
if ( op->op == GGML_OP_MUL_MAT &&
4196+
op->src[0]->buffer &&
4197+
(ggml_n_dims(op->src[0]) == 2) &&
4198+
op->src[0]->buffer->buft == ggml_backend_cpu_aarch64_buffer_type() &&
4199+
ggml_aarch64_get_optimal_repack_type(op->src[0])
4200+
) {
41904201
if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) {
41914202
return false;
41924203
}
@@ -4197,9 +4208,12 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
41974208
// return true;
41984209
//}
41994210
// may be possible if Q8_0 packed...
4200-
} else if (op->op == GGML_OP_MUL_MAT_ID && op->src[0]->buffer && (ggml_n_dims(op->src[0]) == 3) &&
4201-
op->src[0]->buffer->buft == ggml_backend_cpu_aarch64_buffer_type() &&
4202-
ggml_aarch64_get_optimal_repack_type(op->src[0])) {
4211+
} else if (op->op == GGML_OP_MUL_MAT_ID
4212+
&& op->src[0]->buffer
4213+
&& (ggml_n_dims(op->src[0]) == 3)
4214+
&& op->src[0]->buffer->buft == ggml_backend_cpu_aarch64_buffer_type()
4215+
&& ggml_aarch64_get_optimal_repack_type(op->src[0])
4216+
) {
42034217
if (op->src[1]->buffer && !ggml_backend_buft_is_host(op->src[1]->buffer->buft)) {
42044218
return false;
42054219
}

0 commit comments

Comments
 (0)