Skip to content

Commit 9849c64

Browse files
committed
clean Q4_0_N_M
- remove Q4_0_N_M from the "file" tensor types - allow these layouts only via dynamic repack
1 parent 9fe0fb0 commit 9849c64

16 files changed

+153
-387
lines changed

Makefile

-1
Original file line numberDiff line numberDiff line change
@@ -938,7 +938,6 @@ DIR_COMMON = common
938938

939939
OBJ_GGML = \
940940
$(DIR_GGML)/src/ggml.o \
941-
$(DIR_GGML)/src/ggml-aarch64.o \
942941
$(DIR_GGML)/src/ggml-alloc.o \
943942
$(DIR_GGML)/src/ggml-backend.o \
944943
$(DIR_GGML)/src/ggml-backend-reg.o \

Package.swift

-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ var sources = [
1010
"src/unicode.cpp",
1111
"src/unicode-data.cpp",
1212
"ggml/src/ggml.c",
13-
"ggml/src/ggml-aarch64.c",
1413
"ggml/src/ggml-alloc.c",
1514
"ggml/src/ggml-backend.cpp",
1615
"ggml/src/ggml-backend-reg.cpp",

ggml/include/ggml-cpu.h

-9
Original file line numberDiff line numberDiff line change
@@ -128,24 +128,15 @@ extern "C" {
128128

129129
// Internal types and functions exposed for tests and benchmarks
130130

131-
typedef void (*ggml_from_float_to_mat_t)
132-
(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
133131
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
134132
const void * GGML_RESTRICT y, size_t by, int nrc);
135-
typedef void (*ggml_gemv_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
136-
const void * GGML_RESTRICT y, int nr, int nc);
137-
typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
138-
const void * GGML_RESTRICT y, int nr, int nc);
139133

140134
struct ggml_type_traits_cpu {
141135
ggml_from_float_t from_float;
142-
ggml_from_float_to_mat_t from_float_to_mat;
143136
ggml_vec_dot_t vec_dot;
144137
enum ggml_type vec_dot_type;
145138
int64_t nrows; // number of rows to process simultaneously
146139
int64_t ncols; // number of columns to process simultaneously
147-
ggml_gemv_t gemv;
148-
ggml_gemm_t gemm;
149140
};
150141

151142
GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);

ggml/include/ggml.h

+3-6
Original file line numberDiff line numberDiff line change
@@ -384,9 +384,9 @@ extern "C" {
384384
GGML_TYPE_F64 = 28,
385385
GGML_TYPE_IQ1_M = 29,
386386
GGML_TYPE_BF16 = 30,
387-
GGML_TYPE_Q4_0_4_4 = 31,
388-
GGML_TYPE_Q4_0_4_8 = 32,
389-
GGML_TYPE_Q4_0_8_8 = 33,
387+
// GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
388+
// GGML_TYPE_Q4_0_4_8 = 32,
389+
// GGML_TYPE_Q4_0_8_8 = 33,
390390
GGML_TYPE_TQ1_0 = 34,
391391
GGML_TYPE_TQ2_0 = 35,
392392
GGML_TYPE_COUNT,
@@ -430,9 +430,6 @@ extern "C" {
430430
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
431431
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
432432
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
433-
GGML_FTYPE_MOSTLY_Q4_0_4_4 = 25, // except 1d tensors
434-
GGML_FTYPE_MOSTLY_Q4_0_4_8 = 26, // except 1d tensors
435-
GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors
436433
};
437434

438435
// available tensor operations:

ggml/src/CMakeLists.txt

+1-3
Original file line numberDiff line numberDiff line change
@@ -215,9 +215,7 @@ add_library(ggml-base
215215
ggml-threading.cpp
216216
ggml-threading.h
217217
ggml-quants.c
218-
ggml-quants.h
219-
ggml-aarch64.c
220-
ggml-aarch64.h)
218+
ggml-quants.h)
221219

222220
target_include_directories(ggml-base PRIVATE .)
223221

ggml/src/ggml-aarch64.c

-129
This file was deleted.

ggml/src/ggml-aarch64.h

-19
This file was deleted.

ggml/src/ggml-common.h

-24
Original file line numberDiff line numberDiff line change
@@ -203,30 +203,6 @@ typedef struct {
203203
} block_q8_1;
204204
static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_half) + QK8_1, "wrong q8_1 block size/padding");
205205

206-
typedef struct {
207-
ggml_half d[4]; // deltas for 4 q4_0 blocks
208-
uint8_t qs[QK4_0 * 2]; // nibbles / quants for 4 q4_0 blocks
209-
} block_q4_0x4;
210-
static_assert(sizeof(block_q4_0x4) == 4 * sizeof(ggml_half) + QK4_0 * 2, "wrong q4_0x4 block size/padding");
211-
212-
typedef struct {
213-
ggml_half d[8]; // deltas for 8 q4_0 blocks
214-
uint8_t qs[QK4_0 * 4]; // nibbles / quants for 8 q4_0 blocks
215-
} block_q4_0x8;
216-
static_assert(sizeof(block_q4_0x8) == 8 * sizeof(ggml_half) + QK4_0 * 4, "wrong q4_0x8 block size/padding");
217-
218-
typedef struct {
219-
ggml_half d[4]; // deltas for 4 q8_0 blocks
220-
int8_t qs[QK8_0 * 4]; // quants for 4 q8_0 blocks
221-
} block_q8_0x4;
222-
static_assert(sizeof(block_q8_0x4) == 4 * sizeof(ggml_half) + QK8_0 * 4, "wrong q8_0x4 block size/padding");
223-
224-
typedef struct {
225-
ggml_half d[8]; // deltas for 8 q8_0 blocks
226-
int8_t qs[QK8_0 * 8]; // quants for 8 q8_0 blocks
227-
} block_q8_0x8;
228-
static_assert(sizeof(block_q8_0x8) == 8 * sizeof(ggml_half) + QK8_0 * 8, "wrong q8_0x8 block size/padding");
229-
230206
//
231207
// Ternary quantization
232208
//

0 commit comments

Comments
 (0)