Skip to content

Commit 1b29245

Browse files
committed Dec 1, 2024
reformat extra cpu backend.
- clean Q4_0_N_M and IQ4_0_N_M
  - remove from "file" tensor type
  - allow only with dynamic repack
- extract cpu extra bufts and convert to C++
  - hbm
  - "aarch64"
- more generic use of extra buffer
  - generalise extra_supports_op
  - new API for "cpu-accel":
    - amx
    - aarch64
1 parent 0c39f44 commit 1b29245

28 files changed

+2313
-2364
lines changed
 

‎Makefile

+10-12
Original file line number | Diff line number | Diff line change
@@ -441,6 +441,10 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
441441
MK_CFLAGS += -march=native -mtune=native
442442
HOST_CXXFLAGS += -march=native -mtune=native
443443

444+
# Usage AMX build test
445+
#MK_CFLAGS += -march=graniterapids -mtune=graniterapids
446+
#HOST_CXXFLAGS += -march=graniterapids -mtune=graniterapids
447+
444448
# Usage AVX-only
445449
#MK_CFLAGS += -mfma -mf16c -mavx
446450
#MK_CXXFLAGS += -mfma -mf16c -mavx
@@ -944,17 +948,18 @@ DIR_COMMON = common
944948

945949
OBJ_GGML = \
946950
$(DIR_GGML)/src/ggml.o \
947-
$(DIR_GGML)/src/ggml-aarch64.o \
948951
$(DIR_GGML)/src/ggml-alloc.o \
949952
$(DIR_GGML)/src/ggml-backend.o \
950953
$(DIR_GGML)/src/ggml-backend-reg.o \
951954
$(DIR_GGML)/src/ggml-opt.o \
952955
$(DIR_GGML)/src/ggml-quants.o \
953956
$(DIR_GGML)/src/ggml-threading.o \
954957
$(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \
955-
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-cpp.o \
958+
$(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp.o \
956959
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \
960+
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-hbm.o \
957961
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \
962+
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-traits.o \
958963
$(OBJ_GGML_EXT)
959964

960965
OBJ_LLAMA = \
@@ -1094,17 +1099,10 @@ DEP_FILES = $(OBJ_GGML:.o=.d) $(OBJ_LLAMA:.o=.d) $(OBJ_COMMON:.o=.d)
10941099
# Default target
10951100
all: $(BUILD_TARGETS)
10961101

1102+
# force c++ build for source file that have same name as c file
10971103
# Note: need this exception because `ggml-cpu.c` and `ggml-cpu.cpp` both produce the same obj/dep files
1098-
# g++ -M -I ./ggml/include/ -I ./ggml/src ggml/src/ggml-cpu/ggml-cpu.cpp | grep ggml
1099-
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-cpp.o: \
1100-
ggml/src/ggml-cpu/ggml-cpu.cpp \
1101-
ggml/include/ggml-backend.h \
1102-
ggml/include/ggml.h \
1103-
ggml/include/ggml-alloc.h \
1104-
ggml/src/ggml-backend-impl.h \
1105-
ggml/include/ggml-cpu.h \
1106-
ggml/src/ggml-impl.h
1107-
$(CXX) $(CXXFLAGS) -c $< -o $@
1104+
$(DIR_GGML)/%_cpp.o: $(DIR_GGML)/%.cpp
1105+
$(CXX) $(CXXFLAGS) -MMD -c $< -o $@
11081106

11091107
# Rules for building object files
11101108
$(DIR_GGML)/%.o: $(DIR_GGML)/%.c

‎Package.swift

+4-3
Original file line number | Diff line number | Diff line change
@@ -10,14 +10,15 @@ var sources = [
1010
"src/unicode.cpp",
1111
"src/unicode-data.cpp",
1212
"ggml/src/ggml.c",
13-
"ggml/src/ggml-aarch64.c",
1413
"ggml/src/ggml-alloc.c",
1514
"ggml/src/ggml-backend.cpp",
1615
"ggml/src/ggml-backend-reg.cpp",
1716
"ggml/src/ggml-cpu/ggml-cpu.c",
1817
"ggml/src/ggml-cpu/ggml-cpu.cpp",
19-
"ggml/src/ggml-cpu/ggml-cpu-aarch64.c",
18+
"ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp",
19+
"ggml/src/ggml-cpu/ggml-cpu-hbm.cpp",
2020
"ggml/src/ggml-cpu/ggml-cpu-quants.c",
21+
"ggml/src/ggml-cpu/ggml-cpu-traits.cpp",
2122
"ggml/src/ggml-threading.cpp",
2223
"ggml/src/ggml-quants.c",
2324
]
@@ -88,5 +89,5 @@ let package = Package(
8889
linkerSettings: linkerSettings
8990
)
9091
],
91-
cxxLanguageStandard: .cxx11
92+
cxxLanguageStandard: .cxx17
9293
)

‎ggml/include/ggml-cpu.h

-17
Original file line number | Diff line number | Diff line change
@@ -103,24 +103,14 @@ extern "C" {
103103

104104
// Internal types and functions exposed for tests and benchmarks
105105

106-
typedef void (*ggml_from_float_to_mat_t)
107-
(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs);
108106
typedef void (*ggml_vec_dot_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, size_t bx,
109107
const void * GGML_RESTRICT y, size_t by, int nrc);
110-
typedef void (*ggml_gemv_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
111-
const void * GGML_RESTRICT y, int nr, int nc);
112-
typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x,
113-
const void * GGML_RESTRICT y, int nr, int nc);
114108

115109
struct ggml_type_traits_cpu {
116110
ggml_from_float_t from_float;
117-
ggml_from_float_to_mat_t from_float_to_mat;
118111
ggml_vec_dot_t vec_dot;
119112
enum ggml_type vec_dot_type;
120113
int64_t nrows; // number of rows to process simultaneously
121-
int64_t ncols; // number of columns to process simultaneously
122-
ggml_gemv_t gemv;
123-
ggml_gemm_t gemm;
124114
};
125115

126116
GGML_BACKEND_API const struct ggml_type_traits_cpu * ggml_get_type_traits_cpu(enum ggml_type type);
@@ -140,13 +130,6 @@ extern "C" {
140130

141131
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void);
142132

143-
#ifdef GGML_USE_CPU_HBM
144-
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
145-
#endif
146-
147-
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
148-
GGML_BACKEND_API bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft);
149-
150133
#ifdef __cplusplus
151134
}
152135
#endif

‎ggml/include/ggml.h

+4-7
Original file line number | Diff line number | Diff line change
@@ -384,12 +384,12 @@ extern "C" {
384384
GGML_TYPE_F64 = 28,
385385
GGML_TYPE_IQ1_M = 29,
386386
GGML_TYPE_BF16 = 30,
387-
GGML_TYPE_Q4_0_4_4 = 31,
388-
GGML_TYPE_Q4_0_4_8 = 32,
389-
GGML_TYPE_Q4_0_8_8 = 33,
387+
// GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
388+
// GGML_TYPE_Q4_0_4_8 = 32,
389+
// GGML_TYPE_Q4_0_8_8 = 33,
390390
GGML_TYPE_TQ1_0 = 34,
391391
GGML_TYPE_TQ2_0 = 35,
392-
GGML_TYPE_IQ4_NL_4_4 = 36,
392+
// GGML_TYPE_IQ4_NL_4_4 = 36,
393393
// GGML_TYPE_IQ4_NL_4_8 = 37,
394394
// GGML_TYPE_IQ4_NL_8_8 = 38,
395395
GGML_TYPE_COUNT,
@@ -433,9 +433,6 @@ extern "C" {
433433
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
434434
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
435435
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
436-
GGML_FTYPE_MOSTLY_Q4_0_4_4 = 25, // except 1d tensors
437-
GGML_FTYPE_MOSTLY_Q4_0_4_8 = 26, // except 1d tensors
438-
GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors
439436
};
440437

441438
// available tensor operations:

‎ggml/src/CMakeLists.txt

+1-3
Original file line number | Diff line number | Diff line change
@@ -220,9 +220,7 @@ add_library(ggml-base
220220
ggml-threading.cpp
221221
ggml-threading.h
222222
ggml-quants.c
223-
ggml-quants.h
224-
ggml-aarch64.c
225-
ggml-aarch64.h)
223+
ggml-quants.h)
226224

227225
target_include_directories(ggml-base PRIVATE .)
228226

‎ggml/src/ggml-aarch64.c

-129
This file was deleted.

‎ggml/src/ggml-aarch64.h

-19
This file was deleted.

‎ggml/src/ggml-cann/ggml-cann.cpp

+1-1
Original file line number | Diff line number | Diff line change
@@ -2089,7 +2089,7 @@ static void * ggml_backend_cann_reg_get_proc_address(ggml_backend_reg_t reg, con
20892089
static const ggml_backend_reg_i ggml_backend_cann_reg_interface = {
20902090
/* .get_name = */ ggml_backend_cann_reg_get_name,
20912091
/* .get_device_count = */ ggml_backend_cann_reg_get_device_count,
2092-
/* .get_device_get = */ ggml_backend_cann_reg_get_device,
2092+
/* .get_device = */ ggml_backend_cann_reg_get_device,
20932093
/* .get_proc_address = */ ggml_backend_cann_reg_get_proc_address,
20942094
};
20952095

0 commit comments

Comments
 (0)