Skip to content

Commit 514021c

Browse files
authored
Merge branch 'master' into bailingmoe
2 parents 41ccc5d + b3de7ca commit 514021c

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+1223
-2777
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
112112
- [x] [RWKV-6](https://github.com/BlinkDL/RWKV-LM)
113113
- [x] [QRWKV-6](https://huggingface.co/recursal/QRWKV6-32B-Instruct-Preview-v0.1)
114114
- [x] [GigaChat-20B-A3B](https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct)
115+
- [x] [Trillion-7B-preview](https://huggingface.co/trillionlabs/Trillion-7B-preview)
115116
- [x] [Ling models](https://huggingface.co/collections/inclusionAI/ling-67c51c85b34a7ea0aba94c32)
116117

117118
#### Multimodal

ci/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ docker run --privileged -it \
6060
Inside the container, execute the following commands:
6161

6262
```bash
63-
apt update -y && apt install -y bc cmake git python3.10-venv time unzip wget
63+
apt update -y && apt install -y bc cmake ccache git python3.10-venv time unzip wget
6464
git config --global --add safe.directory /ws
6565
GG_BUILD_MUSA=1 bash ./ci/run.sh /ci-results /ci-cache
6666
```

ci/run.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ fi
6969
if [ ! -z ${GG_BUILD_MUSA} ]; then
7070
# Use qy1 by default (MTT S80)
7171
MUSA_ARCH=${MUSA_ARCH:-21}
72-
CMAKE_EXTRA="-DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}"
72+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}"
7373
fi
7474
## helpers
7575

convert_hf_to_gguf.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -708,6 +708,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
708708
if chkhsh == "7dec86086fcc38b66b7bc1575a160ae21cf705be7718b9d5598190d7c12db76f":
709709
# ref: https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k
710710
res = "superbpe"
711+
if chkhsh == "1994ffd01900cfb37395608534236ecd63f2bd5995d6cb1004dda1af50240f15":
712+
# ref: https://huggingface.co/trillionlabs/Trillion-7B-preview
713+
res = "trillion"
711714
if chkhsh == "96a5f08be6259352137b512d4157e333e21df7edd3fcd152990608735a65b224":
712715
# ref: https://huggingface.co/inclusionAI/Ling-lite
713716
res = "bailingmoe"

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ class TOKENIZER_TYPE(IntEnum):
111111
{"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
112112
{"name": "gpt-4o", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Xenova/gpt-4o", },
113113
{"name": "superbpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k", },
114+
{"name": "trillion", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/trillionlabs/Trillion-7B-preview", },
114115
{"name": "bailingmoe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-lite", },
115116
]
116117

ggml/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ else()
100100
set(INS_ENB ON)
101101
endif()
102102

103+
message(DEBUG "GGML_NATIVE : ${GGML_NATIVE}")
104+
message(DEBUG "GGML_NATIVE_DEFAULT : ${GGML_NATIVE_DEFAULT}")
105+
message(DEBUG "INS_ENB : ${INS_ENB}")
106+
103107
option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
104108
option(GGML_CPU_AARCH64 "ggml: use runtime weight conversion of Q4_0 to Q4_X_X" ON)
105109
option(GGML_CPU_KLEIDIAI "ggml: use KleidiAI optimized kernels if applicable" OFF)

ggml/src/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ if (GGML_LTO)
6565
endif()
6666
endif()
6767

68-
if (GGML_CCACHE)
68+
if (GGML_CCACHE AND NOT CMAKE_C_COMPILER_LAUNCHER AND NOT CMAKE_CXX_COMPILER_LAUNCHER)
6969
find_program(GGML_CCACHE_FOUND ccache)
7070
find_program(GGML_SCCACHE_FOUND sccache)
7171

ggml/src/ggml-cann/.clang-format

Lines changed: 0 additions & 168 deletions
This file was deleted.

ggml/src/ggml-common.h

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,12 @@ typedef sycl::half2 ggml_half2;
158158

159159
#endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP
160160

161+
#ifdef _MSC_VER
162+
#define GGML_EXTENSION
163+
#else // _MSC_VER
164+
#define GGML_EXTENSION __extension__
165+
#endif // _MSC_VER
166+
161167
#define QK4_0 32
162168
typedef struct {
163169
ggml_half d; // delta
@@ -167,7 +173,7 @@ static_assert(sizeof(block_q4_0) == sizeof(ggml_half) + QK4_0 / 2, "wrong q4_0 b
167173

168174
#define QK4_1 32
169175
typedef struct {
170-
union {
176+
GGML_EXTENSION union {
171177
struct {
172178
ggml_half d; // delta
173179
ggml_half m; // min
@@ -188,7 +194,7 @@ static_assert(sizeof(block_q5_0) == sizeof(ggml_half) + sizeof(uint32_t) + QK5_0
188194

189195
#define QK5_1 32
190196
typedef struct {
191-
union {
197+
GGML_EXTENSION union {
192198
struct {
193199
ggml_half d; // delta
194200
ggml_half m; // min
@@ -209,7 +215,7 @@ static_assert(sizeof(block_q8_0) == sizeof(ggml_half) + QK8_0, "wrong q8_0 block
209215

210216
#define QK8_1 32
211217
typedef struct {
212-
union {
218+
GGML_EXTENSION union {
213219
struct {
214220
ggml_half d; // delta
215221
ggml_half s; // d * sum(qs[i])
@@ -250,7 +256,7 @@ static_assert(sizeof(block_tq2_0) == sizeof(ggml_half) + QK_K / 4, "wrong tq2_0
250256
typedef struct {
251257
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
252258
uint8_t qs[QK_K/4]; // quants
253-
union {
259+
GGML_EXTENSION union {
254260
struct {
255261
ggml_half d; // super-block scale for quantized scales
256262
ggml_half dmin; // super-block scale for quantized mins
@@ -277,7 +283,7 @@ static_assert(sizeof(block_q3_K) == sizeof(ggml_half) + QK_K / 4 + QK_K / 8 + 12
277283
// weight is represented as x = a * q + b
278284
// Effectively 4.5 bits per weight
279285
typedef struct {
280-
union {
286+
GGML_EXTENSION union {
281287
struct {
282288
ggml_half d; // super-block scale for quantized scales
283289
ggml_half dmin; // super-block scale for quantized mins
@@ -294,7 +300,7 @@ static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_half) + K_SCALE_SIZE + QK_K/2,
294300
// weight is represented as x = a * q + b
295301
// Effectively 5.5 bits per weight
296302
typedef struct {
297-
union {
303+
GGML_EXTENSION union {
298304
struct {
299305
ggml_half d; // super-block scale for quantized scales
300306
ggml_half dmin; // super-block scale for quantized mins

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
2323
ggml-cpu/amx/mmq.cpp
2424
ggml-cpu/amx/mmq.h
2525
ggml-cpu/ggml-cpu-impl.h
26+
ggml-cpu/common.h
27+
ggml-cpu/binary-ops.h
28+
ggml-cpu/binary-ops.cpp
29+
ggml-cpu/unary-ops.h
30+
ggml-cpu/unary-ops.cpp
2631
)
2732

2833
target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)

0 commit comments

Comments
 (0)