Skip to content

Commit 91c7360

Browse files
zj040045z5269887
and
z5269887
authored
llama : add gguf_remove_key + remove split meta during quantize (ggml-org#6591)
* Remove split metadata when quantizing model shards
* Find metadata key by enum
* Correct loop range for gguf_remove_key and code format
* Free kv memory

Co-authored-by: z5269887 <z5269887@unsw.edu.au>
1 parent 5c4d767 commit 91c7360

File tree

3 files changed

+47
-25
lines changed

3 files changed

+47
-25
lines changed

ggml.c

Lines changed: 40 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -20550,6 +20550,32 @@ static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
2055020550
return ok;
2055120551
}
2055220552

20553+
static void gguf_free_kv(struct gguf_kv * kv) {
20554+
if (kv->key.data) {
20555+
GGML_FREE(kv->key.data);
20556+
}
20557+
20558+
if (kv->type == GGUF_TYPE_STRING) {
20559+
if (kv->value.str.data) {
20560+
GGML_FREE(kv->value.str.data);
20561+
}
20562+
}
20563+
20564+
if (kv->type == GGUF_TYPE_ARRAY) {
20565+
if (kv->value.arr.data) {
20566+
if (kv->value.arr.type == GGUF_TYPE_STRING) {
20567+
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
20568+
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
20569+
if (str->data) {
20570+
GGML_FREE(str->data);
20571+
}
20572+
}
20573+
}
20574+
GGML_FREE(kv->value.arr.data);
20575+
}
20576+
}
20577+
}
20578+
2055320579
struct gguf_context * gguf_init_empty(void) {
2055420580
struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
2055520581

@@ -20899,31 +20925,7 @@ void gguf_free(struct gguf_context * ctx) {
2089920925
if (ctx->kv) {
2090020926
// free string memory - not great..
2090120927
for (uint64_t i = 0; i < ctx->header.n_kv; ++i) {
20902-
struct gguf_kv * kv = &ctx->kv[i];
20903-
20904-
if (kv->key.data) {
20905-
GGML_FREE(kv->key.data);
20906-
}
20907-
20908-
if (kv->type == GGUF_TYPE_STRING) {
20909-
if (kv->value.str.data) {
20910-
GGML_FREE(kv->value.str.data);
20911-
}
20912-
}
20913-
20914-
if (kv->type == GGUF_TYPE_ARRAY) {
20915-
if (kv->value.arr.data) {
20916-
if (kv->value.arr.type == GGUF_TYPE_STRING) {
20917-
for (uint64_t j = 0; j < kv->value.arr.n; ++j) {
20918-
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
20919-
if (str->data) {
20920-
GGML_FREE(str->data);
20921-
}
20922-
}
20923-
}
20924-
GGML_FREE(kv->value.arr.data);
20925-
}
20926-
}
20928+
gguf_free_kv(&ctx->kv[i]);
2092720929
}
2092820930

2092920931
GGML_FREE(ctx->kv);
@@ -21148,6 +21150,19 @@ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
2114821150
return n_kv;
2114921151
}
2115021152

21153+
void gguf_remove_key(struct gguf_context * ctx, const char * key) {
21154+
const int idx = gguf_find_key(ctx, key);
21155+
if (idx >= 0) {
21156+
const int n_kv = gguf_get_n_kv(ctx);
21157+
gguf_free_kv(&ctx->kv[idx]);
21158+
for (int i = idx; i < n_kv-1; ++i) {
21159+
ctx->kv[i] = ctx->kv[i+1];
21160+
}
21161+
ctx->kv = realloc(ctx->kv, (n_kv - 1) * sizeof(struct gguf_kv));
21162+
ctx->header.n_kv--;
21163+
}
21164+
}
21165+
2115121166
void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
2115221167
const int idx = gguf_get_or_add_key(ctx, key);
2115321168

ggml.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2289,6 +2289,9 @@ extern "C" {
22892289
GGML_API char * gguf_get_tensor_name (const struct gguf_context * ctx, int i);
22902290
GGML_API enum ggml_type gguf_get_tensor_type (const struct gguf_context * ctx, int i);
22912291

2292+
// removes key if it exists
2293+
GGML_API void gguf_remove_key(struct gguf_context * ctx, const char * key);
2294+
22922295
// overrides existing values or adds a new one
22932296
GGML_API void gguf_set_val_u8 (struct gguf_context * ctx, const char * key, uint8_t val);
22942297
GGML_API void gguf_set_val_i8 (struct gguf_context * ctx, const char * key, int8_t val);

llama.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13535,6 +13535,10 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
1353513535
gguf_set_kv (ctx_out, ml.meta);
1353613536
gguf_set_val_u32(ctx_out, "general.quantization_version", GGML_QNT_VERSION);
1353713537
gguf_set_val_u32(ctx_out, "general.file_type", ftype);
13538+
// Remove split metadata
13539+
gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_NO).c_str());
13540+
gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_COUNT).c_str());
13541+
gguf_remove_key(ctx_out, ml.llm_kv(LLM_KV_SPLIT_TENSORS_COUNT).c_str());
1353813542

1353913543
if (params->kv_overrides) {
1354013544
const std::vector<llama_model_kv_override> & overrides = *(const std::vector<llama_model_kv_override> *)params->kv_overrides;

0 commit comments

Comments
 (0)