Skip to content

Commit fed4cb5

Browse files
committed
Config: Fix quantized cache naming scheme
The standard naming version is "q8_0" instead of "Q8_0". Always convert the string to lowercase, which avoids case mismatches, and rewrite the enum to use llamacpp's convention for quant naming. Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
1 parent 1ac149f commit fed4cb5

File tree

3 files changed

+19
-14
lines changed

3 files changed

+19
-14
lines changed

bindings/types.ts

+7-8
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
// Subset for caching
22
export enum GGMLType {
3-
F32 = 0,
4-
F16 = 1,
5-
Q4_0 = 2,
6-
Q4_1 = 3,
3+
f32 = 0,
4+
f16 = 1,
5+
q4_0 = 2,
6+
q4_1 = 3,
77
// 4 and 5 were removed (Q4_2 and Q4_3)
8-
Q5_0 = 6,
9-
Q5_1 = 7,
10-
Q8_0 = 8,
11-
Q8_1 = 9,
8+
q5_0 = 6,
9+
q5_1 = 7,
10+
q8_0 = 8,
1211
}
1312

1413
export enum BindingFinishReason {

common/configModels.ts

+8-4
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,19 @@ export const ModelConfig = z.object({
2727
rope_freq_base: z.number().nullish().coalesce(0),
2828
enable_yarn: z.boolean().nullish().coalesce(false),
2929
cache_mode_k: z.union([
30-
z.string().transform((str) => GGMLType[str as keyof typeof GGMLType]),
30+
z.string().transform((str) =>
31+
GGMLType[str.toLowerCase() as keyof typeof GGMLType]
32+
),
3133
z.number(),
3234
])
33-
.nullish().coalesce(GGMLType.F16),
35+
.nullish().coalesce(GGMLType.f16),
3436
cache_mode_v: z.union([
35-
z.string().transform((str) => GGMLType[str as keyof typeof GGMLType]),
37+
z.string().transform((str) =>
38+
GGMLType[str.toLowerCase() as keyof typeof GGMLType]
39+
),
3640
z.number(),
3741
])
38-
.nullish().coalesce(GGMLType.F16),
42+
.nullish().coalesce(GGMLType.f16),
3943
});
4044

4145
export type ModelConfig = z.infer<typeof ModelConfig>;

config_sample.yml

+4-2
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,9 @@ model:
5959
enable_yarn: false
6060

6161
# K cache quantization type (default: f16)
62-
cache_mode_k: F16
62+
# Possible values - f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0
63+
cache_mode_k: f16
6364

6465
# V cache quantization type (default: f16)
65-
cache_mode_v: F16
66+
# Possible values - f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0
67+
cache_mode_v: f16

0 commit comments

Comments
 (0)