Skip to content

Commit 9c74d74

Browse files
Authored Mar 29, 2025
feat(gguf): guess default context size from file (mudler#5089)
feat(gguf): guess default config file from files

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
1 parent 679ee7b commit 9c74d74

File tree

4 files changed

+264
-242
lines changed

4 files changed

+264
-242
lines changed
 

‎core/cli/run.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ type RunCMD struct {
3838

3939
F16 bool `name:"f16" env:"LOCALAI_F16,F16" help:"Enable GPU acceleration" group:"performance"`
4040
Threads int `env:"LOCALAI_THREADS,THREADS" short:"t" help:"Number of threads used for parallel computation. Usage of the number of physical cores in the system is suggested" group:"performance"`
41-
ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" default:"512" help:"Default context size for models" group:"performance"`
41+
ContextSize int `env:"LOCALAI_CONTEXT_SIZE,CONTEXT_SIZE" help:"Default context size for models" group:"performance"`
4242

4343
Address string `env:"LOCALAI_ADDRESS,ADDRESS" default:":8080" help:"Bind address for the API server" group:"api"`
4444
CORS bool `env:"LOCALAI_CORS,CORS" help:"" group:"api"`

‎core/config/backend_config.go

+1-11
Original file line numberDiff line numberDiff line change
@@ -389,16 +389,6 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
389389
cfg.Embeddings = &falseV
390390
}
391391

392-
// Value passed by the top level are treated as default (no implicit defaults)
393-
// defaults are set by the user
394-
if ctx == 0 {
395-
ctx = 1024
396-
}
397-
398-
if cfg.ContextSize == nil {
399-
cfg.ContextSize = &ctx
400-
}
401-
402392
if threads == 0 {
403393
// Threads can't be 0
404394
threads = 4
@@ -420,7 +410,7 @@ func (cfg *BackendConfig) SetDefaults(opts ...ConfigLoaderOption) {
420410
cfg.Debug = &trueV
421411
}
422412

423-
guessDefaultsFromFile(cfg, lo.modelPath)
413+
guessDefaultsFromFile(cfg, lo.modelPath, ctx)
424414
}
425415

426416
func (c *BackendConfig) Validate() bool {

‎core/config/gguf.go

+253
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
package config
2+
3+
import (
4+
"strings"
5+
6+
"github.com/rs/zerolog/log"
7+
8+
gguf "github.com/thxcode/gguf-parser-go"
9+
)
10+
11+
// familyType identifies a known model family guessed from GGUF metadata.
// Each family maps to a set of prompt-template defaults in defaultsSettings.
type familyType uint8

const (
	// Unknown is the zero value: the model family could not be identified.
	Unknown familyType = iota
	LLaMa3
	CommandR
	Phi3
	ChatML
	Mistral03
	Gemma
	DeepSeek2
)

const (
	// defaultContextSize is the last-resort context size used when neither
	// the caller nor the GGUF file provides one.
	defaultContextSize = 1024
)
27+
28+
// settingsConfig bundles the per-family defaults applied to a BackendConfig
// once the model family has been identified from a GGUF file.
type settingsConfig struct {
	// StopWords are the default stop sequences for the family.
	StopWords []string
	// TemplateConfig holds the default chat/completion/function templates.
	TemplateConfig TemplateConfig
	// RepeatPenalty is the default repetition penalty (0 means "not set").
	RepeatPenalty float64
}
33+
34+
// default settings to adopt with a given model family
//
// NOTE: the template strings below are Go text/template bodies consumed at
// prompt-rendering time; they must be kept verbatim.
var defaultsSettings map[familyType]settingsConfig = map[familyType]settingsConfig{
	Gemma: {
		RepeatPenalty: 1.0,
		StopWords:     []string{"<|im_end|>", "<end_of_turn>", "<start_of_turn>"},
		TemplateConfig: TemplateConfig{
			Chat:        "{{.Input }}\n<start_of_turn>model\n",
			ChatMessage: "<start_of_turn>{{if eq .RoleName \"assistant\" }}model{{else}}{{ .RoleName }}{{end}}\n{{ if .Content -}}\n{{.Content -}}\n{{ end -}}<end_of_turn>",
			Completion:  "{{.Input}}",
		},
	},
	DeepSeek2: {
		StopWords: []string{"<|end▁of▁sentence|>"},
		TemplateConfig: TemplateConfig{
			ChatMessage: `{{if eq .RoleName "user" -}}User: {{.Content }}
{{ end -}}
{{if eq .RoleName "assistant" -}}Assistant: {{.Content}}<|end▁of▁sentence|>{{end}}
{{if eq .RoleName "system" -}}{{.Content}}
{{end -}}`,
			Chat: "{{.Input -}}\nAssistant: ",
		},
	},
	LLaMa3: {
		StopWords: []string{"<|eot_id|>"},
		TemplateConfig: TemplateConfig{
			Chat:        "<|begin_of_text|>{{.Input }}\n<|start_header_id|>assistant<|end_header_id|>",
			ChatMessage: "<|start_header_id|>{{ .RoleName }}<|end_header_id|>\n\n{{.Content }}<|eot_id|>",
		},
	},
	CommandR: {
		TemplateConfig: TemplateConfig{
			Chat: "{{.Input -}}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",
			Functions: `<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>
You are a function calling AI model, you can call the following functions:
## Available Tools
{{range .Functions}}
- {"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}
{{end}}
When using a tool, reply with JSON, for instance {"name": "tool_name", "arguments": {"param1": "value1", "param2": "value2"}}
<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Input -}}`,
			ChatMessage: `{{if eq .RoleName "user" -}}
<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if eq .RoleName "system" -}}
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if eq .RoleName "assistant" -}}
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if eq .RoleName "tool" -}}
<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{{.Content}}<|END_OF_TURN_TOKEN|>
{{- else if .FunctionCall -}}
<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{{toJson .FunctionCall}}}<|END_OF_TURN_TOKEN|>
{{- end -}}`,
		},
		StopWords: []string{"<|END_OF_TURN_TOKEN|>"},
	},
	Phi3: {
		TemplateConfig: TemplateConfig{
			Chat:        "{{.Input}}\n<|assistant|>",
			ChatMessage: "<|{{ .RoleName }}|>\n{{.Content}}<|end|>",
			Completion:  "{{.Input}}",
		},
		StopWords: []string{"<|end|>", "<|endoftext|>"},
	},
	ChatML: {
		TemplateConfig: TemplateConfig{
			Chat: "{{.Input -}}\n<|im_start|>assistant",
			Functions: `<|im_start|>system
You are a function calling AI model. You are provided with functions to execute. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
{{range .Functions}}
{'type': 'function', 'function': {'name': '{{.Name}}', 'description': '{{.Description}}', 'parameters': {{toJson .Parameters}} }}
{{end}}
For each function call return a json object with function name and arguments
<|im_end|>
{{.Input -}}
<|im_start|>assistant`,
			ChatMessage: `<|im_start|>{{ .RoleName }}
{{ if .FunctionCall -}}
Function call:
{{ else if eq .RoleName "tool" -}}
Function response:
{{ end -}}
{{ if .Content -}}
{{.Content }}
{{ end -}}
{{ if .FunctionCall -}}
{{toJson .FunctionCall}}
{{ end -}}<|im_end|>`,
		},
		StopWords: []string{"<|im_end|>", "<dummy32000>", "</s>"},
	},
	Mistral03: {
		TemplateConfig: TemplateConfig{
			Chat:      "{{.Input -}}",
			Functions: `[AVAILABLE_TOOLS] [{{range .Functions}}{"type": "function", "function": {"name": "{{.Name}}", "description": "{{.Description}}", "parameters": {{toJson .Parameters}} }}{{end}} ] [/AVAILABLE_TOOLS]{{.Input }}`,
			ChatMessage: `{{if eq .RoleName "user" -}}
[INST] {{.Content }} [/INST]
{{- else if .FunctionCall -}}
[TOOL_CALLS] {{toJson .FunctionCall}} [/TOOL_CALLS]
{{- else if eq .RoleName "tool" -}}
[TOOL_RESULTS] {{.Content}} [/TOOL_RESULTS]
{{- else -}}
{{ .Content -}}
{{ end -}}`,
		},
		StopWords: []string{"<|im_end|>", "<dummy32000>", "</tool_call>", "<|eot_id|>", "<|end_of_text|>", "</s>", "[/TOOL_CALLS]", "[/ACTIONS]"},
	},
}
140+
141+
// this maps well known template used in HF to model families defined above
//
// Keys are matched verbatim against the file's `tokenizer.chat_template`
// metadata value, so they must be kept byte-for-byte identical to the
// upstream HuggingFace Jinja templates.
var knownTemplates = map[string]familyType{
	`{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}`: ChatML,
	`{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}`: Mistral03,
}
146+
147+
func guessGGUFFromFile(cfg *BackendConfig, f *gguf.GGUFFile, defaultCtx int) {
148+
149+
if defaultCtx == 0 && cfg.ContextSize == nil {
150+
ctxSize := f.EstimateLLaMACppUsage().ContextSize
151+
if ctxSize > 0 {
152+
cSize := int(ctxSize)
153+
cfg.ContextSize = &cSize
154+
} else {
155+
defaultCtx = defaultContextSize
156+
cfg.ContextSize = &defaultCtx
157+
}
158+
}
159+
160+
if cfg.HasTemplate() {
161+
// nothing to guess here
162+
log.Debug().Any("name", cfg.Name).Msgf("guessDefaultsFromFile: %s", "template already set")
163+
return
164+
}
165+
166+
log.Debug().
167+
Any("eosTokenID", f.Tokenizer().EOSTokenID).
168+
Any("bosTokenID", f.Tokenizer().BOSTokenID).
169+
Any("modelName", f.Model().Name).
170+
Any("architecture", f.Architecture().Architecture).Msgf("Model file loaded: %s", cfg.ModelFileName())
171+
172+
// guess the name
173+
if cfg.Name == "" {
174+
cfg.Name = f.Model().Name
175+
}
176+
177+
family := identifyFamily(f)
178+
179+
if family == Unknown {
180+
log.Debug().Msgf("guessDefaultsFromFile: %s", "family not identified")
181+
return
182+
}
183+
184+
// identify template
185+
settings, ok := defaultsSettings[family]
186+
if ok {
187+
cfg.TemplateConfig = settings.TemplateConfig
188+
log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: guessed template %+v", cfg.TemplateConfig)
189+
if len(cfg.StopWords) == 0 {
190+
cfg.StopWords = settings.StopWords
191+
}
192+
if cfg.RepeatPenalty == 0.0 {
193+
cfg.RepeatPenalty = settings.RepeatPenalty
194+
}
195+
} else {
196+
log.Debug().Any("family", family).Msgf("guessDefaultsFromFile: no template found for family")
197+
}
198+
199+
if cfg.HasTemplate() {
200+
return
201+
}
202+
203+
// identify from well known templates first, otherwise use the raw jinja template
204+
chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
205+
if found {
206+
// try to use the jinja template
207+
cfg.TemplateConfig.JinjaTemplate = true
208+
cfg.TemplateConfig.ChatMessage = chatTemplate.ValueString()
209+
}
210+
}
211+
212+
func identifyFamily(f *gguf.GGUFFile) familyType {
213+
214+
// identify from well known templates first
215+
chatTemplate, found := f.Header.MetadataKV.Get("tokenizer.chat_template")
216+
if found && chatTemplate.ValueString() != "" {
217+
if family, ok := knownTemplates[chatTemplate.ValueString()]; ok {
218+
return family
219+
}
220+
}
221+
222+
// otherwise try to identify from the model properties
223+
arch := f.Architecture().Architecture
224+
eosTokenID := f.Tokenizer().EOSTokenID
225+
bosTokenID := f.Tokenizer().BOSTokenID
226+
227+
isYI := arch == "llama" && bosTokenID == 1 && eosTokenID == 2
228+
// WTF! Mistral0.3 and isYi have same bosTokenID and eosTokenID
229+
230+
llama3 := arch == "llama" && eosTokenID == 128009
231+
commandR := arch == "command-r" && eosTokenID == 255001
232+
qwen2 := arch == "qwen2"
233+
phi3 := arch == "phi-3"
234+
gemma := strings.HasPrefix(arch, "gemma") || strings.Contains(strings.ToLower(f.Model().Name), "gemma")
235+
deepseek2 := arch == "deepseek2"
236+
237+
switch {
238+
case deepseek2:
239+
return DeepSeek2
240+
case gemma:
241+
return Gemma
242+
case llama3:
243+
return LLaMa3
244+
case commandR:
245+
return CommandR
246+
case phi3:
247+
return Phi3
248+
case qwen2, isYI:
249+
return ChatML
250+
default:
251+
return Unknown
252+
}
253+
}

0 commit comments

Comments
 (0)