forked from adrianliechti/llama
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTaskfile.llama.yml
37 lines (32 loc) · 897 Bytes
/
Taskfile.llama.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# https://taskfile.dev
#
# Tasks for running a local llama-server instance with the Llama 3.1 8B
# Instruct model (Q4_K_M GGUF file fetched from Hugging Face).
version: "3"

tasks:
  # Start llama-server on port 9081. The download-model dependency runs first
  # so the model file referenced by --model is present.
  server:
    deps: [download-model]
    cmds:
      # Folded scalar: the lines below are joined with spaces into one command.
      - >-
        llama-server
        --port 9081
        --log-disable
        --ctx-size 8192
        --flash-attn
        --model ./models/llama-3.1-8b-instruct.gguf

  # Fetch the model weights into ./models. Skipped (see `status`) once the
  # final file exists.
  download-model:
    cmds:
      - mkdir -p models
      # --fail makes curl exit non-zero on an HTTP error instead of saving the
      # error page as the model; -S still prints the error despite -s.
      # The transfer goes to a .part file and is renamed only after curl
      # succeeds, so a failed or interrupted download can never satisfy the
      # status check below. The URL is quoted so "?" is not treated as a glob.
      - >-
        curl --fail -s -S -L
        -o models/llama-3.1-8b-instruct.gguf.part
        "https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf?download=true"
      - mv models/llama-3.1-8b-instruct.gguf.part models/llama-3.1-8b-instruct.gguf
    status:
      # Task treats download-model as up to date when this exits 0.
      - test -f models/llama-3.1-8b-instruct.gguf

  # Smoke test: post a single chat message to the server started by `server`
  # (same port, 9081) and print the response.
  test:
    cmds:
      - |
        curl http://localhost:9081/v1/chat/completions \
          -H "Content-Type: application/json" \
          -d '{
            "model": "default",
            "messages": [
              {
                "role": "user",
                "content": "Hello!"
              }
            ]
          }'