Skip to content

Commit d4cd49e

Browse files
authored
Merge branch 'main' into finagent-v1.3
2 parents 5e97ec3 + 13dd27e commit d4cd49e

File tree

15 files changed

+15
-15
lines changed

AgentQnA/kubernetes/helm/cpu-values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ tgi:
66
vllm:
77
enabled: true
88
LLM_MODEL_ID: "meta-llama/Meta-Llama-3-8B-Instruct"
9-
extraCmdArgs: ["--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
9+
extraCmdArgs: ["--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
1010

1111
supervisor:
1212
llm_endpoint_url: http://{{ .Release.Name }}-vllm

AgentQnA/kubernetes/helm/gaudi-values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ vllm:
1919
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
2020
VLLM_SKIP_WARMUP: true
2121
shmSize: 16Gi
22-
extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq_len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
22+
extraCmdArgs: ["--tensor-parallel-size", "4", "--max-seq-len-to-capture", "16384", "--enable-auto-tool-choice", "--tool-call-parser", "llama3_json"]
2323

2424
supervisor:
2525
llm_endpoint_url: http://{{ .Release.Name }}-vllm

AudioQnA/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ services:
6262
cap_add:
6363
- SYS_NICE
6464
ipc: host
65-
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq_len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
65+
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq-len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
6666
audioqna-gaudi-backend-server:
6767
image: ${REGISTRY:-opea}/audioqna:${TAG:-latest}
6868
container_name: audioqna-gaudi-backend-server

AudioQnA/kubernetes/helm/gaudi-values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ vllm:
2121
"--tensor-parallel-size", "1",
2222
"--block-size", "128",
2323
"--max-num-seqs", "256",
24-
"--max-seq_len-to-capture", "2048"
24+
"--max-seq-len-to-capture", "2048"
2525
]
2626

2727
whisper:

ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ services:
110110
cap_add:
111111
- SYS_NICE
112112
ipc: host
113-
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
113+
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq-len-to-capture 2048
114114
chatqna-gaudi-backend-server:
115115
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
116116
container_name: chatqna-gaudi-backend-server

ChatQnA/docker_compose/intel/hpu/gaudi/compose_faqgen.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ services:
108108
cap_add:
109109
- SYS_NICE
110110
ipc: host
111-
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
111+
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq-len-to-capture 2048
112112
llm-faqgen:
113113
image: ${REGISTRY:-opea}/llm-faqgen:${TAG:-latest}
114114
container_name: llm-faqgen-server

ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ services:
139139
cap_add:
140140
- SYS_NICE
141141
ipc: host
142-
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
142+
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq-len-to-capture 2048
143143
chatqna-gaudi-backend-server:
144144
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
145145
container_name: chatqna-gaudi-guardrails-server

ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ services:
7979
cap_add:
8080
- SYS_NICE
8181
ipc: host
82-
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq_len-to-capture 2048
82+
command: --model ${LLM_MODEL_ID} --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size 128 --max-num-seqs 256 --max-seq-len-to-capture 2048
8383
chatqna-gaudi-backend-server:
8484
image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
8585
container_name: chatqna-gaudi-backend-server

ChatQnA/kubernetes/helm/faqgen-gaudi-values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ vllm:
3030
"--tensor-parallel-size", "1",
3131
"--block-size", "128",
3232
"--max-num-seqs", "256",
33-
"--max-seq_len-to-capture", "2048"
33+
"--max-seq-len-to-capture", "2048"
3434
]
3535

3636
# Reranking: second largest bottleneck when reranking is in use

ChatQnA/kubernetes/helm/gaudi-values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ vllm:
2525
"--tensor-parallel-size", "1",
2626
"--block-size", "128",
2727
"--max-num-seqs", "256",
28-
"--max-seq_len-to-capture", "2048"
28+
"--max-seq-len-to-capture", "2048"
2929
]
3030

3131
# Reranking: second largest bottleneck when reranking is in use

ChatQnA/kubernetes/helm/guardrails-gaudi-values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,5 +90,5 @@ vllm:
9090
"--tensor-parallel-size", "1",
9191
"--block-size", "128",
9292
"--max-num-seqs", "256",
93-
"--max-seq_len-to-capture", "2048"
93+
"--max-seq-len-to-capture", "2048"
9494
]

CodeTrans/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ services:
2828
cap_add:
2929
- SYS_NICE
3030
ipc: host
31-
command: --model $LLM_MODEL_ID --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq_len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
31+
command: --model $LLM_MODEL_ID --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq-len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
3232
llm:
3333
image: ${REGISTRY:-opea}/llm-textgen:${TAG:-latest}
3434
container_name: codetrans-xeon-llm-server

CodeTrans/kubernetes/helm/gaudi-values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ vllm:
2525
"--tensor-parallel-size", "1",
2626
"--block-size", "128",
2727
"--max-num-seqs", "256",
28-
"--max-seq_len-to-capture", "2048"
28+
"--max-seq-len-to-capture", "2048"
2929
]
3030

3131
llm-uservice:

DocSum/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ services:
2828
cap_add:
2929
- SYS_NICE
3030
ipc: host
31-
command: --model $LLM_MODEL_ID --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq_len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
31+
command: --model $LLM_MODEL_ID --tensor-parallel-size ${NUM_CARDS} --host 0.0.0.0 --port 80 --block-size ${BLOCK_SIZE} --max-num-seqs ${MAX_NUM_SEQS} --max-seq-len-to-capture ${MAX_SEQ_LEN_TO_CAPTURE}
3232

3333
llm-docsum-vllm:
3434
image: ${REGISTRY:-opea}/llm-docsum:${TAG:-latest}

DocSum/kubernetes/helm/gaudi-values.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,5 @@ vllm:
2828
"--tensor-parallel-size", "1",
2929
"--block-size", "128",
3030
"--max-num-seqs", "256",
31-
"--max-seq_len-to-capture", "2048"
31+
"--max-seq-len-to-capture", "2048"
3232
]

0 commit comments

Comments (0)