Skip to content

Commit 4e336fc

Browse files
committed
add parameter sweeping
1 parent 4d31316 commit 4e336fc

File tree

2 files changed

+97
-68
lines changed

2 files changed

+97
-68
lines changed
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
2+
import argparse
3+
import json
4+
import yaml
5+
import os
6+
from pathlib import Path
7+
8+
def load(path):
    """Read the JSON document stored at ``path`` and return the parsed value.

    Args:
        path: Location of the JSON file, as a ``str`` or ``pathlib.Path``.

    Returns:
        The decoded JSON value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # JSON text is UTF-8; relying on the locale's default encoding can
    # mis-decode non-ASCII content on some platforms, so be explicit.
    with open(path, encoding="utf-8") as f:
        return json.load(f)
12+
13+
def main(args):
    """Compare chunked-prefill results with disaggregated prefill/decode.

    Loads three benchmark result files from ``args.results_folder``, derives
    latency ratios from their ``mean_ttft_ms`` / ``mean_itl_ms`` fields, and
    appends a one-element YAML list to ``args.output_file`` in that folder.

    Args:
        args: Parsed command-line namespace providing ``results_folder``,
            ``output_file``, ``qps`` and ``output_len``.
    """
    folder = Path(args.results_folder)

    chunked, disagg_prefill, disagg_decode = (
        load(folder / filename)
        for filename in (
            "chunked_prefill_tp4.json",
            "disagg_prefill_tp4.json",
            "disagg_decode_tp4.json",
        )
    )

    # Time-to-first-token: chunked prefill relative to the prefill-only run.
    chunk_ttft = chunked["mean_ttft_ms"]
    disagg_ttft = disagg_prefill["mean_ttft_ms"]
    ttft_ratio = chunk_ttft / disagg_ttft

    # Inter-token latency: chunked prefill relative to the decode-only run.
    chunk_itl = chunked["mean_itl_ms"]
    disagg_itl = disagg_decode["mean_itl_ms"]
    itl_ratio = chunk_itl / disagg_itl

    # Prefill time divided by (decode inter-token latency * output length).
    prefill_decode_ratio = disagg_ttft / (disagg_itl * args.output_len)

    # Append mode: each invocation adds one more list item to the same file.
    with (folder / args.output_file).open('a') as out:
        record = {
            'qps': args.qps,
            'output_len': args.output_len,
            'prefill_decode_ratio': prefill_decode_ratio,
            'ttft_ratio': ttft_ratio,
            'itl_ratio': itl_ratio,
            "chunk_ttft": chunk_ttft,
            "chunk_itl": chunk_itl,
            "disagg_ttft": disagg_ttft,
            "disagg_itl": disagg_itl,
        }
        out.write(yaml.dump([record]))
37+
38+
39+
if __name__ == "__main__":
    # Script entry point: parse the sweep parameters, then run the analysis.
    cli = argparse.ArgumentParser(description="Analyze benchmark results")
    cli.add_argument(
        "--results-folder",
        required=True,
        help="Path to the results folder",
    )
    cli.add_argument(
        "--output-len",
        type=int,
        required=True,
        help="Target output length",
    )
    cli.add_argument(
        "--qps",
        type=int,
        required=True,
        help="Target QPS",
    )
    cli.add_argument(
        "--output-file",
        type=str,
        default="chunk_vs_disagg.yaml",
    )

    main(cli.parse_args())

benchmarks/disagg_benchmarks/disagg_benchmark.sh

Lines changed: 50 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -41,86 +41,31 @@ wait_for_server() {
4141
}
4242

4343

44-
main() {
45-
46-
(which wget && which curl) || (apt-get update && apt-get install -y wget curl)
47-
(which jq) || (apt-get -y install jq)
48-
(which socat) || (apt-get -y install socat)
44+
benchmark() {
4945

50-
cd "$(dirname "$0")"
46+
# compare chunked prefill with disaggregated prefill
5147

52-
cd ..
53-
# create sonnet-4x.txt
54-
echo "" > sonnet_4x.txt
55-
for _ in {1..4}
56-
do
57-
cat sonnet.txt >> sonnet_4x.txt
58-
done
59-
cd disagg_benchmarks
60-
61-
62-
mkdir -p results
6348
results_folder="./results"
6449
model="neuralmagic/Meta-Llama-3-70B-Instruct-FP8"
6550
dataset_name="sonnet"
6651
dataset_path="../sonnet_4x.txt"
6752
num_prompts=500
68-
qps=4
53+
qps=$1
6954
prefix_len=64
7055
input_len=2048
71-
output_len=11
72-
73-
74-
# chunked prefill with tp=8
75-
python3 -m vllm.entrypoints.openai.api_server \
76-
--model $model \
77-
--port 8000 \
78-
-tp 8 \
79-
--disable-log-stats \
80-
--disable-log-requests \
81-
--enable-chunked-prefill &
82-
wait_for_server 8000
83-
84-
python3 ../benchmark_serving.py \
85-
--backend vllm \
86-
--model $model \
87-
--dataset-name $dataset_name \
88-
--dataset-path $dataset_path \
89-
--sonnet-input-len $input_len \
90-
--sonnet-output-len $output_len \
91-
--sonnet-prefix-len $prefix_len \
92-
--num-prompts $num_prompts \
93-
--port 8000 \
94-
--save-result \
95-
--result-dir $results_folder \
96-
--result-filename chunked_prefill_tp8.json \
97-
--request-rate $qps
98-
kill_gpu_processes
56+
output_len=$2
9957

10058

10159
# chunked prefill with tp=4
10260
CUDA_VISIBLE_DEVICES=0,1,2,3 python3 \
10361
-m vllm.entrypoints.openai.api_server \
10462
--model $model \
105-
--port 8100 \
63+
--port 8000 \
10664
-tp 4 \
10765
--disable-log-stats \
10866
--disable-log-requests \
10967
--enable-chunked-prefill &
110-
111-
# CUDA_VISIBLE_DEVICES=4,5,6,7 python3 \
112-
# -m vllm.entrypoints.openai.api_server \
113-
# --model $model \
114-
# --port 8200 \
115-
# -tp 4 \
116-
# --disable-log-stats \
117-
# --disable-log-requests \
118-
# --enable-chunked-prefill &
119-
120-
wait_for_server 8100
121-
# wait_for_server 8200
122-
# # launch round robin proxy
123-
# bash ./round_robin_proxy.sh &
68+
wait_for_server 8000
12469

12570
python3 ../benchmark_serving.py \
12671
--backend vllm \
@@ -131,17 +76,15 @@ main() {
13176
--sonnet-output-len $output_len \
13277
--sonnet-prefix-len $prefix_len \
13378
--num-prompts $((num_prompts / 2)) \
134-
--port 8100 \
79+
--port 8000 \
13580
--save-result \
13681
--result-dir $results_folder \
13782
--result-filename chunked_prefill_tp4.json \
13883
--request-rate $((qps / 2))
13984
kill_gpu_processes
140-
# pkill -f round_robin_proxy.sh
14185

14286

14387
# disaggregated prefill
144-
14588
# prefill with tp=4
14689
python3 -m vllm.entrypoints.openai.api_server \
14790
--model $model \
@@ -150,7 +93,6 @@ main() {
15093
--disable-log-stats \
15194
--disable-log-requests &
15295
wait_for_server 8000
153-
15496
# set output-len to 1 so that it only does prefilling
15597
python3 ../benchmark_serving.py \
15698
--backend vllm \
@@ -177,7 +119,6 @@ main() {
177119
--disable-log-stats \
178120
--disable-log-requests &
179121
wait_for_server 8000
180-
181122
# skip prefilling
182123
# by enabling APC and forcing the input tokens to be the same
183124
python3 ../benchmark_serving.py \
@@ -187,7 +128,7 @@ main() {
187128
--dataset-path $dataset_path \
188129
--sonnet-input-len $input_len \
189130
--sonnet-output-len $output_len \
190-
--sonnet-prefix-len $((input_len - 1)) \
131+
--sonnet-prefix-len $input_len \
191132
--num-prompts $num_prompts \
192133
--port 8000 \
193134
--save-result \
@@ -196,7 +137,48 @@ main() {
196137
--request-rate $qps
197138
kill_gpu_processes
198139

140+
python3 analyze_results.py \
141+
--results-folder $results_folder \
142+
--output-len $output_len \
143+
--qps $qps
144+
145+
}
146+
147+
148+
main() {

  # Install the command-line tools used by the benchmark when they are missing.
  (which wget && which curl) || (apt-get update && apt-get install -y wget curl)
  local tool
  for tool in jq socat; do
    (which "$tool") || (apt-get -y install "$tool")
  done

  # Work relative to the directory that contains this script.
  cd "$(dirname "$0")"

  # Create sonnet_4x.txt in the parent directory: one empty line followed by
  # four copies of sonnet.txt.
  cd ..
  printf '\n' > sonnet_4x.txt
  cat sonnet.txt sonnet.txt sonnet.txt sonnet.txt >> sonnet_4x.txt
  cd disagg_benchmarks

  # Start each sweep from an empty results directory.
  rm -rf results
  mkdir results

  default_qps=4
  default_output_len=12

  # Sweep the request rate while holding the output length at its default.
  for target_qps in 1 2 4 8 16; do
    benchmark "$target_qps" "$default_output_len"
  done

  # Sweep the output length while holding the request rate at its default.
  for output_len in 5 10 20 40 80; do
    benchmark "$default_qps" "$output_len"
  done

}
200182

201183

202-
main "$@"
184+
main "$@"

0 commit comments

Comments
 (0)