-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy path: olmo-hf-urial.sh
52 lines (48 loc) · 1.73 KB
/
olmo-hf-urial.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
version=$1
temp=${2:-0}
rp=${3:-1}
output_dir="result_dirs/mt-bench/urial_bench/"
mkdir -p $output_dir
gpus=${4:-"0"}
tsp=1
n_shards=4
shard_size=20
start_gpu=0
for ((start = 0, end = (($shard_size)), gpu = $start_gpu; gpu < $n_shards+$start_gpu; start += $shard_size, end += $shard_size, gpu++)); do
CUDA_VISIBLE_DEVICES=$gpu python src/unified_infer.py \
--start_index $start --end_index $end \
--engine hf \
--urial $version \
--download_dir /net/nfs/s2-research/llama2/ \
--model_name allenai/OLMo-7B \
--dtype bfloat16 \
--data_name mt-bench \
--mt_turn 1 \
--no_repeat_ngram_size 3 \
--top_p 1 --temperature $temp --repetition_penalty $rp --batch_size 1 --max_tokens 2048 \
--filepath $output_dir/olmo.turn1.${start}-${end}.json \
--overwrite &
done
wait
python evaluate/merge_results.py $output_dir/ olmo.turn1
n_shards=4
shard_size=20
start_gpu=0
for ((start = 0, end = (($shard_size)), gpu = $start_gpu; gpu < $n_shards+$start_gpu; start += $shard_size, end += $shard_size, gpu++)); do
CUDA_VISIBLE_DEVICES=$gpu python src/unified_infer.py \
--start_index $start --end_index $end \
--engine hf \
--urial $version \
--download_dir /net/nfs/s2-research/llama2/ \
--model_name allenai/OLMo-7B \
--dtype bfloat16 \
--data_name mt-bench \
--mt_turn 2 \
--no_repeat_ngram_size 3 \
--mt_turn1_result $output_dir/olmo.turn1.json \
--top_p 1 --temperature $temp --repetition_penalty $rp --batch_size 1 --max_tokens 2048 \
--filepath $output_dir/olmo.turn2.${start}-${end}.json \
--overwrite &
done
wait
python evaluate/merge_results.py $output_dir/ olmo.turn2