run_experiment.sh
# Required environment variables:
# TAG: tag for the trial
# TYPE: finetune / prompt / prompt-demo
# TASK: SST-2 / sst-5 / cr / CoLA / MNLI / QNLI
# BS: batch size
# LR: learning rate
# SupCon_LR: learning rate for the SupCon loss (passed to --supcon_learning_rate)
# SEED: random seed (13 / 21 / 42 / 87 / 100)
# MODEL: pre-trained model name (roberta-*, bert-*); see the Transformers model list
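#
# Example invocation (illustrative values only; adjust to your setup):
#   TAG=exp TYPE=prompt TASK=SST-2 BS=8 LR=1e-5 SupCon_LR=1e-5 SEED=42 \
#   MODEL=roberta-large bash run_experiment.sh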
# Number of training instances per label
K=16
# Training steps
MAX_STEP=1000
# Validation steps
EVAL_STEP=100
# Task-specific parameters
# The default maximum sequence length is 128 and the default number of samples is 16.
# For some tasks we use a longer maximum length or --double_demo (when using demonstrations, double the maximum length).
# For some tasks we use a smaller number of samples to save time (their test sets are large).
# These parameters were chosen by inspecting the data distributions.
TASK_EXTRA=""
case $TASK in
    CoLA)
        TEMPLATE=*cls**sent_0*_This_is*mask*.*sep+*
        MAPPING="{'0':'incorrect','1':'correct'}"
        ;;
    SST-2)
        TEMPLATE=*cls**sent_0*_It_was*mask*.*sep+*
        MAPPING="{'0':'terrible','1':'great'}"
        ;;
    MNLI)
        TEMPLATE=*cls**sent-_0*?*mask*,*+sentl_1**sep+*
        MAPPING="{'contradiction':'No','entailment':'Yes','neutral':'Maybe'}"
        TASK_EXTRA="--max_seq_len 256 --num_sample 4"
        ;;
    QNLI)
        TEMPLATE=*cls**sent-_0*?*mask*,*+sentl_1**sep+*
        MAPPING="{'not_entailment':'No','entailment':'Yes'}"
        ;;
    sst-5)
        TEMPLATE=*cls**sent_0*_It_was*mask*.*sep+*
        MAPPING="{0:'terrible',1:'bad',2:'okay',3:'good',4:'great'}"
        TASK_EXTRA="--first_sent_limit 110 --other_sent_limit 20 --double_demo"
        ;;
    cr)
        TEMPLATE=*cls**sent_0*_It_was*mask*.*sep+*
        MAPPING="{0:'terrible',1:'great'}"
        TASK_EXTRA="--first_sent_limit 110 --other_sent_limit 50 --double_demo"
        ;;
esac
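# To support another task, add an arm to the case statement above. A hypothetical
# sketch (the task name, template, mapping, and limits below are illustrative, not tuned):
#   MRPC)
#       TEMPLATE=*cls**sent_0**mask*,*+sentl_1**sep+*
#       MAPPING="{'0':'No','1':'Yes'}"
#       TASK_EXTRA="--max_seq_len 256"
#       ;;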
# Gradient accumulation steps
# Medium-sized GPUs (e.g., a 2080 Ti with ~10GB of memory) can only fit a
# maximum batch size of 2 when using large models, so gradient accumulation
# steps can be used to achieve the effect of a larger batch size.
GS=1
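# Illustrative sketch (not the default above): to keep the effective batch size
# equal to $BS while only fitting a per-device batch of 2, one could instead set
#   GS=$(expr $BS / 2)
# and pass --per_device_train_batch_size 2 to run.py below
# (effective batch size = per-device batch size x gradient accumulation steps).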
# Use a random number to distinguish different trials (avoids accidental overwriting)
TRIAL_IDTF=$RANDOM
DATA_DIR=data/k-shot/$TASK/$K-$SEED
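# Optional guard (not part of the original flow): fail fast if the k-shot split
# for this task/seed has not been generated under data/k-shot yet.
#   [ -d "$DATA_DIR" ] || { echo "Missing data dir: $DATA_DIR" >&2; exit 1; }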
python run.py \
    --task_name "$TASK" \
    --data_dir "$DATA_DIR" \
    --overwrite_output_dir \
    --do_train \
    --do_eval \
    --do_predict \
    --evaluate_during_training \
    --model_name_or_path "$MODEL" \
    --few_shot_type "$TYPE" \
    --num_k "$K" \
    --max_seq_length 128 \
    --per_device_train_batch_size "$BS" \
    --per_device_eval_batch_size 16 \
    --gradient_accumulation_steps "$GS" \
    --learning_rate "$LR" \
    --max_steps "$MAX_STEP" \
    --logging_steps "$EVAL_STEP" \
    --eval_steps "$EVAL_STEP" \
    --num_train_epochs 0 \
    --output_dir "result/$TASK-$TYPE-$K-$SEED-$MODEL-$TRIAL_IDTF" \
    --seed "$SEED" \
    --tag "$TAG" \
    --template "$TEMPLATE" \
    --mapping "$MAPPING" \
    --template_list_location "auto_template/$TASK/16-$SEED.sort.txt" \
    --supcon_learning_rate "$SupCon_LR" \
    $TASK_EXTRA \
    $1
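# $TASK_EXTRA and $1 are intentionally left unquoted so they can expand into
# multiple arguments; any extra run.py flags can therefore be forwarded as a
# single quoted string, e.g. (placeholder flag shown for illustration only):
#   bash run_experiment.sh "--some_extra_flag value"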
# Delete the checkpoint
# Since we run multiple trials, saving every checkpoint would take a lot of
# storage space. All evaluation results can still be found in the `log` file.
rm -r "result/$TASK-$TYPE-$K-$SEED-$MODEL-$TRIAL_IDTF"