1
1
import os
2
2
import pytest
3
3
import torch
4
+ import math
4
5
5
6
import jiant .utils .python .io as py_io
6
7
from jiant .proj .simple import runscript as run
7
8
import jiant .scripts .download_data .runscript as downloader
8
9
import jiant .utils .torch_utils as torch_utils
9
10
11
# Reference values for the "aggregated" validation metric, keyed first by
# pretrained model name and then by task name. The overnight run-script test
# compares each run's val_metrics["aggregated"] against the matching entry.
EXPECTED_AGG_VAL_METRICS = {
    "bert-base-cased": {
        "rte": 0.5740072202166066,
        "commonsenseqa": 0.4258804258804259,
        "squad_v1": 29.071789929086883,
    },
    "roberta-base": {
        "rte": 0.49458483754512633,
        "commonsenseqa": 0.23013923013923013,
        "squad_v1": 48.222444172918955,
    },
    "xlm-roberta-base": {
        "rte": 0.4729241877256318,
        "commonsenseqa": 0.22686322686322685,
        "squad_v1": 10.30104037978786,
    },
}
14
+
10
15
11
- @pytest .mark .gpu
12
16
@pytest .mark .parametrize ("task_name" , ["copa" ])
13
17
@pytest .mark .parametrize ("model_type" , ["bert-base-cased" ])
14
18
def test_simple_runscript (tmpdir , task_name , model_type ):
@@ -34,6 +38,35 @@ def test_simple_runscript(tmpdir, task_name, model_type):
34
38
assert val_metrics ["aggregated" ] > 0
35
39
36
40
41
@pytest.mark.overnight
@pytest.mark.parametrize(
    ("task_name", "train_examples_cap"),
    [("rte", 1024), ("commonsenseqa", 1024), ("squad_v1", 2048)],
)
@pytest.mark.parametrize("model_type", ["bert-base-cased", "roberta-base", "xlm-roberta-base"])
def test_simple_runscript(tmpdir, task_name, train_examples_cap, model_type):
    """Run the simple run-script end to end and compare against a reference metric.

    For each (task, model) combination the test downloads the task data into a
    temporary directory, runs ``run.run_simple`` with a fixed seed (42), a
    train batch size of 32 and the given ``train_examples_cap``, then reads
    ``val_metrics.json`` from the run directory and asserts that its
    ``"aggregated"`` value matches ``EXPECTED_AGG_VAL_METRICS[model_type][task_name]``
    under ``math.isclose`` with its default tolerances.

    Args:
        tmpdir: pytest temporary-directory fixture; ``data`` and ``exp``
            sub-directories are created inside it.
        task_name: name of the task to download and run.
        train_examples_cap: value forwarded to ``RunConfiguration.train_examples_cap``.
        model_type: value forwarded as ``hf_pretrained_model_name_or_path``.
    """
    # NOTE(review): this function has the same name as the "copa" smoke test
    # defined earlier in this module, so it shadows that test at module level.
    # Consider giving one of them a distinct name.
    RUN_NAME = f"{test_simple_runscript.__name__}_{task_name}_{model_type}"
    data_dir = str(tmpdir.mkdir("data"))
    exp_dir = str(tmpdir.mkdir("exp"))

    # Deterministic mode is a process-wide torch setting. Remember what it was
    # and restore it in ``finally`` so a failing run (exception or assertion)
    # does not leave it switched on for the tests that follow.
    previously_deterministic = torch.are_deterministic_algorithms_enabled()
    torch.use_deterministic_algorithms(True)
    try:
        downloader.download_data([task_name], data_dir)
        args = run.RunConfiguration(
            run_name=RUN_NAME,
            exp_dir=exp_dir,
            data_dir=data_dir,
            hf_pretrained_model_name_or_path=model_type,
            tasks=task_name,
            train_examples_cap=train_examples_cap,
            train_batch_size=32,
            seed=42,
            no_cuda=False,
        )
        run.run_simple(args)

        val_metrics = py_io.read_json(
            os.path.join(exp_dir, "runs", RUN_NAME, "val_metrics.json")
        )
        assert math.isclose(
            val_metrics["aggregated"], EXPECTED_AGG_VAL_METRICS[model_type][task_name]
        )
    finally:
        torch.use_deterministic_algorithms(previously_deterministic)
68
+
69
+
37
70
@pytest .mark .gpu
38
71
@pytest .mark .parametrize ("task_name" , ["copa" ])
39
72
@pytest .mark .parametrize ("model_type" , ["roberta-large" ])
0 commit comments