Skip to content

Commit 4d0f6a9

Browse files
authored
Add test matrix (#1308)
* add simple test matrix * add expected metrics to test matrix * update torch to 1.8.1 for test matrix * add overnight flag Co-authored-by: Jesse Swanson <js11133@nyu.edu>
1 parent ee65662 commit 4d0f6a9

File tree

4 files changed

+45
-5
lines changed

4 files changed

+45
-5
lines changed

conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
def pytest_addoption(parser):
99
parser.addoption("--runslow", action="store_true", default=False, help="run slow tests")
1010
parser.addoption("--rungpu", action="store_true", default=False, help="run gpu tests")
11+
parser.addoption("--runovernight", action="store_true",
12+
default=False, help="run overnight tests")
1113

1214

1315
def pytest_configure(config):
@@ -26,3 +28,8 @@ def pytest_collection_modifyitems(config, items):
2628
for item in items:
2729
if "gpu" in item.keywords:
2830
item.add_marker(skip_gpu)
31+
if not config.getoption("--runovernight"):
32+
skip_overnight = pytest.mark.skip(reason="need --runovernight option to run")
33+
for item in items:
34+
if "overnight" in item.keywords:
35+
item.add_marker(skip_overnight)

requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
-r requirements-no-torch.txt
2-
torch>=1.5.0
3-
torchvision==0.6.0
2+
torch>=1.8.1
3+
torchvision==0.9.1

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,10 @@
7373
"scipy == 1.4.1",
7474
"sentencepiece == 0.1.86",
7575
"tokenizers == 0.8.1.rc2",
76-
"torch >= 1.5.0",
76+
"torch >= 1.8.1",
7777
"tqdm == 4.46.0",
7878
"transformers == 3.1.0",
79-
"torchvision == 0.6.0",
79+
"torchvision == 0.9.1",
8080
],
8181
extras_require=extras,
8282
python_requires=">=3.6.0",

tests/proj/simple/test_runscript.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
11
import os
22
import pytest
33
import torch
4+
import math
45

56
import jiant.utils.python.io as py_io
67
from jiant.proj.simple import runscript as run
78
import jiant.scripts.download_data.runscript as downloader
89
import jiant.utils.torch_utils as torch_utils
910

11+
EXPECTED_AGG_VAL_METRICS = {"bert-base-cased": {"rte": 0.5740072202166066, "commonsenseqa": 0.4258804258804259, "squad_v1": 29.071789929086883},
12+
"roberta-base": {"rte": 0.49458483754512633, "commonsenseqa": 0.23013923013923013, "squad_v1": 48.222444172918955},
13+
"xlm-roberta-base": {"rte": 0.4729241877256318, "commonsenseqa": 0.22686322686322685, "squad_v1": 10.30104037978786}}
14+
1015

11-
@pytest.mark.gpu
1216
@pytest.mark.parametrize("task_name", ["copa"])
1317
@pytest.mark.parametrize("model_type", ["bert-base-cased"])
1418
def test_simple_runscript(tmpdir, task_name, model_type):
@@ -34,6 +38,35 @@ def test_simple_runscript(tmpdir, task_name, model_type):
3438
assert val_metrics["aggregated"] > 0
3539

3640

41+
@pytest.mark.overnight
42+
@pytest.mark.parametrize(("task_name", "train_examples_cap"), [("rte", 1024), ("commonsenseqa", 1024), ("squad_v1", 2048)])
43+
@pytest.mark.parametrize("model_type", ["bert-base-cased", "roberta-base", "xlm-roberta-base"])
44+
def test_simple_runscript(tmpdir, task_name, train_examples_cap, model_type):
45+
RUN_NAME = f"{test_simple_runscript.__name__}_{task_name}_{model_type}"
46+
data_dir = str(tmpdir.mkdir("data"))
47+
exp_dir = str(tmpdir.mkdir("exp"))
48+
49+
torch.use_deterministic_algorithms(True)
50+
51+
downloader.download_data([task_name], data_dir)
52+
args = run.RunConfiguration(
53+
run_name=RUN_NAME,
54+
exp_dir=exp_dir,
55+
data_dir=data_dir,
56+
hf_pretrained_model_name_or_path=model_type,
57+
tasks=task_name,
58+
train_examples_cap=train_examples_cap,
59+
train_batch_size=32,
60+
seed=42,
61+
no_cuda=False,
62+
)
63+
run.run_simple(args)
64+
65+
val_metrics = py_io.read_json(os.path.join(exp_dir, "runs", RUN_NAME, "val_metrics.json"))
66+
assert math.isclose(val_metrics["aggregated"], EXPECTED_AGG_VAL_METRICS[model_type][task_name])
67+
torch.use_deterministic_algorithms(False)
68+
69+
3770
@pytest.mark.gpu
3871
@pytest.mark.parametrize("task_name", ["copa"])
3972
@pytest.mark.parametrize("model_type", ["roberta-large"])

0 commit comments

Comments
 (0)