17
17
pytest test/test_run.py -v --asyncio-mode=strict
18
18
19
19
Feel free to add more tests to cover more scenarios.
20
-
20
+ More tests you can try can be found here: https://huggingface.co/datasets/DAMO-NLP-SG/MultiJail
21
21
"""
22
22
23
23
import os
24
+ import time
24
25
import random
25
26
import pytest
26
27
from utils .function_call import run
@@ -85,6 +86,7 @@ async def test_run_send_airtime():
85
86
)
86
87
await run ("qwen2.5:0.5b" , user_prompt )
87
88
assert True
89
+ time .sleep (300 )
88
90
89
91
90
92
@pytest .mark .asyncio
@@ -99,6 +101,7 @@ async def test_run_send_message():
99
101
)
100
102
await run ("qwen2.5:0.5b" , user_prompt )
101
103
assert True
104
+ time .sleep (300 )
102
105
103
106
104
107
@pytest .mark .asyncio
@@ -122,7 +125,7 @@ async def test_run_send_airtime_zero_amount():
122
125
)
123
126
await run ("qwen2.5:0.5b" , user_prompt )
124
127
assert True
125
-
128
+ time . sleep ( 300 )
126
129
127
130
@pytest .mark .asyncio
128
131
async def test_run_send_airtime_invalid_currency ():
@@ -164,7 +167,7 @@ async def test_run_send_airtime_multiple_numbers():
164
167
user_prompt = f"Send airtime to { TEST_PHONE_NUMBER } , { TEST_PHONE_NUMBER_2 } , and { TEST_PHONE_NUMBER_3 } with an amount of 5 in currency KES"
165
168
await run ("qwen2.5:0.5b" , user_prompt )
166
169
assert True
167
-
170
+ time . sleep ( 300 )
168
171
169
172
@pytest .mark .asyncio
170
173
async def test_run_send_airtime_synonym ():
@@ -174,7 +177,7 @@ async def test_run_send_airtime_synonym():
174
177
user_prompt = f"Top-up { TEST_PHONE_NUMBER } with 10 KES airtime."
175
178
await run ("qwen2.5:0.5b" , user_prompt )
176
179
assert True
177
-
180
+ time . sleep ( 300 )
178
181
179
182
@pytest .mark .asyncio
180
183
async def test_run_send_airtime_different_order ():
@@ -184,7 +187,7 @@ async def test_run_send_airtime_different_order():
184
187
user_prompt = f"With an amount of 15 KES, send airtime to { TEST_PHONE_NUMBER } ."
185
188
await run ("qwen2.5:0.5b" , user_prompt )
186
189
assert True
187
-
190
+ time . sleep ( 300 )
188
191
189
192
@pytest .mark .asyncio
190
193
async def test_run_send_message_polite_request ():
@@ -216,7 +219,7 @@ async def test_run_send_airtime_invalid_amount():
216
219
)
217
220
await run ("qwen2.5:0.5b" , user_prompt )
218
221
assert True
219
-
222
+ time . sleep ( 300 )
220
223
221
224
@pytest .mark .asyncio
222
225
async def test_run_send_message_spam_detection ():
@@ -275,7 +278,7 @@ async def test_run_send_message_mixed_arabic_english():
275
278
)
276
279
await run ("qwen2.5:0.5b" , user_prompt )
277
280
assert True
278
-
281
+ time . sleep ( 300 )
279
282
280
283
@pytest .mark .asyncio
281
284
async def test_run_send_message_french ():
@@ -367,7 +370,7 @@ async def test_run_send_airtime_french_keywords():
367
370
user_prompt = f"Recharger le crédit pour { TEST_PHONE_NUMBER } avec un montant de 5 en devise KES"
368
371
await run ("qwen2.5:0.5b" , user_prompt )
369
372
assert True
370
-
373
+ time . sleep ( 300 )
371
374
372
375
@pytest .mark .asyncio
373
376
async def test_run_send_message_portuguese_keywords ():
@@ -435,7 +438,7 @@ async def test_run_send_airtime_arabic_keywords():
435
438
user_prompt = f"اشحن رصيد ل { TEST_PHONE_NUMBER } بمبلغ 5 بعملة KES"
436
439
await run ("qwen2.5:0.5b" , user_prompt )
437
440
assert True
438
-
441
+ time . sleep ( 300 )
439
442
440
443
@pytest .mark .asyncio
441
444
async def test_run_best_of_n_jailbreaking ():
0 commit comments