@@ -193,6 +193,14 @@ def download_tiny_shakespeare_dataset():
193
193
snapshot_download_w_retry ("winglian/tiny-shakespeare" , repo_type = "dataset" )
194
194
195
195
196
@pytest.fixture(scope="session", autouse=True)
def download_evolkit_kd_sample_dataset():
    """Request the evolkit logprobs sample dataset repo once per test session.

    Registered as a session-scoped, autouse fixture, so pytest invokes it
    without any test having to name it.
    """
    # download the dataset
    dataset_repo_id = "axolotl-ai-co/evolkit-logprobs-pipeline-75k-v2-sample"
    snapshot_download_w_retry(dataset_repo_id, repo_type="dataset")
202
+
203
+
196
204
@pytest.fixture(scope="session", autouse=True)
def download_deepseek_model_fixture():
    """Request the DeepSeek-V3-11M model repo once per test session.

    Registered as a session-scoped, autouse fixture, so pytest invokes it
    without any test having to name it.
    """
    model_repo_id = "axolotl-ai-co/DeepSeek-V3-11M"
    snapshot_download_w_retry(model_repo_id, repo_type="model")
@@ -208,6 +216,16 @@ def download_huggyllama_model_fixture():
208
216
)
209
217
210
218
219
@pytest.fixture(scope="session", autouse=True)
def download_llama33_70b_model_fixture():
    """Request the Llama-3.3-70B-Instruct tokenizer repo once per test session.

    Registered as a session-scoped, autouse fixture, so pytest invokes it
    without any test having to name it.
    """
    # download the tokenizer only: restrict the request to files matching
    # "*token*" plus config.json
    tokenizer_only_patterns = ["*token*", "config.json"]
    snapshot_download_w_retry(
        "axolotl-ai-co/Llama-3.3-70B-Instruct-tokenizer",
        repo_type="model",
        allow_patterns=tokenizer_only_patterns,
    )
227
+
228
+
211
229
@pytest .fixture (scope = "session" , autouse = True )
212
230
def download_llama_1b_model_fixture ():
213
231
# download the tokenizer only
@@ -315,6 +333,14 @@ def download_llama2_model_fixture():
315
333
)
316
334
317
335
336
@pytest.fixture(scope="session", autouse=True)
def download_llama32_1b_model_fixture():
    """Request the Llama-3.2-1B model repo once per test session.

    Registered as a session-scoped, autouse fixture, so pytest invokes it
    without any test having to name it.
    """
    model_repo_id = "osllmai-community/Llama-3.2-1B"
    snapshot_download_w_retry(model_repo_id, repo_type="model")
342
+
343
+
318
344
@pytest .fixture
319
345
@enable_hf_offline
320
346
def tokenizer_huggyllama (
0 commit comments