move testing model parameters into lambdas for late importing of modules (#1119)
paulbkoch authored Feb 9, 2025
1 parent 8217ab3 commit bfd8e60
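
The pattern this commit introduces: each AVAILABLE_MODELS entry becomes a zero-argument lambda, so heavy dependencies (transformers, torch, bitsandbytes) are imported only when a test session actually selects a model that needs them, rather than whenever conftest.py is collected. A minimal sketch of the idea; the EAGER_KWARGS/LAZY_KWARGS names are illustrative, not from the diff:

import importlib

# Eager style: a plain dict forces "import torch" at module import time,
# so merely collecting the test suite pays for the heavy dependency.
#
#   import torch
#   EAGER_KWARGS = {"torch_dtype": torch.bfloat16}

# Lazy style: the lambda body runs only when called, so torch is
# imported at fixture time, and only for the selected model.
LAZY_KWARGS = lambda: {"torch_dtype": importlib.import_module("torch").bfloat16}

# A consumer materializes the kwargs by calling the lambda:
# kwargs = LAZY_KWARGS()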
Showing 2 changed files with 111 additions and 134 deletions.
128 changes: 0 additions & 128 deletions tests/_llms_for_testing.py

This file was deleted.

117 changes: 111 additions & 6 deletions tests/conftest.py
@@ -6,17 +6,122 @@
import uuid
import pytest
import requests
import importlib

from guidance import models

# The naming convention for the keys is "<loader>_<model>_<host>" where:
# - 'loader' is 'transformers' or 'llamacpp'
# - 'model' contains relevant information about the model itself
# - 'host' is 'cpu' or 'gpu' as appropriate

AVAILABLE_MODELS = {}

# GEMMA 2
AVAILABLE_MODELS["llamacpp_gemma2_9b_cpu"] = lambda: dict(
# Note that this model requires an appropriate
# HF_TOKEN environment variable
name="huggingface_hubllama:bartowski/gemma-2-9b-it-GGUF:gemma-2-9b-it-IQ2_XS.gguf",
kwargs={"verbose": True, "n_ctx": 4096},
)
AVAILABLE_MODELS["transformers_gemma2_9b_cpu"] = lambda: dict(
# Note that this model requires an appropriate
# HF_TOKEN environment variable
name="transformers:google/gemma-2-9b-it",
kwargs={
"quantization_config": importlib.import_module("transformers").BitsAndBytesConfig(load_in_8bit=True),
},
)
AVAILABLE_MODELS["transformers_gemma2_9b_gpu"] = lambda: dict(
# Note that this model requires an appropriate
# HF_TOKEN environment variable
name="transformers:google/gemma-2-9b-it",
kwargs={
"device_map": "cuda:0",
"quantization_config": importlib.import_module("transformers").BitsAndBytesConfig(load_in_4bit=True),
},
)

# GPT 2
AVAILABLE_MODELS["transformers_gpt2_cpu"] = lambda: dict(name="transformers:gpt2", kwargs=dict())
AVAILABLE_MODELS["transformers_gpt2_gpu"] = lambda: dict(name="transformers:gpt2", kwargs={"device_map": "cuda:0"})

# LLAMA 2
AVAILABLE_MODELS["llamacpp_llama2_7b_cpu"] = lambda: dict(
name="huggingface_hubllama:TheBloke/Llama-2-7B-GGUF:llama-2-7b.Q5_K_M.gguf",
kwargs={"verbose": True, "n_ctx": 4096},
)
AVAILABLE_MODELS["llamacpp_llama2_7b_gpu"] = lambda: dict(
name="huggingface_hubllama:TheBloke/Llama-2-7B-GGUF:llama-2-7b.Q5_K_M.gguf",
kwargs={"verbose": True, "n_gpu_layers": -1, "n_ctx": 4096},
)

# LLAMA 3
AVAILABLE_MODELS["transformers_llama3_8b_cpu"] = lambda: dict(
# Note that this model requires an appropriate
# HF_TOKEN environment variable
name="transformers:meta-llama/Meta-Llama-3-8B-Instruct",
kwargs={"trust_remote_code": True, "torch_dtype": importlib.import_module("torch").bfloat16},
)
AVAILABLE_MODELS["transformers_llama3_8b_gpu"] = lambda: dict(
# Note that this model requires an appropriate
# HF_TOKEN environment variable
name="transformers:meta-llama/Meta-Llama-3-8B-Instruct",
kwargs={"trust_remote_code": True, "torch_dtype": importlib.import_module("torch").bfloat16, "device_map": "cuda:0"},
)

# MISTRAL
AVAILABLE_MODELS["transformers_mistral_7b_cpu"] = lambda: dict(
name="transformers:mistralai/Mistral-7B-v0.1", kwargs=dict()
)
AVAILABLE_MODELS["llamacpp_mistral_7b_cpu"] = lambda: dict(
name="huggingface_hubllama:TheBloke/Mistral-7B-Instruct-v0.2-GGUF:mistral-7b-instruct-v0.2.Q8_0.gguf",
kwargs={"verbose": True, "n_ctx": 2048},
)

# PHI 2
AVAILABLE_MODELS["transformers_phi2_cpu"] = lambda: dict(
name="transformers:microsoft/phi-2", kwargs={"trust_remote_code": True}
)
AVAILABLE_MODELS["transformers_phi2_gpu"] = lambda: dict(
name="transformers:microsoft/phi-2",
kwargs={"trust_remote_code": True, "device_map": "cuda:0"},
)

# PHI 3
AVAILABLE_MODELS["transformers_phi3_mini_4k_instruct_cpu"] = lambda: dict(
name="transformers:microsoft/Phi-3-mini-4k-instruct",
kwargs={"trust_remote_code": True},
)
AVAILABLE_MODELS["llamacpp_phi3_mini_4k_instruct_cpu"] = lambda: dict(
name="huggingface_hubllama:microsoft/Phi-3-mini-4k-instruct-gguf:Phi-3-mini-4k-instruct-q4.gguf",
kwargs={"verbose": True, "n_ctx": 4096},
)
AVAILABLE_MODELS["transformers_phi3_small_8k_instruct_gpu"] = lambda: dict(
name="transformers:microsoft/Phi-3-small-8k-instruct",
kwargs={"trust_remote_code": True, "load_in_8bit": True, "device_map": "cuda:0"},
)

# QWEN2DOT5
AVAILABLE_MODELS["transformers_qwen2dot5_0dot5b_cpu"] = lambda: dict(
name="transformers:Qwen/Qwen2.5-0.5B", kwargs=dict()
)
AVAILABLE_MODELS["transformers_qwen2dot5_0dot5b_gpu"] = lambda: dict(
name="transformers:Qwen/Qwen2.5-0.5B", kwargs={"device_map": "cuda:0"}
)
AVAILABLE_MODELS["transformers_qwen2dot5_0dot5b_instruct_cpu"] = lambda: dict(
name="transformers:Qwen/Qwen2.5-0.5B-Instruct", kwargs=dict()
)
AVAILABLE_MODELS["transformers_qwen2dot5_0dot5b_instruct_gpu"] = lambda: dict(
name="transformers:Qwen/Qwen2.5-0.5B-Instruct", kwargs={"device_map": "cuda:0"}
)


# Ensure that asserts from tests/utils.py are rewritten by pytest to show helpful messages
pytest.register_assert_rewrite("tests.utils")


-try:
-    from ._llms_for_testing import AVAILABLE_MODELS
-    from .utils import get_model
-except:
-    AVAILABLE_MODELS = {}
from .utils import get_model

SELECTED_MODEL_ENV_VARIABLE = "GUIDANCE_SELECTED_MODEL"

@@ -39,7 +144,7 @@ def selected_model_name(pytestconfig) -> str:

@pytest.fixture(scope="session")
def selected_model_info(selected_model_name: str):
-    model_info = AVAILABLE_MODELS[selected_model_name]
+    model_info = AVAILABLE_MODELS[selected_model_name]()
    return model_info


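For context on how these lambdas are consumed: the selected_model_info fixture above calls the stored lambda, and that call is the moment any late imports actually happen. A hedged sketch of the resolution step, assuming the AVAILABLE_MODELS dict from this conftest.py is in scope and GUIDANCE_SELECTED_MODEL is set to one of its keys:

import os

# Illustrative only; the real lookup lives in the fixtures above.
selected = os.environ.get("GUIDANCE_SELECTED_MODEL", "transformers_gpt2_cpu")
model_info = AVAILABLE_MODELS[selected]()  # calling the lambda triggers the late imports
print(model_info["name"], model_info["kwargs"])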
