Skip to content

Commit 7b721bf

Browse files
committed
Reapply "bump to 0.0.16"
This reverts commit 36cce99.
1 parent 36cce99 commit 7b721bf

File tree

2 files changed

+17
-7
lines changed

2 files changed

+17
-7
lines changed

examples/streaming.py

+16 -6
Original file line number | Diff line number | Diff line change
@@ -15,14 +15,24 @@
1515
)
1616

1717
import time
18+
import torch
1819

1920
# Initialize model and cache
2021

21-
model_directory = "/mnt/str/models/mistral-7b-instruct-exl2/4.0bpw/"
22+
# model_directory = "/mnt/str/models/mistral-7b-instruct-exl2/4.0bpw/"
23+
# model_directory = "/mnt/str/models/mistral-7b-instruct"
24+
# model_directory = "/mnt/str/models/starcoder2-7b"
25+
model_directory = "/mnt/str/models/command-r-exl2/6.0bpw"
26+
# model_directory = "/mnt/str/models/command-r"
27+
28+
torch.set_printoptions(precision = 5, sci_mode = False)
2229

2330
config = ExLlamaV2Config()
2431
config.model_dir = model_directory
2532
config.prepare()
33+
# config.load_in_q4 = True
34+
config.max_seq_len = 300
35+
config.no_flash_attn = True
2636

2737
model = ExLlamaV2(config)
2838
print("Loading model: " + model_directory)
@@ -39,25 +49,25 @@
3949
# Settings
4050

4151
settings = ExLlamaV2Sampler.Settings()
42-
settings.temperature = 0.85
43-
settings.top_k = 50
52+
settings.temperature = 1.0
53+
settings.top_k = 0
4454
settings.top_p = 0.8
4555
settings.top_a = 0.0
46-
settings.token_repetition_penalty = 1.05
56+
settings.token_repetition_penalty = 1.02
4757
settings.disallow_tokens(tokenizer, [tokenizer.eos_token_id])
4858

4959
max_new_tokens = 250
5060

5161
# Prompt
5262

53-
prompt = "Our story begins in the Scottish town of Auchtermuchty, where once"
63+
prompt = "Once upon a time,"
5464

5565
input_ids = tokenizer.encode(prompt, add_bos = True)
5666
prompt_tokens = input_ids.shape[-1]
5767

5868
# Make sure CUDA is initialized so we can measure performance
5969

60-
generator.warmup()
70+
# generator.warmup()
6171

6272
# Send prompt to generator to begin stream
6373

exllamav2/version.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "0.0.15"
1+
__version__ = "0.0.16"

0 commit comments

Comments
 (0)