Skip to content

Commit 48925b4

Browse files
committed
fixup example
1 parent 7b721bf commit 48925b4

File tree

1 file changed

+6
-16
lines changed

1 file changed

+6
-16
lines changed

examples/streaming.py

+6 -16
Original file line number | Diff line number | Diff line change
@@ -15,24 +15,14 @@
1515
)
1616

1717
import time
18-
import torch
1918

2019
# Initialize model and cache
2120

22-
# model_directory = "/mnt/str/models/mistral-7b-instruct-exl2/4.0bpw/"
23-
# model_directory = "/mnt/str/models/mistral-7b-instruct"
24-
# model_directory = "/mnt/str/models/starcoder2-7b"
25-
model_directory = "/mnt/str/models/command-r-exl2/6.0bpw"
26-
# model_directory = "/mnt/str/models/command-r"
27-
28-
torch.set_printoptions(precision = 5, sci_mode = False)
21+
model_directory = "/mnt/str/models/mistral-7b-instruct-exl2/4.0bpw/"
2922

3023
config = ExLlamaV2Config()
3124
config.model_dir = model_directory
3225
config.prepare()
33-
# config.load_in_q4 = True
34-
config.max_seq_len = 300
35-
config.no_flash_attn = True
3626

3727
model = ExLlamaV2(config)
3828
print("Loading model: " + model_directory)
@@ -49,25 +39,25 @@
4939
# Settings
5040

5141
settings = ExLlamaV2Sampler.Settings()
52-
settings.temperature = 1.0
53-
settings.top_k = 0
42+
settings.temperature = 0.85
43+
settings.top_k = 50
5444
settings.top_p = 0.8
5545
settings.top_a = 0.0
56-
settings.token_repetition_penalty = 1.02
46+
settings.token_repetition_penalty = 1.05
5747
settings.disallow_tokens(tokenizer, [tokenizer.eos_token_id])
5848

5949
max_new_tokens = 250
6050

6151
# Prompt
6252

63-
prompt = "Once upon a time,"
53+
prompt = "Our story begins in the Scottish town of Auchtermuchty, where once"
6454

6555
input_ids = tokenizer.encode(prompt, add_bos = True)
6656
prompt_tokens = input_ids.shape[-1]
6757

6858
# Make sure CUDA is initialized so we can measure performance
6959

70-
# generator.warmup()
60+
generator.warmup()
7161

7262
# Send prompt to generator to begin stream
7363

0 commit comments

Comments
 (0)