
Commit 675450d

Add Q6 and Q8 cache options to eval scripts
1 parent f3596fc · commit 675450d

3 files changed: +18 −2 lines

doc/eval.md (+8)

@@ -58,6 +58,10 @@ prefix for the response.
   performance.
 
 - **-cq4 / --cache_q4**: Use Q4 cache
+
+- **-cq6 / --cache_q6**: Use Q6 cache
+
+- **-cq8 / --cache_q8**: Use Q8 cache
 
 ## MMLU
 
@@ -83,3 +87,7 @@ the full list of subjects.
   performance.
 
 - **-cq4 / --cache_q4**: Use Q4 cache
+
+- **-cq6 / --cache_q6**: Use Q6 cache
+
+- **-cq8 / --cache_q8**: Use Q8 cache
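
With these options documented, a typical run passes one of the new flags next to the usual model argument. A usage sketch (the -m/--model_dir argument comes from exllamav2's model_init helper and is assumed here rather than added by this commit):

    python eval/humaneval.py -m /path/to/exl2-model -cq6
    python eval/mmlu.py -m /path/to/exl2-model -cq8 -sub all

If more than one cache flag is given, the scripts take the first match in the if/elif chain (Q4, then Q6, then Q8); with none of them, they fall back to the default FP16 ExLlamaV2Cache.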

eval/humaneval.py (+5 −1)

@@ -3,7 +3,7 @@
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from human_eval.data import write_jsonl, read_problems
 from exllamav2 import model_init
-from exllamav2 import ExLlamaV2Cache, ExLlamaV2Cache_Q4
+from exllamav2 import ExLlamaV2Cache, ExLlamaV2Cache_Q4, ExLlamaV2Cache_Q6, ExLlamaV2Cache_Q8
 from exllamav2.generator import ExLlamaV2DynamicGenerator, ExLlamaV2DynamicJob, ExLlamaV2Sampler
 import argparse, contextlib
 import util
@@ -15,6 +15,8 @@
 parser.add_argument("-cs", "--cache_size", type = int, default = None)
 parser.add_argument("-spt", "--samples_per_task", type = int, default = 200)
 parser.add_argument("-cq4", "--cache_q4", action = "store_true", help = "Use Q4 cache")
+parser.add_argument("-cq6", "--cache_q6", action = "store_true", help = "Use Q6 cache")
+parser.add_argument("-cq8", "--cache_q8", action = "store_true", help = "Use Q8 cache")
 parser.add_argument("--max_tokens", type = int, default = 768, help = "Max number of tokens for each completion")
 parser.add_argument("-pf", "--prompt_format", type = str, help = "Instruct format to apply. Default is raw completion (for base models) ")
 parser.add_argument("-v", "--verbose", action = "store_true", help = "Spam completions to console while generating")
@@ -75,6 +77,8 @@
 )
 
 if args.cache_q4: cache_type = ExLlamaV2Cache_Q4
+elif args.cache_q6: cache_type = ExLlamaV2Cache_Q6
+elif args.cache_q8: cache_type = ExLlamaV2Cache_Q8
 else: cache_type = ExLlamaV2Cache
 cache = cache_type(
     model,
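
For reference, the cache selection added above is the same if/elif chain in both scripts: pick a quantized cache class when a flag is set, otherwise use the FP16 cache. Below is a minimal standalone sketch of that pattern, assuming the usual exllamav2 loading flow (ExLlamaV2Config / ExLlamaV2 / ExLlamaV2Tokenizer with load_autosplit) instead of the model_init helper the eval scripts actually use; the dict lookup and the build_generator name are illustrative, not part of this commit.

# Sketch only: standalone version of the cache selection the eval scripts perform.
# The loading flow below (ExLlamaV2Config / load_autosplit) and the build_generator
# name are assumptions for illustration; the eval scripts use model_init instead.

from exllamav2 import (
    ExLlamaV2, ExLlamaV2Config, ExLlamaV2Tokenizer,
    ExLlamaV2Cache, ExLlamaV2Cache_Q4, ExLlamaV2Cache_Q6, ExLlamaV2Cache_Q8,
)
from exllamav2.generator import ExLlamaV2DynamicGenerator

def build_generator(model_dir, cache_bits = None):
    config = ExLlamaV2Config(model_dir)
    model = ExLlamaV2(config)
    tokenizer = ExLlamaV2Tokenizer(config)

    # Same mapping as the --cache_q4/--cache_q6/--cache_q8 flags: pick a quantized
    # cache class, or fall back to the FP16 ExLlamaV2Cache.
    cache_type = {
        4: ExLlamaV2Cache_Q4,
        6: ExLlamaV2Cache_Q6,
        8: ExLlamaV2Cache_Q8,
    }.get(cache_bits, ExLlamaV2Cache)

    # lazy = True defers allocation so load_autosplit can place the cache tensors
    # alongside the model weights across available GPUs.
    cache = cache_type(model, lazy = True)
    model.load_autosplit(cache)

    return ExLlamaV2DynamicGenerator(model = model, cache = cache, tokenizer = tokenizer)

The quantized variants trade KV-cache VRAM for precision: Q4 is the smallest, Q8 stays closest to the FP16 baseline, and Q6 sits in between.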

eval/mmlu.py (+5 −1)

@@ -2,7 +2,7 @@
 import sys, os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from exllamav2 import model_init
-from exllamav2 import ExLlamaV2Cache, ExLlamaV2Cache_Q4
+from exllamav2 import ExLlamaV2Cache, ExLlamaV2Cache_Q4, ExLlamaV2Cache_Q6, ExLlamaV2Cache_Q8
 from exllamav2.generator import ExLlamaV2DynamicGenerator, ExLlamaV2DynamicJob, ExLlamaV2Sampler
 import argparse, contextlib
 import torch
@@ -14,6 +14,8 @@
 parser = argparse.ArgumentParser(description = "Run MMLU evaluation on EXL2 model")
 parser.add_argument("-cs", "--cache_size", type = int, default = None)
 parser.add_argument("-cq4", "--cache_q4", action = "store_true", help = "Use Q4 cache")
+parser.add_argument("-cq6", "--cache_q6", action = "store_true", help = "Use Q6 cache")
+parser.add_argument("-cq8", "--cache_q8", action = "store_true", help = "Use Q8 cache")
 parser.add_argument("-sub", "--subjects", type = str, default = "all", help = "Comma-separated list of categories to test, or 'all'")
 parser.add_argument("-fs", "--fewshot_examples", type = int, default = 5, help = "Number of examples for fewshot examples, max 5")
 parser.add_argument("-shf", "--shuffle", action = "store_true", help = "Shuffle choices randomly")
@@ -33,6 +35,8 @@
 )
 
 if args.cache_q4: cache_type = ExLlamaV2Cache_Q4
+elif args.cache_q6: cache_type = ExLlamaV2Cache_Q6
+elif args.cache_q8: cache_type = ExLlamaV2Cache_Q8
 else: cache_type = ExLlamaV2Cache
 cache = cache_type(
     model,
