|
3 | 3 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
4 | 4 | from human_eval.data import write_jsonl, read_problems
|
5 | 5 | from exllamav2 import model_init
|
6 |
| -from exllamav2 import ExLlamaV2Cache, ExLlamaV2Cache_Q4 |
| 6 | +from exllamav2 import ExLlamaV2Cache, ExLlamaV2Cache_Q4, ExLlamaV2Cache_Q6, ExLlamaV2Cache_Q8 |
7 | 7 | from exllamav2.generator import ExLlamaV2DynamicGenerator, ExLlamaV2DynamicJob, ExLlamaV2Sampler
|
8 | 8 | import argparse, contextlib
|
9 | 9 | import util
|
|
# Reconstructed post-diff CLI flag definitions (original span carried diff markers).
# NOTE(review): `parser` is created earlier in the file (outside this chunk) — confirm
# it is an argparse.ArgumentParser extended by model_init as is conventional here.
parser.add_argument("-cs", "--cache_size", type = int, default = None)
parser.add_argument("-spt", "--samples_per_task", type = int, default = 200)
# KV-cache quantization flags; downstream selection checks Q4 first, then Q6, then Q8,
# so at most one takes effect even if several are passed.
parser.add_argument("-cq4", "--cache_q4", action = "store_true", help = "Use Q4 cache")
parser.add_argument("-cq6", "--cache_q6", action = "store_true", help = "Use Q6 cache")
parser.add_argument("-cq8", "--cache_q8", action = "store_true", help = "Use Q8 cache")
parser.add_argument("--max_tokens", type = int, default = 768, help = "Max number of tokens for each completion")
parser.add_argument("-pf", "--prompt_format", type = str, help = "Instruct format to apply. Default is raw completion (for base models) ")
parser.add_argument("-v", "--verbose", action = "store_true", help = "Spam completions to console while generating")
|
|
75 | 77 | )
|
76 | 78 |
|
# Reconstructed post-diff cache selection (original span carried diff markers).
# Pick the KV-cache implementation from the CLI flags: Q4 takes precedence, then Q6,
# then Q8, falling back to the default (unquantized) cache when no flag is set.
if args.cache_q4: cache_type = ExLlamaV2Cache_Q4
elif args.cache_q6: cache_type = ExLlamaV2Cache_Q6
elif args.cache_q8: cache_type = ExLlamaV2Cache_Q8
else: cache_type = ExLlamaV2Cache
|
79 | 83 | cache = cache_type(
|
80 | 84 | model,
|
|
0 commit comments