Skip to content

Commit 5f78434

Browse files
committed
Merge remote-tracking branch 'turboderp/master'
2 parents f2c77aa + 10a8842 commit 5f78434

36 files changed

+1041
-633
lines changed
+109
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
name: Bug report
2+
description: Report bugs with the project
3+
title: "[BUG]"
4+
labels: bug
5+
body:
6+
7+
- type: markdown
8+
attributes:
9+
value: |
10+
### Disclaimer:
11+
Github Issues are **only** for code related bugs.
12+
Please fill in as many fields as possible so we can understand the relevant parts of the issue.
13+
14+
- type: dropdown
15+
attributes:
16+
label: OS
17+
options:
18+
- Windows
19+
- Linux
20+
validations:
21+
required: true
22+
23+
- type: dropdown
24+
attributes:
25+
label: GPU Library
26+
description: Ex. CUDA, ROCm
27+
options:
28+
- CUDA 12.x
29+
- CUDA 11.8
30+
- AMD ROCm
31+
validations:
32+
required: true
33+
34+
- type: dropdown
35+
attributes:
36+
label: Python version
37+
options:
38+
- '3.12'
39+
- '3.11'
40+
- '3.10'
41+
validations:
42+
required: true
43+
44+
- type: input
45+
attributes:
46+
label: Pytorch version
47+
validations:
48+
required: true
49+
50+
- type: input
51+
attributes:
52+
label: Model
53+
description: Provide a model if the issue is related to one
54+
placeholder: HF Repo Author/Model Name
55+
validations:
56+
required: false
57+
58+
- type: textarea
59+
attributes:
60+
label: Describe the bug
61+
description: A clear and concise description of what the bug is.
62+
validations:
63+
required: true
64+
65+
- type: textarea
66+
attributes:
67+
label: Reproduction steps
68+
description: Walk us through how the bug occurred and how to make it happen.
69+
validations:
70+
required: true
71+
72+
- type: textarea
73+
attributes:
74+
label: Expected behavior
75+
description: What was expected to happen?
76+
validations:
77+
required: true
78+
79+
- type: textarea
80+
attributes:
81+
label: Logs
82+
description: If applicable, add logs and tracebacks to help explain your problem.
83+
validations:
84+
required: false
85+
86+
- type: textarea
87+
attributes:
88+
label: Additional context
89+
description: Add any other context about the problem here.
90+
validations:
91+
required: false
92+
93+
- type: checkboxes
94+
attributes:
95+
label: Acknowledgements
96+
description: Before submitting this issue, please make sure you have completed the following checklist.
97+
options:
98+
- label: I have looked for similar issues before submitting this one.
99+
required: true
100+
- label: I understand that the developers have lives and my issue will be answered when possible.
101+
required: true
102+
- label: I understand the developers of this program are human, and I will ask my questions politely.
103+
required: true
104+
105+
- type: markdown
106+
attributes:
107+
value: |
108+
## Thanks!
109+
Well-formatted issues improve ExLlamaV2 and make the development process easier.

.github/ISSUE_TEMPLATE/config.yml

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
blank_issues_enabled: false
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
name: Feature request
2+
description: Suggest a new idea
3+
title: "[REQUEST]"
4+
body:
5+
6+
- type: textarea
7+
attributes:
8+
label: Problem
9+
description: Is the feature request related to a problem? If so, please describe.
10+
placeholder: A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
11+
validations:
12+
required: false
13+
14+
- type: textarea
15+
attributes:
16+
label: Solution
17+
description: Describe the solution you'd like.
18+
placeholder: A clear and concise description of what you want to happen.
19+
validations:
20+
required: true
21+
22+
- type: textarea
23+
attributes:
24+
label: Alternatives
25+
description: What alternative options did you consider?
26+
validations:
27+
required: false
28+
29+
- type: textarea
30+
attributes:
31+
label: Explanation
32+
description: Why should this feature be added?
33+
validations:
34+
required: true
35+
36+
- type: textarea
37+
attributes:
38+
label: Examples
39+
description: |
40+
Examples of the feature in action and its significance.
41+
42+
Not required, but will make your request easier to understand. Real-world examples are helpful for samplers.
43+
validations:
44+
required: false
45+
46+
- type: textarea
47+
attributes:
48+
label: Additional context
49+
description: Anything else to add?
50+
validations:
51+
required: false
52+
53+
- type: checkboxes
54+
attributes:
55+
label: Acknowledgements
56+
description: Before submitting this issue, please make sure you have completed the following checklist.
57+
options:
58+
- label: I have looked for similar requests before submitting this one.
59+
required: true
60+
- label: I understand that the developers have lives and my issue will be answered when possible.
61+
required: true
62+
- label: I understand the developers of this program are human, and I will make my requests politely.
63+
required: true
64+
65+
- type: markdown
66+
attributes:
67+
value: |
68+
## Thanks!
69+
Well-formatted issues improve ExLlamaV2 and make the development process easier.

examples/inference_json.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from exllamav2 import ExLlamaV2, ExLlamaV2Config, ExLlamaV2Cache, ExLlamaV2Tokenizer
66
from exllamav2.generator import ExLlamaV2DynamicGenerator
77
from exllamav2.generator.filters import ExLlamaV2PrefixFilter
8-
from lmformatenforcer.integrations.exllamav2 import ExLlamaV2TokenEnforcerFilter
8+
from inference_json_lmfe_wrapper import ExLlamaV2TokenEnforcerFilter
99
from lmformatenforcer import JsonSchemaParser
1010
from pydantic import BaseModel, conlist
1111
from typing import Literal
@@ -61,7 +61,7 @@ class Superhero(BaseModel):
6161
filters.append(None)
6262
prompts.append(p)
6363
filters.append([
64-
ExLlamaV2TokenEnforcerFilter(schema_parser, tokenizer),
64+
ExLlamaV2TokenEnforcerFilter(model, tokenizer, schema_parser),
6565
ExLlamaV2PrefixFilter(model, tokenizer, ["{", " {"])
6666
])
6767

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from exllamav2 import ExLlamaV2, ExLlamaV2Tokenizer
from exllamav2.generator.filters import ExLlamaV2Filter
from functools import lru_cache
from lmformatenforcer.integrations.exllamav2 import build_token_enforcer_tokenizer_data
from lmformatenforcer import TokenEnforcer, CharacterLevelParser
from typing import List


# Temporary wrapper for lm-format-enforcer, until the integration in LMFE
# itself is updated.


@lru_cache(10)
def _get_lmfe_tokenizer_data(tokenizer: ExLlamaV2Tokenizer):
    """
    Build (and cache) the LMFE tokenizer data for a given tokenizer.

    Cached because building the enforcer data is done per tokenizer, not per
    generation. NOTE(review): the cache keys on the tokenizer instance and
    holds strong references to up to 10 tokenizers for the process lifetime.
    """
    return build_token_enforcer_tokenizer_data(tokenizer)


class ExLlamaV2TokenEnforcerFilter(ExLlamaV2Filter):
    """
    Sampling filter that constrains generation to tokens permitted by an
    lm-format-enforcer CharacterLevelParser (e.g. a JSON schema parser).
    """

    # Token IDs fed so far in the current generation
    token_sequence: List[int]

    def __init__(
        self,
        model: ExLlamaV2,
        tokenizer: ExLlamaV2Tokenizer,
        character_level_parser: CharacterLevelParser,
    ):
        """
        :param model:
            Model instance, passed through to the base filter

        :param tokenizer:
            Tokenizer used to build the LMFE token enforcer data

        :param character_level_parser:
            LMFE parser defining the allowed output format
        """
        super().__init__(model, tokenizer)
        tokenizer_data = _get_lmfe_tokenizer_data(tokenizer)
        self.token_enforcer = TokenEnforcer(tokenizer_data, character_level_parser)
        self.token_sequence = []

    def begin(self, prefix_str: str) -> None:
        # Reset per-generation state; prefix_str is unused by this filter
        self.token_sequence = []

    def feed(self, token) -> None:
        # Record the sampled token ID; assumes token is indexable as
        # token[0][0] (i.e. a (1, 1) tensor-like) — TODO confirm
        self.token_sequence.append(int(token[0][0]))

    def next(self):
        # Returns (allowed tokens, end tokens); this filter never forces
        # an end token, so the second list is always empty
        allowed_tokens = self.token_enforcer.get_allowed_tokens(self.token_sequence)
        return sorted(allowed_tokens), []

    def use_background_worker(self):
        # Allow the generator to evaluate this filter on a background worker
        return True

exllamav2/architecture.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,17 @@ class RopeStyle(IntEnum):
101101

102102
class ExLlamaV2ArchParams:
103103

104-
def __init__(self, arch_string, read_config):
104+
def __init__(self, arch_string: str, read_config: dict):
105+
"""
106+
Get architecture definition from model config. If the architecture isn't recognized, defaults to Llama
107+
architecture.
108+

109+
:param arch_string:
110+
Architecture string from config.json
111+
112+
:param read_config:
113+
config.json as Python dict
114+
"""
105115

106116
self.arch_string = arch_string
107117
arch_recognized = False

0 commit comments

Comments
 (0)