import sys, os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from exllamav2 import ExLlamaV2, ExLlamaV2Tokenizer
from exllamav2.generator.filters import ExLlamaV2Filter
from functools import lru_cache
from lmformatenforcer.integrations.exllamav2 import build_token_enforcer_tokenizer_data
from lmformatenforcer import TokenEnforcer, CharacterLevelParser
from typing import List

# Temporary wrapper for lm-format-enforcer, until the integration in LMFE itself is updated

# Building the token enforcer's tokenizer data is relatively expensive, so cache it
# per tokenizer instance instead of rebuilding it for every filter
@lru_cache(10)
def _get_lmfe_tokenizer_data(tokenizer: ExLlamaV2Tokenizer):
    return build_token_enforcer_tokenizer_data(tokenizer)

class ExLlamaV2TokenEnforcerFilter(ExLlamaV2Filter):
    """ExLlamaV2 filter that constrains sampling to the tokens permitted by an
    lm-format-enforcer CharacterLevelParser (e.g. a JSON schema parser)."""

    token_sequence: List[int]

    def __init__(
        self,
        model: ExLlamaV2,
        tokenizer: ExLlamaV2Tokenizer,
        character_level_parser: CharacterLevelParser,
    ):
        super().__init__(model, tokenizer)
        tokenizer_data = _get_lmfe_tokenizer_data(tokenizer)
        self.token_enforcer = TokenEnforcer(tokenizer_data, character_level_parser)
        self.token_sequence = []

    def begin(self, prefix_str: str) -> None:
        # Reset the tracked sequence at the start of each generation
        self.token_sequence = []

    def feed(self, token) -> None:
        # The sampled token arrives as a (1, 1) tensor; record its scalar ID
        self.token_sequence.append(int(token[0][0]))

    def next(self):
        # Ask the enforcer which tokens may legally follow the sequence so far;
        # the second return value is empty since this filter never forces an end
        allowed_tokens = self.token_enforcer.get_allowed_tokens(self.token_sequence)
        return sorted(allowed_tokens), []

    def use_background_worker(self):
        # Allow the generator to evaluate this filter on a background worker thread
        return True
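

# Illustrative usage sketch (not part of the original file): constrain generated
# output to a JSON schema via lmformatenforcer's JsonSchemaParser. The model
# directory is a placeholder, and the dynamic generator's `filters` argument is
# assumed to match the current exllamav2 API.

if __name__ == "__main__":

    from exllamav2 import ExLlamaV2Config, ExLlamaV2Cache
    from exllamav2.generator import ExLlamaV2DynamicGenerator
    from lmformatenforcer import JsonSchemaParser

    config = ExLlamaV2Config("/path/to/model")  # placeholder path
    model = ExLlamaV2(config)
    cache = ExLlamaV2Cache(model, lazy = True)
    model.load_autosplit(cache)
    tokenizer = ExLlamaV2Tokenizer(config)
    generator = ExLlamaV2DynamicGenerator(model = model, cache = cache, tokenizer = tokenizer)

    # Only allow output matching this (example) schema
    schema = {"type": "object", "properties": {"answer": {"type": "string"}}}
    json_filter = ExLlamaV2TokenEnforcerFilter(model, tokenizer, JsonSchemaParser(schema))

    output = generator.generate(
        prompt = "Answer in JSON: What is the capital of France?\n",
        max_new_tokens = 100,
        filters = [json_filter],
    )
    print(output)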