@@ -1,27 +1,19 @@
 from __future__ import annotations

-import os
 import ctypes
-
-from typing import (
-    Dict,
-    List,
-    Tuple,
-    Optional,
-    Sequence,
-)
-from dataclasses import dataclass, field
+import os
 from contextlib import ExitStack
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Sequence, Tuple

 import numpy as np
 import numpy.typing as npt

-from .llama_types import *
-from .llama_grammar import LlamaGrammar
-from ._utils import suppress_stdout_stderr
-
 import llama_cpp.llama_cpp as llama_cpp

+from ._utils import suppress_stdout_stderr
+from .llama_grammar import LlamaGrammar
+from .llama_types import *

 # Python wrappers over llama.h structs

@@ -631,7 +623,7 @@ def sample(
         if len(self.prev) > 0:
             nl_token = ctx_main.model.token_nl()
             nl_logit = logits_array[nl_token]
-            last_tokens = self.prev[-self.params.penalty_last_n :]
+            last_tokens = self.prev[-self.params.penalty_last_n:]
             last_tokens_size = min(len(last_tokens), self.params.penalty_last_n)
             if last_tokens_size > 0:
                 last_tokens_p = (llama_cpp.llama_token * len(last_tokens))(*last_tokens)
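The only functional content in the hunk above is the slice that builds the repetition-penalty window; dropping the space before the colon is purely cosmetic and does not change the slice. Below is a minimal, self-contained sketch of what that line computes, using hypothetical values and `ctypes.c_int32` as a stand-in for `llama_cpp.llama_token`:

```python
import ctypes

penalty_last_n = 64
prev = list(range(100))  # pretend these are previously sampled token ids

# `prev[-penalty_last_n:]` keeps only the most recent `penalty_last_n` tokens;
# it is identical to `prev[-penalty_last_n :]` from before the change.
last_tokens = prev[-penalty_last_n:]
last_tokens_size = min(len(last_tokens), penalty_last_n)

# Pack the window into a ctypes array, as the surrounding code does before
# handing it to llama.cpp's repetition-penalty sampler.
last_tokens_p = (ctypes.c_int32 * len(last_tokens))(*last_tokens)

assert last_tokens_size == 64
assert list(last_tokens_p) == prev[-64:]
```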
@@ -697,8 +689,9 @@ def accept(self, ctx_main: LlamaContext, id: int, apply_grammar: bool):
         self.prev.append(id)


-from typing import List, Callable, Optional, Union
 import ctypes
+from typing import Callable, List, Optional, Union
+
 import llama_cpp

