Commit c4dd629

Lint
1 parent 70a18f3 commit c4dd629

File tree

1 file changed: +3 -3 lines changed


llama_cpp/llama_cache.py

Lines changed: 3 additions & 3 deletions
@@ -1,11 +1,11 @@
 import sys
 from abc import ABC, abstractmethod
+from collections import OrderedDict
 from typing import (
     Optional,
     Sequence,
     Tuple,
 )
-from collections import OrderedDict

 import diskcache

@@ -52,7 +52,7 @@ class LlamaRAMCache(BaseLlamaCache):
     def __init__(self, capacity_bytes: int = (2 << 30)):
         super().__init__(capacity_bytes)
         self.capacity_bytes = capacity_bytes
-        self.cache_state: OrderedDict[Tuple[int, ...], "llama_cpp.llama.LlamaState"] = (
+        self.cache_state: OrderedDict[Tuple[int, ...], llama_cpp.llama.LlamaState] = (
             OrderedDict()
         )

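The line changed above annotates the RAM cache's backing store: an OrderedDict mapping token-id tuples to saved llama_cpp.llama.LlamaState objects. As a minimal sketch of how such a store can serve as a size-bounded cache (the eviction policy and the use of bytes as a stand-in for LlamaState are assumptions for illustration, not part of this commit):

from collections import OrderedDict
from typing import Tuple

class RAMCacheSketch:
    """Sketch of an OrderedDict-backed token-prefix cache (assumed LRU
    eviction once total size exceeds capacity_bytes; bytes values stand
    in for llama_cpp.llama.LlamaState)."""

    def __init__(self, capacity_bytes: int = (2 << 30)):
        self.capacity_bytes = capacity_bytes
        self.cache_state: "OrderedDict[Tuple[int, ...], bytes]" = OrderedDict()

    def __setitem__(self, key: Tuple[int, ...], value: bytes) -> None:
        self.cache_state[key] = value
        self.cache_state.move_to_end(key)  # most recently used goes last
        # Assumed policy: drop least-recently-used entries while over budget.
        while (
            len(self.cache_state) > 1
            and sum(len(v) for v in self.cache_state.values()) > self.capacity_bytes
        ):
            self.cache_state.popitem(last=False)

OrderedDict preserves insertion order, so move_to_end plus popitem(last=False) yields LRU behavior without any extra index structure.
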
@@ -132,7 +132,7 @@ def __getitem__(self, key: Sequence[int]) -> "llama_cpp.llama.LlamaState":
         _key = self._find_longest_prefix_key(key)
         if _key is None:
             raise KeyError("Key not found")
-        value: "llama_cpp.llama.LlamaState" = self.cache.pop(_key)  # type: ignore
+        value: llama_cpp.llama.LlamaState = self.cache.pop(_key)  # type: ignore
         # NOTE: This puts an integer as key in cache, which breaks,
         # Llama.longest_token_prefix(k, key) above since k is not a tuple of ints/tokens
         # self.cache.push(_key, side="front")  # type: ignore
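
The NOTE in this hunk hinges on every cache key being a tuple of token ids: Llama.longest_token_prefix(k, key) compares k element-wise against the query sequence, so the integer key that diskcache's push would create is not even iterable and the lookup breaks. A hedged sketch of that prefix matching, with names mirroring the diff (the actual implementations live in llama_cpp and may differ):

from typing import Optional, Sequence, Tuple

def longest_token_prefix(a: Sequence[int], b: Sequence[int]) -> int:
    """Length of the common leading run of token ids in a and b."""
    n = 0
    for x, y in zip(a, b):
        if x != y:
            break
        n += 1
    return n

def find_longest_prefix_key(
    keys: Sequence[Tuple[int, ...]], key: Sequence[int]
) -> Optional[Tuple[int, ...]]:
    """Return the stored key sharing the longest nonempty prefix with key.
    Sketch only: mirrors what _find_longest_prefix_key appears to do."""
    best_key: Optional[Tuple[int, ...]] = None
    best_len = 0
    for k in keys:  # each k must be a tuple of ints/tokens, per the NOTE above
        n = longest_token_prefix(k, key)
        if n > best_len:
            best_key, best_len = k, n
    return best_key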
