@@ -18,6 +18,7 @@
     Iterator,
     Deque,
     Callable,
+    Dict,
 )
 from collections import deque
 from pathlib import Path
@@ -262,9 +263,7 @@ def __init__(
 
         self.n_batch = min(n_ctx, n_batch)  # ???
         self.n_threads = n_threads or max(multiprocessing.cpu_count() // 2, 1)
-        self.n_threads_batch = n_threads_batch or max(
-            multiprocessing.cpu_count() // 2, 1
-        )
+        self.n_threads_batch = n_threads_batch or multiprocessing.cpu_count()
 
         # Context Params
         self.context_params = llama_cpp.llama_context_default_params()
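
Note on the hunk above: the default for `n_threads_batch` changes from half the logical cores to all of them, while `n_threads` (used for token-by-token generation) keeps the halved default. A minimal standalone sketch of the resulting defaults; the helper name and the rationale comments are illustrative assumptions, not part of the commit:

import multiprocessing

def default_thread_counts(n_threads=None, n_threads_batch=None):
    # Generation threads: half the logical cores, but never fewer than 1.
    n_threads = n_threads or max(multiprocessing.cpu_count() // 2, 1)
    # Batch (prompt-processing) threads: all logical cores; batched
    # evaluation generally scales with more threads (assumed rationale).
    n_threads_batch = n_threads_batch or multiprocessing.cpu_count()
    return n_threads, n_threads_batch

print(default_thread_counts())  # e.g. (4, 8) on an 8-core machine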
@@ -1793,7 +1792,7 @@ def save_state(self) -> LlamaState:
                 file=sys.stderr,
             )
         return LlamaState(
-            scores=self.scores.copy(),
+            scores=self._scores.copy(),
             input_ids=self.input_ids.copy(),
             n_tokens=self.n_tokens,
             llama_state=bytes(llama_state_compact),
@@ -1802,7 +1801,9 @@ def save_state(self) -> LlamaState:
 
     def load_state(self, state: LlamaState) -> None:
         assert self._ctx.ctx is not None
-        self.scores = state.scores.copy()
+        # Only filling in up to `n_tokens` and then zero-ing out the rest
+        self.scores[: state.n_tokens, :] = state.scores.copy()
+        self.scores[state.n_tokens :, :] = 0.0
         self.input_ids = state.input_ids.copy()
         self.n_tokens = state.n_tokens
         state_size = state.llama_state_size
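
Taken together with the save_state hunk, the commit now snapshots `self._scores` (judging by the load-side comment, the score rows for the first `n_tokens` tokens only) and, on load, writes them back into the preallocated buffer while zeroing the unused tail rather than replacing the whole array. A toy NumPy sketch of that fill-and-zero pattern, assuming `scores` is a preallocated `(n_ctx, n_vocab)` array and the snapshot covers only the first `n_tokens` rows:

import numpy as np

n_ctx, n_vocab = 8, 4  # toy sizes for illustration
scores = np.ones((n_ctx, n_vocab), dtype=np.float32)  # buffer holding stale logits
saved = np.full((3, n_vocab), 2.0, dtype=np.float32)  # snapshot of n_tokens rows
n_tokens = 3

# Restore the saved rows, then zero the rest so no stale logits from a
# previous generation survive the load.
scores[:n_tokens, :] = saved
scores[n_tokens:, :] = 0.0

assert (scores[:n_tokens] == 2.0).all() and (scores[n_tokens:] == 0.0).all()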
@@ -1953,7 +1954,6 @@ def from_pretrained(
                 local_dir_use_symlinks=local_dir_use_symlinks,
                 cache_dir=cache_dir,
                 local_files_only=True,
-
             )
         else:
             model_path = os.path.join(local_dir, filename)