Skip to content

Commit c11a690

Browse files
authored
Merge branch 'main' into hypercorn
2 parents 690aaa1 + f9b7221 commit c11a690

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

llama_cpp/llama.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,12 @@ def __init__(
262262
raise ValueError(f"Value for {k} is too long: {v}")
263263
v_bytes = v_bytes.ljust(128, b"\0")
264264
self._kv_overrides_array[i].tag = llama_cpp.LLAMA_KV_OVERRIDE_TYPE_STR
265-
self._kv_overrides_array[i].value.str_value[:128] = v_bytes
265+
# copy min(len(v_bytes), 128) bytes into str_value
266+
ctypes.memmove(
267+
self._kv_overrides_array[i].value.str_value,
268+
v_bytes,
269+
min(len(v_bytes), 128),
270+
)
266271
else:
267272
raise ValueError(f"Unknown value type for {k}: {v}")
268273

llama_cpp/server/types.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818

1919
temperature_field = Field(
2020
default=0.8,
21-
ge=0.0,
22-
le=2.0,
2321
description="Adjust the randomness of the generated text.\n\n"
2422
+ "Temperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The default value is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.",
2523
)

0 commit comments

Comments
 (0)