Skip to content

Commit ea66027

Browse files
authored
Merge branch 'main' into functionary-fixes
2 parents cbe9604 + e6bbfb8 commit ea66027

File tree

5 files changed

+20
-8
lines changed

5 files changed

+20
-8
lines changed

.github/dependabot.yml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,3 +9,7 @@ updates:
99
directory: "/" # Location of package manifests
1010
schedule:
1111
interval: "weekly"
12+
- package-ecosystem: "github-actions"
13+
directory: "/"
14+
schedule:
15+
interval: "weekly"

.github/workflows/build-wheels-metal.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ jobs:
4141
with:
4242
submodules: "recursive"
4343

44-
- uses: actions/setup-python@v4
44+
- uses: actions/setup-python@v5
4545
with:
4646
python-version: ${{ matrix.pyver }}
4747

@@ -78,7 +78,7 @@ jobs:
7878
VERBOSE=1 python -m build --wheel
7979
fi
8080
81-
- uses: softprops/action-gh-release@v1
81+
- uses: softprops/action-gh-release@v2
8282
with:
8383
files: dist/*
8484
# set release name to <tag>-metal

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
## [0.2.65]
11+
12+
- feat: Update llama.cpp to ggerganov/llama.cpp@46e12c4692a37bdd31a0432fc5153d7d22bc7f72
13+
- feat: Allow for possibly non-pooled embeddings by @iamlemec in #1380
14+
1015
## [0.2.64]
1116

1217
- feat: Update llama.cpp to ggerganov/llama.cpp@4e96a812b3ce7322a29a3008db2ed73d9087b176

examples/low_level_api/quantize.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,16 @@
44

55

66
def main(args):
7+
fname_inp = args.fname_inp.encode("utf-8")
8+
fname_out = args.fname_out.encode("utf-8")
79
if not os.path.exists(fname_inp):
810
raise RuntimeError(f"Input file does not exist ({fname_inp})")
911
if os.path.exists(fname_out):
1012
raise RuntimeError(f"Output file already exists ({fname_out})")
11-
fname_inp = args.fname_inp.encode("utf-8")
12-
fname_out = args.fname_out.encode("utf-8")
13-
itype = args.itype
14-
return_code = llama_cpp.llama_model_quantize(fname_inp, fname_out, itype)
13+
ftype = args.type
14+
args = llama_cpp.llama_model_quantize_default_params()
15+
args.ftype = ftype
16+
return_code = llama_cpp.llama_model_quantize(fname_inp, fname_out, args)
1517
if return_code != 0:
1618
raise RuntimeError("Failed to quantize model")
1719

@@ -20,6 +22,7 @@ def main(args):
2022
parser = argparse.ArgumentParser()
2123
parser.add_argument("fname_inp", type=str, help="Path to input model")
2224
parser.add_argument("fname_out", type=str, help="Path to output model")
23-
parser.add_argument("type", type=int, help="Type of quantization (2: q4_0, 3: q4_1)")
25+
parser.add_argument("type", type=int, help="Type of quantization (2: q4_0, 3: q4_1), see llama_cpp.py for enum")
2426
args = parser.parse_args()
2527
main(args)
28+

llama_cpp/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
from .llama_cpp import *
22
from .llama import *
33

4-
__version__ = "0.2.64"
4+
__version__ = "0.2.65"

0 commit comments

Comments (0)