
Commit d2612a4

Merge branch 'main' into cuda

2 parents: 0441b1d + a14b49d


52 files changed: +2888 additions, -1631 deletions

.github/workflows/build-and-release.yaml

Lines changed: 2 additions & 2 deletions
@@ -42,7 +42,7 @@ jobs:
         shell: cmd
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.19.1
+        uses: pypa/cibuildwheel@v2.19.2
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""
@@ -69,7 +69,7 @@ jobs:
           platforms: linux/arm64
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.19.1
+        uses: pypa/cibuildwheel@v2.19.2
        env:
          CIBW_SKIP: "*musllinux* pp*"
          CIBW_REPAIR_WHEEL_COMMAND: ""

.github/workflows/build-wheels-cuda.yaml

Lines changed: 13 additions & 7 deletions
@@ -20,7 +20,7 @@ jobs:
         id: set-matrix
         run: |
           $matrix = @{
-              'os' = @('ubuntu-latest', 'windows-latest')
+              'os' = @('ubuntu-latest', 'windows-2019')
               'pyver' = @("3.9", "3.10", "3.11", "3.12")
               'cuda' = @("12.1.1", "12.2.2", "12.3.2", "12.4.1", "12.5.0")
               'releasetag' = @("basic")
@@ -43,6 +43,12 @@ jobs:
       AVXVER: ${{ matrix.releasetag }}
 
     steps:
+      - name: Add MSBuild to PATH
+        if: runner.os == 'Windows'
+        uses: microsoft/setup-msbuild@v2
+        with:
+          vs-version: '[16.11,16.12)'
+
       - uses: actions/checkout@v4
         with:
           submodules: "recursive"
@@ -85,7 +91,7 @@ jobs:
         if: runner.os == 'Windows'
         run: |
           $y = (gi '.\MSBuildExtensions').fullname + '\*'
-          (gi 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\*\BuildCustomizations').fullname.foreach({cp $y $_})
+          (gi 'C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\MSBuild\Microsoft\VC\*\BuildCustomizations').fullname.foreach({cp $y $_})
           $cupath = 'CUDA_PATH_V' + $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','_')
           echo "$cupath=$env:CONDA_PREFIX" >> $env:GITHUB_ENV
@@ -108,16 +114,16 @@ jobs:
             $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
           }
           $env:VERBOSE = '1'
-          $env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=all'
-          $env:CMAKE_ARGS = "-DLLAMA_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
+          $env:CMAKE_ARGS = '-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=all'
+          $env:CMAKE_ARGS = "-DGGML_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
           # if ($env:AVXVER -eq 'AVX') {
-          $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'
+          $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
           # }
           # if ($env:AVXVER -eq 'AVX512') {
-          #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX512=on'
+          #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX512=on'
           # }
           # if ($env:AVXVER -eq 'basic') {
-          #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'
+          #   $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DGGML_AVX=off -DGGML_AVX2=off -DGGML_FMA=off -DGGML_F16C=off'
           # }
           python -m build --wheel
           # write the build tag to the output
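For reference, the local equivalent of this build step under the renamed flags would look roughly like this (a minimal sketch, not the workflow itself; it assumes a CUDA toolkit on PATH and the Python "build" package installed, mirroring the CMAKE_ARGS values above):

# Build a CUDA wheel locally with the new GGML_* flags (formerly LLAMA_*).
CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=all -DGGML_CUDA_FORCE_MMQ=ON" \
  python -m build --wheel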

.github/workflows/build-wheels-metal.yaml

Lines changed: 2 additions & 2 deletions
@@ -43,12 +43,12 @@ jobs:
         shell: cmd
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.19.1
+        uses: pypa/cibuildwheel@v2.19.2
         env:
           # disable repair
           CIBW_REPAIR_WHEEL_COMMAND: ""
           CIBW_ARCHS: "arm64"
-          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DLLAMA_METAL=on"
+          CIBW_ENVIRONMENT: CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=on"
           CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
         with:
           package-dir: .
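To reproduce this job outside CI, cibuildwheel can be driven locally with the same environment variables (a hypothetical invocation; it assumes an arm64 macOS host with cibuildwheel installed):

# cibuildwheel reads its configuration from CIBW_* environment variables.
CIBW_ARCHS="arm64" \
CIBW_BUILD="cp39-* cp310-* cp311-* cp312-*" \
CIBW_ENVIRONMENT='CMAKE_ARGS="-DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_APPLE_SILICON_PROCESSOR=arm64 -DGGML_METAL=on"' \
  cibuildwheel --platform macos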

.github/workflows/test.yaml

Lines changed: 1 addition & 30 deletions
@@ -129,32 +129,6 @@ jobs:
       run: |
         python -m pytest
 
-  # build-linux-opencl:
-
-  #   runs-on: ubuntu-latest
-
-  #   steps:
-  #   - uses: actions/checkout@v4
-  #     with:
-  #       submodules: "recursive"
-  #   - name: Set up Python 3.8
-  #     uses: actions/setup-python@v5
-  #     with:
-  #       python-version: "3.8"
-  #   - name: Set up OpenCL & CLBlast
-  #     run: |
-  #       wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
-  #       echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
-  #       sudo apt-get update
-  #       sudo apt-get install -y --no-install-recommends llvm intel-oneapi-runtime-opencl intel-oneapi-runtime-compilers libclblast-dev
-  #   - name: Install dependencies
-  #     run: |
-  #       python -m pip install --upgrade pip
-  #       CMAKE_ARGS="-DLLAMA_CLBLAST=on" python -m pip install .[all] --verbose
-  #   - name: Test with pytest
-  #     run: |
-  #       python -m pytest
-
 
   build-macos-metal:
 
@@ -184,10 +158,7 @@ jobs:
         RUST_LOG: trace
       run: |
         python -m pip install --upgrade pip
-        python -m pip install uv
-        CMAKE_ARGS="-DLLAMA_METAL=on" python -m uv pip install .[all] -vvv
-      shell: cmd
-
+        CMAKE_ARGS="-DGGML_METAL=on" python -m pip install .[all] --verbose
     - name: Test with pytest
       run: |
         python -m pytest

CHANGELOG.md

Lines changed: 27 additions & 0 deletions
@@ -7,6 +7,33 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.83]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@081fe431aa8fb6307145c4feb3eed4f48cab19f8
+- feat: Add 'required' literal to ChatCompletionToolChoiceOption by @mjschock in #1597
+- fix: Change repeat_penalty to 1.0 to match llama.cpp defaults by @ddh0 in #1590
+- fix(docs): Update README.md typo by @ericcurtin in #1589
+- fix(server): Use split_mode from model settings by @grider-withourai in #1594
+- feat(ci): Dockerfile update base images and post-install cleanup by @Smartappli in #1530
+
+## [0.2.82]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@7fdb6f73e35605c8dbc39e9f19cd9ed84dbc87f2
+
+## [0.2.81]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@968967376dc2c018d29f897c4883d335bbf384fb
+- fix(ci): Fix CUDA wheels, use LLAMA_CUDA instead of removed LLAMA_CUBLAS by @abetlen in 4fb6fc12a02a68884c25dd9f6a421cacec7604c6
+- fix(ci): Fix MacOS release, use macos-12 image instead of removed macos-11 by @abetlen in 3a551eb5263fdbd24b36d7770856374c04e92788
+
+## [0.2.80]
+
+- feat: Update llama.cpp to ggerganov/llama.cpp@023b8807e10bc3ade24a255f01c1ad2a01bb4228
+- fix(server): Fix bug in FastAPI streaming response where dependency was released before request completes causing SEGFAULT by @abetlen in 296304b60bb83689659883c9cc24f4c074dd88ff
+- fix(server): Update default config value for embeddings to False to fix error in text generation where logits were not allocated by llama.cpp by @abetlen in bf5e0bb4b151f4ca2f5a21af68eb832a96a79d75
+- fix(ci): Fix the CUDA workflow by @oobabooga in #1551
+- docs: Update readme examples to use newer Qwen2 model by @jncraton in #1544
+
 ## [0.2.79]
 
 - feat: Update llama.cpp to ggerganov/llama.cpp@9c77ec1d74874ee22bdef8f110e8e8d41389abf2

CMakeLists.txt

Lines changed: 77 additions & 42 deletions
@@ -5,50 +5,91 @@ project(llama_cpp)
 option(LLAMA_BUILD "Build llama.cpp shared library and install alongside python package" ON)
 option(LLAVA_BUILD "Build llava shared library and install alongside python package" ON)
 
+function(llama_cpp_python_install_target target)
+    install(
+        TARGETS ${target}
+        LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
+        RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
+        ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
+        FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
+        RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
+    )
+    install(
+        TARGETS ${target}
+        LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
+        RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
+        ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
+        FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
+        RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
+    )
+    set_target_properties(${target} PROPERTIES
+        INSTALL_RPATH "$ORIGIN"
+        BUILD_WITH_INSTALL_RPATH TRUE
+    )
+    if(UNIX)
+        if(APPLE)
+            set_target_properties(${target} PROPERTIES
+                INSTALL_RPATH "@loader_path"
+                BUILD_WITH_INSTALL_RPATH TRUE
+            )
+        else()
+            set_target_properties(${target} PROPERTIES
+                INSTALL_RPATH "$ORIGIN"
+                BUILD_WITH_INSTALL_RPATH TRUE
+            )
+        endif()
+    endif()
+endfunction()
+
 if (LLAMA_BUILD)
     set(BUILD_SHARED_LIBS "On")
 
+    set(CMAKE_SKIP_BUILD_RPATH FALSE)
+
+    # When building, don't use the install RPATH already
+    # (but later on when installing)
+    set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
+
+    # Add the automatically determined parts of the RPATH
+    # which point to directories outside the build tree to the install RPATH
+    set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+    set(CMAKE_SKIP_RPATH FALSE)
+
     # Building llama
     if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
         # Need to disable these llama.cpp flags on Apple x86_64,
         # otherwise users may encounter invalid instruction errors
-        set(LLAMA_AVX "Off" CACHE BOOL "llama: enable AVX" FORCE)
-        set(LLAMA_AVX2 "Off" CACHE BOOL "llama: enable AVX2" FORCE)
-        set(LLAMA_FMA "Off" CACHE BOOL "llama: enable FMA" FORCE)
-        set(LLAMA_F16C "Off" CACHE BOOL "llama: enable F16C" FORCE)
+        set(GGML_AVX "Off" CACHE BOOL "ggml: enable AVX" FORCE)
+        set(GGML_AVX2 "Off" CACHE BOOL "ggml: enable AVX2" FORCE)
+        set(GGML_FMA "Off" CACHE BOOL "ggml: enable FMA" FORCE)
+        set(GGML_F16C "Off" CACHE BOOL "ggml: enable F16C" FORCE)
     endif()
 
     if (APPLE)
-        set(LLAMA_METAL_EMBED_LIBRARY "On" CACHE BOOL "llama: embed metal library" FORCE)
+        set(GGML_METAL_EMBED_LIBRARY "On" CACHE BOOL "llama: embed metal library" FORCE)
     endif()
 
     add_subdirectory(vendor/llama.cpp)
-    install(
-        TARGETS llama
-        LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-    )
-    # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
-    install(
-        TARGETS llama
-        LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-    )
+    llama_cpp_python_install_target(llama)
+    llama_cpp_python_install_target(ggml)
+
     # Workaround for Windows + CUDA https://github.com/abetlen/llama-cpp-python/issues/563
-    if (WIN32 AND (LLAMA_CUDA OR LLAMA_CUBLAS))
+    if (WIN32)
         install(
             FILES $<TARGET_RUNTIME_DLLS:llama>
-            DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
+            DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
         )
         install(
             FILES $<TARGET_RUNTIME_DLLS:llama>
-            DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
+            DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
+        )
+        install(
+            FILES $<TARGET_RUNTIME_DLLS:ggml>
+            DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
+        )
+        install(
+            FILES $<TARGET_RUNTIME_DLLS:ggml>
+            DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
         )
     endif()
 
@@ -69,22 +110,16 @@ if (LLAMA_BUILD)
         if (WIN32)
             set_target_properties(llava_shared PROPERTIES CUDA_ARCHITECTURES OFF)
         endif()
-        install(
-            TARGETS llava_shared
-            LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-            RUNTIME DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-            ARCHIVE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-            FRAMEWORK DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-            RESOURCE DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
-        )
-        # Temporary fix for https://github.com/scikit-build/scikit-build-core/issues/374
-        install(
-            TARGETS llava_shared
-            LIBRARY DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-            RUNTIME DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-            ARCHIVE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-            FRAMEWORK DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-            RESOURCE DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
-        )
+        llama_cpp_python_install_target(llava_shared)
+        if (WIN32)
+            install(
+                FILES $<TARGET_RUNTIME_DLLS:llava_shared>
+                DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp/lib
+            )
+            install(
+                FILES $<TARGET_RUNTIME_DLLS:llava_shared>
+                DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp/lib
+            )
+        endif()
     endif()
 endif()
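Because every target now installs into llama_cpp/lib with an $ORIGIN (Linux) or @loader_path (macOS) install RPATH, the shared libraries should resolve each other relative to their own directory. A quick post-build check (the library filenames here are assumptions; adjust to whatever your build actually produces):

# Linux: the dynamic section should list $ORIGIN as the RPATH/RUNPATH.
readelf -d llama_cpp/lib/libllama.so | grep -iE 'rpath|runpath'

# macOS: the load commands should list @loader_path.
otool -l llama_cpp/lib/libllama.dylib | grep -A2 LC_RPATH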

Makefile

Lines changed: 9 additions & 9 deletions
@@ -22,28 +22,28 @@ build.debug:
 	--editable .
 
 build.cuda:
-	CMAKE_ARGS="-DLLAMA_CUDA=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_CUDA=on" python3 -m pip install --verbose -e .
 
 build.openblas:
-	CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" python3 -m pip install --verbose -e .
 
 build.blis:
-	CMAKE_ARGS="-DLLAMA_BLAS=on -DLLAMA_BLAS_VENDOR=FLAME" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_BLAS=on -DGGML_BLAS_VENDOR=FLAME" python3 -m pip install --verbose -e .
 
 build.metal:
-	CMAKE_ARGS="-DLLAMA_METAL=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_METAL=on" python3 -m pip install --verbose -e .
 
 build.vulkan:
-	CMAKE_ARGS="-DLLAMA_VULKAN=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_VULKAN=on" python3 -m pip install --verbose -e .
 
 build.kompute:
-	CMAKE_ARGS="-DLLAMA_KOMPUTE=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_KOMPUTE=on" python3 -m pip install --verbose -e .
 
 build.sycl:
-	CMAKE_ARGS="-DLLAMA_SYCL=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_SYCL=on" python3 -m pip install --verbose -e .
 
 build.rpc:
-	CMAKE_ARGS="-DLLAMA_RPC=on" python3 -m pip install --verbose -e .
+	CMAKE_ARGS="-DGGML_RPC=on" python3 -m pip install --verbose -e .
 
 build.sdist:
 	python3 -m build --sdist
@@ -85,4 +85,4 @@ clean:
 	deploy.pypi \
 	deploy.gh-docs \
 	docker \
-	clean
\ No newline at end of file
+	clean
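Usage of the targets is unchanged; only the CMake flag names passed through CMAKE_ARGS differ. For example (assuming the matching toolchain is installed):

# Editable install with CUDA support via the renamed flag:
make build.cuda

# Equivalent direct invocation without make:
CMAKE_ARGS="-DGGML_CUDA=on" python3 -m pip install --verbose -e .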
