
Commit b01ce7d

Merge remote-tracking branch 'upstream/master' into t5-clean-3

2 parents: 7c610fa + 3e2618b


70 files changed: +2638, -1697 lines

.devops/nix/package.nix
+7, -10

@@ -17,19 +17,18 @@
   rocmPackages,
   vulkan-headers,
   vulkan-loader,
-  clblast,
+  curl,
   useBlas ? builtins.all (x: !x) [
     useCuda
     useMetalKit
-    useOpenCL
     useRocm
     useVulkan
   ] && blas.meta.available,
   useCuda ? config.cudaSupport,
-  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
+  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
   useMpi ? false, # Increases the runtime closure size by ~700M
-  useOpenCL ? false,
   useRocm ? config.rocmSupport,
+  enableCurl ? true,
   useVulkan ? false,
   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

@@ -56,7 +55,6 @@ let
     ++ lib.optionals useCuda [ "CUDA" ]
     ++ lib.optionals useMetalKit [ "MetalKit" ]
     ++ lib.optionals useMpi [ "MPI" ]
-    ++ lib.optionals useOpenCL [ "OpenCL" ]
     ++ lib.optionals useRocm [ "ROCm" ]
     ++ lib.optionals useVulkan [ "Vulkan" ];

@@ -198,19 +196,19 @@ effectiveStdenv.mkDerivation (
         optionals effectiveStdenv.isDarwin darwinBuildInputs
         ++ optionals useCuda cudaBuildInputs
         ++ optionals useMpi [ mpi ]
-        ++ optionals useOpenCL [ clblast ]
         ++ optionals useRocm rocmBuildInputs
         ++ optionals useBlas [ blas ]
-        ++ optionals useVulkan vulkanBuildInputs;
+        ++ optionals useVulkan vulkanBuildInputs
+        ++ optionals enableCurl [ curl ];

       cmakeFlags =
         [
           (cmakeBool "LLAMA_BUILD_SERVER" true)
           (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
           (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+          (cmakeBool "LLAMA_CURL" enableCurl)
           (cmakeBool "GGML_NATIVE" false)
           (cmakeBool "GGML_BLAS" useBlas)
-          (cmakeBool "GGML_CLBLAST" useOpenCL)
           (cmakeBool "GGML_CUDA" useCuda)
           (cmakeBool "GGML_HIPBLAS" useRocm)
           (cmakeBool "GGML_METAL" useMetalKit)

@@ -254,7 +252,6 @@ effectiveStdenv.mkDerivation (
         useCuda
         useMetalKit
         useMpi
-        useOpenCL
         useRocm
         useVulkan
         ;

@@ -281,7 +278,7 @@ effectiveStdenv.mkDerivation (
       # Configurations we don't want even the CI to evaluate. Results in the
       # "unsupported platform" messages. This is mostly a no-op, because
       # cudaPackages would've refused to evaluate anyway.
-      badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
+      badPlatforms = optionals useCuda lib.platforms.darwin;

       # Configurations that are known to result in build failures. Can be
       # overridden by importing Nixpkgs with `allowBroken = true`.
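
The Nix expression now forwards its `enableCurl` flag to CMake as `LLAMA_CURL` via `cmakeBool`. For orientation only, a minimal sketch of how a boolean toggle like this is typically consumed on the CMake side; the option description and the `common` target name are illustrative assumptions, not taken from this commit:

    # Hypothetical sketch of consuming a LLAMA_CURL-style toggle.
    option(LLAMA_CURL "use libcurl to download models" OFF)

    if (LLAMA_CURL)
        find_package(CURL REQUIRED)
        # target name assumed; the define and link only apply when the toggle is ON
        target_compile_definitions(common PRIVATE LLAMA_USE_CURL)
        target_link_libraries(common PRIVATE ${CURL_LIBRARIES})
    endif()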

.github/ISSUE_TEMPLATE/config.yml
-2

@@ -9,5 +9,3 @@ contact_links:
   - name: Want to contribute?
     url: https://github.com/ggerganov/llama.cpp/wiki/contribute
     about: Head to the contribution guide page of the wiki for areas you can help with
-
-

.github/workflows/build.yml
+4, -3

@@ -47,7 +47,7 @@ jobs:
           sysctl -a
           mkdir build
           cd build
-          cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON ..
+          cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF ..
           cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

       - name: Test

@@ -105,7 +105,7 @@ jobs:
           sysctl -a
           # Metal is disabled due to intermittent failures with Github runners not having a GPU:
           # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
-          cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON
+          cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
           cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

       - name: Test

@@ -222,7 +222,7 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON
+          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
           cmake --build . --config Release -j $(nproc)

       - name: Test

@@ -799,6 +799,7 @@ jobs:
           7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
           $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
           cd build
+          $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
           & $sde -future -- ctest -L main -C Release --verbose --timeout 900

       - name: Determine tag name

CMakeLists.txt
+10, -4

@@ -79,14 +79,21 @@ set(GGML_SANITIZE_ADDRESS ${LLAMA_SANITIZE_ADDRESS})
 set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED})
 set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
 set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
-set(GGML_LLAMAFILE ON)
-set(GGML_CUDA_USE_GRAPHS ON)
+
+# change the default for these ggml options
+if (NOT DEFINED GGML_LLAMAFILE)
+    set(GGML_LLAMAFILE ON)
+endif()
+
+if (NOT DEFINED GGML_CUDA_USE_GRAPHS)
+    set(GGML_CUDA_USE_GRAPHS ON)
+endif()

 # transition helpers
 function (llama_option_depr TYPE OLD NEW)
     if (${OLD})
         message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
-        set(${NEW} ON)
+        set(${NEW} ON PARENT_SCOPE)
     endif()
 endfunction()

@@ -96,7 +103,6 @@ llama_option_depr(WARNING LLAMA_KOMPUTE GGML_KOMPUTE)
 llama_option_depr(WARNING LLAMA_METAL GGML_METAL)
 llama_option_depr(WARNING LLAMA_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
 llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE)
-llama_option_depr(WARNING LLAMA_OPENMP GGML_OPENMP)
 llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
 llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
 llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
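
Two details in this hunk are easy to miss: `set()` inside a CMake function only affects the function's own scope, so `PARENT_SCOPE` is what lets the deprecation helper actually turn on the new option for the caller, and wrapping a default in `if (NOT DEFINED ...)` keeps a user's `-D` value from being overwritten. A small self-contained sketch of both patterns, using illustrative names (`demo_option_depr`, `MY_OLD`, `MY_NEW`, `MY_DEFAULT`) rather than the project's real ones:

    cmake_minimum_required(VERSION 3.14)
    project(scope-demo NONE)

    # Without PARENT_SCOPE the assignment below would stay local to the
    # function body and the caller would never see MY_NEW change.
    function(demo_option_depr TYPE OLD NEW)
        if (${OLD})
            message(${TYPE} "${OLD} is deprecated, use ${NEW} instead")
            set(${NEW} ON PARENT_SCOPE)
        endif()
    endfunction()

    set(MY_OLD ON)
    demo_option_depr(WARNING MY_OLD MY_NEW)
    message(STATUS "MY_NEW = ${MY_NEW}")   # ON, because of PARENT_SCOPE

    # Guarding the default means `cmake -DMY_DEFAULT=OFF ...` wins over
    # the value this file would otherwise force.
    if (NOT DEFINED MY_DEFAULT)
        set(MY_DEFAULT ON)
    endif()
    message(STATUS "MY_DEFAULT = ${MY_DEFAULT}")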

CMakePresets.json
+1

@@ -19,6 +19,7 @@
       "cacheVariables": {
         "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
         "CMAKE_CXX_COMPILER": "icx",
+        "CMAKE_C_COMPILER": "cl",
         "GGML_SYCL": "ON",
         "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
       }

Makefile
+8

@@ -45,6 +45,7 @@ BUILD_TARGETS = \
 TEST_TARGETS = \
 	tests/test-autorelease \
 	tests/test-backend-ops \
+	tests/test-chat-template \
 	tests/test-double-float \
 	tests/test-grad0 \
 	tests/test-grammar-integration \

@@ -61,6 +62,11 @@ TEST_TARGETS = \
 	tests/test-tokenizer-1-bpe \
 	tests/test-tokenizer-1-spm

+# Legacy build targets that were renamed in #7809, but should still be removed when the project is cleaned
+LEGACY_TARGETS = main quantize quantize-stats perplexity imatrix embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml \
+	simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \
+	retrieval speculative infill tokenize benchmark-matmult parallel finetune export-lora lookahead lookup passkey gritlm
+
 # Deprecation aliases
 ifdef LLAMA_CUBLAS
 $(error LLAMA_CUBLAS is removed. Use GGML_CUDA instead.)

@@ -1070,6 +1076,7 @@ clean:
 	rm -rvf src/*.o
 	rm -rvf tests/*.o
 	rm -rvf examples/*.o
+	rm -rvf common/*.o
 	rm -rvf *.a
 	rm -rvf *.dll
 	rm -rvf *.so

@@ -1084,6 +1091,7 @@ clean:
 	rm -vrf ggml/src/ggml-cuda/template-instances/*.o
 	rm -rvf $(BUILD_TARGETS)
 	rm -rvf $(TEST_TARGETS)
+	rm -rvf $(LEGACY_TARGETS)
 	find examples pocs -type f -name "*.o" -delete

 #

README.md
+6

@@ -108,6 +108,7 @@ Typically finetunes of the base models below are supported as well.
 - [X] [Falcon](https://huggingface.co/models?search=tiiuae/falcon)
 - [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2)
 - [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne)
+- [X] [BERT](https://github.com/ggerganov/llama.cpp/pull/5423)
 - [X] [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/)
 - [X] [Baichuan 1 & 2](https://huggingface.co/models?search=baichuan-inc/Baichuan) + [derivations](https://huggingface.co/hiyouga/baichuan-7b-sft)
 - [X] [Aquila 1 & 2](https://huggingface.co/models?search=BAAI/Aquila)

@@ -217,6 +218,11 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 **Tools:**

 - [akx/ggify](https://github.com/akx/ggify) – download PyTorch models from HuggingFace Hub and convert them to GGML
+- [crashr/gppm](https://github.com/crashr/gppm) – launch llama.cpp instances utilizing NVIDIA Tesla P40 or P100 GPUs with reduced idle power consumption
+
+**Infrastructure:**
+
+- [Paddler](https://github.com/distantmagic/paddler) - Stateful load balancer custom-tailored for llama.cpp

 ---