From cfe3a085ca379f6a980480a265faf4529e08cc02 Mon Sep 17 00:00:00 2001
From: Adam Debreceni
Date: Tue, 26 Nov 2024 09:57:37 +0100
Subject: [PATCH] Fix llama.cpp bundling and adapt to the b4174 API

---
 cmake/BundledLlamaCpp.cmake              | 101 +++++++++++++++++++++++
 extensions/ai/CMakeLists.txt             |   1 -
 extensions/ai/processors/AiProcessor.cpp |   6 +-
 extensions/ai/processors/AiProcessor.h   |   2 +
 thirdparty/llamacpp/metal.patch          |  14 ++++
 5 files changed, 119 insertions(+), 5 deletions(-)
 create mode 100644 cmake/BundledLlamaCpp.cmake
 create mode 100644 thirdparty/llamacpp/metal.patch

diff --git a/cmake/BundledLlamaCpp.cmake b/cmake/BundledLlamaCpp.cmake
new file mode 100644
index 0000000000..7dc964cfab
--- /dev/null
+++ b/cmake/BundledLlamaCpp.cmake
@@ -0,0 +1,101 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+function(use_bundled_llamacpp SOURCE_DIR BINARY_DIR)
+    set(PC "${Patch_EXECUTABLE}" -p1 -i "${SOURCE_DIR}/thirdparty/llamacpp/metal.patch")
+
+    set(BYPRODUCTS
+        "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libllama.a"
+        "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml.a"
+        "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-base.a"
+        "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-cpu.a"
+        "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-blas.a"
+    )
+
+    if (APPLE)
+        list(APPEND BYPRODUCTS
+            "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-metal.a"
+        )
+    endif()
+
+    set(LLAMACPP_CMAKE_ARGS ${PASSTHROUGH_CMAKE_ARGS}
+        "-DCMAKE_INSTALL_PREFIX=${BINARY_DIR}/thirdparty/llamacpp-install"
+        -DBUILD_SHARED_LIBS=OFF
+        -DLLAMA_BUILD_TESTS=OFF
+        -DLLAMA_BUILD_EXAMPLES=OFF
+        -DLLAMA_BUILD_SERVER=OFF
+    )
+
+    append_third_party_passthrough_args(LLAMACPP_CMAKE_ARGS "${LLAMACPP_CMAKE_ARGS}")
+
+    ExternalProject_Add(
+        llamacpp-external
+        URL https://github.com/ggerganov/llama.cpp/archive/refs/tags/b4174.tar.gz
+        URL_HASH "SHA256=571ef4c645784db56a482c453e9090d737913d25a178a00d5e0afd8d434afae0"
+        SOURCE_DIR "${BINARY_DIR}/thirdparty/llamacpp-src"
+        CMAKE_ARGS ${LLAMACPP_CMAKE_ARGS}
+        BUILD_BYPRODUCTS ${BYPRODUCTS}
+        PATCH_COMMAND ${PC}
+        EXCLUDE_FROM_ALL TRUE
+    )
+
+#   set(LLAMACPP_FOUND "YES" CACHE STRING "" FORCE)
+    set(LLAMACPP_INCLUDE_DIR "${BINARY_DIR}/thirdparty/llamacpp-install/include" CACHE STRING "" FORCE)
+#   set(LLAMACPP_LIBRARIES "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libllama.a;${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml.a;${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-base.a" CACHE STRING "" FORCE)
+
+    add_library(llamacpp INTERFACE)
+
+    add_library(LlamaCpp::llama STATIC IMPORTED)
+    set_target_properties(LlamaCpp::llama PROPERTIES IMPORTED_LOCATION "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libllama.a")
+    add_dependencies(LlamaCpp::llama llamacpp-external)
+    target_link_libraries(llamacpp INTERFACE LlamaCpp::llama)
+
+    add_library(LlamaCpp::ggml STATIC IMPORTED)
+    set_target_properties(LlamaCpp::ggml PROPERTIES IMPORTED_LOCATION "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml.a")
+    add_dependencies(LlamaCpp::ggml llamacpp-external)
+    target_link_libraries(llamacpp INTERFACE LlamaCpp::ggml)
+
+    add_library(LlamaCpp::ggml-base STATIC IMPORTED)
+    set_target_properties(LlamaCpp::ggml-base PROPERTIES IMPORTED_LOCATION "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-base.a")
+    add_dependencies(LlamaCpp::ggml-base llamacpp-external)
+    target_link_libraries(llamacpp INTERFACE LlamaCpp::ggml-base)
+
+    # backends
+    add_library(LlamaCpp::ggml-cpu STATIC IMPORTED)
+    set_target_properties(LlamaCpp::ggml-cpu PROPERTIES IMPORTED_LOCATION "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-cpu.a")
+    add_dependencies(LlamaCpp::ggml-cpu llamacpp-external)
+    target_link_libraries(llamacpp INTERFACE LlamaCpp::ggml-cpu)
+
+    add_library(LlamaCpp::ggml-blas STATIC IMPORTED)
+    set_target_properties(LlamaCpp::ggml-blas PROPERTIES IMPORTED_LOCATION "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-blas.a")
+    add_dependencies(LlamaCpp::ggml-blas llamacpp-external)
+    target_link_libraries(llamacpp INTERFACE LlamaCpp::ggml-blas)
+
+    if (APPLE)
+        add_library(LlamaCpp::ggml-metal STATIC IMPORTED)
+        set_target_properties(LlamaCpp::ggml-metal PROPERTIES IMPORTED_LOCATION "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-metal.a")
+        add_dependencies(LlamaCpp::ggml-metal llamacpp-external)
+        target_link_libraries(llamacpp INTERFACE LlamaCpp::ggml-metal)
+    endif()
+
+    file(MAKE_DIRECTORY ${LLAMACPP_INCLUDE_DIR})
+    target_include_directories(llamacpp INTERFACE ${LLAMACPP_INCLUDE_DIR})
+
+    if (APPLE)
+        target_link_libraries(llamacpp INTERFACE "-framework Metal" "-framework CoreFoundation" "-framework Foundation" "-framework Accelerate")
+    endif()
+endfunction()
\ No newline at end of file
diff --git a/extensions/ai/CMakeLists.txt b/extensions/ai/CMakeLists.txt
index 5bdf053388..56443e7f8c 100644
--- a/extensions/ai/CMakeLists.txt
+++ b/extensions/ai/CMakeLists.txt
@@ -32,7 +32,6 @@ add_minifi_library(minifi-ai-processors SHARED ${SOURCES})
 
 target_include_directories(minifi-ai-processors PUBLIC "${CMAKE_SOURCE_DIR}/extensions/ai")
 target_link_libraries(minifi-ai-processors ${LIBMINIFI} llamacpp)
-add_dependencies(minifi-ai-processors llamacpp-external)
 
 register_extension(minifi-ai-processors "AI PROCESSORS" AI-PROCESSORS "Provides AI processors")
 
diff --git a/extensions/ai/processors/AiProcessor.cpp b/extensions/ai/processors/AiProcessor.cpp
index ec0075c904..380e45a129 100644
--- a/extensions/ai/processors/AiProcessor.cpp
+++ b/extensions/ai/processors/AiProcessor.cpp
@@ -207,8 +207,7 @@ void AiProcessor::onTrigger(core::ProcessContext& context, core::ProcessSession&
   }();
 
-  llama_batch batch = llama_batch_get_one(enc_input.data(), enc_input.size(), 0, 0);
-  int n_pos = 0;
+  llama_batch batch = llama_batch_get_one(enc_input.data(), enc_input.size());
 
   llama_token new_token_id;
 
@@ -218,7 +217,6 @@ void AiProcessor::onTrigger(core::ProcessContext& context, core::ProcessSession&
     if (int32_t res = llama_decode(llama_ctx_, batch); res < 0) {
       throw std::logic_error("failed to execute decode");
     }
-    n_pos += batch.n_tokens;
 
     new_token_id = llama_sampler_sample(llama_sampler_, llama_ctx_, -1);
 
@@ -239,7 +237,7 @@ void AiProcessor::onTrigger(core::ProcessContext& context, core::ProcessSession&
     std::cout << token_str << std::flush;
 
     text += token_str;
-    batch = llama_batch_get_one(&new_token_id, 1, n_pos, 0);
+    batch = llama_batch_get_one(&new_token_id, 1);
   }
 
   logger_->log_debug("AI model output: {}", text);
diff --git a/extensions/ai/processors/AiProcessor.h b/extensions/ai/processors/AiProcessor.h
index 9678f01127..ed29d7333e 100644
--- a/extensions/ai/processors/AiProcessor.h
+++ b/extensions/ai/processors/AiProcessor.h
@@ -20,7 +20,9 @@
 #include "core/Processor.h"
 #include "core/logging/LoggerFactory.h"
 #include "core/PropertyDefinitionBuilder.h"
+#pragma push_macro("DEPRECATED")
 #include "llama.h"
+#pragma pop_macro("DEPRECATED")
 
 namespace org::apache::nifi::minifi::processors {
 
diff --git a/thirdparty/llamacpp/metal.patch b/thirdparty/llamacpp/metal.patch
new file mode 100644
index 0000000000..5bf2c0c712
--- /dev/null
+++ b/thirdparty/llamacpp/metal.patch
@@ -0,0 +1,14 @@
+diff --color=auto -rupN llama.cpp-b4174/ggml/src/ggml-metal/ggml-metal.m llama.cpp-b4174-patched/ggml/src/ggml-metal/ggml-metal.m
+--- llama.cpp-b4174/ggml/src/ggml-metal/ggml-metal.m	2024-11-26 02:47:20
++++ llama.cpp-b4174-patched/ggml/src/ggml-metal/ggml-metal.m	2024-11-26 09:54:33
+@@ -58,7 +58,9 @@ static id<MTLDevice> ggml_backend_metal_device_acq(str
+     assert(ctx != NULL);
+ 
+     if (ctx->mtl_device == nil) {
+-        ctx->mtl_device = MTLCreateSystemDefaultDevice();
++        NSArray<id<MTLDevice>> *devices = MTLCopyAllDevices();
++        assert(devices.count != 0);
++        ctx->mtl_device = devices.firstObject;
+ 
+         ctx->has_simdgroup_reduction  = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
+         ctx->has_simdgroup_reduction |= [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];
\ No newline at end of file
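
The call site of use_bundled_llamacpp() is outside this diff. A minimal
sketch of the expected wiring in the top-level CMakeLists.txt -- the
ENABLE_AI option name is a placeholder, not something this patch defines:

    # Hypothetical call site: BundledLlamaCpp.cmake sits on CMAKE_MODULE_PATH,
    # so include() finds it by name; the function registers llamacpp-external
    # and the llamacpp INTERFACE target that the extension links against.
    if (ENABLE_AI)
        include(BundledLlamaCpp)
        use_bundled_llamacpp("${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}")
    endif()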
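The AiProcessor.cpp hunks track an upstream API change: as of llama.cpp
b4174, llama_batch_get_one() no longer takes a start position or a sequence
id, which is why the manual n_pos bookkeeping disappears. A sketch of the
before/after signatures, assuming the b4174 llama.h:

    // before: the caller threaded the absolute position through every batch
    // llama_batch llama_batch_get_one(llama_token * tokens, int32_t n_tokens,
    //                                 llama_pos pos_0, llama_seq_id seq_id);
    // b4174: positions are derived from the context's KV-cache state
    // llama_batch llama_batch_get_one(llama_token * tokens, int32_t n_tokens);
    llama_batch batch = llama_batch_get_one(&new_token_id, /*n_tokens=*/1);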
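The push_macro/pop_macro pair in AiProcessor.h guards against a macro clash:
llama.h defines its own DEPRECATED(func, hint) helper, and the pragmas save
whatever DEPRECATED meant in the surrounding build and restore it after the
include. The pattern in isolation:

    #pragma push_macro("DEPRECATED")   // save the current definition, if any
    #include "llama.h"                 // defines DEPRECATED for its own use
    #pragma pop_macro("DEPRECATED")    // restore the saved definition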
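Finally, metal.patch replaces MTLCreateSystemDefaultDevice() with the first
entry of MTLCopyAllDevices(), presumably because the former can return nil in
non-GUI processes (on macOS it relies on Core Graphics and a usable display
context), while MTLCopyAllDevices() enumerates every Metal device regardless
of how the process was launched; the added assert makes that expectation
explicit.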