From cfe3a085ca379f6a980480a265faf4529e08cc02 Mon Sep 17 00:00:00 2001
From: Adam Debreceni
Date: Tue, 26 Nov 2024 09:57:37 +0100
Subject: [PATCH] Fix llama.cpp bundling and adapt to the b4174 API

---
 cmake/BundledLlamaCpp.cmake              | 101 +++++++++++++++++++++++
 extensions/ai/CMakeLists.txt             |   1 -
 extensions/ai/processors/AiProcessor.cpp |   6 +-
 extensions/ai/processors/AiProcessor.h   |   2 +
 thirdparty/llamacpp/metal.patch          |  14 ++++
 5 files changed, 119 insertions(+), 5 deletions(-)
 create mode 100644 cmake/BundledLlamaCpp.cmake
 create mode 100644 thirdparty/llamacpp/metal.patch

diff --git a/cmake/BundledLlamaCpp.cmake b/cmake/BundledLlamaCpp.cmake
new file mode 100644
index 0000000000..7dc964cfab
--- /dev/null
+++ b/cmake/BundledLlamaCpp.cmake
@@ -0,0 +1,101 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+function(use_bundled_llamacpp SOURCE_DIR BINARY_DIR)
+    set(PC "${Patch_EXECUTABLE}" -p1 -i "${SOURCE_DIR}/thirdparty/llamacpp/metal.patch")
+
+    set(BYPRODUCTS
+        "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libllama.a"
+        "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml.a"
+        "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-base.a"
+        "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-cpu.a"
+        "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-blas.a"
+    )
+
+    if (APPLE)
+        list(APPEND BYPRODUCTS
+            "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-metal.a"
+        )
+    endif()
+
+    set(LLAMACPP_CMAKE_ARGS ${PASSTHROUGH_CMAKE_ARGS}
+        "-DCMAKE_INSTALL_PREFIX=${BINARY_DIR}/thirdparty/llamacpp-install"
+        -DBUILD_SHARED_LIBS=OFF
+        -DLLAMA_BUILD_TESTS=OFF
+        -DLLAMA_BUILD_EXAMPLES=OFF
+        -DLLAMA_BUILD_SERVER=OFF
+    )
+
+    append_third_party_passthrough_args(LLAMACPP_CMAKE_ARGS "${LLAMACPP_CMAKE_ARGS}")
+
+    ExternalProject_Add(
+        llamacpp-external
+        URL https://github.com/ggerganov/llama.cpp/archive/refs/tags/b4174.tar.gz
+        URL_HASH "SHA256=571ef4c645784db56a482c453e9090d737913d25a178a00d5e0afd8d434afae0"
+        SOURCE_DIR "${BINARY_DIR}/thirdparty/llamacpp-src"
+        CMAKE_ARGS ${LLAMACPP_CMAKE_ARGS}
+        BUILD_BYPRODUCTS ${BYPRODUCTS}
+        PATCH_COMMAND ${PC}
+        EXCLUDE_FROM_ALL TRUE
+    )
+
+#   set(LLAMACPP_FOUND "YES" CACHE STRING "" FORCE)
+    set(LLAMACPP_INCLUDE_DIR "${BINARY_DIR}/thirdparty/llamacpp-install/include" CACHE STRING "" FORCE)
+#   set(LLAMACPP_LIBRARIES "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libllama.a;${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml.a;${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-base.a" CACHE STRING "" FORCE)
+
+    add_library(llamacpp INTERFACE)
+
+    add_library(LlamaCpp::llama STATIC IMPORTED)
+    set_target_properties(LlamaCpp::llama PROPERTIES IMPORTED_LOCATION "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libllama.a")
+    add_dependencies(LlamaCpp::llama llamacpp-external)
+    target_link_libraries(llamacpp INTERFACE LlamaCpp::llama)
+
+    add_library(LlamaCpp::ggml STATIC IMPORTED)
+    set_target_properties(LlamaCpp::ggml PROPERTIES IMPORTED_LOCATION "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml.a")
+    add_dependencies(LlamaCpp::ggml llamacpp-external)
+    target_link_libraries(llamacpp INTERFACE LlamaCpp::ggml)
+
+    add_library(LlamaCpp::ggml-base STATIC IMPORTED)
+    set_target_properties(LlamaCpp::ggml-base PROPERTIES IMPORTED_LOCATION "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-base.a")
+    add_dependencies(LlamaCpp::ggml-base llamacpp-external)
+    target_link_libraries(llamacpp INTERFACE LlamaCpp::ggml-base)
+
+    # backends
+    add_library(LlamaCpp::ggml-cpu STATIC IMPORTED)
+    set_target_properties(LlamaCpp::ggml-cpu PROPERTIES IMPORTED_LOCATION "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-cpu.a")
+    add_dependencies(LlamaCpp::ggml-cpu llamacpp-external)
+    target_link_libraries(llamacpp INTERFACE LlamaCpp::ggml-cpu)
+
+    add_library(LlamaCpp::ggml-blas STATIC IMPORTED)
+    set_target_properties(LlamaCpp::ggml-blas PROPERTIES IMPORTED_LOCATION "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-blas.a")
+    add_dependencies(LlamaCpp::ggml-blas llamacpp-external)
+    target_link_libraries(llamacpp INTERFACE LlamaCpp::ggml-blas)
+
+    if (APPLE)
+        add_library(LlamaCpp::ggml-metal STATIC IMPORTED)
+        set_target_properties(LlamaCpp::ggml-metal PROPERTIES IMPORTED_LOCATION "${BINARY_DIR}/thirdparty/llamacpp-install/lib/libggml-metal.a")
+        add_dependencies(LlamaCpp::ggml-metal llamacpp-external)
+        target_link_libraries(llamacpp INTERFACE LlamaCpp::ggml-metal)
+    endif()
+
+    file(MAKE_DIRECTORY ${LLAMACPP_INCLUDE_DIR})
+    target_include_directories(llamacpp INTERFACE ${LLAMACPP_INCLUDE_DIR})
+
+    if (APPLE)
+        target_link_libraries(llamacpp INTERFACE "-framework Metal" "-framework CoreFoundation" "-framework Foundation" "-framework Accelerate")
+    endif()
+endfunction()
\ No newline at end of file
diff --git a/extensions/ai/CMakeLists.txt b/extensions/ai/CMakeLists.txt
index 5bdf053388..56443e7f8c 100644
--- a/extensions/ai/CMakeLists.txt
+++ b/extensions/ai/CMakeLists.txt
@@ -32,7 +32,6 @@ add_minifi_library(minifi-ai-processors SHARED ${SOURCES})
 
 target_include_directories(minifi-ai-processors PUBLIC "${CMAKE_SOURCE_DIR}/extensions/ai")
 target_link_libraries(minifi-ai-processors ${LIBMINIFI} llamacpp)
-add_dependencies(minifi-ai-processors llamacpp-external)
 
 register_extension(minifi-ai-processors "AI PROCESSORS" AI-PROCESSORS "Provides AI processors")
 
diff --git a/extensions/ai/processors/AiProcessor.cpp b/extensions/ai/processors/AiProcessor.cpp
index ec0075c904..380e45a129 100644
--- a/extensions/ai/processors/AiProcessor.cpp
+++ b/extensions/ai/processors/AiProcessor.cpp
@@ -207,8 +207,7 @@ void AiProcessor::onTrigger(core::ProcessContext& context, core::ProcessSession&
   }();
 
-  llama_batch batch = llama_batch_get_one(enc_input.data(), enc_input.size(), 0, 0);
-  int n_pos = 0;
+  llama_batch batch = llama_batch_get_one(enc_input.data(), enc_input.size());
 
   llama_token new_token_id;
 
@@ -218,7 +217,6 @@ void AiProcessor::onTrigger(core::ProcessContext& context, core::ProcessSession&
     if (int32_t res = llama_decode(llama_ctx_, batch); res < 0) {
       throw std::logic_error("failed to execute decode");
     }
-    n_pos += batch.n_tokens;
 
     new_token_id = llama_sampler_sample(llama_sampler_, llama_ctx_, -1);
 
@@ -239,7 +237,7 @@ void AiProcessor::onTrigger(core::ProcessContext& context, core::ProcessSession&
     std::cout << token_str << std::flush;
 
     text += token_str;
-    batch = llama_batch_get_one(&new_token_id, 1, n_pos, 0);
+    batch = llama_batch_get_one(&new_token_id, 1);
   }
 
   logger_->log_debug("AI model output: {}", text);
diff --git a/extensions/ai/processors/AiProcessor.h b/extensions/ai/processors/AiProcessor.h
index 9678f01127..ed29d7333e 100644
--- a/extensions/ai/processors/AiProcessor.h
+++ b/extensions/ai/processors/AiProcessor.h
@@ -20,7 +20,9 @@
 #include "core/Processor.h"
 #include "core/logging/LoggerFactory.h"
 #include "core/PropertyDefinitionBuilder.h"
+#pragma push_macro("DEPRECATED")
 #include "llama.h"
+#pragma pop_macro("DEPRECATED")
 
 namespace org::apache::nifi::minifi::processors {
 
diff --git a/thirdparty/llamacpp/metal.patch b/thirdparty/llamacpp/metal.patch
new file mode 100644
index 0000000000..5bf2c0c712
--- /dev/null
+++ b/thirdparty/llamacpp/metal.patch
@@ -0,0 +1,14 @@
+diff --color=auto -rupN llama.cpp-b4174/ggml/src/ggml-metal/ggml-metal.m llama.cpp-b4174-patched/ggml/src/ggml-metal/ggml-metal.m
+--- llama.cpp-b4174/ggml/src/ggml-metal/ggml-metal.m	2024-11-26 02:47:20
++++ llama.cpp-b4174-patched/ggml/src/ggml-metal/ggml-metal.m	2024-11-26 09:54:33
+@@ -58,7 +58,9 @@ static id<MTLDevice> ggml_backend_metal_device_acq(str
+     assert(ctx != NULL);
+ 
+     if (ctx->mtl_device == nil) {
+-        ctx->mtl_device = MTLCreateSystemDefaultDevice();
++        NSArray<id<MTLDevice>> *devices = MTLCopyAllDevices();
++        assert(devices.count != 0);
++        ctx->mtl_device = devices.firstObject;
+ 
+         ctx->has_simdgroup_reduction  = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
+         ctx->has_simdgroup_reduction |= [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];
\ No newline at end of file
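
The call site of use_bundled_llamacpp() is outside this diff. A minimal
sketch of the expected wiring in the top-level CMakeLists.txt -- the
ENABLE_AI option name is a placeholder, not something this patch defines:

    # Hypothetical call site: BundledLlamaCpp.cmake sits on CMAKE_MODULE_PATH,
    # so include() finds it by name; the function registers llamacpp-external
    # and the llamacpp INTERFACE target that the extension links against.
    if (ENABLE_AI)
        include(BundledLlamaCpp)
        use_bundled_llamacpp("${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}")
    endif()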
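The AiProcessor.cpp hunks track an upstream API change: as of llama.cpp
b4174, llama_batch_get_one() no longer takes a start position or a sequence
id, which is why the manual n_pos bookkeeping disappears. A sketch of the
before/after signatures, assuming the b4174 llama.h:

    // before: the caller threaded the absolute position through every batch
    // llama_batch llama_batch_get_one(llama_token * tokens, int32_t n_tokens,
    //                                 llama_pos pos_0, llama_seq_id seq_id);
    // b4174: positions are derived from the context's KV-cache state
    // llama_batch llama_batch_get_one(llama_token * tokens, int32_t n_tokens);
    llama_batch batch = llama_batch_get_one(&new_token_id, /*n_tokens=*/1);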
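The push_macro/pop_macro pair in AiProcessor.h guards against a macro clash:
llama.h defines its own DEPRECATED(func, hint) helper, and the pragmas save
whatever DEPRECATED meant in the surrounding build and restore it after the
include. The pattern in isolation:

    #pragma push_macro("DEPRECATED")   // save the current definition, if any
    #include "llama.h"                 // defines DEPRECATED for its own use
    #pragma pop_macro("DEPRECATED")    // restore the saved definition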
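Finally, metal.patch replaces MTLCreateSystemDefaultDevice() with the first
entry of MTLCopyAllDevices(), presumably because the former can return nil in
non-GUI processes (on macOS it relies on Core Graphics and a usable display
context), while MTLCopyAllDevices() enumerates every Metal device regardless
of how the process was launched; the added assert makes that expectation
explicit.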