pytorch · larryliu0820 · Jun 6, 2025 · Jun 4, 2025 · Jun 5, 2025 · Jun 5, 2025
diff --git a/.ci/scripts/build_llama_android.sh b/.ci/scripts/build_llama_android.sh
@@ -42,6 +42,7 @@ build_llama_runner() {
     popd
     ANDROID_ABI=arm64-v8a
     cmake -DBUCK2="${BUCK2}" \
+    -DBUILD_TESTING=OFF \
     -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK"/build/cmake/android.toolchain.cmake  \
     -DANDROID_ABI="${ANDROID_ABI}" \
     -DCMAKE_INSTALL_PREFIX=cmake-android-out \

diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
@@ -169,6 +169,7 @@ cmake_build_llama_runner() {
     popd
     dir="examples/models/llama"
     retry cmake \
+        -DBUILD_TESTING=OFF \
         -DCMAKE_INSTALL_PREFIX=cmake-out \
         -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
         -Bcmake-out/${dir} \

diff --git a/.ci/scripts/test_llama_torchao_lowbit.sh b/.ci/scripts/test_llama_torchao_lowbit.sh
@@ -40,6 +40,7 @@ cmake --build cmake-out -j16 --target install --config Release
 
 # Install llama runner with torchao
 cmake -DPYTHON_EXECUTABLE=python \
+    -DBUILD_TESTING=OFF \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \

diff --git a/.ci/scripts/test_llava.sh b/.ci/scripts/test_llava.sh
@@ -64,9 +64,10 @@ cmake_install_executorch_libraries_for_android() {
 
 
 LLAVA_COMMON_CMAKE_ARGS="                        \
+        -DBUILD_TESTING=OFF                      \
         -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
         -DCMAKE_INSTALL_PREFIX=${BUILD_DIR}      \
-        -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}         \
+        -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}   \
         -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON     \
         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON  \
         -DEXECUTORCH_BUILD_XNNPACK=ON"

@@ -14,6 +14,7 @@
 
 using executorch::extension::llm::GenerationConfig;
 using executorch::extension::llm::Image;
+using executorch::extension::llm::TextLLMRunner;
 using executorch::runtime::Error;
 
 NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain";
@@ -23,15 +24,15 @@ @interface LLaMARunner ()<ExecuTorchLogSink>
 @end
 
 @implementation LLaMARunner {
-  std::unique_ptr<example::Runner> _runner;
+  std::unique_ptr<TextLLMRunner> _runner;
 }
 
 - (instancetype)initWithModelPath:(NSString*)modelPath
                     tokenizerPath:(NSString*)tokenizerPath {
   self = [super init];
   if (self) {
     [ExecuTorchLog.sharedLog addSink:self];
-    _runner = example::Runner::create(
+    _runner = example::create_llama_runner(
         modelPath.UTF8String, tokenizerPath.UTF8String);
   }
   return self;

@@ -220,7 +220,6 @@ endif()
 target_include_directories(
   llama_main
   PUBLIC ${_common_include_directories}
-         ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
 )
 target_link_libraries(llama_main PUBLIC llama_runner ${link_libraries})
 target_compile_options(llama_main PUBLIC ${_common_compile_options})
@@ -81,8 +81,13 @@ int32_t main(int32_t argc, char** argv) {
   }
 #endif
   // create llama runner
-  std::unique_ptr<example::Runner> runner =
-      example::Runner::create(model_path, tokenizer_path, data_path);
+  std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner =
+      example::create_llama_runner(model_path, tokenizer_path, data_path);
+
+  if (runner == nullptr) {
+    ET_LOG(Error, "Failed to create llama runner");
+    return 1;
+  }
 
   if (warmup) {
     runner->warmup(prompt, /*max_new_tokens=*/seq_len);

@@ -52,23 +52,20 @@ else()
   add_library(llama_runner SHARED ${_llama_runner__srcs})
 endif()
 
+# Add extension_llm_runner here unless the target is already defined
+if(NOT TARGET extension_llm_runner)
+  add_subdirectory(
+    ${EXECUTORCH_ROOT}/extension/llm/runner
+    ${CMAKE_CURRENT_BINARY_DIR}/../../../../extension/llm/runner
+  )
+endif()
+
 set(llama_runner_deps executorch_core extension_data_loader extension_module
-                      extension_tensor extension_flat_tensor
+                      extension_tensor extension_flat_tensor extension_llm_runner
 )
 
 target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})
 
-target_include_directories(
-  llama_runner
-  INTERFACE ${_common_include_directories}
-)
-
-# Include tokenizers dependency
-set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-add_subdirectory(
-  ${EXECUTORCH_ROOT}/extension/llm/tokenizers
-  ${CMAKE_CURRENT_BINARY_DIR}/tokenizers
-)
 target_link_libraries(
   llama_runner PUBLIC tokenizers
 )