Skip to content

[pick][llm] Add a generic text only LLM runner #11441

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .ci/scripts/build_llama_android.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ build_llama_runner() {
popd
ANDROID_ABI=arm64-v8a
cmake -DBUCK2="${BUCK2}" \
-DBUILD_TESTING=OFF \
-DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK"/build/cmake/android.toolchain.cmake \
-DANDROID_ABI="${ANDROID_ABI}" \
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
Expand Down
1 change: 1 addition & 0 deletions .ci/scripts/test_llama.sh
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ cmake_build_llama_runner() {
popd
dir="examples/models/llama"
retry cmake \
-DBUILD_TESTING=OFF \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
-Bcmake-out/${dir} \
Expand Down
1 change: 1 addition & 0 deletions .ci/scripts/test_llama_torchao_lowbit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ cmake --build cmake-out -j16 --target install --config Release

# Install llama runner with torchao
cmake -DPYTHON_EXECUTABLE=python \
-DBUILD_TESTING=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
Expand Down
3 changes: 2 additions & 1 deletion .ci/scripts/test_llava.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,10 @@ cmake_install_executorch_libraries_for_android() {


# CMake arguments collected into a single string: testing targets are turned
# off, the install prefix and build type come from the surrounding script's
# variables, and the custom/optimized kernels plus XNNPACK are turned on.
# Each -D option is listed exactly once.
LLAVA_COMMON_CMAKE_ARGS=" \
-DBUILD_TESTING=OFF \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

using executorch::extension::llm::GenerationConfig;
using executorch::extension::llm::Image;
using executorch::extension::llm::TextLLMRunner;
using executorch::runtime::Error;

NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain";
Expand All @@ -23,15 +24,15 @@ @interface LLaMARunner ()<ExecuTorchLogSink>
@end

@implementation LLaMARunner {
  // Owning handle to the generic text-only LLM runner returned by
  // example::create_llama_runner(). It may be null if creation failed, so
  // code that dereferences it should check first.
  std::unique_ptr<TextLLMRunner> _runner;
}

/// Creates a runner wrapper for the given model and tokenizer.
///
/// Registers the receiver as a sink on the shared ExecuTorchLog and builds
/// the underlying runner through example::create_llama_runner().
///
/// @param modelPath Path to the model, forwarded as a UTF-8 C string.
/// @param tokenizerPath Path to the tokenizer, forwarded as a UTF-8 C string.
/// @return The initialized instance, or nil if [super init] returned nil.
- (instancetype)initWithModelPath:(NSString*)modelPath
                    tokenizerPath:(NSString*)tokenizerPath {
  self = [super init];
  if (self) {
    [ExecuTorchLog.sharedLog addSink:self];
    _runner = example::create_llama_runner(
        modelPath.UTF8String, tokenizerPath.UTF8String);
  }
  return self;
}
Expand Down
1 change: 0 additions & 1 deletion examples/models/llama/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,6 @@ endif()
target_include_directories(
llama_main
PUBLIC ${_common_include_directories}
${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
)
target_link_libraries(llama_main PUBLIC llama_runner ${link_libraries})
target_compile_options(llama_main PUBLIC ${_common_compile_options})
9 changes: 7 additions & 2 deletions examples/models/llama/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,13 @@ int32_t main(int32_t argc, char** argv) {
}
#endif
// create llama runner
std::unique_ptr<example::Runner> runner =
example::Runner::create(model_path, tokenizer_path, data_path);
std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner =
example::create_llama_runner(model_path, tokenizer_path, data_path);

if (runner == nullptr) {
ET_LOG(Error, "Failed to create llama runner");
return 1;
}

if (warmup) {
runner->warmup(prompt, /*max_new_tokens=*/seq_len);
Expand Down
21 changes: 9 additions & 12 deletions examples/models/llama/runner/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,23 +52,20 @@ else()
add_library(llama_runner SHARED ${_llama_runner__srcs})
endif()

# For extension_llm_runner
if(NOT TARGET extension_llm_runner)
add_subdirectory(
${EXECUTORCH_ROOT}/extension/llm/runner
${CMAKE_CURRENT_BINARY_DIR}/../../../../extension/llm/runner
)
endif()

# Libraries llama_runner links against; each target is listed once.
set(llama_runner_deps executorch_core extension_data_loader extension_module
    extension_tensor extension_flat_tensor extension_llm_runner
)

target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})

target_include_directories(
llama_runner
INTERFACE ${_common_include_directories}
)

# Include tokenizers dependency
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_subdirectory(
${EXECUTORCH_ROOT}/extension/llm/tokenizers
${CMAKE_CURRENT_BINARY_DIR}/tokenizers
)
target_link_libraries(
llama_runner PUBLIC tokenizers
)
Expand Down
Loading
Loading