Skip to content

Commit fddb40f

Browse files
committed
[llm] Add a generic text-only LLM runner
Pull Request resolved: #11342 Introducing `text_llm_runner`. This can be used to run all text-only, decoder-only LLM models supported by ExecuTorch. * Metadata is read from the .pte file and used to construct the runner object. * examples/models/llama/runner.h[.cpp] now only contains a simple wrapper around `text_llm_runner.h[.cpp]`. In the next PRs I will move examples/models/phi-3-mini/runner to use the generic runner. Will look into the QNN and MediaTek runners as well. ghstack-source-id: 288606073 @exported-using-ghexport Differential Revision: [D75910889](https://our.internmc.facebook.com/intern/diff/D75910889/)
1 parent b2c02fe commit fddb40f

File tree

28 files changed

+685
-515
lines changed

28 files changed

+685
-515
lines changed

.ci/scripts/build_llama_android.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ build_llama_runner() {
4242
popd
4343
ANDROID_ABI=arm64-v8a
4444
cmake -DBUCK2="${BUCK2}" \
45+
-DBUILD_TESTING=OFF \
4546
-DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK"/build/cmake/android.toolchain.cmake \
4647
-DANDROID_ABI="${ANDROID_ABI}" \
4748
-DCMAKE_INSTALL_PREFIX=cmake-android-out \

.ci/scripts/test_llama.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ cmake_build_llama_runner() {
169169
popd
170170
dir="examples/models/llama"
171171
retry cmake \
172+
-DBUILD_TESTING=OFF \
172173
-DCMAKE_INSTALL_PREFIX=cmake-out \
173174
-DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
174175
-Bcmake-out/${dir} \

.ci/scripts/test_llava.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,10 @@ cmake_install_executorch_libraries_for_android() {
6464

6565

6666
LLAVA_COMMON_CMAKE_ARGS=" \
67+
-DBUILD_TESTING=OFF \
6768
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
6869
-DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
69-
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
70+
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
7071
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
7172
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
7273
-DEXECUTORCH_BUILD_XNNPACK=ON"

examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
using executorch::extension::llm::GenerationConfig;
1616
using executorch::extension::llm::Image;
17+
using executorch::extension::llm::TextLLMRunner;
1718
using executorch::runtime::Error;
1819

1920
NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain";
@@ -23,15 +24,15 @@ @interface LLaMARunner ()<ExecuTorchLogSink>
2324
@end
2425

2526
@implementation LLaMARunner {
26-
std::unique_ptr<example::Runner> _runner;
27+
std::unique_ptr<TextLLMRunner> _runner;
2728
}
2829

2930
- (instancetype)initWithModelPath:(NSString*)modelPath
3031
tokenizerPath:(NSString*)tokenizerPath {
3132
self = [super init];
3233
if (self) {
3334
[ExecuTorchLog.sharedLog addSink:self];
34-
_runner = example::Runner::create(
35+
_runner = example::create_llama_runner(
3536
modelPath.UTF8String, tokenizerPath.UTF8String);
3637
}
3738
return self;

examples/models/llama/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,6 @@ endif()
220220
target_include_directories(
221221
llama_main
222222
PUBLIC ${_common_include_directories}
223-
${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
224223
)
225224
target_link_libraries(llama_main PUBLIC llama_runner ${link_libraries})
226225
target_compile_options(llama_main PUBLIC ${_common_compile_options})

examples/models/llama/main.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,13 @@ int32_t main(int32_t argc, char** argv) {
8181
}
8282
#endif
8383
// create llama runner
84-
std::unique_ptr<example::Runner> runner =
85-
example::Runner::create(model_path, tokenizer_path, data_path);
84+
std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner =
85+
example::create_llama_runner(model_path, tokenizer_path, data_path);
86+
87+
if (runner == nullptr) {
88+
ET_LOG(Error, "Failed to create llama runner");
89+
return 1;
90+
}
8691

8792
if (warmup) {
8893
runner->warmup(prompt, /*max_new_tokens=*/seq_len);

examples/models/llama/runner/CMakeLists.txt

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -52,23 +52,20 @@ else()
5252
add_library(llama_runner SHARED ${_llama_runner__srcs})
5353
endif()
5454

55+
# For extension_llm_runner
56+
if (NOT TARGET extension_llm_runner)
57+
add_subdirectory(
58+
${EXECUTORCH_ROOT}/extension/llm/runner
59+
${CMAKE_CURRENT_BINARY_DIR}/../../../../extension/llm/runner
60+
)
61+
endif()
62+
5563
set(llama_runner_deps executorch_core extension_data_loader extension_module
56-
extension_tensor extension_flat_tensor
64+
extension_tensor extension_flat_tensor extension_llm_runner
5765
)
5866

5967
target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})
6068

61-
target_include_directories(
62-
llama_runner
63-
INTERFACE ${_common_include_directories}
64-
)
65-
66-
# Include tokenizers dependency
67-
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
68-
add_subdirectory(
69-
${EXECUTORCH_ROOT}/extension/llm/tokenizers
70-
${CMAKE_CURRENT_BINARY_DIR}/tokenizers
71-
)
7269
target_link_libraries(
7370
llama_runner PUBLIC tokenizers
7471
)

0 commit comments

Comments
 (0)