Skip to content

Commit f721f3f

Browse files
committed
[llm] Add a generic text only LLM runner
Pull Request resolved: #11342 Introducing `text_llm_runner`. This can be used to run all text-only, decoder-only LLM models supported by ExecuTorch. * Metadata is read out from the .pte file and used to construct the runner object. * examples/models/llama/runner.h[.cpp] now only contains a simple wrapper around `text_llm_runner.h[.cpp]`. In next PRs I will move examples/models/phi-3-mini/runner to use the generic runner. Will look into QNN and MediaTek runners as well. ghstack-source-id: c43c256 exported-using-ghexport Differential Revision: [D75910889](https://our.internmc.facebook.com/intern/diff/D75910889/)
1 parent 8abd26a commit f721f3f

File tree

26 files changed

+681
-514
lines changed

26 files changed

+681
-514
lines changed

.ci/scripts/test_llama.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ cmake_build_llama_runner() {
169169
popd
170170
dir="examples/models/llama"
171171
retry cmake \
172+
-DBUILD_TESTING=OFF \
172173
-DCMAKE_INSTALL_PREFIX=cmake-out \
173174
-DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
174175
-Bcmake-out/${dir} \

.ci/scripts/test_llava.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,10 @@ cmake_install_executorch_libraries_for_android() {
6464

6565

6666
LLAVA_COMMON_CMAKE_ARGS=" \
67+
-DBUILD_TESTING=OFF \
6768
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
6869
-DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
69-
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
70+
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
7071
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
7172
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
7273
-DEXECUTORCH_BUILD_XNNPACK=ON"

examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
using executorch::extension::llm::GenerationConfig;
1616
using executorch::extension::llm::Image;
17+
using executorch::extension::llm::TextLLMRunner;
1718
using executorch::runtime::Error;
1819

1920
NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain";
@@ -23,15 +24,15 @@ @interface LLaMARunner ()<ExecuTorchLogSink>
2324
@end
2425

2526
@implementation LLaMARunner {
26-
std::unique_ptr<example::Runner> _runner;
27+
std::unique_ptr<TextLLMRunner> _runner;
2728
}
2829

2930
- (instancetype)initWithModelPath:(NSString*)modelPath
3031
tokenizerPath:(NSString*)tokenizerPath {
3132
self = [super init];
3233
if (self) {
3334
[ExecuTorchLog.sharedLog addSink:self];
34-
_runner = example::Runner::create(
35+
_runner = example::create_llama_runner(
3536
modelPath.UTF8String, tokenizerPath.UTF8String);
3637
}
3738
return self;

examples/models/llama/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,6 @@ endif()
220220
target_include_directories(
221221
llama_main
222222
PUBLIC ${_common_include_directories}
223-
${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
224223
)
225224
target_link_libraries(llama_main PUBLIC llama_runner ${link_libraries})
226225
target_compile_options(llama_main PUBLIC ${_common_compile_options})

examples/models/llama/main.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,13 @@ int32_t main(int32_t argc, char** argv) {
8181
}
8282
#endif
8383
// create llama runner
84-
std::unique_ptr<example::Runner> runner =
85-
example::Runner::create(model_path, tokenizer_path, data_path);
84+
std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner =
85+
example::create_llama_runner(model_path, tokenizer_path, data_path);
86+
87+
if (runner == nullptr) {
88+
ET_LOG(Error, "Failed to create llama runner");
89+
return 1;
90+
}
8691

8792
if (warmup) {
8893
runner->warmup(prompt, /*max_new_tokens=*/seq_len);

examples/models/llama/runner/CMakeLists.txt

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -52,23 +52,18 @@ else()
5252
add_library(llama_runner SHARED ${_llama_runner__srcs})
5353
endif()
5454

55+
# For extension_llm_runner
56+
add_subdirectory(
57+
${EXECUTORCH_ROOT}/extension/llm/runner
58+
${CMAKE_CURRENT_BINARY_DIR}/../../../../extension/llm/runner
59+
)
60+
5561
set(llama_runner_deps executorch_core extension_data_loader extension_module
56-
extension_tensor extension_flat_tensor
62+
extension_tensor extension_flat_tensor extension_llm_runner
5763
)
5864

5965
target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})
6066

61-
target_include_directories(
62-
llama_runner
63-
INTERFACE ${_common_include_directories}
64-
)
65-
66-
# Include tokenizers dependency
67-
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
68-
add_subdirectory(
69-
${EXECUTORCH_ROOT}/extension/llm/tokenizers
70-
${CMAKE_CURRENT_BINARY_DIR}/tokenizers
71-
)
7267
target_link_libraries(
7368
llama_runner PUBLIC tokenizers
7469
)

0 commit comments

Comments (0)