Skip to content

Commit 85fb703

Browse files
committed
[llm] Add a generic text only LLM runner
Pull Request resolved: #11342 Introducing `text_llm_runner`. This can be used to run all text-only, decoder-only LLM models supported by ExecuTorch. * Metadata is read from the .pte file and used to construct the runner object. * examples/models/llama/runner.h[.cpp] now contains only a simple wrapper around `text_llm_runner.h[.cpp]`. In subsequent PRs I will move examples/models/phi-3-mini/runner to use the generic runner, and will look into the QNN and MediaTek runners as well. ghstack-source-id: 288346995 @exported-using-ghexport Differential Revision: [D75910889](https://our.internmc.facebook.com/intern/diff/D75910889/)
1 parent 8abd26a commit 85fb703

File tree

19 files changed

+652
-485
lines changed

19 files changed

+652
-485
lines changed

examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
using executorch::extension::llm::GenerationConfig;
1616
using executorch::extension::llm::Image;
17+
using executorch::extension::llm::TextLLMRunner;
1718
using executorch::runtime::Error;
1819

1920
NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain";
@@ -23,15 +24,15 @@ @interface LLaMARunner ()<ExecuTorchLogSink>
2324
@end
2425

2526
@implementation LLaMARunner {
26-
std::unique_ptr<example::Runner> _runner;
27+
std::unique_ptr<TextLLMRunner> _runner;
2728
}
2829

2930
- (instancetype)initWithModelPath:(NSString*)modelPath
3031
tokenizerPath:(NSString*)tokenizerPath {
3132
self = [super init];
3233
if (self) {
3334
[ExecuTorchLog.sharedLog addSink:self];
34-
_runner = example::Runner::create(
35+
_runner = example::create_llama_runner(
3536
modelPath.UTF8String, tokenizerPath.UTF8String);
3637
}
3738
return self;

examples/models/llama/main.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,13 @@ int32_t main(int32_t argc, char** argv) {
8181
}
8282
#endif
8383
// create llama runner
84-
std::unique_ptr<example::Runner> runner =
85-
example::Runner::create(model_path, tokenizer_path, data_path);
84+
std::unique_ptr<::executorch::extension::llm::TextLLMRunner> runner =
85+
example::create_llama_runner(model_path, tokenizer_path, data_path);
86+
87+
if (runner == nullptr) {
88+
ET_LOG(Error, "Failed to create llama runner");
89+
return 1;
90+
}
8691

8792
if (warmup) {
8893
runner->warmup(prompt, /*max_new_tokens=*/seq_len);

0 commit comments

Comments (0)