diff --git a/CMakeLists.txt b/CMakeLists.txt index 91d3508a..bf95d42e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,12 +2,12 @@ cmake_minimum_required(VERSION 3.5) project(mnn-llm) option(BUILD_FOR_ANDROID "Build for android whith mini memory mode." OFF) -option(USING_VISUAL_MODEL "Using visual model will need dpes: MNNOpenCV and httplib." OFF) +option(LLM_SUPPORT_VISION "Llm model support vision input." OFF) option(DUMP_PROFILE_INFO "Dump profile info when chat." OFF) option(BUILD_JNI "Build JNI for android app." OFF) -if (USING_VISUAL_MODEL) - add_definitions(-DUSING_VISUAL_MODEL) +if (LLM_SUPPORT_VISION) + add_definitions(-DLLM_SUPPORT_VISION) endif() if (DUMP_PROFILE_INFO) @@ -24,7 +24,7 @@ set(MNN_SUPPORT_TRANSFORMER_FUSE ON CACHE BOOL "Open MNN_SUPPORT_TRANSFORMER_FUS if (BUILD_FOR_ANDROID) set(MNN_ARM82 ON CACHE BOOL "Open MNN_ARM82" FORCE) endif() -if (USING_VISUAL_MODEL) +if (LLM_SUPPORT_VISION) set(MNN_BUILD_OPENCV ON CACHE BOOL "Open MNN_BUILD_OPENCV" FORCE) set(MNN_IMGCODECS ON CACHE BOOL "Open MNN_IMGCODECS" FORCE) endif() @@ -33,7 +33,7 @@ add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/MNN) # include dir include_directories(${CMAKE_CURRENT_LIST_DIR}/include/ ${CMAKE_CURRENT_LIST_DIR}/MNN/include/ - ${CMAKE_CURRENT_LIST_DIR}/MNN/tools/cv/include/cv/ + ${CMAKE_CURRENT_LIST_DIR}/MNN/tools/cv/include/ ) # source files @@ -58,7 +58,7 @@ else() set_target_properties(llm PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE) target_link_libraries(llm MNN MNN_Express) - if (USING_VISUAL_MODEL) + if (LLM_SUPPORT_VISION) target_link_libraries(llm MNNOpenCV) endif() endif() diff --git a/README.md b/README.md index f14205fd..337f9d5f 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ llm模型导出`onnx`和`mnn`模型请使用[llm-export](https://github.com/wang `modelscope`模型下载:
 <details>
-  <summary>qwen系列</summary>
+  <summary>qwen</summary>
 
 - [modelscope-qwen-1.8b-chat]
 - [modelscope-qwen-7b-chat]
@@ -31,14 +31,16 @@ llm模型导出`onnx`和`mnn`模型请使用[llm-export](https://github.com/wang
 - [modelscope-qwen1.5-1.8b-chat]
 - [modelscope-qwen1.5-4b-chat]
 - [modelscope-qwen1.5-7b-chat]
-- [modelscope-qwen2-0.5b-chat]
-- [modelscope-qwen2-1.5b-chat]
-- [modelscope-qwen2-7b-chat]
+- [modelscope-qwen2-0.5b-instruct]
+- [modelscope-qwen2-1.5b-instruct]
+- [modelscope-qwen2-7b-instruct]
+- [modelscope-qwen2-vl-2b-instruct]
+- [modelscope-qwen2-vl-7b-instruct]
 
 </details>
 
 <details>
-  <summary>glm系列</summary>
+  <summary>glm</summary>
 
 - [modelscope-chatglm-6b]
 - [modelscope-chatglm2-6b]
@@ -49,7 +51,7 @@ llm模型导出`onnx`和`mnn`模型请使用[llm-export](https://github.com/wang
 </details>
 
 <details>
-  <summary>llama系列</summary>
+  <summary>llama</summary>
 
 - [modelscope-llama2-7b-chat]
 - [modelscope-llama3-8b-instruct]
@@ -62,10 +64,17 @@ llm模型导出`onnx`和`mnn`模型请使用[llm-export](https://github.com/wang
 </details>
 
 <details>
-  <summary>其他</summary>
+  <summary>phi</summary>
 
 - [modelscope-phi-2]
+
+</details>
+
+<details>
+  <summary>embedding</summary>
+
 - [modelscope-bge-large-zh]
+- [modelscope-gte_sentence-embedding_multilingual-base]
 
 </details>
@@ -77,9 +86,11 @@ llm模型导出`onnx`和`mnn`模型请使用[llm-export](https://github.com/wang [modelscope-qwen1.5-1.8b-chat]: https://modelscope.cn/models/zhaode/Qwen1.5-1.8B-Chat-MNN/files [modelscope-qwen1.5-4b-chat]: https://modelscope.cn/models/zhaode/Qwen1.5-4B-Chat-MNN/files [modelscope-qwen1.5-7b-chat]: https://modelscope.cn/models/zhaode/Qwen1.5-7B-Chat-MNN/files -[modelscope-qwen2-0.5b-chat]: https://modelscope.cn/models/zhaode/Qwen2-0.5B-Instruct-MNN/files -[modelscope-qwen2-1.5b-chat]: https://modelscope.cn/models/zhaode/Qwen2-1.5B-Instruct-MNN/files -[modelscope-qwen2-7b-chat]: https://modelscope.cn/models/zhaode/Qwen2-7B-Instruct-MNN/files +[modelscope-qwen2-0.5b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-0.5B-Instruct-MNN/files +[modelscope-qwen2-1.5b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-1.5B-Instruct-MNN/files +[modelscope-qwen2-7b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-7B-Instruct-MNN/files +[modelscope-qwen2-vl-2b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-VL-2B-Instruct-MNN/files +[modelscope-qwen2-vl-7b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-VL-7B-Instruct-MNN/files [modelscope-chatglm-6b]: https://modelscope.cn/models/zhaode/chatglm-6b-MNN/files [modelscope-chatglm2-6b]: https://modelscope.cn/models/zhaode/chatglm2-6b-MNN/files @@ -96,6 +107,7 @@ llm模型导出`onnx`和`mnn`模型请使用[llm-export](https://github.com/wang [modelscope-tinyllama-1.1b-chat]: https://modelscope.cn/models/zhaode/TinyLlama-1.1B-Chat-MNN/files [modelscope-phi-2]: https://modelscope.cn/models/zhaode/phi-2-MNN/files [modelscope-bge-large-zh]: https://modelscope.cn/models/zhaode/bge-large-zh-MNN/files +[modelscope-gte_sentence-embedding_multilingual-base]: https://modelscope.cn/models/zhaode/gte_sentence-embedding_multilingual-base-MNN/files ## 构建 @@ -151,13 +163,13 @@ cd mnn-llm 一些编译宏: - `BUILD_FOR_ANDROID`: 编译到Android设备; -- `USING_VISUAL_MODEL`: 支持多模态能力的模型,需要依赖`libMNNOpenCV`; +- `LLM_SUPPORT_VISION`: 是否支持视觉处理能力; - `DUMP_PROFILE_INFO`: 每次对话后dump出性能数据到命令行中; -默认使用`CPU`后端且不实用多模态能力,如果使用其他后端或能力,可以在编译MNN的脚本中添加`MNN`编译宏 +默认使用`CPU`,如果使用其他后端或能力,可以在编译MNN时添加`MNN`编译宏 - cuda: `-DMNN_CUDA=ON` - opencl: `-DMNN_OPENCL=ON` -- opencv: `-DMNN_BUILD_OPENCV=ON -DMNN_IMGCODECS=ON` +- metal: `-DMNN_METAL=ON` ### 4. 执行 @@ -181,27 +193,35 @@ adb shell "cd /data/local/tmp && export LD_LIBRARY_PATH=. && ./cli_demo ./Qwen2-
reference +- [cpp-httplib](https://github.com/yhirose/cpp-httplib) +- [chatgpt-web](https://github.com/xqdoo00o/chatgpt-web) +- [ChatViewDemo](https://github.com/BrettFX/ChatViewDemo) +- [nlohmann/json](https://github.com/nlohmann/json) +- [Qwen-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/summary) +- [Qwen-7B-Chat](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary) +- [Qwen-VL-Chat](https://modelscope.cn/models/qwen/Qwen-VL-Chat/summary) +- [Qwen1.5-0.5B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary) +- [Qwen1.5-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat/summary) +- [Qwen1.5-4B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat/summary) +- [Qwen1.5-7B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-7B-Chat/summary) +- [Qwen2-0.5B-Instruct](https://modelscope.cn/models/qwen/Qwen2-0.5B-Instruct/summary) +- [Qwen2-1.5B-Instruct](https://modelscope.cn/models/qwen/Qwen2-1.5B-Instruct/summary) +- [Qwen2-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-7B-Instruct/summary) +- [Qwen2-VL-2B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-2B-Instruct/summary) +- [Qwen2-VL-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct/summary) - [chatglm-6b](https://modelscope.cn/models/ZhipuAI/chatglm-6b/summary) - [chatglm2-6b](https://modelscope.cn/models/ZhipuAI/chatglm2-6b/summary) -- [chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b/summary) - [codegeex2-6b](https://modelscope.cn/models/ZhipuAI/codegeex2-6b/summary) -- [Baichuan2-7B-Chat](https://modelscope.cn/models/baichuan-inc/baichuan-7B/summary) -- [Qwen-7B-Chat](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary) -- [Qwen-VL-Chat](https://modelscope.cn/models/qwen/Qwen-VL-Chat/summary) -- [Qwen-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/summary) +- [chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b/summary) +- [glm4-9b-chat](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat/summary) - [Llama-2-7b-chat-ms](https://modelscope.cn/models/modelscope/Llama-2-7b-chat-ms/summary) +- [Llama-3-8B-Instruct](https://modelscope.cn/models/modelscope/Meta-Llama-3-8B-Instruct/summary) +- [Baichuan2-7B-Chat](https://modelscope.cn/models/baichuan-inc/baichuan-7B/summary) - [internlm-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b/summary) +- [Yi-6B-Chat](https://modelscope.cn/models/01ai/Yi-6B-Chat/summary) +- [deepseek-llm-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-llm-7b-chat/summary) +- [TinyLlama-1.1B-Chat-v0.6](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6) - [phi-2](https://modelscope.cn/models/AI-ModelScope/phi-2/summary) - [bge-large-zh](https://modelscope.cn/models/AI-ModelScope/bge-large-zh/summary) -- [TinyLlama-1.1B-Chat-v0.6](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6) -- [Yi-6B-Chat](https://modelscope.cn/models/01ai/Yi-6B-Chat/summary) -- [Qwen1.5-0.5B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary) -- [Qwen1.5-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat/summary) -- [Qwen1.5-4B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat/summary) -- [Qwen1.5-7B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-7B-Chat/summary) -- [cpp-httplib](https://github.com/yhirose/cpp-httplib) -- [chatgpt-web](https://github.com/xqdoo00o/chatgpt-web) -- [ChatViewDemo](https://github.com/BrettFX/ChatViewDemo) -- [nlohmann/json](https://github.com/nlohmann/json) - +- 
[gte_sentence-embedding_multilingual-base](https://modelscope.cn/models/iic/gte_sentence-embedding_multilingual-base/summary)
\ No newline at end of file diff --git a/README_en.md b/README_en.md index 9d666ea9..a7c5d1ed 100644 --- a/README_en.md +++ b/README_en.md @@ -32,9 +32,11 @@ Download models from `modelscope`: - [modelscope-qwen1.5-1.8b-chat] - [modelscope-qwen1.5-4b-chat] - [modelscope-qwen1.5-7b-chat] -- [modelscope-qwen2-0.5b-chat] -- [modelscope-qwen2-1.5b-chat] -- [modelscope-qwen2-7b-chat] +- [modelscope-qwen2-0.5b-instruct] +- [modelscope-qwen2-1.5b-instruct] +- [modelscope-qwen2-7b-instruct] +- [modelscope-qwen2-vl-2b-instruct] +- [modelscope-qwen2-vl-7b-instruct] @@ -63,10 +65,17 @@ Download models from `modelscope`:
 </details>
 
 <details>
-  <summary>others</summary>
+  <summary>phi</summary>
 
 - [modelscope-phi-2]
+
+</details>
+
+<details>
+  <summary>embedding</summary>
+
 - [modelscope-bge-large-zh]
+- [modelscope-gte_sentence-embedding_multilingual-base]
 
 </details>
@@ -78,9 +87,11 @@ Download models from `modelscope`: [modelscope-qwen1.5-1.8b-chat]: https://modelscope.cn/models/zhaode/Qwen1.5-1.8B-Chat-MNN/files [modelscope-qwen1.5-4b-chat]: https://modelscope.cn/models/zhaode/Qwen1.5-4B-Chat-MNN/files [modelscope-qwen1.5-7b-chat]: https://modelscope.cn/models/zhaode/Qwen1.5-7B-Chat-MNN/files -[modelscope-qwen2-0.5b-chat]: https://modelscope.cn/models/zhaode/Qwen2-0.5B-Instruct-MNN/files -[modelscope-qwen2-1.5b-chat]: https://modelscope.cn/models/zhaode/Qwen2-1.5B-Instruct-MNN/files -[modelscope-qwen2-7b-chat]: https://modelscope.cn/models/zhaode/Qwen2-7B-Instruct-MNN/files +[modelscope-qwen2-0.5b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-0.5B-Instruct-MNN/files +[modelscope-qwen2-1.5b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-1.5B-Instruct-MNN/files +[modelscope-qwen2-7b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-7B-Instruct-MNN/files +[modelscope-qwen2-vl-2b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-VL-2B-Instruct-MNN/files +[modelscope-qwen2-vl-7b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-VL-7B-Instruct-MNN/files [modelscope-chatglm-6b]: https://modelscope.cn/models/zhaode/chatglm-6b-MNN/files [modelscope-chatglm2-6b]: https://modelscope.cn/models/zhaode/chatglm2-6b-MNN/files @@ -97,6 +108,7 @@ Download models from `modelscope`: [modelscope-tinyllama-1.1b-chat]: https://modelscope.cn/models/zhaode/TinyLlama-1.1B-Chat-MNN/files [modelscope-phi-2]: https://modelscope.cn/models/zhaode/phi-2-MNN/files [modelscope-bge-large-zh]: https://modelscope.cn/models/zhaode/bge-large-zh-MNN/files +[modelscope-gte_sentence-embedding_multilingual-base]: https://modelscope.cn/models/zhaode/gte_sentence-embedding_multilingual-base-MNN/files ## Building @@ -147,9 +159,10 @@ cd mnn-llm ./script/ios_build.sh ``` -The default backend used is `CPU`. If you want to use a different backend, you can add a MNN compilation macro within the script: +The default backend used is `CPU`. If you want to use a different backend, you can add a MNN compilation macro: - cuda: `-DMNN_CUDA=ON` - opencl: `-DMNN_OPENCL=ON` +- metal: `-DMNN_METAL=ON` ### 4. Execution @@ -174,27 +187,36 @@ adb shell "cd /data/local/tmp && export LD_LIBRARY_PATH=. && ./cli_demo ./Qwen2-
reference +- [cpp-httplib](https://github.com/yhirose/cpp-httplib) +- [chatgpt-web](https://github.com/xqdoo00o/chatgpt-web) +- [ChatViewDemo](https://github.com/BrettFX/ChatViewDemo) +- [nlohmann/json](https://github.com/nlohmann/json) +- [Qwen-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/summary) +- [Qwen-7B-Chat](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary) +- [Qwen-VL-Chat](https://modelscope.cn/models/qwen/Qwen-VL-Chat/summary) +- [Qwen1.5-0.5B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary) +- [Qwen1.5-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat/summary) +- [Qwen1.5-4B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat/summary) +- [Qwen1.5-7B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-7B-Chat/summary) +- [Qwen2-0.5B-Instruct](https://modelscope.cn/models/qwen/Qwen2-0.5B-Instruct/summary) +- [Qwen2-1.5B-Instruct](https://modelscope.cn/models/qwen/Qwen2-1.5B-Instruct/summary) +- [Qwen2-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-7B-Instruct/summary) +- [Qwen2-VL-2B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-2B-Instruct/summary) +- [Qwen2-VL-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct/summary) - [chatglm-6b](https://modelscope.cn/models/ZhipuAI/chatglm-6b/summary) - [chatglm2-6b](https://modelscope.cn/models/ZhipuAI/chatglm2-6b/summary) -- [chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b/summary) - [codegeex2-6b](https://modelscope.cn/models/ZhipuAI/codegeex2-6b/summary) -- [Baichuan2-7B-Chat](https://modelscope.cn/models/baichuan-inc/baichuan-7B/summary) -- [Qwen-7B-Chat](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary) -- [Qwen-VL-Chat](https://modelscope.cn/models/qwen/Qwen-VL-Chat/summary) -- [Qwen-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/summary) +- [chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b/summary) +- [glm4-9b-chat](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat/summary) - [Llama-2-7b-chat-ms](https://modelscope.cn/models/modelscope/Llama-2-7b-chat-ms/summary) +- [Llama-3-8B-Instruct](https://modelscope.cn/models/modelscope/Meta-Llama-3-8B-Instruct/summary) +- [Baichuan2-7B-Chat](https://modelscope.cn/models/baichuan-inc/baichuan-7B/summary) - [internlm-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b/summary) +- [Yi-6B-Chat](https://modelscope.cn/models/01ai/Yi-6B-Chat/summary) +- [deepseek-llm-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-llm-7b-chat/summary) +- [TinyLlama-1.1B-Chat-v0.6](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6) - [phi-2](https://modelscope.cn/models/AI-ModelScope/phi-2/summary) - [bge-large-zh](https://modelscope.cn/models/AI-ModelScope/bge-large-zh/summary) -- [TinyLlama-1.1B-Chat-v0.6](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6) -- [Yi-6B-Chat](https://modelscope.cn/models/01ai/Yi-6B-Chat/summary) -- [Qwen1.5-0.5B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary) -- [Qwen1.5-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat/summary) -- [Qwen1.5-4B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat/summary) -- [Qwen1.5-7B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-7B-Chat/summary) -- [cpp-httplib](https://github.com/yhirose/cpp-httplib) -- [chatgpt-web](https://github.com/xqdoo00o/chatgpt-web) -- [ChatViewDemo](https://github.com/BrettFX/ChatViewDemo) -- [nlohmann/json](https://github.com/nlohmann/json) +- 
[gte_sentence-embedding_multilingual-base](https://modelscope.cn/models/iic/gte_sentence-embedding_multilingual-base/summary)
\ No newline at end of file
diff --git a/demo/embedding_demo.cpp b/demo/embedding_demo.cpp
index c998717e..457e21f5 100644
--- a/demo/embedding_demo.cpp
+++ b/demo/embedding_demo.cpp
@@ -31,9 +31,9 @@ int main(int argc, const char* argv[]) {
     std::string model_dir = argv[1];
     std::cout << "model path is " << model_dir << std::endl;
     std::unique_ptr<Embedding> embedding(Embedding::createEmbedding(model_dir));
-    auto vec_0 = embedding->embedding("在春暖花开的季节,走在樱花缤纷的道路上,人们纷纷拿出手机拍照留念。樱花树下,情侣手牵手享受着这绝美的春光。孩子们在树下追逐嬉戏,脸上洋溢着纯真的笑容。春天的气息在空气中弥漫,一切都显得那么生机勃勃,充满希望。");
-    auto vec_1 = embedding->embedding("春天到了,樱花树悄然绽放,吸引了众多游客前来观赏。小朋友们在花瓣飘落的树下玩耍,而恋人们则在这浪漫的景色中尽情享受二人世界。每个人的脸上都挂着幸福的笑容,仿佛整个世界都被春天温暖的阳光和满树的樱花渲染得更加美好。");
-    auto vec_2 = embedding->embedding("在炎热的夏日里,沙滩上的游客们穿着泳装享受着海水的清凉。孩子们在海边堆沙堡,大人们则在太阳伞下品尝冷饮,享受悠闲的时光。远处,冲浪者们挑战着波涛,体验着与海浪争斗的刺激。夏天的海滩,总是充满了活力和热情。");
+    auto vec_0 = embedding->txt_embedding("在春暖花开的季节,走在樱花缤纷的道路上,人们纷纷拿出手机拍照留念。樱花树下,情侣手牵手享受着这绝美的春光。孩子们在树下追逐嬉戏,脸上洋溢着纯真的笑容。春天的气息在空气中弥漫,一切都显得那么生机勃勃,充满希望。");
+    auto vec_1 = embedding->txt_embedding("春天到了,樱花树悄然绽放,吸引了众多游客前来观赏。小朋友们在花瓣飘落的树下玩耍,而恋人们则在这浪漫的景色中尽情享受二人世界。每个人的脸上都挂着幸福的笑容,仿佛整个世界都被春天温暖的阳光和满树的樱花渲染得更加美好。");
+    auto vec_2 = embedding->txt_embedding("在炎热的夏日里,沙滩上的游客们穿着泳装享受着海水的清凉。孩子们在海边堆沙堡,大人们则在太阳伞下品尝冷饮,享受悠闲的时光。远处,冲浪者们挑战着波涛,体验着与海浪争斗的刺激。夏天的海滩,总是充满了活力和热情。");
     dumpVARP(vec_0);
     dumpVARP(vec_1);
     dumpVARP(vec_2);
diff --git a/include/llm.hpp b/include/llm.hpp
index 393b6f44..2a61eb74 100644
--- a/include/llm.hpp
+++ b/include/llm.hpp
@@ -277,37 +277,41 @@ class Llm {
 class Lvlm : public Llm {
 public:
     Lvlm(std::shared_ptr<LlmConfig> config) : Llm(config) {
-        img_size_ = config->llm_config_.value("img_size", img_size_);
-        imgpad_len_ = config->llm_config_.value("imgpad_len", imgpad_len_);
-        img_start_ = config->llm_config_.value("img_start", img_start_);
-        img_end_ = config->llm_config_.value("img_end", img_end_);
-        img_pad_ = config->llm_config_.value("img_pad", img_pad_);
+        image_size_ = config->llm_config_.value("image_size", image_size_);
+        image_pad_ = config->llm_config_.value("image_pad", image_pad_);
+        vision_start_ = config->llm_config_.value("vision_start", vision_start_);
+        vision_end_ = config->llm_config_.value("vision_end", vision_end_);
+        image_mean_ = config->llm_config_.value("image_mean", image_mean_);
+        image_norm_ = config->llm_config_.value("image_norm", image_norm_);
     }
     ~Lvlm() { visual_module_.reset(); }
     virtual void load() override;
+    virtual std::vector<int> tokenizer(const std::string& query) override;
+    virtual MNN::Express::VARP embedding(const std::vector<int>& input_ids) override;
 private:
-    int img_size_ = 448, imgpad_len_ = 256, img_start_ = 151857, img_end_ = 151858, img_pad_ = 151859;
+    int image_size_ = 448, vision_start_ = 151857, vision_end_ = 151858, image_pad_ = 151859;
+    std::vector<float> image_mean_ {122.7709383 , 116.7460125 , 104.09373615};
+    std::vector<float> image_norm_ {0.01459843, 0.01500777, 0.01422007};
+    std::vector<int> image_process(const std::string& img_info);
     std::shared_ptr<Module> visual_module_;
-    VARP visual_embedding(const std::vector<int>& input_ids);
-    std::vector<int> url_encode(const std::string& url);
-    virtual std::vector<int> tokenizer(const std::string& query) override;
-    virtual VARP embedding(const std::vector<int>& input_ids) override;
+    std::vector<MNN::Express::VARP> image_embeddings_;
 };
 // Llm end
 
 // Embedding start
 class Embedding : public Llm {
 public:
-    Embedding(std::shared_ptr<LlmConfig> config) : Llm(config) {}
-    static Embedding* createEmbedding(const std::string& config_path);
-    static float dist(VARP var0, VARP var1);
+    Embedding(std::shared_ptr<LlmConfig> config);
+    static Embedding* createEmbedding(const std::string& config_path, bool load = true);
+    static float dist(MNN::Express::VARP var0, MNN::Express::VARP var1);
     virtual void load() override;
-    VARP embedding(const std::string& txt);
-    int dim() { return config_->hidden_size(); }
+    MNN::Express::VARP ids_embedding(const std::vector<int>& ids);
+    MNN::Express::VARP txt_embedding(const std::string& txt);
+    int dim() const;
 private:
     virtual std::vector<int> tokenizer(const std::string& query) override;
-    virtual VARP gen_attention_mask(int seq_len) override;
-    virtual VARP gen_position_ids(int seq_len) override;
+    virtual MNN::Express::VARP gen_attention_mask(int seq_len) override;
+    virtual MNN::Express::VARP gen_position_ids(int seq_len) override;
 };
 // Embedding end
diff --git a/src/llm.cpp b/src/llm.cpp
index e59d55f2..321152ea 100644
--- a/src/llm.cpp
+++ b/src/llm.cpp
@@ -17,7 +17,7 @@
 #include "llm.hpp"
 #include "tokenizer.hpp"
-#ifdef USING_VISUAL_MODEL
+#ifdef LLM_SUPPORT_VISION
 #include "httplib.h"
 #include <cv/cv.hpp>
 #endif
@@ -499,77 +499,22 @@ void Lvlm::load() {
     Module::Config module_config;
     module_config.shapeMutable = true;
     module_config.rearrange = false;
+    runtime_manager_->setExternalFile(config_->visual_model() + ".weight");
     visual_module_.reset(Module::load({}, {}, config_->visual_model().c_str(), runtime_manager_, &module_config));
 }
 
-std::vector<int> Lvlm::url_encode(const std::string& url) {
-    std::vector<int> ascii_values(imgpad_len_ + 2, img_pad_);
-    ascii_values[0] = img_start_;
-    ascii_values[imgpad_len_ + 1] = img_end_;
-    for (int i = 0; i < url.size(); i++) {
-        ascii_values[i + 1] = static_cast<int>(url[i]);
-    }
-    return ascii_values;
-}
-
-std::vector<int> Lvlm::tokenizer(const std::string& query) {
-    auto prompt = apply_prompt_template(query);
-    // split query
-    std::regex img_regex("<img>(.*?)</img>");
-    std::string::const_iterator searchStart(prompt.cbegin());
-    std::smatch match;
-    std::vector<std::string> img_info, txt_info;
-    std::vector<int> ids {};
-    while (std::regex_search(searchStart, prompt.cend(), match, img_regex)) {
-        std::cout << match[1].str() << std::endl;
-        auto txt_ids = tokenizer_->encode(match.prefix().str());
-        ids.insert(ids.end(), txt_ids.begin(), txt_ids.end());
-        auto img_ids = url_encode(match[1].str());
-        ids.insert(ids.end(), img_ids.begin(), img_ids.end());
-        searchStart = match.suffix().first;
-    }
-    if (searchStart != prompt.cend()) {
-        auto txt_ids = tokenizer_->encode(std::string(searchStart, prompt.cend()));
-        ids.insert(ids.end(), txt_ids.begin(), txt_ids.end());
-    }
-    return ids;
-}
-
-VARP Lvlm::embedding(const std::vector<int>& input_ids) {
-#ifdef USING_VISUAL_MODEL
-    int start_pos = 0, pad_pos = 0, end_pos = 0;
-    for (int i = 0; i < input_ids.size(); i++) {
-        int id = input_ids[i];
-        if (id == img_start_ && !start_pos) {
-            start_pos = i;
-        }
-        if (id == img_pad_ && !pad_pos) {
-            pad_pos = i;
-        }
-        if (id == img_end_ && !end_pos) {
-            end_pos = i;
-        }
-    }
-    if (!start_pos) {
-        return Llm::embedding(input_ids);
-    }
-    std::vector<int> prefix(input_ids.begin(), input_ids.begin() + start_pos + 1);
-    std::vector<int> img_ascii(input_ids.begin() + start_pos + 1, input_ids.begin() + pad_pos);
-    std::vector<int> suffix(input_ids.begin() + end_pos, input_ids.end());
-    std::string img_path;
-    for (auto ascii_val : img_ascii) {
-        img_path += static_cast<char>(ascii_val);
-    }
+std::vector<int> Lvlm::image_process(const std::string& image_info) {
+#ifdef LLM_SUPPORT_VISION
     VARP image = nullptr;
-    if (img_path.substr(0, 4) == "http") {
+    if (image_info.substr(0, 4) == "http") {
         std::regex url_regex(R"(^https?://([^/]+)(/.*))");
         std::smatch url_match_result;
         std::string host, path;
-        if (std::regex_search(img_path, url_match_result, url_regex) && url_match_result.size() == 3) {
+        if (std::regex_search(image_info, url_match_result, url_regex) && url_match_result.size() == 3) {
             host = url_match_result[1].str();
             path = url_match_result[2].str();
         }
-        std::cout << host << "#" << path << std::endl;
+        // std::cout << host << "#" << path << std::endl;
         httplib::Client cli(host);
         auto res = cli.Get(path);
         std::string img_file = "downloaded_image.jpg";
@@ -589,21 +534,77 @@ VARP Lvlm::embedding(const std::vector<int>& input_ids) {
         }
         image = MNN::CV::imread(img_file);
     } else {
-        image = MNN::CV::imread(img_path);
+        image = MNN::CV::imread(image_info);
     }
-    image = MNN::CV::resize(image, {img_size_, img_size_}, 0, 0, MNN::CV::INTER_LINEAR, MNN::CV::COLOR_BGR2RGB,
-                            {123.25239296, 117.20384, 104.50194688}, {0.0145414 , 0.01494914, 0.01416452});
+    image = MNN::CV::resize(image, {image_size_, image_size_}, 0, 0, MNN::CV::INTER_LINEAR, MNN::CV::COLOR_BGR2RGB, image_mean_, image_norm_);
     image = MNN::Express::_Unsqueeze(image, {0});
     image = MNN::Express::_Convert(image, NC4HW4);
     auto image_embedding = visual_module_->forward(image);
-    image_embedding = MNN::Express::_Permute(image_embedding, {1, 0, 2});
-    auto prefix_embedding = Llm::embedding(prefix);
-    auto suffix_embedding = Llm::embedding(suffix);
-    auto embeddings = MNN::Express::_Concat({prefix_embedding, image_embedding, suffix_embedding}, 0);
+    image_embeddings_.push_back(image_embedding);
+    int visual_len = image_embedding->getInfo()->dim[0];
+    std::vector<int> img_ids(visual_len, image_pad_);
+    img_ids.insert(img_ids.begin(), vision_start_);
+    img_ids.push_back(vision_end_);
+    return img_ids;
 #else
-    auto embeddings = Llm::embedding(input_ids);
+    return std::vector<int>(0);
 #endif
-    return embeddings;
+}
+
+std::vector<int> Lvlm::tokenizer(const std::string& query) {
+    auto prompt = apply_prompt_template(query);
+    // split query
+    std::regex img_regex("<img>(.*?)</img>");
+    std::string::const_iterator searchStart(prompt.cbegin());
+    std::smatch match;
+    std::vector<std::string> img_infos;
+    std::vector<int> ids {};
+
+    while (std::regex_search(searchStart, prompt.cend(), match, img_regex)) {
+        // std::cout << "img match: " << match[1].str() << std::endl;
+        auto txt_ids = tokenizer_->encode(match.prefix().str());
+        ids.insert(ids.end(), txt_ids.begin(), txt_ids.end());
+        auto img_ids = image_process(match[1].str());
+        ids.insert(ids.end(), img_ids.begin(), img_ids.end());
+        searchStart = match.suffix().first;
+    }
+    if (searchStart != prompt.cend()) {
+        auto txt_ids = tokenizer_->encode(std::string(searchStart, prompt.cend()));
+        ids.insert(ids.end(), txt_ids.begin(), txt_ids.end());
+    }
+    // printf("ids = ["); for (auto id : ids) printf("%d, ", id); printf("]\n");
+    return ids;
+}
+
+VARP Lvlm::embedding(const std::vector<int>& input_ids) {
+    if (input_ids.size() == 1) {
+        return Llm::embedding(input_ids);
+    }
+    std::vector<VARP> embeddings;
+    int img_idx = 0;
+    std::vector<int> cur_txt_ids;
+    for (int i = 0; i < input_ids.size(); i++) {
+        int id = input_ids[i];
+        if (id == image_pad_) {
+            continue;
+        }
+        cur_txt_ids.push_back(id);
+        if (id == vision_start_) {
+            auto txt_embedding = Llm::embedding(cur_txt_ids);
+            auto img_embedding = image_embeddings_[img_idx++];
+            embeddings.push_back(txt_embedding);
+            embeddings.push_back(img_embedding);
+        } else if (id == vision_end_) {
+            cur_txt_ids.clear();
+            cur_txt_ids.push_back(id);
+        }
+    }
+    if (!cur_txt_ids.empty()) {
+        auto txt_embedding = Llm::embedding(cur_txt_ids);
+        embeddings.push_back(txt_embedding);
+    }
+    auto embedding = MNN::Express::_Concat(embeddings, 0);
+    return embedding;
 }
 // Llm end
@@ -614,13 +615,19 @@ float Embedding::dist(VARP var0, VARP var1) {
     return dist;
 }
 
-Embedding* Embedding::createEmbedding(const std::string& config_path) {
+Embedding* Embedding::createEmbedding(const std::string& config_path, bool load) {
     std::shared_ptr<LlmConfig> config(new LlmConfig(config_path));
     Embedding* embedding = new Embedding(config);
-    embedding->load();
+    if (load) {
+        embedding->load();
+    }
    return embedding;
 }
 
+Embedding::Embedding(std::shared_ptr<LlmConfig> config) : Llm(config) {}
+
+int Embedding::dim() const { return config_->hidden_size(); }
+
 void Embedding::load() {
     init_runtime();
     printf("load tokenizer\n");
@@ -636,15 +643,14 @@ void Embedding::load() {
     MNN_PRINT("load %s ... ", model_path.c_str());
     modules_.resize(1);
     modules_[0].reset(Module::load(
-        {"input_ids", "attention_mask", "position_ids"},
-        {"sentence_embeddings"}, model_path.c_str(), runtime_manager_, &module_config));
+                      {"input_ids", "attention_mask", "position_ids"},
+                      {"sentence_embeddings"}, model_path.c_str(), runtime_manager_, &module_config));
     MNN_PRINT("Done!\n");
 }
 
-VARP Embedding::embedding(const std::string& txt) {
-    auto ids = tokenizer(txt);
+VARP Embedding::ids_embedding(const std::vector<int>& ids) {
     int prompt_len = ids.size();
-    auto inputs_ids = _Const(ids.data(), {prompt_len}, NCHW, halide_type_of<int>());
+    auto inputs_ids = embedding(ids);
     auto attention_mask = gen_attention_mask(prompt_len);
     auto position_ids = gen_position_ids(prompt_len);
     auto outputs = modules_[0]->onForward({inputs_ids, attention_mask, position_ids});
@@ -652,12 +658,12 @@
     return sentence_embeddings;
 }
 
+VARP Embedding::txt_embedding(const std::string& txt) {
+    return ids_embedding(tokenizer(txt));
+}
+
 std::vector<int> Embedding::tokenizer(const std::string& query) {
-    auto prompt = query;
-    if (query.size() <= 256) {
-        prompt = "为这个句子生成表示以用于检索相关文章:" + query;
-    }
-    prompt = apply_prompt_template(prompt);
+    auto prompt = apply_prompt_template(query);
     auto ids = tokenizer_->encode(prompt);
     return ids;
 }
@@ -770,7 +776,7 @@ VARP TextVectorStore::text2vector(const std::string& text) {
         std::cerr << "Not set embedding for TextVectorStore." << std::endl;
         return nullptr;
     }
-    auto vector = embedding_->embedding(text);
+    auto vector = embedding_->txt_embedding(text);
     return vector;
 }
 // TextVectorStore end
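
Note on the renamed embedding interface: the hunks above replace the old `embedding(txt)` entry point with `txt_embedding`/`ids_embedding` and give `createEmbedding` a `load` flag. Below is a minimal usage sketch of that interface; it relies only on signatures visible in this diff, and the config path plus the two sample sentences are illustrative assumptions, not files or data shipped with the repo.

```cpp
// Sketch only: drives the Embedding API as declared in include/llm.hpp after this patch.
// The config path is a placeholder; any exported embedding model config should work.
#include <iostream>
#include <memory>
#include <string>
#include "llm.hpp"

int main(int argc, const char* argv[]) {
    if (argc < 2) {
        std::cout << "Usage: ./embedding_sketch <config_path>" << std::endl;
        return 0;
    }
    std::string config_path = argv[1];
    // The new `load` flag defaults to true; passing false defers weight loading
    // until load() is called explicitly.
    std::unique_ptr<Embedding> embedding(Embedding::createEmbedding(config_path, true));
    // txt_embedding() tokenizes the text and runs the sentence-embedding module;
    // ids_embedding() accepts already-tokenized ids instead.
    auto vec_0 = embedding->txt_embedding("how is the weather today");
    auto vec_1 = embedding->txt_embedding("what is the weather like now");
    // dist() compares two sentence embeddings; smaller values mean closer sentences.
    std::cout << "dim = " << embedding->dim()
              << ", dist = " << Embedding::dist(vec_0, vec_1) << std::endl;
    return 0;
}
```

This mirrors what `demo/embedding_demo.cpp` and `TextVectorStore::text2vector` now do, so callers migrating from the old `embedding(txt)` method only need to switch to `txt_embedding(txt)`.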