diff --git a/CMakeLists.txt b/CMakeLists.txt
index 91d3508a..bf95d42e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,12 +2,12 @@ cmake_minimum_required(VERSION 3.5)
project(mnn-llm)
option(BUILD_FOR_ANDROID "Build for android with mini memory mode." OFF)
-option(USING_VISUAL_MODEL "Using visual model will need dpes: MNNOpenCV and httplib." OFF)
+option(LLM_SUPPORT_VISION "Enable vision input support for the LLM model." OFF)
option(DUMP_PROFILE_INFO "Dump profile info when chat." OFF)
option(BUILD_JNI "Build JNI for android app." OFF)
-if (USING_VISUAL_MODEL)
- add_definitions(-DUSING_VISUAL_MODEL)
+if (LLM_SUPPORT_VISION)
+ add_definitions(-DLLM_SUPPORT_VISION)
endif()
if (DUMP_PROFILE_INFO)
@@ -24,7 +24,7 @@ set(MNN_SUPPORT_TRANSFORMER_FUSE ON CACHE BOOL "Open MNN_SUPPORT_TRANSFORMER_FUS
if (BUILD_FOR_ANDROID)
set(MNN_ARM82 ON CACHE BOOL "Open MNN_ARM82" FORCE)
endif()
-if (USING_VISUAL_MODEL)
+if (LLM_SUPPORT_VISION)
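+ # vision input needs MNN's OpenCV module and image codecs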
set(MNN_BUILD_OPENCV ON CACHE BOOL "Open MNN_BUILD_OPENCV" FORCE)
set(MNN_IMGCODECS ON CACHE BOOL "Open MNN_IMGCODECS" FORCE)
endif()
@@ -33,7 +33,7 @@ add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/MNN)
# include dir
include_directories(${CMAKE_CURRENT_LIST_DIR}/include/
${CMAKE_CURRENT_LIST_DIR}/MNN/include/
- ${CMAKE_CURRENT_LIST_DIR}/MNN/tools/cv/include/cv/
+ ${CMAKE_CURRENT_LIST_DIR}/MNN/tools/cv/include/
)
# source files
@@ -58,7 +58,7 @@ else()
set_target_properties(llm PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
target_link_libraries(llm MNN MNN_Express)
- if (USING_VISUAL_MODEL)
+ if (LLM_SUPPORT_VISION)
target_link_libraries(llm MNNOpenCV)
endif()
endif()
diff --git a/README.md b/README.md
index f14205fd..337f9d5f 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ llm模型导出`onnx`和`mnn`模型请使用[llm-export](https://github.com/wang
`modelscope`模型下载:
- <summary>qwen系列</summary>
+ <summary>qwen</summary>
- [modelscope-qwen-1.8b-chat]
- [modelscope-qwen-7b-chat]
@@ -31,14 +31,16 @@ llm模型导出`onnx`和`mnn`模型请使用[llm-export](https://github.com/wang
- [modelscope-qwen1.5-1.8b-chat]
- [modelscope-qwen1.5-4b-chat]
- [modelscope-qwen1.5-7b-chat]
-- [modelscope-qwen2-0.5b-chat]
-- [modelscope-qwen2-1.5b-chat]
-- [modelscope-qwen2-7b-chat]
+- [modelscope-qwen2-0.5b-instruct]
+- [modelscope-qwen2-1.5b-instruct]
+- [modelscope-qwen2-7b-instruct]
+- [modelscope-qwen2-vl-2b-instruct]
+- [modelscope-qwen2-vl-7b-instruct]
- <summary>glm系列</summary>
+ <summary>glm</summary>
- [modelscope-chatglm-6b]
- [modelscope-chatglm2-6b]
@@ -49,7 +51,7 @@ llm模型导出`onnx`和`mnn`模型请使用[llm-export](https://github.com/wang
- <summary>llama系列</summary>
+ <summary>llama</summary>
- [modelscope-llama2-7b-chat]
- [modelscope-llama3-8b-instruct]
@@ -62,10 +64,17 @@ llm模型导出`onnx`和`mnn`模型请使用[llm-export](https://github.com/wang
- <summary>其他</summary>
+ <summary>phi</summary>
- [modelscope-phi-2]
+
+</details>
+
+<details>
+ <summary>embedding</summary>
+
- [modelscope-bge-large-zh]
+- [modelscope-gte_sentence-embedding_multilingual-base]
@@ -77,9 +86,11 @@ llm模型导出`onnx`和`mnn`模型请使用[llm-export](https://github.com/wang
[modelscope-qwen1.5-1.8b-chat]: https://modelscope.cn/models/zhaode/Qwen1.5-1.8B-Chat-MNN/files
[modelscope-qwen1.5-4b-chat]: https://modelscope.cn/models/zhaode/Qwen1.5-4B-Chat-MNN/files
[modelscope-qwen1.5-7b-chat]: https://modelscope.cn/models/zhaode/Qwen1.5-7B-Chat-MNN/files
-[modelscope-qwen2-0.5b-chat]: https://modelscope.cn/models/zhaode/Qwen2-0.5B-Instruct-MNN/files
-[modelscope-qwen2-1.5b-chat]: https://modelscope.cn/models/zhaode/Qwen2-1.5B-Instruct-MNN/files
-[modelscope-qwen2-7b-chat]: https://modelscope.cn/models/zhaode/Qwen2-7B-Instruct-MNN/files
+[modelscope-qwen2-0.5b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-0.5B-Instruct-MNN/files
+[modelscope-qwen2-1.5b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-1.5B-Instruct-MNN/files
+[modelscope-qwen2-7b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-7B-Instruct-MNN/files
+[modelscope-qwen2-vl-2b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-VL-2B-Instruct-MNN/files
+[modelscope-qwen2-vl-7b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-VL-7B-Instruct-MNN/files
[modelscope-chatglm-6b]: https://modelscope.cn/models/zhaode/chatglm-6b-MNN/files
[modelscope-chatglm2-6b]: https://modelscope.cn/models/zhaode/chatglm2-6b-MNN/files
@@ -96,6 +107,7 @@ llm模型导出`onnx`和`mnn`模型请使用[llm-export](https://github.com/wang
[modelscope-tinyllama-1.1b-chat]: https://modelscope.cn/models/zhaode/TinyLlama-1.1B-Chat-MNN/files
[modelscope-phi-2]: https://modelscope.cn/models/zhaode/phi-2-MNN/files
[modelscope-bge-large-zh]: https://modelscope.cn/models/zhaode/bge-large-zh-MNN/files
+[modelscope-gte_sentence-embedding_multilingual-base]: https://modelscope.cn/models/zhaode/gte_sentence-embedding_multilingual-base-MNN/files
## 构建
@@ -151,13 +163,13 @@ cd mnn-llm
一些编译宏:
- `BUILD_FOR_ANDROID`: 编译到Android设备;
-- `USING_VISUAL_MODEL`: 支持多模态能力的模型,需要依赖`libMNNOpenCV`;
+- `LLM_SUPPORT_VISION`: 是否支持视觉处理能力;
- `DUMP_PROFILE_INFO`: 每次对话后dump出性能数据到命令行中;
-默认使用`CPU`后端且不实用多模态能力,如果使用其他后端或能力,可以在编译MNN的脚本中添加`MNN`编译宏
+默认使用`CPU`后端,如果需要使用其他后端,可以在编译MNN时添加相应的`MNN`编译宏
- cuda: `-DMNN_CUDA=ON`
- opencl: `-DMNN_OPENCL=ON`
-- opencv: `-DMNN_BUILD_OPENCV=ON -DMNN_IMGCODECS=ON`
+- metal: `-DMNN_METAL=ON`
### 4. 执行
@@ -181,27 +193,35 @@ adb shell "cd /data/local/tmp && export LD_LIBRARY_PATH=. && ./cli_demo ./Qwen2-
reference
+- [cpp-httplib](https://github.com/yhirose/cpp-httplib)
+- [chatgpt-web](https://github.com/xqdoo00o/chatgpt-web)
+- [ChatViewDemo](https://github.com/BrettFX/ChatViewDemo)
+- [nlohmann/json](https://github.com/nlohmann/json)
+- [Qwen-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/summary)
+- [Qwen-7B-Chat](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary)
+- [Qwen-VL-Chat](https://modelscope.cn/models/qwen/Qwen-VL-Chat/summary)
+- [Qwen1.5-0.5B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary)
+- [Qwen1.5-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat/summary)
+- [Qwen1.5-4B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat/summary)
+- [Qwen1.5-7B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-7B-Chat/summary)
+- [Qwen2-0.5B-Instruct](https://modelscope.cn/models/qwen/Qwen2-0.5B-Instruct/summary)
+- [Qwen2-1.5B-Instruct](https://modelscope.cn/models/qwen/Qwen2-1.5B-Instruct/summary)
+- [Qwen2-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-7B-Instruct/summary)
+- [Qwen2-VL-2B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-2B-Instruct/summary)
+- [Qwen2-VL-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct/summary)
- [chatglm-6b](https://modelscope.cn/models/ZhipuAI/chatglm-6b/summary)
- [chatglm2-6b](https://modelscope.cn/models/ZhipuAI/chatglm2-6b/summary)
-- [chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b/summary)
- [codegeex2-6b](https://modelscope.cn/models/ZhipuAI/codegeex2-6b/summary)
-- [Baichuan2-7B-Chat](https://modelscope.cn/models/baichuan-inc/baichuan-7B/summary)
-- [Qwen-7B-Chat](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary)
-- [Qwen-VL-Chat](https://modelscope.cn/models/qwen/Qwen-VL-Chat/summary)
-- [Qwen-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/summary)
+- [chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b/summary)
+- [glm4-9b-chat](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat/summary)
- [Llama-2-7b-chat-ms](https://modelscope.cn/models/modelscope/Llama-2-7b-chat-ms/summary)
+- [Llama-3-8B-Instruct](https://modelscope.cn/models/modelscope/Meta-Llama-3-8B-Instruct/summary)
+- [Baichuan2-7B-Chat](https://modelscope.cn/models/baichuan-inc/baichuan-7B/summary)
- [internlm-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b/summary)
+- [Yi-6B-Chat](https://modelscope.cn/models/01ai/Yi-6B-Chat/summary)
+- [deepseek-llm-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-llm-7b-chat/summary)
+- [TinyLlama-1.1B-Chat-v0.6](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6)
- [phi-2](https://modelscope.cn/models/AI-ModelScope/phi-2/summary)
- [bge-large-zh](https://modelscope.cn/models/AI-ModelScope/bge-large-zh/summary)
-- [TinyLlama-1.1B-Chat-v0.6](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6)
-- [Yi-6B-Chat](https://modelscope.cn/models/01ai/Yi-6B-Chat/summary)
-- [Qwen1.5-0.5B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary)
-- [Qwen1.5-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat/summary)
-- [Qwen1.5-4B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat/summary)
-- [Qwen1.5-7B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-7B-Chat/summary)
-- [cpp-httplib](https://github.com/yhirose/cpp-httplib)
-- [chatgpt-web](https://github.com/xqdoo00o/chatgpt-web)
-- [ChatViewDemo](https://github.com/BrettFX/ChatViewDemo)
-- [nlohmann/json](https://github.com/nlohmann/json)
-
+- [gte_sentence-embedding_multilingual-base](https://modelscope.cn/models/iic/gte_sentence-embedding_multilingual-base/summary)
\ No newline at end of file
diff --git a/README_en.md b/README_en.md
index 9d666ea9..a7c5d1ed 100644
--- a/README_en.md
+++ b/README_en.md
@@ -32,9 +32,11 @@ Download models from `modelscope`:
- [modelscope-qwen1.5-1.8b-chat]
- [modelscope-qwen1.5-4b-chat]
- [modelscope-qwen1.5-7b-chat]
-- [modelscope-qwen2-0.5b-chat]
-- [modelscope-qwen2-1.5b-chat]
-- [modelscope-qwen2-7b-chat]
+- [modelscope-qwen2-0.5b-instruct]
+- [modelscope-qwen2-1.5b-instruct]
+- [modelscope-qwen2-7b-instruct]
+- [modelscope-qwen2-vl-2b-instruct]
+- [modelscope-qwen2-vl-7b-instruct]
@@ -63,10 +65,17 @@ Download models from `modelscope`:
- <summary>others</summary>
+ <summary>phi</summary>
- [modelscope-phi-2]
+
+</details>
+
+<details>
+ <summary>embedding</summary>
+
- [modelscope-bge-large-zh]
+- [modelscope-gte_sentence-embedding_multilingual-base]
@@ -78,9 +87,11 @@ Download models from `modelscope`:
[modelscope-qwen1.5-1.8b-chat]: https://modelscope.cn/models/zhaode/Qwen1.5-1.8B-Chat-MNN/files
[modelscope-qwen1.5-4b-chat]: https://modelscope.cn/models/zhaode/Qwen1.5-4B-Chat-MNN/files
[modelscope-qwen1.5-7b-chat]: https://modelscope.cn/models/zhaode/Qwen1.5-7B-Chat-MNN/files
-[modelscope-qwen2-0.5b-chat]: https://modelscope.cn/models/zhaode/Qwen2-0.5B-Instruct-MNN/files
-[modelscope-qwen2-1.5b-chat]: https://modelscope.cn/models/zhaode/Qwen2-1.5B-Instruct-MNN/files
-[modelscope-qwen2-7b-chat]: https://modelscope.cn/models/zhaode/Qwen2-7B-Instruct-MNN/files
+[modelscope-qwen2-0.5b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-0.5B-Instruct-MNN/files
+[modelscope-qwen2-1.5b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-1.5B-Instruct-MNN/files
+[modelscope-qwen2-7b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-7B-Instruct-MNN/files
+[modelscope-qwen2-vl-2b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-VL-2B-Instruct-MNN/files
+[modelscope-qwen2-vl-7b-instruct]: https://modelscope.cn/models/zhaode/Qwen2-VL-7B-Instruct-MNN/files
[modelscope-chatglm-6b]: https://modelscope.cn/models/zhaode/chatglm-6b-MNN/files
[modelscope-chatglm2-6b]: https://modelscope.cn/models/zhaode/chatglm2-6b-MNN/files
@@ -97,6 +108,7 @@ Download models from `modelscope`:
[modelscope-tinyllama-1.1b-chat]: https://modelscope.cn/models/zhaode/TinyLlama-1.1B-Chat-MNN/files
[modelscope-phi-2]: https://modelscope.cn/models/zhaode/phi-2-MNN/files
[modelscope-bge-large-zh]: https://modelscope.cn/models/zhaode/bge-large-zh-MNN/files
+[modelscope-gte_sentence-embedding_multilingual-base]: https://modelscope.cn/models/zhaode/gte_sentence-embedding_multilingual-base-MNN/files
## Building
@@ -147,9 +159,10 @@ cd mnn-llm
./script/ios_build.sh
```
-The default backend used is `CPU`. If you want to use a different backend, you can add a MNN compilation macro within the script:
+The default backend used is `CPU`. If you want to use a different backend, you can add an MNN compilation macro when building MNN:
- cuda: `-DMNN_CUDA=ON`
- opencl: `-DMNN_OPENCL=ON`
+- metal: `-DMNN_METAL=ON`
### 4. Execution
@@ -174,27 +187,36 @@ adb shell "cd /data/local/tmp && export LD_LIBRARY_PATH=. && ./cli_demo ./Qwen2-
reference
+- [cpp-httplib](https://github.com/yhirose/cpp-httplib)
+- [chatgpt-web](https://github.com/xqdoo00o/chatgpt-web)
+- [ChatViewDemo](https://github.com/BrettFX/ChatViewDemo)
+- [nlohmann/json](https://github.com/nlohmann/json)
+- [Qwen-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/summary)
+- [Qwen-7B-Chat](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary)
+- [Qwen-VL-Chat](https://modelscope.cn/models/qwen/Qwen-VL-Chat/summary)
+- [Qwen1.5-0.5B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary)
+- [Qwen1.5-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat/summary)
+- [Qwen1.5-4B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat/summary)
+- [Qwen1.5-7B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-7B-Chat/summary)
+- [Qwen2-0.5B-Instruct](https://modelscope.cn/models/qwen/Qwen2-0.5B-Instruct/summary)
+- [Qwen2-1.5B-Instruct](https://modelscope.cn/models/qwen/Qwen2-1.5B-Instruct/summary)
+- [Qwen2-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-7B-Instruct/summary)
+- [Qwen2-VL-2B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-2B-Instruct/summary)
+- [Qwen2-VL-7B-Instruct](https://modelscope.cn/models/qwen/Qwen2-VL-7B-Instruct/summary)
- [chatglm-6b](https://modelscope.cn/models/ZhipuAI/chatglm-6b/summary)
- [chatglm2-6b](https://modelscope.cn/models/ZhipuAI/chatglm2-6b/summary)
-- [chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b/summary)
- [codegeex2-6b](https://modelscope.cn/models/ZhipuAI/codegeex2-6b/summary)
-- [Baichuan2-7B-Chat](https://modelscope.cn/models/baichuan-inc/baichuan-7B/summary)
-- [Qwen-7B-Chat](https://modelscope.cn/models/qwen/Qwen-7B-Chat/summary)
-- [Qwen-VL-Chat](https://modelscope.cn/models/qwen/Qwen-VL-Chat/summary)
-- [Qwen-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen-1_8B-Chat/summary)
+- [chatglm3-6b](https://modelscope.cn/models/ZhipuAI/chatglm3-6b/summary)
+- [glm4-9b-chat](https://modelscope.cn/models/ZhipuAI/glm-4-9b-chat/summary)
- [Llama-2-7b-chat-ms](https://modelscope.cn/models/modelscope/Llama-2-7b-chat-ms/summary)
+- [Llama-3-8B-Instruct](https://modelscope.cn/models/modelscope/Meta-Llama-3-8B-Instruct/summary)
+- [Baichuan2-7B-Chat](https://modelscope.cn/models/baichuan-inc/baichuan-7B/summary)
- [internlm-chat-7b](https://modelscope.cn/models/Shanghai_AI_Laboratory/internlm-chat-7b/summary)
+- [Yi-6B-Chat](https://modelscope.cn/models/01ai/Yi-6B-Chat/summary)
+- [deepseek-llm-7b-chat](https://modelscope.cn/models/deepseek-ai/deepseek-llm-7b-chat/summary)
+- [TinyLlama-1.1B-Chat-v0.6](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6)
- [phi-2](https://modelscope.cn/models/AI-ModelScope/phi-2/summary)
- [bge-large-zh](https://modelscope.cn/models/AI-ModelScope/bge-large-zh/summary)
-- [TinyLlama-1.1B-Chat-v0.6](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6)
-- [Yi-6B-Chat](https://modelscope.cn/models/01ai/Yi-6B-Chat/summary)
-- [Qwen1.5-0.5B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat/summary)
-- [Qwen1.5-1.8B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-1.8B-Chat/summary)
-- [Qwen1.5-4B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-4B-Chat/summary)
-- [Qwen1.5-7B-Chat](https://modelscope.cn/models/qwen/Qwen1.5-7B-Chat/summary)
-- [cpp-httplib](https://github.com/yhirose/cpp-httplib)
-- [chatgpt-web](https://github.com/xqdoo00o/chatgpt-web)
-- [ChatViewDemo](https://github.com/BrettFX/ChatViewDemo)
-- [nlohmann/json](https://github.com/nlohmann/json)
+- [gte_sentence-embedding_multilingual-base](https://modelscope.cn/models/iic/gte_sentence-embedding_multilingual-base/summary)
\ No newline at end of file
diff --git a/demo/embedding_demo.cpp b/demo/embedding_demo.cpp
index c998717e..457e21f5 100644
--- a/demo/embedding_demo.cpp
+++ b/demo/embedding_demo.cpp
@@ -31,9 +31,9 @@ int main(int argc, const char* argv[]) {
std::string model_dir = argv[1];
std::cout << "model path is " << model_dir << std::endl;
std::unique_ptr<Embedding> embedding(Embedding::createEmbedding(model_dir));
- auto vec_0 = embedding->embedding("在春暖花开的季节,走在樱花缤纷的道路上,人们纷纷拿出手机拍照留念。樱花树下,情侣手牵手享受着这绝美的春光。孩子们在树下追逐嬉戏,脸上洋溢着纯真的笑容。春天的气息在空气中弥漫,一切都显得那么生机勃勃,充满希望。");
- auto vec_1 = embedding->embedding("春天到了,樱花树悄然绽放,吸引了众多游客前来观赏。小朋友们在花瓣飘落的树下玩耍,而恋人们则在这浪漫的景色中尽情享受二人世界。每个人的脸上都挂着幸福的笑容,仿佛整个世界都被春天温暖的阳光和满树的樱花渲染得更加美好。");
- auto vec_2 = embedding->embedding("在炎热的夏日里,沙滩上的游客们穿着泳装享受着海水的清凉。孩子们在海边堆沙堡,大人们则在太阳伞下品尝冷饮,享受悠闲的时光。远处,冲浪者们挑战着波涛,体验着与海浪争斗的刺激。夏天的海滩,总是充满了活力和热情。");
+ auto vec_0 = embedding->txt_embedding("在春暖花开的季节,走在樱花缤纷的道路上,人们纷纷拿出手机拍照留念。樱花树下,情侣手牵手享受着这绝美的春光。孩子们在树下追逐嬉戏,脸上洋溢着纯真的笑容。春天的气息在空气中弥漫,一切都显得那么生机勃勃,充满希望。");
+ auto vec_1 = embedding->txt_embedding("春天到了,樱花树悄然绽放,吸引了众多游客前来观赏。小朋友们在花瓣飘落的树下玩耍,而恋人们则在这浪漫的景色中尽情享受二人世界。每个人的脸上都挂着幸福的笑容,仿佛整个世界都被春天温暖的阳光和满树的樱花渲染得更加美好。");
+ auto vec_2 = embedding->txt_embedding("在炎热的夏日里,沙滩上的游客们穿着泳装享受着海水的清凉。孩子们在海边堆沙堡,大人们则在太阳伞下品尝冷饮,享受悠闲的时光。远处,冲浪者们挑战着波涛,体验着与海浪争斗的刺激。夏天的海滩,总是充满了活力和热情。");
dumpVARP(vec_0);
dumpVARP(vec_1);
dumpVARP(vec_2);
diff --git a/include/llm.hpp b/include/llm.hpp
index 393b6f44..2a61eb74 100644
--- a/include/llm.hpp
+++ b/include/llm.hpp
@@ -277,37 +277,41 @@ class Llm {
class Lvlm : public Llm {
public:
Lvlm(std::shared_ptr<LlmConfig> config) : Llm(config) {
- img_size_ = config->llm_config_.value("img_size", img_size_);
- imgpad_len_ = config->llm_config_.value("imgpad_len", imgpad_len_);
- img_start_ = config->llm_config_.value("img_start", img_start_);
- img_end_ = config->llm_config_.value("img_end", img_end_);
- img_pad_ = config->llm_config_.value("img_pad", img_pad_);
+ image_size_ = config->llm_config_.value("image_size", image_size_);
+ image_pad_ = config->llm_config_.value("image_pad", image_pad_);
+ vision_start_ = config->llm_config_.value("vision_start", vision_start_);
+ vision_end_ = config->llm_config_.value("vision_end", vision_end_);
+ image_mean_ = config->llm_config_.value("image_mean", image_mean_);
+ image_norm_ = config->llm_config_.value("image_norm", image_norm_);
}
~Lvlm() { visual_module_.reset(); }
virtual void load() override;
+ virtual std::vector<int> tokenizer(const std::string& query) override;
+ virtual MNN::Express::VARP embedding(const std::vector<int>& input_ids) override;
private:
- int img_size_ = 448, imgpad_len_ = 256, img_start_ = 151857, img_end_ = 151858, img_pad_ = 151859;
+ int image_size_ = 448, vision_start_ = 151857, vision_end_ = 151858, image_pad_ = 151859;
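+ // default per-channel mean/normalization for image preprocessing; can be overridden via "image_mean"/"image_norm" in llm_config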
+ std::vector<float> image_mean_ {122.7709383, 116.7460125, 104.09373615};
+ std::vector<float> image_norm_ {0.01459843, 0.01500777, 0.01422007};
+ std::vector<int> image_process(const std::string& img_info);
std::shared_ptr<MNN::Express::Module> visual_module_;
- VARP visual_embedding(const std::vector<int>& input_ids);
- std::vector<int> url_encode(const std::string& url);
- virtual std::vector<int> tokenizer(const std::string& query) override;
- virtual VARP embedding(const std::vector<int>& input_ids) override;
+ std::vector<MNN::Express::VARP> image_embeddings_;
};
// Llm end
// Embedding start
class Embedding : public Llm {
public:
- Embedding(std::shared_ptr<LlmConfig> config) : Llm(config) {}
- static Embedding* createEmbedding(const std::string& config_path);
- static float dist(VARP var0, VARP var1);
+ Embedding(std::shared_ptr<LlmConfig> config);
+ static Embedding* createEmbedding(const std::string& config_path, bool load = true);
+ static float dist(MNN::Express::VARP var0, MNN::Express::VARP var1);
virtual void load() override;
- VARP embedding(const std::string& txt);
- int dim() { return config_->hidden_size(); }
+ MNN::Express::VARP ids_embedding(const std::vector<int>& ids);
+ MNN::Express::VARP txt_embedding(const std::string& txt);
+ int dim() const;
private:
virtual std::vector<int> tokenizer(const std::string& query) override;
- virtual VARP gen_attention_mask(int seq_len) override;
- virtual VARP gen_position_ids(int seq_len) override;
+ virtual MNN::Express::VARP gen_attention_mask(int seq_len) override;
+ virtual MNN::Express::VARP gen_position_ids(int seq_len) override;
};
// Embedding end
diff --git a/src/llm.cpp b/src/llm.cpp
index e59d55f2..321152ea 100644
--- a/src/llm.cpp
+++ b/src/llm.cpp
@@ -17,7 +17,7 @@
#include "llm.hpp"
#include "tokenizer.hpp"
-#ifdef USING_VISUAL_MODEL
+#ifdef LLM_SUPPORT_VISION
#include "httplib.h"
+#include <cv/cv.hpp>
#endif
@@ -499,77 +499,22 @@ void Lvlm::load() {
Module::Config module_config;
module_config.shapeMutable = true;
module_config.rearrange = false;
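+ // point the runtime at the visual module's external .weight file before loading it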
+ runtime_manager_->setExternalFile(config_->visual_model() + ".weight");
visual_module_.reset(Module::load({}, {}, config_->visual_model().c_str(), runtime_manager_, &module_config));
}
-std::vector<int> Lvlm::url_encode(const std::string& url) {
- std::vector<int> ascii_values(imgpad_len_ + 2, img_pad_);
- ascii_values[0] = img_start_;
- ascii_values[imgpad_len_ + 1] = img_end_;
- for (int i = 0; i < url.size(); i++) {
- ascii_values[i + 1] = static_cast<int>(url[i]);
- }
- return ascii_values;
-}
-
-std::vector<int> Lvlm::tokenizer(const std::string& query) {
- auto prompt = apply_prompt_template(query);
- // split query
- std::regex img_regex("
(.*?)");
- std::string::const_iterator searchStart(prompt.cbegin());
- std::smatch match;
- std::vector<std::string> img_info, txt_info;
- std::vector<int> ids {};
- while (std::regex_search(searchStart, prompt.cend(), match, img_regex)) {
- std::cout << match[1].str() << std::endl;
- auto txt_ids = tokenizer_->encode(match.prefix().str());
- ids.insert(ids.end(), txt_ids.begin(), txt_ids.end());
- auto img_ids = url_encode(match[1].str());
- ids.insert(ids.end(), img_ids.begin(), img_ids.end());
- searchStart = match.suffix().first;
- }
- if (searchStart != prompt.cend()) {
- auto txt_ids = tokenizer_->encode(std::string(searchStart, prompt.cend()));
- ids.insert(ids.end(), txt_ids.begin(), txt_ids.end());
- }
- return ids;
-}
-
-VARP Lvlm::embedding(const std::vector<int>& input_ids) {
-#ifdef USING_VISUAL_MODEL
- int start_pos = 0, pad_pos = 0, end_pos = 0;
- for (int i = 0; i < input_ids.size(); i++) {
- int id = input_ids[i];
- if (id == img_start_ && !start_pos) {
- start_pos = i;
- }
- if (id == img_pad_ && !pad_pos) {
- pad_pos = i;
- }
- if (id == img_end_ && !end_pos) {
- end_pos = i;
- }
- }
- if (!start_pos) {
- return Llm::embedding(input_ids);
- }
- std::vector<int> prefix(input_ids.begin(), input_ids.begin() + start_pos + 1);
- std::vector<int> img_ascii(input_ids.begin() + start_pos + 1, input_ids.begin() + pad_pos);
- std::vector<int> suffix(input_ids.begin() + end_pos, input_ids.end());
- std::string img_path;
- for (auto ascii_val : img_ascii) {
- img_path += static_cast<char>(ascii_val);
- }
+std::vector<int> Lvlm::image_process(const std::string& image_info) {
+#ifdef LLM_SUPPORT_VISION
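+ // read the image from a local path or http(s) URL, resize/normalize it, run the visual
+ // encoder and cache its embedding; return vision_start_, one image_pad_ per visual token,
+ // then vision_end_ as placeholder ids for the prompt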
VARP image = nullptr;
- if (img_path.substr(0, 4) == "http") {
+ if (image_info.substr(0, 4) == "http") {
std::regex url_regex(R"(^https?://([^/]+)(/.*))");
std::smatch url_match_result;
std::string host, path;
- if (std::regex_search(img_path, url_match_result, url_regex) && url_match_result.size() == 3) {
+ if (std::regex_search(image_info, url_match_result, url_regex) && url_match_result.size() == 3) {
host = url_match_result[1].str();
path = url_match_result[2].str();
}
- std::cout << host << "#" << path << std::endl;
+ // std::cout << host << "#" << path << std::endl;
httplib::Client cli(host);
auto res = cli.Get(path);
std::string img_file = "downloaded_image.jpg";
@@ -589,21 +534,77 @@ VARP Lvlm::embedding(const std::vector& input_ids) {
}
image = MNN::CV::imread(img_file);
} else {
- image = MNN::CV::imread(img_path);
+ image = MNN::CV::imread(image_info);
}
- image = MNN::CV::resize(image, {img_size_, img_size_}, 0, 0, MNN::CV::INTER_LINEAR, MNN::CV::COLOR_BGR2RGB,
- {123.25239296, 117.20384, 104.50194688}, {0.0145414 , 0.01494914, 0.01416452});
+ image = MNN::CV::resize(image, {image_size_, image_size_}, 0, 0, MNN::CV::INTER_LINEAR, MNN::CV::COLOR_BGR2RGB, image_mean_, image_norm_);
image = MNN::Express::_Unsqueeze(image, {0});
image = MNN::Express::_Convert(image, NC4HW4);
auto image_embedding = visual_module_->forward(image);
- image_embedding = MNN::Express::_Permute(image_embedding, {1, 0, 2});
- auto prefix_embedding = Llm::embedding(prefix);
- auto suffix_embedding = Llm::embedding(suffix);
- auto embeddings = MNN::Express::_Concat({prefix_embedding, image_embedding, suffix_embedding}, 0);
+ image_embeddings_.push_back(image_embedding);
+ int visual_len = image_embedding->getInfo()->dim[0];
+ std::vector<int> img_ids(visual_len, image_pad_);
+ img_ids.insert(img_ids.begin(), vision_start_);
+ img_ids.push_back(vision_end_);
+ return img_ids;
#else
- auto embeddings = Llm::embedding(input_ids);
+ return std::vector<int>(0);
#endif
- return embeddings;
+}
+
+std::vector<int> Lvlm::tokenizer(const std::string& query) {
+ auto prompt = apply_prompt_template(query);
+ // split query
+ std::regex img_regex("
(.*?)");
+ std::string::const_iterator searchStart(prompt.cbegin());
+ std::smatch match;
+ std::vector<std::string> img_infos;
+ std::vector<int> ids {};
+
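+ // encode plain-text segments with the tokenizer and expand each <img>...</img> tag
+ // into vision placeholder ids via image_process()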
+ while (std::regex_search(searchStart, prompt.cend(), match, img_regex)) {
+ // std::cout << "img match: " << match[1].str() << std::endl;
+ auto txt_ids = tokenizer_->encode(match.prefix().str());
+ ids.insert(ids.end(), txt_ids.begin(), txt_ids.end());
+ auto img_ids = image_process(match[1].str());
+ ids.insert(ids.end(), img_ids.begin(), img_ids.end());
+ searchStart = match.suffix().first;
+ }
+ if (searchStart != prompt.cend()) {
+ auto txt_ids = tokenizer_->encode(std::string(searchStart, prompt.cend()));
+ ids.insert(ids.end(), txt_ids.begin(), txt_ids.end());
+ }
+ // printf("ids = ["); for (auto id : ids) printf("%d, ", id); printf("]\n");
+ return ids;
+}
+
+VARP Lvlm::embedding(const std::vector<int>& input_ids) {
+ if (input_ids.size() == 1) {
+ return Llm::embedding(input_ids);
+ }
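+ // rebuild the sequence: buffer text ids and, at each vision_start_, emit the buffered text
+ // embedding followed by the cached image embedding; image_pad_ ids are skipped because the
+ // image embedding already covers them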
+ std::vector<VARP> embeddings;
+ int img_idx = 0;
+ std::vector<int> cur_txt_ids;
+ for (int i = 0; i < input_ids.size(); i++) {
+ int id = input_ids[i];
+ if (id == image_pad_) {
+ continue;
+ }
+ cur_txt_ids.push_back(id);
+ if (id == vision_start_) {
+ auto txt_embedding = Llm::embedding(cur_txt_ids);
+ auto img_embedding = image_embeddings_[img_idx++];
+ embeddings.push_back(txt_embedding);
+ embeddings.push_back(img_embedding);
+ } else if (id == vision_end_) {
+ cur_txt_ids.clear();
+ cur_txt_ids.push_back(id);
+ }
+ }
+ if (!cur_txt_ids.empty()) {
+ auto txt_embedding = Llm::embedding(cur_txt_ids);
+ embeddings.push_back(txt_embedding);
+ }
+ auto embedding = MNN::Express::_Concat(embeddings, 0);
+ return embedding;
}
// Llm end
@@ -614,13 +615,19 @@ float Embedding::dist(VARP var0, VARP var1) {
return dist;
}
-Embedding* Embedding::createEmbedding(const std::string& config_path) {
+Embedding* Embedding::createEmbedding(const std::string& config_path, bool load) {
std::shared_ptr<LlmConfig> config(new LlmConfig(config_path));
Embedding* embedding = new Embedding(config);
- embedding->load();
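+ // optionally defer load() to the caller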
+ if (load) {
+ embedding->load();
+ }
return embedding;
}
+Embedding::Embedding(std::shared_ptr<LlmConfig> config) : Llm(config) {}
+
+int Embedding::dim() const { return config_->hidden_size(); }
+
void Embedding::load() {
init_runtime();
printf("load tokenizer\n");
@@ -636,15 +643,14 @@ void Embedding::load() {
MNN_PRINT("load %s ... ", model_path.c_str());
modules_.resize(1);
modules_[0].reset(Module::load(
- {"input_ids", "attention_mask", "position_ids"},
- {"sentence_embeddings"}, model_path.c_str(), runtime_manager_, &module_config));
+ {"input_ids", "attention_mask", "position_ids"},
+ {"sentence_embeddings"}, model_path.c_str(), runtime_manager_, &module_config));
MNN_PRINT("Done!\n");
}
-VARP Embedding::embedding(const std::string& txt) {
- auto ids = tokenizer(txt);
+VARP Embedding::ids_embedding(const std::vector<int>& ids) {
int prompt_len = ids.size();
- auto inputs_ids = _Const(ids.data(), {prompt_len}, NCHW, halide_type_of<int>());
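+ // feed the token embeddings of the ids (rather than the raw id tensor) as the first module input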
+ auto inputs_ids = embedding(ids);
auto attention_mask = gen_attention_mask(prompt_len);
auto position_ids = gen_position_ids(prompt_len);
auto outputs = modules_[0]->onForward({inputs_ids, attention_mask, position_ids});
@@ -652,12 +658,12 @@ VARP Embedding::embedding(const std::string& txt) {
return sentence_embeddings;
}
+VARP Embedding::txt_embedding(const std::string& txt) {
+ return ids_embedding(tokenizer(txt));
+}
+
std::vector<int> Embedding::tokenizer(const std::string& query) {
- auto prompt = query;
- if (query.size() <= 256) {
- prompt = "为这个句子生成表示以用于检索相关文章:" + query;
- }
- prompt = apply_prompt_template(prompt);
+ auto prompt = apply_prompt_template(query);
auto ids = tokenizer_->encode(prompt);
return ids;
}
@@ -770,7 +776,7 @@ VARP TextVectorStore::text2vector(const std::string& text) {
std::cerr << "Not set embedding for TextVectorStore." << std::endl;
return nullptr;
}
- auto vector = embedding_->embedding(text);
+ auto vector = embedding_->txt_embedding(text);
return vector;
}
// TextVectorStore end