diff --git a/MultimodalQnA/README.md b/MultimodalQnA/README.md index bda42ee285..6b92b19062 100644 --- a/MultimodalQnA/README.md +++ b/MultimodalQnA/README.md @@ -90,7 +90,7 @@ In the below, we provide a table that describes for each microservice component | MicroService | Open Source Project | HW | Port | Endpoint | | ------------ | --------------------- | ----- | ---- | ----------------------------------------------------------- | | Embedding | Langchain | Xeon | 6000 | /v1/embeddings | -| Retriever | Langchain, Redis | Xeon | 7000 | /v1/multimodal_retrieval | +| Retriever | Langchain, Redis | Xeon | 7000 | /v1/retrieval | | LVM | Langchain, TGI | Gaudi | 9399 | /v1/lvm | | Dataprep | Redis, Langchain, TGI | Gaudi | 6007 | /v1/generate_transcripts, /v1/generate_captions, /v1/ingest | diff --git a/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md b/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md index f49b9815f1..4e3a031da9 100644 --- a/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md +++ b/MultimodalQnA/docker_compose/amd/gpu/rocm/README.md @@ -178,7 +178,7 @@ curl http://${host_ip}:$MM_EMBEDDING_PORT_MICROSERVICE/v1/embeddings \ ```bash export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)") -curl http://${host_ip}:7000/v1/multimodal_retrieval \ +curl http://${host_ip}:7000/v1/retrieval \ -X POST \ -H "Content-Type: application/json" \ -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" diff --git a/MultimodalQnA/docker_compose/intel/cpu/xeon/README.md b/MultimodalQnA/docker_compose/intel/cpu/xeon/README.md index 7e4fa6894a..d3cff8e32c 100644 --- a/MultimodalQnA/docker_compose/intel/cpu/xeon/README.md +++ b/MultimodalQnA/docker_compose/intel/cpu/xeon/README.md @@ -10,116 +10,16 @@ For detailed information about these instance types, you can refer to this [link After launching your instance, you can connect to it using SSH (for Linux instances) or Remote Desktop Protocol (RDP) (for Windows instances). From there, you'll have full access to your Xeon server, allowing you to install, configure, and manage your applications as needed. -**Certain ports in the EC2 instance need to opened up in the security group, for the microservices to work with the curl commands** - -> See one example below. Please open up these ports in the EC2 instance based on the IP addresses you want to allow - -``` -redis-vector-db -=============== -Port 6379 - Open to 0.0.0.0/0 -Port 8001 - Open to 0.0.0.0/0 - -embedding-multimodal-bridgetower -===================== -Port 6006 - Open to 0.0.0.0/0 - -embedding -========= -Port 6000 - Open to 0.0.0.0/0 - -retriever-multimodal-redis -========= -Port 7000 - Open to 0.0.0.0/0 - -lvm-llava -================ -Port 8399 - Open to 0.0.0.0/0 - -lvm -=== -Port 9399 - Open to 0.0.0.0/0 - -whisper -=== -port 7066 - Open to 0.0.0.0/0 - -dataprep-multimodal-redis -=== -Port 6007 - Open to 0.0.0.0/0 - -multimodalqna -========================== -Port 8888 - Open to 0.0.0.0/0 - -multimodalqna-ui -===================== -Port 5173 - Open to 0.0.0.0/0 -``` - ## Setup Environment Variables Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. 
-**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** - -> Change the External_Public_IP below with the actual IPV4 value - -``` -export host_ip="External_Public_IP" -``` - -**Append the value of the public IP address to the no_proxy list** - -```bash -export your_no_proxy=${your_no_proxy},"External_Public_IP" -``` - ```bash -export no_proxy=${your_no_proxy} -export http_proxy=${your_http_proxy} -export https_proxy=${your_http_proxy} -export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip} -export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip} -export LVM_SERVICE_HOST_IP=${host_ip} -export MEGA_SERVICE_HOST_IP=${host_ip} -export WHISPER_PORT=7066 -export WHISPER_SERVER_ENDPOINT="http://${host_ip}:${WHISPER_PORT}/v1/asr" -export WHISPER_MODEL="base" -export MAX_IMAGES=1 -export REDIS_DB_PORT=6379 -export REDIS_INSIGHTS_PORT=8001 -export REDIS_URL="redis://${host_ip}:${REDIS_DB_PORT}" -export REDIS_HOST=${host_ip} -export INDEX_NAME="mm-rag-redis" -export DATAPREP_MMR_PORT=6007 -export DATAPREP_INGEST_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/ingest" -export DATAPREP_GEN_TRANSCRIPT_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_transcripts" -export DATAPREP_GEN_CAPTION_SERVICE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/generate_captions" -export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/get" -export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:${DATAPREP_MMR_PORT}/v1/dataprep/delete" -export EMM_BRIDGETOWER_PORT=6006 -export EMBEDDING_MODEL_ID="BridgeTower/bridgetower-large-itm-mlm-itc" -export BRIDGE_TOWER_EMBEDDING=true -export MMEI_EMBEDDING_ENDPOINT="http://${host_ip}:$EMM_BRIDGETOWER_PORT" -export MM_EMBEDDING_PORT_MICROSERVICE=6000 -export REDIS_RETRIEVER_PORT=7000 -export LVM_PORT=9399 -export LLAVA_SERVER_PORT=8399 -export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf" -export LVM_ENDPOINT="http://${host_ip}:$LLAVA_SERVER_PORT" -export MEGA_SERVICE_PORT=8888 -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:$MEGA_SERVICE_PORT/v1/multimodalqna" -export UI_PORT=5173 +source set_env.sh ``` Note: Please replace with `host_ip` with you external IP address, do not use localhost. -> Note: The `MAX_IMAGES` environment variable is used to specify the maximum number of images that will be sent from the LVM service to the LLaVA server. -> If an image list longer than `MAX_IMAGES` is sent to the LVM server, a shortened image list will be sent to the LLaVA service. If the image list -> needs to be shortened, the most recent images (the ones at the end of the list) are prioritized to send to the LLaVA service. Some LLaVA models have not -> been trained with multiple images and may lead to inaccurate results. If `MAX_IMAGES` is not set, it will default to `1`. - ## πŸš€ Build Docker Images ### 1. Build embedding-multimodal-bridgetower Image @@ -146,7 +46,13 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_ ### 3. Build LVM Images -Build lvm-llava image +Pull vllm image + +```bash +docker pull opea/vllm:latest +``` + +Build lvm-llava image (Optional) ```bash docker build --no-cache -t opea/lvm-llava:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/llava/Dockerfile . @@ -197,14 +103,15 @@ Then run the command `docker images`, you will have the following 11 Docker Imag 1. `opea/dataprep:latest` 2. `opea/lvm:latest` -3. `opea/lvm-llava:latest` -4. 
`opea/retriever:latest` -5. `opea/whisper:latest` -6. `opea/redis-vector-db` -7. `opea/embedding:latest` -8. `opea/embedding-multimodal-bridgetower:latest` -9. `opea/multimodalqna:latest` -10. `opea/multimodalqna-ui:latest` +3. `opea/vllm:latest` +4. `opea/lvm-llava:latest` (Optional) +5. `opea/retriever:latest` +6. `opea/whisper:latest` +7. `opea/redis-vector-db` +8. `opea/embedding:latest` +9. `opea/embedding-multimodal-bridgetower:latest` +10. `opea/multimodalqna:latest` +11. `opea/multimodalqna-ui:latest` ## πŸš€ Start Microservices @@ -264,7 +171,7 @@ curl http://${host_ip}:$MM_EMBEDDING_PORT_MICROSERVICE/v1/embeddings \ ```bash export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)") -curl http://${host_ip}:${REDIS_RETRIEVER_PORT}/v1/multimodal_retrieval \ +curl http://${host_ip}:${REDIS_RETRIEVER_PORT}/v1/retrieval \ -X POST \ -H "Content-Type: application/json" \ -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" @@ -279,7 +186,7 @@ curl ${WHISPER_SERVER_ENDPOINT} \ -d '{"audio" : "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' ``` -5. lvm-llava +5. lvm-llava (Optional) ```bash curl http://${host_ip}:${LLAVA_SERVER_PORT}/generate \ diff --git a/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml b/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml index 31f543c755..328cdd43ee 100644 --- a/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ b/MultimodalQnA/docker_compose/intel/cpu/xeon/compose.yaml @@ -24,7 +24,7 @@ services: container_name: dataprep-multimodal-redis depends_on: - redis-vector-db - - lvm-llava + - vllm-service ports: - "${DATAPREP_MMR_PORT}:5000" environment: @@ -97,24 +97,31 @@ services: LOGFLAG: ${LOGFLAG} RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS" restart: unless-stopped - lvm-llava: - image: ${REGISTRY:-opea}/lvm-llava:${TAG:-latest} - container_name: lvm-llava + vllm-service: + image: ${REGISTRY:-opea}/vllm:${TAG:-latest} + container_name: vllm-service ports: - - "${LLAVA_SERVER_PORT}:${LLAVA_SERVER_PORT}" + - ${VLLM_PORT:-8399}:80 + volumes: + - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - LLAVA_SERVER_PORT: ${LLAVA_SERVER_PORT} - LVM_PORT: ${LVM_PORT} - entrypoint: ["python", "llava_server.py", "--device", "cpu", "--model_name_or_path", $LVM_MODEL_ID] - restart: unless-stopped + HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + VLLM_TORCH_PROFILER_DIR: "/mnt" + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] + interval: 10s + timeout: 10s + retries: 100 + command: --model $LVM_MODEL_ID --host 0.0.0.0 --port 80 --chat-template examples/template_llava.jinja # https://docs.vllm.ai/en/v0.5.0/models/vlm.html + lvm: image: ${REGISTRY:-opea}/lvm:${TAG:-latest} container_name: lvm depends_on: - - lvm-llava + - vllm-service ports: - "${LVM_PORT}:${LVM_PORT}" ipc: host @@ -122,11 +129,10 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - LVM_COMPONENT_NAME: "OPEA_LLAVA_LVM" + LVM_COMPONENT_NAME: "OPEA_VLLM_LVM" LVM_ENDPOINT: ${LVM_ENDPOINT} LLAVA_SERVER_PORT: ${LLAVA_SERVER_PORT} LVM_PORT: ${LVM_PORT} - MAX_IMAGES: ${MAX_IMAGES:-1} restart: unless-stopped multimodalqna: image: ${REGISTRY:-opea}/multimodalqna:${TAG:-latest} diff --git a/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh b/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh index 057f90990c..9556aa321d 100755 --- 
a/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh +++ b/MultimodalQnA/docker_compose/intel/cpu/xeon/set_env.sh @@ -8,10 +8,6 @@ popd > /dev/null export host_ip=$(hostname -I | awk '{print $1}') -export no_proxy=${your_no_proxy} -export http_proxy=${your_http_proxy} -export https_proxy=${your_http_proxy} - export MM_EMBEDDING_SERVICE_HOST_IP=${host_ip} export MM_RETRIEVER_SERVICE_HOST_IP=${host_ip} export LVM_SERVICE_HOST_IP=${host_ip} @@ -44,6 +40,9 @@ export BRIDGE_TOWER_EMBEDDING=true export REDIS_RETRIEVER_PORT=7000 export LVM_PORT=9399 +# for vllm server +export VLLM_PORT=8399 +# for llava naive server export LLAVA_SERVER_PORT=8399 export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf" export LVM_ENDPOINT="http://${host_ip}:${LLAVA_SERVER_PORT}" diff --git a/MultimodalQnA/docker_compose/intel/hpu/gaudi/README.md b/MultimodalQnA/docker_compose/intel/hpu/gaudi/README.md index 2379fc3d4d..0276236ede 100644 --- a/MultimodalQnA/docker_compose/intel/hpu/gaudi/README.md +++ b/MultimodalQnA/docker_compose/intel/hpu/gaudi/README.md @@ -210,7 +210,7 @@ curl http://${host_ip}:$MM_EMBEDDING_PORT_MICROSERVICE/v1/embeddings \ ```bash export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)") -curl http://${host_ip}:7000/v1/multimodal_retrieval \ +curl http://${host_ip}:7000/v1/retrieval \ -X POST \ -H "Content-Type: application/json" \ -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" diff --git a/VideoQnA/Dockerfile b/VideoQnA/Dockerfile deleted file mode 100644 index 2aade6088f..0000000000 --- a/VideoQnA/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -ARG BASE_TAG=latest -FROM opea/comps-base:$BASE_TAG - -COPY ./videoqna.py $HOME/videoqna.py - -ENTRYPOINT ["python", "videoqna.py"] diff --git a/VideoQnA/README.md b/VideoQnA/README.md deleted file mode 100644 index e64a045df0..0000000000 --- a/VideoQnA/README.md +++ /dev/null @@ -1,100 +0,0 @@ -# VideoQnA Application - -VideoQnA is a framework that retrieves video based on provided user prompt. It uses only the video embeddings to perform vector similarity search in Intel's VDMS vector database and performs all operations on Intel Xeon CPU. The pipeline supports long form videos and time-based search. - -VideoQnA is implemented on top of [GenAIComps](https://github.com/opea-project/GenAIComps), with the architecture flow chart shows below: - -```mermaid ---- -config: - flowchart: - nodeSpacing: 400 - rankSpacing: 100 - curve: linear - themeVariables: - fontSize: 50px ---- -flowchart LR - %% Colors %% - classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 - classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 - classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 - classDef invisible fill:transparent,stroke:transparent; - style VideoQnA-MegaService stroke:#000000 - %% Subgraphs %% - subgraph VideoQnA-MegaService["VideoQnA-MegaService"] - direction LR - EM([Embedding MicroService]):::blue - RET([Retrieval MicroService]):::blue - RER([Rerank MicroService]):::blue - LVM([LVM MicroService]):::blue - end - subgraph User Interface - direction LR - a([User Input Query]):::orchid - UI([UI server
<br>]):::orchid - Ingest([Ingest<br>]):::orchid - end - - LOCAL_RER{{Reranking service<br>}} - CLIP_EM{{Embedding service<br>}} - VDB{{Vector DB<br><br>}} - V_RET{{Retriever service<br>}} - Ingest{{Ingest data<br>}} - DP([Data Preparation<br>]):::blue - LVM_gen{{LVM Service<br>}} - GW([VideoQnA GateWay<br>
]):::orange - - %% Data Preparation flow - %% Ingest data flow - direction LR - Ingest[Ingest data] --> UI - UI --> DP - DP <-.-> CLIP_EM - - %% Questions interaction - direction LR - a[User Input Query] --> UI - UI --> GW - GW <==> VideoQnA-MegaService - EM ==> RET - RET ==> RER - RER ==> LVM - - - %% Embedding service flow - direction LR - EM <-.-> CLIP_EM - RET <-.-> V_RET - RER <-.-> LOCAL_RER - LVM <-.-> LVM_gen - - direction TB - %% Vector DB interaction - V_RET <-.->VDB - DP <-.->VDB -``` - -- This project implements a Retrieval-Augmented Generation (RAG) workflow using LangChain, Intel VDMS VectorDB, and Text Generation Inference, optimized for Intel Xeon Scalable Processors. -- Video Processing: Videos are converted into feature vectors using mean aggregation and stored in the VDMS vector store. -- Query Handling: When a user submits a query, the system performs a similarity search in the vector store to retrieve the best-matching videos. -- Contextual Inference: The retrieved videos are then sent to the Large Vision Model (LVM) for inference, providing supplemental context for the query. - -## Deploy VideoQnA Service - -The VideoQnA service can be effortlessly deployed on Intel Xeon Scalable Processors. - -### Required Models - -By default, the embedding and LVM models are set to a default value as listed below: - -| Service | Model | -| --------- | ---------------------------- | -| Embedding | openai/clip-vit-base-patch32 | -| LVM | DAMO-NLP-SG/Video-LLaMA | - -### Deploy VideoQnA on Xeon - -For full instruction of deployment, please check [Guide](docker_compose/intel/cpu/xeon/README.md) - -Currently we support deploying VideoQnA services with docker compose, using the docker images `built from source`. Find the corresponding [compose.yaml](docker_compose/intel/cpu/xeon/compose.yaml). diff --git a/VideoQnA/assets/img/videoqna.gif b/VideoQnA/assets/img/videoqna.gif deleted file mode 100644 index 45bf7a462a..0000000000 Binary files a/VideoQnA/assets/img/videoqna.gif and /dev/null differ diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/README.md b/VideoQnA/docker_compose/intel/cpu/xeon/README.md deleted file mode 100644 index 478f99fe38..0000000000 --- a/VideoQnA/docker_compose/intel/cpu/xeon/README.md +++ /dev/null @@ -1,346 +0,0 @@ -# Build Mega Service of VideoQnA on Xeon - -This document outlines the deployment process for a videoqna application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `lvm`. We will publish the Docker images to Docker Hub soon, it will simplify the deployment process for this service. - -VideoQnA is a pipeline that retrieves video based on provided user prompt. It uses only the video embeddings to perform vector similarity search in Intel's VDMS vector database and performs all operations on Intel Xeon CPU. The pipeline supports long form videos and time-based search. 
- -## πŸš€ Port used for the microservices - -``` -dataprep -======== -Port 6007 - Open to 0.0.0.0/0 - -vdms-vector-db -=============== -Port 8001 - Open to 0.0.0.0/0 - -embedding -========= -Port 6000 - Open to 0.0.0.0/0 - -retriever -========= -Port 7000 - Open to 0.0.0.0/0 - -reranking -========= -Port 8000 - Open to 0.0.0.0/0 - -lvm video-llama -=============== -Port 9009 - Open to 0.0.0.0/0 - -lvm -=== -Port 9000 - Open to 0.0.0.0/0 - -chaqna-xeon-backend-server -========================== -Port 8888 - Open to 0.0.0.0/0 - -chaqna-xeon-ui-server -===================== -Port 5173 - Open to 0.0.0.0/0 -``` - -## πŸš€ Build Docker Images - -First of all, you need to build Docker Images locally and install the python package of it. - -### 1. Build Embedding Image - -```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build -t opea/embedding-multimodal-clip:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/clip/src/Dockerfile . -``` - -### 2. Build Retriever Image - -```bash -docker build -t opea/retriever:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/src/Dockerfile . -``` - -### 3. Build Reranking Image - -```bash -docker build -t opea/reranking:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/rerankings/src/Dockerfile . -``` - -### 4. Build LVM Image (Xeon) - -```bash -docker build -t opea/lvm-video-llama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/integrations/dependency/video-llama/Dockerfile . - -# LVM Service Image -docker build -t opea/lvm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/Dockerfile . -``` - -### 5. Build Dataprep Image - -```bash -docker build -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile . -``` - -### 6. Build MegaService Docker Image - -To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `videoqna.py` Python script. - -Build MegaService Docker image via below command: - -```bash -git clone https://github.com/opea-project/GenAIExamples.git -cd GenAIExamples/VideoQnA/ -docker build -t opea/videoqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -``` - -### 7. Build UI Docker Image - -Build frontend Docker image via below command: - -```bash -cd GenAIExamples/VideoQnA/ui/ -docker build -t opea/videoqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile . -``` - -Then run the command `docker images`, you will have the following 8 Docker Images: - -1. `opea/dataprep:latest` -2. `opea/embedding-multimodal-clip:latest` -3. `opea/retriever:latest` -4. `opea/reranking:latest` -5. `opea/video-llama-lvm-server:latest` -6. # `opea/lvm-video-llama:latest` -7. `opea/reranking-tei:latest` -8. `opea/lvm-video-llama:latest` -9. `opea/lvm:latest` -10. `opea/videoqna:latest` -11. `opea/videoqna-ui:latest` - -## πŸš€ Start Microservices - -### Setup Environment Variables - -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. 
- -**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** - -> Change the `External_Public_IP` below with the actual IPV4 value - -``` -export host_ip="External_Public_IP" -``` - -**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable** - -> Change the `Your_Huggingface_API_Token` below with your actual Huggingface API Token value - -``` -export your_hf_api_token="Your_Huggingface_API_Token" -``` - -**Append the value of the public IP address to the no_proxy list** - -``` -export your_no_proxy="${your_no_proxy},${host_ip}" -``` - -Then you can run below commands or `source set_env.sh` to set all the variables - -```bash -export no_proxy=${your_no_proxy} -export http_proxy=${your_http_proxy} -export https_proxy=${your_http_proxy} -export MEGA_SERVICE_HOST_IP=${host_ip} -export EMBEDDING_SERVICE_HOST_IP=${host_ip} -export RETRIEVER_SERVICE_HOST_IP=${host_ip} -export RERANK_SERVICE_HOST_IP=${host_ip} -export LVM_SERVICE_HOST_IP=${host_ip} - -export LVM_ENDPOINT="http://${host_ip}:9009" -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoqna" -export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check" -export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest" -export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get" -export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos" - -export VDMS_HOST=${host_ip} -export VDMS_PORT=8001 -export INDEX_NAME="mega-videoqna" -export LLM_DOWNLOAD="True" -export USECLIP=1 - -export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} -``` - -Note: Replace with `host_ip` with you external IP address, do not use localhost. - -### Start all the services with Docker Containers - -Before running the docker compose command, you need to be in the folder that has the docker compose yaml file. To avoid model re-download, we manage the volume separately using [external volume](https://docs.docker.com/reference/compose-file/volumes/#external). - -There are 2 parts of the pipeline: - -- The first is the data preparation, with which you could add your videos into the database. -- The second is the megaservice, serves as the main service, takes the user query, consumes the microservices to give the response. Including embedding, retrieving, reranking and LVM. - -In the deploy steps, you need to start the VDMS DB and dataprep firstly, then insert some sample data into it. After that you could get the megaservice up. - -```bash -cd GenAIExamples/VideoQnA/docker_compose/intel/cpu/xeon/ - -docker volume create video-llama-model -docker compose up vdms-vector-db dataprep -d -sleep 1m # wait for the services ready - -# Insert some sample data to the DB -curl -X POST http://${host_ip}:6007/v1/dataprep/ingest \ - -H "Content-Type: multipart/form-data" \ - -F "files=@./data/op_1_0320241830.mp4" - -# Bring all the others -docker compose up -d -# wait until all the services is up. The LVM server will download models, so it take ~1.5hr to get ready. -``` - -### Validate Microservices - -1. Dataprep Microservice - - Once the microservice is up, ingest the videos files into vector store using dataprep microservice. Both single and multiple file(s) uploads are supported. 
- - ```bash - # Single file upload - curl -X POST ${DATAPREP_SERVICE_ENDPOINT} \ - -H "Content-Type: multipart/form-data" \ - -F "files=@./file1.mp4" - # Multiple file upload - curl -X POST ${DATAPREP_SERVICE_ENDPOINT} \ - -H "Content-Type: multipart/form-data" \ - -F "files=@./file1.mp4" \ - -F "files=@./file2.mp4" \ - -F "files=@./file3.mp4" - ``` - - Use below method to check and download available videos the microservice. The download endpoint is also used for LVM and UI. - - ```bash - # List available videos - curl -X 'GET' ${DATAPREP_GET_VIDEO_LIST_ENDPOINT} -H 'accept: application/json' - # Download available video - curl -X 'GET' ${DATAPREP_GET_FILE_ENDPOINT}/video_name.mp4 -H 'accept: application/json' - ``` - -2. Embedding Microservice - - ```bash - curl http://${host_ip}:6000/v1/embeddings \ - -X POST \ - -d '{"text":"Sample text"}' \ - -H 'Content-Type: application/json' - ``` - -3. Retriever Microservice - - To consume the retriever microservice, you need to generate a mock embedding vector by Python script. The length of embedding vector - is determined by the embedding model. - Here we use the model `openai/clip-vit-base-patch32`, which vector size is 512. - - Check the vector dimension of your embedding model, set `your_embedding` dimension equals to it. - - ```bash - export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)") - curl http://${host_ip}:7000/v1/retrieval \ - -X POST \ - -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \ - -H 'Content-Type: application/json' - ``` - -4. Reranking Microservice - - ```bash - curl http://${host_ip}:8000/v1/reranking \ - -X 'POST' \ - -H 'accept: application/json' \ - -H 'Content-Type: application/json' \ - -d '{ - "retrieved_docs": [{"doc": [{"text": "this is the retrieved text"}]}], - "initial_query": "this is the query", - "top_n": 1, - "metadata": [ - {"other_key": "value", "video":"top_video_name", "timestamp":"20"} - ] - }' - ``` - -5. LVM backend Service - - In first startup, this service will take times to download the LLM file. After it's finished, the service will be ready. - - Use `docker logs lvm-video-llama` to check if the download is finished. - - ```bash - curl -X POST \ - "http://${host_ip}:9009/generate?video_url=silence_girl.mp4&start=0.0&duration=9&prompt=What%20is%20the%20person%20doing%3F&max_new_tokens=150" \ - -H "accept: */*" \ - -d '' - ``` - - > To avoid re-download for the model in case of restart, see [here](#clean-microservices) - -6. LVM Microservice - - This service depends on above LLM backend service startup. It will be ready after long time, to wait for them being ready in first startup. - - ```bash - curl http://${host_ip}:9000/v1/lvm\ - -X POST \ - -d '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}' \ - -H 'Content-Type: application/json' - ``` - - > Note that the local video file will be deleted after completion to conserve disk space. - -7. MegaService - - ```bash - curl http://${host_ip}:8888/v1/videoqna -H "Content-Type: application/json" -d '{ - "messages": "What is the man doing?", - "stream": "True" - }' - ``` - - > Note that the megaservice support only stream output. - -## πŸš€ Launch the UI - -To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. 
If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: - -```yaml - videoqna-xeon-ui-server: - image: opea/videoqna-ui:latest - ... - ports: - - "80:5173" # port map to host port 80 -``` - -Here is an example of running videoqna: - -![project-screenshot](../../../../assets/img/videoqna.gif) - -## Clean Microservices - -All the allocated resources could be easily removed by: - -```bash -docker compose -f compose.yaml down -``` - -If you plan to restart the service in the future, the above command is enough. The model file is saved in docker volume `video-llama-model` and will be reserved on your server. Next time when you restart the service, set `export LLM_DOWNLOAD="False"` before start to reuse the volume. - -To clean the volume: - -```bash -docker volume rm video-llama-model -``` diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml b/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml deleted file mode 100644 index 780ff3c704..0000000000 --- a/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ /dev/null @@ -1,155 +0,0 @@ - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -services: - vdms-vector-db: - image: intellabs/vdms:v2.8.0 - container_name: vdms-vector-db - ports: - - "8001:55555" - dataprep: - image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} - container_name: dataprep-vdms-server - depends_on: - - vdms-vector-db - ports: - - "6007:5000" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - VDMS_HOST: ${VDMS_HOST} - VDMS_PORT: ${VDMS_PORT} - INDEX_NAME: ${INDEX_NAME} - MULTIMODAL_DATAPREP: true - entrypoint: sh -c 'sleep 15 && python ingest_videos.py' - volumes: - - /home/$USER/.cache/clip:/home/user/.cache/clip - - /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub - embedding: - image: ${REGISTRY:-opea}/embedding-multimodal-clip:${TAG:-latest} - container_name: embedding-multimodal-server - ports: - - "6000:6000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - volumes: - - /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub - restart: unless-stopped - retriever: - image: ${REGISTRY:-opea}/retriever:${TAG:-latest} - container_name: retriever-vdms-server - depends_on: - - vdms-vector-db - ports: - - "7000:7000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - VDMS_INDEX_NAME: ${INDEX_NAME} - VDMS_HOST: ${VDMS_HOST} - VDMS_PORT: ${VDMS_PORT} - VDMS_USE_CLIP: ${USECLIP} - LOGFLAG: ${LOGFLAG} - RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_VDMS" - entrypoint: sh -c 'sleep 30 && python retriever_vdms.py' - restart: unless-stopped - volumes: - - /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub - reranking: - image: ${REGISTRY:-opea}/reranking:${TAG:-latest} - container_name: reranking-tei-server - ports: - - "8000:8000" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - CHUNK_DURATION: ${CHUNK_DURATION} - FILE_SERVER_ENDPOINT: ${DATAPREP_GET_FILE_ENDPOINT} - DATAPREP_GET_VIDEO_LIST_ENDPOINT: ${DATAPREP_GET_VIDEO_LIST_ENDPOINT} - restart: unless-stopped - lvm-video-llama: - image: ${REGISTRY:-opea}/lvm-video-llama:${TAG:-latest} - container_name: lvm-video-llama - ports: - - "9009:9009" - ipc: host - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - 
no_proxy: ${no_proxy} - llm_download: ${LLM_DOWNLOAD} - volumes: - - "/home/$USER/.cache:/home/user/.cache" - - video-llama-model:/home/user/model - restart: unless-stopped - lvm: - image: ${REGISTRY:-opea}/lvm:${TAG:-latest} - container_name: lvm - ports: - - "9000:9000" - ipc: host - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - LVM_COMPONENT_NAME: "OPEA_VIDEO_LLAMA_LVM" - LVM_ENDPOINT: ${LVM_ENDPOINT} - restart: unless-stopped - depends_on: - - lvm-video-llama - videoqna-xeon-backend-server: - image: ${REGISTRY:-opea}/videoqna:${TAG:-latest} - container_name: videoqna-xeon-backend-server - depends_on: - - vdms-vector-db - - dataprep - - embedding - - retriever - - reranking - - lvm-video-llama - - lvm - ports: - - "8888:8888" - entrypoint: sh -c 'sleep 45 && python videoqna.py' - environment: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} - EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP} - RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP} - RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP} - LVM_SERVICE_HOST_IP: ${LVM_SERVICE_HOST_IP} - ipc: host - restart: always - videoqna-xeon-ui-server: - image: ${REGISTRY:-opea}/videoqna-ui:${TAG:-latest} - container_name: videoqna-xeon-ui-server - depends_on: - - videoqna-xeon-backend-server - ports: - - "5173:5173" - environment: - https_proxy: ${https_proxy} - http_proxy: ${http_proxy} - no_proxy: ${no_proxy} - BACKEND_SERVICE_ENDPOINT: ${BACKEND_SERVICE_ENDPOINT} - BACKEND_HEALTH_CHECK_ENDPOINT: ${BACKEND_HEALTH_CHECK_ENDPOINT} - ipc: host - restart: always -volumes: - video-llama-model: - external: true -networks: - default: - driver: bridge diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/data/op_1_0320241830.mp4 b/VideoQnA/docker_compose/intel/cpu/xeon/data/op_1_0320241830.mp4 deleted file mode 100644 index 29c5dffcdb..0000000000 Binary files a/VideoQnA/docker_compose/intel/cpu/xeon/data/op_1_0320241830.mp4 and /dev/null differ diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh b/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh deleted file mode 100644 index dcf574774b..0000000000 --- a/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -pushd "../../../../../" > /dev/null -source .set_env.sh -popd > /dev/null - -host_ip=$(hostname -I | awk '{print $1}') - -export MEGA_SERVICE_HOST_IP=${host_ip} -export EMBEDDING_SERVICE_HOST_IP=${host_ip} -export RETRIEVER_SERVICE_HOST_IP=${host_ip} -export RERANK_SERVICE_HOST_IP=${host_ip} -export LVM_SERVICE_HOST_IP=${host_ip} - -export LVM_ENDPOINT="http://${host_ip}:9009" -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/videoqna" -export BACKEND_HEALTH_CHECK_ENDPOINT="http://${host_ip}:8888/v1/health_check" -export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/ingest" -export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get" -export DATAPREP_GET_VIDEO_LIST_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_videos" - -export VDMS_HOST=${host_ip} -export VDMS_PORT=8001 -export INDEX_NAME="mega-videoqna" -export USECLIP=1 -export LLM_DOWNLOAD="True" # Set to "False" before redeploy LVM server to avoid model download diff --git a/VideoQnA/docker_image_build/build.yaml b/VideoQnA/docker_image_build/build.yaml deleted file mode 100644 index 9ed0bee955..0000000000 --- 
a/VideoQnA/docker_image_build/build.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - videoqna: - build: - args: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - context: ../ - dockerfile: ./Dockerfile - image: ${REGISTRY:-opea}/videoqna:${TAG:-latest} - videoqna-ui: - build: - context: ../ui - dockerfile: ./docker/Dockerfile - extends: videoqna - image: ${REGISTRY:-opea}/videoqna-ui:${TAG:-latest} - dataprep: - build: - context: GenAIComps - dockerfile: comps/dataprep/src/Dockerfile - extends: videoqna - image: ${REGISTRY:-opea}/dataprep:${TAG:-latest} - embedding-multimodal-clip: - build: - context: GenAIComps - dockerfile: comps/third_parties/clip/src/Dockerfile - extends: videoqna - image: ${REGISTRY:-opea}/embedding-multimodal-clip:${TAG:-latest} - retriever: - build: - context: GenAIComps - dockerfile: comps/retrievers/src/Dockerfile - extends: videoqna - image: ${REGISTRY:-opea}/retriever:${TAG:-latest} - reranking: - build: - context: GenAIComps - dockerfile: comps/rerankings/src/Dockerfile - extends: videoqna - image: ${REGISTRY:-opea}/reranking:${TAG:-latest} - lvm-video-llama: - build: - context: GenAIComps - dockerfile: comps/lvms/src/integrations/dependency/video-llama/Dockerfile - extends: videoqna - image: ${REGISTRY:-opea}/lvm-video-llama:${TAG:-latest} - lvm: - build: - context: GenAIComps - dockerfile: comps/lvms/src/Dockerfile - extends: videoqna - image: ${REGISTRY:-opea}/lvm:${TAG:-latest} diff --git a/VideoQnA/tests/test_compose_on_xeon.sh b/VideoQnA/tests/test_compose_on_xeon.sh deleted file mode 100755 index 614c2efc47..0000000000 --- a/VideoQnA/tests/test_compose_on_xeon.sh +++ /dev/null @@ -1,263 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" -export REGISTRY=${IMAGE_REPO} -export TAG=${IMAGE_TAG} - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . -type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log - - docker pull intellabs/vdms:v2.8.0 - docker images && sleep 1s -} - - -function start_services() { - cd $WORKPATH/docker_compose/intel/cpu/xeon/ - - source set_env.sh - docker volume create video-llama-model - docker compose up vdms-vector-db dataprep -d - sleep 30s - - # Insert some sample data to the DB - HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep/ingest \ - -H "Content-Type: multipart/form-data" \ - -F "files=@./data/op_1_0320241830.mp4") - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "Inserted some data at the beginning." - else - echo "Inserted failed at the beginning. Received status was $HTTP_STATUS" - docker logs dataprep-vdms-server >> ${LOG_PATH}/dataprep.log - exit 1 - fi - # Bring all the others - docker compose up -d > ${LOG_PATH}/start_services_with_compose.log - sleep 1m - - # List of containers running uvicorn - list=("dataprep-vdms-server" "embedding-multimodal-server" "retriever-vdms-server" "reranking-tei-server" "lvm-video-llama" "lvm-video-llama" "videoqna-xeon-backend-server") - - # Define the maximum time limit in seconds - TIME_LIMIT=5400 - start_time=$(date +%s) - - check_condition() { - local item=$1 - - if docker logs $item 2>&1 | grep -q "Uvicorn running on"; then - return 0 - else - return 1 - fi - } - - # Main loop - while [[ ${#list[@]} -gt 0 ]]; do - # Get the current time - current_time=$(date +%s) - elapsed_time=$((current_time - start_time)) - - # Exit if time exceeds the limit - if (( elapsed_time >= TIME_LIMIT )); then - echo "Time limit exceeded." - break - fi - - # Iterate through the list - for i in "${!list[@]}"; do - item=${list[i]} - if check_condition "$item"; then - echo "Condition met for $item, removing from list." - unset list[i] - else - echo "Condition not met for $item, keeping in list." - fi - done - - # Clean up the list to remove empty elements - list=("${list[@]}") - - # Check if the list is empty - if [[ ${#list[@]} -eq 0 ]]; then - echo "List is empty. Exiting." - break - fi - sleep 5m - done - - if docker logs videoqna-xeon-ui-server 2>&1 | grep -q "Streamlit app"; then - return 0 - else - return 1 - fi - -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_microservices() { - # Check if the microservices are running correctly. 
- cd $WORKPATH/docker_compose/intel/cpu/xeon//data - - # dataprep microservice - HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep/ingest \ - -H "Content-Type: multipart/form-data" \ - -F "files=@./op_1_0320241830.mp4") - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "Dataprep microservice is running correctly." - else - echo "Dataprep microservice is not running correctly. Received status was $HTTP_STATUS" - docker logs dataprep-vdms-server >> ${LOG_PATH}/dataprep.log - exit 1 - fi - - # Embedding Microservice - validate_services \ - "${ip_address}:6000/v1/embeddings" \ - "Sample text" \ - "embedding" \ - "embedding-multimodal-server" \ - '{"text":"Sample text"}' - - # Retriever Microservice - export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)") - validate_services \ - "${ip_address}:7000/v1/retrieval" \ - "retrieved_docs" \ - "retriever" \ - "retriever-vdms-server" \ - "{\"text\":\"test\",\"embedding\":${your_embedding}}" - - # Reranking Microservice - validate_services \ - "${ip_address}:8000/v1/reranking" \ - "video_url" \ - "reranking" \ - "reranking-tei-server" \ - '{ - "retrieved_docs": [{"doc": [{"text": "retrieved text"}]}], - "initial_query": "query", - "top_n": 1, - "metadata": [ - {"other_key": "value", "video":"top_video_name", "timestamp":"20"} - ] - }' - - # LVM Microservice - validate_services \ - "${ip_address}:9000/v1/lvm" \ - "silence" \ - "lvm" \ - "lvm-video-llama" \ - '{"video_url":"https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4","chunk_start": 0,"chunk_duration": 7,"prompt":"What is the person doing?","max_new_tokens": 50}' - - sleep 1s -} - -function validate_megaservice() { - validate_services \ - "${ip_address}:8888/v1/videoqna" \ - "man" \ - "videoqna-xeon-backend-server" \ - "videoqna-xeon-backend-server" \ - '{"messages":"What is the man doing?","stream":"True"}' -} - -function validate_frontend() { - HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X GET http://${ip_address}:5173/_stcore/health) - - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "Frontend is running correctly." - local CONTENT=$(curl -s -X GET http://${ip_address}:5173/_stcore/health) - if echo "$CONTENT" | grep -q "ok"; then - echo "Frontend Content is as expected." - else - echo "Frontend Content does not match the expected result: $CONTENT" - docker logs videoqna-xeon-ui-server >> ${LOG_PATH}/ui.log - exit 1 - fi - else - echo "Frontend is not running correctly. 
Received status was $HTTP_STATUS" - docker logs videoqna-xeon-ui-server >> ${LOG_PATH}/ui.log - exit 1 - fi -} - -function stop_docker() { - cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose stop && docker compose rm -f - docker volume rm video-llama-model -} - -function main() { - - stop_docker - - if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi - start_services - - validate_microservices - validate_megaservice - validate_frontend - - stop_docker - echo y | docker system prune - -} - -# main diff --git a/VideoQnA/ui/docker/Dockerfile b/VideoQnA/ui/docker/Dockerfile deleted file mode 100644 index 019999de8a..0000000000 --- a/VideoQnA/ui/docker/Dockerfile +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -FROM python:3.9-slim - -WORKDIR /app - -RUN apt-get update && apt-get install -y curl && \ - rm -rf /var/lib/apt/lists/* - - -RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir streamlit - -COPY ui.py /app/ui.py - -EXPOSE 5173 - -HEALTHCHECK CMD curl --fail http://localhost:5173/_stcore/health - -ENTRYPOINT ["streamlit", "run", "ui.py", "--server.port=5173", "--server.address=0.0.0.0"] diff --git a/VideoQnA/ui/ui.py b/VideoQnA/ui/ui.py deleted file mode 100644 index cfcbaf0897..0000000000 --- a/VideoQnA/ui/ui.py +++ /dev/null @@ -1,207 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import json -import os -import time -from io import BytesIO - -import requests -import streamlit as st - -BACKEND_SERVICE_ENDPOINT = os.getenv("BACKEND_SERVICE_ENDPOINT", "http://localhost:8888/v1/videoqna") -BACKEND_HEALTH_CHECK_ENDPOINT = os.getenv("BACKEND_HEALTH_CHECK_ENDPOINT", "http://localhost:8888/v1/health_check") - - -def perform_health_check(): - url = BACKEND_HEALTH_CHECK_ENDPOINT - response = requests.get(url, headers={"accept": "application/json"}) - return response - - -def download_video(url): - """Download video from URL and return as bytes.""" - response = requests.get(url) - if response.status_code == 200: - return BytesIO(response.content) - else: - st.error(f"Failed to download video. 
Status code: {response.status_code}") - return None - - -def play_video(url, offset): - """Play video from URL with specified offset.""" - with st.spinner("Loading Video ..."): - video_bytes = download_video(url) - if video_bytes: - st.video(video_bytes, start_time=int(offset)) - - -def clear_chat_history(): - st.session_state.example_video = "Enter Text" - st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}] - - -def handle_selectbox_change(): - prompt = st.session_state.example_video - - if prompt is not None: - st.session_state["prompt"] = prompt - st.session_state.messages.append({"role": "user", "content": prompt}) - - -def handle_chat_input(): - print("st.session_state.custom_prompt update", st.session_state.custom_prompt) - prompt = st.session_state.custom_prompt - - st.session_state["prompt"] = prompt - st.session_state.messages.append({"role": "user", "content": prompt}) - - -def handle_message(col): - params = None - full_response = "" - - # Generate a new response if last message is not from assistant - if st.session_state.messages[-1]["role"] != "assistant": - # Handle user messages here - with st.chat_message("assistant"): - placeholder = st.empty() - start = time.time() - prompt = st.session_state["prompt"] - request_data = {"messages": prompt, "stream": "True"} - try: - response = requests.post(BACKEND_SERVICE_ENDPOINT, data=json.dumps(request_data), stream=True) - response.raise_for_status() - for chunk in response.iter_content(chunk_size=8192): - if chunk: - if params is None: - try: - chunk_str = chunk.decode("utf-8").replace("'", '"') - params = json.loads(chunk_str) - - video_url = params["video_url"] - chunk_start = params["chunk_start"] - print("VIDEO NAME USED IN PLAYBACK: ", video_url) - - video_name = video_url.split("/")[-1] - full_response += f"Most relevant retrieved video is **{video_name}** \n\n" - placeholder.markdown(full_response) - - with col: - play_video(video_url, chunk_start) - - except json.JSONDecodeError: - print("In the param decode error branch") - print(chunk.decode("utf-8")) - else: - new_text = chunk.decode("utf-8") - # print(new_text, end=" ", flush=True) - full_response += new_text - placeholder.markdown(full_response) - # Fake response - # video_url = "https://github.com/DAMO-NLP-SG/Video-LLaMA/raw/main/examples/silence_girl.mp4" - # chunk_start=0 - # video_name = video_url.split('/')[-1] - # full_response += f"Most relevant retrieved video is **{video_name}** \n\n" - # placeholder.markdown(full_response) - # with col: - # play_video(video_url, chunk_start) - # for i in range(10): - # full_response += f"new_text {i} " - # time.sleep(1) - # placeholder.markdown(full_response) - - except requests.HTTPError as http_err: - st.error(f"HTTP error occurred: {http_err}") - except requests.RequestException as req_err: - st.error(f"Error occurred: {req_err}") - except Exception as err: - st.error(f"An unexpected error occurred: {err}") - - end = time.time() - full_response += f"\n\nπŸš€ Generated in {(end - start):.4f} seconds." 
- placeholder.markdown(full_response) - - message = {"role": "assistant", "content": full_response} - - st.session_state.messages.append(message) - - -def display_messages(): - # Display chat messages - for message in st.session_state.messages: - with st.chat_message(message["role"]): - st.write(message["content"]) - - -def main(): - st.set_page_config(initial_sidebar_state="collapsed", layout="wide") - st.title("VideoQnA") - title_alignment = """ - - """ - st.markdown(title_alignment, unsafe_allow_html=True) - st.sidebar.button("Clear Chat History", on_click=clear_chat_history) - - placeholder = st.empty() - - # check server health - if "health_check" not in st.session_state.keys(): - with st.spinner("Checking health of the server..."): - time.sleep(1) - response = perform_health_check() - if response.status_code == 200: - placeholder.success("Server is healthy!", icon="βœ…") - time.sleep(1) - placeholder.empty() # Remove the message - st.session_state["health_check"] = True - else: - st.error(f"Server health check failed with status code {response.status_code}") - st.stop() - - # Initialize conversation state - if "messages" not in st.session_state.keys(): - st.session_state.messages = [{"role": "assistant", "content": "How may I assist you today?"}] - if "prompt" not in st.session_state.keys(): - st.session_state["prompt"] = "" - - col1, col2 = st.columns([2, 1]) - - with col1: - st.selectbox( - "Example Prompts", - ( - "Man wearing glasses", - "People reading item description", - "Man holding red shopping basket", - "Was there any person wearing a blue shirt seen today?", - "Was there any person wearing a blue shirt seen in the last 6 hours?", - "Was there any person wearing a blue shirt seen last Sunday?", - "Was a person wearing glasses seen in the last 30 minutes?", - "Was a person wearing glasses seen in the last 72 hours?", - ), - key="example_video", - index=None, - placeholder="--- Options ---", - on_change=handle_selectbox_change, - ) - - st.chat_input(disabled=False, key="custom_prompt", on_submit=handle_chat_input) - - with col1: - display_messages() - handle_message(col2) - - -if __name__ == "__main__": - main() diff --git a/VideoQnA/videoqna.py b/VideoQnA/videoqna.py deleted file mode 100644 index c447dd2abf..0000000000 --- a/VideoQnA/videoqna.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType -from comps.cores.mega.utils import handle_message -from comps.cores.proto.api_protocol import ( - ChatCompletionRequest, - ChatCompletionResponse, - ChatCompletionResponseChoice, - ChatMessage, - UsageInfo, -) -from comps.cores.proto.docarray import LLMParams -from fastapi import Request -from fastapi.responses import StreamingResponse - -MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888)) -EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0") -EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000)) -RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0") -RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000)) -RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0") -RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000)) -LVM_SERVICE_HOST_IP = os.getenv("LVM_SERVICE_HOST_IP", "0.0.0.0") -LVM_SERVICE_PORT = int(os.getenv("LVM_SERVICE_PORT", 9000)) - - -class VideoQnAService: - def 
__init__(self, host="0.0.0.0", port=8888): - self.host = host - self.port = port - self.megaservice = ServiceOrchestrator() - self.endpoint = str(MegaServiceEndpoint.VIDEO_RAG_QNA) - - def add_remote_service(self): - embedding = MicroService( - name="embedding", - host=EMBEDDING_SERVICE_HOST_IP, - port=EMBEDDING_SERVICE_PORT, - endpoint="/v1/embeddings", - use_remote_service=True, - service_type=ServiceType.EMBEDDING, - ) - retriever = MicroService( - name="retriever", - host=RETRIEVER_SERVICE_HOST_IP, - port=RETRIEVER_SERVICE_PORT, - endpoint="/v1/retrieval", - use_remote_service=True, - service_type=ServiceType.RETRIEVER, - ) - rerank = MicroService( - name="rerank", - host=RERANK_SERVICE_HOST_IP, - port=RERANK_SERVICE_PORT, - endpoint="/v1/reranking", - use_remote_service=True, - service_type=ServiceType.RERANK, - ) - lvm = MicroService( - name="lvm", - host=LVM_SERVICE_HOST_IP, - port=LVM_SERVICE_PORT, - endpoint="/v1/lvm", - use_remote_service=True, - service_type=ServiceType.LVM, - ) - self.megaservice.add(embedding).add(retriever).add(rerank).add(lvm) - self.megaservice.flow_to(embedding, retriever) - self.megaservice.flow_to(retriever, rerank) - self.megaservice.flow_to(rerank, lvm) - - async def handle_request(self, request: Request): - data = await request.json() - stream_opt = data.get("stream", False) - chat_request = ChatCompletionRequest.parse_obj(data) - prompt = handle_message(chat_request.messages) - parameters = LLMParams( - max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, - top_k=chat_request.top_k if chat_request.top_k else 10, - top_p=chat_request.top_p if chat_request.top_p else 0.95, - temperature=chat_request.temperature if chat_request.temperature else 0.01, - frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0, - presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0, - repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03, - stream=stream_opt, - ) - result_dict, runtime_graph = await self.megaservice.schedule( - initial_inputs={"text": prompt}, llm_parameters=parameters - ) - for node, response in result_dict.items(): - # Here it suppose the last microservice in the megaservice is LVM. 
- if ( - isinstance(response, StreamingResponse) - and node == list(self.megaservice.services.keys())[-1] - and self.megaservice.services[node].service_type == ServiceType.LVM - ): - return response - last_node = runtime_graph.all_leaves()[-1] - response = result_dict[last_node]["text"] - choices = [] - usage = UsageInfo() - choices.append( - ChatCompletionResponseChoice( - index=0, - message=ChatMessage(role="assistant", content=response), - finish_reason="stop", - ) - ) - return ChatCompletionResponse(model="videoqna", choices=choices, usage=usage) - - def start(self): - self.service = MicroService( - self.__class__.__name__, - service_role=ServiceRoleType.MEGASERVICE, - host=self.host, - port=self.port, - endpoint=self.endpoint, - input_datatype=ChatCompletionRequest, - output_datatype=ChatCompletionResponse, - ) - self.service.add_route(self.endpoint, self.handle_request, methods=["POST"]) - self.service.start() - - -if __name__ == "__main__": - videoqna = VideoQnAService(port=MEGA_SERVICE_PORT) - videoqna.add_remote_service() - videoqna.start() diff --git a/VisualQnA/Dockerfile b/VisualQnA/Dockerfile deleted file mode 100644 index 95936d9c03..0000000000 --- a/VisualQnA/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -ARG BASE_TAG=latest -FROM opea/comps-base:$BASE_TAG - -COPY ./visualqna.py $HOME/visualqna.py - -ENTRYPOINT ["python", "visualqna.py"] diff --git a/VisualQnA/README.md b/VisualQnA/README.md deleted file mode 100644 index 74ab54fdff..0000000000 --- a/VisualQnA/README.md +++ /dev/null @@ -1,146 +0,0 @@ -# Visual Question and Answering - -Visual Question Answering (VQA) is the task of answering open-ended questions based on an image. The input to models supporting this task is typically a combination of an image and a question, and the output is an answer expressed in natural language. - -Some noteworthy use case examples for VQA include: - -- Accessibility applications for visually impaired individuals. -- Education: posing questions about visual materials presented in lectures or textbooks. VQA can also be utilized in interactive museum exhibits or historical sites. -- Customer service and e-commerce: VQA can enhance user experience by letting users ask questions about products. -- Image retrieval: VQA models can be used to retrieve images with specific characteristics. For example, the user can ask β€œIs there a dog?” to find all images with dogs from a set of images. - -General architecture of VQA shows below: - -![VQA](./assets/img/vqa.png) - -The VisualQnA example is implemented using the component-level microservices defined in [GenAIComps](https://github.com/opea-project/GenAIComps). The flow chart below shows the information flow between different microservices for this example. 
- -```mermaid ---- -config: - flowchart: - nodeSpacing: 400 - rankSpacing: 100 - curve: linear - themeVariables: - fontSize: 50px ---- -flowchart LR - %% Colors %% - classDef blue fill:#ADD8E6,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 - classDef orange fill:#FBAA60,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 - classDef orchid fill:#C26DBC,stroke:#ADD8E6,stroke-width:2px,fill-opacity:0.5 - classDef invisible fill:transparent,stroke:transparent; - style VisualQnA-MegaService stroke:#000000 - - %% Subgraphs %% - subgraph VisualQnA-MegaService["VisualQnA MegaService "] - direction LR - LVM([LVM MicroService]):::blue - end - subgraph UserInterface[" User Interface "] - direction LR - a([User Input Query]):::orchid - Ingest([Ingest data]):::orchid - UI([UI server
]):::orchid - end - - - LVM_gen{{LVM Service
}} - GW([VisualQnA GateWay
]):::orange - NG([Nginx MicroService]):::blue - - - %% Questions interaction - direction LR - Ingest[Ingest data] --> UI - a[User Input Query] --> |Need Proxy Server|NG - a[User Input Query] --> UI - NG --> UI - UI --> GW - GW <==> VisualQnA-MegaService - - - %% Embedding service flow - direction LR - LVM <-.-> LVM_gen - -``` - -This example guides you through how to deploy a [LLaVA-NeXT](https://github.com/LLaVA-VL/LLaVA-NeXT) (Open Large Multimodal Models) model on [Intel Gaudi2](https://www.intel.com/content/www/us/en/products/details/processors/ai-accelerators/gaudi-overview.html) and [Intel Xeon Scalable Processors](https://www.intel.com/content/www/us/en/products/details/processors/xeon.html). We invite contributions from other hardware vendors to expand the OPEA ecosystem. - -![llava screenshot](./assets/img/llava_screenshot1.png) -![llava-screenshot](./assets/img/llava_screenshot2.png) - -## Required Models - -By default, the model is set to `llava-hf/llava-v1.6-mistral-7b-hf`. To use a different model, update the `LVM_MODEL_ID` variable in the [`set_env.sh`](./docker_compose/intel/hpu/gaudi/set_env.sh) file. - -``` -export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf" -``` - -You can choose other llava-next models, such as `llava-hf/llava-v1.6-vicuna-13b-hf`, as needed. - -## Deploy VisualQnA Service - -The VisualQnA service can be effortlessly deployed on either Intel Gaudi2 or Intel Xeon Scalable Processors. - -Currently we support deploying VisualQnA services with docker compose. - -### Setup Environment Variable - -To set up environment variables for deploying VisualQnA services, follow these steps: - -1. Set the required environment variables: - - ```bash - # Example: host_ip="192.168.1.1" - export host_ip="External_Public_IP" - # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1" - export no_proxy="Your_No_Proxy" - ``` - -2. If you are in a proxy environment, also set the proxy-related environment variables: - - ```bash - export http_proxy="Your_HTTP_Proxy" - export https_proxy="Your_HTTPs_Proxy" - ``` - -3. Set up other environment variables: - - > Notice that you can only choose **one** command below to set up envs according to your hardware. Other that the port numbers may be set incorrectly. - - ```bash - # on Gaudi - source ./docker_compose/intel/hpu/gaudi/set_env.sh - # on Xeon - source ./docker_compose/intel/cpu/xeon/set_env.sh - ``` - -### Deploy VisualQnA on Gaudi - -Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) to build docker images from source. - -Find the corresponding [compose.yaml](./docker_compose/intel/hpu/gaudi/compose.yaml). - -```bash -cd GenAIExamples/VisualQnA/docker_compose/intel/hpu/gaudi/ -docker compose up -d -``` - -### Deploy VisualQnA on Xeon - -Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for more instructions on building docker images from source. - -Find the corresponding [compose.yaml](./docker_compose/intel/cpu/xeon/compose.yaml). - -```bash -cd GenAIExamples/VisualQnA/docker_compose/intel/cpu/xeon/ -docker compose up -d -``` - -### Deploy VisualQnA on Kubernetes using Helm Chart - -Refer to the [VisualQnA helm chart](./kubernetes/helm/README.md) for instructions on deploying VisualQnA on Kubernetes. 
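Whichever deployment path you choose, a quick way to confirm the stack is serving requests is to query the MegaService endpoint directly. The sketch below mirrors the validation request shown in the hardware-specific guides; it assumes the default backend port `8888` and the `host_ip` variable exported earlier:

```bash
curl http://${host_ip}:8888/v1/visualqna \
  -H "Content-Type: application/json" \
  -d '{
    "messages": [
      {
        "role": "user",
        "content": [
          {"type": "text", "text": "What'\''s in this image?"},
          {"type": "image_url", "image_url": {"url": "https://www.ilankelman.org/stopsigns/australia.jpg"}}
        ]
      }
    ],
    "max_tokens": 300
  }'
```

A chat-completion style JSON response describing the image indicates that the gateway and the LVM service are wired together correctly.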
diff --git a/VisualQnA/assets/img/llava_screenshot1.png b/VisualQnA/assets/img/llava_screenshot1.png deleted file mode 100644 index b08c8b2459..0000000000 Binary files a/VisualQnA/assets/img/llava_screenshot1.png and /dev/null differ diff --git a/VisualQnA/assets/img/llava_screenshot2.png b/VisualQnA/assets/img/llava_screenshot2.png deleted file mode 100644 index ed6255e565..0000000000 Binary files a/VisualQnA/assets/img/llava_screenshot2.png and /dev/null differ diff --git a/VisualQnA/assets/img/vqa.png b/VisualQnA/assets/img/vqa.png deleted file mode 100644 index 7022a92568..0000000000 Binary files a/VisualQnA/assets/img/vqa.png and /dev/null differ diff --git a/VisualQnA/benchmark/performance/README.md b/VisualQnA/benchmark/performance/README.md deleted file mode 100644 index 45e76558ca..0000000000 --- a/VisualQnA/benchmark/performance/README.md +++ /dev/null @@ -1,77 +0,0 @@ -# VisualQnA Benchmarking - -This folder contains a collection of scripts to enable inference benchmarking by leveraging a comprehensive benchmarking tool, [GenAIEval](https://github.com/opea-project/GenAIEval/blob/main/evals/benchmark/README.md), that enables throughput analysis to assess inference performance. - -By following this guide, you can run benchmarks on your deployment and share the results with the OPEA community. - -## Purpose - -We aim to run these benchmarks and share them with the OPEA community for three primary reasons: - -- To offer insights on inference throughput in real-world scenarios, helping you choose the best service or deployment for your needs. -- To establish a baseline for validating optimization solutions across different implementations, providing clear guidance on which methods are most effective for your use case. -- To inspire the community to build upon our benchmarks, allowing us to better quantify new solutions in conjunction with current leading llms, serving frameworks etc. - -## Metrics - -The benchmark will report the below metrics, including: - -- Number of Concurrent Requests -- End-to-End Latency: P50, P90, P99 (in milliseconds) -- End-to-End First Token Latency: P50, P90, P99 (in milliseconds) -- Average Next Token Latency (in milliseconds) -- Average Token Latency (in milliseconds) -- Requests Per Second (RPS) -- Output Tokens Per Second -- Input Tokens Per Second - -Results will be displayed in the terminal and saved as CSV file named `1_testspec.yaml`. - -## Getting Started - -We recommend using Kubernetes to deploy the VisualQnA service, as it offers benefits such as load balancing and improved scalability. However, you can also deploy the service using Docker if that better suits your needs. - -### Prerequisites - -- Install Kubernetes by following [this guide](https://github.com/opea-project/docs/blob/main/guide/installation/k8s_install/k8s_install_kubespray.md). - -- Every node has direct internet access -- Set up kubectl on the master node with access to the Kubernetes cluster. -- Install Python 3.8+ on the master node for running GenAIEval. -- Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods. -- Ensure that the container's ulimit can meet the the number of requests. - -```bash -# The way to modify the containered ulimit: -sudo systemctl edit containerd -# Add two lines: -[Service] -LimitNOFILE=65536:1048576 - -sudo systemctl daemon-reload; sudo systemctl restart containerd -``` - -### Test Steps - -Please deploy VisualQnA service before benchmarking. 
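For example, with the recommended Kubernetes deployment you can confirm the application is ready before starting a run. This is a minimal pre-flight check, assuming the backend service name used in `benchmark.yaml` and whatever namespace you deployed VisualQnA into:

```bash
# Replace "visualqna" with the namespace used for your deployment
kubectl get pods -n visualqna
# The backend service targeted by the e2e test should be present
kubectl get svc -n visualqna | grep visualqna-backend-server-svc
```

All pods should report `Running` before the benchmark is started.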
- -#### Run Benchmark Test - -Before the benchmark, we can configure the number of test queries and test output directory by: - -```bash -export USER_QUERIES="[1, 1, 1, 1]" -export TEST_OUTPUT_DIR="/tmp/benchmark_output" -``` - -And then run the benchmark by: - -```bash -bash benchmark.sh -n -``` - -The argument `-n` refers to the number of test nodes. - -#### Data collection - -All the test results will come to this folder `/tmp/benchmark_output` configured by the environment variable `TEST_OUTPUT_DIR` in previous steps. diff --git a/VisualQnA/benchmark/performance/benchmark.sh b/VisualQnA/benchmark/performance/benchmark.sh deleted file mode 100644 index 44abdecbb1..0000000000 --- a/VisualQnA/benchmark/performance/benchmark.sh +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -deployment_type="k8s" -node_number=1 -service_port=8888 -query_per_node=128 - -benchmark_tool_path="$(pwd)/GenAIEval" - -usage() { - echo "Usage: $0 [-d deployment_type] [-n node_number] [-i service_ip] [-p service_port]" - echo " -d deployment_type deployment type, select between k8s and docker (default: ${deployment_type})" - echo " -n node_number Test node number, required only for k8s deployment_type, (default: ${node_number})" - echo " -i service_ip service ip, required only for docker deployment_type" - echo " -p service_port service port, required only for docker deployment_type, (default: ${service_port})" - exit 1 -} - -while getopts ":d:n:i:p:" opt; do - case ${opt} in - d ) - deployment_type=$OPTARG - ;; - n ) - node_number=$OPTARG - ;; - i ) - service_ip=$OPTARG - ;; - p ) - service_port=$OPTARG - ;; - \? ) - echo "Invalid option: -$OPTARG" 1>&2 - usage - ;; - : ) - echo "Invalid option: -$OPTARG requires an argument" 1>&2 - usage - ;; - esac -done - -if [[ "$deployment_type" == "docker" && -z "$service_ip" ]]; then - echo "Error: service_ip is required for docker deployment_type" 1>&2 - usage -fi - -if [[ "$deployment_type" == "k8s" && ( -n "$service_ip" || -n "$service_port" ) ]]; then - echo "Warning: service_ip and service_port are ignored for k8s deployment_type" 1>&2 -fi - -function main() { - if [[ ! -d ${benchmark_tool_path} ]]; then - echo "Benchmark tool not found, setting up..." - setup_env - fi - run_benchmark -} - -function setup_env() { - git clone https://github.com/opea-project/GenAIEval.git - pushd ${benchmark_tool_path} - python3 -m venv stress_venv - source stress_venv/bin/activate - pip install -r requirements.txt - popd -} - -function run_benchmark() { - source ${benchmark_tool_path}/stress_venv/bin/activate - export DEPLOYMENT_TYPE=${deployment_type} - export SERVICE_IP=${service_ip:-"None"} - export SERVICE_PORT=${service_port:-"None"} - if [[ -z $USER_QUERIES ]]; then - user_query=$((query_per_node*node_number)) - export USER_QUERIES="[${user_query}, ${user_query}, ${user_query}, ${user_query}]" - echo "USER_QUERIES not configured, setting to: ${USER_QUERIES}." - fi - export WARMUP=$(echo $USER_QUERIES | sed -e 's/[][]//g' -e 's/,.*//') - if [[ -z $WARMUP ]]; then export WARMUP=0; fi - if [[ -z $TEST_OUTPUT_DIR ]]; then - if [[ $DEPLOYMENT_TYPE == "k8s" ]]; then - export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/node_${node_number}" - else - export TEST_OUTPUT_DIR="${benchmark_tool_path}/evals/benchmark/benchmark_output/docker" - fi - echo "TEST_OUTPUT_DIR not configured, setting to: ${TEST_OUTPUT_DIR}." 
- fi - - envsubst < ./benchmark.yaml > ${benchmark_tool_path}/evals/benchmark/benchmark.yaml - cd ${benchmark_tool_path}/evals/benchmark - python benchmark.py -} - -main diff --git a/VisualQnA/benchmark/performance/benchmark.yaml b/VisualQnA/benchmark/performance/benchmark.yaml deleted file mode 100644 index 179317bdba..0000000000 --- a/VisualQnA/benchmark/performance/benchmark.yaml +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -test_suite_config: # Overall configuration settings for the test suite - examples: ["visualqna"] # The specific test cases being tested, e.g., chatqna, codegen, codetrans, faqgen, audioqna, visualqna - deployment_type: "k8s" # Default is "k8s", can also be "docker" - service_ip: None # Leave as None for k8s, specify for Docker - service_port: None # Leave as None for k8s, specify for Docker - warm_ups: 0 # Number of test requests for warm-up - run_time: 60m # The max total run time for the test suite - seed: # The seed for all RNGs - user_queries: [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048] # Number of test requests at each concurrency level - query_timeout: 120 # Number of seconds to wait for a simulated user to complete any executing task before exiting. 120 sec by defeult. - random_prompt: false # Use random prompts if true, fixed prompts if false - collect_service_metric: false # Collect service metrics if true, do not collect service metrics if false - data_visualization: false # Generate data visualization if true, do not generate data visualization if false - llm_model: "llava-hf/llava-v1.6-mistral-7b-hf" # The LLM model used for the test - test_output_dir: "/tmp/benchmark_output" # The directory to store the test output - load_shape: # Tenant concurrency pattern - name: constant # poisson or constant(locust default load shape) - params: # Loadshape-specific parameters - constant: # Constant load shape specific parameters, activate only if load_shape.name is constant - concurrent_level: 4 # If user_queries is specified, concurrent_level is target number of requests per user. If not, it is the number of simulated users - # arrival_rate: 1.0 # Request arrival rate. If set, concurrent_level will be overridden, constant load will be generated based on arrival-rate - poisson: # Poisson load shape specific parameters, activate only if load_shape.name is poisson - arrival_rate: 1.0 # Request arrival rate - namespace: "" # Fill the user-defined namespace. Otherwise, it will be default. - -test_cases: - visualqna: - lvm: - run_test: true - service_name: "llm-svc" # Replace with your service name - parameters: - model_name: "llava-hf/llava-v1.6-mistral-7b-hf" - max_new_tokens: 128 - temperature: 0.01 - top_k: 10 - top_p: 0.95 - repetition_penalty: 1.03 - stream: true - lvmserve: - run_test: true - service_name: "lvm-serving-svc" # Replace with your service name - e2e: - run_test: true - service_name: "visualqna-backend-server-svc" # Replace with your service name diff --git a/VisualQnA/docker_compose/amd/gpu/rocm/README.md b/VisualQnA/docker_compose/amd/gpu/rocm/README.md deleted file mode 100644 index 483e9b0da6..0000000000 --- a/VisualQnA/docker_compose/amd/gpu/rocm/README.md +++ /dev/null @@ -1,156 +0,0 @@ -# Build Mega Service of VisualQnA on AMD ROCm - -This document outlines the deployment process for a VisualQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server. 
The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `llm`. We will publish the Docker images to Docker Hub soon; this will simplify the deployment process for this service.
-
-## πŸš€ Build Docker Images
-
-First of all, you need to build the Docker images locally.
-
-### 1. Build LVM and NGINX Docker Images
-
-```bash
-git clone https://github.com/opea-project/GenAIComps.git
-cd GenAIComps
-docker build --no-cache -t opea/lvm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/Dockerfile .
-docker build --no-cache -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/nginx/src/Dockerfile .
-```
-
-### 2. Build MegaService Docker Image
-
-To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `visualqna.py` Python script. Build the MegaService Docker image with the command below:
-
-```bash
-git clone https://github.com/opea-project/GenAIExamples.git
-cd GenAIExamples/VisualQnA
-docker build --no-cache -t opea/visualqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
-```
-
-### 3. Build UI Docker Image
-
-Build the frontend Docker image with the command below:
-
-```bash
-cd GenAIExamples/VisualQnA/ui
-docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile .
-```
-
-### 4. Pull TGI AMD ROCm Image
-
-```bash
-docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm
-```
-
-Then run the command `docker images`, and you will have the following 5 Docker images:
-
-1. `ghcr.io/huggingface/text-generation-inference:2.4.1-rocm`
-2. `opea/lvm:latest`
-3. `opea/visualqna:latest`
-4. `opea/visualqna-ui:latest`
-5. `opea/nginx`
-
-## πŸš€ Start Microservices
-
-### Setup Environment Variables
-
-Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as shown below.
-
-**Export the value of the public IP address of your ROCm server to the `host_ip` environment variable**
-
-> Replace External_Public_IP below with the actual IPv4 value
-
-```
-export host_ip="External_Public_IP"
-```
-
-**Append the value of the public IP address to the no_proxy list**
-
-```
-export your_no_proxy="${your_no_proxy},${host_ip}"
-```
-
-```bash
-export HOST_IP=${your_host_ip}
-export VISUALQNA_TGI_SERVICE_PORT="8399"
-export VISUALQNA_HUGGINGFACEHUB_API_TOKEN={your_huggingface_api_token}
-export VISUALQNA_CARD_ID="card1"
-export VISUALQNA_RENDER_ID="renderD136"
-export LVM_MODEL_ID="Xkev/Llama-3.2V-11B-cot"
-export MODEL="llava-hf/llava-v1.6-mistral-7b-hf"
-export LVM_ENDPOINT="http://${HOST_IP}:8399"
-export LVM_SERVICE_PORT=9399
-export MEGA_SERVICE_HOST_IP=${HOST_IP}
-export LVM_SERVICE_HOST_IP=${HOST_IP}
-export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:18003/v1/visualqna"
-export FRONTEND_SERVICE_IP=${HOST_IP}
-export FRONTEND_SERVICE_PORT=18001
-export BACKEND_SERVICE_NAME=visualqna
-export BACKEND_SERVICE_IP=${HOST_IP}
-export BACKEND_SERVICE_PORT=18002
-export NGINX_PORT=18003
-
-```
-
-Note: Please replace `host_ip` with your external IP address; do not use localhost.
-
-Note: You can also source the `set_env.sh` file (`. set_env.sh`) to set the needed variables.
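Before bringing up the containers, it can help to confirm that the key variables resolved to non-empty values; an empty `HOST_IP` usually means a placeholder above was not replaced. This is an optional check, not part of the original setup:

```bash
# Each line should print a real IP address or URL, not an empty value
echo "HOST_IP=${HOST_IP}"
echo "LVM_ENDPOINT=${LVM_ENDPOINT}"
echo "BACKEND_SERVICE_ENDPOINT=${BACKEND_SERVICE_ENDPOINT}"
```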
-
-### Start all the services Docker Containers
-
-> Before running the docker compose command, you need to be in the folder that contains the docker compose yaml file.
-
-```bash
-cd GenAIExamples/VisualQnA/docker_compose/amd/gpu/rocm
-```
-
-```bash
-docker compose -f compose.yaml up -d
-```
-
-### Validate Microservices
-
-Follow the instructions below to validate the microservices.
-
-> Note: If you see an "Internal Server Error" from the `curl` command, wait a few minutes for the microservice to be ready and then try again.
-
-1. LVM Microservice
-
-   ```bash
-   http_proxy="" curl http://${host_ip}:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json'
-   ```
-
-2. MegaService
-
-```bash
-curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d '{
-     "messages": [
-      {
-        "role": "user",
-        "content": [
-          {
-            "type": "text",
-            "text": "What'\''s in this image?"
-          },
-          {
-            "type": "image_url",
-            "image_url": {
-              "url": "https://www.ilankelman.org/stopsigns/australia.jpg"
-            }
-          }
-        ]
-      }
-    ],
-    "max_tokens": 300
-  }'
-```
-
-## πŸš€ Launch the UI
-
-To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
-
-```yaml
-  visualqna-rocm-ui-server:
-    image: opea/visualqna-ui:latest
-    ...
-    ports:
-      - "80:5173"
-```
diff --git a/VisualQnA/docker_compose/amd/gpu/rocm/compose.yaml b/VisualQnA/docker_compose/amd/gpu/rocm/compose.yaml
deleted file mode 100644
index 72f667ab2f..0000000000
--- a/VisualQnA/docker_compose/amd/gpu/rocm/compose.yaml
+++ /dev/null
@@ -1,100 +0,0 @@
-# Copyright (C) 2024 Advanced Micro Devices, Inc.
-# SPDX-License-Identifier: Apache-2.0 - -services: - visualqna-llava-tgi-service: - image: ghcr.io/huggingface/text-generation-inference:2.4.1-rocm - container_name: visualqna-tgi-service - ports: - - "${VISUALQNA_TGI_SERVICE_PORT:-8399}:80" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - TGI_LLM_ENDPOINT: "http://${HOST_IP}:${VISUALQNA_TGI_SERVICE_PORT}" - HUGGINGFACEHUB_API_TOKEN: ${VISUALQNA_HUGGINGFACEHUB_API_TOKEN} - HUGGING_FACE_HUB_TOKEN: ${VISUALQNA_HUGGINGFACEHUB_API_TOKEN} - volumes: - - "/var/opea/visualqna-service/data:/data" - shm_size: 64g - devices: - - /dev/kfd:/dev/kfd - - /dev/dri/:/dev/dri/ - cap_add: - - SYS_PTRACE - group_add: - - video - security_opt: - - seccomp:unconfined - ipc: host - command: --model-id ${LVM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 - lvm: - image: ${REGISTRY:-opea}/lvm:${TAG:-latest} - container_name: lvm-server - depends_on: - - visualqna-llava-tgi-service - ports: - - "9399:9399" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LVM_ENDPOINT: ${LVM_ENDPOINT} - LVM_COMPONENT_NAME: "OPEA_TGI_LLAVA_LVM" - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - restart: unless-stopped - visualqna-rocm-backend-server: - image: ${REGISTRY:-opea}/visualqna:${TAG:-latest} - container_name: visualqna-rocm-backend-server - depends_on: - - visualqna-llava-tgi-service - - lvm - ports: - - "${BACKEND_SERVICE_PORT:-8888}:8888" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - - LVM_SERVICE_HOST_IP=${LVM_SERVICE_HOST_IP} - ipc: host - restart: always - visualqna-rocm-ui-server: - image: ${REGISTRY:-opea}/visualqna-ui:${TAG:-latest} - container_name: visualqna-rocm-ui-server - depends_on: - - visualqna-rocm-backend-server - ports: - - "${FRONTEND_SERVICE_PORT:-5173}:5173" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - BACKEND_BASE_URL=${BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - visualqna-nginx-server: - image: ${REGISTRY:-opea}/nginx:${TAG:-latest} - container_name: visualqna-rocm-nginx-server - depends_on: - - visualqna-rocm-backend-server - - visualqna-rocm-ui-server - ports: - - "${NGINX_PORT:-80}:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - FRONTEND_SERVICE_IP=${HOST_IP} - - FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT} - - BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME} - - BACKEND_SERVICE_IP=${HOST_IP} - - BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/VisualQnA/docker_compose/amd/gpu/rocm/set_env.sh b/VisualQnA/docker_compose/amd/gpu/rocm/set_env.sh deleted file mode 100644 index bf73465cef..0000000000 --- a/VisualQnA/docker_compose/amd/gpu/rocm/set_env.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Advanced Micro Devices, Inc -# SPDX-License-Identifier: Apache-2.0 - -export HOST_IP=${Your_host_ip_address} -export VISUALQNA_TGI_SERVICE_PORT="8399" -export VISUALQNA_HUGGINGFACEHUB_API_TOKEN=${Your_HUGGINGFACEHUB_API_TOKEN} -export VISUALQNA_CARD_ID="card1" -export VISUALQNA_RENDER_ID="renderD136" -export LVM_MODEL_ID="Xkev/Llama-3.2V-11B-cot" -export LVM_ENDPOINT="http://${HOST_IP}:8399" -export LVM_SERVICE_PORT=9399 -export MEGA_SERVICE_HOST_IP=${HOST_IP} -export 
LVM_SERVICE_HOST_IP=${HOST_IP} -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/visualqna" -export FRONTEND_SERVICE_IP=${HOST_IP} -export FRONTEND_SERVICE_PORT=18001 -export BACKEND_SERVICE_NAME=visualqna -export BACKEND_SERVICE_IP=${HOST_IP} -export BACKEND_SERVICE_PORT=18002 -export NGINX_PORT=18003 diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/README.md b/VisualQnA/docker_compose/intel/cpu/xeon/README.md deleted file mode 100644 index cfbc3ab1c1..0000000000 --- a/VisualQnA/docker_compose/intel/cpu/xeon/README.md +++ /dev/null @@ -1,136 +0,0 @@ -# Build Mega Service of VisualQnA on Xeon - -This document outlines the deployment process for a VisualQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `llm`. We will publish the Docker images to Docker Hub soon, it will simplify the deployment process for this service. - -## πŸš€ Apply Xeon Server on AWS - -To apply a Xeon server on AWS, start by creating an AWS account if you don't have one already. Then, head to the [EC2 Console](https://console.aws.amazon.com/ec2/v2/home) to begin the process. Within the EC2 service, select the Amazon EC2 M7i or M7i-flex instance type to leverage 4th Generation Intel Xeon Scalable processors. These instances are optimized for high-performance computing and demanding workloads. - -For detailed information about these instance types, you can refer to this [link](https://aws.amazon.com/ec2/instance-types/m7i/). Once you've chosen the appropriate instance type, proceed with configuring your instance settings, including network configurations, security groups, and storage options. - -After launching your instance, you can connect to it using SSH (for Linux instances) or Remote Desktop Protocol (RDP) (for Windows instances). From there, you'll have full access to your Xeon server, allowing you to install, configure, and manage your applications as needed. - -## πŸš€ Build Docker Images - -First of all, you need to build Docker Images locally and install the python package of it. - -### 1. Build LVM and NGINX Docker Images - -```bash -git clone https://github.com/opea-project/GenAIComps.git -cd GenAIComps -docker build --no-cache -t opea/lvm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/Dockerfile . -docker build --no-cache -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/nginx/src/Dockerfile . -``` - -### 2. Build MegaService Docker Image - -To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `visualqna.py` Python script. Build MegaService Docker image via below command: - -```bash -git clone https://github.com/opea-project/GenAIExamples.git -cd GenAIExamples/VisualQnA -docker build --no-cache -t opea/visualqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . -``` - -### 3. Build UI Docker Image - -Build frontend Docker image via below command: - -```bash -cd GenAIExamples/VisualQnA/ui -docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile . -``` - -### 4. 
Pull vLLM/TGI Xeon Image - -```bash -# vLLM -docker pull opea/vllm:latest -# TGI (Optional) -docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu -``` - -Then run the command `docker images`, you will have the following Docker Images: - -1. `opea/vllm:latest` -2. `ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu` (Optional) -3. `opea/lvm:latest` -4. `opea/visualqna:latest` -5. `opea/visualqna-ui:latest` -6. `opea/nginx` - -## πŸš€ Start Microservices - -### Setup Environment Variables - -Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. - -```bash -source set_env.sh -``` - -Note: Please replace with `host_ip` with you external IP address, do not use localhost. - -### Start all the services Docker Containers - -> Before running the docker compose command, you need to be in the folder that has the docker compose yaml file - -```bash -cd GenAIExamples/VisualQnA/docker_compose/intel/cpu/xeon -``` - -```bash -docker compose -f compose.yaml up -d -# if use TGI as the LLM serving backend -docker compose -f compose_tgi.yaml up -d -``` - -### Validate Microservices - -Follow the instructions to validate MicroServices. - -> Note: If you see an "Internal Server Error" from the `curl` command, wait a few minutes for the microserver to be ready and then try again. - -1. LLM Microservice - - ```bash - http_proxy="" curl http://${host_ip}:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json' - ``` - -2. MegaService - -```bash -curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' -``` - -## πŸš€ Launch the UI - -To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: - -```yaml - visualqna-gaudi-ui-server: - image: opea/visualqna-ui:latest - ... 
- ports: - - "80:5173" -``` diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml b/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml deleted file mode 100644 index 9c19695493..0000000000 --- a/VisualQnA/docker_compose/intel/cpu/xeon/compose.yaml +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - vllm-service: - image: ${REGISTRY:-opea}/vllm:${TAG:-latest} - container_name: vllm-service - ports: - - ${VLLM_PORT:-8399}:80 - volumes: - - "${MODEL_CACHE:-./data}:/root/.cache/huggingface/hub" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - VLLM_TORCH_PROFILER_DIR: "/mnt" - healthcheck: - test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] - interval: 10s - timeout: 10s - retries: 100 - command: --model $LVM_MODEL_ID --host 0.0.0.0 --port 80 --chat-template examples/template_llava.jinja # https://docs.vllm.ai/en/v0.5.0/models/vlm.html - - lvm: - image: ${REGISTRY:-opea}/lvm:${TAG:-latest} - container_name: lvm-xeon-server - depends_on: - vllm-service: - condition: service_healthy - ports: - - "9399:9399" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LVM_ENDPOINT: ${LVM_ENDPOINT} - LVM_COMPONENT_NAME: "OPEA_VLLM_LVM" - LLM_MODEL_ID: ${LVM_MODEL_ID} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - restart: unless-stopped - visualqna-xeon-backend-server: - image: ${REGISTRY:-opea}/visualqna:${TAG:-latest} - container_name: visualqna-xeon-backend-server - depends_on: - - vllm-service - - lvm - ports: - - "8888:8888" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - - LVM_SERVICE_HOST_IP=${LVM_SERVICE_HOST_IP} - ipc: host - restart: always - visualqna-xeon-ui-server: - image: ${REGISTRY:-opea}/visualqna-ui:${TAG:-latest} - container_name: visualqna-xeon-ui-server - depends_on: - - visualqna-xeon-backend-server - ports: - - "5173:5173" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - BACKEND_BASE_URL=${BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - visualqna-xeon-nginx-server: - image: ${REGISTRY:-opea}/nginx:${TAG:-latest} - container_name: visualqna-xeon-nginx-server - depends_on: - - visualqna-xeon-backend-server - - visualqna-xeon-ui-server - ports: - - "${NGINX_PORT:-80}:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP} - - FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT} - - BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME} - - BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP} - - BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml b/VisualQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml deleted file mode 100644 index b595bdcba7..0000000000 --- a/VisualQnA/docker_compose/intel/cpu/xeon/compose_tgi.yaml +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - llava-tgi-service: - image: ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu - container_name: tgi-llava-xeon-server - ports: - - "8399:80" - volumes: - - "${MODEL_CACHE:-./data}:/data" - shm_size: 1g - 
environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - host_ip: ${host_ip} - healthcheck: - test: ["CMD-SHELL", "curl -f http://$host_ip:8399/health || exit 1"] - interval: 10s - timeout: 10s - retries: 60 - command: --model-id ${LVM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --cuda-graphs 0 - lvm: - image: ${REGISTRY:-opea}/lvm:${TAG:-latest} - container_name: lvm-xeon-server - depends_on: - llava-tgi-service: - condition: service_healthy - ports: - - "9399:9399" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LVM_ENDPOINT: ${LVM_ENDPOINT} - LVM_COMPONENT_NAME: "OPEA_TGI_LLAVA_LVM" - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - restart: unless-stopped - visualqna-xeon-backend-server: - image: ${REGISTRY:-opea}/visualqna:${TAG:-latest} - container_name: visualqna-xeon-backend-server - depends_on: - - llava-tgi-service - - lvm - ports: - - "8888:8888" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - - LVM_SERVICE_HOST_IP=${LVM_SERVICE_HOST_IP} - ipc: host - restart: always - visualqna-xeon-ui-server: - image: ${REGISTRY:-opea}/visualqna-ui:${TAG:-latest} - container_name: visualqna-xeon-ui-server - depends_on: - - visualqna-xeon-backend-server - ports: - - "5173:5173" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - BACKEND_BASE_URL=${BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - visualqna-xeon-nginx-server: - image: ${REGISTRY:-opea}/nginx:${TAG:-latest} - container_name: visualqna-xeon-nginx-server - depends_on: - - visualqna-xeon-backend-server - - visualqna-xeon-ui-server - ports: - - "${NGINX_PORT:-80}:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP} - - FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT} - - BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME} - - BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP} - - BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/VisualQnA/docker_compose/intel/cpu/xeon/set_env.sh b/VisualQnA/docker_compose/intel/cpu/xeon/set_env.sh deleted file mode 100644 index b47f12fe31..0000000000 --- a/VisualQnA/docker_compose/intel/cpu/xeon/set_env.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -pushd "../../../../../" > /dev/null -source .set_env.sh -popd > /dev/null - -export host_ip=$(hostname -I | awk '{print $1}') -export no_proxy=$host_ip,$no_proxy -export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf" -export LVM_ENDPOINT="http://${host_ip}:8399" -export LVM_SERVICE_PORT=9399 -export MEGA_SERVICE_HOST_IP=${host_ip} -export LVM_SERVICE_HOST_IP=${host_ip} -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/visualqna" -export FRONTEND_SERVICE_IP=${host_ip} -export FRONTEND_SERVICE_PORT=5173 -export BACKEND_SERVICE_NAME=visualqna -export BACKEND_SERVICE_IP=${host_ip} -export BACKEND_SERVICE_PORT=8888 diff --git a/VisualQnA/docker_compose/intel/hpu/gaudi/README.md b/VisualQnA/docker_compose/intel/hpu/gaudi/README.md deleted file mode 100644 index 9c3b0cd4e0..0000000000 --- a/VisualQnA/docker_compose/intel/hpu/gaudi/README.md +++ 
/dev/null @@ -1,140 +0,0 @@
-# Build MegaService of VisualQnA on Gaudi
-
-This document outlines the deployment process for a VisualQnA application utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on an Intel Gaudi server. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `llm`. We will publish the Docker images to Docker Hub; this will simplify the deployment process for this service.
-
-## πŸš€ Build Docker Images
-
-First of all, you need to build the Docker images locally. This step can be skipped once the Docker images are published to Docker Hub.
-
-### 1. Build LVM and NGINX Docker Images
-
-```bash
-git clone https://github.com/opea-project/GenAIComps.git
-cd GenAIComps
-docker build --no-cache -t opea/lvm:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/lvms/src/Dockerfile .
-docker build --no-cache -t opea/nginx:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/third_parties/nginx/src/Dockerfile .
-```
-
-### 2. Build vLLM/Pull TGI Gaudi Image
-
-```bash
-# vLLM
-
-# Currently you have to build opea/vllm-gaudi locally from the habana_main branch and the specific commit;
-# we will update this to a stable release tag in the future.
-git clone https://github.com/HabanaAI/vllm-fork.git
-cd ./vllm-fork/
-docker build -f Dockerfile.hpu -t opea/vllm-gaudi:latest --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy
-cd ..
-rm -rf vllm-fork
-```
-
-```bash
-# TGI (Optional)
-
-docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6
-```
-
-### 3. Build MegaService Docker Image
-
-To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `visualqna.py` Python script. Build the MegaService Docker image using the command below:
-
-```bash
-git clone https://github.com/opea-project/GenAIExamples.git
-cd GenAIExamples/VisualQnA
-docker build --no-cache -t opea/visualqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
-cd ../..
-```
-
-### 4. Build UI Docker Image
-
-Build the frontend Docker image with the command below:
-
-```bash
-cd GenAIExamples/VisualQnA/ui
-docker build --no-cache -t opea/visualqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
-```
-
-Then run the command `docker images`, and you will have the following 6 Docker images:
-
-1. `opea/vllm-gaudi:latest`
-2. `ghcr.io/huggingface/tgi-gaudi:2.0.6` (Optional)
-3. `opea/lvm:latest`
-4. `opea/visualqna:latest`
-5. `opea/visualqna-ui:latest`
-6. `opea/nginx`
-
-## πŸš€ Start MicroServices and MegaService
-
-### Setup Environment Variables
-
-Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as shown below.
-
-```bash
-source set_env.sh
-```
-
-Note: Please replace `host_ip` with your external IP address; do not use localhost.
-
-### Start all the services Docker Containers
-
-```bash
-cd GenAIExamples/VisualQnA/docker_compose/intel/hpu/gaudi/
-```
-
-```bash
-docker compose -f compose.yaml up -d
-# if using TGI as the LLM serving backend
-docker compose -f compose_tgi.yaml up -d
-```
-
-> **_NOTE:_** Users need at least one Gaudi card to run VisualQnA successfully.
-
-### Validate MicroServices and MegaService
-
-Follow the instructions below to validate the microservices.
- -> Note: If you see an "Internal Server Error" from the `curl` command, wait a few minutes for the microserver to be ready and then try again. - -1. LLM Microservice - - ```bash - http_proxy="" curl http://${host_ip}:9399/v1/lvm -XPOST -d '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -H 'Content-Type: application/json' - ``` - -2. MegaService - -```bash -curl http://${host_ip}:8888/v1/visualqna -H "Content-Type: application/json" -d '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' -``` - -## πŸš€ Launch the UI - -To access the frontend, open the following URL in your browser: http://{host_ip}:5173. By default, the UI runs on port 5173 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: - -```yaml - visualqna-gaudi-ui-server: - image: opea/visualqna-ui:latest - ... - ports: - - "80:5173" -``` diff --git a/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml deleted file mode 100644 index c1950a14d4..0000000000 --- a/VisualQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ /dev/null @@ -1,106 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - vllm-gaudi-service: - image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest} - container_name: vllm-gaudi-service - ports: - - ${VLLM_PORT:-8399}:80 - volumes: - - "./data:/root/.cache/huggingface/hub" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - LLM_MODEL_ID: ${LVM_MODEL_ID} - VLLM_TORCH_PROFILER_DIR: "/mnt" - VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false} - MAX_MODEL_LEN: ${MAX_TOTAL_TOKENS:-4096} - MAX_SEQ_LEN_TO_CAPTURE: ${MAX_TOTAL_TOKENS:-4096} - PT_HPUGRAPH_DISABLE_TENSOR_CACHE: false # https://github.com/HabanaAI/vllm-fork/issues/841#issuecomment-2700421704 - runtime: habana - cap_add: - - SYS_NICE - ipc: host - healthcheck: - test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"] - interval: 10s - timeout: 10s - retries: 150 - command: --model $LVM_MODEL_ID --tensor-parallel-size 1 --host 0.0.0.0 --port 80 --chat-template examples/template_llava.jinja # https://docs.vllm.ai/en/v0.5.0/models/vlm.html - lvm: - image: ${REGISTRY:-opea}/lvm:${TAG:-latest} - container_name: lvm-vllm-gaudi-service - depends_on: - vllm-gaudi-service: - condition: service_healthy - ports: - - "9399:9399" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LVM_ENDPOINT: ${LVM_ENDPOINT} - LVM_COMPONENT_NAME: "OPEA_VLLM_LVM" - LLM_MODEL_ID: ${LVM_MODEL_ID} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - restart: unless-stopped - visualqna-gaudi-backend-server: - image: ${REGISTRY:-opea}/visualqna:${TAG:-latest} - container_name: visualqna-gaudi-backend-server - depends_on: - - vllm-gaudi-service - - lvm - ports: - - "8888:8888" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - - 
LVM_SERVICE_HOST_IP=${LVM_SERVICE_HOST_IP} - ipc: host - restart: always - visualqna-gaudi-ui-server: - image: ${REGISTRY:-opea}/visualqna-ui:${TAG:-latest} - container_name: visualqna-gaudi-ui-server - depends_on: - - visualqna-gaudi-backend-server - ports: - - "5173:5173" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - BACKEND_BASE_URL=${BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - visualqna-gaudi-nginx-server: - image: ${REGISTRY:-opea}/nginx:${TAG:-latest} - container_name: visualqna-gaudi-nginx-server - depends_on: - - visualqna-gaudi-backend-server - - visualqna-gaudi-ui-server - ports: - - "${NGINX_PORT:-80}:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP} - - FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT} - - BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME} - - BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP} - - BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/VisualQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml b/VisualQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml deleted file mode 100644 index 251b4fce70..0000000000 --- a/VisualQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - llava-tgi-service: - image: ghcr.io/huggingface/tgi-gaudi:2.3.1 - container_name: tgi-llava-gaudi-server - ports: - - "8399:80" - volumes: - - "${MODEL_CACHE:-./data}:/data" - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - HABANA_VISIBLE_DEVICES: all - OMPI_MCA_btl_vader_single_copy_mechanism: none - HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - ENABLE_HPU_GRAPH: true - LIMIT_HPU_GRAPH: true - USE_FLASH_ATTENTION: true - FLASH_ATTENTION_RECOMPUTE: true - healthcheck: - test: ["CMD-SHELL", "curl -f http://$host_ip:8399/health || exit 1"] - interval: 10s - timeout: 10s - retries: 60 - runtime: habana - cap_add: - - SYS_NICE - ipc: host - command: --model-id ${LVM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 - lvm: - image: ${REGISTRY:-opea}/lvm:${TAG:-latest} - container_name: lvm-gaudi-server - depends_on: - llava-tgi-service: - condition: service_healthy - ports: - - "9399:9399" - ipc: host - environment: - no_proxy: ${no_proxy} - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - LVM_ENDPOINT: ${LVM_ENDPOINT} - LVM_COMPONENT_NAME: "OPEA_TGI_LLAVA_LVM" - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 - restart: unless-stopped - visualqna-gaudi-backend-server: - image: ${REGISTRY:-opea}/visualqna:${TAG:-latest} - container_name: visualqna-gaudi-backend-server - depends_on: - - llava-tgi-service - - lvm - ports: - - "8888:8888" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP} - - LVM_SERVICE_HOST_IP=${LVM_SERVICE_HOST_IP} - ipc: host - restart: always - visualqna-gaudi-ui-server: - image: ${REGISTRY:-opea}/visualqna-ui:${TAG:-latest} - container_name: visualqna-gaudi-ui-server - depends_on: - - visualqna-gaudi-backend-server - ports: - - "5173:5173" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - 
BACKEND_BASE_URL=${BACKEND_SERVICE_ENDPOINT} - ipc: host - restart: always - visualqna-gaudi-nginx-server: - image: ${REGISTRY:-opea}/nginx:${TAG:-latest} - container_name: visualqna-gaudi-nginx-server - depends_on: - - visualqna-gaudi-backend-server - - visualqna-gaudi-ui-server - ports: - - "${NGINX_PORT:-80}:80" - environment: - - no_proxy=${no_proxy} - - https_proxy=${https_proxy} - - http_proxy=${http_proxy} - - FRONTEND_SERVICE_IP=${FRONTEND_SERVICE_IP} - - FRONTEND_SERVICE_PORT=${FRONTEND_SERVICE_PORT} - - BACKEND_SERVICE_NAME=${BACKEND_SERVICE_NAME} - - BACKEND_SERVICE_IP=${BACKEND_SERVICE_IP} - - BACKEND_SERVICE_PORT=${BACKEND_SERVICE_PORT} - ipc: host - restart: always - -networks: - default: - driver: bridge diff --git a/VisualQnA/docker_compose/intel/hpu/gaudi/set_env.sh b/VisualQnA/docker_compose/intel/hpu/gaudi/set_env.sh deleted file mode 100644 index 57032fdce5..0000000000 --- a/VisualQnA/docker_compose/intel/hpu/gaudi/set_env.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -pushd "../../../../../" > /dev/null -source .set_env.sh -popd > /dev/null - -export host_ip=$(hostname -I | awk '{print $1}') -export no_proxy=$host_ip,$no_proxy -# export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf" -export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf" -export LVM_ENDPOINT="http://${host_ip}:8399" -export LVM_SERVICE_PORT=9399 -export MEGA_SERVICE_HOST_IP=${host_ip} -export LVM_SERVICE_HOST_IP=${host_ip} -export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/visualqna" -export FRONTEND_SERVICE_IP=${host_ip} -export FRONTEND_SERVICE_PORT=5173 -export BACKEND_SERVICE_NAME=visualqna -export BACKEND_SERVICE_IP=${host_ip} -export BACKEND_SERVICE_PORT=8888 diff --git a/VisualQnA/docker_image_build/build.yaml b/VisualQnA/docker_image_build/build.yaml deleted file mode 100644 index 7dcb7be0a1..0000000000 --- a/VisualQnA/docker_image_build/build.yaml +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -services: - visualqna: - build: - args: - http_proxy: ${http_proxy} - https_proxy: ${https_proxy} - no_proxy: ${no_proxy} - context: ../ - dockerfile: ./Dockerfile - image: ${REGISTRY:-opea}/visualqna:${TAG:-latest} - visualqna-ui: - build: - context: ../ui - dockerfile: ./docker/Dockerfile - extends: visualqna - image: ${REGISTRY:-opea}/visualqna-ui:${TAG:-latest} - lvm: - build: - context: GenAIComps - dockerfile: comps/lvms/src/Dockerfile - extends: visualqna - image: ${REGISTRY:-opea}/lvm:${TAG:-latest} - nginx: - build: - context: GenAIComps - dockerfile: comps/third_parties/nginx/src/Dockerfile - extends: visualqna - image: ${REGISTRY:-opea}/nginx:${TAG:-latest} diff --git a/VisualQnA/kubernetes/gmc/README.md b/VisualQnA/kubernetes/gmc/README.md deleted file mode 100644 index 75669d4e39..0000000000 --- a/VisualQnA/kubernetes/gmc/README.md +++ /dev/null @@ -1,56 +0,0 @@ -# Deploy VisualQnA in a Kubernetes Cluster - -This document outlines the deployment process for a Visual Question Answering (VisualQnA) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines. - -Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector/README.md). 
We will soon publish images to Docker Hub, at which point no builds will be required, further simplifying install. - -If you have only Intel Xeon machines you could use the visualqna_xeon.yaml file or if you have a Gaudi cluster you could use visualqna_gaudi.yaml -In the below example we illustrate on Xeon. - -## Deploy the VisualQnA application - -1. Create the desired namespace if it does not already exist and deploy the application - ```bash - export APP_NAMESPACE=visualqna - kubectl create ns $APP_NAMESPACE - kubectl apply -f ./visualqna_xeon.yaml - ``` - -2. Check if the application is up and ready - ```bash - kubectl get pods -n $APP_NAMESPACE - ``` - -3. Deploy a client pod for testing - ```bash - kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity - ``` - -4. Check that client pod is ready - ```bash - kubectl get pods -n $APP_NAMESPACE - ``` - -5. Send request to application - ```bash - export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) - export accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='visualqna')].status.accessUrl}") - kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 128}' -H 'Content-Type: application/json' > $LOG_PATH/gmc_visualqna.log - ``` diff --git a/VisualQnA/kubernetes/gmc/visualqna_gaudi.yaml b/VisualQnA/kubernetes/gmc/visualqna_gaudi.yaml deleted file mode 100644 index 4bd8248026..0000000000 --- a/VisualQnA/kubernetes/gmc/visualqna_gaudi.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: gmc.opea.io/v1alpha3 -kind: GMConnector -metadata: - labels: - app.kubernetes.io/name: gmconnector - app.kubernetes.io/managed-by: kustomize - gmc/platform: gaudi - name: visualqna - namespace: visualqna -spec: - routerConfig: - name: router - serviceName: router-service - nodes: - root: - routerType: Sequence - steps: - - name: Lvm - data: $response - internalService: - serviceName: visualqna-service - config: - endpoint: /v1/lvm - LVM_ENDPOINT: visualqna-tgi-svc - - name: TgiGaudi - internalService: - serviceName: visualqna-tgi-svc - config: - MODEL_ID: llava-hf/llava-v1.6-mistral-7b-hf - endpoint: /generate - isDownstreamService: true diff --git a/VisualQnA/kubernetes/gmc/visualqna_xeon.yaml b/VisualQnA/kubernetes/gmc/visualqna_xeon.yaml deleted file mode 100644 index c789c09dab..0000000000 --- a/VisualQnA/kubernetes/gmc/visualqna_xeon.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: gmc.opea.io/v1alpha3 -kind: GMConnector -metadata: - labels: - app.kubernetes.io/name: gmconnector - app.kubernetes.io/managed-by: kustomize - gmc/platform: xeon - name: visualqna - namespace: visualqna -spec: - routerConfig: - name: router - serviceName: router-service - nodes: - root: - routerType: Sequence - steps: - - name: Lvm - data: $response - internalService: - serviceName: visualqna-service - config: - endpoint: /v1/lvm - LVM_ENDPOINT: visualqna-tgi-svc - - name: Tgi - internalService: - serviceName: visualqna-tgi-svc - config: - MODEL_ID: llava-hf/llava-v1.6-mistral-7b-hf - endpoint: /generate - 
isDownstreamService: true diff --git a/VisualQnA/kubernetes/helm/README.md b/VisualQnA/kubernetes/helm/README.md deleted file mode 100644 index d6dd382b90..0000000000 --- a/VisualQnA/kubernetes/helm/README.md +++ /dev/null @@ -1,18 +0,0 @@ -# Deploy VisualQnA on Kubernetes cluster - -- You should have Helm (version >= 3.15) installed. Refer to the [Helm Installation Guide](https://helm.sh/docs/intro/install/) for more information. -- For more deploy options, refer to [helm charts README](https://github.com/opea-project/GenAIInfra/tree/main/helm-charts#readme). - -## Deploy on Xeon - -``` -export HFTOKEN="insert-your-huggingface-token-here" -helm install visualqna oci://ghcr.io/opea-project/charts/visualqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f cpu-values.yaml -``` - -## Deploy on Gaudi - -``` -export HFTOKEN="insert-your-huggingface-token-here" -helm install visualqna oci://ghcr.io/opea-project/charts/visualqna --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} -f gaudi-values.yaml -``` diff --git a/VisualQnA/kubernetes/helm/cpu-values.yaml b/VisualQnA/kubernetes/helm/cpu-values.yaml deleted file mode 100644 index acc9d4e28d..0000000000 --- a/VisualQnA/kubernetes/helm/cpu-values.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -tgi: - MAX_INPUT_LENGTH: "4096" - MAX_TOTAL_TOKENS: "8192" - LLM_MODEL_ID: llava-hf/llava-v1.6-mistral-7b-hf diff --git a/VisualQnA/kubernetes/helm/gaudi-values.yaml b/VisualQnA/kubernetes/helm/gaudi-values.yaml deleted file mode 100644 index eb6494a142..0000000000 --- a/VisualQnA/kubernetes/helm/gaudi-values.yaml +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Accelerate inferencing in heaviest components to improve performance -# by overriding their subchart values - -# TGI: largest bottleneck for VisualQnA -tgi: - accelDevice: "gaudi" - image: - repository: ghcr.io/huggingface/tgi-gaudi - tag: "2.3.1" - resources: - limits: - habana.ai/gaudi: 1 - MAX_INPUT_LENGTH: "4096" - MAX_TOTAL_TOKENS: "8192" - CUDA_GRAPHS: "" - OMPI_MCA_btl_vader_single_copy_mechanism: "none" - ENABLE_HPU_GRAPH: "true" - LIMIT_HPU_GRAPH: "true" - USE_FLASH_ATTENTION: "true" - FLASH_ATTENTION_RECOMPUTE: "true" - livenessProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 1 - readinessProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 1 - startupProbe: - initialDelaySeconds: 5 - periodSeconds: 5 - timeoutSeconds: 1 - failureThreshold: 120 diff --git a/VisualQnA/tests/test_compose_on_gaudi.sh b/VisualQnA/tests/test_compose_on_gaudi.sh deleted file mode 100644 index 3fbc8e0adc..0000000000 --- a/VisualQnA/tests/test_compose_on_gaudi.sh +++ /dev/null @@ -1,203 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" -export REGISTRY=${IMAGE_REPO} -export TAG=${IMAGE_TAG} -export MODEL_CACHE=${model_cache:-"./data"} -export NGINX_PORT=81 -export VLLM_SKIP_WARMUP=true - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch main https://github.com/opea-project/GenAIComps.git - docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log - - git clone 
https://github.com/HabanaAI/vllm-fork.git - cd ./vllm-fork/ - docker build -f Dockerfile.hpu -t opea/vllm-gaudi:${TAG} --shm-size=128g . --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy - cd .. - rm -rf vllm-fork - - docker images && sleep 1s -} - -function start_services() { - cd $WORKPATH/docker_compose/intel/hpu/gaudi - - source ./set_env.sh - - - sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env - - # Start Docker Containers - docker compose up -d > ${LOG_PATH}/start_services_with_compose.log - - n=0 - until [[ "$n" -ge 100 ]]; do - docker logs vllm-gaudi-service > ${LOG_PATH}/lvm_vllm_service_start.log - if grep -q Starting ${LOG_PATH}/lvm_vllm_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_microservices() { - sleep 15s - # Check if the microservices are running correctly. - - # lvm microservice - validate_services \ - "${ip_address}:9399/v1/lvm" \ - "yellow" \ - "lvm" \ - "lvm-vllm-gaudi-service" \ - '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -} - -function validate_megaservice() { - sleep 15s - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/visualqna" \ - "sign" \ - "visualqna-gaudi-backend-server" \ - "visualqna-gaudi-backend-server" \ - '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' - - # test the megeservice via nginx - validate_services \ - "${ip_address}:${NGINX_PORT}/v1/visualqna" \ - "sign" \ - "visualqna-gaudi-nginx-server" \ - "visualqna-gaudi-nginx-server" \ - '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' -} - -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" 
- else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - -function stop_docker() { - cd $WORKPATH/docker_compose/intel/hpu/gaudi - docker compose stop && docker compose rm -f -} - -function main() { - - stop_docker - - if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi - start_services - - validate_microservices - validate_megaservice - #validate_frontend - - stop_docker - echo y | docker system prune - -} - -main diff --git a/VisualQnA/tests/test_compose_on_rocm.sh b/VisualQnA/tests/test_compose_on_rocm.sh deleted file mode 100644 index 5092dd39cd..0000000000 --- a/VisualQnA/tests/test_compose_on_rocm.sh +++ /dev/null @@ -1,224 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Advanced Micro Devices, Inc. -# SPDX-License-Identifier: Apache-2.0 - -set -x -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -export REGISTRY=${IMAGE_REPO} -export TAG=${IMAGE_TAG} -export HOST_IP=${ip_address} -export VISUALQNA_TGI_SERVICE_PORT="8399" -export VISUALQNA_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} -export VISUALQNA_CARD_ID="card1" -export VISUALQNA_RENDER_ID="renderD136" -export LVM_MODEL_ID="Xkev/Llama-3.2V-11B-cot" -export MODEL="llava-hf/llava-v1.6-mistral-7b-hf" -export LVM_ENDPOINT="http://${HOST_IP}:8399" -export LVM_SERVICE_PORT=9399 -export MEGA_SERVICE_HOST_IP=${HOST_IP} -export LVM_SERVICE_HOST_IP=${HOST_IP} -export BACKEND_SERVICE_ENDPOINT="http://${HOST_IP}:${BACKEND_SERVICE_PORT}/v1/visualqna" -export FRONTEND_SERVICE_IP=${HOST_IP} -export FRONTEND_SERVICE_PORT=5173 -export BACKEND_SERVICE_NAME=visualqna -export BACKEND_SERVICE_IP=${HOST_IP} -export BACKEND_SERVICE_PORT=8888 -export NGINX_PORT=18003 -export PATH="~/miniconda3/bin:$PATH" - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . -type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log - - docker pull ghcr.io/huggingface/text-generation-inference:2.4.1-rocm - docker images && sleep 1s -} - -function start_services() { - cd $WORKPATH/docker_compose/amd/gpu/rocm - - sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env - - # Start Docker Containers - docker compose up -d > ${LOG_PATH}/start_services_with_compose.log - - n=0 - until [[ "$n" -ge 100 ]]; do - docker logs visualqna-tgi-service > ${LOG_PATH}/lvm_tgi_service_start.log - if grep -q Connected ${LOG_PATH}/lvm_tgi_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_microservices() { - # Check if the microservices are running correctly. - - # lvm microservice - validate_services \ - "${ip_address}:9399/v1/lvm" \ - "The image" \ - "lvm" \ - "visualqna-tgi-service" \ - '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -} - -function validate_megaservice() { - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/visualqna" \ - "The image" \ - "visualqna-rocm-backend-server" \ - "visualqna-rocm-backend-server" \ - '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' - - # test the megeservice via nginx - validate_services \ - "${ip_address}:${NGINX_PORT}/v1/visualqna" \ - "The image" \ - "visualqna-rocm-nginx-server" \ - "visualqna-rocm-nginx-server" \ - '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' -} - -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? 
- - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - -function stop_docker() { - cd $WORKPATH/docker_compose/amd/gpu/rocm/ - docker compose stop && docker compose rm -f -} - -function main() { - - stop_docker - - if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi - start_services - - validate_microservices - validate_megaservice - #validate_frontend - - stop_docker - echo y | docker system prune - -} - -main diff --git a/VisualQnA/tests/test_compose_on_xeon.sh b/VisualQnA/tests/test_compose_on_xeon.sh deleted file mode 100644 index 0e645c324b..0000000000 --- a/VisualQnA/tests/test_compose_on_xeon.sh +++ /dev/null @@ -1,196 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" -export REGISTRY=${IMAGE_REPO} -export TAG=${IMAGE_TAG} -export MODEL_CACHE=${model_cache:-"./data"} -export NGINX_PORT=81 - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch main https://github.com/opea-project/GenAIComps.git - docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log - - docker pull opea/vllm:latest - docker tag opea/vllm:latest opea/vllm:${TAG} - docker images && sleep 1s -} - -function start_services() { - cd $WORKPATH/docker_compose/intel/cpu/xeon/ - - source ./set_env.sh - - sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env - - # Start Docker Containers - docker compose up -d > ${LOG_PATH}/start_services_with_compose.log - - n=0 - until [[ "$n" -ge 200 ]]; do - docker logs vllm-service > ${LOG_PATH}/lvm_vllm_service_start.log - if grep -q Starting ${LOG_PATH}/lvm_vllm_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_microservices() { - sleep 15s - # Check if the microservices are running correctly. 
- - # lvm microservice - validate_services \ - "${ip_address}:9399/v1/lvm" \ - "yellow" \ - "lvm" \ - "lvm-xeon-server" \ - '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -} - -function validate_megaservice() { - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/visualqna" \ - "sign" \ - "visualqna-xeon-backend-server" \ - "visualqna-xeon-backend-server" \ - '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' - - # test the megeservice via nginx - validate_services \ - "${ip_address}:${NGINX_PORT}/v1/visualqna" \ - "sign" \ - "visualqna-xeon-nginx-server" \ - "visualqna-xeon-nginx-server" \ - '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' -} - -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - -function stop_docker() { - cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose stop && docker compose rm -f -} - -function main() { - - stop_docker - - if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi - start_services - - validate_microservices - validate_megaservice - #validate_frontend - - stop_docker - echo y | docker system prune - -} - -main diff --git a/VisualQnA/tests/test_compose_tgi_on_gaudi.sh b/VisualQnA/tests/test_compose_tgi_on_gaudi.sh deleted file mode 100644 index 913d6ed527..0000000000 --- a/VisualQnA/tests/test_compose_tgi_on_gaudi.sh +++ /dev/null @@ -1,222 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" -export REGISTRY=${IMAGE_REPO} -export TAG=${IMAGE_TAG} -export MODEL_CACHE=${model_cache:-"./data"} - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . 
-type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." - docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log - - docker pull ghcr.io/huggingface/tgi-gaudi:2.0.6 - docker images && sleep 1s -} - -function start_services() { - cd $WORKPATH/docker_compose/intel/hpu/gaudi - - export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf" - export LVM_ENDPOINT="http://${ip_address}:8399" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export LVM_SERVICE_PORT=9399 - export MEGA_SERVICE_HOST_IP=${ip_address} - export LVM_SERVICE_HOST_IP=${ip_address} - export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/visualqna" - export FRONTEND_SERVICE_IP=${ip_address} - export FRONTEND_SERVICE_PORT=5173 - export BACKEND_SERVICE_NAME=visualqna - export BACKEND_SERVICE_IP=${ip_address} - export BACKEND_SERVICE_PORT=8888 - export NGINX_PORT=80 - export host_ip=${ip_address} - - sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env - - # Start Docker Containers - docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log - - n=0 - until [[ "$n" -ge 100 ]]; do - docker logs tgi-llava-gaudi-server > ${LOG_PATH}/lvm_tgi_service_start.log - if grep -q Connected ${LOG_PATH}/lvm_tgi_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_microservices() { - sleep 15s - # Check if the microservices are running correctly. - - # lvm microservice - validate_services \ - "${ip_address}:9399/v1/lvm" \ - "The image" \ - "lvm" \ - "lvm-gaudi-server" \ - '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -} - -function validate_megaservice() { - sleep 15s - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/visualqna" \ - "The image" \ - "visualqna-gaudi-backend-server" \ - "visualqna-gaudi-backend-server" \ - '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" 
- }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' - - # test the megeservice via nginx - validate_services \ - "${ip_address}:80/v1/visualqna" \ - "The image" \ - "visualqna-gaudi-nginx-server" \ - "visualqna-gaudi-nginx-server" \ - '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' -} - -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - -function stop_docker() { - cd $WORKPATH/docker_compose/intel/hpu/gaudi - docker compose stop && docker compose rm -f -} - -function main() { - - stop_docker - - if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi - start_services - - validate_microservices - validate_megaservice - # validate_frontend - - stop_docker - echo y | docker system prune - -} - -main diff --git a/VisualQnA/tests/test_compose_tgi_on_xeon.sh b/VisualQnA/tests/test_compose_tgi_on_xeon.sh deleted file mode 100644 index d6311719d0..0000000000 --- a/VisualQnA/tests/test_compose_tgi_on_xeon.sh +++ /dev/null @@ -1,222 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x -IMAGE_REPO=${IMAGE_REPO:-"opea"} -IMAGE_TAG=${IMAGE_TAG:-"latest"} -echo "REGISTRY=IMAGE_REPO=${IMAGE_REPO}" -echo "TAG=IMAGE_TAG=${IMAGE_TAG}" -export REGISTRY=${IMAGE_REPO} -export TAG=${IMAGE_TAG} -export MODEL_CACHE=${model_cache:-"./data"} - -WORKPATH=$(dirname "$PWD") -LOG_PATH="$WORKPATH/tests" -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - opea_branch=${opea_branch:-"main"} - # If the opea_branch isn't main, replace the git clone branch in Dockerfile. - if [[ "${opea_branch}" != "main" ]]; then - cd $WORKPATH - OLD_STRING="RUN git clone --depth 1 https://github.com/opea-project/GenAIComps.git" - NEW_STRING="RUN git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git" - find . -type f -name "Dockerfile*" | while read -r file; do - echo "Processing file: $file" - sed -i "s|$OLD_STRING|$NEW_STRING|g" "$file" - done - fi - - cd $WORKPATH/docker_image_build - git clone --depth 1 --branch ${opea_branch} https://github.com/opea-project/GenAIComps.git - - echo "Build all the images with --no-cache, check docker_image_build.log for details..." 
- docker compose -f build.yaml build --no-cache > ${LOG_PATH}/docker_image_build.log - - docker pull ghcr.io/huggingface/text-generation-inference:2.4.0-intel-cpu - docker images && sleep 1s -} - -function start_services() { - cd $WORKPATH/docker_compose/intel/cpu/xeon/ - - export LVM_MODEL_ID="llava-hf/llava-v1.6-mistral-7b-hf" - export LVM_ENDPOINT="http://${ip_address}:8399" - export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} - export LVM_SERVICE_PORT=9399 - export MEGA_SERVICE_HOST_IP=${ip_address} - export LVM_SERVICE_HOST_IP=${ip_address} - export BACKEND_SERVICE_ENDPOINT="http://${ip_address}:8888/v1/visualqna" - export FRONTEND_SERVICE_IP=${ip_address} - export FRONTEND_SERVICE_PORT=5173 - export BACKEND_SERVICE_NAME=visualqna - export BACKEND_SERVICE_IP=${ip_address} - export BACKEND_SERVICE_PORT=8888 - export NGINX_PORT=80 - export host_ip=${ip_address} - - sed -i "s/backend_address/$ip_address/g" $WORKPATH/ui/svelte/.env - - # Start Docker Containers - docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log - - n=0 - until [[ "$n" -ge 200 ]]; do - docker logs tgi-llava-xeon-server > ${LOG_PATH}/lvm_tgi_service_start.log - if grep -q Connected ${LOG_PATH}/lvm_tgi_service_start.log; then - break - fi - sleep 5s - n=$((n+1)) - done -} - -function validate_services() { - local URL="$1" - local EXPECTED_RESULT="$2" - local SERVICE_NAME="$3" - local DOCKER_NAME="$4" - local INPUT_DATA="$5" - - local HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL") - if [ "$HTTP_STATUS" -eq 200 ]; then - echo "[ $SERVICE_NAME ] HTTP status is 200. Checking content..." - - local CONTENT=$(curl -s -X POST -d "$INPUT_DATA" -H 'Content-Type: application/json' "$URL" | tee ${LOG_PATH}/${SERVICE_NAME}.log) - - if echo "$CONTENT" | grep -q "$EXPECTED_RESULT"; then - echo "[ $SERVICE_NAME ] Content is as expected." - else - echo "[ $SERVICE_NAME ] Content does not match the expected result: $CONTENT" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - else - echo "[ $SERVICE_NAME ] HTTP status is not 200. Received status was $HTTP_STATUS" - docker logs ${DOCKER_NAME} >> ${LOG_PATH}/${SERVICE_NAME}.log - exit 1 - fi - sleep 1s -} - -function validate_microservices() { - sleep 15s - # Check if the microservices are running correctly. - - # lvm microservice - validate_services \ - "${ip_address}:9399/v1/lvm" \ - "The image" \ - "lvm" \ - "lvm-xeon-server" \ - '{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt":"What is this?"}' -} - -function validate_megaservice() { - sleep 15s - # Curl the Mega Service - validate_services \ - "${ip_address}:8888/v1/visualqna" \ - "The image" \ - "visualqna-xeon-backend-server" \ - "visualqna-xeon-backend-server" \ - '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' - - # test the megeservice via nginx - validate_services \ - "${ip_address}:80/v1/visualqna" \ - "The image" \ - "visualqna-xeon-nginx-server" \ - "visualqna-xeon-nginx-server" \ - '{ - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" 
- }, - { - "type": "image_url", - "image_url": { - "url": "https://www.ilankelman.org/stopsigns/australia.jpg" - } - } - ] - } - ], - "max_tokens": 300 - }' -} - -function validate_frontend() { - cd $WORKPATH/ui/svelte - local conda_env_name="OPEA_e2e" - export PATH=${HOME}/miniforge3/bin/:$PATH - if conda info --envs | grep -q "$conda_env_name"; then - echo "$conda_env_name exist!" - else - conda create -n ${conda_env_name} python=3.12 -y - fi - source activate ${conda_env_name} - - sed -i "s/localhost/$ip_address/g" playwright.config.ts - - conda install -c conda-forge nodejs=22.6.0 -y - npm install && npm ci && npx playwright install --with-deps - node -v && npm -v && pip list - - exit_status=0 - npx playwright test || exit_status=$? - - if [ $exit_status -ne 0 ]; then - echo "[TEST INFO]: ---------frontend test failed---------" - exit $exit_status - else - echo "[TEST INFO]: ---------frontend test passed---------" - fi -} - -function stop_docker() { - cd $WORKPATH/docker_compose/intel/cpu/xeon/ - docker compose stop && docker compose rm -f -} - -function main() { - - stop_docker - - if [[ "$IMAGE_REPO" == "opea" ]]; then build_docker_images; fi - start_services - - validate_microservices - validate_megaservice - # validate_frontend - - stop_docker - echo y | docker system prune - -} - -main diff --git a/VisualQnA/ui/docker/Dockerfile b/VisualQnA/ui/docker/Dockerfile deleted file mode 100644 index 1d5115f4b5..0000000000 --- a/VisualQnA/ui/docker/Dockerfile +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Use node 20.11.1 as the base image -FROM node:20.11.1 - -# Update package manager and install Git -RUN apt-get update -y && apt-get install -y git - -# Copy the front-end code repository -COPY svelte /home/user/svelte - -# Set the working directory -WORKDIR /home/user/svelte - -# Install front-end dependencies -RUN npm install - -# Build the front-end application -RUN npm run build - -# Expose the port of the front-end application -EXPOSE 5173 - -# Run the front-end application in preview mode -CMD ["npm", "run", "preview", "--", "--port", "5173", "--host", "0.0.0.0"] diff --git a/VisualQnA/ui/svelte/.editorconfig b/VisualQnA/ui/svelte/.editorconfig deleted file mode 100644 index 2b7a6637f7..0000000000 --- a/VisualQnA/ui/svelte/.editorconfig +++ /dev/null @@ -1,10 +0,0 @@ -[*] -indent_style = tab - -[package.json] -indent_style = space -indent_size = 2 - -[*.md] -indent_style = space -indent_size = 2 diff --git a/VisualQnA/ui/svelte/.env b/VisualQnA/ui/svelte/.env deleted file mode 100644 index ce3ce40333..0000000000 --- a/VisualQnA/ui/svelte/.env +++ /dev/null @@ -1 +0,0 @@ -BACKEND_BASE_URL = '/v1/visualqna' diff --git a/VisualQnA/ui/svelte/.eslintignore b/VisualQnA/ui/svelte/.eslintignore deleted file mode 100644 index 38972655fa..0000000000 --- a/VisualQnA/ui/svelte/.eslintignore +++ /dev/null @@ -1,13 +0,0 @@ -.DS_Store -node_modules -/build -/.svelte-kit -/package -.env -.env.* -!.env.example - -# Ignore files for PNPM, NPM and YARN -pnpm-lock.yaml -package-lock.json -yarn.lock diff --git a/VisualQnA/ui/svelte/.eslintrc.cjs b/VisualQnA/ui/svelte/.eslintrc.cjs deleted file mode 100644 index a6592d11f7..0000000000 --- a/VisualQnA/ui/svelte/.eslintrc.cjs +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) 2024 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -module.exports = { - root: true, - parser: "@typescript-eslint/parser", - extends: ["eslint:recommended", "plugin:@typescript-eslint/recommended", "prettier"], - plugins: ["svelte3", "@typescript-eslint", "neverthrow"], - ignorePatterns: ["*.cjs"], - overrides: [{ files: ["*.svelte"], processor: "svelte3/svelte3" }], - settings: { - "svelte3/typescript": () => require("typescript"), - }, - parserOptions: { - sourceType: "module", - ecmaVersion: 2020, - }, - env: { - browser: true, - es2017: true, - node: true, - }, -}; diff --git a/VisualQnA/ui/svelte/.prettierignore b/VisualQnA/ui/svelte/.prettierignore deleted file mode 100644 index 38972655fa..0000000000 --- a/VisualQnA/ui/svelte/.prettierignore +++ /dev/null @@ -1,13 +0,0 @@ -.DS_Store -node_modules -/build -/.svelte-kit -/package -.env -.env.* -!.env.example - -# Ignore files for PNPM, NPM and YARN -pnpm-lock.yaml -package-lock.json -yarn.lock diff --git a/VisualQnA/ui/svelte/.prettierrc b/VisualQnA/ui/svelte/.prettierrc deleted file mode 100644 index c932dd178f..0000000000 --- a/VisualQnA/ui/svelte/.prettierrc +++ /dev/null @@ -1 +0,0 @@ -{"pluginSearchDirs": ["."], "overrides": [{"files": "*.svelte", "options": {"parser": "svelte"}}]} diff --git a/VisualQnA/ui/svelte/package.json b/VisualQnA/ui/svelte/package.json deleted file mode 100644 index 7f5f496ba5..0000000000 --- a/VisualQnA/ui/svelte/package.json +++ /dev/null @@ -1,62 +0,0 @@ -{ - "name": "visual-qna", - "version": "0.0.1", - "private": true, - "scripts": { - "dev": "vite dev", - "build": "vite build", - "preview": "vite preview", - "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json", - "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch", - "lint": "prettier --check . && eslint .", - "format": "prettier --write ." 
- }, - "peerDependencies": { - "svelte": "^4.0.0" - }, - "devDependencies": { - "@fortawesome/free-solid-svg-icons": "6.2.0", - "@playwright/test": "^1.45.2", - "@sveltejs/adapter-auto": "^3.0.0", - "@sveltejs/kit": "^2.0.0", - "@sveltejs/vite-plugin-svelte": "^3.0.0", - "@tailwindcss/typography": "0.5.7", - "@types/debug": "4.1.7", - "@types/node": "^20.12.13", - "@types/pica": "^9.0.5", - "@typescript-eslint/eslint-plugin": "^5.27.0", - "@typescript-eslint/parser": "^5.27.0", - "autoprefixer": "^10.4.16", - "date-picker-svelte": "^2.6.0", - "debug": "4.3.4", - "postcss": "^8.4.31", - "postcss-load-config": "^4.0.1", - "postcss-preset-env": "^8.3.2", - "prettier": "^2.8.8", - "prettier-plugin-svelte": "^2.7.0", - "prettier-plugin-tailwindcss": "^0.3.0", - "svelte": "^4.2.7", - "svelte-check": "^3.6.0", - "svelte-fa": "3.0.3", - "tailwindcss": "^3.3.6", - "tslib": "^2.4.1", - "typescript": "^5.0.0", - "vite": "^5.0.11" - }, - "type": "module", - "dependencies": { - "date-fns": "^2.30.0", - "driver.js": "^1.3.0", - "flowbite": "^2.5.2", - "flowbite-svelte": "^0.38.5", - "flowbite-svelte-icons": "^1.4.0", - "fuse.js": "^6.6.2", - "lodash": "^4.17.21", - "pica": "^9.0.1", - "playwright": "^1.44.0", - "ramda": "^0.29.0", - "sse.js": "^0.6.1", - "svelte-notifications": "^0.9.98", - "svrollbar": "^0.12.0" - } -} diff --git a/VisualQnA/ui/svelte/playwright.config.ts b/VisualQnA/ui/svelte/playwright.config.ts deleted file mode 100644 index 578a1c2872..0000000000 --- a/VisualQnA/ui/svelte/playwright.config.ts +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 - -import { defineConfig, devices } from "@playwright/test"; - -/** - * Read environment variables from file. - * https://github.com/motdotla/dotenv - */ -// require('dotenv').config(); - -/** - * See https://playwright.dev/docs/test-configuration. - */ -export default defineConfig({ - testDir: "./tests", - /* Maximum time one test can run for. */ - timeout: 30 * 1000, - expect: { - /** - * Maximum time expect() should wait for the condition to be met. - * For example in `await expect(locator).toHaveText();` - */ - timeout: 5000, - }, - /* Run tests in files in parallel */ - fullyParallel: true, - /* Fail the build on CI if you accidentally left test.only in the source code. */ - forbidOnly: !!process.env.CI, - /* Retry on CI only */ - retries: process.env.CI ? 2 : 0, - /* Opt out of parallel tests on CI. */ - workers: process.env.CI ? 1 : undefined, - /* Reporter to use. See https://playwright.dev/docs/test-reporters */ - reporter: [["html", { open: "never" }]], - /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */ - use: { - /* Maximum time each action such as `click()` can take. Defaults to 0 (no limit). */ - actionTimeout: 0, - /* Base URL to use in actions like `await page.goto('/')`. */ - baseURL: "http://localhost:5173", - - /* Collect trace when retrying the failed test. 
See https://playwright.dev/docs/trace-viewer */ - trace: "on-first-retry", - }, - - /* Configure projects for major browsers */ - projects: [ - { - name: "webkit", - use: { ...devices["Desktop Safari"] }, - }, - ], -}); diff --git a/VisualQnA/ui/svelte/postcss.config.cjs b/VisualQnA/ui/svelte/postcss.config.cjs deleted file mode 100644 index b384b43ebe..0000000000 --- a/VisualQnA/ui/svelte/postcss.config.cjs +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright (c) 2024 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -const tailwindcss = require("tailwindcss"); -const autoprefixer = require("autoprefixer"); - -const config = { - plugins: [ - //Some plugins, like tailwindcss/nesting, need to run before Tailwind, - tailwindcss(), - //But others, like autoprefixer, need to run after, - autoprefixer, - ], -}; - -module.exports = config; diff --git a/VisualQnA/ui/svelte/src/app.d.ts b/VisualQnA/ui/svelte/src/app.d.ts deleted file mode 100644 index fa6a0abf77..0000000000 --- a/VisualQnA/ui/svelte/src/app.d.ts +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) 2024 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// See: https://kit.svelte.dev/docs/types#app -// import { Result} from "neverthrow"; -interface Window { - deviceType: string; -} diff --git a/VisualQnA/ui/svelte/src/app.html b/VisualQnA/ui/svelte/src/app.html deleted file mode 100644 index db69926ea8..0000000000 --- a/VisualQnA/ui/svelte/src/app.html +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - %sveltekit.head% - - -
%sveltekit.body%
- - diff --git a/VisualQnA/ui/svelte/src/app.postcss b/VisualQnA/ui/svelte/src/app.postcss deleted file mode 100644 index 963bbca4ef..0000000000 --- a/VisualQnA/ui/svelte/src/app.postcss +++ /dev/null @@ -1,86 +0,0 @@ -/* Write your global styles here, in PostCSS syntax */ -@tailwind base; -@tailwind components; -@tailwind utilities; - -html, body { - height: 100%; -} - -.btn { - @apply flex-nowrap; -} -a.btn { - @apply no-underline; -} -.input { - @apply text-base; -} - -.bg-dark-blue { - background-color: #004a86; -} - -.bg-light-blue { - background-color: #0068b5; -} - -.bg-turquoise { - background-color: #00a3f6; -} - -.bg-header { - background-color: #ffffff; -} - -.bg-button { - background-color: #0068b5; -} - -.bg-title { - background-color: #f7f7f7; -} - -.text-header { - color: #0068b5; -} - -.text-button { - color: #252e47; -} - -.text-title-color { - color: rgb(38,38,38); -} - -.font-intel { - font-family: "intel-clear","tahoma",Helvetica,"helvetica",Arial,sans-serif; -} - -.font-title-intel { - font-family: "intel-one","intel-clear",Helvetica,Arial,sans-serif; -} - -.bg-footer { - background-color: #e7e7e7; -} - -.bg-light-green { - background-color: #d7f3a1; -} - -.bg-purple { - background-color: #653171; -} - -.bg-dark-blue { - background-color: #224678; -} - -.border-input-color { - border-color: #605e5c; -} - -.w-12\/12 { - width: 100% -} diff --git a/VisualQnA/ui/svelte/src/lib/assets/avatar/svelte/Delete.svelte b/VisualQnA/ui/svelte/src/lib/assets/avatar/svelte/Delete.svelte deleted file mode 100644 index 8847a22275..0000000000 --- a/VisualQnA/ui/svelte/src/lib/assets/avatar/svelte/Delete.svelte +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - { - dispatch('DeleteAvatar') }} -viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" width="20" height="20"> - - diff --git a/VisualQnA/ui/svelte/src/lib/assets/chat/svelte/Assistant.svelte b/VisualQnA/ui/svelte/src/lib/assets/chat/svelte/Assistant.svelte deleted file mode 100644 index b68d2a08cd..0000000000 --- a/VisualQnA/ui/svelte/src/lib/assets/chat/svelte/Assistant.svelte +++ /dev/null @@ -1,44 +0,0 @@ - - - diff --git a/VisualQnA/ui/svelte/src/lib/assets/chat/svelte/PaperAirplane.svelte b/VisualQnA/ui/svelte/src/lib/assets/chat/svelte/PaperAirplane.svelte deleted file mode 100644 index d1d14077f2..0000000000 --- a/VisualQnA/ui/svelte/src/lib/assets/chat/svelte/PaperAirplane.svelte +++ /dev/null @@ -1,68 +0,0 @@ - - - - - - - - diff --git a/VisualQnA/ui/svelte/src/lib/assets/chat/svelte/PersonOutlined.svelte b/VisualQnA/ui/svelte/src/lib/assets/chat/svelte/PersonOutlined.svelte deleted file mode 100644 index dd2f9fdb78..0000000000 --- a/VisualQnA/ui/svelte/src/lib/assets/chat/svelte/PersonOutlined.svelte +++ /dev/null @@ -1,26 +0,0 @@ - - - - - diff --git a/VisualQnA/ui/svelte/src/lib/assets/header/intelLogo.svelte b/VisualQnA/ui/svelte/src/lib/assets/header/intelLogo.svelte deleted file mode 100644 index 50039d5b37..0000000000 --- a/VisualQnA/ui/svelte/src/lib/assets/header/intelLogo.svelte +++ /dev/null @@ -1,49 +0,0 @@ - - - - - - - - - diff --git a/VisualQnA/ui/svelte/src/lib/assets/imageData/extreme_ironing.png b/VisualQnA/ui/svelte/src/lib/assets/imageData/extreme_ironing.png deleted file mode 100644 index 93ef4b7961..0000000000 Binary files a/VisualQnA/ui/svelte/src/lib/assets/imageData/extreme_ironing.png and /dev/null differ diff --git a/VisualQnA/ui/svelte/src/lib/assets/imageData/waterview.png b/VisualQnA/ui/svelte/src/lib/assets/imageData/waterview.png deleted file mode 100644 index 
d5562df1d0..0000000000 Binary files a/VisualQnA/ui/svelte/src/lib/assets/imageData/waterview.png and /dev/null differ diff --git a/VisualQnA/ui/svelte/src/lib/assets/layout/css/driver.css b/VisualQnA/ui/svelte/src/lib/assets/layout/css/driver.css deleted file mode 100644 index 453db6082a..0000000000 --- a/VisualQnA/ui/svelte/src/lib/assets/layout/css/driver.css +++ /dev/null @@ -1,94 +0,0 @@ -.driverjs-theme { - background: transparent; - color: #fff; - box-shadow: none; - padding: 0; -} - -.driver-popover-arrow { - border: 10px solid transparent; - animation: blink 1s 3 steps(1); -} - -@keyframes blink { - 0% { - opacity: 1; - } - 50% { - opacity: 0.2; - } - 100% { - opacity: 1; - } -} - -.driver-popover.driverjs-theme .driver-popover-arrow-side-left.driver-popover-arrow { - border-left-color: #174ed1; -} - -.driver-popover.driverjs-theme .driver-popover-arrow-side-right.driver-popover-arrow { - border-right-color: #174ed1; -} - -.driver-popover.driverjs-theme .driver-popover-arrow-side-top.driver-popover-arrow { - border-top-color: #174ed1; -} - -.driver-popover.driverjs-theme .driver-popover-arrow-side-bottom.driver-popover-arrow { - border-bottom-color: #174ed1; -} - -.driver-popover-footer { - background: transparent; - color: #fff; -} -.driver-popover-title { - border-top-left-radius: 5px; - border-top-right-radius: 5px; -} - -.driver-popover-title, -.driver-popover-description { - display: block; - padding: 15px 15px 7px 15px; - background: #174ed1; - border: none; -} - -.driver-popover-close-btn { - color: #fff; -} - -.driver-popover-footer button:hover, -.driver-popover-footer button:focus { - background: #174ed1; - color: #fff; -} - -.driver-popover-description { - padding: 5px 15px; - border-bottom-left-radius: 5px; - border-bottom-right-radius: 5px; -} - -.driver-popover-title[style*="block"] + .driver-popover-description { - margin: 0; -} -.driver-popover-progress-text { - color: #fff; -} - -.driver-popover-footer button { - background: #174ed1; - border: 2px #174ed1 dashed; - color: #fff; - border-radius: 50%; - text-shadow: none; -} -.driver-popover-close-btn:hover, -.driver-popover-close-btn:focus { - color: #fff; -} -.driver-popover-navigation-btns button + button { - margin-left: 10px; -} diff --git a/VisualQnA/ui/svelte/src/lib/assets/upload/help.svelte b/VisualQnA/ui/svelte/src/lib/assets/upload/help.svelte deleted file mode 100644 index adccf7bb56..0000000000 --- a/VisualQnA/ui/svelte/src/lib/assets/upload/help.svelte +++ /dev/null @@ -1,24 +0,0 @@ - - - diff --git a/VisualQnA/ui/svelte/src/lib/assets/upload/next.svelte b/VisualQnA/ui/svelte/src/lib/assets/upload/next.svelte deleted file mode 100644 index 70f4fe25e8..0000000000 --- a/VisualQnA/ui/svelte/src/lib/assets/upload/next.svelte +++ /dev/null @@ -1,31 +0,0 @@ - - - diff --git a/VisualQnA/ui/svelte/src/lib/assets/upload/previous.svelte b/VisualQnA/ui/svelte/src/lib/assets/upload/previous.svelte deleted file mode 100644 index c47d9c49da..0000000000 --- a/VisualQnA/ui/svelte/src/lib/assets/upload/previous.svelte +++ /dev/null @@ -1,31 +0,0 @@ - - - diff --git a/VisualQnA/ui/svelte/src/lib/assets/voice/svg/paste.svg b/VisualQnA/ui/svelte/src/lib/assets/voice/svg/paste.svg deleted file mode 100644 index 8910f0ea64..0000000000 --- a/VisualQnA/ui/svelte/src/lib/assets/voice/svg/paste.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/VisualQnA/ui/svelte/src/lib/assets/voice/svg/uploadFile.svg b/VisualQnA/ui/svelte/src/lib/assets/voice/svg/uploadFile.svg deleted file mode 100644 index 9a77286a8f..0000000000 --- 
a/VisualQnA/ui/svelte/src/lib/assets/voice/svg/uploadFile.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/VisualQnA/ui/svelte/src/lib/modules/chat/ChatMessage.svelte b/VisualQnA/ui/svelte/src/lib/modules/chat/ChatMessage.svelte deleted file mode 100644 index 720908163b..0000000000 --- a/VisualQnA/ui/svelte/src/lib/modules/chat/ChatMessage.svelte +++ /dev/null @@ -1,76 +0,0 @@ - - - - -
-
- -
-
-
- {#if msg.imgSrc} - Uploaded Image - {/if} -

- {@html msg.content} -

-
-
-
-{#if time} -
- { - dispatch("scrollTop"); - }} - /> -
-{/if} - - diff --git a/VisualQnA/ui/svelte/src/lib/modules/chat/MessageAvatar.svelte b/VisualQnA/ui/svelte/src/lib/modules/chat/MessageAvatar.svelte deleted file mode 100644 index 0f6a24b96d..0000000000 --- a/VisualQnA/ui/svelte/src/lib/modules/chat/MessageAvatar.svelte +++ /dev/null @@ -1,30 +0,0 @@ - - - - -{#if role === MessageRole.User} - -{:else} - -{/if} diff --git a/VisualQnA/ui/svelte/src/lib/modules/chat/MessageTimer.svelte b/VisualQnA/ui/svelte/src/lib/modules/chat/MessageTimer.svelte deleted file mode 100644 index 9416cc8795..0000000000 --- a/VisualQnA/ui/svelte/src/lib/modules/chat/MessageTimer.svelte +++ /dev/null @@ -1,68 +0,0 @@ - - - - -
-
-
- - { - dispatch("handleTop"); - }} - > -
-
-
- -
-
- End to End Time: -

{time}s

-
-
-
-
diff --git a/VisualQnA/ui/svelte/src/lib/modules/frame/Layout.svelte b/VisualQnA/ui/svelte/src/lib/modules/frame/Layout.svelte deleted file mode 100644 index 0c5b997d28..0000000000 --- a/VisualQnA/ui/svelte/src/lib/modules/frame/Layout.svelte +++ /dev/null @@ -1,48 +0,0 @@ - - - - -
-
-
- - - -
-
-
diff --git a/VisualQnA/ui/svelte/src/lib/modules/upload/imagePrompt.svelte b/VisualQnA/ui/svelte/src/lib/modules/upload/imagePrompt.svelte deleted file mode 100644 index 90e55bff96..0000000000 --- a/VisualQnA/ui/svelte/src/lib/modules/upload/imagePrompt.svelte +++ /dev/null @@ -1,102 +0,0 @@ - - - - -
-

Example

-
- - -
- {images[currentIndex].alt} -
-

{images[currentIndex].prompt}

-
-
- - -
-
- - diff --git a/VisualQnA/ui/svelte/src/lib/modules/upload/upload.svelte b/VisualQnA/ui/svelte/src/lib/modules/upload/upload.svelte deleted file mode 100644 index 5347d6bd77..0000000000 --- a/VisualQnA/ui/svelte/src/lib/modules/upload/upload.svelte +++ /dev/null @@ -1,32 +0,0 @@ - - - - -
-

Upload Images

- -
or -
- - - - -
-

Parameters

- -

Max output tokens: {stepValue}

-
diff --git a/VisualQnA/ui/svelte/src/lib/modules/upload/uploadImg.svelte b/VisualQnA/ui/svelte/src/lib/modules/upload/uploadImg.svelte deleted file mode 100644 index 9fcf6eefcc..0000000000 --- a/VisualQnA/ui/svelte/src/lib/modules/upload/uploadImg.svelte +++ /dev/null @@ -1,168 +0,0 @@ - - - - - { - event.preventDefault(); - }} - on:change={handleChange} -> - {#if value.length === 0 && !imageUrl} - -

- Click to upload or drag and drop -

-

- SVG, PNG, JPG -

- {:else if imageUrl} - Uploaded Image - {/if} -
diff --git a/VisualQnA/ui/svelte/src/lib/network/chat/Network.ts b/VisualQnA/ui/svelte/src/lib/network/chat/Network.ts deleted file mode 100644 index 8a78c0cda8..0000000000 --- a/VisualQnA/ui/svelte/src/lib/network/chat/Network.ts +++ /dev/null @@ -1,52 +0,0 @@ -// Copyright (c) 2024 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import { env } from "$env/dynamic/public"; -import { SSE } from "sse.js"; - -const BACKEND_BASE_URL = env.BACKEND_BASE_URL; - -export async function fetchTextStream(query: string, stepValueStore: number, base64ImageStore: string) { - let payload = {}; - let url = ""; - base64ImageStore = base64ImageStore.replace(/^data:[a-zA-Z]+\/[a-zA-Z]+;base64,/, ""); - - payload = { - messages: [ - { - role: "user", - content: [ - { - type: "text", - text: query, - }, - { - type: "image_url", - image_url: { url: base64ImageStore }, - }, - ], - }, - ], - max_tokens: stepValueStore, - stream: true, - }; - console.log("payload", payload); - - url = `${BACKEND_BASE_URL}`; - - return new SSE(url, { - headers: { "Content-Type": "application/json" }, - payload: JSON.stringify(payload), - }); -} diff --git a/VisualQnA/ui/svelte/src/lib/network/upload/Network.ts b/VisualQnA/ui/svelte/src/lib/network/upload/Network.ts deleted file mode 100644 index 284494f851..0000000000 --- a/VisualQnA/ui/svelte/src/lib/network/upload/Network.ts +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright (c) 2024 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -import { env } from "$env/dynamic/public"; - -const BACKEND_BASE_URL = env.BACKEND_BASE_URL; - -export async function fetchKnowledgeBaseId(file: Blob, fileName: string) { - const url = `${BACKEND_BASE_URL}/create`; - const formData = new FormData(); - formData.append("file", file, fileName); - const init: RequestInit = { - method: "POST", - body: formData, - }; - - try { - const response = await fetch(url, init); - if (!response.ok) throw response.status; - return await response.json(); - } catch (error) { - console.error("network error: ", error); - return undefined; - } -} - -export async function fetchKnowledgeBaseIdByPaste(pasteUrlList: any, urlType: string | undefined) { - const url = `${BACKEND_BASE_URL}/upload_link`; - const data = { - link_list: pasteUrlList, - }; - const init: RequestInit = { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(data), - }; - - try { - const response = await fetch(url, init); - if (!response.ok) throw response.status; - return await response.json(); - } catch (error) { - console.error("network error: ", error); - return undefined; - } -} diff --git a/VisualQnA/ui/svelte/src/lib/shared/Utils.ts b/VisualQnA/ui/svelte/src/lib/shared/Utils.ts deleted file mode 100644 index fb182cef67..0000000000 --- a/VisualQnA/ui/svelte/src/lib/shared/Utils.ts +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2024 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -export function scrollToBottom(scrollToDiv: HTMLElement) { - if (scrollToDiv) { - setTimeout( - () => - scrollToDiv.scroll({ - behavior: "auto", - top: scrollToDiv.scrollHeight, - }), - 100, - ); - } -} - -export function scrollToTop(scrollToDiv: HTMLElement) { - if (scrollToDiv) { - setTimeout( - () => - scrollToDiv.scroll({ - behavior: "auto", - top: 0, - }), - 100, - ); - } -} - -export function getCurrentTimeStamp() { - return Math.floor(new Date().getTime()); -} - -export function fromTimeStampToTime(timeStamp: number) { - return new Date(timeStamp * 1000).toTimeString().slice(0, 8); -} - -export function formatTime(seconds) { - const hours = String(Math.floor(seconds / 3600)).padStart(2, "0"); - const minutes = String(Math.floor((seconds % 3600) / 60)).padStart(2, "0"); - const remainingSeconds = String(seconds % 60).padStart(2, "0"); - return `${hours}:${minutes}:${remainingSeconds}`; -} diff --git a/VisualQnA/ui/svelte/src/lib/shared/components/header/header.svelte b/VisualQnA/ui/svelte/src/lib/shared/components/header/header.svelte deleted file mode 100644 index c851dec988..0000000000 --- a/VisualQnA/ui/svelte/src/lib/shared/components/header/header.svelte +++ /dev/null @@ -1,33 +0,0 @@ - - - -
- -
diff --git a/VisualQnA/ui/svelte/src/lib/shared/components/loading/Loading.svelte b/VisualQnA/ui/svelte/src/lib/shared/components/loading/Loading.svelte deleted file mode 100644 index 51e89cfe7e..0000000000 --- a/VisualQnA/ui/svelte/src/lib/shared/components/loading/Loading.svelte +++ /dev/null @@ -1,48 +0,0 @@ - - -
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/VisualQnA/ui/svelte/src/lib/shared/components/scrollbar/Scrollbar.svelte b/VisualQnA/ui/svelte/src/lib/shared/components/scrollbar/Scrollbar.svelte deleted file mode 100644 index f18e23e690..0000000000 --- a/VisualQnA/ui/svelte/src/lib/shared/components/scrollbar/Scrollbar.svelte +++ /dev/null @@ -1,48 +0,0 @@ - - - - -
- -
- -
-
-
diff --git a/VisualQnA/ui/svelte/src/lib/shared/constant/Interface.ts b/VisualQnA/ui/svelte/src/lib/shared/constant/Interface.ts deleted file mode 100644 index c461e2b791..0000000000 --- a/VisualQnA/ui/svelte/src/lib/shared/constant/Interface.ts +++ /dev/null @@ -1,48 +0,0 @@
-// Copyright (c) 2024 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-export enum MessageRole {
-  Assistant,
-  User,
-}
-
-export enum MessageType {
-  Text,
-  SingleAudio,
-  AudioList,
-  SingleImage,
-  ImageList,
-  singleVideo,
-}
-
-type Map<T> = T extends MessageType.Text | MessageType.SingleAudio
-  ? string
-  : T extends MessageType.AudioList
-    ? string[]
-    : T extends MessageType.SingleImage
-      ? { imgSrc: string; imgId: string }
-      : { imgSrc: string; imgId: string }[];
-
-export interface Message {
-  imgSrc: string | null | undefined;
-  role: MessageRole;
-  type: MessageType;
-  content: Map<MessageType>;
-  time: number;
-}
-
-export enum LOCAL_STORAGE_KEY {
-  STORAGE_CHAT_KEY = "chatMessages",
-  STORAGE_TIME_KEY = "initTime",
-}
diff --git a/VisualQnA/ui/svelte/src/lib/shared/stores/common/Store.ts b/VisualQnA/ui/svelte/src/lib/shared/stores/common/Store.ts deleted file mode 100644 index 88a581ab68..0000000000 --- a/VisualQnA/ui/svelte/src/lib/shared/stores/common/Store.ts +++ /dev/null @@ -1,45 +0,0 @@
-// Copyright (c) 2024 Intel Corporation
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-import { writable } from "svelte/store";
-
-export let open = writable(true);
-
-export let knowledgeAccess = writable(true);
-
-export let showTemplate = writable(false);
-
-export let showSidePage = writable(false);
-
-export let droppedObj = writable({});
-
-export let isLoading = writable(false);
-
-export let newUploadNum = writable(0);
-
-export let ifStoreMsg = writable(true);
-
-export let isCheckedStore = writable(false);
-
-export const resetControl = writable(false);
-
-export const knowledge1 = writable<{
-  id: string;
-}>();
-
-export const knowledgeName = writable("");
-
-export const base64ImageStore = writable("");
-
-export const stepValueStore = writable(512);
diff --git a/VisualQnA/ui/svelte/src/routes/+layout.svelte b/VisualQnA/ui/svelte/src/routes/+layout.svelte deleted file mode 100644 index 8141177d4a..0000000000 --- a/VisualQnA/ui/svelte/src/routes/+layout.svelte +++ /dev/null @@ -1,48 +0,0 @@
(The 48-line Svelte layout component is deleted in full; its markup is not reproduced here.)
diff --git a/VisualQnA/ui/svelte/src/routes/+page.svelte b/VisualQnA/ui/svelte/src/routes/+page.svelte deleted file mode 100644 index 4dd3e6efda..0000000000 --- a/VisualQnA/ui/svelte/src/routes/+page.svelte +++ /dev/null @@ -1,279 +0,0 @@
(The 279-line chat page component is deleted in full. Only fragments of its markup survive in this excerpt: an Enter-to-submit keydown handler that calls handleTextSubmit(), a block rendered when chatMessages is non-empty and not loading, a loading indicator, and an {#each chatMessages} loop that renders each message with its timing. The complete markup is not reproduced here.)
- - diff --git a/VisualQnA/ui/svelte/src/routes/+page.ts b/VisualQnA/ui/svelte/src/routes/+page.ts deleted file mode 100644 index f4de8d6760..0000000000 --- a/VisualQnA/ui/svelte/src/routes/+page.ts +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2024 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import { browser } from "$app/environment"; -import { LOCAL_STORAGE_KEY } from "$lib/shared/constant/Interface"; - -export const load = async () => { - if (browser) { - const chat = localStorage.getItem(LOCAL_STORAGE_KEY.STORAGE_CHAT_KEY); - - return { - chatMsg: JSON.parse(chat || "[]"), - }; - } -}; diff --git a/VisualQnA/ui/svelte/static/favicon.png b/VisualQnA/ui/svelte/static/favicon.png deleted file mode 100644 index 75b997f815..0000000000 Binary files a/VisualQnA/ui/svelte/static/favicon.png and /dev/null differ diff --git a/VisualQnA/ui/svelte/svelte.config.js b/VisualQnA/ui/svelte/svelte.config.js deleted file mode 100644 index 0f2977ecce..0000000000 --- a/VisualQnA/ui/svelte/svelte.config.js +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2024 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import adapter from "@sveltejs/adapter-auto"; -import preprocess from "svelte-preprocess"; -import postcssPresetEnv from "postcss-preset-env"; - -/** @type {import('@sveltejs/kit').Config} */ -const config = { - // Consult https://github.com/sveltejs/svelte-preprocess - // for more information about preprocessors - preprocess: preprocess({ - sourceMap: true, - postcss: { - plugins: [postcssPresetEnv({ features: { "nesting-rules": true } })], - }, - }), - - kit: { - adapter: adapter(), - env: { - publicPrefix: "", - }, - }, -}; - -export default config; diff --git a/VisualQnA/ui/svelte/tailwind.config.cjs b/VisualQnA/ui/svelte/tailwind.config.cjs deleted file mode 100644 index 6cc3a8b951..0000000000 --- a/VisualQnA/ui/svelte/tailwind.config.cjs +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) 2024 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -const config = { - content: ["./src/**/*.{html,js,svelte,ts}", "./node_modules/flowbite-svelte/**/*.{html,js,svelte,ts}"], - - plugins: [require("flowbite/plugin")], - - darkMode: "class", - - theme: { - extend: { - colors: { - // flowbite-svelte - primary: { - 50: "#FFF5F2", - 100: "#FFF1EE", - 200: "#FFE4DE", - 300: "#FFD5CC", - 400: "#FFBCAD", - 500: "#FE795D", - 600: "#EF562F", - 700: "#EB4F27", - 800: "#CC4522", - 900: "#A5371B", - }, - }, - }, - }, -}; - -module.exports = config; diff --git a/VisualQnA/ui/svelte/tsconfig.json b/VisualQnA/ui/svelte/tsconfig.json deleted file mode 100644 index 0f47472f79..0000000000 --- a/VisualQnA/ui/svelte/tsconfig.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "extends": "./.svelte-kit/tsconfig.json", - "compilerOptions": { - "allowJs": true, - "checkJs": true, - "esModuleInterop": true, - "forceConsistentCasingInFileNames": true, - "resolveJsonModule": true, - "skipLibCheck": true, - "sourceMap": true, - "strict": true - } -} diff --git a/VisualQnA/ui/svelte/vite.config.ts b/VisualQnA/ui/svelte/vite.config.ts deleted file mode 100644 index d095cbdcb6..0000000000 --- a/VisualQnA/ui/svelte/vite.config.ts +++ /dev/null @@ -1,24 +0,0 @@ -// Copyright (c) 2024 Intel Corporation -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -import { sveltekit } from "@sveltejs/kit/vite"; -import type { UserConfig } from "vite"; - -const config: UserConfig = { - plugins: [sveltekit()], - server: { - allowedHosts: true, - }, -}; -export default config; diff --git a/VisualQnA/visualqna.py b/VisualQnA/visualqna.py deleted file mode 100644 index 6d7fae1540..0000000000 --- a/VisualQnA/visualqna.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import os - -from comps import MegaServiceEndpoint, MicroService, ServiceOrchestrator, ServiceRoleType, ServiceType -from comps.cores.mega.utils import handle_message -from comps.cores.proto.api_protocol import ( - ChatCompletionRequest, - ChatCompletionResponse, - ChatCompletionResponseChoice, - ChatMessage, - UsageInfo, -) -from comps.cores.proto.docarray import LLMParams -from fastapi import Request -from fastapi.responses import StreamingResponse - -MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888)) -LVM_SERVICE_HOST_IP = os.getenv("LVM_SERVICE_HOST_IP", "0.0.0.0") -LVM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9399)) - - -class VisualQnAService: - def __init__(self, host="0.0.0.0", port=8000): - self.host = host - self.port = port - self.megaservice = ServiceOrchestrator() - self.endpoint = str(MegaServiceEndpoint.VISUAL_QNA) - - def add_remote_service(self): - llm = MicroService( - name="lvm", - host=LVM_SERVICE_HOST_IP, - port=LVM_SERVICE_PORT, - endpoint="/v1/lvm", - use_remote_service=True, - service_type=ServiceType.LVM, - ) - self.megaservice.add(llm) - - async def handle_request(self, request: Request): - data = await request.json() - stream_opt = data.get("stream", False) - chat_request = ChatCompletionRequest.parse_obj(data) - prompt, images = handle_message(chat_request.messages) - parameters = LLMParams( - max_new_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024, - top_k=chat_request.top_k if chat_request.top_k else 10, - top_p=chat_request.top_p if chat_request.top_p else 0.95, - temperature=chat_request.temperature if chat_request.temperature else 0.01, - frequency_penalty=chat_request.frequency_penalty if chat_request.frequency_penalty else 0.0, - presence_penalty=chat_request.presence_penalty if chat_request.presence_penalty else 0.0, - repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03, - stream=stream_opt, - ) - result_dict, runtime_graph = await self.megaservice.schedule( - initial_inputs={"prompt": prompt, "image": images[0]}, llm_parameters=parameters - ) - for node, response in result_dict.items(): - # Here it suppose the last microservice in the megaservice is LVM. 
- if ( - isinstance(response, StreamingResponse) - and node == list(self.megaservice.services.keys())[-1] - and self.megaservice.services[node].service_type == ServiceType.LVM - ): - return response - last_node = runtime_graph.all_leaves()[-1] - response = result_dict[last_node]["text"] - choices = [] - usage = UsageInfo() - choices.append( - ChatCompletionResponseChoice( - index=0, - message=ChatMessage(role="assistant", content=response), - finish_reason="stop", - ) - ) - return ChatCompletionResponse(model="visualqna", choices=choices, usage=usage) - - def start(self): - self.service = MicroService( - self.__class__.__name__, - service_role=ServiceRoleType.MEGASERVICE, - host=self.host, - port=self.port, - endpoint=self.endpoint, - input_datatype=ChatCompletionRequest, - output_datatype=ChatCompletionResponse, - ) - self.service.add_route(self.endpoint, self.handle_request, methods=["POST"]) - self.service.start() - - -if __name__ == "__main__": - visualqna = VisualQnAService(port=MEGA_SERVICE_PORT) - visualqna.add_remote_service() - visualqna.start()