diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml b/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml
index 780ff3c704..7a4b85158e 100644
--- a/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/VideoQnA/docker_compose/intel/cpu/xeon/compose.yaml
@@ -22,15 +22,15 @@ services:
       https_proxy: ${https_proxy}
       VDMS_HOST: ${VDMS_HOST}
       VDMS_PORT: ${VDMS_PORT}
-      INDEX_NAME: ${INDEX_NAME}
+      COLLECTION_NAME: ${INDEX_NAME}
       MULTIMODAL_DATAPREP: true
-    entrypoint: sh -c 'sleep 15 && python ingest_videos.py'
+      DATAPREP_COMPONENT_NAME: "OPEA_DATAPREP_MULTIMODALVDMS"
     volumes:
       - /home/$USER/.cache/clip:/home/user/.cache/clip
       - /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub
   embedding:
-    image: ${REGISTRY:-opea}/embedding-multimodal-clip:${TAG:-latest}
-    container_name: embedding-multimodal-server
+    image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
+    container_name: clip-embedding-server
     ports:
       - "6000:6000"
     ipc: host
@@ -38,6 +38,7 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
+      EMBEDDING_COMPONENT_NAME: "OPEA_CLIP_EMBEDDING"
     volumes:
       - /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub
     restart: unless-stopped
@@ -59,7 +60,7 @@ services:
       VDMS_USE_CLIP: ${USECLIP}
       LOGFLAG: ${LOGFLAG}
       RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_VDMS"
-    entrypoint: sh -c 'sleep 30 && python retriever_vdms.py'
+      NUMBA_CACHE_DIR: "/tmp/numba_cache"
     restart: unless-stopped
     volumes:
       - /home/$USER/.cache/huggingface/hub:/home/user/.cache/huggingface/hub
@@ -76,6 +77,7 @@ services:
       CHUNK_DURATION: ${CHUNK_DURATION}
       FILE_SERVER_ENDPOINT: ${DATAPREP_GET_FILE_ENDPOINT}
       DATAPREP_GET_VIDEO_LIST_ENDPOINT: ${DATAPREP_GET_VIDEO_LIST_ENDPOINT}
+      RERANK_COMPONENT_NAME: ${RERANK_COMPONENT_NAME:-OPEA_VIDEO_RERANKING}
     restart: unless-stopped
   lvm-video-llama:
     image: ${REGISTRY:-opea}/lvm-video-llama:${TAG:-latest}
@@ -89,14 +91,13 @@ services:
       no_proxy: ${no_proxy}
       llm_download: ${LLM_DOWNLOAD}
     volumes:
-      - "/home/$USER/.cache:/home/user/.cache"
       - video-llama-model:/home/user/model
     restart: unless-stopped
   lvm:
     image: ${REGISTRY:-opea}/lvm:${TAG:-latest}
     container_name: lvm
     ports:
-      - "9000:9000"
+      - "9000:9399"
     ipc: host
     environment:
       http_proxy: ${http_proxy}
diff --git a/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh b/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh
index dcf574774b..652217f639 100644
--- a/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh
+++ b/VideoQnA/docker_compose/intel/cpu/xeon/set_env.sh
@@ -26,3 +26,5 @@ export VDMS_PORT=8001
 export INDEX_NAME="mega-videoqna"
 export USECLIP=1
 export LLM_DOWNLOAD="True" # Set to "False" before redeploy LVM server to avoid model download
+
+export RERANK_COMPONENT_NAME="OPEA_VIDEO_RERANKING"
diff --git a/VideoQnA/docker_image_build/build.yaml b/VideoQnA/docker_image_build/build.yaml
index 9ed0bee955..f46d3aa6a4 100644
--- a/VideoQnA/docker_image_build/build.yaml
+++ b/VideoQnA/docker_image_build/build.yaml
@@ -23,12 +23,12 @@ services:
       dockerfile: comps/dataprep/src/Dockerfile
     extends: videoqna
     image: ${REGISTRY:-opea}/dataprep:${TAG:-latest}
-  embedding-multimodal-clip:
+  embedding:
     build:
       context: GenAIComps
-      dockerfile: comps/third_parties/clip/src/Dockerfile
+      dockerfile: comps/embeddings/src/Dockerfile
     extends: videoqna
-    image: ${REGISTRY:-opea}/embedding-multimodal-clip:${TAG:-latest}
+    image: ${REGISTRY:-opea}/embedding:${TAG:-latest}
   retriever:
     build:
       context: GenAIComps
diff --git a/VideoQnA/tests/test_compose_on_xeon.sh b/VideoQnA/tests/test_compose_on_xeon.sh
index 614c2efc47..1ba4e05c43 100755
--- a/VideoQnA/tests/test_compose_on_xeon.sh
+++ b/VideoQnA/tests/test_compose_on_xeon.sh
@@ -63,7 +63,7 @@ function start_services() {
     sleep 1m
 
     # List of containers running uvicorn
-    list=("dataprep-vdms-server" "embedding-multimodal-server" "retriever-vdms-server" "reranking-tei-server" "lvm-video-llama" "lvm-video-llama" "videoqna-xeon-backend-server")
+    list=("dataprep-vdms-server" "clip-embedding-server" "retriever-vdms-server" "reranking-tei-server" "lvm-video-llama" "videoqna-xeon-backend-server")
 
     # Define the maximum time limit in seconds
     TIME_LIMIT=5400
@@ -151,7 +151,7 @@ function validate_services() {
 
 function validate_microservices() {
     # Check if the microservices are running correctly.
-    cd $WORKPATH/docker_compose/intel/cpu/xeon//data
+    cd $WORKPATH/docker_compose/intel/cpu/xeon/data
 
     # dataprep microservice
     HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" -X POST http://${ip_address}:6007/v1/dataprep/ingest \
@@ -169,10 +169,10 @@ function validate_microservices() {
     # Embedding Microservice
     validate_services \
         "${ip_address}:6000/v1/embeddings" \
-        "Sample text" \
         "embedding" \
-        "embedding-multimodal-server" \
-        '{"text":"Sample text"}'
+        "embedding" \
+        "clip-embedding-server" \
+        '{"input":"Sample text"}'
 
     # Retriever Microservice
     export your_embedding=$(python -c "import random; embedding = [random.uniform(-1, 1) for _ in range(512)]; print(embedding)")
@@ -260,4 +260,4 @@ function main() {
 
 }
 
-# main
+main
diff --git a/VideoQnA/videoqna.py b/VideoQnA/videoqna.py
index c447dd2abf..9b0bde028e 100644
--- a/VideoQnA/videoqna.py
+++ b/VideoQnA/videoqna.py
@@ -27,10 +27,26 @@
 LVM_SERVICE_PORT = int(os.getenv("LVM_SERVICE_PORT", 9000))
 
 
+def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **kwargs):
+    # No per-node remapping is needed on the way in; every node, including the
+    # retriever, consumes the payload produced by the previous node as-is.
+    return inputs
+
+
+def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_dict, **kwargs):
+    # The CLIP embedding service returns an OpenAI-style response; unwrap it
+    # into the {"text", "embedding"} shape the VDMS retriever expects.
+    if self.services[cur_node].service_type == ServiceType.EMBEDDING:
+        return {"text": inputs["input"], "embedding": data["data"][0]["embedding"]}
+    return data
+
+
 class VideoQnAService:
     def __init__(self, host="0.0.0.0", port=8888):
         self.host = host
         self.port = port
+        ServiceOrchestrator.align_inputs = align_inputs
+        ServiceOrchestrator.align_outputs = align_outputs
         self.megaservice = ServiceOrchestrator()
         self.endpoint = str(MegaServiceEndpoint.VIDEO_RAG_QNA)
 
@@ -88,7 +104,7 @@ async def handle_request(self, request: Request):
             stream=stream_opt,
         )
         result_dict, runtime_graph = await self.megaservice.schedule(
-            initial_inputs={"text": prompt}, llm_parameters=parameters
+            initial_inputs={"input": prompt}, llm_parameters=parameters
         )
         for node, response in result_dict.items():
             # Here it suppose the last microservice in the megaservice is LVM.
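
Note for reviewers: the embedding request schema moves from {"text": ...} to the OpenAI-style {"input": ...}, and align_outputs() in videoqna.py unwraps the service response for the VDMS retriever. Below is a minimal sketch of that contract, assuming the compose stack above is running locally and the service keeps the response shape align_outputs relies on; the localhost address and the use of the requests library are illustrative only, not part of this change.

import requests

# New request schema: {"input": ...} replaces the old {"text": ...} payload
# (port 6000 is the host mapping for the embedding service in compose.yaml).
resp = requests.post(
    "http://localhost:6000/v1/embeddings",
    json={"input": "Sample text"},
    timeout=30,
)
resp.raise_for_status()
data = resp.json()

# align_outputs() reshapes this for the retriever: the vector sits at
# data["data"][0]["embedding"], matching the 512-dim random embedding the
# retriever check in test_compose_on_xeon.sh generates.
payload = {"text": "Sample text", "embedding": data["data"][0]["embedding"]}
print(len(payload["embedding"]))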