Commit 39409d7

Align OpenAI API for FaqGen, DocSum (#1401)
Signed-off-by: Xinyao Wang <xinyao.wang@intel.com>
1 parent 71e3c57 commit 39409d7

10 files changed (+32, -23 lines)

DocSum/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ services:
     ports:
       - ${LLM_ENDPOINT_PORT:-8008}:80
     volumes:
-      - "./data:/data"
+      - "${DATA_PATH:-data}:/data"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
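
This makes the model-cache mount configurable via DATA_PATH instead of hard-wiring ./data. A minimal usage sketch (the /mnt/hf_cache path is an illustrative example, not part of this commit):

# Point the cache at a shared host directory before starting the stack;
# if DATA_PATH is unset, compose falls back to the "data" directory.
export DATA_PATH=/mnt/hf_cache   # hypothetical path, adjust to your host
docker compose -f compose.yaml up -d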

DocSum/docsum.py

Lines changed: 12 additions & 6 deletions
@@ -15,9 +15,9 @@
     ChatCompletionResponse,
     ChatCompletionResponseChoice,
     ChatMessage,
+    DocSumChatCompletionRequest,
     UsageInfo,
 )
-from comps.cores.proto.docarray import DocSumLLMParams
 from fastapi import File, Request, UploadFile
 from fastapi.responses import StreamingResponse
 
@@ -34,14 +34,20 @@ def align_inputs(self, inputs, cur_node, runtime_graph, llm_parameters_dict, **k
     if self.services[cur_node].service_type == ServiceType.LLM:
         for key_to_replace in ["text", "asr_result"]:
             if key_to_replace in inputs:
-                inputs["query"] = inputs[key_to_replace]
+                inputs["messages"] = inputs[key_to_replace]
                 del inputs[key_to_replace]
 
         docsum_parameters = kwargs.get("docsum_parameters", None)
         if docsum_parameters:
             docsum_parameters = docsum_parameters.model_dump()
-            del docsum_parameters["query"]
+            del docsum_parameters["messages"]
             inputs.update(docsum_parameters)
+        if "id" in inputs:
+            del inputs["id"]
+        if "max_new_tokens" in inputs:
+            del inputs["max_new_tokens"]
+        if "input" in inputs:
+            del inputs["input"]
     elif self.services[cur_node].service_type == ServiceType.ASR:
         if "video" in inputs:
             audio_base64 = video2audio(inputs["video"])
@@ -217,13 +223,13 @@ async def handle_request(self, request: Request, files: List[UploadFile] = File(
                 initial_inputs_data = {}
                 initial_inputs_data[data_type] = prompt
             else:
-                initial_inputs_data = {"query": prompt}
+                initial_inputs_data = {"messages": prompt}
 
         else:
             raise ValueError(f"Unknown request type: {request.headers.get('content-type')}")
 
-        docsum_parameters = DocSumLLMParams(
-            query="",
+        docsum_parameters = DocSumChatCompletionRequest(
+            messages="",
             max_tokens=chat_request.max_tokens if chat_request.max_tokens else 1024,
             top_k=chat_request.top_k if chat_request.top_k else 10,
             top_p=chat_request.top_p if chat_request.top_p else 0.95,
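
With this change the LLM node receives the OpenAI-style "messages" field instead of the service-specific "query", and DocSumChatCompletionRequest replaces DocSumLLMParams. A sketch of a request against the DocSum LLM microservice, using the host_ip and port 9000 values from the test scripts below:

# The summarization input now travels in "messages" (OpenAI-aligned)
# rather than the old "query" field.
curl -X POST "http://${host_ip}:9000/v1/docsum" \
    -H "Content-Type: application/json" \
    -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models."}'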

DocSum/tests/test_compose_on_gaudi.sh

Lines changed: 4 additions & 2 deletions
@@ -28,6 +28,7 @@ export DOCSUM_PORT=9000
 export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
 export DocSum_COMPONENT_NAME="OpeaDocSumTgi"
 export LOGFLAG=True
+export DATA_PATH="/data/cache"
 
 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
@@ -51,6 +52,7 @@ function build_docker_images() {
 function start_services() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi
 
+    sed -i "s|container_name: docsum-gaudi-backend-server|container_name: docsum-gaudi-backend-server\n    volumes:\n      - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     sleep 3m
 }
@@ -158,10 +160,10 @@ function validate_microservices() {
     # llm microservice
     validate_services_json \
         "${host_ip}:9000/v1/docsum" \
-        "data: " \
+        "text" \
         "llm-docsum-tgi" \
         "llm-docsum-gaudi-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 
     # whisper microservice
     ulimit -s 65536

DocSum/tests/test_compose_on_rocm.sh

Lines changed: 3 additions & 3 deletions
@@ -50,7 +50,7 @@ function build_docker_images() {
 function start_services() {
     cd "$WORKPATH"/docker_compose/amd/gpu/rocm
     sed -i "s/backend_address/$ip_address/g" "$WORKPATH"/ui/svelte/.env
-
+    sed -i "s|container_name: docsum-backend-server|container_name: docsum-backend-server\n    volumes:\n      - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
     # Start Docker Containers
     docker compose up -d > "${LOG_PATH}"/start_services_with_compose.log
     sleep 3m
@@ -138,10 +138,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${host_ip}:9000/v1/docsum" \
-        "data: " \
+        "text" \
         "docsum-llm-server" \
         "docsum-llm-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 
 }

DocSum/tests/test_compose_on_xeon.sh

Lines changed: 3 additions & 3 deletions
@@ -49,7 +49,7 @@ function build_docker_images() {
 
 function start_services() {
     cd $WORKPATH/docker_compose/intel/cpu/xeon/
-
+    sed -i "s|container_name: docsum-xeon-backend-server|container_name: docsum-xeon-backend-server\n    volumes:\n      - \"${WORKPATH}\/docker_image_build\/GenAIComps:\/home\/user\/GenAIComps\"|g" compose.yaml
     docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     sleep 3m
 }
@@ -160,10 +160,10 @@ function validate_microservices() {
     # llm microservice
     validate_services_json \
         "${host_ip}:9000/v1/docsum" \
-        "data: " \
+        "text" \
         "llm-docsum-tgi" \
         "llm-docsum-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 
     # whisper microservice
     ulimit -s 65536

FaqGen/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@ services:
     ports:
       - ${LLM_ENDPOINT_PORT:-8008}:80
     volumes:
-      - "./data:/data"
+      - "${DATA_PATH:-data}:/data"
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}

FaqGen/faqgen.py

Lines changed: 1 addition & 1 deletion
@@ -113,7 +113,7 @@ async def handle_request(self, request: Request, files: List[UploadFile] = File(
             model=chat_request.model if chat_request.model else None,
         )
         result_dict, runtime_graph = await self.megaservice.schedule(
-            initial_inputs={"query": prompt}, llm_parameters=parameters
+            initial_inputs={"messages": prompt}, llm_parameters=parameters
         )
         for node, response in result_dict.items():
             # Here it suppose the last microservice in the megaservice is LLM.
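
FaqGen's megaservice schedule call follows the same alignment, so a request to its LLM microservice takes the same shape; a sketch using the ip_address and port 9000 values from the tests below:

# FaqGen likewise accepts "messages" in place of the old "query" field.
curl -X POST "http://${ip_address}:9000/v1/faqgen" \
    -H "Content-Type: application/json" \
    -d '{"messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models."}'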

FaqGen/tests/test_compose_on_gaudi.sh

Lines changed: 3 additions & 2 deletions
@@ -13,6 +13,7 @@ export TAG=${IMAGE_TAG}
 WORKPATH=$(dirname "$PWD")
 LOG_PATH="$WORKPATH/tests"
 ip_address=$(hostname -I | awk '{print $1}')
+export DATA_PATH="/data/cache"
 
 function build_docker_images() {
     cd $WORKPATH/docker_image_build
@@ -91,10 +92,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${ip_address}:9000/v1/faqgen" \
-        "data: " \
+        "text" \
         "llm" \
         "llm-faqgen-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 }
 
 function validate_megaservice() {

FaqGen/tests/test_compose_on_rocm.sh

Lines changed: 2 additions & 2 deletions
@@ -95,10 +95,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${ip_address}:9000/v1/faqgen" \
-        "data: " \
+        "text" \
         "llm" \
         "faqgen-llm-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 }
 
 function validate_megaservice() {

FaqGen/tests/test_compose_on_xeon.sh

Lines changed: 2 additions & 2 deletions
@@ -91,10 +91,10 @@ function validate_microservices() {
     # llm microservice
     validate_services \
         "${ip_address}:9000/v1/faqgen" \
-        "data: " \
+        "text" \
         "llm" \
         "llm-faqgen-server" \
-        '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
+        '{"messages":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}'
 }
 
 function validate_megaservice() {
