[For UT, DONT MERGE] #1692

Closed
wants to merge 5 commits

2 changes: 1 addition & 1 deletion .github/workflows/manual-docker-scan.yml
@@ -12,7 +12,7 @@ on:
type: string
examples:
default: ""
description: 'List of examples to publish "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"'
description: 'List of examples to publish "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"'
required: false
type: string
images:
3 changes: 2 additions & 1 deletion ChatQnA/Dockerfile
@@ -5,5 +5,6 @@ ARG BASE_TAG=latest
FROM opea/comps-base:$BASE_TAG

COPY ./chatqna.py $HOME/chatqna.py
COPY ./entrypoint.sh $HOME/entrypoint.sh

ENTRYPOINT ["python", "chatqna.py"]
ENTRYPOINT ["bash", "entrypoint.sh"]
9 changes: 0 additions & 9 deletions ChatQnA/Dockerfile.guardrails

This file was deleted.

9 changes: 0 additions & 9 deletions ChatQnA/Dockerfile.without_rerank

This file was deleted.

58 changes: 56 additions & 2 deletions ChatQnA/chatqna.py
@@ -159,7 +159,10 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di
next_data["inputs"] = prompt

elif self.services[cur_node].service_type == ServiceType.LLM and not llm_parameters_dict["stream"]:
next_data["text"] = data["choices"][0]["message"]["content"]
if "faqgen" in self.services[cur_node].endpoint:
next_data = data
else:
next_data["text"] = data["choices"][0]["message"]["content"]
else:
next_data = data

@@ -178,7 +181,12 @@ def align_generator(self, gen, **kwargs):
try:
# sometimes yield empty chunk, do a fallback here
json_data = json.loads(json_str)
if (
if "ops" in json_data and "op" in json_data["ops"][0]:
if "value" in json_data["ops"][0] and isinstance(json_data["ops"][0]["value"], str):
yield f"data: {repr(json_data['ops'][0]['value'].encode('utf-8'))}\n\n"
else:
pass
elif (
json_data["choices"][0]["finish_reason"] != "eos_token"
and "content" in json_data["choices"][0]["delta"]
):
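
The new `ops` branch above appears to handle streaming chunks in a JSON-patch style (as emitted by LangChain/LangServe-based services) rather than the OpenAI-style `choices` format. Purely as an illustration, with the chunk shape and the `path` value being assumptions rather than something confirmed by this diff:

```bash
# Exercise the streaming path through the megaservice (stream defaults to true):
curl http://${host_ip}:8888/v1/chatqna \
    -H "Content-Type: application/json" \
    -d '{"messages": "Generate FAQs about Nike", "stream": true}'
# Assumed shape of an intermediate chunk seen by align_generator when FaqGen is used:
#   {"ops": [{"op": "add", "path": "/streamed_output/-", "value": "Q: ..."}]}
# The branch re-emits ops[0]["value"] as an SSE "data:" line.
```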
@@ -329,6 +337,48 @@ def add_remote_service_with_guardrails(self):
self.megaservice.flow_to(rerank, llm)
# self.megaservice.flow_to(llm, guardrail_out)

def add_remote_service_faqgen(self):

embedding = MicroService(
name="embedding",
host=EMBEDDING_SERVER_HOST_IP,
port=EMBEDDING_SERVER_PORT,
endpoint="/embed",
use_remote_service=True,
service_type=ServiceType.EMBEDDING,
)

retriever = MicroService(
name="retriever",
host=RETRIEVER_SERVICE_HOST_IP,
port=RETRIEVER_SERVICE_PORT,
endpoint="/v1/retrieval",
use_remote_service=True,
service_type=ServiceType.RETRIEVER,
)

rerank = MicroService(
name="rerank",
host=RERANK_SERVER_HOST_IP,
port=RERANK_SERVER_PORT,
endpoint="/rerank",
use_remote_service=True,
service_type=ServiceType.RERANK,
)

llm = MicroService(
name="llm",
host=LLM_SERVER_HOST_IP,
port=LLM_SERVER_PORT,
endpoint="/v1/faqgen",
use_remote_service=True,
service_type=ServiceType.LLM,
)
self.megaservice.add(embedding).add(retriever).add(rerank).add(llm)
self.megaservice.flow_to(embedding, retriever)
self.megaservice.flow_to(retriever, rerank)
self.megaservice.flow_to(rerank, llm)

async def handle_request(self, request: Request):
data = await request.json()
stream_opt = data.get("stream", True)
@@ -344,6 +394,7 @@ async def handle_request(self, request: Request):
repetition_penalty=chat_request.repetition_penalty if chat_request.repetition_penalty else 1.03,
stream=stream_opt,
chat_template=chat_request.chat_template if chat_request.chat_template else None,
model=chat_request.model if chat_request.model else None,
)
retriever_parameters = RetrieverParms(
search_type=chat_request.search_type if chat_request.search_type else "similarity",
@@ -399,6 +450,7 @@ def start(self):
parser = argparse.ArgumentParser()
parser.add_argument("--without-rerank", action="store_true")
parser.add_argument("--with-guardrails", action="store_true")
parser.add_argument("--faqgen", action="store_true")

args = parser.parse_args()

Expand All @@ -407,6 +459,8 @@ def start(self):
chatqna.add_remote_service_without_rerank()
elif args.with_guardrails:
chatqna.add_remote_service_with_guardrails()
elif args.faqgen:
chatqna.add_remote_service_faqgen()
else:
chatqna.add_remote_service()

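For reference, the new flag selects the FaqGen pipeline when the megaservice is started directly (a sketch; the container image normally goes through `entrypoint.sh` instead, and the usual service host/port environment variables are assumed to be exported beforehand):

```bash
# Default ChatQnA pipeline
python chatqna.py
# FAQ-generation pipeline added by this PR
python chatqna.py --faqgen
```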
39 changes: 32 additions & 7 deletions ChatQnA/docker_compose/amd/gpu/rocm/README.md
@@ -105,7 +105,15 @@ docker build --no-cache -t opea/retriever:latest --build-arg https_proxy=$https_
docker build --no-cache -t opea/dataprep:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/src/Dockerfile .
```

### 4. Build MegaService Docker Image
### 4. Build FaqGen LLM Image (Optional)

If you want to enable FAQ generation in the pipeline, build the FaqGen LLM microservice image with the command below:

```bash
docker build -t opea/llm-faqgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/src/faq-generation/Dockerfile .
```

### 5. Build MegaService Docker Image

To construct the Mega Service, we utilize the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline within the `chatqna.py` Python script. Build the MegaService Docker image using the command below:

@@ -116,7 +124,7 @@ docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_pr
cd ../../..
```

### 5. Build UI Docker Image
### 6. Build UI Docker Image

Construct the frontend Docker image using the command below:

@@ -126,7 +134,7 @@ docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https
cd ../../../..
```

### 6. Build React UI Docker Image (Optional)
### 7. Build React UI Docker Image (Optional)

Construct the frontend Docker image using the command below:

@@ -136,7 +144,7 @@ docker build --no-cache -t opea/chatqna-react-ui:latest --build-arg https_proxy=
cd ../../../..
```

### 7. Build Nginx Docker Image
### 8. Build Nginx Docker Image

```bash
cd GenAIComps
@@ -151,6 +159,10 @@ Then run the command `docker images`, you will have the following 5 Docker Image
4. `opea/chatqna-ui:latest` or `opea/chatqna-react-ui:latest`
5. `opea/nginx:latest`

If the FaqGen Docker image was built, you will find one more image:

- `opea/llm-faqgen:latest`
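
To double-check which of these images are present locally, a simple filter such as the following can help (adjust the pattern to the images you actually built):

```bash
docker images | grep -E 'opea/(chatqna|chatqna-ui|chatqna-react-ui|nginx|llm-faqgen)'
```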

## 🚀 Start MicroServices and MegaService

### Required Models
@@ -190,6 +202,7 @@ Change the `xxx_MODEL_ID` below for your needs.
export CHATQNA_REDIS_VECTOR_INSIGHT_PORT=8001
export CHATQNA_REDIS_DATAPREP_PORT=6007
export CHATQNA_REDIS_RETRIEVER_PORT=7000
export CHATQNA_LLM_FAQGEN_PORT=9000
export CHATQNA_INDEX_NAME="rag-redis"
export CHATQNA_MEGA_SERVICE_HOST_IP=${HOST_IP}
export CHATQNA_RETRIEVER_SERVICE_HOST_IP=${HOST_IP}
@@ -246,7 +259,10 @@ Please find more information about accessing and restricting AMD GPUs in the lin

```bash
cd GenAIExamples/ChatQnA/docker_compose/amd/gpu/rocm
## for text generation
docker compose up -d
## for FAQ generation
docker compose -f compose_faqgen.yaml up -d
```
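
Once the stack is up, a quick status check can confirm that all containers started (use the same compose file you launched with):

```bash
docker compose ps
# or, for the FAQ generation variant
docker compose -f compose_faqgen.yaml ps
```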

### Validate MicroServices and MegaService
@@ -310,23 +326,32 @@ docker compose up -d
-H 'Content-Type: application/json'
```

5. MegaService
5. FaqGen LLM Microservice (if enabled)

```bash
curl http://${host_ip}:${CHATQNA_LLM_FAQGEN_PORT}/v1/faqgen \
-X POST \
-d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
-H 'Content-Type: application/json'
```

6. MegaService

```bash
curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
"messages": "What is the revenue of Nike in 2023?"
}'
```

6. Nginx Service
7. Nginx Service

```bash
curl http://${host_ip}:${NGINX_PORT}/v1/chatqna \
-H "Content-Type: application/json" \
-d '{"messages": "What is the revenue of Nike in 2023?"}'
```

7. Dataprep Microservice(Optional)
8. Dataprep Microservice(Optional)

If you want to update the default knowledge base, you can use the following commands:

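The exact commands are collapsed above. Purely as an illustration, and assuming the dataprep service defaults (the endpoint path and file name here are assumptions), a document upload typically looks roughly like:

```bash
curl -X POST "http://${host_ip}:${CHATQNA_REDIS_DATAPREP_PORT}/v1/dataprep/ingest" \
    -H "Content-Type: multipart/form-data" \
    -F "files=@./your_document.pdf"
```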