Commit 6169ea4

add new feature and bug fix for EC-RAG (#1324)

Signed-off-by: Zhu, Yongbo <yongbo.zhu@intel.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

1 parent: 75b0961

27 files changed: +780 additions, -116 deletions

EdgeCraftRAG/Dockerfile

Lines changed: 11 additions & 3 deletions

```diff
@@ -7,20 +7,28 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"]

 RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
     libgl1-mesa-glx \
-    libjemalloc-dev
+    libjemalloc-dev \
+    git

 RUN useradd -m -s /bin/bash user && \
     mkdir -p /home/user && \
     chown -R user /home/user/

-COPY ./requirements.txt /home/user/requirements.txt
 COPY ./chatqna.py /home/user/chatqna.py

 WORKDIR /home/user
-RUN pip install --no-cache-dir -r requirements.txt
+RUN git clone https://github.com/opea-project/GenAIComps.git
+
+WORKDIR /home/user/GenAIComps
+RUN pip install --no-cache-dir --upgrade pip setuptools && \
+    pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
+
+ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps

 USER user

+WORKDIR /home/user
+
 RUN echo 'ulimit -S -n 999999' >> ~/.bashrc

 ENTRYPOINT ["python", "chatqna.py"]
```
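The `ENV PYTHONPATH` line above is what makes the freshly cloned GenAIComps sources importable without pip-installing the package itself: Python splits the variable on `:` and adds each entry to `sys.path`. A minimal, dependency-free sketch of that colon-joining (`extend_pythonpath` is an illustrative helper, not part of the Dockerfile or EC-RAG):

```python
def extend_pythonpath(current: str, extra: str) -> str:
    """Append `extra` to a colon-separated PYTHONPATH value (illustrative helper)."""
    return f"{current}:{extra}" if current else extra

# Python splits PYTHONPATH on ":" into sys.path entries, which is why
# `import comps` later resolves against the cloned /home/user/GenAIComps tree.
joined = extend_pythonpath("/opt/lib", "/home/user/GenAIComps")
entries = joined.split(":")
```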

EdgeCraftRAG/Dockerfile.server

Lines changed: 5 additions & 1 deletion

```diff
@@ -4,7 +4,11 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"]

 RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
     libgl1-mesa-glx \
-    libjemalloc-dev
+    libjemalloc-dev \
+    libmagic1 \
+    libglib2.0-0 \
+    poppler-utils \
+    tesseract-ocr

 RUN apt-get update && apt-get install -y gnupg wget
 RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
```

EdgeCraftRAG/README.md

Lines changed: 33 additions & 4 deletions

````diff
@@ -5,6 +5,14 @@ Retrieval-Augmented Generation system for edge solutions. It is designed to
 curate the RAG pipeline to meet hardware requirements at edge with guaranteed
 quality and performance.

+## What's New in this release?
+
+- Support image/url data retrieval and display in EC-RAG
+- Support display of LLM-used context sources in UI
+- Support pipeline remove operation in RESTful API and UI
+- Support RAG pipeline performance benchmark and display in UI
+- Fixed known issues in EC-RAG UI and server
+
 ## Quick Start Guide

 ### (Optional) Build Docker Images for Mega Service, Server and UI by your own
@@ -43,6 +51,8 @@ export GRADIO_PATH="your gradio cache path for transferring files"

 # Make sure all 3 folders have 1000:1000 permission, otherwise
 # chown 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${GRADIO_PATH}
+# In addition, also make sure the .cache folder has 1000:1000 permission, otherwise
+# chown 1000:1000 $HOME/.cache

 # Use `ip a` to check your active ip
 export HOST_IP="your host ip"
@@ -192,7 +202,7 @@ curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: app
 #### Update a pipeline

 ```bash
-curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @tests/test_pipeline_local_llm.json | jq '.'
+curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/rag_test_local_llm -H "Content-Type: application/json" -d @tests/test_pipeline_local_llm.json | jq '.'
 ```

 #### Check all pipelines
@@ -204,15 +214,34 @@ curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: appl
 #### Activate a pipeline

 ```bash
-curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/test1 -H "Content-Type: application/json" -d '{"active": "true"}' | jq '.'
+curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/rag_test_local_llm -H "Content-Type: application/json" -d '{"active": "true"}' | jq '.'
+```
+
+#### Remove a pipeline
+
+```bash
+# Firstly, deactivate the pipeline if the pipeline status is active
+curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/rag_test_local_llm -H "Content-Type: application/json" -d '{"active": "false"}' | jq '.'
+# Then delete the pipeline
+curl -X DELETE http://${HOST_IP}:16010/v1/settings/pipelines/rag_test_local_llm -H "Content-Type: application/json" | jq '.'
+```
+
+#### Enable and check benchmark for pipelines
+
+```bash
+# Set ENABLE_BENCHMARK as true before launch services
+export ENABLE_BENCHMARK="true"
+
+# check the benchmark data for pipeline {pipeline_name}
+curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines/{pipeline_name}/benchmark -H "Content-Type: application/json" | jq '.'
 ```

 ### Model Management

 #### Load a model

 ```bash
-curl -X POST http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" -d '{"model_type": "reranker", "model_id": "BAAI/bge-reranker-large", "model_path": "./models/bge_ov_reranker", "device": "cpu"}' | jq '.'
+curl -X POST http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" -d '{"model_type": "reranker", "model_id": "BAAI/bge-reranker-large", "model_path": "./models/bge_ov_reranker", "device": "cpu", "weight": "INT4"}' | jq '.'
 ```

 It will take some time to load the model.
@@ -226,7 +255,7 @@ curl -X GET http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: applica
 #### Update a model

 ```bash
-curl -X PATCH http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" -d '{"model_type": "reranker", "model_id": "BAAI/bge-reranker-large", "model_path": "./models/bge_ov_reranker", "device": "gpu"}' | jq '.'
+curl -X PATCH http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" -d '{"model_type": "reranker", "model_id": "BAAI/bge-reranker-large", "model_path": "./models/bge_ov_reranker", "device": "gpu", "weight": "INT4"}' | jq '.'
 ```

 #### Check a certain model
````
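The README's remove-a-pipeline workflow is a two-step sequence: deactivate via PATCH, then DELETE. A hedged Python sketch of the same request plan (`pipeline_url` and `remove_pipeline_plan` are hypothetical helpers, not part of the EC-RAG API; only the endpoint shapes come from the curl commands above):

```python
import json

def pipeline_url(host_ip: str, name: str) -> str:
    # Endpoint shape taken from the curl commands in the README
    return f"http://{host_ip}:16010/v1/settings/pipelines/{name}"

def remove_pipeline_plan(host_ip: str, name: str):
    """Return the (method, url, body) steps: deactivate first, then delete."""
    url = pipeline_url(host_ip, name)
    return [
        ("PATCH", url, json.dumps({"active": "false"})),  # deactivate if active
        ("DELETE", url, None),                            # then remove it
    ]

plan = remove_pipeline_plan("192.168.1.2", "rag_test_local_llm")
```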

EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml

Lines changed: 1 addition & 0 deletions

```diff
@@ -11,6 +11,7 @@ services:
       https_proxy: ${https_proxy}
       HF_ENDPOINT: ${HF_ENDPOINT}
       vLLM_ENDPOINT: ${vLLM_ENDPOINT}
+      ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false}
     volumes:
       - ${MODEL_PATH:-${PWD}}:/home/user/models
       - ${DOC_PATH:-${PWD}}:/home/user/docs
```

EdgeCraftRAG/docker_compose/intel/gpu/arc/compose_vllm.yaml

Lines changed: 1 addition & 0 deletions

```diff
@@ -11,6 +11,7 @@ services:
       https_proxy: ${https_proxy}
       HF_ENDPOINT: ${HF_ENDPOINT}
       vLLM_ENDPOINT: ${vLLM_ENDPOINT}
+      ENABLE_BENCHMARK: ${ENABLE_BENCHMARK:-false}
     volumes:
       - ${MODEL_PATH:-${PWD}}:/home/user/models
       - ${DOC_PATH:-${PWD}}:/home/user/docs
```
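Both compose files give `ENABLE_BENCHMARK` a `:-false` default, so benchmarking is opt-in. On the consuming side, a string-to-bool conversion like the following is the typical pattern; `benchmark_enabled` is an illustrative helper under that assumption, not the actual EC-RAG server code:

```python
def benchmark_enabled(env: dict) -> bool:
    # Compose's `${ENABLE_BENCHMARK:-false}` substitutes "false" when the
    # variable is unset; environment values always arrive as strings, so the
    # service must compare against "true" (case-insensitively here).
    return env.get("ENABLE_BENCHMARK", "false").lower() == "true"

default_off = benchmark_enabled({})                          # unset -> disabled
explicitly_on = benchmark_enabled({"ENABLE_BENCHMARK": "true"})
```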

EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py

Lines changed: 28 additions & 2 deletions

```diff
@@ -1,9 +1,12 @@
 # Copyright (C) 2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

+from comps import GeneratedDoc
 from comps.cores.proto.api_protocol import ChatCompletionRequest
+from edgecraftrag.api_schema import RagOut
 from edgecraftrag.context import ctx
 from fastapi import FastAPI
+from fastapi.responses import StreamingResponse

 chatqna_app = FastAPI()

@@ -25,8 +28,31 @@ async def retrieval(request: ChatCompletionRequest):
 # ChatQnA
 @chatqna_app.post(path="/v1/chatqna")
 async def chatqna(request: ChatCompletionRequest):
+    generator = ctx.get_pipeline_mgr().get_active_pipeline().generator
+    if generator:
+        request.model = generator.model_id
     if request.stream:
-        return ctx.get_pipeline_mgr().run_pipeline(chat_request=request)
+        ret, retri_res = ctx.get_pipeline_mgr().run_pipeline(chat_request=request)
+        return ret
     else:
-        ret = ctx.get_pipeline_mgr().run_pipeline(chat_request=request)
+        ret, retri_res = ctx.get_pipeline_mgr().run_pipeline(chat_request=request)
         return str(ret)
+
+
+# RAGQnA
+@chatqna_app.post(path="/v1/ragqna")
+async def ragqna(request: ChatCompletionRequest):
+    res, retri_res = ctx.get_pipeline_mgr().run_pipeline(chat_request=request)
+    if isinstance(res, GeneratedDoc):
+        res = res.text
+    elif isinstance(res, StreamingResponse):
+        collected_data = []
+        async for chunk in res.body_iterator:
+            collected_data.append(chunk)
+        res = "".join(collected_data)
+
+    ragout = RagOut(query=request.messages, contexts=[], response=str(res))
+    for n in retri_res:
+        origin_text = n.node.get_text()
+        ragout.contexts.append(origin_text.strip())
+    return ragout
```
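The new `ragqna` handler flattens a streaming response by iterating the async body and joining the chunks. A self-contained sketch of that pattern, with `fake_stream` standing in for the pipeline's token stream:

```python
import asyncio

async def collect(body_iterator) -> str:
    """Drain an async iterator of string chunks into one string,
    mirroring the StreamingResponse branch of ragqna above."""
    collected_data = []
    async for chunk in body_iterator:
        collected_data.append(chunk)
    return "".join(collected_data)

async def fake_stream():
    # Stand-in for the LLM token stream produced by the pipeline
    for chunk in ("Edge", "Craft", "RAG"):
        yield chunk

result = asyncio.run(collect(fake_stream()))
```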

EdgeCraftRAG/edgecraftrag/api/v1/data.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -65,7 +65,7 @@ async def delete_file(name):
     # TODO: delete the nodes related to the file
     all_docs = ctx.get_file_mgr().get_all_docs()

-    nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=all_docs)
+    nodelist = ctx.get_pipeline_mgr().run_data_update(docs=all_docs)
     if nodelist is None:
         return "Error"
     pl = ctx.get_pipeline_mgr().get_active_pipeline()
@@ -91,7 +91,7 @@ async def update_file(name, request: DataIn):
     # 3. Re-run the pipeline
     # TODO: update the nodes related to the file
     all_docs = ctx.get_file_mgr().get_all_docs()
-    nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=all_docs)
+    nodelist = ctx.get_pipeline_mgr().run_data_update(docs=all_docs)
     if nodelist is None:
         return "Error"
     pl = ctx.get_pipeline_mgr().get_active_pipeline()
```

EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py

Lines changed: 26 additions & 2 deletions

```diff
@@ -5,9 +5,15 @@

 from edgecraftrag.api_schema import PipelineCreateIn
 from edgecraftrag.base import IndexerType, InferenceType, ModelType, NodeParserType, PostProcessorType, RetrieverType
+from edgecraftrag.components.benchmark import Benchmark
 from edgecraftrag.components.generator import QnAGenerator
 from edgecraftrag.components.indexer import VectorIndexer
-from edgecraftrag.components.node_parser import HierarchyNodeParser, SimpleNodeParser, SWindowNodeParser
+from edgecraftrag.components.node_parser import (
+    HierarchyNodeParser,
+    SimpleNodeParser,
+    SWindowNodeParser,
+    UnstructedNodeParser,
+)
 from edgecraftrag.components.postprocessor import MetadataReplaceProcessor, RerankProcessor
 from edgecraftrag.components.retriever import AutoMergeRetriever, SimpleBM25Retriever, VectorSimRetriever
 from edgecraftrag.context import ctx
@@ -28,6 +34,14 @@ async def get_pipeline(name):
     return ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name)


+# GET Pipeline benchmark
+@pipeline_app.get(path="/v1/settings/pipelines/{name}/benchmark")
+async def get_pipeline_benchmark(name):
+    pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name)
+    if pl and pl.benchmark:
+        return pl.benchmark
+
+
 # POST Pipeline
 @pipeline_app.post(path="/v1/settings/pipelines")
 async def add_pipeline(request: PipelineCreateIn):
@@ -49,7 +63,7 @@ async def add_pipeline(request: PipelineCreateIn):
 async def update_pipeline(name, request: PipelineCreateIn):
     pl = ctx.get_pipeline_mgr().get_pipeline_by_name_or_id(name)
     if pl is None:
-        return None
+        return "Pipeline not exists"
     active_pl = ctx.get_pipeline_mgr().get_active_pipeline()
     if pl == active_pl:
         if not request.active:
@@ -61,6 +75,12 @@ async def update_pipeline(name, request: PipelineCreateIn):
     return pl


+# REMOVE Pipeline
+@pipeline_app.delete(path="/v1/settings/pipelines/{name}")
+async def remove_pipeline(name):
+    return ctx.get_pipeline_mgr().remove_pipeline_by_name_or_id(name)
+
+
 def update_pipeline_handler(pl, req):
     if req.node_parser is not None:
         np = req.node_parser
@@ -86,6 +106,8 @@ def update_pipeline_handler(pl, req):
             )
         case NodeParserType.SENTENCEWINDOW:
             pl.node_parser = SWindowNodeParser.from_defaults(window_size=np.window_size)
+        case NodeParserType.UNSTRUCTURED:
+            pl.node_parser = UnstructedNodeParser(chunk_size=np.chunk_size, chunk_overlap=np.chunk_overlap)
     ctx.get_node_parser_mgr().add(pl.node_parser)

     if req.indexer is not None:
@@ -169,6 +191,8 @@ def update_pipeline_handler(pl, req):
         # Use weakref to achieve model deletion and memory release
         model_ref = weakref.ref(model)
         pl.generator = QnAGenerator(model_ref, gen.prompt_path, gen.inference_type)
+
+        pl.benchmark = Benchmark(pl.enable_benchmark, gen.inference_type)
     else:
         return "Inference Type Not Supported"
```
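The comment in the last hunk says the weakref enables "model deletion and memory release": the generator holds only a weak reference, so once the model manager drops its strong reference the model object can be collected, and calling the ref returns `None`. A self-contained illustration with a toy `Model` class (not the EC-RAG model type):

```python
import gc
import weakref

class Model:
    """Toy stand-in for a loaded model object."""
    pass

model = Model()
model_ref = weakref.ref(model)          # what the generator keeps
alive_before = model_ref() is not None  # strong ref still exists

del model        # the model manager releases its strong reference
gc.collect()     # make collection deterministic for this sketch

alive_after = model_ref() is not None   # weakref now resolves to None
```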

EdgeCraftRAG/edgecraftrag/api_schema.py

Lines changed: 6 additions & 0 deletions

```diff
@@ -61,3 +61,9 @@ class DataIn(BaseModel):

 class FilesIn(BaseModel):
     local_paths: Optional[list[str]] = None
+
+
+class RagOut(BaseModel):
+    query: str
+    contexts: Optional[list[str]] = None
+    response: str
```
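`RagOut` is the pydantic schema the new `/v1/ragqna` endpoint returns: a query, the response text, and the retrieved context strings backing it. A dependency-free dataclass stand-in (so the sketch runs without pydantic; `RagOutSketch` is illustrative, not the real class):

```python
from dataclasses import dataclass, field
from typing import Optional

@dataclass
class RagOutSketch:
    """Dataclass mirror of the pydantic RagOut schema above."""
    query: str
    response: str
    contexts: Optional[list] = field(default_factory=list)

out = RagOutSketch(query="what is EC-RAG?", response="an edge RAG framework")
# The ragqna handler appends each retrieved node's stripped text:
out.contexts.append("EC-RAG curates RAG pipelines for edge hardware.")
```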

EdgeCraftRAG/edgecraftrag/base.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -44,6 +44,7 @@ class NodeParserType(str, Enum):
     SIMPLE = "simple"
     HIERARCHY = "hierarchical"
     SENTENCEWINDOW = "sentencewindow"
+    UNSTRUCTURED = "unstructured"


 class IndexerType(str, Enum):
@@ -81,6 +82,7 @@ class InferenceType(str, Enum):
 class CallbackType(str, Enum):

     DATAPREP = "dataprep"
+    DATAUPDATE = "dataupdate"
     RETRIEVE = "retrieve"
     PIPELINE = "pipeline"
```
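Because `NodeParserType` subclasses `str`, the new `UNSTRUCTURED` member compares equal to the raw string used in API payloads, so request handling needs no extra mapping layer. The enum below reproduces the members from the diff above:

```python
from enum import Enum

class NodeParserType(str, Enum):
    # str-valued Enum: members compare equal to their plain-string values
    SIMPLE = "simple"
    HIERARCHY = "hierarchical"
    SENTENCEWINDOW = "sentencewindow"
    UNSTRUCTURED = "unstructured"

# Parsing the payload value yields the enum member directly
parsed = NodeParserType("unstructured")
```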
