Commit 14f2c1d

CodeGen/CodeTrans - Adding files to deploy an application in the K8S environment using Helm

Signed-off-by: Chingis Yundunov <c.yundunov@datamonsters.com>

1 parent e56fac1 · commit 14f2c1d

23 files changed: +780 −350 lines

DocSum/benchmark_docsum.yaml

Lines changed: 3 additions & 7 deletions
```diff
@@ -3,7 +3,7 @@
 
 deploy:
   device: gaudi
-  version: 1.2.0
+  version: 1.3.0
   modelUseHostPath: /mnt/models
   HUGGINGFACEHUB_API_TOKEN: "" # mandatory
   node: [1]
@@ -20,14 +20,10 @@ deploy:
     memory_capacity: "8000Mi"
     replicaCount: [1]
 
-  teirerank:
-    enabled: False
-
   llm:
     engine: vllm # or tgi
     model_id: "meta-llama/Llama-3.2-3B-Instruct" # mandatory
-    replicaCount:
-      without_teirerank: [1] # When teirerank.enabled is False
+    replicaCount: [1]
     resources:
       enabled: False
       cards_per_instance: 1
@@ -78,7 +74,7 @@ benchmark:
 
   # workload, all of the test cases will run for benchmark
   bench_target: ["docsumfixed"] # specify the bench_target for benchmark
-  dataset: "/home/sdp/upload.txt" # specify the absolute path to the dataset file
+  dataset: "/home/sdp/pubmed_10.txt" # specify the absolute path to the dataset file
   summary_type: "stuff"
   stream: True
```
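The benchmark section now drives a single `replicaCount` and a PubMed sample dataset. A quick way to sanity-check the edited file still parses, as a minimal sketch (assumes Python 3 with PyYAML available on the host; the path is illustrative):

```bash
# Parse the benchmark config to catch indentation or syntax slips
# introduced while editing; prints OK on success.
python3 -c 'import yaml; yaml.safe_load(open("DocSum/benchmark_docsum.yaml")); print("OK")'
```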

DocSum/docker_compose/amd/gpu/rocm/README.md

Lines changed: 133 additions & 31 deletions
Large diffs are not rendered by default.

DocSum/docker_compose/amd/gpu/rocm/set_env.sh

Lines changed: 1 addition & 1 deletion
```diff
@@ -3,7 +3,7 @@
 # Copyright (C) 2024 Advanced Micro Devices, Inc.
 # SPDX-License-Identifier: Apache-2.0
 
-export HOST_IP=''
+export HOST_IP=${ip_address}
 export DOCSUM_MAX_INPUT_TOKENS="2048"
 export DOCSUM_MAX_TOTAL_TOKENS="4096"
 export DOCSUM_LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
```

DocSum/docker_compose/amd/gpu/rocm/set_env_vllm.sh

Lines changed: 1 addition & 1 deletion
```diff
@@ -3,7 +3,7 @@
 # Copyright (C) 2024 Advanced Micro Devices, Inc.
 # SPDX-License-Identifier: Apache-2.0
 
-export HOST_IP=''
+export HOST_IP=${ip_address}
 export DOCSUM_HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 export DOCSUM_MAX_INPUT_TOKENS=2048
 export DOCSUM_MAX_TOTAL_TOKENS=4096
```
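Both ROCm `set_env` scripts now take `HOST_IP` from an `ip_address` variable instead of an empty literal, so that variable must exist before the script is sourced. A minimal sketch of the expected call pattern (the `hostname -I` derivation is an assumption, mirroring how `intel/set_env.sh` in this same commit derives `host_ip`):

```bash
# Derive the host address first, then source the script and confirm
# HOST_IP picked it up.
export ip_address=$(hostname -I | awk '{print $1}')
source DocSum/docker_compose/amd/gpu/rocm/set_env.sh
echo "HOST_IP=${HOST_IP}"
```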

DocSum/docker_compose/intel/cpu/xeon/README.md

Lines changed: 37 additions & 21 deletions
````diff
@@ -21,40 +21,34 @@ This section describes how to quickly deploy and test the DocSum service manuall
 6. [Test the Pipeline](#test-the-pipeline)
 7. [Cleanup the Deployment](#cleanup-the-deployment)
 
-### Access the Code
+### Access the Code and Set Up Environment
 
 Clone the GenAIExample repository and access the ChatQnA Intel Xeon platform Docker Compose files and supporting scripts:
 
-```
+```bash
 git clone https://github.com/opea-project/GenAIExamples.git
-cd GenAIExamples/DocSum/docker_compose/intel/cpu/xeon/
+cd GenAIExamples/DocSum/docker_compose
+source intel/set_env.sh
 ```
 
-Checkout a released version, such as v1.2:
+NOTE: By default, vLLM performs a "warmup" at startup to optimize its performance for the specified model and the underlying platform, which can take a long time. For development (and, e.g., autoscaling), it can be skipped with `export VLLM_SKIP_WARMUP=true`.
 
-```
-git checkout v1.2
+Check out a released version, such as v1.3:
+
+```bash
+git checkout v1.3
 ```
 
 ### Generate a HuggingFace Access Token
 
 Some HuggingFace resources, such as some models, are only accessible if you have an access token. If you do not already have a HuggingFace access token, you can create one by first creating an account by following the steps provided at [HuggingFace](https://huggingface.co/) and then generating a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token).
 
-### Configure the Deployment Environment
-
-To set up environment variables for deploying DocSum services, source the _set_env.sh_ script in this directory:
-
-```
-source ./set_env.sh
-```
-
-The _set_env.sh_ script will prompt for required and optional environment variables used to configure the DocSum services. If a value is not entered, the script will use a default value for the same. It will also generate a _.env_ file defining the desired configuration. Consult the section on [DocSum Service configuration](#docsum-service-configuration) for information on how service specific configuration parameters affect deployments.
-
 ### Deploy the Services Using Docker Compose
 
 To deploy the DocSum services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute:
 
 ```bash
+cd intel/cpu/xeon/
 docker compose up -d
 ```
 
@@ -78,13 +72,13 @@ Please refer to the table below to build different microservices from source:
 
 After running docker compose, check if all the containers launched via docker compose have started:
 
-```
+```bash
 docker ps -a
 ```
 
 For the default deployment, the following 5 containers should have started:
 
-```
+```bash
 CONTAINER ID   IMAGE                          COMMAND                  CREATED         STATUS              PORTS                                       NAMES
 748f577b3c78   opea/whisper:latest            "python whisper_s…"      5 minutes ago   Up About a minute   0.0.0.0:7066->7066/tcp, :::7066->7066/tcp   docsum-xeon-whisper-server
 4eq8b7034fd9   opea/docsum-gradio-ui:latest   "docker-entrypoint.s…"   5 minutes ago   Up About a minute   0.0.0.0:5173->5173/tcp, :::5173->5173/tcp   docsum-xeon-ui-server
@@ -109,7 +103,7 @@ curl -X POST http://${host_ip}:8888/v1/docsum \
 
 To stop the containers associated with the deployment, execute the following command:
 
-```
+```bash
 docker compose -f compose.yaml down
 ```
 
@@ -156,16 +150,19 @@ curl http://${host_ip}:8888/v1/docsum \
   -F "messages=" \
   -F "files=@/path to your file (.txt, .docx, .pdf)" \
   -F "max_tokens=32" \
-  -F "language=en" \
+  -F "language=en"
 ```
 
+Note that the `-F "messages="` flag is required, even for file uploads. Multiple files can be uploaded in a single call with multiple `-F "files=@/path"` inputs.
+
 ### Query with audio and video
 
-> Audio and Video file uploads are not supported in docsum with curl request, please use the Gradio-UI.
+> Audio and video can be passed as base64 strings or uploaded by providing a local file path.
 
 Audio:
 
 ```bash
+# Send base64 string
 curl -X POST http://${host_ip}:8888/v1/docsum \
   -H "Content-Type: application/json" \
   -d '{"type": "audio", "messages": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}'
@@ -177,11 +174,21 @@ curl http://${host_ip}:8888/v1/docsum \
   -F "max_tokens=32" \
   -F "language=en" \
   -F "stream=True"
+
+# Upload file
+curl http://${host_ip}:8888/v1/docsum \
+  -H "Content-Type: multipart/form-data" \
+  -F "type=audio" \
+  -F "messages=" \
+  -F "files=@/path to your file (.mp3, .wav)" \
+  -F "max_tokens=32" \
+  -F "language=en"
 ```
 
 Video:
 
 ```bash
+# Send base64 string
 curl -X POST http://${host_ip}:8888/v1/docsum \
   -H "Content-Type: application/json" \
   -d '{"type": "video", "messages": "convert your video to base64 data type"}'
@@ -193,6 +200,15 @@ curl http://${host_ip}:8888/v1/docsum \
   -F "max_tokens=32" \
   -F "language=en" \
   -F "stream=True"
+
+# Upload file
+curl http://${host_ip}:8888/v1/docsum \
+  -H "Content-Type: multipart/form-data" \
+  -F "type=video" \
+  -F "messages=" \
+  -F "files=@/path to your file (.mp4)" \
+  -F "max_tokens=32" \
+  -F "language=en"
 ```
 
 ### Query with long context
````
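The JSON variants of the audio/video queries expect a base64 payload in `messages`. A minimal sketch for producing one from a local file (assumes GNU coreutils `base64`; `sample.wav` is a placeholder):

```bash
# Encode the audio as a single-line base64 string and submit it as
# the messages payload.
AUDIO_B64=$(base64 -w 0 sample.wav)
curl -X POST http://${host_ip}:8888/v1/docsum \
  -H "Content-Type: application/json" \
  -d "{\"type\": \"audio\", \"messages\": \"${AUDIO_B64}\"}"
```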

DocSum/docker_compose/intel/cpu/xeon/compose.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -40,6 +40,7 @@ services:
       LLM_ENDPOINT: ${LLM_ENDPOINT}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
       MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
       DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
```

DocSum/docker_compose/intel/cpu/xeon/compose_tgi.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -40,6 +40,7 @@ services:
       LLM_ENDPOINT: ${LLM_ENDPOINT}
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
       MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
       DocSum_COMPONENT_NAME: ${DocSum_COMPONENT_NAME}
```
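Both Xeon compose files now pass the same token under two names, since newer HuggingFace tooling reads `HF_TOKEN` while older code paths still read `HUGGINGFACEHUB_API_TOKEN`. One way to confirm the interpolation before starting the stack, sketched with the standard `docker compose config` renderer:

```bash
# Render the effective configuration and check that both token
# variables resolve to the same value (token shown is a placeholder).
export HUGGINGFACEHUB_API_TOKEN="hf_xxx"
docker compose -f compose.yaml config | grep -E 'HF_TOKEN|HUGGINGFACEHUB_API_TOKEN'
```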

DocSum/docker_compose/intel/hpu/gaudi/README.md

Lines changed: 37 additions & 21 deletions
````diff
@@ -23,40 +23,34 @@ This section describes how to quickly deploy and test the DocSum service manuall
 6. [Test the Pipeline](#test-the-pipeline)
 7. [Cleanup the Deployment](#cleanup-the-deployment)
 
-### Access the Code
+### Access the Code and Set Up Environment
 
-Clone the GenAIExample repository and access the ChatQnA Intel® Gaudi® platform Docker Compose files and supporting scripts:
+Clone the GenAIExample repository and access the DocSum Intel® Gaudi® platform Docker Compose files and supporting scripts:
 
-```
+```bash
 git clone https://github.com/opea-project/GenAIExamples.git
-cd GenAIExamples/DocSum/docker_compose/intel/hpu/gaudi/
+cd GenAIExamples/DocSum/docker_compose
+source intel/set_env.sh
 ```
 
-Checkout a released version, such as v1.2:
+NOTE: By default, vLLM performs a "warmup" at startup to optimize its performance for the specified model and the underlying platform, which can take a long time. For development (and, e.g., autoscaling), it can be skipped with `export VLLM_SKIP_WARMUP=true`.
 
-```
-git checkout v1.2
+Check out a released version, such as v1.3:
+
+```bash
+git checkout v1.3
 ```
 
 ### Generate a HuggingFace Access Token
 
 Some HuggingFace resources, such as some models, are only accessible if you have an access token. If you do not already have a HuggingFace access token, you can create one by first creating an account by following the steps provided at [HuggingFace](https://huggingface.co/) and then generating a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token).
 
-### Configure the Deployment Environment
-
-To set up environment variables for deploying DocSum services, source the _set_env.sh_ script in this directory:
-
-```
-source ./set_env.sh
-```
-
-The _set_env.sh_ script will prompt for required and optional environment variables used to configure the DocSum services. If a value is not entered, the script will use a default value for the same. It will also generate a _.env_ file defining the desired configuration. Consult the section on [DocSum Service configuration](#docsum-service-configuration) for information on how service specific configuration parameters affect deployments.
-
 ### Deploy the Services Using Docker Compose
 
 To deploy the DocSum services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute:
 
 ```bash
+cd intel/hpu/gaudi/
 docker compose up -d
 ```
 
@@ -80,13 +74,13 @@ Please refer to the table below to build different microservices from source:
 
 After running docker compose, check if all the containers launched via docker compose have started:
 
-```
+```bash
 docker ps -a
 ```
 
 For the default deployment, the following 5 containers should have started:
 
-```
+```bash
 CONTAINER ID   IMAGE                          COMMAND                  CREATED         STATUS              PORTS                                       NAMES
 748f577b3c78   opea/whisper:latest            "python whisper_s…"      5 minutes ago   Up About a minute   0.0.0.0:7066->7066/tcp, :::7066->7066/tcp   docsum-gaudi-whisper-server
 4eq8b7034fd9   opea/docsum-gradio-ui:latest   "docker-entrypoint.s…"   5 minutes ago   Up About a minute   0.0.0.0:5173->5173/tcp, :::5173->5173/tcp   docsum-gaudi-ui-server
@@ -111,7 +105,7 @@ curl -X POST http://${host_ip}:8888/v1/docsum \
 
 To stop the containers associated with the deployment, execute the following command:
 
-```
+```bash
 docker compose -f compose.yaml down
 ```
 
@@ -161,13 +155,16 @@ curl http://${host_ip}:8888/v1/docsum \
   -F "language=en" \
 ```
 
+Note that the `-F "messages="` flag is required, even for file uploads. Multiple files can be uploaded in a single call with multiple `-F "files=@/path"` inputs.
+
 ### Query with audio and video
 
-> Audio and Video file uploads are not supported in docsum with curl request, please use the Gradio-UI.
+> Audio and video can be passed as base64 strings or uploaded by providing a local file path.
 
 Audio:
 
 ```bash
+# Send base64 string
 curl -X POST http://${host_ip}:8888/v1/docsum \
   -H "Content-Type: application/json" \
   -d '{"type": "audio", "messages": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}'
@@ -179,11 +176,21 @@ curl http://${host_ip}:8888/v1/docsum \
   -F "max_tokens=32" \
   -F "language=en" \
   -F "stream=True"
+
+# Upload file
+curl http://${host_ip}:8888/v1/docsum \
+  -H "Content-Type: multipart/form-data" \
+  -F "type=audio" \
+  -F "messages=" \
+  -F "files=@/path to your file (.mp3, .wav)" \
+  -F "max_tokens=32" \
+  -F "language=en"
 ```
 
 Video:
 
 ```bash
+# Send base64 string
 curl -X POST http://${host_ip}:8888/v1/docsum \
   -H "Content-Type: application/json" \
   -d '{"type": "video", "messages": "convert your video to base64 data type"}'
@@ -195,6 +202,15 @@ curl http://${host_ip}:8888/v1/docsum \
   -F "max_tokens=32" \
   -F "language=en" \
   -F "stream=True"
+
+# Upload file
+curl http://${host_ip}:8888/v1/docsum \
+  -H "Content-Type: multipart/form-data" \
+  -F "type=video" \
+  -F "messages=" \
+  -F "files=@/path to your file (.mp4)" \
+  -F "max_tokens=32" \
+  -F "language=en"
 ```
 
 ### Query with long context
````
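The note added in both READMEs says multiple files can be uploaded in one call by repeating the `files` field. A sketch with two hypothetical documents (`doc1.txt` and `doc2.pdf` are placeholders, and the `type=text` field is an assumption based on the audio/video examples; the empty `messages` field is still required):

```bash
# Summarize two documents in a single multipart request.
curl http://${host_ip}:8888/v1/docsum \
  -H "Content-Type: multipart/form-data" \
  -F "type=text" \
  -F "messages=" \
  -F "files=@doc1.txt" \
  -F "files=@doc2.pdf" \
  -F "max_tokens=32" \
  -F "language=en"
```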

DocSum/docker_compose/intel/hpu/gaudi/compose.yaml

Lines changed: 2 additions & 0 deletions
```diff
@@ -18,6 +18,7 @@ services:
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       LLM_MODEL_ID: ${LLM_MODEL_ID}
       NUM_CARDS: ${NUM_CARDS}
+      VLLM_SKIP_WARMUP: ${VLLM_SKIP_WARMUP:-false}
       VLLM_TORCH_PROFILER_DIR: "/mnt"
     healthcheck:
       test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
@@ -44,6 +45,7 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
       MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
       LLM_ENDPOINT: ${LLM_ENDPOINT}
```
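Because the new `VLLM_SKIP_WARMUP` entry uses the `${VLLM_SKIP_WARMUP:-false}` default, warmup stays enabled unless the variable is set in the shell, so skipping it requires no file edit:

```bash
# Skip the vLLM warmup pass for faster development restarts
# (per the NOTE added to the READMEs in this commit).
VLLM_SKIP_WARMUP=true docker compose up -d
```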

DocSum/docker_compose/intel/hpu/gaudi/compose_tgi.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -49,6 +49,7 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
       MAX_INPUT_TOKENS: ${MAX_INPUT_TOKENS}
       MAX_TOTAL_TOKENS: ${MAX_TOTAL_TOKENS}
       LLM_ENDPOINT: ${LLM_ENDPOINT}
```

DocSum/docker_compose/set_env.sh renamed to DocSum/docker_compose/intel/set_env.sh

Lines changed: 11 additions & 4 deletions
```diff
@@ -6,24 +6,31 @@ pushd "../../" > /dev/null
 source .set_env.sh
 popd > /dev/null
 
+export host_ip=$(hostname -I | awk '{print $1}') # Example: host_ip="192.168.1.1"
 export no_proxy="${no_proxy},${host_ip}" # Example: no_proxy="localhost, 127.0.0.1, 192.168.1.1"
 export http_proxy=$http_proxy
 export https_proxy=$https_proxy
-export host_ip=$(hostname -I | awk '{print $1}') # Example: host_ip="192.168.1.1"
-export HUGGINGFACEHUB_API_TOKEN="Your_Huggingface_API_Token"
+export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
 
 export LLM_ENDPOINT_PORT=8008
-export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
 export MAX_INPUT_TOKENS=1024
 export MAX_TOTAL_TOKENS=2048
 
 export LLM_PORT=9000
 export LLM_ENDPOINT="http://${host_ip}:${LLM_ENDPOINT_PORT}"
 export DocSum_COMPONENT_NAME="OpeaDocSumvLLM" # OpeaDocSumTgi
-
+export FRONTEND_SERVICE_PORT=5173
 export MEGA_SERVICE_HOST_IP=${host_ip}
 export LLM_SERVICE_HOST_IP=${host_ip}
 export ASR_SERVICE_HOST_IP=${host_ip}
 
 export BACKEND_SERVICE_PORT=8888
 export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:${BACKEND_SERVICE_PORT}/v1/docsum"
+
+export LOGFLAG=True
+
+export NUM_CARDS=1
+export BLOCK_SIZE=128
+export MAX_NUM_SEQS=256
+export MAX_SEQ_LEN_TO_CAPTURE=2048
```
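Since the relocated script now reads `HUGGINGFACEHUB_API_TOKEN` from the environment rather than a hard-coded placeholder, the token must be exported before sourcing. A minimal usage sketch following the README's call pattern (token value is a placeholder):

```bash
# Export the token, source the relocated script from the
# docker_compose directory, and spot-check a derived value.
export HUGGINGFACEHUB_API_TOKEN="hf_xxx"
cd GenAIExamples/DocSum/docker_compose
source intel/set_env.sh
echo "LLM endpoint: ${LLM_ENDPOINT}"
```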
