
Commit ad5523b

Enable OpenTelemetry Tracing for ChatQnA on Xeon and Gaudi via the docker compose merge feature (#1488)

Signed-off-by: Louie, Tsai <louie.tsai@intel.com>
Signed-off-by: Tsai, Louie <louie.tsai@intel.com>

1 parent 88a8235 commit ad5523b

13 files changed (+181, -30 lines)

ChatQnA/README.md

Lines changed: 23 additions & 1 deletion
@@ -91,6 +91,14 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
 docker compose up -d
 ```

+To enable OpenTelemetry Tracing, the compose.telemetry.yaml file needs to be merged with the default compose.yaml file.
+CPU example with the OpenTelemetry feature:
+
+```bash
+cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
+docker compose -f compose.yaml -f compose.telemetry.yaml up -d
+```
+
 It will automatically download the docker image on `docker hub`:

 ```bash
@@ -232,6 +240,13 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
 docker compose up -d
 ```

+To enable OpenTelemetry Tracing, the compose.telemetry.yaml file needs to be merged with the default compose.yaml file.
+
+```bash
+cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
+docker compose -f compose.yaml -f compose.telemetry.yaml up -d
+```
+
 Refer to the [Gaudi Guide](./docker_compose/intel/hpu/gaudi/README.md) to build docker images from source.

 ### Deploy ChatQnA on Xeon
@@ -243,6 +258,13 @@ cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
 docker compose up -d
 ```

+To enable OpenTelemetry Tracing, the compose.telemetry.yaml file needs to be merged with the default compose.yaml file.
+
+```bash
+cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
+docker compose -f compose.yaml -f compose.telemetry.yaml up -d
+```
+
 Refer to the [Xeon Guide](./docker_compose/intel/cpu/xeon/README.md) for more instructions on building docker images from source.

 ### Deploy ChatQnA on NVIDIA GPU
@@ -346,7 +368,7 @@ OPEA microservice deployment can easily be monitored through Grafana dashboards

 ## Tracing Services with OpenTelemetry Tracing and Jaeger

-> NOTE: limited support. Only LLM inference serving with TGI on Gaudi is enabled for this feature.
+> NOTE: This feature is disabled by default. Please check the Deploy ChatQnA sections above for how to enable it with the compose.telemetry.yaml file.

 OPEA microservice and TGI/TEI serving can easily be traced through Jaeger dashboards in conjunction with the OpenTelemetry Tracing feature. Follow the [README](https://github.com/opea-project/GenAIComps/tree/main/comps/cores/telemetry#tracing) to trace additional functions if needed.
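Once a telemetry-enabled stack is up, a quick sanity check is to ask the Jaeger all-in-one container which services have reported spans. A minimal sketch, assuming the stack from the commands above runs on the local host with Jaeger's default query port 16686 unchanged:

```bash
# After sending a few chat requests, the OPEA services should be listed here;
# the Jaeger UI itself is served at http://localhost:16686.
curl -s http://localhost:16686/api/services
```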

ChatQnA/docker_compose/intel/cpu/xeon/README.md

Lines changed: 12 additions & 0 deletions
@@ -44,6 +44,14 @@ To set up environment variables for deploying ChatQnA services, follow these steps:
 docker compose up -d
 ```

+To enable OpenTelemetry Tracing, the compose.telemetry.yaml file needs to be merged with the default compose.yaml file.
+CPU example with the OpenTelemetry feature:
+
+```bash
+cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
+docker compose -f compose.yaml -f compose.telemetry.yaml up -d
+```
+
 It will automatically download the docker image on `docker hub`:

 ```bash
@@ -263,12 +271,16 @@ If use vLLM as the LLM serving backend.
 docker compose -f compose.yaml up -d
 # Start ChatQnA without Rerank Pipeline
 docker compose -f compose_without_rerank.yaml up -d
+# Start ChatQnA with Rerank Pipeline and OpenTelemetry Tracing
+docker compose -f compose.yaml -f compose.telemetry.yaml up -d
 ```

 If use TGI as the LLM serving backend.

 ```bash
 docker compose -f compose_tgi.yaml up -d
+# Start ChatQnA with OpenTelemetry Tracing
+docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d
 ```

 ### Validate Microservices
ChatQnA/docker_compose/intel/cpu/xeon/compose.telemetry.yaml

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  tei-embedding-service:
+    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  tei-reranking-service:
+    command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  jaeger:
+    image: jaegertracing/all-in-one:latest
+    container_name: jaeger
+    ports:
+      - "16686:16686"
+      - "4317:4317"
+      - "4318:4318"
+      - "9411:9411"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      COLLECTOR_ZIPKIN_HOST_PORT: 9411
+    restart: unless-stopped
+  chatqna-xeon-backend-server:
+    environment:
+      - ENABLE_OPEA_TELEMETRY=true
+      - TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
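Docker Compose merges the files given with repeated -f flags: maps such as `environment` are combined key by key, while scalar keys such as `command` are replaced by the last file in the list, which is why this override restates the full TEI command with the extra `--otlp-endpoint` flag. The merged result can be inspected without starting anything. A sketch, assuming the variables from set_env.sh are exported:

```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
source set_env.sh
# Render the effective configuration; the jaeger service and the
# --otlp-endpoint flags should appear alongside the base services.
docker compose -f compose.yaml -f compose.telemetry.yaml config
```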
ChatQnA/docker_compose/intel/cpu/xeon/compose_tgi.telemetry.yaml

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  tei-embedding-service:
+    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  tei-reranking-service:
+    command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  tgi-service:
+    command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  jaeger:
+    image: jaegertracing/all-in-one:latest
+    container_name: jaeger
+    ports:
+      - "16686:16686"
+      - "4317:4317"
+      - "4318:4318"
+      - "9411:9411"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      COLLECTOR_ZIPKIN_HOST_PORT: 9411
+    restart: unless-stopped
+  chatqna-xeon-backend-server:
+    environment:
+      - ENABLE_OPEA_TELEMETRY=true
+      - TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
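Two endpoints are in play here: TGI and TEI export spans over OTLP/gRPC to port 4317 via the `--otlp-endpoint` flag, while the OPEA backend posts to the OTLP/HTTP receiver on port 4318 via TELEMETRY_ENDPOINT. A quick reachability check for the HTTP receiver, as a sketch assuming the jaeger container is running and JAEGER_IP is set as in set_env.sh:

```bash
# An empty OTLP/HTTP export request; a JSON reply such as {"partialSuccess":{}}
# suggests the receiver on 4318 is accepting trace posts.
curl -s -X POST "http://$JAEGER_IP:4318/v1/traces" \
  -H 'Content-Type: application/json' -d '{}'
```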

ChatQnA/docker_compose/intel/cpu/xeon/set_env.sh

Lines changed: 4 additions & 0 deletions
@@ -19,3 +19,7 @@ export INDEX_NAME="rag-redis"
 # Set it as a non-null string, such as true, if you want to enable logging facility,
 # otherwise, keep it as "" to disable it.
 export LOGFLAG=""
+# Set OpenTelemetry Tracing Endpoint
+export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
+export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
+export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces
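The JAEGER_IP line asks the kernel which source address it would use to reach 8.8.8.8, which yields the host's primary outbound IP without hard-coding an interface name; the `\K` in the GNU grep pattern discards the `src ` prefix from the match. To see what it resolves to on a given machine:

```bash
# Full routing decision first, then just the extracted source address
ip route get 8.8.8.8
ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+'
```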

ChatQnA/docker_compose/intel/hpu/gaudi/README.md

Lines changed: 10 additions & 0 deletions
@@ -45,6 +45,12 @@ To set up environment variables for deploying ChatQnA services, follow these steps:
 docker compose up -d
 ```

+To enable OpenTelemetry Tracing, the compose.telemetry.yaml file needs to be merged with the default compose.yaml file.
+
+```bash
+docker compose -f compose.yaml -f compose.telemetry.yaml up -d
+```
+
 It will automatically download the docker image on `docker hub`:

 ```bash
@@ -259,12 +265,16 @@ If use vLLM as the LLM serving backend.
 docker compose -f compose.yaml up -d
 # Start ChatQnA without Rerank Pipeline
 docker compose -f compose_without_rerank.yaml up -d
+# Start ChatQnA with Rerank Pipeline and OpenTelemetry Tracing
+docker compose -f compose.yaml -f compose.telemetry.yaml up -d
 ```

 If use TGI as the LLM serving backend.

 ```bash
 docker compose -f compose_tgi.yaml up -d
+# Start ChatQnA with OpenTelemetry Tracing
+docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml up -d
 ```

 If you want to enable guardrails microservice in the pipeline, please follow the below command instead:
ChatQnA/docker_compose/intel/hpu/gaudi/compose.telemetry.yaml

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  tei-embedding-service:
+    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  tei-reranking-service:
+    command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  jaeger:
+    image: jaegertracing/all-in-one:latest
+    container_name: jaeger
+    ports:
+      - "16686:16686"
+      - "4317:4317"
+      - "4318:4318"
+      - "9411:9411"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      COLLECTOR_ZIPKIN_HOST_PORT: 9411
+    restart: unless-stopped
+  chatqna-gaudi-backend-server:
+    environment:
+      - ENABLE_OPEA_TELEMETRY=true
+      - TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
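Once this override is active and a few requests have gone through the Gaudi pipeline, collected traces can be pulled from Jaeger's query API as well as browsed in the UI. A sketch, assuming default ports; the service name below is a guess based on the container name, so substitute whatever `/api/services` actually reports:

```bash
# List reporting services, then fetch the latest trace for one of them
curl -s "http://$JAEGER_IP:16686/api/services"
curl -s "http://$JAEGER_IP:16686/api/traces?service=chatqna-gaudi-backend-server&limit=1"
```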
ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.telemetry.yaml

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  tei-embedding-service:
+    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  tei-reranking-service:
+    command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  tgi-service:
+    command: --model-id ${LLM_MODEL_ID} --max-input-length 2048 --max-total-tokens 4096 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+  jaeger:
+    image: jaegertracing/all-in-one:latest
+    container_name: jaeger
+    ports:
+      - "16686:16686"
+      - "4317:4317"
+      - "4318:4318"
+      - "9411:9411"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      COLLECTOR_ZIPKIN_HOST_PORT: 9411
+    restart: unless-stopped
+  chatqna-gaudi-backend-server:
+    environment:
+      - ENABLE_OPEA_TELEMETRY=true
+      - TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
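One caveat of the compose merge feature: an override's `command` replaces the base service's command wholesale rather than appending to it, so an override like this must restate every flag the base file sets. The rendered configuration makes that easy to verify. A sketch, assuming the Gaudi environment variables are exported:

```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
# The merged tgi-service command should carry the --otlp-endpoint flag
docker compose -f compose_tgi.yaml -f compose_tgi.telemetry.yaml config | grep -- '--otlp-endpoint'
```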

ChatQnA/docker_compose/intel/hpu/gaudi/compose_tgi.yaml

Lines changed: 3 additions & 21 deletions
@@ -25,7 +25,6 @@ services:
       INDEX_NAME: ${INDEX_NAME}
       TEI_ENDPOINT: http://tei-embedding-service:80
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      TELEMETRY_ENDPOINT: ${TELEMETRY_ENDPOINT}
   tei-embedding-service:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     container_name: tei-embedding-gaudi-server
@@ -38,7 +37,7 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
   retriever:
     image: ${REGISTRY:-opea}/retriever:${TAG:-latest}
     container_name: retriever-redis-server
@@ -56,7 +55,6 @@ services:
       INDEX_NAME: ${INDEX_NAME}
       TEI_EMBEDDING_ENDPOINT: http://tei-embedding-service:80
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
-      TELEMETRY_ENDPOINT: ${TELEMETRY_ENDPOINT}
       LOGFLAG: ${LOGFLAG}
       RETRIEVER_COMPONENT_NAME: "OPEA_RETRIEVER_REDIS"
     restart: unless-stopped
@@ -80,7 +78,7 @@ services:
       HABANA_VISIBLE_DEVICES: all
       OMPI_MCA_btl_vader_single_copy_mechanism: none
       MAX_WARMUP_SEQUENCE_LENGTH: 512
-    command: --model-id ${RERANK_MODEL_ID} --auto-truncate --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
   tgi-service:
     image: ghcr.io/huggingface/tgi-gaudi:2.0.6
     container_name: tgi-gaudi-server
@@ -106,22 +104,7 @@ services:
     cap_add:
       - SYS_NICE
     ipc: host
-    command: --model-id ${LLM_MODEL_ID} --num-shard ${NUM_CARDS} --max-input-length 2048 --max-total-tokens 4096 --otlp-endpoint $OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
-  jaeger:
-    image: jaegertracing/all-in-one:latest
-    container_name: jaeger
-    ports:
-      - "16686:16686"
-      - "4317:4317"
-      - "4318:4318"
-      - "9411:9411"
-    ipc: host
-    environment:
-      no_proxy: ${no_proxy}
-      http_proxy: ${http_proxy}
-      https_proxy: ${https_proxy}
-      COLLECTOR_ZIPKIN_HOST_PORT: 9411
-    restart: unless-stopped
+    command: --model-id ${LLM_MODEL_ID} --num-shard ${NUM_CARDS} --max-input-length 2048 --max-total-tokens 4096
   chatqna-gaudi-backend-server:
     image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
     container_name: chatqna-gaudi-backend-server
@@ -147,7 +130,6 @@ services:
       - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
       - LLM_MODEL=${LLM_MODEL_ID}
      - LOGFLAG=${LOGFLAG}
-      - TELEMETRY_ENDPOINT=${TELEMETRY_ENDPOINT}
     ipc: host
     restart: always
   chatqna-gaudi-ui-server:
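The net effect of this file's changes is that every tracing artifact (the `--otlp-endpoint` flags, the jaeger service, and the TELEMETRY_ENDPOINT variables) moves out of the base compose_tgi.yaml into the telemetry override, leaving the default deployment free of tracing configuration. A quick check, as a sketch run from the Gaudi compose directory:

```bash
cd GenAIExamples/ChatQnA/docker_compose/intel/hpu/gaudi/
# After this commit, the base file should contain no telemetry references
grep -nE 'otlp|TELEMETRY|jaeger' compose_tgi.yaml || echo "base file is telemetry-free"
```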

ChatQnA/tests/test_compose_on_gaudi.sh

Lines changed: 5 additions & 2 deletions
@@ -49,9 +49,12 @@ function start_services() {
     export INDEX_NAME="rag-redis"
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
     export host_ip=${ip_address}
+    export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
+    export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
+    export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces

     # Start Docker Containers
-    docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
+    docker compose -f compose.yaml -f compose_telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     n=0
     until [[ "$n" -ge 160 ]]; do
         echo "n=$n"
@@ -172,7 +175,7 @@ function validate_frontend() {

 function stop_docker() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi
-    docker compose -f compose.yaml down
+    docker compose -f compose.yaml -f compose_telemetry.yaml down
 }

 function main() {

ChatQnA/tests/test_compose_on_xeon.sh

Lines changed: 5 additions & 2 deletions
@@ -49,9 +49,12 @@ function start_services() {
     export INDEX_NAME="rag-redis"
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
     export host_ip=${ip_address}
+    export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
+    export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
+    export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces

     # Start Docker Containers
-    docker compose -f compose.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
+    docker compose -f compose.yaml -f compose_telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
     n=0
     until [[ "$n" -ge 100 ]]; do
         docker logs vllm-service > ${LOG_PATH}/vllm_service_start.log 2>&1
@@ -172,7 +175,7 @@ function validate_frontend() {

 function stop_docker() {
     cd $WORKPATH/docker_compose/intel/cpu/xeon
-    docker compose -f compose.yaml down
+    docker compose -f compose.yaml -f compose_telemetry.yaml down
 }

 function main() {

ChatQnA/tests/test_compose_tgi_on_gaudi.sh

Lines changed: 2 additions & 2 deletions
@@ -54,7 +54,7 @@ function start_services() {
     export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces

     # Start Docker Containers
-    docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
+    docker compose -f compose_tgi.yaml -f compose_tgi_telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

     n=0
     until [[ "$n" -ge 500 ]]; do
@@ -218,7 +218,7 @@ function validate_frontend() {

 function stop_docker() {
     cd $WORKPATH/docker_compose/intel/hpu/gaudi
-    docker compose -f compose_tgi.yaml down
+    docker compose -f compose_tgi.yaml -f compose_tgi_telemetry.yaml down
 }

 function main() {

ChatQnA/tests/test_compose_tgi_on_xeon.sh

Lines changed: 5 additions & 2 deletions
@@ -48,9 +48,12 @@ function start_services() {
     export LLM_MODEL_ID="meta-llama/Meta-Llama-3-8B-Instruct"
     export INDEX_NAME="rag-redis"
     export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+    export JAEGER_IP=$(ip route get 8.8.8.8 | grep -oP 'src \K[^ ]+')
+    export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=grpc://$JAEGER_IP:4317
+    export TELEMETRY_ENDPOINT=http://$JAEGER_IP:4318/v1/traces

     # Start Docker Containers
-    docker compose -f compose_tgi.yaml up -d > ${LOG_PATH}/start_services_with_compose.log
+    docker compose -f compose_tgi.yaml -f compose_tgi_telemetry.yaml up -d > ${LOG_PATH}/start_services_with_compose.log

     n=0
     until [[ "$n" -ge 100 ]]; do
@@ -216,7 +219,7 @@ function validate_frontend() {

 function stop_docker() {
     cd $WORKPATH/docker_compose/intel/cpu/xeon
-    docker compose -f compose_tgi.yaml down
+    docker compose -f compose_tgi.yaml -f compose_tgi_telemetry.yaml down
 }

 function main() {
