From a2700a82c8b0e11e6d850165259f9489ad92572e Mon Sep 17 00:00:00 2001
From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
Date: Tue, 10 Dec 2024 17:13:47 +0100
Subject: [PATCH] Test examples (#306)

---
 .github/workflows/test_api_cpu.yaml           | 17 ++++++-
 .github/workflows/test_api_cuda.yaml          | 15 +++++-
 .github/workflows/test_api_misc.yaml          |  4 +-
 .github/workflows/test_cli_cpu_ipex.yaml      | 15 +++---
 .github/workflows/test_cli_cpu_llama_cpp.yaml | 10 +++-
 .../workflows/test_cli_cpu_onnxruntime.yaml   |  8 ++++
 .github/workflows/test_cli_cpu_openvino.yaml  | 14 +++---
 .github/workflows/test_cli_cpu_py_txi.yaml    | 11 ++++-
 .github/workflows/test_cli_cpu_pytorch.yaml   |  9 ++++
 .../workflows/test_cli_cuda_onnxruntime.yaml  |  9 ++++
 .github/workflows/test_cli_cuda_py_txi.yaml   | 10 +++-
 .github/workflows/test_cli_cuda_pytorch.yaml  |  8 ++++
 .../workflows/test_cli_cuda_tensorrt_llm.yaml | 10 ++++
 .../workflows/test_cli_cuda_torch_ort.yaml    | 15 ++++--
 .github/workflows/test_cli_cuda_vllm.yaml     |  9 ++++
 ...energy_star.yaml => test_energy_star.yaml} |  6 +--
 .../energy_star => energy_star}/_base_.yaml   |  0
 .../automatic_speech_recognition.yaml         |  0
 .../image_classification.yaml                 |  0
 .../image_to_text.yaml                        |  0
 .../object_detection.yaml                     |  0
 .../question_answering.yaml                   |  0
 .../sentence_similarity.yaml                  |  0
 .../summarization.yaml                        |  0
 .../t5_question_answering.yaml                |  0
 .../t5_summarization.yaml                     |  0
 .../t5_text_classification.yaml               |  0
 .../t5_text_generation.yaml                   |  0
 .../text_classification.yaml                  |  0
 .../text_generation.yaml                      |  0
 .../text_to_image.yaml                        |  0
 .../{ipex_bert.yaml => cpu_ipex_bert.yaml}    | 19 ++++----
 .../{ipex_llama.yaml => cpu_ipex_llama.yaml}  | 25 +++++-----
 ...ding.yaml => cpu_llama_cpp_embedding.yaml} | 18 ++++----
 ...aml => cpu_llama_cpp_text_generation.yaml} | 18 ++++----
 ... => cpu_onnxruntime_static_quant_vit.yaml} |  9 +++-
 ...me_timm.yaml => cpu_onnxruntime_timm.yaml} |  3 +-
 ..._bert.yaml => cpu_openvino_8bit_bert.yaml} | 23 ++++------
 ...usion.yaml => cpu_openvino_diffusion.yaml} |  5 +-
 .../{pytorch_bert.py => cuda_pytorch_bert.py} | 36 +++++----------
 ...torch_bert.yaml => cuda_pytorch_bert.yaml} | 15 +++---
 ...trt_llama.yaml => cuda_pytorch_llama.yaml} | 12 +++--
 ..._llama.py => cuda_pytorch_llama_quants.py} | 22 +++------
 ...pytorch_vlm.yaml => cuda_pytorch_vlm.yaml} |  4 +-
 .../{tgi_llama.yaml => cuda_tgi_llama.yaml}   | 13 +++---
 ...pytorch_llama.yaml => cuda_trt_llama.yaml} | 23 ++++------
 .../{vllm_llama.yaml => cuda_vllm_llama.yaml} | 14 +++---
 ...ch_bert_mps.yaml => mps_pytorch_bert.yaml} | 11 ++---
 examples/neural_compressor_ptq_bert.yaml      | 20 --------
 examples/openvino_static_quant_bert.yaml      | 21 ---------
 examples/tei_bge.yaml                         | 21 ---------
 optimum_benchmark/backends/py_txi/config.py   |  2 +-
 optimum_benchmark/cli.py                      |  4 ---
 setup.py                                      |  1 +
 tests/test_energy_star.py                     | 14 +++++-
 tests/test_examples.py                        | 46 +++++++++++++++++++
 56 files changed, 324 insertions(+), 245 deletions(-)
 rename .github/workflows/{test_cli_energy_star.yaml => test_energy_star.yaml} (84%)
 rename {examples/energy_star => energy_star}/_base_.yaml (100%)
 rename {examples/energy_star => energy_star}/automatic_speech_recognition.yaml (100%)
 rename {examples/energy_star => energy_star}/image_classification.yaml (100%)
 rename {examples/energy_star => energy_star}/image_to_text.yaml (100%)
 rename {examples/energy_star => energy_star}/object_detection.yaml (100%)
 rename {examples/energy_star => energy_star}/question_answering.yaml (100%)
 rename {examples/energy_star => energy_star}/sentence_similarity.yaml (100%)
 rename {examples/energy_star => energy_star}/summarization.yaml (100%)
 rename {examples/energy_star => energy_star}/t5_question_answering.yaml (100%)
 rename {examples/energy_star => energy_star}/t5_summarization.yaml (100%)
 rename {examples/energy_star => energy_star}/t5_text_classification.yaml (100%)
 rename {examples/energy_star => energy_star}/t5_text_generation.yaml (100%)
 rename {examples/energy_star => energy_star}/text_classification.yaml (100%)
 rename {examples/energy_star => energy_star}/text_generation.yaml (100%)
 rename {examples/energy_star => energy_star}/text_to_image.yaml (100%)
 rename examples/{ipex_bert.yaml => cpu_ipex_bert.yaml} (59%)
 rename examples/{ipex_llama.yaml => cpu_ipex_llama.yaml} (66%)
 rename examples/{llama_cpp_embedding.yaml => cpu_llama_cpp_embedding.yaml} (57%)
 rename examples/{llama_cpp_text_generation.yaml => cpu_llama_cpp_text_generation.yaml} (61%)
 rename examples/{onnxruntime_static_quant_vit.yaml => cpu_onnxruntime_static_quant_vit.yaml} (70%)
 rename examples/{onnxruntime_timm.yaml => cpu_onnxruntime_timm.yaml} (82%)
 rename examples/{numactl_bert.yaml => cpu_openvino_8bit_bert.yaml} (57%)
 rename examples/{openvino_diffusion.yaml => cpu_openvino_diffusion.yaml} (78%)
 rename examples/{pytorch_bert.py => cuda_pytorch_bert.py} (59%)
 rename examples/{pytorch_bert.yaml => cuda_pytorch_bert.yaml} (90%)
 rename examples/{trt_llama.yaml => cuda_pytorch_llama.yaml} (70%)
 rename examples/{pytorch_llama.py => cuda_pytorch_llama_quants.py} (81%)
 rename examples/{pytorch_vlm.yaml => cuda_pytorch_vlm.yaml} (92%)
 rename examples/{tgi_llama.yaml => cuda_tgi_llama.yaml} (63%)
 rename examples/{pytorch_llama.yaml => cuda_trt_llama.yaml} (56%)
 rename examples/{vllm_llama.yaml => cuda_vllm_llama.yaml} (62%)
 rename examples/{pytorch_bert_mps.yaml => mps_pytorch_bert.yaml} (67%)
 delete mode 100644 examples/neural_compressor_ptq_bert.yaml
 delete mode 100644 examples/openvino_static_quant_bert.yaml
 delete mode 100644 examples/tei_bge.yaml
 create mode 100644 tests/test_examples.py

diff --git a/.github/workflows/test_api_cpu.yaml b/.github/workflows/test_api_cpu.yaml
index 126e500b..b48490a5 100644
--- a/.github/workflows/test_api_cpu.yaml
+++ b/.github/workflows/test_api_cpu.yaml
@@ -47,8 +47,21 @@ jobs:
         pip install -e .[testing,timm,diffusers,codecarbon]

     - name: Run tests
+      run: |
+        pytest tests/test_api.py -s -k "api and cpu"
       env:
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
         PUSH_REPO_ID: optimum-benchmark/cpu
-      run: |
-        pytest tests/test_api.py -s -k "api and cpu"
+
+    # no examples for now
+    # - if: ${{
+    #     (github.event_name == 'push') ||
+    #     (github.event_name == 'workflow_dispatch') ||
+    #     contains( github.event.pull_request.labels.*.name, 'examples')
+    #   }}
+    #   name: Run examples
+    #   run: |
+    #     pytest tests/test_examples.py -s -k "api and cpu"
+    #   env:
+    #     HF_TOKEN: ${{ secrets.HF_TOKEN }}
+    #     PUSH_REPO_ID: optimum-benchmark/cpu
diff --git a/.github/workflows/test_api_cuda.yaml b/.github/workflows/test_api_cuda.yaml
index c8be0ece..d45afa40 100644
--- a/.github/workflows/test_api_cuda.yaml
+++ b/.github/workflows/test_api_cuda.yaml
@@ -45,8 +45,21 @@ jobs:
         pip install -e .[testing,timm,diffusers,codecarbon]

     - name: Run tests
+      run: |
+        pytest tests/test_api.py -x -s -k "api and cuda"
       env:
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
         PUSH_REPO_ID: optimum-benchmark/cuda
+
+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
       run: |
-        pytest tests/test_api.py -x -s -k "api and cuda"
+        pip install -e .[testing,torchao,autoawq,auto-gptq]
+        pytest tests/test_examples.py -x -s -k "api and cuda and pytorch"
+      env:
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        PUSH_REPO_ID: optimum-benchmark/cuda
diff --git a/.github/workflows/test_api_misc.yaml b/.github/workflows/test_api_misc.yaml
index 2da1e7ec..36c26215 100644
--- a/.github/workflows/test_api_misc.yaml
+++ b/.github/workflows/test_api_misc.yaml
@@ -58,8 +58,8 @@ jobs:
         UV_SYSTEM_PYTHON: 1

     - name: Run tests
+      run: |
+        pytest tests/test_api.py -s -k "api and not (cpu or cuda or rocm or mps)"
       env:
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
         PUSH_REPO_ID: optimum-benchmark/misc-${{ matrix.os }}-${{ matrix.python }}
-      run: |
-        pytest tests/test_api.py -s -k "api and not (cpu or cuda or rocm or mps)"
diff --git a/.github/workflows/test_cli_cpu_ipex.yaml b/.github/workflows/test_cli_cpu_ipex.yaml
index d6b94d3e..5bf0be92 100644
--- a/.github/workflows/test_cli_cpu_ipex.yaml
+++ b/.github/workflows/test_cli_cpu_ipex.yaml
@@ -36,16 +36,17 @@ jobs:
     - name: Checkout
      uses: actions/checkout@v4

-    - name: Set up Python 3.10
-      uses: actions/setup-python@v5
-      with:
-        python-version: "3.10"
-
     - name: Install requirements
       run: |
-        pip install --upgrade pip
-        pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
         pip install -e .[testing,ipex,diffusers,timm]

     - name: Run tests
       run: pytest tests/test_cli.py -s -k "cli and cpu and ipex"
+
+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: pytest tests/test_examples.py -s -k "cli and cpu and ipex"
diff --git a/.github/workflows/test_cli_cpu_llama_cpp.yaml b/.github/workflows/test_cli_cpu_llama_cpp.yaml
index 05d43683..145c0f83 100644
--- a/.github/workflows/test_cli_cpu_llama_cpp.yaml
+++ b/.github/workflows/test_cli_cpu_llama_cpp.yaml
@@ -48,4 +48,12 @@ jobs:
         pip install -e .[testing,llama-cpp]

     - name: Run tests
-      run: pytest tests/test_cli.py -s -k "llama_cpp"
+      run: pytest tests/test_cli.py -s -k "cli and cpu and llama_cpp"
+
+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: pytest tests/test_examples.py -s -k "cli and cpu and llama_cpp"
diff --git a/.github/workflows/test_cli_cpu_onnxruntime.yaml b/.github/workflows/test_cli_cpu_onnxruntime.yaml
index 21e65235..ef8482b7 100644
--- a/.github/workflows/test_cli_cpu_onnxruntime.yaml
+++ b/.github/workflows/test_cli_cpu_onnxruntime.yaml
@@ -49,3 +49,11 @@ jobs:

     - name: Run tests
       run: pytest tests/test_cli.py -s -k "cli and cpu and onnxruntime"
+
+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: pytest tests/test_examples.py -s -k "cli and cpu and onnxruntime"
diff --git a/.github/workflows/test_cli_cpu_openvino.yaml b/.github/workflows/test_cli_cpu_openvino.yaml
index 4612370c..2ef0312e 100644
--- a/.github/workflows/test_cli_cpu_openvino.yaml
+++ b/.github/workflows/test_cli_cpu_openvino.yaml
@@ -36,16 +36,18 @@ jobs:
     - name: Checkout
       uses: actions/checkout@v4

-    - name: Set up Python 3.10
-      uses: actions/setup-python@v5
-      with:
-        python-version: "3.10"
-
     - name: Install requirements
       run: |
-        pip install --upgrade pip
         pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
         pip install -e .[testing,openvino,diffusers,timm]

     - name: Run tests
       run: pytest tests/test_cli.py -s -k "cli and cpu and openvino"
+
+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: pytest tests/test_examples.py -s -k "cli and cpu and openvino"
diff --git a/.github/workflows/test_cli_cpu_py_txi.yaml b/.github/workflows/test_cli_cpu_py_txi.yaml
index d07f6170..7b1946e7 100644
--- a/.github/workflows/test_cli_cpu_py_txi.yaml
+++ b/.github/workflows/test_cli_cpu_py_txi.yaml
@@ -45,7 +45,16 @@ jobs:
       run: |
         pip install --upgrade pip
         pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-        pip install -e .[testing,py-txi]
+        pip install -e .[testing,py-txi] git+https://github.com/IlyasMoutawwakil/py-txi.git

     - name: Run tests
       run: pytest tests/test_cli.py -s -k "cli and cpu and py_txi"
+
+    # no examples for now
+    # - if: ${{
+    #     (github.event_name == 'push') ||
+    #     (github.event_name == 'workflow_dispatch') ||
+    #     contains( github.event.pull_request.labels.*.name, 'examples')
+    #   }}
+    #   name: Run examples
+    #   run: pytest tests/test_examples.py -s -k "cli and cpu and (tgi or tei)"
diff --git a/.github/workflows/test_cli_cpu_pytorch.yaml b/.github/workflows/test_cli_cpu_pytorch.yaml
index fef2a772..dab603c7 100644
--- a/.github/workflows/test_cli_cpu_pytorch.yaml
+++ b/.github/workflows/test_cli_cpu_pytorch.yaml
@@ -49,3 +49,12 @@ jobs:

     - name: Run tests
       run: pytest tests/test_cli.py -s -k "cli and cpu and pytorch"
+
+    # no examples for now
+    # - if: ${{
+    #     (github.event_name == 'push') ||
+    #     (github.event_name == 'workflow_dispatch') ||
+    #     contains( github.event.pull_request.labels.*.name, 'examples')
+    #   }}
+    #   name: Run examples
+    #   run: pytest tests/test_examples.py -s -k "cli and cpu and pytorch"
diff --git a/.github/workflows/test_cli_cuda_onnxruntime.yaml b/.github/workflows/test_cli_cuda_onnxruntime.yaml
index 0584665c..1351e1b0 100644
--- a/.github/workflows/test_cli_cuda_onnxruntime.yaml
+++ b/.github/workflows/test_cli_cuda_onnxruntime.yaml
@@ -48,3 +48,12 @@ jobs:
     - name: Run tests
       run: |
         pytest tests/test_cli.py -x -s -k "cli and cuda and onnxruntime"
+
+    # no examples for now
+    # - if: ${{
+    #     (github.event_name == 'push') ||
+    #     (github.event_name == 'workflow_dispatch') ||
+    #     contains( github.event.pull_request.labels.*.name, 'examples')
+    #   }}
+    #   name: Run examples
+    #   run: pytest tests/test_examples.py -x -s -k "cli and cuda and onnxruntime"
diff --git a/.github/workflows/test_cli_cuda_py_txi.yaml b/.github/workflows/test_cli_cuda_py_txi.yaml
index 7339b98e..5c090b28 100644
--- a/.github/workflows/test_cli_cuda_py_txi.yaml
+++ b/.github/workflows/test_cli_cuda_py_txi.yaml
@@ -45,7 +45,15 @@ jobs:
     - name: Install requirements
       run: |
         pip install --upgrade pip
-        pip install -e .[testing,py-txi]
+        pip install -e .[testing,py-txi] git+https://github.com/IlyasMoutawwakil/py-txi.git

     - name: Run tests
       run: pytest tests/test_cli.py -x -s -k "cli and cuda and py_txi"
+
+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: pytest tests/test_examples.py -x -s -k "cli and cuda and (tgi or tei)"
diff --git a/.github/workflows/test_cli_cuda_pytorch.yaml b/.github/workflows/test_cli_cuda_pytorch.yaml
index 0bc5dfaf..2aa54d5d 100644
--- a/.github/workflows/test_cli_cuda_pytorch.yaml
+++ b/.github/workflows/test_cli_cuda_pytorch.yaml
@@ -50,6 +50,14 @@ jobs:
       run: |
         pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)"

+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: pytest tests/test_examples.py -x -s -k "cli and cuda and pytorch"
+
   run_cli_cuda_pytorch_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
diff --git a/.github/workflows/test_cli_cuda_tensorrt_llm.yaml b/.github/workflows/test_cli_cuda_tensorrt_llm.yaml
index acb04fe2..c75aac92 100644
--- a/.github/workflows/test_cli_cuda_tensorrt_llm.yaml
+++ b/.github/workflows/test_cli_cuda_tensorrt_llm.yaml
@@ -50,6 +50,16 @@ jobs:
       run: |
         pytest tests/test_cli.py -x -s -k "cli and cuda and tensorrt_llm and not (tp or pp)"

+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: |
+        huggingface-cli delete-cache
+        pytest tests/test_examples.py -x -s -k "cli and cuda and trt"
+
   cli_cuda_tensorrt_llm_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
diff --git a/.github/workflows/test_cli_cuda_torch_ort.yaml b/.github/workflows/test_cli_cuda_torch_ort.yaml
index ee886e8c..7dccafb8 100644
--- a/.github/workflows/test_cli_cuda_torch_ort.yaml
+++ b/.github/workflows/test_cli_cuda_torch_ort.yaml
@@ -44,13 +44,21 @@ jobs:

     - name: Install dependencies
       run: |
-        pip install -e .[testing,torch-ort,peft]
-        pip install optimum@git+https://github.com/huggingface/optimum.git
+        pip install -e .[testing,torch-ort,peft] optimum@git+https://github.com/huggingface/optimum.git@fxi-ort-trainer

     - name: Run tests
       run: |
         pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and not (dp or ddp or device_map) and not (peft)"

+    # - if: ${{
+    #     (github.event_name == 'push') ||
+    #     (github.event_name == 'workflow_dispatch') ||
+    #     contains( github.event.pull_request.labels.*.name, 'examples')
+    #   }}
+    #   name: Run examples
+    #   run: |
+    #     pytest tests/test_examples.py -x -s -k "cli and cuda and torch_ort"
+
   run_cli_cuda_torch_ort_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
@@ -75,8 +83,7 @@ jobs:

     - name: Install dependencies
       run: |
-        pip install -e .[testing,torch-ort,peft]
-        pip install optimum@git+https://github.com/huggingface/optimum.git
+        pip install -e .[testing,torch-ort,peft] optimum@git+https://github.com/huggingface/optimum.git@fxi-ort-trainer

     - name: Run tests
       run: |
diff --git a/.github/workflows/test_cli_cuda_vllm.yaml b/.github/workflows/test_cli_cuda_vllm.yaml
index 732513d2..6072dd8c 100644
--- a/.github/workflows/test_cli_cuda_vllm.yaml
+++ b/.github/workflows/test_cli_cuda_vllm.yaml
@@ -50,6 +50,15 @@ jobs:
       run: |
         FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and not (tp or pp)"

+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: |
+        pytest tests/test_examples.py -x -s -k "cli and cuda and vllm"
+
   run_cli_cuda_vllm_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
diff --git a/.github/workflows/test_cli_energy_star.yaml b/.github/workflows/test_energy_star.yaml
similarity index 84%
rename from .github/workflows/test_cli_energy_star.yaml
rename to .github/workflows/test_energy_star.yaml
index 24c487f6..db9a22cd 100644
--- a/.github/workflows/test_cli_energy_star.yaml
+++ b/.github/workflows/test_energy_star.yaml
@@ -20,13 +20,11 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

 jobs:
-  run_cli_energy_star_tests:
+  run_energy_star:
     if: ${{
       (github.event_name == 'push') ||
       (github.event_name == 'workflow_dispatch') ||
-      contains( github.event.pull_request.labels.*.name, 'cli') ||
-      contains( github.event.pull_request.labels.*.name, 'energy_star') ||
-      contains( github.event.pull_request.labels.*.name, 'cli_energy_star')
+      contains( github.event.pull_request.labels.*.name, 'energy_star')
     }}

     runs-on:
diff --git a/examples/energy_star/_base_.yaml b/energy_star/_base_.yaml
similarity index 100%
rename from examples/energy_star/_base_.yaml
rename to energy_star/_base_.yaml
diff --git a/examples/energy_star/automatic_speech_recognition.yaml b/energy_star/automatic_speech_recognition.yaml
similarity index 100%
rename from examples/energy_star/automatic_speech_recognition.yaml
rename to energy_star/automatic_speech_recognition.yaml
diff --git a/examples/energy_star/image_classification.yaml b/energy_star/image_classification.yaml
similarity index 100%
rename from examples/energy_star/image_classification.yaml
rename to energy_star/image_classification.yaml
diff --git a/examples/energy_star/image_to_text.yaml b/energy_star/image_to_text.yaml
similarity index 100%
rename from examples/energy_star/image_to_text.yaml
rename to energy_star/image_to_text.yaml
diff --git a/examples/energy_star/object_detection.yaml b/energy_star/object_detection.yaml
similarity index 100%
rename from examples/energy_star/object_detection.yaml
rename to energy_star/object_detection.yaml
diff --git a/examples/energy_star/question_answering.yaml b/energy_star/question_answering.yaml
similarity index 100%
rename from examples/energy_star/question_answering.yaml
rename to energy_star/question_answering.yaml
diff --git a/examples/energy_star/sentence_similarity.yaml b/energy_star/sentence_similarity.yaml
similarity index 100%
rename from examples/energy_star/sentence_similarity.yaml
rename to energy_star/sentence_similarity.yaml
diff --git a/examples/energy_star/summarization.yaml b/energy_star/summarization.yaml
similarity index 100%
rename from examples/energy_star/summarization.yaml
rename to energy_star/summarization.yaml
diff --git a/examples/energy_star/t5_question_answering.yaml b/energy_star/t5_question_answering.yaml
similarity index 100%
rename from examples/energy_star/t5_question_answering.yaml
rename to energy_star/t5_question_answering.yaml
diff --git a/examples/energy_star/t5_summarization.yaml b/energy_star/t5_summarization.yaml
similarity index 100%
rename from examples/energy_star/t5_summarization.yaml
rename to energy_star/t5_summarization.yaml
diff --git a/examples/energy_star/t5_text_classification.yaml b/energy_star/t5_text_classification.yaml
similarity index 100%
rename from examples/energy_star/t5_text_classification.yaml
rename to energy_star/t5_text_classification.yaml
diff --git a/examples/energy_star/t5_text_generation.yaml b/energy_star/t5_text_generation.yaml
similarity index 100%
rename from examples/energy_star/t5_text_generation.yaml
rename to energy_star/t5_text_generation.yaml
diff --git a/examples/energy_star/text_classification.yaml b/energy_star/text_classification.yaml
similarity index 100%
rename from examples/energy_star/text_classification.yaml
rename to energy_star/text_classification.yaml
diff --git a/examples/energy_star/text_generation.yaml b/energy_star/text_generation.yaml
similarity index 100%
rename from examples/energy_star/text_generation.yaml
rename to energy_star/text_generation.yaml
diff --git a/examples/energy_star/text_to_image.yaml b/energy_star/text_to_image.yaml
similarity index 100%
rename from examples/energy_star/text_to_image.yaml
rename to energy_star/text_to_image.yaml
diff --git a/examples/ipex_bert.yaml b/examples/cpu_ipex_bert.yaml
similarity index 59%
rename from examples/ipex_bert.yaml
rename to examples/cpu_ipex_bert.yaml
index e549da0a..0e7ed37b 100644
--- a/examples/ipex_bert.yaml
+++ b/examples/cpu_ipex_bert.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: ipex_bert
+name: cpu_ipex_bert

 launcher:
   numactl: true
@@ -14,16 +14,17 @@ launcher:
     cpunodebind: 0
     membind: 0

+backend:
+  device: cpu
+  export: true
+  no_weights: false # because on multi-node machines, initializing weights could harm performance
+  torch_dtype: float32 # but use bfloat16 on compatible Intel CPUs
+  model: google-bert/bert-base-uncased
+
 scenario:
-  latency: true
   memory: true
+  latency: true
+
   input_shapes:
     batch_size: 1
     sequence_length: 128
-
-backend:
-  device: cpu
-  no_weights: false
-  export: true
-  torch_dtype: bfloat16
-  model: bert-base-uncased
diff --git a/examples/ipex_llama.yaml b/examples/cpu_ipex_llama.yaml
similarity index 66%
rename from examples/ipex_llama.yaml
rename to examples/cpu_ipex_llama.yaml
index b564316b..898ed0df 100644
--- a/examples/ipex_llama.yaml
+++ b/examples/cpu_ipex_llama.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: ipex_llama
+name: cpu_ipex_llama

 launcher:
   numactl: true
@@ -14,24 +14,21 @@ launcher:
     cpunodebind: 0
     membind: 0

+backend:
+  device: cpu
+  export: true
+  no_weights: false # because on multi-node machines, initializing weights could harm performance
+  torch_dtype: float32 # but use bfloat16 on compatible Intel CPUs
+  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+
 scenario:
-  latency: true
   memory: true
+  latency: true

-  warmup_runs: 10
-  iterations: 10
-  duration: 10
-
   input_shapes:
     batch_size: 1
-    sequence_length: 256
+    sequence_length: 64
+
   generate_kwargs:
     max_new_tokens: 32
     min_new_tokens: 32
-
-backend:
-  device: cpu
-  export: true
-  no_weights: false
-  torch_dtype: bfloat16
-  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
diff --git a/examples/llama_cpp_embedding.yaml b/examples/cpu_llama_cpp_embedding.yaml
similarity index 57%
rename from examples/llama_cpp_embedding.yaml
rename to examples/cpu_llama_cpp_embedding.yaml
index bdd86cce..666277c6 100644
--- a/examples/llama_cpp_embedding.yaml
+++ b/examples/cpu_llama_cpp_embedding.yaml
@@ -1,26 +1,24 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: inline
   - backend: llama_cpp
+  - launcher: process
   - _base_
   - _self_

-name: llama_cpp_llama
+name: cpu_llama_cpp_embedding

 backend:
-  device: mps
-  model: nomic-ai/nomic-embed-text-v1.5-GGUF
+  device: cpu
   task: feature-extraction
+  model: nomic-ai/nomic-embed-text-v1.5-GGUF
   filename: nomic-embed-text-v1.5.Q4_0.gguf

 scenario:
   input_shapes:
     batch_size: 1
-    sequence_length: 256
-    vocab_size: 30000
-    type_vocab_size: 1
-    max_position_embeddings: 512
+    sequence_length: 64
+
   generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    max_new_tokens: 32
+    min_new_tokens: 32
diff --git a/examples/llama_cpp_text_generation.yaml b/examples/cpu_llama_cpp_text_generation.yaml
similarity index 61%
rename from examples/llama_cpp_text_generation.yaml
rename to examples/cpu_llama_cpp_text_generation.yaml
index 96def950..2cd55514 100644
--- a/examples/llama_cpp_text_generation.yaml
+++ b/examples/cpu_llama_cpp_text_generation.yaml
@@ -1,25 +1,23 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: inline
   - backend: llama_cpp
+  - launcher: process
   - _base_
   - _self_

-name: llama_cpp_llama
+name: cpu_llama_cpp_text_generation

 backend:
-  device: mps
-  model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
+  device: cpu
   task: text-generation
+  model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
   filename: tinyllama-1.1b-chat-v1.0.Q4_0.gguf

-
 scenario:
+  memory: true
+  latency: true
+
   input_shapes:
     batch_size: 1
-    sequence_length: 256
-    vocab_size: 32000
-
-  generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    sequence_length: 128
diff --git a/examples/onnxruntime_static_quant_vit.yaml b/examples/cpu_onnxruntime_static_quant_vit.yaml
similarity index 70%
rename from examples/onnxruntime_static_quant_vit.yaml
rename to examples/cpu_onnxruntime_static_quant_vit.yaml
index 3d298473..97591bcd 100644
--- a/examples/onnxruntime_static_quant_vit.yaml
+++ b/examples/cpu_onnxruntime_static_quant_vit.yaml
@@ -6,10 +6,11 @@ defaults:
   - _base_
   - _self_

-name: onnxruntime_static_quant_vit
+name: cpu_onnxruntime_static_quant_vit

 backend:
   device: cpu
+  export: true
   no_weights: true
   model: google/vit-base-patch16-224
   quantization: true
@@ -17,3 +18,9 @@ backend:
     is_static: true
     per_channel: false
   calibration: true
+
+scenario:
+  memory: true
+  latency: true
+  input_shapes:
+    batch_size: 2
diff --git a/examples/onnxruntime_timm.yaml b/examples/cpu_onnxruntime_timm.yaml
similarity index 82%
rename from examples/onnxruntime_timm.yaml
rename to examples/cpu_onnxruntime_timm.yaml
index 165fc28a..963f44f0 100644
--- a/examples/onnxruntime_timm.yaml
+++ b/examples/cpu_onnxruntime_timm.yaml
@@ -10,7 +10,8 @@ name: onnxruntime_timm

 backend:
   device: cpu
-  model: timm/mobilenetv3_large_100.ra_in1k
+  export: true
+  model: timm/tiny_vit_21m_224.in1k

 scenario:
   memory: true
diff --git a/examples/numactl_bert.yaml b/examples/cpu_openvino_8bit_bert.yaml
similarity index 57%
rename from examples/numactl_bert.yaml
rename to examples/cpu_openvino_8bit_bert.yaml
index 7add65e7..73ef474d 100644
--- a/examples/numactl_bert.yaml
+++ b/examples/cpu_openvino_8bit_bert.yaml
@@ -1,27 +1,24 @@
 defaults:
   - benchmark
   - scenario: inference
+  - backend: openvino
   - launcher: process
-  - backend: pytorch
   - _base_
   - _self_

-name: pytorch_bert
+name: openvino_static_quant

-launcher:
-  numactl: true
-  numactl_kwargs:
-    cpunodebind: 0
-    membind: 0
+backend:
+  device: cpu
+  reshape: true
+  no_weights: true
+  load_in_8bit: false # enable 8bit on compatible Intel CPU machines
+  model: google-bert/bert-base-uncased

 scenario:
-  latency: true
   memory: true
+  latency: true
+
   input_shapes:
     batch_size: 1
     sequence_length: 128
-
-backend:
-  device: cpu
-  no_weights: true
-  model: bert-base-uncased
diff --git a/examples/openvino_diffusion.yaml b/examples/cpu_openvino_diffusion.yaml
similarity index 78%
rename from examples/openvino_diffusion.yaml
rename to examples/cpu_openvino_diffusion.yaml
index f0501101..30d21935 100644
--- a/examples/openvino_diffusion.yaml
+++ b/examples/cpu_openvino_diffusion.yaml
@@ -10,10 +10,9 @@ name: openvino_diffusion

 backend:
   device: cpu
-  model: stabilityai/stable-diffusion-2-1
-  reshape: true
   export: true
-  half: true
+  model: stabilityai/stable-diffusion-2-1
+  half: false # enable half-precision on compatible Intel CPU machines

 scenario:
   input_shapes:
diff --git a/examples/pytorch_bert.py b/examples/cuda_pytorch_bert.py
similarity index 59%
rename from examples/pytorch_bert.py
rename to examples/cuda_pytorch_bert.py
index 09f62b8d..2a7ddf89 100644
--- a/examples/pytorch_bert.py
+++ b/examples/cuda_pytorch_bert.py
@@ -1,22 +1,20 @@
 import os

-from huggingface_hub import whoami
-
 from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
 from optimum_benchmark.logging_utils import setup_logging

-try:
-    USERNAME = whoami()["name"]
-except Exception as e:
-    print(f"Failed to get username from Hugging Face Hub: {e}")
-    USERNAME = None
+BENCHMARK_NAME = "cuda_pytorch_bert"
+MODEL = "google-bert/bert-base-uncased"
+PUSH_REPO_ID = os.environ.get("PUSH_REPO_ID", None)

-BENCHMARK_NAME = "pytorch_bert"
+if __name__ == "__main__":
+    level = os.environ.get("LOG_LEVEL", "INFO")
+    to_file = os.environ.get("LOG_TO_FILE", "0") == "1"
+    setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS")
+
-def run_benchmark():
     launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
-    backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model="bert-base-uncased")
+    backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model=MODEL)
     scenario_config = InferenceConfig(memory=True, latency=True, input_shapes={"batch_size": 1, "sequence_length": 128})
     benchmark_config = BenchmarkConfig(
         name=BENCHMARK_NAME,
@@ -27,19 +25,9 @@
         log_report=True,
     )
     benchmark_report = Benchmark.launch(benchmark_config)
-
-    return benchmark_config, benchmark_report
-
-
-if __name__ == "__main__":
-    level = os.environ.get("LOG_LEVEL", "INFO")
-    to_file = os.environ.get("LOG_TO_FILE", "0") == "1"
-    setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS")
-
-    benchmark_config, benchmark_report = run_benchmark()
     benchmark = Benchmark(config=benchmark_config, report=benchmark_report)

-    if USERNAME is not None:
-        benchmark_config.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
-        benchmark_report.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
-        benchmark.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
+    if PUSH_REPO_ID is not None:
+        benchmark_config.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME)
+        benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME)
+        benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME)
diff --git a/examples/pytorch_bert.yaml b/examples/cuda_pytorch_bert.yaml
similarity index 90%
rename from examples/pytorch_bert.yaml
rename to examples/cuda_pytorch_bert.yaml
index 8bb702ca..8ab9b5cb 100644
--- a/examples/pytorch_bert.yaml
+++ b/examples/cuda_pytorch_bert.yaml
@@ -12,15 +12,16 @@ launcher:
   device_isolation: true
   device_isolation_action: warn

+backend:
+  device: cuda
+  device_ids: 0
+  no_weights: true
+  model: google-bert/bert-base-uncased
+
 scenario:
-  latency: true
   memory: true
+  latency: true
+
   input_shapes:
     batch_size: 1
     sequence_length: 128
-
-backend:
-  device: cuda
-  device_ids: 0
-  no_weights: true
-  model: bert-base-uncased
diff --git a/examples/trt_llama.yaml b/examples/cuda_pytorch_llama.yaml
similarity index 70%
rename from examples/trt_llama.yaml
rename to examples/cuda_pytorch_llama.yaml
index 30cb600a..1f85bd10 100644
--- a/examples/trt_llama.yaml
+++ b/examples/cuda_pytorch_llama.yaml
@@ -1,12 +1,12 @@
 defaults:
   - benchmark
-  - backend: tensorrt-llm
   - scenario: inference
   - launcher: process
+  - backend: pytorch
   - _base_
   - _self_

-name: trt_llama
+name: cuda_pytorch_llama

 launcher:
   device_isolation: true
@@ -16,12 +16,14 @@ backend:
   device: cuda
   device_ids: 0
   no_weights: true
+  torch_dtype: float16
   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0

 scenario:
   input_shapes:
     batch_size: 4
-    sequence_length: 256
+    sequence_length: 64
+
   generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    max_new_tokens: 32
+    min_new_tokens: 32
diff --git a/examples/pytorch_llama.py b/examples/cuda_pytorch_llama_quants.py
similarity index 81%
rename from examples/pytorch_llama.py
rename to examples/cuda_pytorch_llama_quants.py
index fe732bfa..01d492cb 100644
--- a/examples/pytorch_llama.py
+++ b/examples/cuda_pytorch_llama_quants.py
@@ -1,17 +1,11 @@
 import os

-from huggingface_hub import whoami
-
 from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
 from optimum_benchmark.logging_utils import setup_logging

-try:
-    USERNAME = whoami()["name"]
-except Exception as e:
-    print(f"Failed to get username from Hugging Face Hub: {e}")
-    USERNAME = None
-
-BENCHMARK_NAME = "pytorch-llama"
+BENCHMARK_NAME = "cuda_pytorch_llama"
+MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+PUSH_REPO_ID = os.environ.get("PUSH_REPO_ID", None)

 WEIGHTS_CONFIGS = {
     "float16": {
@@ -40,10 +34,10 @@ def run_benchmark(weight_config: str):
     launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
     backend_config = PyTorchConfig(
+        model=MODEL,
         device="cuda",
         device_ids="0",
         no_weights=True,
-        model="gpt2",
         **WEIGHTS_CONFIGS[weight_config],
     )
     scenario_config = InferenceConfig(
@@ -52,7 +46,7 @@ def run_benchmark(weight_config: str):
         duration=10,
         iterations=10,
         warmup_runs=10,
-        input_shapes={"batch_size": 1, "sequence_length": 128},
+        input_shapes={"batch_size": 1, "sequence_length": 64},
         generate_kwargs={"max_new_tokens": 32, "min_new_tokens": 32},
     )
     benchmark_config = BenchmarkConfig(
@@ -77,7 +71,5 @@ def run_benchmark(weight_config: str):
     benchmark_config, benchmark_report = run_benchmark(weight_config)
     benchmark = Benchmark(config=benchmark_config, report=benchmark_report)

-    if USERNAME is not None:
-        benchmark.push_to_hub(
-            repo_id=f"{USERNAME}/benchmarks", filename=f"{weight_config}.json", subfolder=BENCHMARK_NAME
-        )
+    if PUSH_REPO_ID is not None:
+        benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME, filename=f"{weight_config}.json")
diff --git a/examples/pytorch_vlm.yaml b/examples/cuda_pytorch_vlm.yaml
similarity index 92%
rename from examples/pytorch_vlm.yaml
rename to examples/cuda_pytorch_vlm.yaml
index a39f8c8a..8f1e0f3c 100644
--- a/examples/pytorch_vlm.yaml
+++ b/examples/cuda_pytorch_vlm.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: pytorch_vlm
+name: cuda_pytorch_vlm

 launcher:
   device_isolation: true
@@ -30,7 +30,7 @@ scenario:
   input_shapes:
     # text
     batch_size: 1
-    sequence_length: 256
+    sequence_length: 64
     # image
     num_images: 2
     num_channels: 3
diff --git a/examples/tgi_llama.yaml b/examples/cuda_tgi_llama.yaml
similarity index 63%
rename from examples/tgi_llama.yaml
rename to examples/cuda_tgi_llama.yaml
index 399667fb..297403c8 100644
--- a/examples/tgi_llama.yaml
+++ b/examples/cuda_tgi_llama.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: tgi_llama
+name: cuda_tgi_llama

 launcher:
   device_isolation: true
@@ -14,14 +14,15 @@

 backend:
   device: cuda
-  device_ids: 4
-  # no_weights: true
+  device_ids: 0
+  cuda_graphs: 0 # remove for better perf but bigger memory footprint
   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0

 scenario:
   input_shapes:
     batch_size: 4
-    sequence_length: 256
+    sequence_length: 64
+
   generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    max_new_tokens: 32
+    min_new_tokens: 32
diff --git a/examples/pytorch_llama.yaml b/examples/cuda_trt_llama.yaml
similarity index 56%
rename from examples/pytorch_llama.yaml
rename to examples/cuda_trt_llama.yaml
index becd1f2e..c483fc2f 100644
--- a/examples/pytorch_llama.yaml
+++ b/examples/cuda_trt_llama.yaml
@@ -1,33 +1,30 @@
 defaults:
   - benchmark
+  - backend: tensorrt-llm
   - scenario: inference
   - launcher: process
-  - backend: pytorch
   - _base_
   - _self_

-name: pytorch_llama
+name: cuda_trt_llama

 launcher:
   device_isolation: true
   device_isolation_action: warn

 backend:
-  model: gpt2
   device: cuda
-  torch_dtype: float16
+  device_ids: 0
+  max_batch_size: 4
+  max_new_tokens: 32
+  max_prompt_length: 64
+  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0

 scenario:
-  memory: true
-  latency: true
-
-  warmup_runs: 10
-  iterations: 10
-  duration: 10
-
   input_shapes:
-    batch_size: 1
-    sequence_length: 256
+    batch_size: 4
+    sequence_length: 64
+
   generate_kwargs:
     max_new_tokens: 32
     min_new_tokens: 32
diff --git a/examples/vllm_llama.yaml b/examples/cuda_vllm_llama.yaml
similarity index 62%
rename from examples/vllm_llama.yaml
rename to examples/cuda_vllm_llama.yaml
index 8bbb4025..5ec4b5a8 100644
--- a/examples/vllm_llama.yaml
+++ b/examples/cuda_vllm_llama.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: vllm_llama
+name: cuda_vllm_llama

 launcher:
   device_isolation: true
@@ -15,16 +15,16 @@
 backend:
   device: cuda
   device_ids: 0
-  no_weights: false
-  serving_mode: offline
+  serving_mode: online # server-like
   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
   engine_args:
-    enforce_eager: true
+    enforce_eager: true # remove for better perf but bigger memory footprint

 scenario:
   input_shapes:
     batch_size: 4
-    sequence_length: 256
+    sequence_length: 64
+
   generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    max_new_tokens: 32
+    min_new_tokens: 32
diff --git a/examples/pytorch_bert_mps.yaml b/examples/mps_pytorch_bert.yaml
similarity index 67%
rename from examples/pytorch_bert_mps.yaml
rename to examples/mps_pytorch_bert.yaml
index 4d4dc6e3..27368eb1 100644
--- a/examples/pytorch_bert_mps.yaml
+++ b/examples/mps_pytorch_bert.yaml
@@ -1,15 +1,12 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: process # launcher: inline works,
+  - launcher: inline # mps fails with python multi-processing for some reason
   - backend: pytorch
   - _base_
   - _self_

-name: pytorch_bert
-
-# launcher:
-#   start_method: spawn
+name: mps_pytorch_bert

 scenario:
   latency: true
@@ -19,8 +16,6 @@ scenario:
     sequence_length: 128

 backend:
-  device: cpu
+  device: mps
   no_weights: true
   model: bert-base-uncased
-
-
diff --git a/examples/neural_compressor_ptq_bert.yaml b/examples/neural_compressor_ptq_bert.yaml
deleted file mode 100644
index cbc32590..00000000
--- a/examples/neural_compressor_ptq_bert.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-defaults:
-  - benchmark
-  - backend: neural-compressor
-  - scenario: inference
-  - launcher: process
-  - _base_
-  - _self_
-
-name: neural_compressor_ptq_bert
-
-backend:
-  device: cpu
-  no_weights: true
-  model: bert-base-uncased
-  ptq_quantization: true
-  calibration: true
-
-scenario:
-  input_shapes:
-    batch_size: 1
diff --git a/examples/openvino_static_quant_bert.yaml b/examples/openvino_static_quant_bert.yaml
deleted file mode 100644
index caa4363a..00000000
--- a/examples/openvino_static_quant_bert.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-defaults:
-  - benchmark
-  - scenario: inference
-  - backend: openvino
-  - launcher: process
-  - _base_
-  - _self_
-
-name: openvino_static_quant_bert
-
-backend:
-  device: cpu
-  no_weights: true
-  model: bert-base-uncased
-  quantization: true
-  calibration: true
-  reshape: true
-
-scenario:
-  input_shapes:
-    batch_size: 1
diff --git a/examples/tei_bge.yaml b/examples/tei_bge.yaml
deleted file mode 100644
index dbbab7d5..00000000
--- a/examples/tei_bge.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-defaults:
-  - benchmark
-  - scenario: inference
-  - launcher: inline
-  - backend: py-txi
-  - _self_
-
-name: tei_bert
-
-launcher:
-  device_isolation: true
-  device_isolation_action: warn
-
-backend:
-  device: cpu
-  model: BAAI/bge-base-en-v1.5
-
-scenario:
-  input_shapes:
-    batch_size: 64
-    sequence_length: 128
diff --git a/optimum_benchmark/backends/py_txi/config.py b/optimum_benchmark/backends/py_txi/config.py
index e42161e6..73b75b75 100644
--- a/optimum_benchmark/backends/py_txi/config.py
+++ b/optimum_benchmark/backends/py_txi/config.py
@@ -50,7 +50,7 @@ class PyTXIConfig(BackendConfig):
     quantize: Optional[str] = None
     num_shard: Optional[int] = None
     speculate: Optional[int] = None
-    cuda_graphs: Optional[bool] = None
+    cuda_graphs: Optional[int] = None
     disable_custom_kernels: Optional[bool] = None
     trust_remote_code: Optional[bool] = None

diff --git a/optimum_benchmark/cli.py b/optimum_benchmark/cli.py
index 4b26266b..5af0723b 100644
--- a/optimum_benchmark/cli.py
+++ b/optimum_benchmark/cli.py
@@ -10,12 +10,10 @@
     Benchmark,
     BenchmarkConfig,
     EnergyStarConfig,
-    INCConfig,
     InferenceConfig,
     InlineConfig,
     IPEXConfig,
     LlamaCppConfig,
-    LLMSwarmConfig,
     ORTConfig,
     OVConfig,
     ProcessConfig,
@@ -43,9 +41,7 @@
 cs.store(group="backend", name=ORTConfig.name, node=ORTConfig)
cs.store(group="backend", name=TorchORTConfig.name, node=TorchORTConfig) cs.store(group="backend", name=TRTLLMConfig.name, node=TRTLLMConfig) -cs.store(group="backend", name=INCConfig.name, node=INCConfig) cs.store(group="backend", name=PyTXIConfig.name, node=PyTXIConfig) -cs.store(group="backend", name=LLMSwarmConfig.name, node=LLMSwarmConfig) cs.store(group="backend", name=VLLMConfig.name, node=VLLMConfig) cs.store(group="backend", name=LlamaCppConfig.name, node=LlamaCppConfig) # scenarios configurations diff --git a/setup.py b/setup.py index 03bbdf07..46a1ed60 100644 --- a/setup.py +++ b/setup.py @@ -76,6 +76,7 @@ "py-txi": ["py-txi"], "vllm": ["vllm"], # optional dependencies + "torchao": ["torchao"], "autoawq": ["autoawq"], "auto-gptq": ["optimum", "auto-gptq"], "sentence-transformers": ["sentence-transformers"], diff --git a/tests/test_energy_star.py b/tests/test_energy_star.py index bbb83f55..f2520932 100644 --- a/tests/test_energy_star.py +++ b/tests/test_energy_star.py @@ -9,12 +9,16 @@ LOGGER = getLogger("test-cli") -TEST_CONFIG_DIR = Path(__file__).parent.parent / "examples/energy_star" +TEST_CONFIG_DIR = Path(__file__).parent.parent / "energy_star" + TEST_CONFIG_NAMES = [ config.split(".")[0] for config in os.listdir(TEST_CONFIG_DIR) if config.endswith(".yaml") and not (config.startswith("_") or config.endswith("_")) ] +TEST_SCRIPT_PATHS = [ + str(TEST_CONFIG_DIR / filename) for filename in os.listdir(TEST_CONFIG_DIR) if filename.endswith(".py") +] ROCR_VISIBLE_DEVICES = os.environ.get("ROCR_VISIBLE_DEVICES", None) CUDA_VISIBLE_DEVICES = os.environ.get("CUDA_VISIBLE_DEVICES", None) @@ -42,3 +46,11 @@ def test_cli_configs(config_name): popen = run_subprocess_and_log_stream_output(LOGGER, args) assert popen.returncode == 0, f"Failed to run {config_name}" + + +@pytest.mark.parametrize("script_path", TEST_SCRIPT_PATHS) +def test_api_scripts(script_path): + args = ["python", script_path] + + popen = run_subprocess_and_log_stream_output(LOGGER, args) + assert popen.returncode == 0, f"Failed to run {script_path}" diff --git a/tests/test_examples.py b/tests/test_examples.py new file mode 100644 index 00000000..13cf3cff --- /dev/null +++ b/tests/test_examples.py @@ -0,0 +1,46 @@ +import os +from logging import getLogger +from pathlib import Path + +import pytest + +from optimum_benchmark.logging_utils import run_subprocess_and_log_stream_output + +LOGGER = getLogger("test-examples") + + +TEST_CONFIG_DIR = Path(__file__).parent.parent / "examples" + +TEST_CONFIG_NAMES = [ + config.split(".")[0] + for config in os.listdir(TEST_CONFIG_DIR) + if config.endswith(".yaml") and not (config.startswith("_") or config.endswith("_")) +] + +TEST_SCRIPT_PATHS = [ + str(TEST_CONFIG_DIR / filename) for filename in os.listdir(TEST_CONFIG_DIR) if filename.endswith(".py") +] + +ROCR_VISIBLE_DEVICES = os.environ.get("ROCR_VISIBLE_DEVICES", None) +CUDA_VISIBLE_DEVICES = os.environ.get("CUDA_VISIBLE_DEVICES", None) + + +@pytest.mark.parametrize("config_name", TEST_CONFIG_NAMES) +def test_cli_configs(config_name): + args = ["optimum-benchmark", "--config-dir", TEST_CONFIG_DIR, "--config-name", config_name] + + if ROCR_VISIBLE_DEVICES is not None: + args += [f'backend.device_ids="{ROCR_VISIBLE_DEVICES}"'] + elif CUDA_VISIBLE_DEVICES is not None: + args += [f'backend.device_ids="{CUDA_VISIBLE_DEVICES}"'] + + popen = run_subprocess_and_log_stream_output(LOGGER, args) + assert popen.returncode == 0, f"Failed to run {config_name}" + + +@pytest.mark.parametrize("script_path", TEST_SCRIPT_PATHS) +def 
test_api_scripts(script_path): + args = ["python", script_path] + + popen = run_subprocess_and_log_stream_output(LOGGER, args) + assert popen.returncode == 0, f"Failed to run {script_path}"