From a2700a82c8b0e11e6d850165259f9489ad92572e Mon Sep 17 00:00:00 2001
From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
Date: Tue, 10 Dec 2024 17:13:47 +0100
Subject: [PATCH] Test examples (#306)

---
 .github/workflows/test_api_cpu.yaml           | 17 ++++++-
 .github/workflows/test_api_cuda.yaml          | 15 +++++-
 .github/workflows/test_api_misc.yaml          |  4 +-
 .github/workflows/test_cli_cpu_ipex.yaml      | 15 +++---
 .github/workflows/test_cli_cpu_llama_cpp.yaml | 10 +++-
 .../workflows/test_cli_cpu_onnxruntime.yaml   |  8 ++++
 .github/workflows/test_cli_cpu_openvino.yaml  | 14 +++---
 .github/workflows/test_cli_cpu_py_txi.yaml    | 11 ++++-
 .github/workflows/test_cli_cpu_pytorch.yaml   |  9 ++++
 .../workflows/test_cli_cuda_onnxruntime.yaml  |  9 ++++
 .github/workflows/test_cli_cuda_py_txi.yaml   | 10 +++-
 .github/workflows/test_cli_cuda_pytorch.yaml  |  8 ++++
 .../workflows/test_cli_cuda_tensorrt_llm.yaml | 10 ++++
 .../workflows/test_cli_cuda_torch_ort.yaml    | 15 ++++--
 .github/workflows/test_cli_cuda_vllm.yaml     |  9 ++++
 ...energy_star.yaml => test_energy_star.yaml} |  6 +--
 .../energy_star => energy_star}/_base_.yaml   |  0
 .../automatic_speech_recognition.yaml         |  0
 .../image_classification.yaml                 |  0
 .../image_to_text.yaml                        |  0
 .../object_detection.yaml                     |  0
 .../question_answering.yaml                   |  0
 .../sentence_similarity.yaml                  |  0
 .../summarization.yaml                        |  0
 .../t5_question_answering.yaml                |  0
 .../t5_summarization.yaml                     |  0
 .../t5_text_classification.yaml               |  0
 .../t5_text_generation.yaml                   |  0
 .../text_classification.yaml                  |  0
 .../text_generation.yaml                      |  0
 .../text_to_image.yaml                        |  0
 .../{ipex_bert.yaml => cpu_ipex_bert.yaml}    | 19 ++++----
 .../{ipex_llama.yaml => cpu_ipex_llama.yaml}  | 25 +++++-----
 ...ding.yaml => cpu_llama_cpp_embedding.yaml} | 18 ++++----
 ...aml => cpu_llama_cpp_text_generation.yaml} | 18 ++++----
 ... => cpu_onnxruntime_static_quant_vit.yaml} |  9 +++-
 ...me_timm.yaml => cpu_onnxruntime_timm.yaml} |  3 +-
 ..._bert.yaml => cpu_openvino_8bit_bert.yaml} | 23 ++++------
 ...usion.yaml => cpu_openvino_diffusion.yaml} |  5 +-
 .../{pytorch_bert.py => cuda_pytorch_bert.py} | 36 +++++----------
 ...torch_bert.yaml => cuda_pytorch_bert.yaml} | 15 +++---
 ...trt_llama.yaml => cuda_pytorch_llama.yaml} | 12 +++--
 ..._llama.py => cuda_pytorch_llama_quants.py} | 22 +++------
 ...pytorch_vlm.yaml => cuda_pytorch_vlm.yaml} |  4 +-
 .../{tgi_llama.yaml => cuda_tgi_llama.yaml}   | 13 +++---
 ...pytorch_llama.yaml => cuda_trt_llama.yaml} | 23 ++++------
 .../{vllm_llama.yaml => cuda_vllm_llama.yaml} | 14 +++---
 ...ch_bert_mps.yaml => mps_pytorch_bert.yaml} | 11 ++---
 examples/neural_compressor_ptq_bert.yaml      | 20 --------
 examples/openvino_static_quant_bert.yaml      | 21 ---------
 examples/tei_bge.yaml                         | 21 ---------
 optimum_benchmark/backends/py_txi/config.py   |  2 +-
 optimum_benchmark/cli.py                      |  4 ---
 setup.py                                      |  1 +
 tests/test_energy_star.py                     | 14 +++++-
 tests/test_examples.py                        | 46 +++++++++++++++++++
 56 files changed, 324 insertions(+), 245 deletions(-)
 rename .github/workflows/{test_cli_energy_star.yaml => test_energy_star.yaml} (84%)
 rename {examples/energy_star => energy_star}/_base_.yaml (100%)
 rename {examples/energy_star => energy_star}/automatic_speech_recognition.yaml (100%)
 rename {examples/energy_star => energy_star}/image_classification.yaml (100%)
 rename {examples/energy_star => energy_star}/image_to_text.yaml (100%)
 rename {examples/energy_star => energy_star}/object_detection.yaml (100%)
 rename {examples/energy_star => energy_star}/question_answering.yaml (100%)
 rename {examples/energy_star => energy_star}/sentence_similarity.yaml (100%)
 rename {examples/energy_star => energy_star}/summarization.yaml (100%)
 rename {examples/energy_star => energy_star}/t5_question_answering.yaml (100%)
 rename {examples/energy_star => energy_star}/t5_summarization.yaml (100%)
 rename {examples/energy_star => energy_star}/t5_text_classification.yaml (100%)
 rename {examples/energy_star => energy_star}/t5_text_generation.yaml (100%)
 rename {examples/energy_star => energy_star}/text_classification.yaml (100%)
 rename {examples/energy_star => energy_star}/text_generation.yaml (100%)
 rename {examples/energy_star => energy_star}/text_to_image.yaml (100%)
 rename examples/{ipex_bert.yaml => cpu_ipex_bert.yaml} (59%)
 rename examples/{ipex_llama.yaml => cpu_ipex_llama.yaml} (66%)
 rename examples/{llama_cpp_embedding.yaml => cpu_llama_cpp_embedding.yaml} (57%)
 rename examples/{llama_cpp_text_generation.yaml => cpu_llama_cpp_text_generation.yaml} (61%)
 rename examples/{onnxruntime_static_quant_vit.yaml => cpu_onnxruntime_static_quant_vit.yaml} (70%)
 rename examples/{onnxruntime_timm.yaml => cpu_onnxruntime_timm.yaml} (82%)
 rename examples/{numactl_bert.yaml => cpu_openvino_8bit_bert.yaml} (57%)
 rename examples/{openvino_diffusion.yaml => cpu_openvino_diffusion.yaml} (78%)
 rename examples/{pytorch_bert.py => cuda_pytorch_bert.py} (59%)
 rename examples/{pytorch_bert.yaml => cuda_pytorch_bert.yaml} (90%)
 rename examples/{trt_llama.yaml => cuda_pytorch_llama.yaml} (70%)
 rename examples/{pytorch_llama.py => cuda_pytorch_llama_quants.py} (81%)
 rename examples/{pytorch_vlm.yaml => cuda_pytorch_vlm.yaml} (92%)
 rename examples/{tgi_llama.yaml => cuda_tgi_llama.yaml} (63%)
 rename examples/{pytorch_llama.yaml => cuda_trt_llama.yaml} (56%)
 rename examples/{vllm_llama.yaml => cuda_vllm_llama.yaml} (62%)
 rename examples/{pytorch_bert_mps.yaml => mps_pytorch_bert.yaml} (67%)
 delete mode 100644 examples/neural_compressor_ptq_bert.yaml
 delete mode 100644 examples/openvino_static_quant_bert.yaml
 delete mode 100644 examples/tei_bge.yaml
 create mode 100644 tests/test_examples.py

diff --git a/.github/workflows/test_api_cpu.yaml b/.github/workflows/test_api_cpu.yaml
index 126e500b..b48490a5 100644
--- a/.github/workflows/test_api_cpu.yaml
+++ b/.github/workflows/test_api_cpu.yaml
@@ -47,8 +47,21 @@ jobs:
         pip install -e .[testing,timm,diffusers,codecarbon]

     - name: Run tests
+      run: |
+        pytest tests/test_api.py -s -k "api and cpu"
       env:
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
         PUSH_REPO_ID: optimum-benchmark/cpu
-      run: |
-        pytest tests/test_api.py -s -k "api and cpu"
+
+    # no examples for now
+    # - if: ${{
+    #     (github.event_name == 'push') ||
+    #     (github.event_name == 'workflow_dispatch') ||
+    #     contains( github.event.pull_request.labels.*.name, 'examples')
+    #   }}
+    #   name: Run examples
+    #   run: |
+    #     pytest tests/test_examples.py -s -k "api and cpu"
+    #   env:
+    #     HF_TOKEN: ${{ secrets.HF_TOKEN }}
+    #     PUSH_REPO_ID: optimum-benchmark/cpu
diff --git a/.github/workflows/test_api_cuda.yaml b/.github/workflows/test_api_cuda.yaml
index c8be0ece..d45afa40 100644
--- a/.github/workflows/test_api_cuda.yaml
+++ b/.github/workflows/test_api_cuda.yaml
@@ -45,8 +45,21 @@ jobs:
         pip install -e .[testing,timm,diffusers,codecarbon]

     - name: Run tests
+      run: |
+        pytest tests/test_api.py -x -s -k "api and cuda"
       env:
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
         PUSH_REPO_ID: optimum-benchmark/cuda
+
+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
       run: |
-        pytest tests/test_api.py -x -s -k "api and cuda"
+        pip install -e .[testing,torchao,autoawq,auto-gptq]
+        pytest tests/test_examples.py -x -s -k "api and cuda and pytorch"
+      env:
+        HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        PUSH_REPO_ID: optimum-benchmark/cuda
diff --git a/.github/workflows/test_api_misc.yaml b/.github/workflows/test_api_misc.yaml
index 2da1e7ec..36c26215 100644
--- a/.github/workflows/test_api_misc.yaml
+++ b/.github/workflows/test_api_misc.yaml
@@ -58,8 +58,8 @@ jobs:
         UV_SYSTEM_PYTHON: 1

     - name: Run tests
+      run: |
+        pytest tests/test_api.py -s -k "api and not (cpu or cuda or rocm or mps)"
       env:
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
         PUSH_REPO_ID: optimum-benchmark/misc-${{ matrix.os }}-${{ matrix.python }}
-      run: |
-        pytest tests/test_api.py -s -k "api and not (cpu or cuda or rocm or mps)"
diff --git a/.github/workflows/test_cli_cpu_ipex.yaml b/.github/workflows/test_cli_cpu_ipex.yaml
index d6b94d3e..5bf0be92 100644
--- a/.github/workflows/test_cli_cpu_ipex.yaml
+++ b/.github/workflows/test_cli_cpu_ipex.yaml
@@ -36,16 +36,17 @@ jobs:
     - name: Checkout
      uses: actions/checkout@v4

-    - name: Set up Python 3.10
-      uses: actions/setup-python@v5
-      with:
-        python-version: "3.10"
-
     - name: Install requirements
       run: |
-        pip install --upgrade pip
-        pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
         pip install -e .[testing,ipex,diffusers,timm]

     - name: Run tests
       run: pytest tests/test_cli.py -s -k "cli and cpu and ipex"
+
+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: pytest tests/test_examples.py -s -k "cli and cpu and ipex"
diff --git a/.github/workflows/test_cli_cpu_llama_cpp.yaml b/.github/workflows/test_cli_cpu_llama_cpp.yaml
index 05d43683..145c0f83 100644
--- a/.github/workflows/test_cli_cpu_llama_cpp.yaml
+++ b/.github/workflows/test_cli_cpu_llama_cpp.yaml
@@ -48,4 +48,12 @@ jobs:
         pip install -e .[testing,llama-cpp]

     - name: Run tests
-      run: pytest tests/test_cli.py -s -k "llama_cpp"
+      run: pytest tests/test_cli.py -s -k "cli and cpu and llama_cpp"
+
+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: pytest tests/test_examples.py -s -k "cli and cpu and llama_cpp"
diff --git a/.github/workflows/test_cli_cpu_onnxruntime.yaml b/.github/workflows/test_cli_cpu_onnxruntime.yaml
index 21e65235..ef8482b7 100644
--- a/.github/workflows/test_cli_cpu_onnxruntime.yaml
+++ b/.github/workflows/test_cli_cpu_onnxruntime.yaml
@@ -49,3 +49,11 @@ jobs:

     - name: Run tests
       run: pytest tests/test_cli.py -s -k "cli and cpu and onnxruntime"
+
+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: pytest tests/test_examples.py -s -k "cli and cpu and onnxruntime"
diff --git a/.github/workflows/test_cli_cpu_openvino.yaml b/.github/workflows/test_cli_cpu_openvino.yaml
index 4612370c..2ef0312e 100644
--- a/.github/workflows/test_cli_cpu_openvino.yaml
+++ b/.github/workflows/test_cli_cpu_openvino.yaml
@@ -36,16 +36,18 @@ jobs:
     - name: Checkout
       uses: actions/checkout@v4

-    - name: Set up Python 3.10
-      uses: actions/setup-python@v5
-      with:
-        python-version: "3.10"
-
     - name: Install requirements
       run: |
-        pip install --upgrade pip
         pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
         pip install -e .[testing,openvino,diffusers,timm]

     - name: Run tests
       run: pytest tests/test_cli.py -s -k "cli and cpu and openvino"
+
+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: pytest tests/test_examples.py -s -k "cli and cpu and openvino"
diff --git a/.github/workflows/test_cli_cpu_py_txi.yaml b/.github/workflows/test_cli_cpu_py_txi.yaml
index d07f6170..7b1946e7 100644
--- a/.github/workflows/test_cli_cpu_py_txi.yaml
+++ b/.github/workflows/test_cli_cpu_py_txi.yaml
@@ -45,7 +45,16 @@ jobs:
       run: |
         pip install --upgrade pip
         pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-        pip install -e .[testing,py-txi]
+        pip install -e .[testing,py-txi] git+https://github.com/IlyasMoutawwakil/py-txi.git

     - name: Run tests
       run: pytest tests/test_cli.py -s -k "cli and cpu and py_txi"
+
+    # no examples for now
+    # - if: ${{
+    #     (github.event_name == 'push') ||
+    #     (github.event_name == 'workflow_dispatch') ||
+    #     contains( github.event.pull_request.labels.*.name, 'examples')
+    #   }}
+    #   name: Run examples
+    #   run: pytest tests/test_examples.py -s -k "cli and cpu and (tgi or tei)"
diff --git a/.github/workflows/test_cli_cpu_pytorch.yaml b/.github/workflows/test_cli_cpu_pytorch.yaml
index fef2a772..dab603c7 100644
--- a/.github/workflows/test_cli_cpu_pytorch.yaml
+++ b/.github/workflows/test_cli_cpu_pytorch.yaml
@@ -49,3 +49,12 @@ jobs:

     - name: Run tests
       run: pytest tests/test_cli.py -s -k "cli and cpu and pytorch"
+
+    # no examples for now
+    # - if: ${{
+    #     (github.event_name == 'push') ||
+    #     (github.event_name == 'workflow_dispatch') ||
+    #     contains( github.event.pull_request.labels.*.name, 'examples')
+    #   }}
+    #   name: Run examples
+    #   run: pytest tests/test_examples.py -s -k "cli and cpu and pytorch"
diff --git a/.github/workflows/test_cli_cuda_onnxruntime.yaml b/.github/workflows/test_cli_cuda_onnxruntime.yaml
index 0584665c..1351e1b0 100644
--- a/.github/workflows/test_cli_cuda_onnxruntime.yaml
+++ b/.github/workflows/test_cli_cuda_onnxruntime.yaml
@@ -48,3 +48,12 @@ jobs:
     - name: Run tests
       run: |
         pytest tests/test_cli.py -x -s -k "cli and cuda and onnxruntime"
+
+    # no examples for now
+    # - if: ${{
+    #     (github.event_name == 'push') ||
+    #     (github.event_name == 'workflow_dispatch') ||
+    #     contains( github.event.pull_request.labels.*.name, 'examples')
+    #   }}
+    #   name: Run examples
+    #   run: pytest tests/test_examples.py -x -s -k "cli and cuda and onnxruntime"
diff --git a/.github/workflows/test_cli_cuda_py_txi.yaml b/.github/workflows/test_cli_cuda_py_txi.yaml
index 7339b98e..5c090b28 100644
--- a/.github/workflows/test_cli_cuda_py_txi.yaml
+++ b/.github/workflows/test_cli_cuda_py_txi.yaml
@@ -45,7 +45,15 @@ jobs:
     - name: Install requirements
       run: |
         pip install --upgrade pip
-        pip install -e .[testing,py-txi]
+        pip install -e .[testing,py-txi] git+https://github.com/IlyasMoutawwakil/py-txi.git

     - name: Run tests
       run: pytest tests/test_cli.py -x -s -k "cli and cuda and py_txi"
+
+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: pytest tests/test_examples.py -x -s -k "cli and cuda and (tgi or tei)"
diff --git a/.github/workflows/test_cli_cuda_pytorch.yaml b/.github/workflows/test_cli_cuda_pytorch.yaml
index 0bc5dfaf..2aa54d5d 100644
--- a/.github/workflows/test_cli_cuda_pytorch.yaml
+++ b/.github/workflows/test_cli_cuda_pytorch.yaml
@@ -50,6 +50,14 @@ jobs:
       run: |
         pytest tests/test_cli.py -x -s -k "cli and cuda and pytorch and not (dp or ddp or device_map or deepspeed)"

+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: pytest tests/test_examples.py -x -s -k "cli and cuda and pytorch"
+
   run_cli_cuda_pytorch_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
diff --git a/.github/workflows/test_cli_cuda_tensorrt_llm.yaml b/.github/workflows/test_cli_cuda_tensorrt_llm.yaml
index acb04fe2..c75aac92 100644
--- a/.github/workflows/test_cli_cuda_tensorrt_llm.yaml
+++ b/.github/workflows/test_cli_cuda_tensorrt_llm.yaml
@@ -50,6 +50,16 @@ jobs:
       run: |
         pytest tests/test_cli.py -x -s -k "cli and cuda and tensorrt_llm and not (tp or pp)"

+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: |
+        huggingface-cli delete-cache
+        pytest tests/test_examples.py -x -s -k "cli and cuda and trt"
+
   cli_cuda_tensorrt_llm_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
diff --git a/.github/workflows/test_cli_cuda_torch_ort.yaml b/.github/workflows/test_cli_cuda_torch_ort.yaml
index ee886e8c..7dccafb8 100644
--- a/.github/workflows/test_cli_cuda_torch_ort.yaml
+++ b/.github/workflows/test_cli_cuda_torch_ort.yaml
@@ -44,13 +44,21 @@ jobs:

     - name: Install dependencies
       run: |
-        pip install -e .[testing,torch-ort,peft]
-        pip install optimum@git+https://github.com/huggingface/optimum.git
+        pip install -e .[testing,torch-ort,peft] optimum@git+https://github.com/huggingface/optimum.git@fxi-ort-trainer

     - name: Run tests
       run: |
         pytest tests/test_cli.py -x -s -k "cli and cuda and torch_ort and not (dp or ddp or device_map) and not (peft)"

+    # - if: ${{
+    #     (github.event_name == 'push') ||
+    #     (github.event_name == 'workflow_dispatch') ||
+    #     contains( github.event.pull_request.labels.*.name, 'examples')
+    #   }}
+    #   name: Run examples
+    #   run: |
+    #     pytest tests/test_examples.py -x -s -k "cli and cuda and torch_ort"
+
   run_cli_cuda_torch_ort_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
@@ -75,8 +83,7 @@ jobs:

     - name: Install dependencies
       run: |
-        pip install -e .[testing,torch-ort,peft]
-        pip install optimum@git+https://github.com/huggingface/optimum.git
+        pip install -e .[testing,torch-ort,peft] optimum@git+https://github.com/huggingface/optimum.git@fxi-ort-trainer

     - name: Run tests
       run: |
diff --git a/.github/workflows/test_cli_cuda_vllm.yaml b/.github/workflows/test_cli_cuda_vllm.yaml
index 732513d2..6072dd8c 100644
--- a/.github/workflows/test_cli_cuda_vllm.yaml
+++ b/.github/workflows/test_cli_cuda_vllm.yaml
@@ -50,6 +50,15 @@ jobs:
       run: |
         FORCE_SEQUENTIAL=1 pytest tests/test_cli.py -x -s -k "cli and cuda and vllm and not (tp or pp)"

+    - if: ${{
+        (github.event_name == 'push') ||
+        (github.event_name == 'workflow_dispatch') ||
+        contains( github.event.pull_request.labels.*.name, 'examples')
+      }}
+      name: Run examples
+      run: |
+        pytest tests/test_examples.py -x -s -k "cli and cuda and vllm"
+
   run_cli_cuda_vllm_multi_gpu_tests:
     if: ${{
       (github.event_name == 'push') ||
diff --git a/.github/workflows/test_cli_energy_star.yaml b/.github/workflows/test_energy_star.yaml
similarity index 84%
rename from .github/workflows/test_cli_energy_star.yaml
rename to .github/workflows/test_energy_star.yaml
index 24c487f6..db9a22cd 100644
--- a/.github/workflows/test_cli_energy_star.yaml
+++ b/.github/workflows/test_energy_star.yaml
@@ -20,13 +20,11 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

 jobs:
-  run_cli_energy_star_tests:
+  run_energy_star:
     if: ${{
       (github.event_name == 'push') ||
       (github.event_name == 'workflow_dispatch') ||
-      contains( github.event.pull_request.labels.*.name, 'cli') ||
-      contains( github.event.pull_request.labels.*.name, 'energy_star') ||
-      contains( github.event.pull_request.labels.*.name, 'cli_energy_star')
+      contains( github.event.pull_request.labels.*.name, 'energy_star')
     }}

     runs-on:
diff --git a/examples/energy_star/_base_.yaml b/energy_star/_base_.yaml
similarity index 100%
rename from examples/energy_star/_base_.yaml
rename to energy_star/_base_.yaml
diff --git a/examples/energy_star/automatic_speech_recognition.yaml b/energy_star/automatic_speech_recognition.yaml
similarity index 100%
rename from examples/energy_star/automatic_speech_recognition.yaml
rename to energy_star/automatic_speech_recognition.yaml
diff --git a/examples/energy_star/image_classification.yaml b/energy_star/image_classification.yaml
similarity index 100%
rename from examples/energy_star/image_classification.yaml
rename to energy_star/image_classification.yaml
diff --git a/examples/energy_star/image_to_text.yaml b/energy_star/image_to_text.yaml
similarity index 100%
rename from examples/energy_star/image_to_text.yaml
rename to energy_star/image_to_text.yaml
diff --git a/examples/energy_star/object_detection.yaml b/energy_star/object_detection.yaml
similarity index 100%
rename from examples/energy_star/object_detection.yaml
rename to energy_star/object_detection.yaml
diff --git a/examples/energy_star/question_answering.yaml b/energy_star/question_answering.yaml
similarity index 100%
rename from examples/energy_star/question_answering.yaml
rename to energy_star/question_answering.yaml
diff --git a/examples/energy_star/sentence_similarity.yaml b/energy_star/sentence_similarity.yaml
similarity index 100%
rename from examples/energy_star/sentence_similarity.yaml
rename to energy_star/sentence_similarity.yaml
diff --git a/examples/energy_star/summarization.yaml b/energy_star/summarization.yaml
similarity index 100%
rename from examples/energy_star/summarization.yaml
rename to energy_star/summarization.yaml
diff --git a/examples/energy_star/t5_question_answering.yaml b/energy_star/t5_question_answering.yaml
similarity index 100%
rename from examples/energy_star/t5_question_answering.yaml
rename to energy_star/t5_question_answering.yaml
diff --git a/examples/energy_star/t5_summarization.yaml b/energy_star/t5_summarization.yaml
similarity index 100%
rename from examples/energy_star/t5_summarization.yaml
rename to energy_star/t5_summarization.yaml
diff --git a/examples/energy_star/t5_text_classification.yaml b/energy_star/t5_text_classification.yaml
similarity index 100%
rename from examples/energy_star/t5_text_classification.yaml
rename to energy_star/t5_text_classification.yaml
diff --git a/examples/energy_star/t5_text_generation.yaml b/energy_star/t5_text_generation.yaml
similarity index 100%
rename from examples/energy_star/t5_text_generation.yaml
rename to energy_star/t5_text_generation.yaml
diff --git a/examples/energy_star/text_classification.yaml b/energy_star/text_classification.yaml
similarity index 100%
rename from examples/energy_star/text_classification.yaml
rename to energy_star/text_classification.yaml
diff --git a/examples/energy_star/text_generation.yaml b/energy_star/text_generation.yaml
similarity index 100%
rename from examples/energy_star/text_generation.yaml
rename to energy_star/text_generation.yaml
diff --git a/examples/energy_star/text_to_image.yaml b/energy_star/text_to_image.yaml
similarity index 100%
rename from examples/energy_star/text_to_image.yaml
rename to energy_star/text_to_image.yaml
diff --git a/examples/ipex_bert.yaml b/examples/cpu_ipex_bert.yaml
similarity index 59%
rename from examples/ipex_bert.yaml
rename to examples/cpu_ipex_bert.yaml
index e549da0a..0e7ed37b 100644
--- a/examples/ipex_bert.yaml
+++ b/examples/cpu_ipex_bert.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: ipex_bert
+name: cpu_ipex_bert

 launcher:
   numactl: true
@@ -14,16 +14,17 @@ launcher:
     cpunodebind: 0
     membind: 0

+backend:
+  device: cpu
+  export: true
+  no_weights: false # because on multi-node machines, initializing weights could harm performance
+  torch_dtype: float32 # but use bfloat16 on compatible Intel CPUs
+  model: google-bert/bert-base-uncased
+
 scenario:
-  latency: true
   memory: true
+  latency: true
+
   input_shapes:
     batch_size: 1
     sequence_length: 128
-
-backend:
-  device: cpu
-  no_weights: false
-  export: true
-  torch_dtype: bfloat16
-  model: bert-base-uncased
diff --git a/examples/ipex_llama.yaml b/examples/cpu_ipex_llama.yaml
similarity index 66%
rename from examples/ipex_llama.yaml
rename to examples/cpu_ipex_llama.yaml
index b564316b..898ed0df 100644
--- a/examples/ipex_llama.yaml
+++ b/examples/cpu_ipex_llama.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: ipex_llama
+name: cpu_ipex_llama

 launcher:
   numactl: true
@@ -14,24 +14,21 @@ launcher:
     cpunodebind: 0
     membind: 0

+backend:
+  device: cpu
+  export: true
+  no_weights: false # because on multi-node machines, initializing weights could harm performance
+  torch_dtype: float32 # but use bfloat16 on compatible Intel CPUs
+  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+
 scenario:
-  latency: true
   memory: true
+  latency: true

-  warmup_runs: 10
-  iterations: 10
-  duration: 10
-
   input_shapes:
     batch_size: 1
-    sequence_length: 256
+    sequence_length: 64
+
   generate_kwargs:
     max_new_tokens: 32
     min_new_tokens: 32
-
-backend:
-  device: cpu
-  export: true
-  no_weights: false
-  torch_dtype: bfloat16
-  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
diff --git a/examples/llama_cpp_embedding.yaml b/examples/cpu_llama_cpp_embedding.yaml
similarity index 57%
rename from examples/llama_cpp_embedding.yaml
rename to examples/cpu_llama_cpp_embedding.yaml
index bdd86cce..666277c6 100644
--- a/examples/llama_cpp_embedding.yaml
+++ b/examples/cpu_llama_cpp_embedding.yaml
@@ -1,26 +1,24 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: inline
   - backend: llama_cpp
+  - launcher: process
   - _base_
   - _self_

-name: llama_cpp_llama
+name: cpu_llama_cpp_embedding

 backend:
-  device: mps
-  model: nomic-ai/nomic-embed-text-v1.5-GGUF
+  device: cpu
   task: feature-extraction
+  model: nomic-ai/nomic-embed-text-v1.5-GGUF
   filename: nomic-embed-text-v1.5.Q4_0.gguf

 scenario:
   input_shapes:
     batch_size: 1
-    sequence_length: 256
-    vocab_size: 30000
-    type_vocab_size: 1
-    max_position_embeddings: 512
+    sequence_length: 64
+
   generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    max_new_tokens: 32
+    min_new_tokens: 32
diff --git a/examples/llama_cpp_text_generation.yaml b/examples/cpu_llama_cpp_text_generation.yaml
similarity index 61%
rename from examples/llama_cpp_text_generation.yaml
rename to examples/cpu_llama_cpp_text_generation.yaml
index 96def950..2cd55514 100644
--- a/examples/llama_cpp_text_generation.yaml
+++ b/examples/cpu_llama_cpp_text_generation.yaml
@@ -1,25 +1,23 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: inline
   - backend: llama_cpp
+  - launcher: process
   - _base_
   - _self_

-name: llama_cpp_llama
+name: cpu_llama_cpp_text_generation

 backend:
-  device: mps
-  model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
+  device: cpu
   task: text-generation
+  model: TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF
   filename: tinyllama-1.1b-chat-v1.0.Q4_0.gguf

-
 scenario:
+  memory: true
+  latency: true
+
   input_shapes:
     batch_size: 1
-    sequence_length: 256
-    vocab_size: 32000
-
-  generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    sequence_length: 128
diff --git a/examples/onnxruntime_static_quant_vit.yaml b/examples/cpu_onnxruntime_static_quant_vit.yaml
similarity index 70%
rename from examples/onnxruntime_static_quant_vit.yaml
rename to examples/cpu_onnxruntime_static_quant_vit.yaml
index 3d298473..97591bcd 100644
--- a/examples/onnxruntime_static_quant_vit.yaml
+++ b/examples/cpu_onnxruntime_static_quant_vit.yaml
@@ -6,10 +6,11 @@ defaults:
   - _base_
   - _self_

-name: onnxruntime_static_quant_vit
+name: cpu_onnxruntime_static_quant_vit

 backend:
   device: cpu
+  export: true
   no_weights: true
   model: google/vit-base-patch16-224
   quantization: true
@@ -17,3 +18,9 @@ backend:
     is_static: true
     per_channel: false
   calibration: true
+
+scenario:
+  memory: true
+  latency: true
+  input_shapes:
+    batch_size: 2
diff --git a/examples/onnxruntime_timm.yaml b/examples/cpu_onnxruntime_timm.yaml
similarity index 82%
rename from examples/onnxruntime_timm.yaml
rename to examples/cpu_onnxruntime_timm.yaml
index 165fc28a..963f44f0 100644
--- a/examples/onnxruntime_timm.yaml
+++ b/examples/cpu_onnxruntime_timm.yaml
@@ -10,7 +10,8 @@ name: onnxruntime_timm

 backend:
   device: cpu
-  model: timm/mobilenetv3_large_100.ra_in1k
+  export: true
+  model: timm/tiny_vit_21m_224.in1k

 scenario:
   memory: true
diff --git a/examples/numactl_bert.yaml b/examples/cpu_openvino_8bit_bert.yaml
similarity index 57%
rename from examples/numactl_bert.yaml
rename to examples/cpu_openvino_8bit_bert.yaml
index 7add65e7..73ef474d 100644
--- a/examples/numactl_bert.yaml
+++ b/examples/cpu_openvino_8bit_bert.yaml
@@ -1,27 +1,24 @@
 defaults:
   - benchmark
   - scenario: inference
+  - backend: openvino
   - launcher: process
-  - backend: pytorch
   - _base_
   - _self_

-name: pytorch_bert
+name: openvino_static_quant

-launcher:
-  numactl: true
-  numactl_kwargs:
-    cpunodebind: 0
-    membind: 0
+backend:
+  device: cpu
+  reshape: true
+  no_weights: true
+  load_in_8bit: false # enable 8bit on compatible Intel CPU machines
+  model: google-bert/bert-base-uncased

 scenario:
-  latency: true
   memory: true
+  latency: true
+
   input_shapes:
     batch_size: 1
     sequence_length: 128
-
-backend:
-  device: cpu
-  no_weights: true
-  model: bert-base-uncased
diff --git a/examples/openvino_diffusion.yaml b/examples/cpu_openvino_diffusion.yaml
similarity index 78%
rename from examples/openvino_diffusion.yaml
rename to examples/cpu_openvino_diffusion.yaml
index f0501101..30d21935 100644
--- a/examples/openvino_diffusion.yaml
+++ b/examples/cpu_openvino_diffusion.yaml
@@ -10,10 +10,9 @@ name: openvino_diffusion

 backend:
   device: cpu
-  model: stabilityai/stable-diffusion-2-1
-  reshape: true
   export: true
-  half: true
+  model: stabilityai/stable-diffusion-2-1
+  half: false # enable half-precision on compatible Intel CPU machines

 scenario:
   input_shapes:
diff --git a/examples/pytorch_bert.py b/examples/cuda_pytorch_bert.py
similarity index 59%
rename from examples/pytorch_bert.py
rename to examples/cuda_pytorch_bert.py
index 09f62b8d..2a7ddf89 100644
--- a/examples/pytorch_bert.py
+++ b/examples/cuda_pytorch_bert.py
@@ -1,22 +1,20 @@
 import os

-from huggingface_hub import whoami
-
 from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
 from optimum_benchmark.logging_utils import setup_logging

-try:
-    USERNAME = whoami()["name"]
-except Exception as e:
-    print(f"Failed to get username from Hugging Face Hub: {e}")
-    USERNAME = None
+BENCHMARK_NAME = "cuda_pytorch_bert"
+MODEL = "google-bert/bert-base-uncased"
+PUSH_REPO_ID = os.environ.get("PUSH_REPO_ID", None)

-BENCHMARK_NAME = "pytorch_bert"
+if __name__ == "__main__":
+    level = os.environ.get("LOG_LEVEL", "INFO")
+    to_file = os.environ.get("LOG_TO_FILE", "0") == "1"
+    setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS")
+
-def run_benchmark():
     launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
-    backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model="bert-base-uncased")
+    backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model=MODEL)
     scenario_config = InferenceConfig(memory=True, latency=True, input_shapes={"batch_size": 1, "sequence_length": 128})
     benchmark_config = BenchmarkConfig(
         name=BENCHMARK_NAME,
@@ -27,19 +25,9 @@
         log_report=True,
     )
     benchmark_report = Benchmark.launch(benchmark_config)
-
-    return benchmark_config, benchmark_report
-
-
-if __name__ == "__main__":
-    level = os.environ.get("LOG_LEVEL", "INFO")
-    to_file = os.environ.get("LOG_TO_FILE", "0") == "1"
-    setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS")
-
-    benchmark_config, benchmark_report = run_benchmark()
     benchmark = Benchmark(config=benchmark_config, report=benchmark_report)

-    if USERNAME is not None:
-        benchmark_config.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
-        benchmark_report.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
-        benchmark.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
+    if PUSH_REPO_ID is not None:
+        benchmark_config.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME)
+        benchmark_report.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME)
+        benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME)
diff --git a/examples/pytorch_bert.yaml b/examples/cuda_pytorch_bert.yaml
similarity index 90%
rename from examples/pytorch_bert.yaml
rename to examples/cuda_pytorch_bert.yaml
index 8bb702ca..8ab9b5cb 100644
--- a/examples/pytorch_bert.yaml
+++ b/examples/cuda_pytorch_bert.yaml
@@ -12,15 +12,16 @@ launcher:
   device_isolation: true
   device_isolation_action: warn

+backend:
+  device: cuda
+  device_ids: 0
+  no_weights: true
+  model: google-bert/bert-base-uncased
+
 scenario:
-  latency: true
   memory: true
+  latency: true
+
   input_shapes:
     batch_size: 1
     sequence_length: 128
-
-backend:
-  device: cuda
-  device_ids: 0
-  no_weights: true
-  model: bert-base-uncased
diff --git a/examples/trt_llama.yaml b/examples/cuda_pytorch_llama.yaml
similarity index 70%
rename from examples/trt_llama.yaml
rename to examples/cuda_pytorch_llama.yaml
index 30cb600a..1f85bd10 100644
--- a/examples/trt_llama.yaml
+++ b/examples/cuda_pytorch_llama.yaml
@@ -1,12 +1,12 @@
 defaults:
   - benchmark
-  - backend: tensorrt-llm
   - scenario: inference
   - launcher: process
+  - backend: pytorch
   - _base_
   - _self_

-name: trt_llama
+name: cuda_pytorch_llama

 launcher:
   device_isolation: true
@@ -16,12 +16,14 @@ backend:
   device: cuda
   device_ids: 0
   no_weights: true
+  torch_dtype: float16
   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0

 scenario:
   input_shapes:
     batch_size: 4
-    sequence_length: 256
+    sequence_length: 64
+
   generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    max_new_tokens: 32
+    min_new_tokens: 32
diff --git a/examples/pytorch_llama.py b/examples/cuda_pytorch_llama_quants.py
similarity index 81%
rename from examples/pytorch_llama.py
rename to examples/cuda_pytorch_llama_quants.py
index fe732bfa..01d492cb 100644
--- a/examples/pytorch_llama.py
+++ b/examples/cuda_pytorch_llama_quants.py
@@ -1,17 +1,11 @@
 import os

-from huggingface_hub import whoami
-
 from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
 from optimum_benchmark.logging_utils import setup_logging

-try:
-    USERNAME = whoami()["name"]
-except Exception as e:
-    print(f"Failed to get username from Hugging Face Hub: {e}")
-    USERNAME = None
-
-BENCHMARK_NAME = "pytorch-llama"
+BENCHMARK_NAME = "cuda_pytorch_llama"
+MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+PUSH_REPO_ID = os.environ.get("PUSH_REPO_ID", None)

 WEIGHTS_CONFIGS = {
     "float16": {
@@ -40,10 +34,10 @@ def run_benchmark(weight_config: str):
     launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
     backend_config = PyTorchConfig(
+        model=MODEL,
         device="cuda",
         device_ids="0",
         no_weights=True,
-        model="gpt2",
         **WEIGHTS_CONFIGS[weight_config],
     )
     scenario_config = InferenceConfig(
@@ -52,7 +46,7 @@ def run_benchmark(weight_config: str):
         duration=10,
         iterations=10,
         warmup_runs=10,
-        input_shapes={"batch_size": 1, "sequence_length": 128},
+        input_shapes={"batch_size": 1, "sequence_length": 64},
         generate_kwargs={"max_new_tokens": 32, "min_new_tokens": 32},
     )
     benchmark_config = BenchmarkConfig(
@@ -77,7 +71,5 @@ def run_benchmark(weight_config: str):
     benchmark_config, benchmark_report = run_benchmark(weight_config)
     benchmark = Benchmark(config=benchmark_config, report=benchmark_report)

-    if USERNAME is not None:
-        benchmark.push_to_hub(
-            repo_id=f"{USERNAME}/benchmarks", filename=f"{weight_config}.json", subfolder=BENCHMARK_NAME
-        )
+    if PUSH_REPO_ID is not None:
+        benchmark.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=BENCHMARK_NAME, filename=f"{weight_config}.json")
diff --git a/examples/pytorch_vlm.yaml b/examples/cuda_pytorch_vlm.yaml
similarity index 92%
rename from examples/pytorch_vlm.yaml
rename to examples/cuda_pytorch_vlm.yaml
index a39f8c8a..8f1e0f3c 100644
--- a/examples/pytorch_vlm.yaml
+++ b/examples/cuda_pytorch_vlm.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: pytorch_vlm
+name: cuda_pytorch_vlm

 launcher:
   device_isolation: true
@@ -30,7 +30,7 @@ scenario:
   input_shapes:
     # text
     batch_size: 1
-    sequence_length: 256
+    sequence_length: 64
     # image
     num_images: 2
     num_channels: 3
diff --git a/examples/tgi_llama.yaml b/examples/cuda_tgi_llama.yaml
similarity index 63%
rename from examples/tgi_llama.yaml
rename to examples/cuda_tgi_llama.yaml
index 399667fb..297403c8 100644
--- a/examples/tgi_llama.yaml
+++ b/examples/cuda_tgi_llama.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: tgi_llama
+name: cuda_tgi_llama

 launcher:
   device_isolation: true
@@ -14,14 +14,15 @@

 backend:
   device: cuda
-  device_ids: 4
-  # no_weights: true
+  device_ids: 0
+  cuda_graphs: 0 # remove for better perf but bigger memory footprint
   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0

 scenario:
   input_shapes:
     batch_size: 4
-    sequence_length: 256
+    sequence_length: 64
+
   generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    max_new_tokens: 32
+    min_new_tokens: 32
diff --git a/examples/pytorch_llama.yaml b/examples/cuda_trt_llama.yaml
similarity index 56%
rename from examples/pytorch_llama.yaml
rename to examples/cuda_trt_llama.yaml
index becd1f2e..c483fc2f 100644
--- a/examples/pytorch_llama.yaml
+++ b/examples/cuda_trt_llama.yaml
@@ -1,33 +1,30 @@
 defaults:
   - benchmark
+  - backend: tensorrt-llm
   - scenario: inference
   - launcher: process
-  - backend: pytorch
   - _base_
   - _self_

-name: pytorch_llama
+name: cuda_trt_llama

 launcher:
   device_isolation: true
   device_isolation_action: warn

 backend:
-  model: gpt2
   device: cuda
-  torch_dtype: float16
+  device_ids: 0
+  max_batch_size: 4
+  max_new_tokens: 32
+  max_prompt_length: 64
+  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0

 scenario:
-  memory: true
-  latency: true
-
-  warmup_runs: 10
-  iterations: 10
-  duration: 10
-
   input_shapes:
-    batch_size: 1
-    sequence_length: 256
+    batch_size: 4
+    sequence_length: 64
+
   generate_kwargs:
     max_new_tokens: 32
     min_new_tokens: 32
diff --git a/examples/vllm_llama.yaml b/examples/cuda_vllm_llama.yaml
similarity index 62%
rename from examples/vllm_llama.yaml
rename to examples/cuda_vllm_llama.yaml
index 8bbb4025..5ec4b5a8 100644
--- a/examples/vllm_llama.yaml
+++ b/examples/cuda_vllm_llama.yaml
@@ -6,7 +6,7 @@ defaults:
   - _base_
   - _self_

-name: vllm_llama
+name: cuda_vllm_llama

 launcher:
   device_isolation: true
@@ -15,16 +15,16 @@
 backend:
   device: cuda
   device_ids: 0
-  no_weights: false
-  serving_mode: offline
+  serving_mode: online # server-like
   model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
   engine_args:
-    enforce_eager: true
+    enforce_eager: true # remove for better perf but bigger memory footprint

 scenario:
   input_shapes:
     batch_size: 4
-    sequence_length: 256
+    sequence_length: 64
+
   generate_kwargs:
-    max_new_tokens: 100
-    min_new_tokens: 100
+    max_new_tokens: 32
+    min_new_tokens: 32
diff --git a/examples/pytorch_bert_mps.yaml b/examples/mps_pytorch_bert.yaml
similarity index 67%
rename from examples/pytorch_bert_mps.yaml
rename to examples/mps_pytorch_bert.yaml
index 4d4dc6e3..27368eb1 100644
--- a/examples/pytorch_bert_mps.yaml
+++ b/examples/mps_pytorch_bert.yaml
@@ -1,15 +1,12 @@
 defaults:
   - benchmark
   - scenario: inference
-  - launcher: process # launcher: inline works,
+  - launcher: inline # mps fails with python multi-processing for some reason
   - backend: pytorch
   - _base_
   - _self_

-name: pytorch_bert
-
-# launcher:
-#   start_method: spawn
+name: mps_pytorch_bert

 scenario:
   latency: true
@@ -19,8 +16,6 @@ scenario:
     sequence_length: 128

 backend:
-  device: cpu
+  device: mps
   no_weights: true
   model: bert-base-uncased
-
-
diff --git a/examples/neural_compressor_ptq_bert.yaml b/examples/neural_compressor_ptq_bert.yaml
deleted file mode 100644
index cbc32590..00000000
--- a/examples/neural_compressor_ptq_bert.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-defaults:
-  - benchmark
-  - backend: neural-compressor
-  - scenario: inference
-  - launcher: process
-  - _base_
-  - _self_
-
-name: neural_compressor_ptq_bert
-
-backend:
-  device: cpu
-  no_weights: true
-  model: bert-base-uncased
-  ptq_quantization: true
-  calibration: true
-
-scenario:
-  input_shapes:
-    batch_size: 1
diff --git a/examples/openvino_static_quant_bert.yaml b/examples/openvino_static_quant_bert.yaml
deleted file mode 100644
index caa4363a..00000000
--- a/examples/openvino_static_quant_bert.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-defaults:
-  - benchmark
-  - scenario: inference
-  - backend: openvino
-  - launcher: process
-  - _base_
-  - _self_
-
-name: openvino_static_quant_bert
-
-backend:
-  device: cpu
-  no_weights: true
-  model: bert-base-uncased
-  quantization: true
-  calibration: true
-  reshape: true
-
-scenario:
-  input_shapes:
-    batch_size: 1
diff --git a/examples/tei_bge.yaml b/examples/tei_bge.yaml
deleted file mode 100644
index dbbab7d5..00000000
--- a/examples/tei_bge.yaml
+++ /dev/null
@@ -1,21 +0,0 @@
-defaults:
-  - benchmark
-  - scenario: inference
-  - launcher: inline
-  - backend: py-txi
-  - _self_
-
-name: tei_bert
-
-launcher:
-  device_isolation: true
-  device_isolation_action: warn
-
-backend:
-  device: cpu
-  model: BAAI/bge-base-en-v1.5
-
-scenario:
-  input_shapes:
-    batch_size: 64
-    sequence_length: 128
diff --git a/optimum_benchmark/backends/py_txi/config.py b/optimum_benchmark/backends/py_txi/config.py
index e42161e6..73b75b75 100644
--- a/optimum_benchmark/backends/py_txi/config.py
+++ b/optimum_benchmark/backends/py_txi/config.py
@@ -50,7 +50,7 @@ class PyTXIConfig(BackendConfig):
     quantize: Optional[str] = None
     num_shard: Optional[int] = None
     speculate: Optional[int] = None
-    cuda_graphs: Optional[bool] = None
+    cuda_graphs: Optional[int] = None
     disable_custom_kernels: Optional[bool] = None
     trust_remote_code: Optional[bool] = None

diff --git a/optimum_benchmark/cli.py b/optimum_benchmark/cli.py
index 4b26266b..5af0723b 100644
--- a/optimum_benchmark/cli.py
+++ b/optimum_benchmark/cli.py
@@ -10,12 +10,10 @@
     Benchmark,
     BenchmarkConfig,
     EnergyStarConfig,
-    INCConfig,
     InferenceConfig,
     InlineConfig,
     IPEXConfig,
     LlamaCppConfig,
-    LLMSwarmConfig,
     ORTConfig,
     OVConfig,
     ProcessConfig,
@@ -43,9 +41,7 @@
 cs.store(group="backend", name=ORTConfig.name, node=ORTConfig)
cs.store(group="backend", name=TorchORTConfig.name, node=TorchORTConfig) cs.store(group="backend", name=TRTLLMConfig.name, node=TRTLLMConfig) -cs.store(group="backend", name=INCConfig.name, node=INCConfig) cs.store(group="backend", name=PyTXIConfig.name, node=PyTXIConfig) -cs.store(group="backend", name=LLMSwarmConfig.name, node=LLMSwarmConfig) cs.store(group="backend", name=VLLMConfig.name, node=VLLMConfig) cs.store(group="backend", name=LlamaCppConfig.name, node=LlamaCppConfig) # scenarios configurations diff --git a/setup.py b/setup.py index 03bbdf07..46a1ed60 100644 --- a/setup.py +++ b/setup.py @@ -76,6 +76,7 @@ "py-txi": ["py-txi"], "vllm": ["vllm"], # optional dependencies + "torchao": ["torchao"], "autoawq": ["autoawq"], "auto-gptq": ["optimum", "auto-gptq"], "sentence-transformers": ["sentence-transformers"], diff --git a/tests/test_energy_star.py b/tests/test_energy_star.py index bbb83f55..f2520932 100644 --- a/tests/test_energy_star.py +++ b/tests/test_energy_star.py @@ -9,12 +9,16 @@ LOGGER = getLogger("test-cli") -TEST_CONFIG_DIR = Path(__file__).parent.parent / "examples/energy_star" +TEST_CONFIG_DIR = Path(__file__).parent.parent / "energy_star" + TEST_CONFIG_NAMES = [ config.split(".")[0] for config in os.listdir(TEST_CONFIG_DIR) if config.endswith(".yaml") and not (config.startswith("_") or config.endswith("_")) ] +TEST_SCRIPT_PATHS = [ + str(TEST_CONFIG_DIR / filename) for filename in os.listdir(TEST_CONFIG_DIR) if filename.endswith(".py") +] ROCR_VISIBLE_DEVICES = os.environ.get("ROCR_VISIBLE_DEVICES", None) CUDA_VISIBLE_DEVICES = os.environ.get("CUDA_VISIBLE_DEVICES", None) @@ -42,3 +46,11 @@ def test_cli_configs(config_name): popen = run_subprocess_and_log_stream_output(LOGGER, args) assert popen.returncode == 0, f"Failed to run {config_name}" + + +@pytest.mark.parametrize("script_path", TEST_SCRIPT_PATHS) +def test_api_scripts(script_path): + args = ["python", script_path] + + popen = run_subprocess_and_log_stream_output(LOGGER, args) + assert popen.returncode == 0, f"Failed to run {script_path}" diff --git a/tests/test_examples.py b/tests/test_examples.py new file mode 100644 index 00000000..13cf3cff --- /dev/null +++ b/tests/test_examples.py @@ -0,0 +1,46 @@ +import os +from logging import getLogger +from pathlib import Path + +import pytest + +from optimum_benchmark.logging_utils import run_subprocess_and_log_stream_output + +LOGGER = getLogger("test-examples") + + +TEST_CONFIG_DIR = Path(__file__).parent.parent / "examples" + +TEST_CONFIG_NAMES = [ + config.split(".")[0] + for config in os.listdir(TEST_CONFIG_DIR) + if config.endswith(".yaml") and not (config.startswith("_") or config.endswith("_")) +] + +TEST_SCRIPT_PATHS = [ + str(TEST_CONFIG_DIR / filename) for filename in os.listdir(TEST_CONFIG_DIR) if filename.endswith(".py") +] + +ROCR_VISIBLE_DEVICES = os.environ.get("ROCR_VISIBLE_DEVICES", None) +CUDA_VISIBLE_DEVICES = os.environ.get("CUDA_VISIBLE_DEVICES", None) + + +@pytest.mark.parametrize("config_name", TEST_CONFIG_NAMES) +def test_cli_configs(config_name): + args = ["optimum-benchmark", "--config-dir", TEST_CONFIG_DIR, "--config-name", config_name] + + if ROCR_VISIBLE_DEVICES is not None: + args += [f'backend.device_ids="{ROCR_VISIBLE_DEVICES}"'] + elif CUDA_VISIBLE_DEVICES is not None: + args += [f'backend.device_ids="{CUDA_VISIBLE_DEVICES}"'] + + popen = run_subprocess_and_log_stream_output(LOGGER, args) + assert popen.returncode == 0, f"Failed to run {config_name}" + + +@pytest.mark.parametrize("script_path", TEST_SCRIPT_PATHS) +def 
test_api_scripts(script_path): + args = ["python", script_path] + + popen = run_subprocess_and_log_stream_output(LOGGER, args) + assert popen.returncode == 0, f"Failed to run {script_path}"