diff --git a/.buildkite/lm-eval-harness/configs/Qwen2-57B-A14-Instruct-FP8.yaml b/.buildkite/lm-eval-harness/configs/Qwen2-57B-A14-Instruct-FP8.yaml
new file mode 100644
index 00000000000..7445ad9a929
--- /dev/null
+++ b/.buildkite/lm-eval-harness/configs/Qwen2-57B-A14-Instruct-FP8.yaml
@@ -0,0 +1,11 @@
+# bash ./run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Qwen2-57B-A14B-Instruct-FP8-KV -b "auto" -l 1000 -f 5 -t 4
+model_name: "nm-testing/Qwen2-57B-A14B-Instruct-FP8-KV"
+tasks:
+- name: "gsm8k"
+  metrics:
+  - name: "exact_match,strict-match"
+    value: 0.823
+  - name: "exact_match,flexible-extract"
+    value: 0.777
+limit: 1000
+num_fewshot: 5