diff --git a/.buildkite/lm-eval-harness/configs/Qwen2-57B-A14-Instruct-FP8.yaml b/.buildkite/lm-eval-harness/configs/Qwen2-57B-A14-Instruct-FP8.yaml
new file mode 100644
index 00000000000..7445ad9a929
--- /dev/null
+++ b/.buildkite/lm-eval-harness/configs/Qwen2-57B-A14-Instruct-FP8.yaml
@@ -0,0 +1,11 @@
+# bash ./run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Qwen2-57B-A14B-Instruct-FP8-KV -b "auto" -l 1000 -f 5 -t 4
+model_name: "nm-testing/Qwen2-57B-A14B-Instruct-FP8-KV"  # same model id as -m in the command above
+tasks:
+- name: "gsm8k"
+  metrics:
+  - name: "exact_match,strict-match"
+    value: 0.823  # presumably the baseline score from the command above; TODO confirm
+  - name: "exact_match,flexible-extract"
+    value: 0.777  # NOTE(review): lower than strict-match; confirm the two values are not swapped
+limit: 1000  # same value as -l 1000 in the command above
+num_fewshot: 5  # same value as -f 5 in the command above