From 0b698510ea1d823135aec78328a8e6e308aed8da Mon Sep 17 00:00:00 2001
From: Baptiste Colle <32412211+baptistecolle@users.noreply.github.com>
Date: Mon, 19 Aug 2024 14:25:54 +0200
Subject: [PATCH] Add T4 for LLM perf leaderboard (#238)

---
 .github/workflows/update_llm_perf_cuda_pytorch.yaml | 6 +++++-
 llm_perf/update_llm_perf_cuda_pytorch.py            | 2 +-
 llm_perf/update_llm_perf_leaderboard.py             | 2 +-
 optimum_benchmark/backends/config.py                | 1 +
 optimum_benchmark/task_utils.py                     | 5 ++++-
 5 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/update_llm_perf_cuda_pytorch.yaml b/.github/workflows/update_llm_perf_cuda_pytorch.yaml
index 495fea20..0ab646ab 100644
--- a/.github/workflows/update_llm_perf_cuda_pytorch.yaml
+++ b/.github/workflows/update_llm_perf_cuda_pytorch.yaml
@@ -18,7 +18,11 @@ jobs:
       fail-fast: false
       matrix:
         subset: [unquantized, bnb, awq, gptq]
-        machine: [{ name: 1xA10, runs-on: [single-gpu, nvidia-gpu, a10, ci] }]
+        # Each entry names a machine and the runner group that `runs-on` below selects for it.
+        machine: [
+          { name: 1xA10, runs-on: { group: 'aws-g5-4xlarge-plus' } },
+          { name: 1xT4, runs-on: { group: 'aws-g4dn-2xlarge' } }
+        ]
 
     runs-on: ${{ matrix.machine.runs-on }}
 
diff --git a/llm_perf/update_llm_perf_cuda_pytorch.py b/llm_perf/update_llm_perf_cuda_pytorch.py
index aa0de547..51ab678f 100644
--- a/llm_perf/update_llm_perf_cuda_pytorch.py
+++ b/llm_perf/update_llm_perf_cuda_pytorch.py
@@ -134,7 +134,7 @@ def benchmark_cuda_pytorch(model, attn_implementation, weights_config):
         quantization_scheme=quant_scheme,
         quantization_config=quant_config,
         attn_implementation=attn_implementation,
-        hub_kwargs={"trust_remote_code": True},
+        model_kwargs={"trust_remote_code": True},
     )
 
     benchmark_config = BenchmarkConfig(
diff --git a/llm_perf/update_llm_perf_leaderboard.py b/llm_perf/update_llm_perf_leaderboard.py
index 7671ba68..9c8763e6 100644
--- a/llm_perf/update_llm_perf_leaderboard.py
+++ b/llm_perf/update_llm_perf_leaderboard.py
@@ -32,7 +32,7 @@ def gather_benchmarks(subset: str, machine: str):
 
 def update_perf_dfs():
     for subset in ["unquantized", "bnb", "awq", "gptq"]:
-        for machine in ["1xA10", "1xA100"]:
+        for machine in ["1xA10", "1xA100", "1xT4"]:
             try:
                 gather_benchmarks(subset, machine)
             except Exception:
diff --git a/optimum_benchmark/backends/config.py b/optimum_benchmark/backends/config.py
index f03d3571..e413a4f8 100644
--- a/optimum_benchmark/backends/config.py
+++ b/optimum_benchmark/backends/config.py
@@ -73,6 +73,7 @@ def __post_init__(self):
                 self.library,
                 revision=self.model_kwargs.get("revision", None),
                 token=self.model_kwargs.get("token", None),
+                trust_remote_code=self.model_kwargs.get("trust_remote_code", False),
             )
 
         if self.device is None:
diff --git a/optimum_benchmark/task_utils.py b/optimum_benchmark/task_utils.py
index 74773faf..337e835e 100644
--- a/optimum_benchmark/task_utils.py
+++ b/optimum_benchmark/task_utils.py
@@ -190,6 +190,7 @@ def infer_model_type_from_model_name_or_path(
     library_name: Optional[str] = None,
     revision: Optional[str] = None,
     token: Optional[str] = None,
+    trust_remote_code: bool = False,
 ) -> str:
     if library_name is None:
         library_name = infer_library_from_model_name_or_path(model_name_or_path, revision=revision, token=token)
@@ -216,7 +217,9 @@ def infer_model_type_from_model_name_or_path(
                 break
 
     else:
-        transformers_config = get_transformers_pretrained_config(model_name_or_path, revision=revision, token=token)
+        transformers_config = get_transformers_pretrained_config(
+            model_name_or_path, revision=revision, token=token, trust_remote_code=trust_remote_code
+        )
         inferred_model_type = transformers_config.model_type
 
     if inferred_model_type is None: