From 0b698510ea1d823135aec78328a8e6e308aed8da Mon Sep 17 00:00:00 2001 From: Baptiste Colle <32412211+baptistecolle@users.noreply.github.com> Date: Mon, 19 Aug 2024 14:25:54 +0200 Subject: [PATCH] Add t4 for llm perf leaderboard (#238) --- .github/workflows/update_llm_perf_cuda_pytorch.yaml | 6 +++++- llm_perf/update_llm_perf_cuda_pytorch.py | 2 +- llm_perf/update_llm_perf_leaderboard.py | 2 +- optimum_benchmark/backends/config.py | 1 + optimum_benchmark/task_utils.py | 5 ++++- 5 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/update_llm_perf_cuda_pytorch.yaml b/.github/workflows/update_llm_perf_cuda_pytorch.yaml index 495fea20..0ab646ab 100644 --- a/.github/workflows/update_llm_perf_cuda_pytorch.yaml +++ b/.github/workflows/update_llm_perf_cuda_pytorch.yaml @@ -18,7 +18,11 @@ jobs: fail-fast: false matrix: subset: [unquantized, bnb, awq, gptq] - machine: [{ name: 1xA10, runs-on: [single-gpu, nvidia-gpu, a10, ci] }] + + machine: [ + {name: 1xA10, runs-on: {group: 'aws-g5-4xlarge-plus'}}, + {name: 1xT4, runs-on: {group: 'aws-g4dn-2xlarge'}} + ] runs-on: ${{ matrix.machine.runs-on }} diff --git a/llm_perf/update_llm_perf_cuda_pytorch.py b/llm_perf/update_llm_perf_cuda_pytorch.py index aa0de547..51ab678f 100644 --- a/llm_perf/update_llm_perf_cuda_pytorch.py +++ b/llm_perf/update_llm_perf_cuda_pytorch.py @@ -134,7 +134,7 @@ def benchmark_cuda_pytorch(model, attn_implementation, weights_config): quantization_scheme=quant_scheme, quantization_config=quant_config, attn_implementation=attn_implementation, - hub_kwargs={"trust_remote_code": True}, + model_kwargs={"trust_remote_code": True}, ) benchmark_config = BenchmarkConfig( diff --git a/llm_perf/update_llm_perf_leaderboard.py b/llm_perf/update_llm_perf_leaderboard.py index 7671ba68..9c8763e6 100644 --- a/llm_perf/update_llm_perf_leaderboard.py +++ b/llm_perf/update_llm_perf_leaderboard.py @@ -32,7 +32,7 @@ def gather_benchmarks(subset: str, machine: str): def update_perf_dfs(): for subset in ["unquantized", "bnb", "awq", "gptq"]: - for machine in ["1xA10", "1xA100"]: + for machine in ["1xA10", "1xA100", "1xT4"]: try: gather_benchmarks(subset, machine) except Exception: diff --git a/optimum_benchmark/backends/config.py b/optimum_benchmark/backends/config.py index f03d3571..e413a4f8 100644 --- a/optimum_benchmark/backends/config.py +++ b/optimum_benchmark/backends/config.py @@ -73,6 +73,7 @@ def __post_init__(self): self.library, revision=self.model_kwargs.get("revision", None), token=self.model_kwargs.get("token", None), + trust_remote_code=self.model_kwargs.get("trust_remote_code", False), ) if self.device is None: diff --git a/optimum_benchmark/task_utils.py b/optimum_benchmark/task_utils.py index 74773faf..337e835e 100644 --- a/optimum_benchmark/task_utils.py +++ b/optimum_benchmark/task_utils.py @@ -190,6 +190,7 @@ def infer_model_type_from_model_name_or_path( library_name: Optional[str] = None, revision: Optional[str] = None, token: Optional[str] = None, + trust_remote_code: bool = False, ) -> str: if library_name is None: library_name = infer_library_from_model_name_or_path(model_name_or_path, revision=revision, token=token) @@ -216,7 +217,9 @@ def infer_model_type_from_model_name_or_path( break else: - transformers_config = get_transformers_pretrained_config(model_name_or_path, revision=revision, token=token) + transformers_config = get_transformers_pretrained_config( + model_name_or_path, revision=revision, token=token, trust_remote_code=trust_remote_code + ) inferred_model_type = transformers_config.model_type if inferred_model_type is None: