From fe24e6084d1357f0ce74e93d56962a490b3395c8 Mon Sep 17 00:00:00 2001
From: Jiaxin Shan
Date: Sat, 6 Jul 2024 15:39:19 -0700
Subject: [PATCH 01/10] Support loading LoRAs from Hugging Face at runtime

---
 vllm/lora/utils.py          | 42 +++++++++++++++++++++++++++++++++++++
 vllm/lora/worker_manager.py |  6 ++++--
 2 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/vllm/lora/utils.py b/vllm/lora/utils.py
index ab3b99eee6f..8e8690087d1 100644
--- a/vllm/lora/utils.py
+++ b/vllm/lora/utils.py
@@ -1,5 +1,8 @@
+import os
 from typing import List, Optional, Set, Tuple, Type
 
+from huggingface_hub import snapshot_download
+from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError, HFValidationError
 from torch import nn
 from transformers import PretrainedConfig
 
@@ -105,3 +108,42 @@ def parse_fine_tuned_lora_name(name: str) -> Tuple[str, bool]:
         return ".".join(parts[2:-1]), parts[-1] == "lora_embedding_A"
 
     raise ValueError(f"{name} is unsupported LoRA weight")
+
+def get_lora_absolute_path(lora_path: str) -> str:
+    """
+    Resolves the given lora_path to an absolute local path.
+
+    If the lora_path is identified as a Hugging Face model identifier, it will download
+    the model and return the local snapshot path. Otherwise, it treats the lora_path
+    as a local file path and converts it to an absolute path.
+
+    Parameters:
+    lora_path (str): The path to the lora model, which can be an absolute path,
+                     a relative path, or a Hugging Face model identifier.
+
+    Returns:
+    str: The resolved absolute local path to the lora model.
+    """
+
+    # Check if the path is an absolute path. Return it whether or not it exists.
+    if os.path.isabs(lora_path):
+        return lora_path
+
+    # If the path starts with ~, expand the user home directory and return the path.
+    if lora_path.startswith('~'):
+        return os.path.expanduser(lora_path)
+
+    # Check if the expanded relative path exists locally.
+    if os.path.exists(lora_path):
+        return os.path.abspath(lora_path)
+
+    # If the path does not exist locally, assume it's a Hugging Face model identifier.
+    try:
+        local_snapshot_path = snapshot_download(repo_id=lora_path)
+    except (RepositoryNotFoundError, EntryNotFoundError, HFValidationError) as e:
+        # Handle errors that may occur during the download
+        # Return original path instead of throwing error here
+        print(f"Error downloading the Hugging Face model: {e}")
+        return lora_path
+
+    return local_snapshot_path
diff --git a/vllm/lora/worker_manager.py b/vllm/lora/worker_manager.py
index 3d0ef4252b0..0d75867d65d 100644
--- a/vllm/lora/worker_manager.py
+++ b/vllm/lora/worker_manager.py
@@ -13,6 +13,7 @@
 from vllm.lora.models import (LoRAModel, LoRAModelManager,
                               LRUCacheLoRAModelManager, create_lora_manager)
 from vllm.lora.request import LoRARequest
+from vllm.lora.utils import get_lora_absolute_path
 
 logger = init_logger(__name__)
 
@@ -89,8 +90,9 @@ def _load_adapter(self, lora_request: LoRARequest) -> LoRAModel:
                         packed_modules_mapping[module])
                 else:
                     expected_lora_modules.append(module)
+            lora_local_path = get_lora_absolute_path(lora_request.lora_local_path)
             lora = self._lora_model_cls.from_local_checkpoint(
-                lora_request.lora_local_path,
+                lora_local_path,
                 expected_lora_modules,
                 max_position_embeddings=self.max_position_embeddings,
                 lora_model_id=lora_request.lora_int_id,
@@ -103,7 +105,7 @@ def _load_adapter(self, lora_request: LoRARequest) -> LoRAModel:
             )
         except Exception as e:
             raise RuntimeError(
-                f"Loading lora {lora_request.lora_local_path} failed") from e
+                f"Loading lora {lora_local_path} failed") from e
         if lora.rank > self.lora_config.max_lora_rank:
             raise ValueError(
                 f"LoRA rank {lora.rank} is greater than max_lora_rank "

From 171044585fc1eb79388fea3499d920f112034873 Mon Sep 17 00:00:00 2001
From: Jiaxin Shan
Date: Mon, 8 Jul 2024 15:25:34 -0700
Subject: [PATCH 02/10] Add unit tests for get_lora_absolute_path

Format the code and pass the linter
---
 tests/lora/test_utils.py    | 46 ++++++++++++++++++++++++++++++++++++-
 vllm/lora/utils.py          | 20 +++++++++-------
 vllm/lora/worker_manager.py |  6 ++---
 3 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/tests/lora/test_utils.py b/tests/lora/test_utils.py
index 4ff9715b4ca..7b5cfdb398d 100644
--- a/tests/lora/test_utils.py
+++ b/tests/lora/test_utils.py
@@ -1,9 +1,12 @@
 from collections import OrderedDict
+from unittest.mock import patch
 
 import pytest
+from huggingface_hub.utils import RepositoryNotFoundError
 from torch import nn
 
-from vllm.lora.utils import parse_fine_tuned_lora_name, replace_submodule
+from vllm.lora.utils import (get_lora_absolute_path,
+                             parse_fine_tuned_lora_name, replace_submodule)
 from vllm.utils import LRUCache
 
 
@@ -182,3 +185,44 @@ def test_lru_cache():
     assert 2 in cache
     assert 4 in cache
     assert 6 in cache
+
+
+# Unit tests for get_lora_absolute_path
+@patch('os.path.isabs', True)
+def test_get_lora_absolute_path_absolute(mock_isabs):
+    path = '/absolute/path/to/lora'
+    assert get_lora_absolute_path(path) == path
+
+
+@patch('os.path.expanduser', '/home/user/relative/path/to/lora')
+def test_get_lora_absolute_path_expanduser(mock_expanduser):
+    # Path with ~ that needs to be expanded
+    path = '~/relative/path/to/lora'
+    assert get_lora_absolute_path(path) == '/home/user/relative/path/to/lora'
+
+
+@patch('os.path.exists', True)
+@patch('os.path.abspath', '/absolute/path/to/lora')
+def test_get_lora_absolute_path_local_existing(mock_exists, mock_abspath):
+    # Relative path that exists locally
+    path = 'relative/path/to/lora'
+    assert get_lora_absolute_path(path) == '/absolute/path/to/lora'
+
+
+@patch('huggingface_hub.snapshot_download', '/mock/snapshot/path')
+@patch('os.path.exists', False)
+def test_get_lora_absolute_path_huggingface(mock_exists,
+                                            mock_snapshot_download):
+    # Hugging Face model identifier
+    path = 'org/repo'
+    assert get_lora_absolute_path(path) == '/mock/snapshot/path'
+
+
+@patch('huggingface_hub.snapshot_download',
+       side_effect=RepositoryNotFoundError)
+@patch('os.path.exists', False)
+def test_get_lora_absolute_path_huggingface_error(mock_exists,
+                                                  mock_snapshot_download):
+    # Hugging Face model identifier with download error
+    path = 'org/repo'
+    assert get_lora_absolute_path(path) == path
diff --git a/vllm/lora/utils.py b/vllm/lora/utils.py
index 8e8690087d1..6e2c4a1245c 100644
--- a/vllm/lora/utils.py
+++ b/vllm/lora/utils.py
@@ -2,7 +2,8 @@
 from typing import List, Optional, Set, Tuple, Type
 
 from huggingface_hub import snapshot_download
-from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError, HFValidationError
+from huggingface_hub.utils import (EntryNotFoundError, HFValidationError,
+                                   RepositoryNotFoundError)
 from torch import nn
 from transformers import PretrainedConfig
 
@@ -109,13 +110,15 @@ def parse_fine_tuned_lora_name(name: str) -> Tuple[str, bool]:
 
     raise ValueError(f"{name} is unsupported LoRA weight")
 
+
 def get_lora_absolute_path(lora_path: str) -> str:
     """
     Resolves the given lora_path to an absolute local path.
 
-    If the lora_path is identified as a Hugging Face model identifier, it will download
-    the model and return the local snapshot path. Otherwise, it treats the lora_path
-    as a local file path and converts it to an absolute path.
+    If the lora_path is identified as a Hugging Face model identifier,
+    it will download the model and return the local snapshot path.
+    Otherwise, it treats the lora_path as a local file path and
+    converts it to an absolute path.
 
     Parameters:
     lora_path (str): The path to the lora model, which can be an absolute path,
@@ -125,11 +128,11 @@ def get_lora_absolute_path(lora_path: str) -> str:
     str: The resolved absolute local path to the lora model.
     """
 
-    # Check if the path is an absolute path. Return it whether or not it exists.
+    # Check if the path is absolute. Return it whether or not it exists.
     if os.path.isabs(lora_path):
        return lora_path
 
-    # If the path starts with ~, expand the user home directory and return the path.
+    # If the path starts with ~, expand the user home directory.
     if lora_path.startswith('~'):
         return os.path.expanduser(lora_path)
 
@@ -137,10 +140,11 @@ def get_lora_absolute_path(lora_path: str) -> str:
     if os.path.exists(lora_path):
         return os.path.abspath(lora_path)
 
-    # If the path does not exist locally, assume it's a Hugging Face model identifier.
+    # If the path does not exist locally, assume it's a Hugging Face repo.
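+    # snapshot_download fetches every file of the repo revision into the
+    # local Hugging Face cache (HF_HOME / HUGGINGFACE_HUB_CACHE) and returns
+    # the cached snapshot directory, so repeated lookups reuse the download.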
     try:
         local_snapshot_path = snapshot_download(repo_id=lora_path)
-    except (RepositoryNotFoundError, EntryNotFoundError, HFValidationError) as e:
+    except (RepositoryNotFoundError, EntryNotFoundError,
+            HFValidationError) as e:
         # Handle errors that may occur during the download
         # Return original path instead of throwing error here
         print(f"Error downloading the Hugging Face model: {e}")
diff --git a/vllm/lora/worker_manager.py b/vllm/lora/worker_manager.py
index 0d75867d65d..8ea92ccc151 100644
--- a/vllm/lora/worker_manager.py
+++ b/vllm/lora/worker_manager.py
@@ -90,7 +90,8 @@ def _load_adapter(self, lora_request: LoRARequest) -> LoRAModel:
                         packed_modules_mapping[module])
                 else:
                     expected_lora_modules.append(module)
-            lora_local_path = get_lora_absolute_path(lora_request.lora_local_path)
+            lora_local_path = get_lora_absolute_path(
+                lora_request.lora_local_path)
             lora = self._lora_model_cls.from_local_checkpoint(
                 lora_local_path,
                 expected_lora_modules,
@@ -104,8 +105,7 @@ def _load_adapter(self, lora_request: LoRARequest) -> LoRAModel:
                 embedding_padding_modules=self.embedding_padding_modules,
             )
         except Exception as e:
-            raise RuntimeError(
-                f"Loading lora {lora_local_path} failed") from e
+            raise RuntimeError(f"Loading lora {lora_local_path} failed") from e
         if lora.rank > self.lora_config.max_lora_rank:
             raise ValueError(
                 f"LoRA rank {lora.rank} is greater than max_lora_rank "

From 0030b9ffce2b2e8b8e9f04a7b108f5877193cec0 Mon Sep 17 00:00:00 2001
From: Jiaxin Shan
Date: Sat, 6 Jul 2024 15:45:12 -0700
Subject: [PATCH 03/10] Rename lora_local_path to lora_path

---
 tests/core/test_scheduler.py              | 4 ++--
 vllm/entrypoints/openai/serving_engine.py | 4 ++--
 vllm/lora/request.py                      | 2 +-
 vllm/lora/worker_manager.py               | 7 +++----
 vllm/transformers_utils/tokenizer.py      | 5 ++---
 vllm/worker/model_runner.py               | 2 +-
 6 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/tests/core/test_scheduler.py b/tests/core/test_scheduler.py
index bae958211cb..4ca2260b5e0 100644
--- a/tests/core/test_scheduler.py
+++ b/tests/core/test_scheduler.py
@@ -462,7 +462,7 @@ def test_prefill_schedule_max_lora():
                            lora_request=LoRARequest(
                                lora_name=str(i),
                                lora_int_id=i + 1,
-                               lora_local_path="abc"))
+                               lora_path="abc"))
         waiting.append(seq_group)
     # Add two more requests to verify lora is prioritized.
     # 0: Lora, 1: Lora, 2: regular, 3: regular
@@ -760,7 +760,7 @@ def test_schedule_swapped_max_loras():
                            lora_request=LoRARequest(
                                lora_name=str(i),
                                lora_int_id=i + 1,
-                               lora_local_path="abc"))
+                               lora_path="abc"))
         scheduler._allocate_and_set_running(seq_group)
         append_new_token_seq_group(60, seq_group, 1)
         scheduler._swap_out(seq_group, blocks_to_swap_out)
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 14c1df89e06..b54480e12e0 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -33,7 +33,7 @@ class PromptAdapterPath:
 @dataclass
 class LoRAModulePath:
     name: str
-    local_path: str
+    path: str
 
 
 class OpenAIServing:
@@ -68,7 +68,7 @@ def __init__(
                 LoRARequest(
                     lora_name=lora.name,
                     lora_int_id=i,
-                    lora_local_path=lora.local_path,
+                    lora_path=lora.path,
                 ) for i, lora in enumerate(lora_modules, start=1)
             ]
diff --git a/vllm/lora/request.py b/vllm/lora/request.py
index 2d10d037760..858ae40736c 100644
--- a/vllm/lora/request.py
+++ b/vllm/lora/request.py
@@ -20,7 +20,7 @@ class LoRARequest(AdapterRequest):
 
     lora_name: str
     lora_int_id: int
-    lora_local_path: str
+    lora_path: str
     long_lora_max_len: Optional[int] = None
 
     __hash__ = AdapterRequest.__hash__
diff --git a/vllm/lora/worker_manager.py b/vllm/lora/worker_manager.py
index 8ea92ccc151..fdb69a9ef0e 100644
--- a/vllm/lora/worker_manager.py
+++ b/vllm/lora/worker_manager.py
@@ -90,10 +90,9 @@ def _load_adapter(self, lora_request: LoRARequest) -> LoRAModel:
                         packed_modules_mapping[module])
                 else:
                     expected_lora_modules.append(module)
-            lora_local_path = get_lora_absolute_path(
-                lora_request.lora_local_path)
+            lora_path = get_lora_absolute_path(lora_request.lora_path)
             lora = self._lora_model_cls.from_local_checkpoint(
-                lora_local_path,
+                lora_path,
                 expected_lora_modules,
                 max_position_embeddings=self.max_position_embeddings,
                 lora_model_id=lora_request.lora_int_id,
@@ -104,7 +104,7 @@ def _load_adapter(self, lora_request: LoRARequest) -> LoRAModel:
                 embedding_padding_modules=self.embedding_padding_modules,
             )
         except Exception as e:
-            raise RuntimeError(f"Loading lora {lora_local_path} failed") from e
+            raise RuntimeError(f"Loading lora {lora_path} failed") from e
         if lora.rank > self.lora_config.max_lora_rank:
             raise ValueError(
                 f"LoRA rank {lora.rank} is greater than max_lora_rank "
diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index f5684dbf127..ea74a19baa2 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -134,14 +134,13 @@ def get_lora_tokenizer(lora_request: LoRARequest, *args,
     if lora_request is None:
         return None
     try:
-        tokenizer = get_tokenizer(lora_request.lora_local_path, *args,
-                                  **kwargs)
+        tokenizer = get_tokenizer(lora_request.lora_path, *args, **kwargs)
     except OSError as e:
         # No tokenizer was found in the LoRA folder,
         # use base model tokenizer
         logger.warning(
             "No tokenizer found in %s, using base model tokenizer instead. "
" - "(Exception: %s)", lora_request.lora_local_path, e) + "(Exception: %s)", lora_request.lora_path, e) tokenizer = None return tokenizer diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 75a2607d0d9..3168cdf38fc 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -687,7 +687,7 @@ def profile_run(self) -> None: dummy_lora_request = LoRARequest( lora_name=f"warmup_{lora_id}", lora_int_id=lora_id, - lora_local_path="/not/a/real/path", + lora_path="/not/a/real/path", ) self.lora_manager.add_dummy_lora(dummy_lora_request, rank=LORA_WARMUP_RANK) From e3c3523f5dd3355a6795e6f39a871bae8b499905 Mon Sep 17 00:00:00 2001 From: Jiaxin Shan Date: Mon, 8 Jul 2024 19:09:35 -0700 Subject: [PATCH 04/10] Resolve test failures --- tests/lora/test_utils.py | 43 +++++++++++++++++++++++++--------------- vllm/lora/utils.py | 11 +++++----- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/tests/lora/test_utils.py b/tests/lora/test_utils.py index 7b5cfdb398d..5a9b60d23d5 100644 --- a/tests/lora/test_utils.py +++ b/tests/lora/test_utils.py @@ -2,7 +2,7 @@ from unittest.mock import patch import pytest -from huggingface_hub.utils import RepositoryNotFoundError +from huggingface_hub.utils import HfHubHTTPError from torch import nn from vllm.lora.utils import (get_lora_absolute_path, @@ -188,41 +188,52 @@ def test_lru_cache(): # Unit tests for get_lora_absolute_path -@patch('os.path.isabs', True) +@patch('os.path.isabs') def test_get_lora_absolute_path_absolute(mock_isabs): path = '/absolute/path/to/lora' + mock_isabs.return_value = True assert get_lora_absolute_path(path) == path -@patch('os.path.expanduser', '/home/user/relative/path/to/lora') +@patch('os.path.expanduser') def test_get_lora_absolute_path_expanduser(mock_expanduser): # Path with ~ that needs to be expanded path = '~/relative/path/to/lora' - assert get_lora_absolute_path(path) == '/home/user/relative/path/to/lora' + absolute_path = '/home/user/relative/path/to/lora' + mock_expanduser.return_value = absolute_path + assert get_lora_absolute_path(path) == absolute_path -@patch('os.path.exists', True) -@patch('os.path.abspath', '/absolute/path/to/lora') -def test_get_lora_absolute_path_local_existing(mock_exists, mock_abspath): +@patch('os.path.exists') +@patch('os.path.abspath') +def test_get_lora_absolute_path_local_existing(mock_abspath, mock_exist): # Relative path that exists locally path = 'relative/path/to/lora' - assert get_lora_absolute_path(path) == '/absolute/path/to/lora' + absolute_path = '/absolute/path/to/lora' + mock_exist.return_value = True + mock_abspath.return_value = absolute_path + assert get_lora_absolute_path(path) == absolute_path -@patch('huggingface_hub.snapshot_download', '/mock/snapshot/path') -@patch('os.path.exists', False) -def test_get_lora_absolute_path_huggingface(mock_exists, +@patch('huggingface_hub.snapshot_download') +@patch('os.path.exists') +def test_get_lora_absolute_path_huggingface(mock_exist, mock_snapshot_download): # Hugging Face model identifier path = 'org/repo' - assert get_lora_absolute_path(path) == '/mock/snapshot/path' + absolute_path = '/mock/snapshot/path' + mock_exist.return_value = False + mock_snapshot_download.return_value = absolute_path + assert get_lora_absolute_path(path) == absolute_path -@patch('huggingface_hub.snapshot_download', - side_effect=RepositoryNotFoundError) -@patch('os.path.exists', False) -def test_get_lora_absolute_path_huggingface_error(mock_exists, +@patch('huggingface_hub.snapshot_download') 
+@patch('os.path.exists')
+def test_get_lora_absolute_path_huggingface_error(mock_exist,
                                                   mock_snapshot_download):
     # Hugging Face model identifier with download error
     path = 'org/repo'
+    mock_exist.return_value = False
+    mock_snapshot_download.side_effect = HfHubHTTPError(
+        "failed to query model info")
     assert get_lora_absolute_path(path) == path
diff --git a/vllm/lora/utils.py b/vllm/lora/utils.py
index 6e2c4a1245c..923fbce1f3f 100644
--- a/vllm/lora/utils.py
+++ b/vllm/lora/utils.py
@@ -1,9 +1,9 @@
 import os
 from typing import List, Optional, Set, Tuple, Type
 
-from huggingface_hub import snapshot_download
-from huggingface_hub.utils import (EntryNotFoundError, HFValidationError,
-                                   RepositoryNotFoundError)
+import huggingface_hub
+from huggingface_hub.utils import (EntryNotFoundError, HfHubHTTPError,
+                                   HFValidationError, RepositoryNotFoundError)
 from torch import nn
 from transformers import PretrainedConfig
 
@@ -142,8 +142,9 @@ def get_lora_absolute_path(lora_path: str) -> str:
 
     # If the path does not exist locally, assume it's a Hugging Face repo.
     try:
-        local_snapshot_path = snapshot_download(repo_id=lora_path)
-    except (RepositoryNotFoundError, EntryNotFoundError,
+        local_snapshot_path = huggingface_hub.snapshot_download(
+            repo_id=lora_path)
+    except (HfHubHTTPError, RepositoryNotFoundError, EntryNotFoundError,
             HFValidationError) as e:
         # Handle errors that may occur during the download
         # Return original path instead of throwing error here

From 14557220883c2b78c6787a75fa610d23de3b9506 Mon Sep 17 00:00:00 2001
From: Jiaxin Shan
Date: Tue, 9 Jul 2024 00:03:31 -0700
Subject: [PATCH 05/10] Update vllm/lora/utils.py

Co-authored-by: Antoni Baum
---
 vllm/lora/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/lora/utils.py b/vllm/lora/utils.py
index 923fbce1f3f..2735014090b 100644
--- a/vllm/lora/utils.py
+++ b/vllm/lora/utils.py
@@ -111,7 +111,7 @@ def parse_fine_tuned_lora_name(name: str) -> Tuple[str, bool]:
     raise ValueError(f"{name} is unsupported LoRA weight")
 
 
-def get_lora_absolute_path(lora_path: str) -> str:
+def get_adapter_absolute_path(lora_path: str) -> str:
     """
     Resolves the given lora_path to an absolute local path.
From a6a30b99c1d6a5a709ce51ee653f48e6999d3000 Mon Sep 17 00:00:00 2001
From: Jiaxin Shan
Date: Tue, 9 Jul 2024 00:09:31 -0700
Subject: [PATCH 06/10] Address some feedback from reviewer

---
 tests/lora/test_utils.py    | 28 ++++++++++++++--------------
 vllm/lora/utils.py          |  4 ++--
 vllm/lora/worker_manager.py |  4 ++--
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/tests/lora/test_utils.py b/tests/lora/test_utils.py
index 5a9b60d23d5..db02bacdb64 100644
--- a/tests/lora/test_utils.py
+++ b/tests/lora/test_utils.py
@@ -5,7 +5,7 @@
 from huggingface_hub.utils import HfHubHTTPError
 from torch import nn
 
-from vllm.lora.utils import (get_lora_absolute_path,
+from vllm.lora.utils import (get_adapter_absolute_path,
                              parse_fine_tuned_lora_name, replace_submodule)
 from vllm.utils import LRUCache
 
@@ -187,53 +187,53 @@ def test_lru_cache():
     assert 6 in cache
 
 
-# Unit tests for get_lora_absolute_path
+# Unit tests for get_adapter_absolute_path
 @patch('os.path.isabs')
-def test_get_lora_absolute_path_absolute(mock_isabs):
+def test_get_adapter_absolute_path_absolute(mock_isabs):
     path = '/absolute/path/to/lora'
     mock_isabs.return_value = True
-    assert get_lora_absolute_path(path) == path
+    assert get_adapter_absolute_path(path) == path
 
 
 @patch('os.path.expanduser')
-def test_get_lora_absolute_path_expanduser(mock_expanduser):
+def test_get_adapter_absolute_path_expanduser(mock_expanduser):
     # Path with ~ that needs to be expanded
     path = '~/relative/path/to/lora'
     absolute_path = '/home/user/relative/path/to/lora'
     mock_expanduser.return_value = absolute_path
-    assert get_lora_absolute_path(path) == absolute_path
+    assert get_adapter_absolute_path(path) == absolute_path
 
 
 @patch('os.path.exists')
 @patch('os.path.abspath')
-def test_get_lora_absolute_path_local_existing(mock_abspath, mock_exist):
+def test_get_adapter_absolute_path_local_existing(mock_abspath, mock_exist):
     # Relative path that exists locally
     path = 'relative/path/to/lora'
     absolute_path = '/absolute/path/to/lora'
     mock_exist.return_value = True
     mock_abspath.return_value = absolute_path
-    assert get_lora_absolute_path(path) == absolute_path
+    assert get_adapter_absolute_path(path) == absolute_path
 
 
 @patch('huggingface_hub.snapshot_download')
 @patch('os.path.exists')
-def test_get_lora_absolute_path_huggingface(mock_exist,
-                                            mock_snapshot_download):
+def test_get_adapter_absolute_path_huggingface(mock_exist,
+                                               mock_snapshot_download):
     # Hugging Face model identifier
     path = 'org/repo'
     absolute_path = '/mock/snapshot/path'
     mock_exist.return_value = False
     mock_snapshot_download.return_value = absolute_path
-    assert get_lora_absolute_path(path) == absolute_path
+    assert get_adapter_absolute_path(path) == absolute_path
 
 
 @patch('huggingface_hub.snapshot_download')
 @patch('os.path.exists')
-def test_get_lora_absolute_path_huggingface_error(mock_exist,
-                                                  mock_snapshot_download):
+def test_get_adapter_absolute_path_huggingface_error(mock_exist,
+                                                     mock_snapshot_download):
     # Hugging Face model identifier with download error
     path = 'org/repo'
     mock_exist.return_value = False
     mock_snapshot_download.side_effect = HfHubHTTPError(
         "failed to query model info")
-    assert get_lora_absolute_path(path) == path
+    assert get_adapter_absolute_path(path) == path
diff --git a/vllm/lora/utils.py b/vllm/lora/utils.py
index 2735014090b..4513337299e 100644
--- a/vllm/lora/utils.py
+++ b/vllm/lora/utils.py
@@ -145,10 +145,10 @@ def get_adapter_absolute_path(lora_path: str) -> str:
         local_snapshot_path = huggingface_hub.snapshot_download(
             repo_id=lora_path)
     except (HfHubHTTPError, RepositoryNotFoundError, EntryNotFoundError,
-            HFValidationError) as e:
+            HFValidationError):
         # Handle errors that may occur during the download
         # Return original path instead of throwing error here
-        print(f"Error downloading the Hugging Face model: {e}")
+        logger.exception("Error downloading the HuggingFace model")
         return lora_path
 
     return local_snapshot_path
diff --git a/vllm/lora/worker_manager.py b/vllm/lora/worker_manager.py
index fdb69a9ef0e..724c308a07a 100644
--- a/vllm/lora/worker_manager.py
+++ b/vllm/lora/worker_manager.py
@@ -13,7 +13,7 @@
 from vllm.lora.models import (LoRAModel, LoRAModelManager,
                               LRUCacheLoRAModelManager, create_lora_manager)
 from vllm.lora.request import LoRARequest
-from vllm.lora.utils import get_lora_absolute_path
+from vllm.lora.utils import get_adapter_absolute_path
 
 logger = init_logger(__name__)
 
@@ -90,7 +90,7 @@ def _load_adapter(self, lora_request: LoRARequest) -> LoRAModel:
                         packed_modules_mapping[module])
                 else:
                     expected_lora_modules.append(module)
-            lora_path = get_lora_absolute_path(lora_request.lora_path)
+            lora_path = get_adapter_absolute_path(lora_request.lora_path)
             lora = self._lora_model_cls.from_local_checkpoint(
                 lora_path,
                 expected_lora_modules,

From a16ec5dd6143cf3714b1bb574c24502ffa3df12d Mon Sep 17 00:00:00 2001
From: Jiaxin Shan
Date: Fri, 12 Jul 2024 15:34:21 -0700
Subject: [PATCH 07/10] Add test for loading lora from huggingface

---
 tests/lora/conftest.py              |  6 +++++
 tests/lora/test_lora_huggingface.py | 39 +++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 tests/lora/test_lora_huggingface.py

diff --git a/tests/lora/conftest.py b/tests/lora/conftest.py
index bda123bf131..a962efb6900 100644
--- a/tests/lora/conftest.py
+++ b/tests/lora/conftest.py
@@ -163,6 +163,12 @@ def sql_lora_files():
     return snapshot_download(repo_id="yard1/llama-2-7b-sql-lora-test")
 
 
+@pytest.fixture(scope="session")
+def sql_lora_huggingface_id():
+    # huggingface repo id is used to test lora runtime downloading.
+    return "yard1/llama-2-7b-sql-lora-test"
+
+
 @pytest.fixture(scope="session")
 def mixtral_lora_files():
     # Note: this module has incorrect adapter_config.json to test
diff --git a/tests/lora/test_lora_huggingface.py b/tests/lora/test_lora_huggingface.py
new file mode 100644
index 00000000000..e2daf9d1351
--- /dev/null
+++ b/tests/lora/test_lora_huggingface.py
@@ -0,0 +1,39 @@
+from typing import List
+
+import pytest
+
+from vllm.lora.models import LoRAModel
+from vllm.lora.utils import get_adapter_absolute_path
+from vllm.model_executor.models.llama import LlamaForCausalLM
+
+# Provide absolute path and huggingface lora ids
+lora_fixture_name = ["sql_lora_files", "sql_lora_huggingface_id"]
+
+
+@pytest.mark.parametrize("lora_fixture_name", lora_fixture_name)
+def test_load_checkpoints_from_huggingface(lora_fixture_name, request):
+    lora_name = request.getfixturevalue(lora_fixture_name)
+    supported_lora_modules = LlamaForCausalLM.supported_lora_modules
+    packed_modules_mapping = LlamaForCausalLM.packed_modules_mapping
+    embedding_modules = LlamaForCausalLM.embedding_modules
+    embed_padding_modules = LlamaForCausalLM.embedding_padding_modules
+    expected_lora_modules: List[str] = []
+    for module in supported_lora_modules:
+        if module in packed_modules_mapping:
+            expected_lora_modules.extend(packed_modules_mapping[module])
+        else:
+            expected_lora_modules.append(module)
+
+    lora_path = get_adapter_absolute_path(lora_name)
+
+    # lora loading should work for an absolute path or a huggingface id.
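+    # (from_local_checkpoint reads adapter_config.json under lora_path,
+    # checks target_modules against expected_lora_modules, and loads
+    # adapter_model.safetensors or adapter_model.bin onto the given device.)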
+
+    lora_model = LoRAModel.from_local_checkpoint(
+        lora_path,
+        expected_lora_modules,
+        lora_model_id=1,
+        device="cpu",
+        embedding_modules=embedding_modules,
+        embedding_padding_modules=embed_padding_modules)
+
+    # Assertions to ensure the model is loaded correctly
+    assert lora_model is not None, "LoRAModel is not loaded correctly"

From 10ee978e878dbcd3915f249b69ec404ff6defb0b Mon Sep 17 00:00:00 2001
From: Jiaxin Shan
Date: Fri, 12 Jul 2024 17:56:13 -0700
Subject: [PATCH 08/10] Update tests/lora/conftest.py

Co-authored-by: Antoni Baum
---
 tests/lora/conftest.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/lora/conftest.py b/tests/lora/conftest.py
index a962efb6900..c4efee21f09 100644
--- a/tests/lora/conftest.py
+++ b/tests/lora/conftest.py
@@ -168,6 +168,9 @@ def sql_lora_huggingface_id():
     # huggingface repo id is used to test lora runtime downloading.
     return "yard1/llama-2-7b-sql-lora-test"
 
+@pytest.fixture(scope="session")
+def sql_lora_files(sql_lora_huggingface_id):
+    return snapshot_download(repo_id=sql_lora_huggingface_id)
 
 @pytest.fixture(scope="session")
 def mixtral_lora_files():

From 0ea0d0d26ba5d4f6434be865e79ab77ec9cd8aad Mon Sep 17 00:00:00 2001
From: Jiaxin Shan
Date: Sun, 14 Jul 2024 23:59:17 -0700
Subject: [PATCH 09/10] Add deprecation notice for lora_local_path

---
 tests/lora/conftest.py |  7 ++-----
 vllm/lora/request.py   | 42 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/tests/lora/conftest.py b/tests/lora/conftest.py
index c4efee21f09..0bcae5b0c96 100644
--- a/tests/lora/conftest.py
+++ b/tests/lora/conftest.py
@@ -158,20 +158,17 @@ def dummy_model_gate_up() -> nn.Module:
     return model
 
 
-@pytest.fixture(scope="session")
-def sql_lora_files():
-    return snapshot_download(repo_id="yard1/llama-2-7b-sql-lora-test")
-
-
 @pytest.fixture(scope="session")
 def sql_lora_huggingface_id():
     # huggingface repo id is used to test lora runtime downloading.
     return "yard1/llama-2-7b-sql-lora-test"
 
+
 @pytest.fixture(scope="session")
 def sql_lora_files(sql_lora_huggingface_id):
     return snapshot_download(repo_id=sql_lora_huggingface_id)
 
+
 @pytest.fixture(scope="session")
 def mixtral_lora_files():
     # Note: this module has incorrect adapter_config.json to test
diff --git a/vllm/lora/request.py b/vllm/lora/request.py
index 858ae40736c..a0b9fd6cd41 100644
--- a/vllm/lora/request.py
+++ b/vllm/lora/request.py
@@ -1,4 +1,5 @@
-from dataclasses import dataclass
+import warnings
+from dataclasses import dataclass, field
 from typing import Optional
 
 from vllm.adapter_commons.request import AdapterRequest
@@ -20,10 +21,25 @@ class LoRARequest(AdapterRequest):
 
     lora_name: str
     lora_int_id: int
-    lora_path: str
+    lora_path: str = ""
+    lora_local_path: Optional[str] = field(default=None, repr=False)
     long_lora_max_len: Optional[int] = None
 
     __hash__ = AdapterRequest.__hash__
 
+    def __post_init__(self):
+        if 'lora_local_path' in self.__dict__:
+            warnings.warn(
+                "The 'lora_local_path' attribute is deprecated "
+                "and will be removed in a future version. "
" + "Please use 'lora_path' instead.", + DeprecationWarning, + stacklevel=2) + if not self.lora_path: + self.lora_path = self.lora_local_path or "" + + # Ensure lora_path is not empty + assert self.lora_path, "lora_path can not be empty" + @property def adapter_id(self): return self.lora_int_id @@ -32,6 +48,26 @@ def adapter_id(self): def name(self): return self.lora_name + @property + def path(self): + return self.lora_path + @property def local_path(self): - return self.lora_local_path + warnings.warn( + "The 'local_path' attribute is deprecated " + "and will be removed in a future version. " + "Please use 'path' instead.", + DeprecationWarning, + stacklevel=2) + return self.lora_path + + @local_path.setter + def local_path(self, value): + warnings.warn( + "The 'local_path' attribute is deprecated " + "and will be removed in a future version. " + "Please use 'path' instead.", + DeprecationWarning, + stacklevel=2) + self.lora_path = value From ba2a04de6cb894d96c5141bbca438b3b7b44a080 Mon Sep 17 00:00:00 2001 From: Jiaxin Shan Date: Wed, 17 Jul 2024 18:02:43 -0700 Subject: [PATCH 10/10] Fix failures in test_long_context. The problem comes from new added field and the test didn't explicitly set the field but using position information. --- tests/lora/test_long_context.py | 2 +- vllm/lora/request.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/lora/test_long_context.py b/tests/lora/test_long_context.py index 853fd9fb3ce..389a3ccbc17 100644 --- a/tests/lora/test_long_context.py +++ b/tests/lora/test_long_context.py @@ -29,7 +29,7 @@ def _create_lora_request(lora_id, long_context_infos): context_len = long_context_infos[lora_id]["context_length"] scaling_factor = context_len_to_scaling_factor[context_len] return LoRARequest(context_len, lora_id, - long_context_infos[lora_id]["lora"], + long_context_infos[lora_id]["lora"], None, 4096 * scaling_factor) diff --git a/vllm/lora/request.py b/vllm/lora/request.py index a0b9fd6cd41..5d791424fbe 100644 --- a/vllm/lora/request.py +++ b/vllm/lora/request.py @@ -38,7 +38,7 @@ def __post_init__(self): self.lora_path = self.lora_local_path or "" # Ensure lora_path is not empty - assert self.lora_path, "lora_path can not be empty" + assert self.lora_path, "lora_path cannot be empty" @property def adapter_id(self):