Commit 0315122

Merge pull request oobabooga#6232 from oobabooga/dev
Merge dev branch
2 parents: d01c68f + b19d239 (commit 0315122)

3 files changed (+42 -60 lines)

modules/llama_cpp_python_hijack.py (+34 -52)

@@ -1,4 +1,5 @@
 import importlib
+import platform
 from typing import Sequence
 
 from tqdm import tqdm
@@ -13,58 +14,39 @@
 def llama_cpp_lib():
     global imported_module
 
-    def module_to_purpose(module_name):
-        if module_name == 'llama_cpp':
-            return 'CPU'
-        elif module_name == 'llama_cpp_cuda_tensorcores':
-            return 'tensorcores'
-        elif module_name == 'llama_cpp_cuda':
-            return 'default'
-
-        return 'unknown'
-
-    return_lib = None
-
-    if shared.args.cpu:
-        if imported_module and imported_module != 'llama_cpp':
-            raise Exception(f"The {module_to_purpose(imported_module)} version of llama-cpp-python is already loaded. Switching to the CPU version currently requires a server restart.")
-        try:
-            return_lib = importlib.import_module('llama_cpp')
-            imported_module = 'llama_cpp'
-        except:
-            pass
-
-    if shared.args.tensorcores and return_lib is None:
-        if imported_module and imported_module != 'llama_cpp_cuda_tensorcores':
-            raise Exception(f"The {module_to_purpose(imported_module)} version of llama-cpp-python is already loaded. Switching to the tensorcores version currently requires a server restart.")
-        try:
-            return_lib = importlib.import_module('llama_cpp_cuda_tensorcores')
-            imported_module = 'llama_cpp_cuda_tensorcores'
-        except:
-            pass
-
-    if return_lib is None:
-        if imported_module and imported_module != 'llama_cpp_cuda':
-            raise Exception(f"The {module_to_purpose(imported_module)} version of llama-cpp-python is already loaded. Switching to the default version currently requires a server restart.")
-        try:
-            return_lib = importlib.import_module('llama_cpp_cuda')
-            imported_module = 'llama_cpp_cuda'
-        except:
-            pass
-
-    if return_lib is None and not shared.args.cpu:
-        if imported_module and imported_module != 'llama_cpp':
-            raise Exception(f"The {module_to_purpose(imported_module)} version of llama-cpp-python is already loaded. Switching to the CPU version currently requires a server restart.")
-        try:
-            return_lib = importlib.import_module('llama_cpp')
-            imported_module = 'llama_cpp'
-        except:
-            pass
-
-    if return_lib is not None:
-        monkey_patch_llama_cpp_python(return_lib)
-
-    return return_lib
+    # Determine the platform
+    is_macos = platform.system() == 'Darwin'
+
+    # Define the library names based on the platform
+    if is_macos:
+        lib_names = [
+            (None, 'llama_cpp')
+        ]
+    else:
+        lib_names = [
+            ('cpu', 'llama_cpp'),
+            ('tensorcores', 'llama_cpp_cuda_tensorcores'),
+            (None, 'llama_cpp_cuda'),
+            (None, 'llama_cpp')
+        ]
+
+    for arg, lib_name in lib_names:
+        should_import = (arg is None or getattr(shared.args, arg))
+
+        if should_import:
+            if imported_module and imported_module != lib_name:
+                # Conflict detected, raise an exception
+                raise Exception(f"Cannot import `{lib_name}` because `{imported_module}` is already imported. Switching to a different version of llama-cpp-python currently requires a server restart.")
+
+            try:
+                return_lib = importlib.import_module(lib_name)
+                imported_module = lib_name
+                monkey_patch_llama_cpp_python(return_lib)
+                return return_lib
+            except ImportError:
+                continue
+
+    return None
 
 
 def eval_with_progress(self, tokens: Sequence[int]):
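
The refactor above collapses four near-duplicate import blocks into a single ordered candidate list: each entry pairs an optional CLI flag with a module name, and the first candidate that is both requested and importable wins. The following standalone sketch reproduces that selection logic under stated assumptions: `flags` and `pick_backend` are hypothetical stand-ins for `shared.args` and `llama_cpp_lib`, and the real function also applies `monkey_patch_llama_cpp_python` to the module before returning it.

# Sketch of the candidate-list fallback pattern (not the project's actual module).
import importlib
import platform
from types import SimpleNamespace

flags = SimpleNamespace(cpu=False, tensorcores=False)  # hypothetical stand-in for shared.args
imported_module = None  # module-level cache, mirroring the hijack module


def pick_backend():
    """Return the first importable llama-cpp-python variant, honoring flags."""
    global imported_module

    if platform.system() == 'Darwin':
        # On macOS only the plain llama_cpp package is tried
        candidates = [(None, 'llama_cpp')]
    else:
        candidates = [
            ('cpu', 'llama_cpp'),
            ('tensorcores', 'llama_cpp_cuda_tensorcores'),
            (None, 'llama_cpp_cuda'),
            (None, 'llama_cpp'),
        ]

    for flag, name in candidates:
        if flag is not None and not getattr(flags, flag):
            continue  # flag-gated candidate was not requested
        if imported_module and imported_module != name:
            # A different variant is already loaded; switching requires a restart
            raise Exception(f"`{imported_module}` is already loaded; restart to switch to `{name}`.")
        try:
            lib = importlib.import_module(name)
        except ImportError:
            continue  # this variant is not installed, try the next one
        imported_module = name
        return lib

    return None

With both flags left False on a non-macOS machine, the sketch tries `llama_cpp_cuda` first and falls back to `llama_cpp`, returning None only when no variant is installed.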

requirements.txt (+4 -4)

@@ -58,8 +58,8 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
-https://github.com/oobabooga/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows"
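
Each pinned wheel here is guarded by a PEP 508 environment marker after the semicolon, so pip installs only the flash-attn build that matches the current OS, architecture, and Python version. As a quick sanity check (a sketch, assuming the third-party `packaging` library is available), the marker from the Linux cp311 line above can be evaluated against the running interpreter:

# Evaluate a PEP 508 environment marker against the current environment.
from packaging.markers import Marker

marker = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')
print(marker.evaluate())  # True only on 64-bit Linux running Python 3.11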

requirements_noavx2.txt (+4 -4)

@@ -58,8 +58,8 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
-https://github.com/oobabooga/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows"
