@@ -1,4 +1,5 @@
 import importlib
+import platform
 from typing import Sequence
 
 from tqdm import tqdm
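
The added `platform` import is used in the next hunk to special-case macOS. As a quick reference (standard-library behavior, not part of this change), `platform.system()` reports the OS name at runtime:

# Illustration only; not part of the diff.
import platform

print(platform.system())  # 'Darwin' on macOS, 'Linux' on Linux, 'Windows' on Windows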

@@ -13,58 +14,39 @@
 def llama_cpp_lib():
     global imported_module
 
-    def module_to_purpose(module_name):
-        if module_name == 'llama_cpp':
-            return 'CPU'
-        elif module_name == 'llama_cpp_cuda_tensorcores':
-            return 'tensorcores'
-        elif module_name == 'llama_cpp_cuda':
-            return 'default'
-
-        return 'unknown'
-
-    return_lib = None
-
-    if shared.args.cpu:
-        if imported_module and imported_module != 'llama_cpp':
-            raise Exception(f"The {module_to_purpose(imported_module)} version of llama-cpp-python is already loaded. Switching to the CPU version currently requires a server restart.")
-        try:
-            return_lib = importlib.import_module('llama_cpp')
-            imported_module = 'llama_cpp'
-        except:
-            pass
-
-    if shared.args.tensorcores and return_lib is None:
-        if imported_module and imported_module != 'llama_cpp_cuda_tensorcores':
-            raise Exception(f"The {module_to_purpose(imported_module)} version of llama-cpp-python is already loaded. Switching to the tensorcores version currently requires a server restart.")
-        try:
-            return_lib = importlib.import_module('llama_cpp_cuda_tensorcores')
-            imported_module = 'llama_cpp_cuda_tensorcores'
-        except:
-            pass
-
-    if return_lib is None:
-        if imported_module and imported_module != 'llama_cpp_cuda':
-            raise Exception(f"The {module_to_purpose(imported_module)} version of llama-cpp-python is already loaded. Switching to the default version currently requires a server restart.")
-        try:
-            return_lib = importlib.import_module('llama_cpp_cuda')
-            imported_module = 'llama_cpp_cuda'
-        except:
-            pass
-
-    if return_lib is None and not shared.args.cpu:
-        if imported_module and imported_module != 'llama_cpp':
-            raise Exception(f"The {module_to_purpose(imported_module)} version of llama-cpp-python is already loaded. Switching to the CPU version currently requires a server restart.")
-        try:
-            return_lib = importlib.import_module('llama_cpp')
-            imported_module = 'llama_cpp'
-        except:
-            pass
-
-    if return_lib is not None:
-        monkey_patch_llama_cpp_python(return_lib)
-
-    return return_lib
+    # Determine the platform
+    is_macos = platform.system() == 'Darwin'
+
+    # Define the library names based on the platform
+    if is_macos:
+        lib_names = [
+            (None, 'llama_cpp')
+        ]
+    else:
+        lib_names = [
+            ('cpu', 'llama_cpp'),
+            ('tensorcores', 'llama_cpp_cuda_tensorcores'),
+            (None, 'llama_cpp_cuda'),
+            (None, 'llama_cpp')
+        ]
+
+    for arg, lib_name in lib_names:
+        should_import = (arg is None or getattr(shared.args, arg))
+
+        if should_import:
+            if imported_module and imported_module != lib_name:
+                # Conflict detected, raise an exception
+                raise Exception(f"Cannot import `{lib_name}` because `{imported_module}` is already imported. Switching to a different version of llama-cpp-python currently requires a server restart.")
+
+            try:
+                return_lib = importlib.import_module(lib_name)
+                imported_module = lib_name
+                monkey_patch_llama_cpp_python(return_lib)
+                return return_lib
+            except ImportError:
+                continue
+
+    return None
 
 
 def eval_with_progress(self, tokens: Sequence[int]):
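
The rewrite collapses four copy-pasted import attempts into a single prioritized table: each entry pairs an optional `shared.args` flag with a module name, and the first candidate that imports successfully is monkey-patched and returned. Below is a minimal, self-contained sketch of that fallback-import pattern; the `import_first_available` helper and `names` list are illustrative stand-ins, not the project's API, and the flag gating is omitted for brevity.

# Sketch of the prioritized fallback-import pattern from the diff above.
# `import_first_available` is a hypothetical helper, not project code.
import importlib
import platform


def import_first_available(candidates):
    """Return the first module in `candidates` that imports cleanly."""
    for name in candidates:
        try:
            return importlib.import_module(name)
        except ImportError:
            # A missing wheel (e.g. no CUDA build installed) is expected;
            # fall through to the next candidate.
            continue
    return None


# Mirror the diff's ordering: on macOS only the plain wheel is attempted;
# elsewhere the CUDA builds are preferred, with plain llama_cpp as the
# final fallback. (The diff additionally lets the cpu/tensorcores flags
# promote specific builds; omitted here.)
if platform.system() == 'Darwin':
    names = ['llama_cpp']
else:
    names = ['llama_cpp_cuda_tensorcores', 'llama_cpp_cuda', 'llama_cpp']

lib = import_first_available(names)

Note that the real function also keeps the module-level `imported_module` guard, so a later call that would select a different build raises instead of silently mixing variants: switching between llama-cpp-python builds still requires a server restart.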