Commit c94bdc4 (1 parent: 926b414)

Expose libggml and refactor ctypes extension

2 files changed (+274, −0 lines)

llama_cpp/_ctypes_extensions.py

Lines changed: 131 additions & 0 deletions
```python
from __future__ import annotations

import sys
import os
import ctypes
import functools
import pathlib

from typing import (
    Any,
    Callable,
    List,
    Union,
    Optional,
    TYPE_CHECKING,
    TypeVar,
    Generic,
)
from typing_extensions import TypeAlias


# Load the library
def load_shared_library(lib_base_name: str, base_path: pathlib.Path):
    """Platform independent shared library loader"""
    # Search for the library in the given directory under the name "libllama"
    # (default name for llama.cpp) and "llama" (default name for this repo)
    lib_paths: List[pathlib.Path] = []
    # Determine the file extension based on the platform
    if sys.platform.startswith("linux") or sys.platform.startswith("freebsd"):
        lib_paths += [
            base_path / f"lib{lib_base_name}.so",
        ]
    elif sys.platform == "darwin":
        lib_paths += [
            base_path / f"lib{lib_base_name}.so",
            base_path / f"lib{lib_base_name}.dylib",
        ]
    elif sys.platform == "win32":
        lib_paths += [
            base_path / f"{lib_base_name}.dll",
            base_path / f"lib{lib_base_name}.dll",
        ]
    else:
        raise RuntimeError("Unsupported platform")

    cdll_args = dict()  # type: ignore

    # Add the library directory to the DLL search path on Windows (if needed)
    if sys.platform == "win32":
        os.add_dll_directory(str(base_path))
        os.environ["PATH"] = str(base_path) + os.pathsep + os.environ["PATH"]

    if sys.platform == "win32" and sys.version_info >= (3, 8):
        os.add_dll_directory(str(base_path))
        if "CUDA_PATH" in os.environ:
            os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "bin"))
            os.add_dll_directory(os.path.join(os.environ["CUDA_PATH"], "lib"))
        if "HIP_PATH" in os.environ:
            os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "bin"))
            os.add_dll_directory(os.path.join(os.environ["HIP_PATH"], "lib"))
        cdll_args["winmode"] = ctypes.RTLD_GLOBAL

    # Try to load the shared library, handling potential errors
    for lib_path in lib_paths:
        if lib_path.exists():
            try:
                return ctypes.CDLL(str(lib_path), **cdll_args)  # type: ignore
            except Exception as e:
                raise RuntimeError(f"Failed to load shared library '{lib_path}': {e}")

    raise FileNotFoundError(
        f"Shared library with base name '{lib_base_name}' not found"
    )


# ctypes sane type hint helpers
#
# - Generic Pointer and Array types
# - PointerOrRef type with a type hinted byref function
#
# NOTE: Only use these for static type checking, not for runtime checks;
# no good will come of that.

if TYPE_CHECKING:
    CtypesCData = TypeVar("CtypesCData", bound=ctypes._CData)  # type: ignore

    CtypesArray: TypeAlias = ctypes.Array[CtypesCData]  # type: ignore

    CtypesPointer: TypeAlias = ctypes._Pointer[CtypesCData]  # type: ignore

    CtypesVoidPointer: TypeAlias = ctypes.c_void_p

    class CtypesRef(Generic[CtypesCData]):
        pass

    CtypesPointerOrRef: TypeAlias = Union[
        CtypesPointer[CtypesCData], CtypesRef[CtypesCData]
    ]

    CtypesFuncPointer: TypeAlias = ctypes._FuncPointer  # type: ignore

F = TypeVar("F", bound=Callable[..., Any])


def ctypes_function_for_shared_library(lib: ctypes.CDLL):
    """Decorator factory for defining ctypes functions with type hints"""

    def ctypes_function(
        name: str, argtypes: List[Any], restype: Any, enabled: bool = True
    ):
        def decorator(f: F) -> F:
            if enabled:
                func = getattr(lib, name)
                func.argtypes = argtypes
                func.restype = restype
                functools.wraps(f)(func)
                return func
            else:
                return f

        return decorator

    return ctypes_function


def _byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCData]:
    """Type-annotated version of ctypes.byref"""
    ...


byref = _byref if TYPE_CHECKING else ctypes.byref
```
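Together these helpers form the pattern the package's binding modules build on: load the shared library once, then declare each C function with its argtypes/restype and a type-hinted Python stub. Below is a minimal usage sketch (not part of this commit), assuming a `libllama` built into the package's `lib` directory; `llama_max_devices` is a real llama.cpp export used purely as an example:

```python
# Minimal sketch of the intended usage pattern (not part of this commit).
import ctypes
import pathlib

from llama_cpp._ctypes_extensions import (
    load_shared_library,
    ctypes_function_for_shared_library,
)

# Assumes the shared library was built into <package>/lib
_base_path = pathlib.Path(__file__).parent / "lib"
_lib = load_shared_library("llama", _base_path)
_llama_function = ctypes_function_for_shared_library(_lib)


# LLAMA_API size_t llama_max_devices(void);
@_llama_function("llama_max_devices", [], ctypes.c_size_t)
def llama_max_devices() -> int:
    ...


print(llama_max_devices())  # the decorator replaced the stub with the C function
```

The decorated stub exists only for static type checkers; at runtime the decorator returns the configured `ctypes` function object in its place.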

llama_cpp/_ggml.py

Lines changed: 143 additions & 0 deletions
```python
"""Internal module use at your own risk

This module provides a minimal interface for working with ggml tensors from llama-cpp-python
"""
import os
import pathlib
import ctypes

import llama_cpp._ctypes_extensions as ctypes_ext

import numpy as np


libggml_base_path = pathlib.Path(os.path.abspath(os.path.dirname(__file__))) / "lib"
libggml = ctypes_ext.load_shared_library("ggml", libggml_base_path)

ggml_function = ctypes_ext.ctypes_function_for_shared_library(libggml)


# define GGML_MAX_DIMS 4
GGML_MAX_DIMS = 4

# define GGML_MAX_OP_PARAMS 64
GGML_MAX_OP_PARAMS = 64

# define GGML_MAX_SRC 10
GGML_MAX_SRC = 10

# define GGML_MAX_NAME 64
GGML_MAX_NAME = 64


# // n-dimensional tensor
# struct ggml_tensor {
#     enum ggml_type type;
#
#     GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
#
#     struct ggml_backend_buffer * buffer;
#
#     int64_t ne[GGML_MAX_DIMS]; // number of elements
#     size_t  nb[GGML_MAX_DIMS]; // stride in bytes:
#                                // nb[0] = ggml_type_size(type)
#                                // nb[1] = nb[0]   * (ne[0] / ggml_blck_size(type)) + padding
#                                // nb[i] = nb[i-1] * ne[i-1]
#
#     // compute data
#     enum ggml_op op;
#
#     // op params - allocated as int32_t for alignment
#     int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
#
#     int32_t flags;
#
#     struct ggml_tensor * grad;
#     struct ggml_tensor * src[GGML_MAX_SRC];
#
#     // source tensor and offset for views
#     struct ggml_tensor * view_src;
#     size_t               view_offs;
#
#     void * data;
#
#     char name[GGML_MAX_NAME];
#
#     void * extra; // extra things e.g. for ggml-cuda.cu
#
#     // char padding[4];
# };
class ggml_tensor(ctypes.Structure):
    # NOTE: ctypes requires the attribute to be named _fields_ (not __fields__),
    # and the array sizes must match the C struct above for the layout to line up.
    _fields_ = [
        ("type", ctypes.c_int),
        ("backend", ctypes.c_int),  # deprecated, but still present in the C struct
        ("buffer", ctypes.c_void_p),
        ("ne", ctypes.c_int64 * GGML_MAX_DIMS),
        ("nb", ctypes.c_size_t * GGML_MAX_DIMS),
        ("op", ctypes.c_int),
        ("op_params", ctypes.c_int32 * (GGML_MAX_OP_PARAMS // 4)),  # 64 bytes of int32_t
        ("flags", ctypes.c_int32),
        ("grad", ctypes.c_void_p),
        ("src", ctypes.c_void_p * GGML_MAX_SRC),
        ("view_src", ctypes.c_void_p),
        ("view_offs", ctypes.c_size_t),
        ("data", ctypes.c_void_p),
        ("name", ctypes.c_char * GGML_MAX_NAME),
        ("extra", ctypes.c_void_p),
    ]


ggml_tensor_p = ctypes_ext.CtypesPointer[ggml_tensor]
ggml_tensor_p_ctypes = ctypes.POINTER(ggml_tensor)


# GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
@ggml_function(
    "ggml_backend_tensor_get",
    [ggml_tensor_p_ctypes, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_size_t],
    None,  # the C function returns void
)
def ggml_backend_tensor_get(
    tensor: ggml_tensor_p, data: ctypes.c_void_p, offset: int, size: int
) -> None:
    ...


# GGML_API GGML_CALL size_t ggml_nbytes (const struct ggml_tensor * tensor);
@ggml_function(
    "ggml_nbytes",
    [ggml_tensor_p_ctypes],
    ctypes.c_size_t,
)
def ggml_nbytes(tensor: ggml_tensor_p) -> int:
    ...


# GGML_API GGML_CALL int64_t ggml_nelements (const struct ggml_tensor * tensor);
@ggml_function(
    "ggml_nelements",
    [ggml_tensor_p_ctypes],
    ctypes.c_int64,
)
def ggml_nelements(tensor: ggml_tensor_p) -> int:
    ...


# GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars
@ggml_function(
    "ggml_n_dims",
    [ggml_tensor_p_ctypes],
    ctypes.c_int,
)
def ggml_n_dims(tensor: ggml_tensor_p) -> int:
    ...


def ggml_tensor_to_numpy(tensor: ggml_tensor_p):
    """Copy an f32 ggml tensor into a new numpy array."""
    nbytes = ggml_nbytes(tensor)
    nelements = ggml_nelements(tensor)
    # Assumes an f32 tensor; quantized types would need dequantization first
    assert nbytes == nelements * 4, "expected an f32 tensor"
    data = np.empty(nelements, dtype=np.float32)
    # Copy the tensor contents out of backend memory (CPU or GPU buffers)
    ggml_backend_tensor_get(
        tensor, ctypes.cast(data.ctypes.data, ctypes.c_void_p), 0, nbytes
    )
    # ggml stores ne[] innermost-first, so reverse it for a row-major numpy shape
    shape = tuple(reversed(tensor.contents.ne[: ggml_n_dims(tensor)]))
    return data.reshape(shape)
```