|
| 1 | +"""Internal module; use at your own risk. |
| 2 | +
|
| 3 | +This module provides a minimal interface for working with ggml tensors from llama-cpp-python |
| 4 | +""" |
| 5 | +import os |
| 6 | +import pathlib |
| 7 | + |
| 8 | +import ctypes |
| 9 | + |
| 10 | +import llama_cpp._ctypes_extensions as ctypes_ext |
| 11 | + |
| 12 | +import numpy as np |
| 13 | + |
| 14 | + |
# Directory searched for the shared library: the "lib" folder located next to
# this module file.
libggml_base_path = pathlib.Path(
    os.path.abspath(os.path.dirname(__file__))
).joinpath("lib")
# Result of loading the library named "ggml" from that directory
# (presumably a ctypes library handle -- confirm against ctypes_ext).
libggml = ctypes_ext.load_shared_library("ggml", libggml_base_path)

# Decorator factory: the bindings below call it with (symbol name, argtypes,
# restype) to attach a Python stub to a function exported by libggml.
ggml_function = ctypes_ext.ctypes_function_for_shared_library(libggml)
| 19 | + |
| 20 | + |
# define GGML_MAX_DIMS 4
GGML_MAX_DIMS = 4

# define GGML_MAX_OP_PARAMS 64
GGML_MAX_OP_PARAMS = 64

# define GGML_MAX_SRC 10
GGML_MAX_SRC = 10

# define GGML_MAX_NAME 64
GGML_MAX_NAME = 64


# // n-dimensional tensor
# struct ggml_tensor {
#     enum ggml_type type;
#
#     GGML_DEPRECATED(enum ggml_backend_type backend, "use the buffer type to find the storage location of the tensor");
#
#     struct ggml_backend_buffer * buffer;
#
#     int64_t ne[GGML_MAX_DIMS]; // number of elements
#     size_t  nb[GGML_MAX_DIMS]; // stride in bytes:
#                                // nb[0] = ggml_type_size(type)
#                                // nb[1] = nb[0]   * (ne[0] / ggml_blck_size(type)) + padding
#                                // nb[i] = nb[i-1] * ne[i-1]
#
#     // compute data
#     enum ggml_op op;
#
#     // op params - allocated as int32_t for alignment
#     int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
#
#     int32_t flags;
#
#     struct ggml_tensor * grad;
#     struct ggml_tensor * src[GGML_MAX_SRC];
#
#     // source tensor and offset for views
#     struct ggml_tensor * view_src;
#     size_t               view_offs;
#
#     void * data;
#
#     char name[GGML_MAX_NAME];
#
#     void * extra; // extra things e.g. for ggml-cuda.cu
#
#     // char padding[4];
# };
class ggml_tensor(ctypes.Structure):
    """ctypes mirror of the C ``struct ggml_tensor`` quoted above.

    Member order and array lengths follow that declaration one-to-one, using
    the ``GGML_MAX_*`` constants for the array sizes, so that field offsets
    computed by ctypes line up with the C layout.

    NOTE(review): the layout must match the libggml build that is actually
    loaded; confirm against its ggml.h when upgrading the library.
    """

    # ctypes builds the memory layout from ``_fields_`` (single underscores);
    # any other attribute name is ignored and yields a field-less structure.
    _fields_ = [
        ("type", ctypes.c_int),  # enum ggml_type
        # Deprecated in C but still a real member, so it still occupies space.
        ("backend", ctypes.c_int),  # enum ggml_backend_type
        ("buffer", ctypes.c_void_p),  # struct ggml_backend_buffer *
        ("ne", ctypes.c_int64 * GGML_MAX_DIMS),  # number of elements
        ("nb", ctypes.c_size_t * GGML_MAX_DIMS),  # stride in bytes
        ("op", ctypes.c_int),  # enum ggml_op
        (
            "op_params",
            # GGML_MAX_OP_PARAMS is a byte count; the C array holds int32_t.
            ctypes.c_int32 * (GGML_MAX_OP_PARAMS // ctypes.sizeof(ctypes.c_int32)),
        ),
        ("flags", ctypes.c_int32),
        ("grad", ctypes.c_void_p),  # struct ggml_tensor *
        ("src", ctypes.c_void_p * GGML_MAX_SRC),  # struct ggml_tensor *[]
        ("view_src", ctypes.c_void_p),  # struct ggml_tensor *
        ("view_offs", ctypes.c_size_t),
        ("data", ctypes.c_void_p),
        ("name", ctypes.c_char * GGML_MAX_NAME),
        ("extra", ctypes.c_void_p),
    ]
| 88 | + |
| 89 | + |
# Concrete ctypes pointer type; this is what the bindings below list in their
# argtypes.
ggml_tensor_p_ctypes = ctypes.POINTER(ggml_tensor)
# Subscripted CtypesPointer alias; used only as the annotation on the
# ``tensor`` parameters of the Python stubs below.
ggml_tensor_p = ctypes_ext.CtypesPointer[ggml_tensor]
| 92 | + |
| 93 | + |
| 94 | +# GGML_API GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); |
@ggml_function(
    "ggml_backend_tensor_get",
    [ggml_tensor_p_ctypes, ctypes.c_void_p, ctypes.c_size_t, ctypes.c_size_t],
    # The C function returns void; in ctypes that is expressed as restype
    # None. Declaring c_void_p here would make ctypes decode a return value
    # that the callee never sets.
    None,
)
def ggml_backend_tensor_get(
    tensor: ggml_tensor_p, data: ctypes.c_void_p, offset: int, size: int
) -> None:
    """Binding for the C function ``ggml_backend_tensor_get``.

    Args:
        tensor: Pointer to the source ``ggml_tensor`` (``const`` in C).
        data: Pointer to the caller-owned buffer (``void *``).
        offset: ``size_t`` offset argument.
        size: ``size_t`` size argument.

    Presumably copies ``size`` bytes, starting ``offset`` bytes into the
    tensor's storage, into ``data`` -- confirm against ggml-backend.h.
    """
    ...
| 104 | + |
| 105 | + |
| 106 | +# GGML_API GGML_CALL size_t ggml_nbytes (const struct ggml_tensor * tensor); |
@ggml_function("ggml_nbytes", [ggml_tensor_p_ctypes], ctypes.c_size_t)
def ggml_nbytes(tensor: ggml_tensor_p) -> int:
    """Binding for the C function ``ggml_nbytes`` (returns ``size_t``).

    Takes a pointer to a ``ggml_tensor``; the result is used elsewhere in
    this module as the tensor's data size in bytes.
    """
| 114 | + |
| 115 | + |
| 116 | +# GGML_API GGML_CALL int64_t ggml_nelements (const struct ggml_tensor * tensor); |
@ggml_function("ggml_nelements", [ggml_tensor_p_ctypes], ctypes.c_int64)
def ggml_nelements(tensor: ggml_tensor_p) -> int:
    """Binding for the C function ``ggml_nelements`` (returns ``int64_t``).

    Takes a pointer to a ``ggml_tensor``; the result is used elsewhere in
    this module as the tensor's total element count.
    """
| 124 | + |
| 125 | + |
| 126 | +# GGML_API int ggml_n_dims (const struct ggml_tensor * tensor); // returns 1 for scalars |
@ggml_function("ggml_n_dims", [ggml_tensor_p_ctypes], ctypes.c_int)
def ggml_n_dims(tensor: ggml_tensor_p) -> int:
    """Binding for the C function ``ggml_n_dims`` (returns ``int``).

    Takes a pointer to a ``ggml_tensor``. Per the C header comment, the
    result is 1 for scalars.
    """
| 134 | + |
| 135 | + |
def ggml_tensor_to_numpy(tensor: ggml_tensor_p) -> np.ndarray:
    """Copy the contents of a ggml tensor into a new float32 NumPy array.

    Args:
        tensor: Pointer to the ``ggml_tensor`` to read.

    Returns:
        A freshly allocated ``float32`` array holding the tensor data, shaped
        with the tensor's first ``ggml_n_dims`` extents in reversed order.

    Raises:
        ValueError: If the tensor's byte size is not exactly four bytes per
            element, i.e. the data cannot be a plain float32 payload.
    """
    nbytes = ggml_nbytes(tensor)
    nelements = ggml_nelements(tensor)
    out = np.empty(nelements, dtype=np.float32)

    # The copy below writes ``nbytes`` bytes into ``out``. Refuse anything
    # that does not fill the float32 buffer exactly: a larger payload would
    # overrun it, a smaller one would leave uninitialised elements.
    if nbytes != out.nbytes:
        raise ValueError(
            f"tensor occupies {nbytes} bytes but {nelements} float32 elements "
            f"need {out.nbytes} bytes; only float32 tensors are supported"
        )

    ggml_backend_tensor_get(tensor, out.ctypes.data_as(ctypes.c_void_p), 0, nbytes)

    # ggml strides start at the element size (nb[0] = type size), so ne[0] is
    # the contiguous dimension. A C-ordered NumPy array keeps its contiguous
    # dimension last, hence the extents are reversed.
    n_dims = ggml_n_dims(tensor)
    shape = tuple(reversed(tensor.contents.ne[:n_dims]))
    return out.reshape(shape)
0 commit comments