Commit 6773189

use tensor_data(t) and tensor_set_data(t,data)

1 parent 84d5475 commit 6773189

24 files changed: 676 additions, 667 deletions
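
The change is mechanical: every direct read of tensor->data becomes tensor_data(t), and every direct assignment becomes tensor_set_data(t, data), with the two accessors added in ggml/include/ggml.h. A minimal before/after sketch of the pattern (the tensor t here is illustrative, not a line from the diff):

    // before: direct field access
    float * src = (float *) t->data;
    t->data = malloc(ggml_nbytes(t));

    // after: accessor functions introduced by this commit
    float * src = (float *) tensor_data(t);
    tensor_set_data(t, malloc(ggml_nbytes(t)));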

common/common.cpp

Lines changed: 1 addition & 1 deletion
@@ -1977,7 +1977,7 @@ static common_control_vector_data common_control_vector_load_one(const common_co
     // extend if necessary - do not store data for layer 0 (it's not used)
     result.data.resize(std::max(result.data.size(), static_cast<size_t>(result.n_embd * layer_idx)), 0.0f);

-    const float * src = (const float *) tensor->data;
+    const float * src = (const float *) tensor_data(tensor);
     float * dst = result.data.data() + result.n_embd * (layer_idx - 1); // layer 1 at [0]
     for (int j = 0; j < result.n_embd; j++) {
         dst[j] += src[j] * load_info.strength; // allows multiple directions for same layer in same file

examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp

Lines changed: 2 additions & 2 deletions
@@ -408,12 +408,12 @@ static void init_model(struct my_llama_model * model) {
 }

 static float get_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
-    float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]);
+    float * ptr = (float *) ((char *) tensor_data(tensor) + i0*tensor->nb[0] + i1*tensor->nb[1]);
     return *ptr;
 }

 static int32_t get_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
-    int32_t * ptr = (int32_t *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]);
+    int32_t * ptr = (int32_t *) ((char *) tensor_data(tensor) + i0*tensor->nb[0] + i1*tensor->nb[1]);
     return *ptr;
 }

examples/cvector-generator/cvector-generator.cpp

Lines changed: 14 additions & 14 deletions
@@ -81,8 +81,8 @@ struct callback_data {
         // copy tensor data
         auto n_bytes = ggml_nbytes(t);
         struct ggml_tensor * t_layer = ggml_new_tensor_2d(ctx_ggml, t->type, t->ne[0], t->ne[1]);
-        t_layer->data = malloc(n_bytes); // TODO @ngxson : get rid of this malloc somehow
-        ggml_backend_tensor_get(t, t_layer->data, 0, n_bytes);
+        tensor_set_data(t_layer, malloc(n_bytes)); // TODO @ngxson : get rid of this malloc somehow
+        ggml_backend_tensor_get(t, tensor_data(t_layer), 0, n_bytes);
         ggml_set_name(t_layer, ggml_get_name(t));
         //print_debug_tensor(t_layer);

@@ -98,8 +98,8 @@ struct callback_data {
     // NOTE: final layer is ignored. we only have (n_layers - 1) to process
     std::vector<struct ggml_tensor *> calc_diff() {
         for (float il = 0; il < v_pos.size(); il++) {
-            float * a = (float *) v_pos[il]->data;
-            float * b = (float *) v_neg[il]->data;
+            float * a = (float *) tensor_data(v_pos[il]);
+            float * b = (float *) tensor_data(v_neg[il]);
             size_t n_elem = ggml_nelements(v_pos[il]);
             for (size_t j = 0; j < n_elem; j++) {
                 a[j] -= b[j];
@@ -141,7 +141,7 @@ struct callback_data {
         struct ggml_tensor * diff_filtered = ggml_new_tensor_2d(
             ctx_ggml, GGML_TYPE_F32, n_embd, n_nonzero_rows);
         ggml_format_name(diff_filtered, "diff_filtered_%s", a->name);
-        diff_filtered->data = malloc(ggml_nbytes(diff_filtered));
+        tensor_set_data(diff_filtered, malloc(ggml_nbytes(diff_filtered)));

         // copy non-zero rows
         for (int dest_row = 0; dest_row < n_nonzero_rows; dest_row++) {
@@ -159,9 +159,9 @@ struct callback_data {

     // we don't implement destructor, because we want to reuse callback_data. we just want to free the tensors
     void reset() {
-        for (auto ptr : v_pos) free(ptr->data);
-        for (auto ptr : v_neg) free(ptr->data);
-        for (auto ptr : v_diff_filtered) free(ptr->data);
+        for (auto ptr : v_pos) free(tensor_data(ptr));
+        for (auto ptr : v_neg) free(tensor_data(ptr));
+        for (auto ptr : v_diff_filtered) free(tensor_data(ptr));
         v_pos.clear();
         v_neg.clear();
         v_diff_filtered.clear();
@@ -208,7 +208,7 @@ struct train_context {
            std::vector<uint8_t> empty;
            v_diff_tmp.push_back(empty);
            auto t = ggml_new_tensor_1d(ctx_ggml, GGML_TYPE_F32, n_embd);
-            t->data = malloc(ggml_nbytes(t)); // TODO: get rid of malloc if possible
+            tensor_set_data(t, malloc(ggml_nbytes(t))); // TODO: get rid of malloc if possible
            v_final.push_back(t);
        }
    }
@@ -221,7 +221,7 @@ struct train_context {
            auto & diff_tmp = v_diff_tmp[il];
            size_t curr_size = diff_tmp.size();
            diff_tmp.resize(curr_size + ggml_nbytes(t));
-            memcpy(diff_tmp.data() + curr_size, t->data, ggml_nbytes(t));
+            memcpy(diff_tmp.data() + curr_size, tensor_data(t), ggml_nbytes(t));
        }
    }

@@ -238,7 +238,7 @@ struct train_context {
                ? ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd)
                : ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_embd, n_rows);
            ggml_set_name(diff, (std::string("diff_") + std::to_string(il)).c_str());
-            diff->data = malloc(ggml_nbytes(diff)); // TODO: get rid of this malloc if possible
+            tensor_set_data(diff, malloc(ggml_nbytes(diff))); // TODO: get rid of this malloc if possible
            if (transpose) {
                // copy data & transpose
                float * arr = (float *) diff_tmp.data();
@@ -250,7 +250,7 @@ struct train_context {
            }
        } else {
            // only copy
-            memcpy(diff->data, diff_tmp.data(), ggml_nbytes(diff));
+            memcpy(tensor_data(diff), diff_tmp.data(), ggml_nbytes(diff));
        }
        v_diff.push_back(diff);
        print_debug_tensor(diff);
@@ -260,8 +260,8 @@ struct train_context {
    }

    ~train_context() {
-        for (auto ptr : v_final) free(ptr->data);
-        for (auto ptr : v_diff) free(ptr->data);
+        for (auto ptr : v_final) free(tensor_data(ptr));
+        for (auto ptr : v_diff) free(tensor_data(ptr));
        // no need to free v_diff_tmp, since we didn't use malloc
        ggml_free(ctx_ggml);
    }
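
cvector-generator allocates these host-side buffers itself, so the accessor pair now brackets the whole lifecycle of each scratch tensor. A condensed sketch of that ownership pattern, using illustrative names (scratch and n_embd as in the code above):

    struct ggml_tensor * scratch = ggml_new_tensor_1d(ctx_ggml, GGML_TYPE_F32, n_embd);
    tensor_set_data(scratch, malloc(ggml_nbytes(scratch))); // host buffer owned by this code
    // ... write and read through tensor_data(scratch) ...
    free(tensor_data(scratch)); // freed manually, as in reset() / ~train_context()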

examples/cvector-generator/pca.hpp

Lines changed: 2 additions & 2 deletions
@@ -102,7 +102,7 @@ struct pca_model {
         ggml_set_name(dev_square, "dev_square");
         ggml_set_name(dev_eigenvector, "dev_eigenvector");
         buffer = ggml_backend_alloc_ctx_tensors(ctx, backend);
-        ggml_backend_tensor_set(dev_input, t_input->data, 0, ggml_nbytes(t_input));
+        ggml_backend_tensor_set(dev_input, tensor_data(t_input), 0, ggml_nbytes(t_input));

         // initialize eigenvector to random normalized vector
         {
@@ -285,7 +285,7 @@ static void power_iteration(

     // get output tensor
     GGML_ASSERT(last_eigenvector);
-    ggml_backend_tensor_get(last_eigenvector, output->data, 0, ggml_nbytes(last_eigenvector));
+    ggml_backend_tensor_get(last_eigenvector, tensor_data(output), 0, ggml_nbytes(last_eigenvector));
     //print_debug_tensor(output);
     ggml_gallocr_free(allocr);

examples/eval-callback/eval-callback.cpp

Lines changed: 1 addition & 1 deletion
@@ -119,7 +119,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
     }

     if (!ggml_is_quantized(t->type)) {
-        uint8_t * data = is_host ? (uint8_t *) t->data : cb_data->data.data();
+        uint8_t * data = is_host ? (uint8_t *) tensor_data(t) : cb_data->data.data();
         ggml_print_tensor(data, t->type, t->ne, t->nb, 3);
     }

examples/gguf-hash/gguf-hash.cpp

Lines changed: 1 addition & 1 deletion
@@ -336,7 +336,7 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
         const char * name = gguf_get_tensor_name(ctx, i);
         struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
         auto n_bytes = ggml_nbytes(cur);
-        auto *raw_data = cur->data;
+        auto *raw_data = tensor_data(cur);
         const std::string tensor_layer_name = fname + ":" + name;

         if (hash_params.xxh64) {

examples/gguf/gguf.cpp

Lines changed: 4 additions & 4 deletions
@@ -63,7 +63,7 @@ static bool gguf_ex_write(const std::string & fname) {
         ggml_set_name(cur, name.c_str());

         {
-            float * data = (float *) cur->data;
+            float * data = (float *) tensor_data(cur);
             for (int j = 0; j < ggml_nelements(cur); ++j) {
                 data[j] = 100 + i;
             }
@@ -201,10 +201,10 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
         struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);

         printf("%s: tensor[%d]: n_dims = %d, ne = (%d, %d, %d, %d), name = %s, data = %p\n",
-                __func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, cur->data);
+                __func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, tensor_data(cur));

         // print first 10 elements
-        const float * data = (const float *) cur->data;
+        const float * data = (const float *) tensor_data(cur);

         printf("%s data[:10] : ", name);
         for (int j = 0; j < MIN(10, ggml_nelements(cur)); ++j) {
@@ -214,7 +214,7 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {

         // check data
         if (check_data) {
-            const float * data = (const float *) cur->data;
+            const float * data = (const float *) tensor_data(cur);
             for (int j = 0; j < ggml_nelements(cur); ++j) {
                 if (data[j] != 100 + i) {
                     fprintf(stderr, "%s: tensor[%d], data[%d]: found %f, expected %f\n", __func__, i, j, data[j], float(100 + i));

examples/imatrix/imatrix.cpp

Lines changed: 1 addition & 1 deletion
@@ -97,7 +97,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
        ggml_backend_tensor_get(src1, m_src1_data.data(), 0, ggml_nbytes(src1));
    }

-    const float * data = is_host ? (const float *) src1->data : m_src1_data.data();
+    const float * data = is_host ? (const float *) tensor_data(src1) : m_src1_data.data();

    // this has been adapted to the new format of storing merged experts in a single 3d tensor
    // ref: https://github.com/ggml-org/llama.cpp/pull/6387

examples/llava/clip.cpp

Lines changed: 4 additions & 4 deletions
@@ -1607,7 +1607,7 @@ struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_p
            int num_bytes = ggml_nbytes(cur);
            if (ggml_backend_buft_is_host(buft)) {
                // for the CPU and Metal backend, we can read directly into the tensor
-                fin.read(reinterpret_cast<char *>(cur->data), num_bytes);
+                fin.read(reinterpret_cast<char *>(tensor_data(cur)), num_bytes);
            } else {
                // read into a temporary buffer first, then copy to device memory
                read_buf.resize(num_bytes);
@@ -3054,14 +3054,14 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i

        switch (cur->type) {
            case GGML_TYPE_F32:
-                f32_data = (float *)cur->data;
+                f32_data = (float *)tensor_data(cur);
                break;
            case GGML_TYPE_F16:
                if (conv_buf.size() < n_elms) {
                    conv_buf.resize(n_elms);
                }
                for (size_t j = 0; j < n_elms; ++j) {
-                    conv_buf[j] = ggml_fp16_to_fp32(((ggml_fp16_t *)cur->data)[j]);
+                    conv_buf[j] = ggml_fp16_to_fp32(((ggml_fp16_t *)tensor_data(cur))[j]);
                }
                f32_data = (float *)conv_buf.data();
                break;
@@ -3079,7 +3079,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
            new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, n_elms/cur->ne[0], cur->ne[0], nullptr);
        } else {
            new_type = cur->type;
-            new_data = cur->data;
+            new_data = tensor_data(cur);
            new_size = ggml_nbytes(cur);
        }
        const size_t orig_size = ggml_nbytes(cur);

examples/llava/llava.cpp

Lines changed: 2 additions & 2 deletions
@@ -168,7 +168,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
    // fill it with the image embeddings, ignoring the base
    for (size_t i = 1; i < num_images; i++) {
        size_t offset = (i-1) * clip_embd_nbytes(ctx_clip);
-        memcpy((uint8_t *)(image_features->data) + offset, image_embd_v[i], clip_embd_nbytes(ctx_clip));
+        memcpy((uint8_t *)tensor_data(image_features) + offset, image_embd_v[i], clip_embd_nbytes(ctx_clip));
    }

    struct ggml_cgraph * gf = ggml_new_graph(model.ctx);
@@ -202,7 +202,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>

    memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip)); // main image as global context
    // append without newline tokens (default behavior in llava_arch when not using unpad ):
-    memcpy(image_embd_out + clip_n_patches(ctx_clip) * clip_n_mmproj_embd(ctx_clip), (float*)result->data, clip_embd_nbytes(ctx_clip) * (num_images-1)); // grid patches
+    memcpy(image_embd_out + clip_n_patches(ctx_clip) * clip_n_mmproj_embd(ctx_clip), (float*)tensor_data(result), clip_embd_nbytes(ctx_clip) * (num_images-1)); // grid patches
    *n_img_pos_out = static_cast<int>(result->ne[1]+clip_n_patches(ctx_clip));

    // Debug: Test single segments

ggml/include/ggml.h

Lines changed: 8 additions & 0 deletions
@@ -607,6 +607,14 @@ extern "C" {
        char padding[8];
    };

+    static inline void * tensor_data(const struct ggml_tensor * tensor) {
+        return tensor->data;
+    }
+
+    static inline void tensor_set_data(struct ggml_tensor * tensor, void * data) {
+        tensor->data = data;
+    }
+
    static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);

    // Abort callback
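
A short, self-contained usage sketch of the two new accessors (assuming a default CPU-backed context, where tensor data lives in host memory; the sizes here are arbitrary):

    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16 * 1024 * 1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

        // reads go through tensor_data() instead of touching t->data directly
        float * v = (float *) tensor_data(t);
        for (int i = 0; i < 8; ++i) {
            v[i] = (float) i;
        }

        ggml_free(ctx);
        return 0;
    }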

ggml/src/ggml-alloc.c

Lines changed: 9 additions & 9 deletions
@@ -472,7 +472,7 @@ static bool ggml_gallocr_is_own(ggml_gallocr_t galloc, struct ggml_tensor * t) {
 }

 static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor * t) {
-    return t->data != NULL || ggml_gallocr_hash_get(galloc, t)->allocated;
+    return tensor_data(t) != NULL || ggml_gallocr_hash_get(galloc, t)->allocated;
 }

 static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id) {
@@ -513,7 +513,7 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
            if (ggml_is_view(parent)) {
                struct ggml_tensor * view_src = parent->view_src;
                struct hash_node * view_src_hn = ggml_gallocr_hash_get(galloc, view_src);
-                if (view_src_hn->n_views == 1 && view_src_hn->n_children == 0 && view_src->data == parent->data) {
+                if (view_src_hn->n_views == 1 && view_src_hn->n_children == 0 && tensor_data(view_src) == tensor_data(parent)) {
                    AT_PRINTF("reusing view parent %s (%s) for %s\n", parent->name, view_src->name, node->name);
                    assert(view_src_hn->offset == p_hn->offset);
                    hn->buffer_id = p_hn->buffer_id;
@@ -704,7 +704,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
    for (int i = 0; i < graph->n_nodes; i++) {
        struct ggml_tensor * node = graph->nodes[i];
        struct node_alloc * node_alloc = &galloc->node_allocs[i];
-        if (node->view_src || node->data) {
+        if (node->view_src || tensor_data(node)) {
            node_alloc->dst.buffer_id = -1;
            node_alloc->dst.offset = SIZE_MAX;
            node_alloc->dst.size_max = 0;
@@ -716,7 +716,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
        }
        for (int j = 0; j < GGML_MAX_SRC; j++) {
            struct ggml_tensor * src = node->src[j];
-            if (!src || src->view_src || src->data) {
+            if (!src || src->view_src || tensor_data(src)) {
                node_alloc->src[j].buffer_id = -1;
                node_alloc->src[j].offset = SIZE_MAX;
                node_alloc->src[j].size_max = 0;
@@ -737,7 +737,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
    for (int i = 0; i < graph->n_leafs; i++) {
        struct ggml_tensor * leaf = graph->leafs[i];
        struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
-        if (leaf->view_src || leaf->data) {
+        if (leaf->view_src || tensor_data(leaf)) {
            galloc->leaf_allocs[i].leaf.buffer_id = -1;
            galloc->leaf_allocs[i].leaf.offset = SIZE_MAX;
            galloc->leaf_allocs[i].leaf.size_max = 0;
@@ -798,7 +798,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
            ggml_backend_view_init(tensor);
        }
    } else {
-        if (tensor->data == NULL) {
+        if (tensor_data(tensor) == NULL) {
            assert(tensor_alloc->offset != SIZE_MAX);
            assert(ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], tensor) <= tensor_alloc->size_max);
            void * base = ggml_backend_buffer_get_base(galloc->buffers[buffer_id]);
@@ -815,7 +815,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *

 static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
    size_t node_size = 0;
-    if (!node->data && !node->view_src) {
+    if (!tensor_data(node) && !node->view_src) {
        GGML_ASSERT(talloc->buffer_id >= 0); // prevent segfault when misusing the API
        node_size = ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node);
    }
@@ -959,7 +959,7 @@ static bool alloc_tensor_range(struct ggml_context * ctx,

    for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
        enum ggml_status status = GGML_STATUS_SUCCESS;
-        if (t->data == NULL) {
+        if (tensor_data(t) == NULL) {
            if (t->view_src == NULL) {
                status = ggml_tallocr_alloc(&tallocr, t);
            } else if (t->buffer == NULL) {
@@ -994,7 +994,7 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
    struct ggml_tensor * first = ggml_get_first_tensor(ctx);
    for (struct ggml_tensor * t = first; t != NULL; t = ggml_get_next_tensor(ctx, t)) {
        size_t this_size = 0;
-        if (t->data == NULL && t->view_src == NULL) {
+        if (tensor_data(t) == NULL && t->view_src == NULL) {
            this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
        }
