Commit 6773189

use tensor_data(t) and tensor_set_data(t,data)

1 parent 84d5475 commit 6773189

24 files changed: 676 additions, 667 deletions
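
The change is mechanical: every direct read of tensor->data becomes tensor_data(t), and every direct assignment becomes tensor_set_data(t, data), with the two accessors added in ggml/include/ggml.h. A minimal before/after sketch of the pattern (the tensor t here is illustrative, not a line from the diff):

    // before: direct field access
    float * src = (float *) t->data;
    t->data = malloc(ggml_nbytes(t));

    // after: accessor functions introduced by this commit
    float * src = (float *) tensor_data(t);
    tensor_set_data(t, malloc(ggml_nbytes(t)));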

common/common.cpp

Lines changed: 1 addition & 1 deletion
@@ -1977,7 +1977,7 @@ static common_control_vector_data common_control_vector_load_one(const common_co
     // extend if necessary - do not store data for layer 0 (it's not used)
     result.data.resize(std::max(result.data.size(), static_cast<size_t>(result.n_embd * layer_idx)), 0.0f);

-    const float * src = (const float *) tensor->data;
+    const float * src = (const float *) tensor_data(tensor);
     float * dst = result.data.data() + result.n_embd * (layer_idx - 1); // layer 1 at [0]
     for (int j = 0; j < result.n_embd; j++) {
         dst[j] += src[j] * load_info.strength; // allows multiple directions for same layer in same file

examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp

Lines changed: 2 additions & 2 deletions
@@ -408,12 +408,12 @@ static void init_model(struct my_llama_model * model) {
 }

 static float get_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
-    float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]);
+    float * ptr = (float *) ((char *) tensor_data(tensor) + i0*tensor->nb[0] + i1*tensor->nb[1]);
     return *ptr;
 }

 static int32_t get_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
-    int32_t * ptr = (int32_t *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1]);
+    int32_t * ptr = (int32_t *) ((char *) tensor_data(tensor) + i0*tensor->nb[0] + i1*tensor->nb[1]);
     return *ptr;
 }

examples/cvector-generator/cvector-generator.cpp

Lines changed: 14 additions & 14 deletions
@@ -81,8 +81,8 @@ struct callback_data {
         // copy tensor data
         auto n_bytes = ggml_nbytes(t);
         struct ggml_tensor * t_layer = ggml_new_tensor_2d(ctx_ggml, t->type, t->ne[0], t->ne[1]);
-        t_layer->data = malloc(n_bytes); // TODO @ngxson : get rid of this malloc somehow
-        ggml_backend_tensor_get(t, t_layer->data, 0, n_bytes);
+        tensor_set_data(t_layer, malloc(n_bytes)); // TODO @ngxson : get rid of this malloc somehow
+        ggml_backend_tensor_get(t, tensor_data(t_layer), 0, n_bytes);
         ggml_set_name(t_layer, ggml_get_name(t));
         //print_debug_tensor(t_layer);

@@ -98,8 +98,8 @@ struct callback_data {
     // NOTE: final layer is ignored. we only have (n_layers - 1) to process
     std::vector<struct ggml_tensor *> calc_diff() {
         for (float il = 0; il < v_pos.size(); il++) {
-            float * a = (float *) v_pos[il]->data;
-            float * b = (float *) v_neg[il]->data;
+            float * a = (float *) tensor_data(v_pos[il]);
+            float * b = (float *) tensor_data(v_neg[il]);
             size_t n_elem = ggml_nelements(v_pos[il]);
             for (size_t j = 0; j < n_elem; j++) {
                 a[j] -= b[j];
@@ -141,7 +141,7 @@ struct callback_data {
         struct ggml_tensor * diff_filtered = ggml_new_tensor_2d(
             ctx_ggml, GGML_TYPE_F32, n_embd, n_nonzero_rows);
         ggml_format_name(diff_filtered, "diff_filtered_%s", a->name);
-        diff_filtered->data = malloc(ggml_nbytes(diff_filtered));
+        tensor_set_data(diff_filtered, malloc(ggml_nbytes(diff_filtered)));

         // copy non-zero rows
         for (int dest_row = 0; dest_row < n_nonzero_rows; dest_row++) {
@@ -159,9 +159,9 @@ struct callback_data {

     // we don't implement destructor, because we want to reuse callback_data. we just want to free the tensors
     void reset() {
-        for (auto ptr : v_pos) free(ptr->data);
-        for (auto ptr : v_neg) free(ptr->data);
-        for (auto ptr : v_diff_filtered) free(ptr->data);
+        for (auto ptr : v_pos) free(tensor_data(ptr));
+        for (auto ptr : v_neg) free(tensor_data(ptr));
+        for (auto ptr : v_diff_filtered) free(tensor_data(ptr));
         v_pos.clear();
         v_neg.clear();
         v_diff_filtered.clear();
@@ -208,7 +208,7 @@ struct train_context {
            std::vector<uint8_t> empty;
            v_diff_tmp.push_back(empty);
            auto t = ggml_new_tensor_1d(ctx_ggml, GGML_TYPE_F32, n_embd);
-            t->data = malloc(ggml_nbytes(t)); // TODO: get rid of malloc if possible
+            tensor_set_data(t, malloc(ggml_nbytes(t))); // TODO: get rid of malloc if possible
            v_final.push_back(t);
        }
    }
@@ -221,7 +221,7 @@ struct train_context {
            auto & diff_tmp = v_diff_tmp[il];
            size_t curr_size = diff_tmp.size();
            diff_tmp.resize(curr_size + ggml_nbytes(t));
-            memcpy(diff_tmp.data() + curr_size, t->data, ggml_nbytes(t));
+            memcpy(diff_tmp.data() + curr_size, tensor_data(t), ggml_nbytes(t));
        }
    }

@@ -238,7 +238,7 @@ struct train_context {
                ? ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd)
                : ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_embd, n_rows);
            ggml_set_name(diff, (std::string("diff_") + std::to_string(il)).c_str());
-            diff->data = malloc(ggml_nbytes(diff)); // TODO: get rid of this malloc if possible
+            tensor_set_data(diff, malloc(ggml_nbytes(diff))); // TODO: get rid of this malloc if possible
            if (transpose) {
                // copy data & transpose
                float * arr = (float *) diff_tmp.data();
@@ -250,7 +250,7 @@ struct train_context {
            }
        } else {
            // only copy
-            memcpy(diff->data, diff_tmp.data(), ggml_nbytes(diff));
+            memcpy(tensor_data(diff), diff_tmp.data(), ggml_nbytes(diff));
        }
        v_diff.push_back(diff);
        print_debug_tensor(diff);
@@ -260,8 +260,8 @@ struct train_context {
    }

    ~train_context() {
-        for (auto ptr : v_final) free(ptr->data);
-        for (auto ptr : v_diff) free(ptr->data);
+        for (auto ptr : v_final) free(tensor_data(ptr));
+        for (auto ptr : v_diff) free(tensor_data(ptr));
        // no need to free v_diff_tmp, since we didn't use malloc
        ggml_free(ctx_ggml);
    }
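
cvector-generator allocates these host-side buffers itself, so the accessor pair now brackets the whole lifecycle of each scratch tensor. A condensed sketch of that ownership pattern, using illustrative names (scratch and n_embd as in the code above):

    struct ggml_tensor * scratch = ggml_new_tensor_1d(ctx_ggml, GGML_TYPE_F32, n_embd);
    tensor_set_data(scratch, malloc(ggml_nbytes(scratch))); // host buffer owned by this code
    // ... write and read through tensor_data(scratch) ...
    free(tensor_data(scratch)); // freed manually, as in reset() / ~train_context()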

examples/cvector-generator/pca.hpp

Lines changed: 2 additions & 2 deletions
@@ -102,7 +102,7 @@ struct pca_model {
         ggml_set_name(dev_square, "dev_square");
         ggml_set_name(dev_eigenvector, "dev_eigenvector");
         buffer = ggml_backend_alloc_ctx_tensors(ctx, backend);
-        ggml_backend_tensor_set(dev_input, t_input->data, 0, ggml_nbytes(t_input));
+        ggml_backend_tensor_set(dev_input, tensor_data(t_input), 0, ggml_nbytes(t_input));

         // initialize eigenvector to random normalized vector
         {
@@ -285,7 +285,7 @@ static void power_iteration(

     // get output tensor
     GGML_ASSERT(last_eigenvector);
-    ggml_backend_tensor_get(last_eigenvector, output->data, 0, ggml_nbytes(last_eigenvector));
+    ggml_backend_tensor_get(last_eigenvector, tensor_data(output), 0, ggml_nbytes(last_eigenvector));
     //print_debug_tensor(output);
     ggml_gallocr_free(allocr);

examples/eval-callback/eval-callback.cpp

Lines changed: 1 addition & 1 deletion
@@ -119,7 +119,7 @@ static bool ggml_debug(struct ggml_tensor * t, bool ask, void * user_data) {
     }

     if (!ggml_is_quantized(t->type)) {
-        uint8_t * data = is_host ? (uint8_t *) t->data : cb_data->data.data();
+        uint8_t * data = is_host ? (uint8_t *) tensor_data(t) : cb_data->data.data();
         ggml_print_tensor(data, t->type, t->ne, t->nb, 3);
     }

examples/gguf-hash/gguf-hash.cpp

Lines changed: 1 addition & 1 deletion
@@ -336,7 +336,7 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
         const char * name = gguf_get_tensor_name(ctx, i);
         struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
         auto n_bytes = ggml_nbytes(cur);
-        auto *raw_data = cur->data;
+        auto *raw_data = tensor_data(cur);
         const std::string tensor_layer_name = fname + ":" + name;

         if (hash_params.xxh64) {

examples/gguf/gguf.cpp

Lines changed: 4 additions & 4 deletions
@@ -63,7 +63,7 @@ static bool gguf_ex_write(const std::string & fname) {
         ggml_set_name(cur, name.c_str());

         {
-            float * data = (float *) cur->data;
+            float * data = (float *) tensor_data(cur);
             for (int j = 0; j < ggml_nelements(cur); ++j) {
                 data[j] = 100 + i;
             }
@@ -201,10 +201,10 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {
         struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);

         printf("%s: tensor[%d]: n_dims = %d, ne = (%d, %d, %d, %d), name = %s, data = %p\n",
-                __func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, cur->data);
+                __func__, i, ggml_n_dims(cur), int(cur->ne[0]), int(cur->ne[1]), int(cur->ne[2]), int(cur->ne[3]), cur->name, tensor_data(cur));

         // print first 10 elements
-        const float * data = (const float *) cur->data;
+        const float * data = (const float *) tensor_data(cur);

         printf("%s data[:10] : ", name);
         for (int j = 0; j < MIN(10, ggml_nelements(cur)); ++j) {
@@ -214,7 +214,7 @@ static bool gguf_ex_read_1(const std::string & fname, bool check_data) {

         // check data
         if (check_data) {
-            const float * data = (const float *) cur->data;
+            const float * data = (const float *) tensor_data(cur);
             for (int j = 0; j < ggml_nelements(cur); ++j) {
                 if (data[j] != 100 + i) {
                     fprintf(stderr, "%s: tensor[%d], data[%d]: found %f, expected %f\n", __func__, i, j, data[j], float(100 + i));

examples/imatrix/imatrix.cpp

Lines changed: 1 addition & 1 deletion
@@ -97,7 +97,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
        ggml_backend_tensor_get(src1, m_src1_data.data(), 0, ggml_nbytes(src1));
    }

-    const float * data = is_host ? (const float *) src1->data : m_src1_data.data();
+    const float * data = is_host ? (const float *) tensor_data(src1) : m_src1_data.data();

    // this has been adapted to the new format of storing merged experts in a single 3d tensor
    // ref: https://github.com/ggml-org/llama.cpp/pull/6387

examples/llava/clip.cpp

Lines changed: 4 additions & 4 deletions
@@ -1607,7 +1607,7 @@ struct clip_ctx * clip_init(const char * fname, struct clip_context_params ctx_p
            int num_bytes = ggml_nbytes(cur);
            if (ggml_backend_buft_is_host(buft)) {
                // for the CPU and Metal backend, we can read directly into the tensor
-                fin.read(reinterpret_cast<char *>(cur->data), num_bytes);
+                fin.read(reinterpret_cast<char *>(tensor_data(cur)), num_bytes);
            } else {
                // read into a temporary buffer first, then copy to device memory
                read_buf.resize(num_bytes);
@@ -3054,14 +3054,14 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i

        switch (cur->type) {
            case GGML_TYPE_F32:
-                f32_data = (float *)cur->data;
+                f32_data = (float *)tensor_data(cur);
                break;
            case GGML_TYPE_F16:
                if (conv_buf.size() < n_elms) {
                    conv_buf.resize(n_elms);
                }
                for (size_t j = 0; j < n_elms; ++j) {
-                    conv_buf[j] = ggml_fp16_to_fp32(((ggml_fp16_t *)cur->data)[j]);
+                    conv_buf[j] = ggml_fp16_to_fp32(((ggml_fp16_t *)tensor_data(cur))[j]);
                }
                f32_data = (float *)conv_buf.data();
                break;
@@ -3079,7 +3079,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
            new_size = ggml_quantize_chunk(new_type, f32_data, new_data, 0, n_elms/cur->ne[0], cur->ne[0], nullptr);
        } else {
            new_type = cur->type;
-            new_data = cur->data;
+            new_data = tensor_data(cur);
            new_size = ggml_nbytes(cur);
        }
        const size_t orig_size = ggml_nbytes(cur);

examples/llava/llava.cpp

Lines changed: 2 additions & 2 deletions
@@ -168,7 +168,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>
    // fill it with the image embeddings, ignoring the base
    for (size_t i = 1; i < num_images; i++) {
        size_t offset = (i-1) * clip_embd_nbytes(ctx_clip);
-        memcpy((uint8_t *)(image_features->data) + offset, image_embd_v[i], clip_embd_nbytes(ctx_clip));
+        memcpy((uint8_t *)tensor_data(image_features) + offset, image_embd_v[i], clip_embd_nbytes(ctx_clip));
    }

    struct ggml_cgraph * gf = ggml_new_graph(model.ctx);
@@ -202,7 +202,7 @@ static bool clip_llava_handle_patches(clip_ctx * ctx_clip, std::vector<float *>

    memcpy(image_embd_out, image_embd_v[0], clip_embd_nbytes(ctx_clip)); // main image as global context
    // append without newline tokens (default behavior in llava_arch when not using unpad ):
-    memcpy(image_embd_out + clip_n_patches(ctx_clip) * clip_n_mmproj_embd(ctx_clip), (float*)result->data, clip_embd_nbytes(ctx_clip) * (num_images-1)); // grid patches
+    memcpy(image_embd_out + clip_n_patches(ctx_clip) * clip_n_mmproj_embd(ctx_clip), (float*)tensor_data(result), clip_embd_nbytes(ctx_clip) * (num_images-1)); // grid patches
    *n_img_pos_out = static_cast<int>(result->ne[1]+clip_n_patches(ctx_clip));

    // Debug: Test single segments

ggml/include/ggml.h

Lines changed: 8 additions & 0 deletions
@@ -607,6 +607,14 @@ extern "C" {
        char padding[8];
    };

+    static inline void * tensor_data(const struct ggml_tensor * tensor) {
+        return tensor->data;
+    }
+
+    static inline void tensor_set_data(struct ggml_tensor * tensor, void * data) {
+        tensor->data = data;
+    }
+
    static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);

    // Abort callback
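
A short, self-contained usage sketch of the two new accessors (assuming a default CPU-backed context, where tensor data lives in host memory; the sizes here are arbitrary):

    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16 * 1024 * 1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

        // reads go through tensor_data() instead of touching t->data directly
        float * v = (float *) tensor_data(t);
        for (int i = 0; i < 8; ++i) {
            v[i] = (float) i;
        }

        ggml_free(ctx);
        return 0;
    }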

ggml/src/ggml-alloc.c

Lines changed: 9 additions & 9 deletions
@@ -472,7 +472,7 @@ static bool ggml_gallocr_is_own(ggml_gallocr_t galloc, struct ggml_tensor * t) {
 }

 static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor * t) {
-    return t->data != NULL || ggml_gallocr_hash_get(galloc, t)->allocated;
+    return tensor_data(t) != NULL || ggml_gallocr_hash_get(galloc, t)->allocated;
 }

 static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id) {
@@ -513,7 +513,7 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
            if (ggml_is_view(parent)) {
                struct ggml_tensor * view_src = parent->view_src;
                struct hash_node * view_src_hn = ggml_gallocr_hash_get(galloc, view_src);
-                if (view_src_hn->n_views == 1 && view_src_hn->n_children == 0 && view_src->data == parent->data) {
+                if (view_src_hn->n_views == 1 && view_src_hn->n_children == 0 && tensor_data(view_src) == tensor_data(parent)) {
                    AT_PRINTF("reusing view parent %s (%s) for %s\n", parent->name, view_src->name, node->name);
                    assert(view_src_hn->offset == p_hn->offset);
                    hn->buffer_id = p_hn->buffer_id;
@@ -704,7 +704,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
    for (int i = 0; i < graph->n_nodes; i++) {
        struct ggml_tensor * node = graph->nodes[i];
        struct node_alloc * node_alloc = &galloc->node_allocs[i];
-        if (node->view_src || node->data) {
+        if (node->view_src || tensor_data(node)) {
            node_alloc->dst.buffer_id = -1;
            node_alloc->dst.offset = SIZE_MAX;
            node_alloc->dst.size_max = 0;
@@ -716,7 +716,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
        }
        for (int j = 0; j < GGML_MAX_SRC; j++) {
            struct ggml_tensor * src = node->src[j];
-            if (!src || src->view_src || src->data) {
+            if (!src || src->view_src || tensor_data(src)) {
                node_alloc->src[j].buffer_id = -1;
                node_alloc->src[j].offset = SIZE_MAX;
                node_alloc->src[j].size_max = 0;
@@ -737,7 +737,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
    for (int i = 0; i < graph->n_leafs; i++) {
        struct ggml_tensor * leaf = graph->leafs[i];
        struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
-        if (leaf->view_src || leaf->data) {
+        if (leaf->view_src || tensor_data(leaf)) {
            galloc->leaf_allocs[i].leaf.buffer_id = -1;
            galloc->leaf_allocs[i].leaf.offset = SIZE_MAX;
            galloc->leaf_allocs[i].leaf.size_max = 0;
@@ -798,7 +798,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *
            ggml_backend_view_init(tensor);
        }
    } else {
-        if (tensor->data == NULL) {
+        if (tensor_data(tensor) == NULL) {
            assert(tensor_alloc->offset != SIZE_MAX);
            assert(ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], tensor) <= tensor_alloc->size_max);
            void * base = ggml_backend_buffer_get_base(galloc->buffers[buffer_id]);
@@ -815,7 +815,7 @@ static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor *

 static bool ggml_gallocr_node_needs_realloc(ggml_gallocr_t galloc, struct ggml_tensor * node, struct tensor_alloc * talloc) {
    size_t node_size = 0;
-    if (!node->data && !node->view_src) {
+    if (!tensor_data(node) && !node->view_src) {
        GGML_ASSERT(talloc->buffer_id >= 0); // prevent segfault when misusing the API
        node_size = ggml_backend_buft_get_alloc_size(galloc->bufts[talloc->buffer_id], node);
    }
@@ -959,7 +959,7 @@ static bool alloc_tensor_range(struct ggml_context * ctx,

    for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
        enum ggml_status status = GGML_STATUS_SUCCESS;
-        if (t->data == NULL) {
+        if (tensor_data(t) == NULL) {
            if (t->view_src == NULL) {
                status = ggml_tallocr_alloc(&tallocr, t);
            } else if (t->buffer == NULL) {
@@ -994,7 +994,7 @@ ggml_backend_buffer_t ggml_backend_alloc_ctx_tensors_from_buft(struct ggml_conte
    struct ggml_tensor * first = ggml_get_first_tensor(ctx);
    for (struct ggml_tensor * t = first; t != NULL; t = ggml_get_next_tensor(ctx, t)) {
        size_t this_size = 0;
-        if (t->data == NULL && t->view_src == NULL) {
+        if (tensor_data(t) == NULL && t->view_src == NULL) {
            this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
        }
