File tree 3 files changed +49
-0
lines changed 3 files changed +49
-0
lines changed Original file line number Diff line number Diff line change @@ -198,6 +198,8 @@ option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adr
198
198
set (GGML_OPENCL_TARGET_VERSION "300" CACHE STRING
199
199
"ggml: OpenCL API version to target" )
200
200
201
+ option (GGML_NUMA_MIRROR "ggml: support numa aware tensor data" OFF )
202
+
201
203
# toolchain for vulkan-shaders-gen
202
204
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen" )
203
205
@@ -317,6 +319,18 @@ set(variable_set_statements
317
319
318
320
set (GGML_SHARED_LIB ${BUILD_SHARED_LIBS} )
319
321
322
if (GGML_NUMA_MIRROR)
    # Announce the feature in the configure log.
    # BUG FIX: the original first message() call was never closed before the
    # second message() started, which is a CMake parse error
    # ("Parse error. Function missing ending ')'").
    message(STATUS
        "-----------------\n "
        "Enabling GGML_NUMA_MIRROR")
    message(STATUS
        "-----------------")

    # Propagate the feature macro to consumers of both core targets so that
    # the GGML_NUMA_MIRROR code paths in the public headers stay consistent.
    # NOTE(review): assumes the "ggml" and "ggml-base" targets are already
    # defined at this point in the CMakeLists — confirm ordering.
    foreach (lib "ggml" "ggml-base")
        target_compile_definitions(${lib} PUBLIC GGML_NUMA_MIRROR)
    endforeach()
endif()
333
+
320
334
get_cmake_property (all_variables VARIABLES )
321
335
foreach (variable_name IN LISTS all_variables)
322
336
if (variable_name MATCHES "^GGML_" )
Original file line number Diff line number Diff line change @@ -598,21 +598,46 @@ extern "C" {
598
598
struct ggml_tensor * view_src ;
599
599
size_t view_offs ;
600
600
601
+ #ifdef GGML_NUMA_MIRROR
602
+ union {
603
+ #ifdef __NVCC__
604
+ void * data ;
605
+ #endif
606
+ void * __data [2 ];
607
+ };
608
+ #else
601
609
void * data ;
610
+ #endif
602
611
603
612
char name [GGML_MAX_NAME ];
604
613
605
614
void * extra ; // extra things e.g. for ggml-cuda.cu
606
615
616
+ #ifndef GGML_NUMA_MIRROR
607
617
char padding [8 ];
618
+ #endif
608
619
};
609
620
621
#ifdef GGML_NUMA_MIRROR
    // Per-thread NUMA node index consumed by tensor_data() to pick the
    // mirror pointer for the calling thread. Defined once in ggml.c.
    extern __thread int ggml_current_numa_node;
#endif
624
+
610
625
static inline void * tensor_data (const struct ggml_tensor * tensor ) {
626
+ #ifdef GGML_NUMA_MIRROR
627
+ int n = ggml_current_numa_node ;
628
+ return tensor -> __data [n ];
629
+ #else
611
630
return tensor -> data ;
631
+ #endif
612
632
}
613
633
614
634
static inline void tensor_set_data (struct ggml_tensor * tensor , void * data ) {
635
+ #ifdef GGML_NUMA_MIRROR
636
+ tensor -> __data [0 ] = data ;
637
+ tensor -> __data [1 ] = data ;
638
+ #else
615
639
tensor -> data = data ;
640
+ #endif
616
641
}
617
642
618
643
static const size_t GGML_TENSOR_SIZE = sizeof (struct ggml_tensor );
Original file line number Diff line number Diff line change 60
60
#define m512i (p ) (__m512i)(p)
61
61
#endif
62
62
63
#ifdef GGML_NUMA_MIRROR
// Definition of the thread-local NUMA node index declared extern in ggml.h.
// Starts at node 0 for every thread; tensor_data() indexes __data[] with it.
__thread int ggml_current_numa_node = 0;
#endif
66
+
63
67
// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
64
68
float ggml_table_f32_f16 [1 << 16 ];
65
69
@@ -1609,10 +1613,16 @@ static struct ggml_tensor * ggml_new_tensor_impl(
1609
1613
/*.src =*/ { NULL },
1610
1614
/*.view_src =*/ view_src ,
1611
1615
/*.view_offs =*/ view_offs ,
1616
+ #ifdef GGML_NUMA_MIRROR
1617
+ /*.data =*/ { .__data = { NULL , NULL } },
1618
+ #else
1612
1619
/*.data =*/ NULL ,
1620
+ #endif
1613
1621
/*.name =*/ { 0 },
1614
1622
/*.extra =*/ NULL ,
1623
+ #ifndef GGML_NUMA_MIRROR
1615
1624
/*.padding =*/ { 0 },
1625
+ #endif
1616
1626
};
1617
1627
tensor_set_data (result , obj_alloc_size > 0 ? (void * )(result + 1 ) : data );
1618
1628
You can’t perform that action at this time.
0 commit comments