2
2
3
3
#define GGML_COMMON_DECL_C
4
4
#include "ggml-common.h"
5
-
6
5
#include "ggml.h"
7
6
8
- // les definitions / converstion FP8 <=> FP32
9
7
#ifdef __cplusplus
10
8
extern "C" {
11
9
#endif
@@ -14,28 +12,28 @@ extern "C" {
14
12
typedef struct { uint8_t bits ; } ggml_e4m3_t ;
15
13
typedef struct { uint8_t bits ; } ggml_e3m4_t ;
16
14
17
- void ggml_e5m2_to_fp32_row (const ggml_e5m2_t * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
18
- void ggml_fp32_to_e5m2_row (const float * GGML_RESTRICT x , ggml_e5m2_t * GGML_RESTRICT y , int64_t k );
19
- void ggml_fp32_to_e5m2_row_ref (const float * GGML_RESTRICT x , ggml_e5m2_t * GGML_RESTRICT y , int64_t k );
15
+ GGML_API void ggml_e5m2_to_fp32_row (const ggml_e5m2_t * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
16
+ GGML_API void ggml_fp32_to_e5m2_row (const float * GGML_RESTRICT x , ggml_e5m2_t * GGML_RESTRICT y , int64_t k );
17
+ GGML_API void ggml_fp32_to_e5m2_row_ref (const float * GGML_RESTRICT x , ggml_e5m2_t * GGML_RESTRICT y , int64_t k );
20
18
21
- void ggml_e4m3_to_fp32_row (const ggml_e4m3_t * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
22
- void ggml_fp32_to_e4m3_row (const float * GGML_RESTRICT x , ggml_e4m3_t * GGML_RESTRICT y , int64_t k );
23
- void ggml_fp32_to_e4m3_row_ref (const float * GGML_RESTRICT x , ggml_e4m3_t * GGML_RESTRICT y , int64_t k );
19
+ GGML_API void ggml_e4m3_to_fp32_row (const ggml_e4m3_t * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
20
+ GGML_API void ggml_fp32_to_e4m3_row (const float * GGML_RESTRICT x , ggml_e4m3_t * GGML_RESTRICT y , int64_t k );
21
+ GGML_API void ggml_fp32_to_e4m3_row_ref (const float * GGML_RESTRICT x , ggml_e4m3_t * GGML_RESTRICT y , int64_t k );
24
22
25
- void dequantize_row_e4m3_q (const block_e4m3_q * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
26
- void quantize_row_e4m3_q (const float * GGML_RESTRICT x , block_e4m3_q * GGML_RESTRICT y , int64_t k );
27
- void quantize_row_e4m3_q_ref (const float * GGML_RESTRICT x , block_e4m3_q * GGML_RESTRICT y , int64_t k );
23
+ GGML_API void dequantize_row_e4m3_q (const block_e4m3_q * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
24
+ GGML_API void quantize_row_e4m3_q (const float * GGML_RESTRICT x , block_e4m3_q * GGML_RESTRICT y , int64_t k );
25
+ GGML_API void quantize_row_e4m3_q_ref (const float * GGML_RESTRICT x , block_e4m3_q * GGML_RESTRICT y , int64_t k );
28
26
29
- void dequantize_row_e3m4_q (const block_e3m4_q * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
30
- void quantize_row_e3m4_q (const float * GGML_RESTRICT x , block_e3m4_q * GGML_RESTRICT y , int64_t k );
31
- void quantize_row_e3m4_q_ref (const float * GGML_RESTRICT x , block_e3m4_q * GGML_RESTRICT y , int64_t k );
27
+ GGML_API void dequantize_row_e3m4_q (const block_e3m4_q * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k );
28
+ GGML_API void quantize_row_e3m4_q (const float * GGML_RESTRICT x , block_e3m4_q * GGML_RESTRICT y , int64_t k );
29
+ GGML_API void quantize_row_e3m4_q_ref (const float * GGML_RESTRICT x , block_e3m4_q * GGML_RESTRICT y , int64_t k );
32
30
33
31
// TODO: the best vec-dot type depends on the CPU (fp32 / bf16 / fp16)
34
32
#define GGML_FP8_VECT_DOT_TYPE GGML_TYPE_F32
35
- void ggml_vec_dot_e5m2 (int n , float * GGML_RESTRICT s , size_t bs , const ggml_e5m2_t * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
36
- void ggml_vec_dot_e4m3 (int n , float * GGML_RESTRICT s , size_t bs , const ggml_e4m3_t * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
37
- void ggml_vec_dot_e4m3_q (int n , float * GGML_RESTRICT s , size_t bs , const block_e4m3_q * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
38
- void ggml_vec_dot_e3m4_q (int n , float * GGML_RESTRICT s , size_t bs , const block_e3m4_q * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
33
+ GGML_API void ggml_vec_dot_e5m2 (int n , float * GGML_RESTRICT s , size_t bs , const ggml_e5m2_t * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
34
+ GGML_API void ggml_vec_dot_e4m3 (int n , float * GGML_RESTRICT s , size_t bs , const ggml_e4m3_t * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
35
+ GGML_API void ggml_vec_dot_e4m3_q (int n , float * GGML_RESTRICT s , size_t bs , const block_e4m3_q * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
36
+ GGML_API void ggml_vec_dot_e3m4_q (int n , float * GGML_RESTRICT s , size_t bs , const block_e3m4_q * GGML_RESTRICT vx , size_t bx , const float * GGML_RESTRICT vy , size_t by , int nrc );
39
37
40
38
#ifdef __cplusplus
41
39
}
0 commit comments