6
6
typedef uint16_t ggml_half;
7
7
typedef uint32_t ggml_half2;
8
8
9
- #define GGML_COMMON_AGGR
9
+ #define GGML_COMMON_AGGR_U
10
+ #define GGML_COMMON_AGGR_S
11
+
12
+ #define GGML_COMMON_DECL
13
+ #elif defined(GGML_COMMON_DECL_CPP)
14
+ #include < cstdint>
15
+
16
+ typedef uint16_t ggml_half;
17
+ typedef uint32_t ggml_half2;
18
+
19
+ // standard C++ allows anonymous unions, but some compilers warn about them
20
+ #define GGML_COMMON_AGGR_U data
21
+ // standard C++ does not allow anonymous structs
22
+ #define GGML_COMMON_AGGR_S data
10
23
11
24
#define GGML_COMMON_DECL
12
25
#elif defined(GGML_COMMON_DECL_METAL)
@@ -15,7 +28,8 @@ typedef uint32_t ggml_half2;
15
28
typedef half ggml_half;
16
29
typedef half2 ggml_half2;
17
30
18
- #define GGML_COMMON_AGGR
31
+ #define GGML_COMMON_AGGR_U
32
+ #define GGML_COMMON_AGGR_S
19
33
20
34
#define GGML_COMMON_DECL
21
35
#elif defined(GGML_COMMON_DECL_CUDA)
@@ -29,7 +43,8 @@ typedef half2 ggml_half2;
29
43
typedef half ggml_half;
30
44
typedef half2 ggml_half2;
31
45
32
- #define GGML_COMMON_AGGR data
46
+ #define GGML_COMMON_AGGR_U
47
+ #define GGML_COMMON_AGGR_S data
33
48
34
49
#define GGML_COMMON_DECL
35
50
#elif defined(GGML_COMMON_DECL_HIP)
@@ -39,7 +54,8 @@ typedef half2 ggml_half2;
39
54
typedef half ggml_half;
40
55
typedef half2 ggml_half2;
41
56
42
- #define GGML_COMMON_AGGR data
57
+ #define GGML_COMMON_AGGR_U
58
+ #define GGML_COMMON_AGGR_S data
43
59
44
60
#define GGML_COMMON_DECL
45
61
#elif defined(GGML_COMMON_DECL_SYCL)
@@ -49,7 +65,8 @@ typedef half2 ggml_half2;
49
65
typedef sycl::half ggml_half;
50
66
typedef sycl::half2 ggml_half2;
51
67
52
- #define GGML_COMMON_AGGR data
68
+ #define GGML_COMMON_AGGR_U
69
+ #define GGML_COMMON_AGGR_S data
53
70
54
71
#define GGML_COMMON_DECL
55
72
#endif
@@ -154,9 +171,9 @@ typedef struct {
154
171
struct {
155
172
ggml_half d; // delta
156
173
ggml_half m; // min
157
- } GGML_COMMON_AGGR ;
174
+ } GGML_COMMON_AGGR_S ;
158
175
ggml_half2 dm;
159
- };
176
+ } GGML_COMMON_AGGR_U ;
160
177
uint8_t qs[QK4_1 / 2 ]; // nibbles / quants
161
178
} block_q4_1;
162
179
static_assert (sizeof (block_q4_1) == 2 * sizeof(ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
@@ -175,9 +192,9 @@ typedef struct {
175
192
struct {
176
193
ggml_half d; // delta
177
194
ggml_half m; // min
178
- } GGML_COMMON_AGGR ;
195
+ } GGML_COMMON_AGGR_S ;
179
196
ggml_half2 dm;
180
- };
197
+ } GGML_COMMON_AGGR_U ;
181
198
uint8_t qh[4 ]; // 5-th bit of quants
182
199
uint8_t qs[QK5_1 / 2 ]; // nibbles / quants
183
200
} block_q5_1;
@@ -196,9 +213,9 @@ typedef struct {
196
213
struct {
197
214
ggml_half d; // delta
198
215
ggml_half s; // d * sum(qs[i])
199
- } GGML_COMMON_AGGR ;
216
+ } GGML_COMMON_AGGR_S ;
200
217
ggml_half2 ds;
201
- };
218
+ } GGML_COMMON_AGGR_U ;
202
219
int8_t qs[QK8_1]; // quants
203
220
} block_q8_1;
204
221
static_assert (sizeof (block_q8_1) == 2*sizeof(ggml_half) + QK8_1, "wrong q8_1 block size/padding");
@@ -261,9 +278,9 @@ typedef struct {
261
278
struct {
262
279
ggml_half d; // super-block scale for quantized scales
263
280
ggml_half dmin; // super-block scale for quantized mins
264
- } GGML_COMMON_AGGR ;
281
+ } GGML_COMMON_AGGR_S ;
265
282
ggml_half2 dm;
266
- };
283
+ } GGML_COMMON_AGGR_U ;
267
284
} block_q2_K;
268
285
static_assert (sizeof (block_q2_K) == 2*sizeof(ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
269
286
@@ -288,9 +305,9 @@ typedef struct {
288
305
struct {
289
306
ggml_half d; // super-block scale for quantized scales
290
307
ggml_half dmin; // super-block scale for quantized mins
291
- } GGML_COMMON_AGGR ;
308
+ } GGML_COMMON_AGGR_S ;
292
309
ggml_half2 dm;
293
- };
310
+ } GGML_COMMON_AGGR_U ;
294
311
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
295
312
uint8_t qs[QK_K/2 ]; // 4--bit quants
296
313
} block_q4_K;
@@ -305,9 +322,9 @@ typedef struct {
305
322
struct {
306
323
ggml_half d; // super-block scale for quantized scales
307
324
ggml_half dmin; // super-block scale for quantized mins
308
- } GGML_COMMON_AGGR ;
325
+ } GGML_COMMON_AGGR_S ;
309
326
ggml_half2 dm;
310
- };
327
+ } GGML_COMMON_AGGR_U ;
311
328
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
312
329
uint8_t qh[QK_K/8 ]; // quants, high bit
313
330
uint8_t qs[QK_K/2 ]; // quants, low 4 bits
@@ -424,6 +441,24 @@ typedef struct {
424
441
} block_iq4_nlx4;
425
442
static_assert (sizeof (block_iq4_nlx4) == 4 * sizeof(ggml_half) + QK4_NL * 2, "wrong iq4_nlx4 block size/padding");
426
443
444
+ // fp8 support
445
+ // - fp8 simple type
446
+ typedef struct { uint8_t bits; } ggml_e5m2_t ;
447
+ typedef struct { uint8_t bits; } ggml_e4m3_t ;
448
+
449
+ // - fp8 with block delta => 8.125 bpw
450
+ typedef struct {
451
+ float d; // delta
452
+ uint8_t qs[QK_K];
453
+ } block_e4m3_q;
454
+ static_assert (sizeof (block_e4m3_q) == sizeof(float ) + QK_K, "wrong block_e4m3_q block size/padding");
455
+
456
+ typedef struct {
457
+ float d; // delta
458
+ uint8_t qs[QK_K];
459
+ } block_e3m4_q;
460
+ static_assert (sizeof (block_e3m4_q) == sizeof(float ) + QK_K, "wrong block_e3m4_q block size/padding");
461
+
427
462
#endif // GGML_COMMON_DECL
428
463
#endif // GGML_COMMON_DECL
429
464
@@ -437,6 +472,13 @@ static_assert(sizeof(block_iq4_nlx4) == 4 * sizeof(ggml_half) + QK4_NL * 2, "wro
437
472
#define GGML_TABLE_BEGIN (type, name, size ) static const type name[size] = {
438
473
#define GGML_TABLE_END () };
439
474
475
+ #define GGML_COMMON_IMPL
476
+ #elif defined(GGML_COMMON_IMPL_CPP)
477
+ #include < cstdint>
478
+
479
+ #define GGML_TABLE_BEGIN (type, name, size ) static const type name[size] = {
480
+ #define GGML_TABLE_END () };
481
+
440
482
#define GGML_COMMON_IMPL
441
483
#elif defined(GGML_COMMON_IMPL_METAL)
442
484
#include < metal_stdlib>
0 commit comments