11
11
12
12
#include < iostream>
13
13
#include < vector>
14
+ #include < unordered_set>
14
15
15
16
// #define IGPU_TRACE(...) std::cout << "#> ggml-igpu: " << __VA_ARGS__ << std::endl
16
17
// Disabled trace macro: a function-like variadic macro that swallows its arguments and
// expands to nothing. There must be no whitespace between the macro name and '(';
// otherwise the preprocessor defines an object-like macro whose replacement text is "(...)".
// To enable tracing, use the std::cout variant that is commented out just above instead.
#define IGPU_TRACE(...)
18
+ // #define IGPU_DEV(...) std::cout << "#> ggml-igpu: " << __VA_ARGS__ << std::endl
19
+ #define IGPU_DEV (...)
17
20
18
- // #define BLOC_V1
19
- #define BLOC_V2
21
+ // #define BLOC_V1 // better? N=[23-47] => see how to do "better" in this range.
22
+ // #define BLOC_V2
23
+ // #define BLOC_V3
24
+ #define BLOC_V4 // OK N=[1-22] N=[48...]
20
25
21
26
#ifdef BLOC_V1
22
27
#include " mulmat-bf16bloc_V1.h"
23
28
#endif
24
29
#ifdef BLOC_V2
25
30
#include " mulmat-bf16bloc_V2.h"
26
31
#endif
32
+ #ifdef BLOC_V3
33
+ #include " mulmat-bf16bloc_V3.h"
34
+ #endif
35
+ #ifdef BLOC_V4
36
+ #include " mulmat-bf16bloc_V4.h"
37
+ #endif
27
38
28
39
/*
29
40
#> version bloc-bf16 V0.
@@ -72,7 +83,7 @@ namespace ggml::backend::igpu {
72
83
// - cas RAM/CPU
73
84
// m_data = new (std::align_val_t(32)) uint8_t[m_size];
74
85
// GGML_ASSERT(m_data);
75
- // - cas HHIP /IGPU
86
+ // - cas HIP /IGPU
76
87
m_host_data = ggml::hip::allocateHost<uint8_t >(m_size);
77
88
m_device_data = ggml::hip::getDeviceMem (m_host_data);
78
89
}
@@ -373,7 +384,7 @@ namespace ggml::backend::igpu {
373
384
374
385
// TODO: return a reference!
375
386
// Host-buffer capability query: this override unconditionally answers true.
bool caps_host_buffer() override {
    return true;
}
376
// Accessor override: returns the m_host_buffer_type member as a buffer_type pointer.
// The removed/added pair below differs only in the namespace qualification of the return type.
- buffer_type* get_host_buffer_type () override {
387
+ ggml::cpp::backend:: buffer_type* get_host_buffer_type () override {
377
388
// IGPU_TRACE(" #################### device[" << m_name << "] get_host_buffer_type!");
378
389
return m_host_buffer_type;
379
390
}
@@ -384,6 +395,21 @@ namespace ggml::backend::igpu {
384
395
// std::vector<buffer_type> get_extra_bufts() override ; ???
385
396
386
397
bool supports_op (const ggml_tensor & op) override {
398
+ // histoire de lister toutes les OPs...
399
+ static std::unordered_set<std::string> list_ops;
400
+ if (list_ops.count (op.name ) == 0 ) {
401
+ list_ops.insert (op.name );
402
+ IGPU_DEV (" ##>> op(" << op.name <<" <" <<ggml_op_name (op.op ) <<" >) : "
403
+ << ggml_type_name (op.type )<< " [" <<op.ne [0 ]<<" , " <<op.ne [1 ]<<" , " <<op.ne [2 ]<<" , " <<op.ne [3 ]<<" ]" );
404
+ for (int i=0 ; i<GGML_MAX_SRC; ++i) {
405
+ if (op.src [i] != nullptr ) {
406
+ IGPU_DEV (" {" <<i<<" } " << op.src [i]->name << " <" <<ggml_type_name (op.src [i]->type )<<" > "
407
+ << " [" <<op.src [i]->ne [0 ]<<" , " <<op.src [i]->ne [1 ]<<" , " <<op.src [i]->ne [2 ]<<" , " <<op.src [i]->ne [3 ]<<" ]" );
408
+ }
409
+ }
410
+ }
411
+
412
+
387
413
switch (op.op ) {
388
414
case GGML_OP_NONE:
389
415
case GGML_OP_RESHAPE:
0 commit comments