Skip to content

Commit f918e3f

Browse files
committed
[IGPU]: test new Kernel (V3/V4)
- update low-N GEMM kernel - add analysis trace - corrected __syncthreads
1 parent ebce03e commit f918e3f

7 files changed

+1603
-95
lines changed

README.md

+85-85
Large diffs are not rendered by default.

ggml/src/ggml-igpu/ggml-igpu.cpp

+30-4
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,30 @@
1111

1212
#include <iostream>
1313
#include <vector>
14+
#include <unordered_set>
1415

1516
//#define IGPU_TRACE(...) std::cout << "#> ggml-igpu: " << __VA_ARGS__ << std::endl
1617
#define IGPU_TRACE(...)
18+
//#define IGPU_DEV(...) std::cout << "#> ggml-igpu: " << __VA_ARGS__ << std::endl
19+
#define IGPU_DEV(...)
1720

18-
//#define BLOC_V1
19-
#define BLOC_V2
21+
//#define BLOC_V1 // better? N=[23-47] => figure out how to do "better" in this range.
22+
//#define BLOC_V2
23+
//#define BLOC_V3
24+
#define BLOC_V4 // OK N=[1-22] N=[48...]
2025

2126
#ifdef BLOC_V1
2227
#include "mulmat-bf16bloc_V1.h"
2328
#endif
2429
#ifdef BLOC_V2
2530
#include "mulmat-bf16bloc_V2.h"
2631
#endif
32+
#ifdef BLOC_V3
33+
#include "mulmat-bf16bloc_V3.h"
34+
#endif
35+
#ifdef BLOC_V4
36+
#include "mulmat-bf16bloc_V4.h"
37+
#endif
2738

2839
/*
2940
#> version bloc-bf16 V0.
@@ -72,7 +83,7 @@ namespace ggml::backend::igpu {
7283
// - cas RAM/CPU
7384
//m_data = new (std::align_val_t(32)) uint8_t[m_size];
7485
//GGML_ASSERT(m_data);
75-
// - cas HHIP/IGPU
86+
// - cas HIP/IGPU
7687
m_host_data = ggml::hip::allocateHost<uint8_t>(m_size);
7788
m_device_data = ggml::hip::getDeviceMem(m_host_data);
7889
}
@@ -373,7 +384,7 @@ namespace ggml::backend::igpu {
373384

374385
// TODO: return a reference!
375386
bool caps_host_buffer() override { return true; }
376-
buffer_type* get_host_buffer_type() override {
387+
ggml::cpp::backend::buffer_type* get_host_buffer_type() override {
377388
// IGPU_TRACE(" ####################  device[" << m_name << "] get_host_buffer_type!");
378389
return m_host_buffer_type;
379390
}
@@ -384,6 +395,21 @@ namespace ggml::backend::igpu {
384395
// std::vector<buffer_type> get_extra_bufts() override ; ???
385396

386397
bool supports_op(const ggml_tensor & op) override {
398+
// just to list all the OPs...
399+
static std::unordered_set<std::string> list_ops;
400+
if (list_ops.count(op.name) == 0) {
401+
list_ops.insert(op.name);
402+
IGPU_DEV("##>> op("<< op.name<<"<"<<ggml_op_name(op.op) <<">) : "
403+
<< ggml_type_name(op.type)<< "[" <<op.ne[0]<<", "<<op.ne[1]<<", "<<op.ne[2]<<", "<<op.ne[3]<<"]");
404+
for (int i=0; i<GGML_MAX_SRC; ++i) {
405+
if (op.src[i] != nullptr) {
406+
IGPU_DEV(" {"<<i<<"} " << op.src[i]->name << "<" <<ggml_type_name(op.src[i]->type)<<"> "
407+
<< "["<<op.src[i]->ne[0]<<", "<<op.src[i]->ne[1]<<", "<<op.src[i]->ne[2]<<", "<<op.src[i]->ne[3]<<"]");
408+
}
409+
}
410+
}
411+
412+
387413
switch (op.op) {
388414
case GGML_OP_NONE:
389415
case GGML_OP_RESHAPE:

ggml/src/ggml-igpu/mulmat-bf16bloc_V2.h

+3
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,9 @@ namespace ggml::backend::igpu {
215215
}
216216
}
217217
}
218+
219+
__syncthreads(); // OK: must wait until everything has been reformatted.
220+
218221
}
219222

220223
// save C (sauf fin)

0 commit comments

Comments
 (0)