This repository has been archived by the owner on Dec 17, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.cpp
154 lines (134 loc) · 4.13 KB
/
utils.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
#include "utils.hpp"
#include "similarity_transform.hpp"
#include <random>
// Writes the identity matrix into the dim x dim row-major array `mat`.
//
// The host array is zeroed first, then a 1-D kernel with one work-item per
// row stores 1.0f on the diagonal.
//
// q        queue the kernel is submitted to
// mat      host-accessible storage for dim * dim floats
// dim      number of rows/columns; also the global range of the kernel
// wg_size  work-group size of the kernel; SYCL requires the global range
//          (dim) to be a multiple of it
// evts     events the kernel must wait for before it starts
//
// Returns the event of the submitted kernel.
// NOTE(review): assuming buffer_2d aliases a sycl::buffer, its destruction at
// function exit already waits for the kernel and copies the data back into
// `mat` - confirm against utils.hpp.
sycl::event
identity_matrix(sycl::queue& q,
                float* const mat,
                const uint dim,
                const uint wg_size,
                std::vector<sycl::event> evts)
{
  // The kernel only touches the diagonal, so clear everything else up front.
  const size_t n_bytes = sizeof(float) * dim * dim;
  memset(mat, 0, n_bytes);

  buffer_2d mat_buf{ mat, sycl::range<2>{ dim, dim } };
  const sycl::nd_range<1> launch_cfg{ sycl::range<1>{ dim },
                                      sycl::range<1>{ wg_size } };

  return q.submit([&](sycl::handler& cgh) {
    cgh.depends_on(evts);
    global_2d_writer diag{ mat_buf, cgh };

    cgh.parallel_for<class kernelIdentityMatrix>(
      launch_cfg, [=](sycl::nd_item<1> item) {
        const size_t k = item.get_global_id(0);
        diag[k][k] = 1.f;
      });
  });
}
// Asserts that each of the first `dim` entries of `vec` is exactly 1.0f.
//
// The verification is done with `assert`, so it has no effect in builds that
// define NDEBUG.
void
check(const float* vec, const uint dim)
{
  const float* const last = vec + dim;
  for (const float* cur = vec; cur != last; ++cur) {
    assert(*cur == 1.f);
  }
}
// Fills `vec` with the sequence 1, 2, ..., dim (element i receives i + 1).
//
// The host array is zeroed first, then a 1-D kernel with one work-item per
// element stores the value.
//
// q        queue the kernel is submitted to
// vec      host-accessible storage for dim floats
// dim      number of elements; also the global range of the kernel
// wg_size  work-group size of the kernel; SYCL requires the global range
//          (dim) to be a multiple of it
// evts     events the kernel must wait for before it starts
//
// Returns the event of the submitted kernel.
// NOTE(review): assuming buffer_1d aliases a sycl::buffer, its destruction at
// function exit already waits for the kernel and copies the data back into
// `vec` - confirm against utils.hpp.
sycl::event
generate_vector(sycl::queue& q,
                float* const vec,
                const uint dim,
                const uint wg_size,
                std::vector<sycl::event> evts)
{
  const size_t n_bytes = sizeof(float) * dim;
  memset(vec, 0, n_bytes);

  buffer_1d vec_buf{ vec, sycl::range<1>{ dim } };
  const sycl::nd_range<1> launch_cfg{ sycl::range<1>{ dim },
                                      sycl::range<1>{ wg_size } };

  return q.submit([&](sycl::handler& cgh) {
    cgh.depends_on(evts);
    global_1d_writer out{ vec_buf, cgh };

    cgh.parallel_for<class kernelGenerateVector>(
      launch_cfg, [=](sycl::nd_item<1> item) {
        const size_t pos = item.get_global_id(0);
        // One-based value: element 0 holds 1, element 1 holds 2, ...
        out[pos] = pos + 1;
      });
  });
}
// Returns the largest absolute difference between `vec[i] / max` and
// `eigen_vec[i]` over the first `dim` elements.
//
// vec        vector whose entries are divided by `max` before comparing
// eigen_vec  reference vector to compare against
// max        divisor applied to every entry of `vec`
// dim        number of elements compared
//
// The result is 0 when `dim` is 0. A difference that evaluates to NaN never
// replaces the running maximum.
float
check_eigen_vector(const float* vec,
                   const float* eigen_vec,
                   const float max,
                   const uint dim)
{
  float worst = 0.f;

  uint idx = 0;
  while (idx < dim) {
    const float scaled = vec[idx] / max;
    const float gap = std::abs(scaled - eigen_vec[idx]);
    if (worst < gap) {
      worst = gap;
    }
    ++idx;
  }

  return worst;
}
// Fills `vec` with the constant 1 + 1e-4 in every element.
//
// The host array is zeroed first, then a 1-D kernel with one work-item per
// element stores the value.
//
// q        queue the kernel is submitted to
// vec      host-accessible storage for dim floats
// dim      number of elements; also the global range of the kernel
// wg_size  work-group size of the kernel; SYCL requires the global range
//          (dim) to be a multiple of it
// evts     events the kernel must wait for before it starts
//
// Returns the event of the submitted kernel.
// NOTE(review): judging by the name this is the "should converge" fixture for
// a stop-criterion test - confirm with the caller.
sycl::event
stop_criteria_test_success_data(sycl::queue& q,
                                float* const vec,
                                const uint dim,
                                const uint wg_size,
                                std::vector<sycl::event> evts)
{
  const float eps = 1e-4f;

  const size_t n_bytes = sizeof(float) * dim;
  memset(vec, 0, n_bytes);

  buffer_1d vec_buf{ vec, sycl::range<1>{ dim } };
  const sycl::nd_range<1> launch_cfg{ sycl::range<1>{ dim },
                                      sycl::range<1>{ wg_size } };

  return q.submit([&](sycl::handler& cgh) {
    cgh.depends_on(evts);
    global_1d_writer out{ vec_buf, cgh };

    cgh.parallel_for<class kernelStopCriteriaTestSuccessData>(
      launch_cfg, [=](sycl::nd_item<1> item) {
        const size_t pos = item.get_global_id(0);
        // Same value everywhere: one plus a small offset.
        out[pos] = 1.f + eps;
      });
  });
}
// Fills `vec` with the ramp (i + 1) * 1e-4 for element index i.
//
// The host array is zeroed first, then a 1-D kernel with one work-item per
// element stores the value.
//
// q        queue the kernel is submitted to
// vec      host-accessible storage for dim floats
// dim      number of elements; also the global range of the kernel
// wg_size  work-group size of the kernel; SYCL requires the global range
//          (dim) to be a multiple of it
// evts     events the kernel must wait for before it starts
//
// Returns the event of the submitted kernel.
// NOTE(review): judging by the name this is the "should not converge" fixture
// for a stop-criterion test - confirm with the caller.
sycl::event
stop_criteria_test_fail_data(sycl::queue& q,
                             float* const vec,
                             const uint dim,
                             const uint wg_size,
                             std::vector<sycl::event> evts)
{
  const float eps = 1e-4f;

  const size_t n_bytes = sizeof(float) * dim;
  memset(vec, 0, n_bytes);

  buffer_1d vec_buf{ vec, sycl::range<1>{ dim } };
  const sycl::nd_range<1> launch_cfg{ sycl::range<1>{ dim },
                                      sycl::range<1>{ wg_size } };

  return q.submit([&](sycl::handler& cgh) {
    cgh.depends_on(evts);
    global_1d_writer out{ vec_buf, cgh };

    cgh.parallel_for<class kernelStopCriteriaTestFailData>(
      launch_cfg, [=](sycl::nd_item<1> item) {
        const size_t pos = item.get_global_id(0);
        // Every element differs: one-based index scaled by the small step.
        out[pos] = static_cast<float>(pos + 1) * eps;
      });
  });
}
// Overwrites the first `dim` entries of `vec` with pseudo-random floats drawn
// from std::uniform_real_distribution<float>(0, 1).
//
// The Mersenne Twister engine is seeded from std::random_device on every
// call, so the output is not reproducible between calls.
void
generate_random_vector(float* const vec, const uint dim)
{
  std::random_device seed_source;
  std::mt19937 engine(seed_source());
  std::uniform_real_distribution<float> unit_interval(0.f, 1.f);

  float* const stop = vec + dim;
  for (float* slot = vec; slot != stop; ++slot) {
    *slot = unit_interval(engine);
  }
}
// Fills the dim x dim row-major array `mat` with the Hilbert matrix, i.e.
// mat[r][c] = 1 / (r + c + 1) for zero-based r and c, and blocks until the
// kernel has finished.
//
// q    queue the kernel is submitted to
// mat  host-accessible storage for dim * dim floats
// dim  number of rows/columns; any value is accepted, it does not have to be
//      a multiple of the work-group width
//
// The kernel uses a fixed 1 x 32 work-group. SYCL requires every global
// extent to be a multiple of the matching work-group extent, so the column
// extent is rounded up to the next multiple of 32 and the surplus work-items
// return without writing.
void
generate_hilbert_matrix(sycl::queue& q, float* const mat, const uint dim)
{
  // Nothing to generate; also avoids launching over an empty range.
  if (dim == 0) {
    return;
  }

  constexpr size_t wg_cols = 32;
  const size_t n = dim;
  const size_t padded_cols = ((n + wg_cols - 1) / wg_cols) * wg_cols;

  buffer_2d buf_mat{ mat, sycl::range<2>{ dim, dim } };
  auto evt = q.submit([&](sycl::handler& h) {
    // Every in-range element is overwritten, so the old contents are not
    // needed on the device.
    global_2d_writer acc_mat{ buf_mat, h, sycl::no_init };
    h.parallel_for(
      sycl::nd_range<2>{ sycl::range<2>{ n, padded_cols },
                         sycl::range<2>{ 1, wg_cols } },
      [=](sycl::nd_item<2> it) {
        const size_t r = it.get_global_id(0);
        const size_t c = it.get_global_id(1);
        // Work-items in the padding region have no matrix element.
        if (c >= n) {
          return;
        }
        acc_mat[r][c] = 1.f / static_cast<float>(r + c + 1);
      });
  });
  evt.wait();
}