// maxpool.cpp
#include "weights_cache.hpp"
#include "image_cache.hpp"
#include "output_cache.hpp"
#include "fpga_top.hpp"
// #include "maxpool.hpp"
/**
 * Max-pools a channel-major feature map held in the shared buffer `in`.
 *
 * The input plane for channel c starts at
 * in[layer.mem_addr_input + c * layer.width * layer.height] and is stored
 * row by row. Windows are anchored at (i, j) with i and j stepping by
 * layer.stride; each window spans layer.kernel x layer.kernel taps, and taps
 * that fall past the bottom or right edge of the plane are skipped.
 *
 * Results are written densely starting at in[layer.mem_addr_output], in the
 * same channel / row / column order in which the windows are visited, so each
 * channel occupies ceil(height / stride) * ceil(width / stride) elements.
 *
 * @param in     Base of the buffer holding both the input and the output map.
 * @param layer  Layer descriptor; reads channels_in, height, width, stride,
 *               kernel, mem_addr_input and mem_addr_output.
 *
 * NOTE(review): the output layout mirrors the store that was previously left
 * commented out here; confirm it against whatever consumes this layer.
 * NOTE(review): assumes the input and output regions of `in` do not overlap.
 */
void maxpool(data_t * in, layer_t layer) {
    // Read the layer geometry once; none of it changes inside the loops.
    const int channels = layer.channels_in;
    const int height = layer.height;
    const int width = layer.width;
    const int stride = layer.stride;
    const int kernel = layer.kernel;

    // A non-positive stride would never advance the row/column loops.
    if (stride <= 0) {
        return;
    }

    data_t * img = &in[layer.mem_addr_input];
    data_t * out = &in[layer.mem_addr_output];

    // Windows are visited in exactly the order the outputs are laid out, so a
    // running index replaces the per-element divide/multiply address maths.
    int out_idx = 0;

    for (int c = 0; c < channels; c++) {
#pragma HLS LOOP_TRIPCOUNT min=64 max=256 avg=128
        const int plane = c * width * height;
        for (int i = 0; i < height; i += stride) {
#pragma HLS LOOP_TRIPCOUNT min=14 max=224 avg=56
            for (int j = 0; j < width; j += stride) {
#pragma HLS LOOP_TRIPCOUNT min=14 max=224 avg=56
#pragma HLS PIPELINE II=1
                // Seed with the most negative finite float. numeric_limits<float>::min()
                // is the smallest POSITIVE value and would win over negative inputs.
                // NOTE(review): assumes data_t can represent this value (e.g. float).
                data_t window_max = -std::numeric_limits<float>::max();
                for (int ii = 0; ii < kernel; ii++) {
#pragma HLS LOOP_TRIPCOUNT avg=3
                    for (int jj = 0; jj < kernel; jj++) {
#pragma HLS LOOP_TRIPCOUNT avg=3
#pragma HLS UNROLL factor=3
                        const int row = i + ii;
                        const int col = j + jj;
                        // Skip taps that hang over the bottom/right border.
                        if (row >= height || col >= width) {
                            continue;
                        }
                        const data_t sample = img[plane + row * width + col];
                        if (sample > window_max) {
                            window_max = sample;
                        }
                    }
                }
                // Store the pooled value; previously the result was discarded.
                out[out_idx] = window_max;
                out_idx++;
            }
        }
    }
}
/**
 * Placeholder entry point for the fused convolution + pooling layer.
 *
 * Currently performs no work: the call into conv_pool_helper is disabled
 * until that helper has been ported, so neither `in` nor `l` is read.
 *
 * @param in  Base of the shared data buffer (unused for now).
 * @param l   Layer descriptor (unused for now).
 */
void conv_pool(data_t* in, layer_t l) {
    // Scratch storage the disabled helper call takes as its third argument.
    data_t img_buffer[224*224];
    // conv_pool_helper(in, l, img_buffer);

    // Nothing is consumed yet; mark everything as intentionally unused.
    (void)in;
    (void)l;
    (void)img_buffer;
}
// TODO: work out the changes needed in conv_pool_helper (commented out below) before re-enabling it
//void conv_pool_helper(data_t* in, layer_t l, data_t img_buffer[224*224]) {
//#pragma HLS INLINE
//#pragma HLS RESOURCE variable=img_buffer core=RAM_1P_BRAM
//#pragma HLS INTERFACE ap_memory port=img_buffer
//
//int i, j, ii, jj, filt_num, c;
//data_t* w = &in[l.weight_offset];
//data_t* img = &in[l.img_offset];
//data_t* out = &in[l.output_offset];
//int bias_offset = l.weight_offset + l.num_weights - l.chan_out;
//
// for (filt_num = 0; filt_num < l.chan_out; filt_num++) {
//#pragma HLS LOOP_TRIPCOUNT min=16 max=1000 avg=256
// data_t bias = in[bias_offset + filt_num];
//
// data_t w_buffer[9*512*2];
// for (i = 0; i < l.chan_in * l.k * l.k; i++) {
//#pragma HLS LOOP_TRIPCOUNT min=1 max=4608
// w_buffer[i] = w[(filt_num * l.chan_in * l.k * l.k) + i];
// }
//
// for (i = 0; i < l.h; i+= l.stride){
//#pragma HLS LOOP_TRIPCOUNT min=14 max=224 avg=56
// for (j = 0; j < l.w; j+= l.stride) {
//#pragma HLS LOOP_TRIPCOUNT min=14 max=224 avg=56
// data_t temp = 0;
// for (c = 0; c < l.chan_in; c++) {
//#pragma HLS LOOP_TRIPCOUNT min=3 max=512 avg=256
//#pragma HLS PIPELINE II=1
//
// for (ii = -l.k/2; ii <= l.k/2; ii++) {
//#pragma HLS LOOP_TRIPCOUNT avg=2
// for (jj = -l.k/2; jj <= l.k/2; jj++) {
//#pragma HLS LOOP_TRIPCOUNT avg=2
// temp += pad_img(img, l, i+ii , j+jj, c) * w_buffer[(c * l.k * l.k) + (jj+l.k/2) * l.k + (ii+l.k/2)];
// }
// }
// }
//
// temp += bias;
//
// if (l.relu && temp < 0.0) {
// temp = 0.0;
// }
//
// img_buffer[(i/l.stride * div_ceil(l.w,l.stride) + j/l.stride)] = temp;
// }
// }
//
// int conv_h = div_ceil(l.h, l.stride);
// int conv_w = div_ceil(l.w, l.stride);
// pooling_t params = l.pool_params;
//
// for (i = 0; i < conv_h; i+=params.stride) {
//#pragma HLS LOOP_TRIPCOUNT min=14 max=224 avg=56
// for (j = 0; j < conv_w; j+=params.stride) {
//#pragma HLS LOOP_TRIPCOUNT min=14 max=224 avg=56
//#pragma HLS PIPELINE II=1
// data_t max = -std::numeric_limits<float>::max(); // most negative finite float; ::min() is the smallest positive value
// for (ii = 0; ii < params.k; ii++) {
//#pragma HLS LOOP_TRIPCOUNT avg=3
// for (jj = 0; jj < params.k; jj++) {
//#pragma HLS LOOP_TRIPCOUNT avg=3
//#pragma HLS UNROLL factor=3
// if (i+ii >= conv_h || j+jj >= conv_w) {
// continue;
// }
// data_t temp = img_buffer[(i+ii) * conv_w + (j+jj)];
// if (temp > max) {
// max = temp;
// }
// }
// }
// out[(filt_num * div_ceil(conv_h, params.stride) * div_ceil(conv_w,params.stride)) + (i/params.stride * div_ceil(conv_w,params.stride) + j/params.stride)] = max;
// }
// }
// }
//}