// gameOfLifeCudaShared.cu
// Conway's Game of Life on an (N+2)x(N+2) padded board, stepped on the GPU by a
// kernel that stages cells in __shared__ memory; frames are written as PNG files.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
#include <cuda.h>
extern "C"
{
#include "png_util.h"
}
// p_B: side length of the __shared__ tile declared in cudaIterate; solve() also
// uses the same value as the number of threads per block for its kernel launches.
#define p_B 64
// to compile
// to convert output png files to an mp4 movie:
// ffmpeg -y -start_number 0 -r 24 -i gol%05d.png -b:v 8192k -c:v mpeg4 gol.mp4
/* Flatten the 2D cell coordinate (i, j) into an offset in the padded board.
   Rows of the padded board are N+2 entries long; i is the fast-varying index
   and j selects the row. */
int idx(int N, int i, int j){
  const int rowLength = N + 2;
  return j * rowLength + i;
}
/* Count matching cells between two boards.
   Each thread looks at exactly one element (1D launch: index =
   blockIdx.x * blockDim.x + threadIdx.x) and atomically increments *counter
   when Iold and Inew hold the same value at that position.
   counter must point to device-accessible memory; the kernel only adds to it,
   so the caller is responsible for zeroing it first.
   NOTE: there is no length argument and therefore no bounds check - the launch
   must not create more threads than there are elements in the arrays. */
__global__ void cudaEquals(int *counter, float *Iold, float *Inew) {
  // flat element index owned by this thread
  const int pos = blockIdx.x * blockDim.x + threadIdx.x;

  // nothing to record when the two boards disagree here
  if (Iold[pos] != Inew[pos]) {
    return;
  }

  // all threads funnel into the same counter, hence the atomic
  atomicAdd(counter, 1);
}
/* function to update Inew from Iold

   Advances the Game of Life by one generation: reads the padded (N+2)x(N+2)
   board Iold and writes the next state of every interior cell (1..N, 1..N)
   into Inew. Ghost cells of Inew are never written.

   Layout / launch requirements:
     - Boards are stored as i + (N+2)*j, matching idx().
     - The board is cut into tiles of (p_B-2)x(p_B-2) interior cells. Each tile
       is staged, together with its one-cell halo, in a p_B x p_B __shared__
       array (p_B*p_B ints of static shared memory per block).
     - Blocks stride over tiles and the threads of a block stride over the
       cells of a tile, so the kernel is correct for any grid/block shape
       (1D, 2D or 3D, any number of threads up to the hardware limit).
     - Iold and Inew must be distinct buffers: neighbours are read from Iold
       while Inew is being written. */
__global__ void cudaIterate(int N, float *Iold, float *Inew){
  __shared__ int board[p_B][p_B];

  const int tile = p_B - 2;                        // interior cells per tile side
  const int len = N + 2;                           // row length of the padded board
  const int tilesPerSide = (N + tile - 1) / tile;  // ceil(N / tile)
  const int numTiles = tilesPerSide * tilesPerSide;

  // linearised thread / block ids so that any launch geometry works
  const int tid = threadIdx.x + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z);
  const int nthreads = blockDim.x * blockDim.y * blockDim.z;
  const int bid = blockIdx.x + gridDim.x * (blockIdx.y + gridDim.y * blockIdx.z);
  const int nblocks = gridDim.x * gridDim.y * gridDim.z;

  // The trip count of this loop depends only on the block id, so every thread
  // of a block reaches the barriers below the same number of times.
  for(int t = bid; t < numTiles; t += nblocks){
    // padded-board coordinate of the tile's halo corner (shared cell [0][0])
    const int i0 = (t % tilesPerSide) * tile;
    const int j0 = (t / tilesPerSide) * tile;

    // stage the tile plus halo; consecutive threads read consecutive i so the
    // global loads are contiguous. Cells past the padded board count as dead.
    for(int k = tid; k < p_B * p_B; k += nthreads){
      const int li = k % p_B;
      const int lj = k / p_B;
      const int gi = i0 + li;
      const int gj = j0 + lj;
      board[lj][li] = (gi < len && gj < len) ? (int)Iold[gi + len * gj] : 0;
    }
    __syncthreads();  // tile fully loaded before anyone reads neighbours

    // update the interior cells of the tile
    for(int k = tid; k < tile * tile; k += nthreads){
      const int li = 1 + k % tile;
      const int lj = 1 + k / tile;
      const int gi = i0 + li;
      const int gj = j0 + lj;

      // the last tile in each direction may overhang the board
      if(gi <= N && gj <= N){
        const int surroundings =
            board[lj-1][li-1] + board[lj-1][li] + board[lj-1][li+1] +
            board[lj  ][li-1]                   + board[lj  ][li+1] +
            board[lj+1][li-1] + board[lj+1][li] + board[lj+1][li+1];
        const int oldState = board[lj][li];
        // live cell survives with 2 or 3 neighbours; dead cell is born with 3
        const int newstate = (oldState==1)?((surroundings==2)||(surroundings==3)):(surroundings==3);
        Inew[gi + len * gj] = (float)newstate;
      }
    }
    __syncthreads();  // finish reading before the next tile overwrites board
  }
}
/* Debug helper: dump the interior cells (indices 1..N in both directions) of
   a padded board to stdout as digits. One line of output is produced per value
   of the first index i, with the second index j running along the line; a
   blank line is printed before and after the board. */
void print_board(int N, float *board){
  printf("\n");
  int i = 1;
  while(i <= N){
    int j = 1;
    while(j <= N){
      // cells are stored as floats; show them as integers
      const int cell = (int)board[idx(N,i,j)];
      printf("%d", cell);
      ++j;
    }
    printf("\n");
    ++i;
  }
  printf("\n");
}
/* Abort with a readable message when a CUDA runtime call fails. */
static void checkCuda(cudaError_t status, const char *what){
  if(status != cudaSuccess){
    fprintf(stderr, "CUDA failure in %s: %s\n", what, cudaGetErrorString(status));
    exit(EXIT_FAILURE);
  }
}

/* Return 1 when the first `cells` entries of two device boards are identical,
   0 otherwise. The comparison runs on the device through cudaEquals, which has
   no bounds check, so the work is split into full blocks of `threads` threads
   plus one smaller launch for the remainder; no thread ever indexes past
   `cells`. c_counter is a device int used as scratch space. */
static int boardsMatch(int *c_counter, float *c_A, float *c_B, int cells, int threads){
  checkCuda(cudaMemset(c_counter, 0, sizeof(int)), "cudaMemset(counter)");

  const int fullBlocks = cells / threads;
  const int tail = cells % threads;
  if(fullBlocks > 0){
    cudaEquals<<<fullBlocks, threads>>>(c_counter, c_A, c_B);
    checkCuda(cudaGetLastError(), "cudaEquals launch");
  }
  if(tail > 0){
    const size_t offset = (size_t)fullBlocks * threads;
    cudaEquals<<<1, tail>>>(c_counter, c_A + offset, c_B + offset);
    checkCuda(cudaGetLastError(), "cudaEquals tail launch");
  }

  // blocking copy: also waits for the kernels above and surfaces their errors
  int equalCells = 0;
  checkCuda(cudaMemcpy(&equalCells, c_counter, sizeof(int), cudaMemcpyDeviceToHost),
            "cudaMemcpy(counter)");
  return equalCells == cells;
}

/* function to solve for game board using Game of Life rules

   Builds a random NxN board (seeded deterministically) surrounded by a ring of
   dead ghost cells, then repeatedly advances it on the GPU. Every pass of the
   loop performs two generations (Iold -> Inew -> Iold) and stops once the two
   boards are identical or more than maxsteps passes have run. Every iostep
   passes the current board is written to gol%05d.png. The value reported as
   "steps" is the number of loop passes.
   Assumes (N+2)*(N+2) fits in an int. */
void solve(int N){
  if(N < 1){
    fprintf(stderr, "solve: board size N must be at least 1 (got %d)\n", N);
    return;
  }

  /* Intializes integer random number generator */
  // srand((unsigned) time(&t));
  srand(123456);

  // boards carry one ghost cell on every side, hence (N+2) x (N+2)
  const int cells = (N+2)*(N+2);
  const size_t bytes = (size_t)cells * sizeof(float);

  float *h_Iold = (float*) calloc(cells, sizeof(float));
  if(h_Iold == NULL){
    fprintf(stderr, "solve: out of host memory for a %d x %d board\n", N+2, N+2);
    exit(EXIT_FAILURE);
  }

  float *c_Inew = NULL, *c_Iold = NULL;
  int *c_counter = NULL;  // device-side tally used by the convergence test
  checkCuda(cudaMalloc(&c_Inew, bytes), "cudaMalloc(c_Inew)");
  checkCuda(cudaMalloc(&c_Iold, bytes), "cudaMalloc(c_Iold)");
  checkCuda(cudaMalloc(&c_counter, sizeof(int)), "cudaMalloc(counter)");

  for(int i=1;i<N+1;i=i+1){
    for(int j=1;j<N+1;j=j+1){
      // set board state randomly to 1 or 0
      h_Iold[idx(N,i,j)] = rand()%2;
    }
  }

  /* print initial board*/
  //printf("initial game board:");
  //print_board(N, h_Iold);

  checkCuda(cudaMemcpy(c_Iold, h_Iold, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(c_Iold)");
  // all-zero bytes are 0.0f, so this clears the second board including its ghost ring
  checkCuda(cudaMemset(c_Inew, 0, bytes), "cudaMemset(c_Inew)");

  /* iterate here */
  int B = p_B;
  int G = (N+B-1)/B;

  int count = 0;       // step counter
  int iostep = 10;     // output every iostep
  int output = 1;      // save images if output=1
  int maxsteps = 1000; // maximum number of steps
  int converged = 0;   // set once two successive generations are identical

  do{
    /* iterate from Iold to Inew */
    cudaIterate<<<G, B>>>(N, c_Iold, c_Inew);
    checkCuda(cudaGetLastError(), "cudaIterate launch (Iold -> Inew)");

    /* iterate from Inew to Iold */
    cudaIterate<<<G, B>>>(N, c_Inew, c_Iold);
    checkCuda(cudaGetLastError(), "cudaIterate launch (Inew -> Iold)");

    // compare the two latest generations without shipping both boards to the host
    converged = boardsMatch(c_counter, c_Iold, c_Inew, cells, B);

    if(output==1 && count%iostep==0){
      // the board is only needed on the host when a frame is written
      checkCuda(cudaMemcpy(h_Iold, c_Iold, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(h_Iold)");

      char filename[BUFSIZ];
      FILE *png;
      snprintf(filename, sizeof(filename), "gol%05d.png", count/iostep);
      png = fopen(filename, "wb");  // PNG is binary data
      if(png == NULL){
        fprintf(stderr, "solve: could not open %s for writing\n", filename);
      }
      else{
        write_gray_png(png, N+2, N+2, h_Iold, 0, 1);
        fclose(png);
      }
    }

    /* update counter */
    count = count + 1;
  }while(!converged && count <= maxsteps);

  // fetch the final board for the report below
  checkCuda(cudaMemcpy(h_Iold, c_Iold, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(final board)");

  /* print out the cell existence in the whole board, then in cell (1 1) and (10 10)*/
  //printf("final game board:");
  //print_board(N, h_Iold);
  printf("I_{1 1} = %d\n", (int)h_Iold[idx(N,1,1)]);
  if(N >= 10){
    // cell (10,10) only exists on boards with at least 10 interior cells per side
    printf("I_{10 10} = %d\n", (int)h_Iold[idx(N,10,10)]);
  }
  printf("Took %d steps\n", count);

  checkCuda(cudaFree(c_counter), "cudaFree(counter)");
  checkCuda(cudaFree(c_Iold), "cudaFree(c_Iold)");
  checkCuda(cudaFree(c_Inew), "cudaFree(c_Inew)");
  free(h_Iold);
}
/* usage: ./gameOfLife 100
   to iterate, solve and display the game board of size N

   Expects exactly one argument, the interior board size N, which must be a
   positive decimal integer. Prints the processor time consumed (as reported
   by clock()) once solve() returns. Exits with status -1 on bad usage. */
int main(int argc, char **argv){
  if(argc!=2){
    printf("To run with an (N+2)x(N+2) board: \n");
    printf(" ./gameOfLife N\n");
    exit(-1);
  }

  /* start timer */
  clock_t begin = clock();

  /* read N from the command line arguments */
  // strtol instead of atoi so that junk, trailing characters, non-positive
  // values and values that do not fit in an int are rejected up front
  char *rest = NULL;
  long parsed = strtol(argv[1], &rest, 10);
  if(rest == argv[1] || *rest != '\0' || parsed < 1 || parsed != (long)(int)parsed){
    fprintf(stderr, "N must be a positive integer, got \"%s\"\n", argv[1]);
    exit(-1);
  }
  int N = (int)parsed;

  /* to solve for cell existence in game of life game board */
  solve(N);

  /* end timer*/
  clock_t end = clock();
  double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
  printf("Time spent = %g sec\n", time_spent);
  return 0;
}