-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSimulationsClassification.R
210 lines (156 loc) · 5.46 KB
/
SimulationsClassification.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
####################################
### Classification of Spiral Data ##
####################################
# Settings
## A positive integer indicates the numebr of classes.
K <- 5
## A positive integer indicates the number of samples for each class.
N <- 200
## A positive integer indicates the number of neurons in each hidden layer of
## the neural network.
num.neurons <- 150
## A positive integer indicates the number of layers for the neural network.
l <- 2
## A positive value within [0,1] indicates the propostion of informative
## samples.
prop <- 1
## A Boolean value indicates whether to corrupt the input vectors.
NoiseX <- FALSE
## A positive value within [0,1] indicates the batch size for stochastic
## gradient descent algorithm.
BatchSize <- 1
## A positive integer indicates the number of epoch to update the gradient of
## the neural network.
E <- 2000
## A positive value indicates the learning rate for the stochastic gradient
## descent algorithm with softmax loss.
alphaSoftmax <- 1e-1
## A positive value indicates the learning rate for the stochastic gradient
## descent algorithm with DeepMoM structure.
alphaMoM <- 1e-1
## A vector of positive integers indicates the number of blocks for DeepMoM.
Blocks <- c(3, 5, 7, 9, 11)
## A positive value to scale the initial values for updating the gradients of
## the neural network.
scale <- 1
## A positive value to set the random seed for shuffling the data for
## reproducible research.
seed <- 202101
# Loading required functions
source("./AdditionalFunctions/FeedForwardNN.R")
source("./AdditionalFunctions/BackPropNN.R")
source("./AdditionalFunctions/TrainNN.R")
source("./AdditionalFunctions/GroupK.R")
source("./AdditionalFunctions/Mom.R")
source("./AdditionalFunctions/HuberLoss.R")
source("./AdditionalFunctions/HuberDerivative.R")
source("./AdditionalFunctions/L1Derivative.R")
# Generating spiral data
X <- data.frame()
C <- data.frame()
for (j in (1:K)){
r <- seq(0.05,1,length.out = N)
t <- seq((j-1)*3.7, (j)*3.7, length.out=N) + rnorm(N, sd=0.25)
Xtemp <- data.frame(x1 =r*sin(t) , x2 = r*cos(t))
ytemp <- data.frame(matrix(j, N, 1))
X <- rbind(X, Xtemp)
C <- rbind(C, ytemp)
}
data <- cbind(X,C)
colnames(data) <- c(colnames(X), 'label')
x_min <- min(X[,1])-0.2; x_max <- max(X[,1])+0.2
y_min <- min(X[,2])-0.2; y_max <- max(X[,2])+0.2
X <- as.matrix(X)/max(abs(X))
# Formalize outputs
Y <- matrix(0, N*K, K)
for (i in 1:(N*K)){
Y[i, C[i,]] <- 1
}
set.seed(seed)
train.index <- sample(c(1:(N*K)), floor(0.5*N*K),replace=FALSE)
X.test <- X[-train.index,]
CT <- C[-train.index,]
X <- X[train.index,]
Y <- Y[train.index,]
good <- sample(c(1:dim(X)[1]), floor(dim(X)[1]*prop), replace=FALSE)
C.test <- C[train.index,]
total.index <- c(1:dim(X)[1])
if(isTRUE(NoiseX==FALSE)){
bad <- total.index[-good]
for(i in bad){
s <- c(1:K)
C.test[i] <- sample(s[-C.test[i]], 1, replace=FALSE)
}
}
Y <- matrix(0, dim(X)[1], K)
for (i in 1:(dim(X)[1])){
Y[i, C.test[i]] <- 1
}
if (isTRUE(NoiseX)){
noise <- sample(c(1:dim(X)[1]), floor((1-prop)*dim(X)[1]), replace = FALSE)
X[noise, ] <- matrix(rnorm(dim(X)[2]*length(noise), 5, 1),
nrow=length(noise), ncol=dim(X)[2])
}
X[noise, ] <- as.matrix(X[noise, ])/max(abs(X[noise, ]))
num.obs <- dim(X)[1]
num.par <- dim(X)[2]
b <- floor(dim(X)[1]*BatchSize)
i.num <- floor(num.obs/b)*E
P <- rep(num.neurons,l)
P <- c(num.par,P,K)
set.seed(NULL)
# Softmax
alpha <- alphaSoftmax
repeat{
Pre_Para.softmax <- TrainNN(y=Y,X,P=P,alpha=alpha,iteration=i.num,random=FALSE,
batch=b,MOM=FALSE,k=3,loss.f="ls",q=NULL,
bias=TRUE,class=TRUE,beta=scale, para=NULL)
train.test <- FeedForwardNN(X, para=Pre_Para.softmax[[1]], class=TRUE,
class.score=TRUE)
if(any(is.na(train.test))|isTRUE(train.test=="NaN")|
isTRUE(any(is.na(train.test)))|isTRUE(is.na(train.test))|
isTRUE(train.test==Inf)|isTRUE(train.test==-Inf)|
isTRUE(identical(train.test,integer(0)))){
alpha <- alpha/2
next
}else{
break
}
}
Prediction.class <- FeedForwardNN(X.test,para=Pre_Para.softmax[[1]],class=TRUE,
class.score=FALSE)
alpha.softmax <- alpha
LossTracking.ls <- Pre_Para.softmax[[2]]
accuracy.softmax <- mean(Prediction.class==(CT))
# MOM
Prediction.mom <- c()
alpha.mom <- c()
LossTracking.mom <- list()
for(i in 1:length(Blocks)){
alpha <- alphaMoM
repeat{
Pre_Para.mom <- TrainNN(y=Y,X=X,P=P,alpha=alpha,iteration=i.num,random=FALSE,
batch=b,MOM=TRUE,k=Blocks[i],loss.f="ls",q=NULL,
bias=TRUE,class=TRUE,beta=scale, para=NULL)
train.test <- FeedForwardNN(X,para=Pre_Para.mom[[1]],class=TRUE,
class.score=TRUE)
if(any(is.na(train.test))|isTRUE(train.test=="NaN")|
isTRUE(any(is.na(train.test)))|isTRUE(is.na(train.test))|
isTRUE(train.test==Inf)|isTRUE(train.test==-Inf)|
isTRUE(identical(train.test,integer(0)))){
alpha <- alpha/2
next
}else{
break
}
}
Prediction.class.mom <- FeedForwardNN(X.test,para=Pre_Para.mom[[1]],
class=TRUE,class.score=FALSE)
LossTracking.mom[[i]] <- Pre_Para.mom[[2]]
accuracy.mom <- mean(Prediction.class.mom==(CT))
Prediction.mom <- c(Prediction.mom,accuracy.mom)
alpha.mom <- c(alpha.mom,alpha)
}
accuracy.softmax
max(Prediction.mom)
#save.image("100.RData")