
Commit 44833c2

Added RMSProp Optimizer subroutine (#144)

* Added RMSProp subroutine
* Added Bias gradients
* Made suggested corrections
* Refactor RMSprop; it now compiles and converges

Co-authored-by: milancurcic <caomaco@gmail.com>

1 parent c3c048a
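
For context, the update rule this commit implements keeps, for each dense layer, an exponential moving average of the squared weight and bias gradients, and divides each gradient step by the root of that average. The names v, g, and w below are shorthand for the rms, dw/db, and weights/biases arrays in the diff:

    v = decay_rate * v + (1 - decay_rate) * g**2
    w = w - learning_rate / sqrt(v + epsilon) * g

where epsilon is a small constant that guards against division by zero.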

1 file changed (+81, -2)

example/quadratic.f90

Lines changed: 81 additions & 2 deletions
@@ -3,21 +3,23 @@ program quadratic_fit
   ! stochastic gradient descent, batch gradient descent, and minibatch gradient
   ! descent.
   use nf, only: dense, input, network
+  use nf_dense_layer, only: dense_layer
 
   implicit none
-  type(network) :: net_sgd, net_batch_sgd, net_minibatch_sgd
+  type(network) :: net_sgd, net_batch_sgd, net_minibatch_sgd, net_rms_prop
 
   ! Training parameters
   integer, parameter :: num_epochs = 1000
   integer, parameter :: train_size = 1000
   integer, parameter :: test_size = 30
   integer, parameter :: batch_size = 10
   real, parameter :: learning_rate = 0.01
+  real, parameter :: decay_rate = 0.9
 
   ! Input and output data
   real, allocatable :: x(:), y(:) ! training data
   real, allocatable :: xtest(:), ytest(:) ! testing data
-  real, allocatable :: ypred_sgd(:), ypred_batch_sgd(:), ypred_minibatch_sgd(:)
+  real, allocatable :: ypred_sgd(:), ypred_batch_sgd(:), ypred_minibatch_sgd(:), ypred_rms_prop(:)
 
   integer :: i, n

@@ -42,6 +44,7 @@ program quadratic_fit
   net_sgd = network([input(1), dense(3), dense(1)])
   net_batch_sgd = network([input(1), dense(3), dense(1)])
   net_minibatch_sgd = network([input(1), dense(3), dense(1)])
+  net_rms_prop = network([input(1), dense(3), dense(1)])
 
   ! Print network info to stdout; this will be the same for all three networks.
   call net_sgd % print_info()
@@ -55,15 +58,20 @@ program quadratic_fit
   ! Mini-batch SGD optimizer
   call minibatch_gd_optimizer(net_minibatch_sgd, x, y, learning_rate, num_epochs, batch_size)
 
+  ! RMSProp optimizer
+  call rmsprop_optimizer(net_rms_prop, x, y, learning_rate, num_epochs, decay_rate)
+
   ! Calculate predictions on the test set
   ypred_sgd = [(net_sgd % predict([xtest(i)]), i = 1, test_size)]
   ypred_batch_sgd = [(net_batch_sgd % predict([xtest(i)]), i = 1, test_size)]
   ypred_minibatch_sgd = [(net_minibatch_sgd % predict([xtest(i)]), i = 1, test_size)]
+  ypred_rms_prop = [(net_rms_prop % predict([xtest(i)]), i = 1, test_size)]
 
   ! Print the mean squared error
   print '("Stochastic gradient descent MSE:", f9.6)', sum((ypred_sgd - ytest)**2) / size(ytest)
   print '(" Batch gradient descent MSE: ", f9.6)', sum((ypred_batch_sgd - ytest)**2) / size(ytest)
   print '(" Minibatch gradient descent MSE: ", f9.6)', sum((ypred_minibatch_sgd - ytest)**2) / size(ytest)
+  print '(" RMSProp MSE: ", f9.6)', sum((ypred_rms_prop - ytest)**2) / size(ytest)
 
 contains
 
@@ -161,6 +169,77 @@ subroutine minibatch_gd_optimizer(net, x, y, learning_rate, num_epochs, batch_size)
     end do
   end subroutine minibatch_gd_optimizer
 
+  subroutine rmsprop_optimizer(net, x, y, learning_rate, num_epochs, decay_rate)
+    ! RMSprop optimizer for updating weights using a running root mean square
+    ! of the gradients.
+    type(network), intent(inout) :: net
+    real, intent(in) :: x(:), y(:)
+    real, intent(in) :: learning_rate, decay_rate
+    integer, intent(in) :: num_epochs
+    integer :: i, j, n
+    real, parameter :: epsilon = 1e-8 ! Small constant to avoid division by zero
+
+    ! Define a dedicated type to store the RMSprop gradients.
+    ! This is needed because array sizes vary between layers and we need to
+    ! keep track of gradients for each layer over time.
+    ! For now this works only for dense layers.
+    ! We will need to define a similar type for conv2d layers.
+    type :: rms_gradient_dense
+      real, allocatable :: dw(:,:)
+      real, allocatable :: db(:)
+    end type rms_gradient_dense
+
+    type(rms_gradient_dense), allocatable :: rms(:)
+
+    print *, "Running RMSprop optimizer..."
+
+    ! Here we allocate the array of RMS gradient derived types.
+    ! We need one for each dense layer, but we allocate it to the length of
+    ! all layers to make the housekeeping easier.
+    allocate(rms(size(net % layers)))
+
+    do n = 1, num_epochs
+
+      do i = 1, size(x)
+        call net % forward([x(i)])
+        call net % backward([y(i)])
+      end do
+
+      ! RMSprop update rule
+      do j = 1, size(net % layers)
+        select type (this_layer => net % layers(j) % p)
+          type is (dense_layer)
+
+            ! If this is our first time here for this layer, allocate the
+            ! internal RMS gradient arrays and initialize them to zero.
+            if (.not. allocated(rms(j) % dw)) then
+              allocate(rms(j) % dw, mold=this_layer % dw)
+              allocate(rms(j) % db, mold=this_layer % db)
+              rms(j) % dw = 0
+              rms(j) % db = 0
+            end if
+
+            ! Update the RMS gradients using the RMSprop moving average rule
+            rms(j) % dw = decay_rate * rms(j) % dw + (1 - decay_rate) * this_layer % dw**2
+            rms(j) % db = decay_rate * rms(j) % db + (1 - decay_rate) * this_layer % db**2
+
+            ! Update weights and biases using the RMSprop update rule
+            this_layer % weights = this_layer % weights - learning_rate &
+              / sqrt(rms(j) % dw + epsilon) * this_layer % dw
+            this_layer % biases = this_layer % biases - learning_rate &
+              / sqrt(rms(j) % db + epsilon) * this_layer % db
+
+            ! We have updated the weights and biases, so we need to reset the
+            ! gradients to zero for the next epoch.
+            this_layer % dw = 0
+            this_layer % db = 0
+
+        end select
+      end do
+
+    end do
+
+  end subroutine rmsprop_optimizer
+
   subroutine shuffle(arr)
     ! Shuffle an array using the Fisher-Yates algorithm.
     integer, intent(inout) :: arr(:)
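
To see the same update in isolation, here is a minimal, self-contained sketch that applies the RMSprop step above to a toy quadratic loss on a plain weight vector. It is not part of this commit; the program name rmsprop_demo and the variables w_true, v_dw, and grad are illustrative only.

    program rmsprop_demo
      ! Minimize f(w) = sum((w - w_true)**2) with the same RMSprop step
      ! used in rmsprop_optimizer above.
      implicit none
      real, parameter :: learning_rate = 0.01
      real, parameter :: decay_rate = 0.9
      real, parameter :: epsilon = 1e-8
      integer, parameter :: num_steps = 1000
      real :: w(3), grad(3), v_dw(3), w_true(3)
      integer :: n

      w_true = [1.0, -2.0, 0.5]
      w = 0    ! initial weights
      v_dw = 0 ! moving average of the squared gradients

      do n = 1, num_steps
        ! Gradient of the loss with respect to w
        grad = 2 * (w - w_true)
        ! RMSprop moving average of the squared gradients
        v_dw = decay_rate * v_dw + (1 - decay_rate) * grad**2
        ! Scale the step by the root of the moving average
        w = w - learning_rate / sqrt(v_dw + epsilon) * grad
      end do

      print '("w after ",i0," steps:",3f9.4)', num_steps, w

    end program rmsprop_demo

Because each step is scaled by 1 / sqrt(v_dw + epsilon), parameters with persistently large gradients take smaller effective steps, which is why the optimizer in the diff can reuse the same fixed learning_rate as the plain SGD variants.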
