-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
76 lines (56 loc) · 2.28 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import interpolate
import time
# Wall-clock reference for the "Time taken" report printed at the end.
start_time = time.time()

# Load the whole CSV into one 2-D array: column 0 is used as the class label
# and every remaining column as a feature.
# NOTE(review): read_csv treats the first line as a header by default; if
# SUSY.csv has no header row, the first sample is silently consumed - confirm.
data_matrix = np.array(pd.read_csv('SUSY.csv'))
X = data_matrix[:, 1:]
Y = data_matrix[:, 0]
# Recode labels from {0, 1} to {-1, +1} so that label * score is a margin.
Y[Y == 0.0] = -1.0
nrows, ncols = X.shape

training_percent = 70
# Index of the first test row.  Computed once, with floor division so the
# result is an integer on both Python 2 and Python 3.
split_index = training_percent * nrows // 100

# The first `training_percent` percent of the rows form the training set.
# Single-argument print(...) behaves identically on Python 2 and Python 3.
print("\n\n Creating training set ...\n")
X_train = X[:split_index, :]
Y_train = Y[:split_index]
print("\n Training set created ...\n")
nrows_train, ncols_train = X_train.shape

# The remaining rows form the test set.
print("\n\n Creating test set ...\n")
X_test = X[split_index:, :]
Y_test = Y[split_index:]
print("\n Test set created ...\n")
nrows_test, ncols_test = X_test.shape
def _error_rate(scores, labels):
    """Return the fraction of samples whose predicted sign disagrees with the label.

    A sample is predicted positive when its score is > 0; a label counts as
    positive when its integer-cast value is > 0.
    """
    predicted_positive = scores > 0
    actual_positive = labels.astype(int) > 0
    mismatches = np.logical_xor(predicted_positive, actual_positive).sum()
    return float(mismatches) / float(labels.shape[0])


# Linear model weights, one per feature, starting from zero.
w = np.zeros(ncols_train)
regularizer = 0.00001
# Loop-invariant factor of the projection step below.
sqrt_regularizer = np.sqrt(regularizer)

# Baseline error of the all-zero weights.  Measured on the test set so that
# every entry of error_list is a test error, like the ones appended below.
Y_pred = np.dot(X_test, w)
error = _error_rate(Y_pred, Y_test)
count = 1
total_iterations = 1000
error_list = [error]
count_list = [count]

for t in range(total_iterations):
    # Draw one training sample uniformly at random.
    i = np.random.randint(0, nrows_train)
    # Margin of the sample under the current weights (label * score).
    decision = np.dot(X_train[i, :], w) * Y_train[i]

    # Start from the current weights so w_new is always bound, even when the
    # margin is NaN and the comparison below is False.
    w_new = w
    if decision < 1:
        # Margin violated: step towards the sample with step size 1/sqrt(count).
        w_new = w + X_train[i] * Y_train[i] / np.sqrt(count)

    # Rescale so the norm never exceeds 1/sqrt(regularizer).  A zero norm
    # needs no scaling, and skipping it avoids a division by zero.
    norm = np.linalg.norm(w_new)
    if norm > 0:
        w_new = w_new * min(1, 1 / (norm * sqrt_regularizer))

    w = w_new
    count = count + 1

    # Record the test error after this update.
    Y_pred = np.dot(X_test, w)
    error = _error_rate(Y_pred, Y_test)
    error_list.append(error)
    count_list.append(count)
plt.title('Variation of accuracy in prediction with increasing iterations \n')
plt.xlabel('Iteration number')
plt.ylabel('Test error')
# Raw test error recorded for each entry of count_list.
plt.plot(count_list, error_list, color='#7fffd4')
# Cubic smoothing spline fitted to the same points, drawn on top of the raw
# curve and evaluated at the original x positions.
tck1 = interpolate.splrep(count_list, error_list, k=3, s=900)
error_list_int = interpolate.splev(count_list, tck1, der=0)
plt.plot(count_list, error_list_int, color='magenta', label='Stochastic Gradient Descent')
plt.legend()

# Stop the clock before showing the figure: when plt.show() blocks until the
# window is closed, timing after it would count the time the user spends
# looking at the plot.
end_time = time.time() - start_time
plt.show()

# Single-argument print(...) behaves identically on Python 2 and Python 3.
print("\n\nPercentage accuracy = " + str(100 - error_list[-1] * 100) + "%\n")
print("\nTime taken = " + str(end_time) + " seconds\n")