grad_desc.py
import numpy as np
class GradientDescent(object):
    """
    Implementation of a gradient descent algorithm for binary classification.
    The reported loss is the 0-1 loss (1 if classified incorrectly, 0 if
    correctly), but training uses the hinge loss max(0, 1 - y * w.dot(x)).
    Initialized with a list of digits to be labeled positively
    (all others negatively).
    The algorithm initializes self.w to a vector of zeros. Then, on each of
    a sequence of rounds, it checks whether the hinge loss is positive and,
    if so, takes a gradient descent step. Predictions use the running
    average of the iterates (self.avg_w). A small usage sketch appears at
    the bottom of this file.
    Supported methods:
        reset(eta)
        norm_grad_loss(x, y)
        test_error(X, Y)
        data_update(x, y, importance_wt)
        null_update()
    """

    def __init__(self, num_features, pos_labels, eta=0.1):
        # num_features = length of the x vector
        # pos_labels = list of digits in [0, 9] to label positively
        #   (all others negatively)
        self.num_features = num_features
        self.binarized_labels = [1.0 if l in pos_labels else -1.0
                                 for l in range(10)]
        self.reset(eta)
        super(GradientDescent, self).__init__()

    def reset(self, eta):
        self.eta = eta
        self.w = np.zeros(self.num_features)
        self.avg_w = self.w.copy()  # copy so the average has its own storage
        self.steps = 0

    # predict on only a single datapoint, using the averaged weights
    def _predict_one(self, x):
        return np.sign(self.avg_w.dot(x))

    # predict on a matrix of data (shape is numdata * numfeatures)
    def _predict(self, X):
        return np.apply_along_axis(self._predict_one, 1, X)

    # predict and report the 0-1 error on data (shape is numdata * numfeatures)
    def test_error(self, X, Y):
        Ypred = self._predict(X)
        Ybin = np.array([self.binarized_labels[y] for y in Y])
        return np.mean(Ybin != Ypred)

    # hinge loss on a single example with binarized label biny
    def _loss(self, x, biny):
        score = self.w.dot(x) * biny
        return max(1. - score, 0.)

    # (sub)gradient of the hinge loss
    def _grad_loss(self, x, biny):
        loss = self._loss(x, biny)
        if loss > 0.:
            return -biny * x
        return np.zeros(self.num_features)

    # l2 norm of the gradient of the loss
    def norm_grad_loss(self, x, y):
        return np.linalg.norm(self._grad_loss(x, self.binarized_labels[y]))

    def _step(self):
        # every step, whether we got a data_update or a null_update, we
        # advance the running average of the iterates:
        #   avg_w <- (avg_w * steps + w) / (steps + 1)
        self.avg_w = (self.avg_w * self.steps + self.w) / (self.steps + 1.0)
        self.steps += 1

    # Do an importance-weighted gradient descent update on the hypothesis;
    # the step is scaled by the inverse of the importance weight.
    def data_update(self, x, y, importance_wt):
        biny = self.binarized_labels[y]
        loss = self._loss(x, biny)
        if loss > 0.:
            step = self.eta * self._grad_loss(x, biny)
            self.w = self.w - step / importance_wt
        self._step()

    # Do an update with no new data (only advances the running average)
    def null_update(self):
        self._step()
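

# ---------------------------------------------------------------------------
# A minimal usage sketch, not part of the original file: it trains the
# classifier on synthetic 10-class data, treating digits {0, 1} as the
# positive class. The feature dimension, number of rounds, class shift, and
# the constant importance weight of 1.0 are illustrative assumptions, not
# values taken from the original code.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    num_features, num_rounds = 20, 500

    # Synthetic data: labels in [0, 9]; positive digits get a feature shift
    # so the two classes are roughly linearly separable.
    Y = rng.randint(0, 10, size=num_rounds)
    shift = np.where(np.isin(Y, [0, 1]), 1.0, -1.0)
    X = rng.randn(num_rounds, num_features) + shift[:, None]

    gd = GradientDescent(num_features, pos_labels=[0, 1], eta=0.1)
    for x, y in zip(X, Y):
        # A unit importance weight recovers a plain hinge-loss update.
        gd.data_update(x, y, importance_wt=1.0)

    print("training error of averaged iterate:", gd.test_error(X, Y))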