collaborative_filtering.py
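"""Collaborative-filtering and Tikhonov baselines for masked power-flow features.

Loads a PowerFlowData case, builds a pygsp graph from its adjacency matrix,
grid-searches a CVXPY objective (masked data fidelity + norm regularizer +
a ||B @ z||_2 graph term) over (lambda_L, lambda_z), plots the resulting
Masked_L2_loss values as a heatmap, and compares against a closed-form
Tikhonov-style regularizer.
"""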
# import cvxopt as cvxopt
import cvxpy as cp
import numpy as np
import os
from torch_geometric.loader import DataLoader
from datasets.PowerFlowData import PowerFlowData
from utils.argument_parser import argument_parser
from utils.custom_loss_functions import Masked_L2_loss
from pygsp import graphs
import torch
def collaborative_filtering_testing(y, mask, B, x_gt, f, eval_loss_fn=Masked_L2_loss(regularize=False)):
    # Decision variable: the reconstructed node-feature matrix.
    z_hat = cp.Variable((x_gt.shape[0], x_gt.shape[1]))
    # Data-fidelity term on the observed (masked) entries.
    distance = 1 / 2 * \
        cp.square(cp.pnorm(cp.multiply(y, mask) - cp.multiply(z_hat, mask), f))
    # Norm regularizer on the full estimate.
    normalizer = cp.square(cp.pnorm(z_hat, f))
    # Earlier Laplacian-based alternatives:
    # trace = cp.trace(cp.matmul(cp.multiply(z_hat, mask).T, cp.matmul(L, cp.multiply(z_hat, mask))))
    # trace = cp.trace(cp.matmul(z_hat.T, cp.matmul(L, z_hat)))
    # Graph-smoothness term used here: norm of the edge differences B @ z_hat.
    trace = cp.norm(B @ z_hat, 2)
    # lambda_z = 0.0001
    # prob = cp.Problem(cp.Minimize(1/2 * error + lambda_z*cp.norm(z_hat, 1)))

    # Grid-search the two regularization weights.
    lambda_L_list = np.arange(0, 3, 0.5)
    lambda_z_list = np.arange(0, 3, 0.5)
    results = np.zeros((len(lambda_L_list), len(lambda_z_list)))
    for i, lambda_L in enumerate(lambda_L_list):
        for j, lambda_z in enumerate(lambda_z_list):
            prob = cp.Problem(cp.Minimize(
                distance + lambda_z * normalizer + lambda_L * trace))
            print(f"solving for lambda_L={lambda_L}, lambda_z={lambda_z} ...")
            prob.solve()
            print("status:", prob.status)
            # Alternative metric (unused): rNMSE against the ground truth.
            # rnmse = np.sqrt(np.square(z_hat.value - x_gt).mean()) / y.std()

            # Record the masked L2 loss on the entries to be predicted.
            z_tensor = torch.tensor(z_hat.value)
            x_gt_tensor = torch.tensor(x_gt)
            loss = eval_loss_fn(z_tensor, x_gt_tensor, mask)
            results[i, j] = loss.item()
    # Plot a 2D heatmap of the grid-search results.
    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.set_theme()
    ax = sns.heatmap(results, annot=True, fmt=".2f", cmap="YlGnBu")
    ax.set_xlabel("lambda_z")
    ax.set_ylabel("lambda_L")
    plt.show()
def tikhonov_regularizer(alpha, L, y, mask):
    # Tikhonov regularization, closed form as implemented: z_hat = (alpha*L + I)^{-1} @ L @ y.
    # The mask argument is accepted for interface consistency but is not used here.
    z_hat = np.matmul(np.matmul(np.linalg.inv(alpha * L + np.eye(L.shape[0])), L), y)
    return z_hat
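# Minimal usage sketch for tikhonov_regularizer in isolation. The toy Laplacian
# and signal below are illustrative placeholders, not data from the pipeline;
# mask can be anything since the function does not use it.
#
#   L_toy = np.array([[ 1., -1.,  0.],
#                     [-1.,  2., -1.],
#                     [ 0., -1.,  1.]])   # Laplacian of a 3-node path graph
#   y_toy = np.array([[1.], [0.], [1.]])
#   z_toy = tikhonov_regularizer(1.0, L_toy, y_toy, mask=None)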
if __name__ == "__main__":
    data_dir = "./data/"
    grid_case = "14"
    # grid_case = "5"
    # grid_case = "9"
    # grid_case = "6470rte"
    # grid_case = "118"
    # Load the dataset splits.
    trainset = PowerFlowData(root=data_dir, case=grid_case,
                             split=[.5, .2, .3], task='train')
    valset = PowerFlowData(root=data_dir, case=grid_case,
                           split=[.5, .2, .3], task='val')
    testset = PowerFlowData(root=data_dir, case=grid_case,
                            split=[.5, .2, .3], task='test')
    # train_loader = DataLoader(trainset, batch_size=batch_size, shuffle=True)
    # val_loader = DataLoader(valset, batch_size=batch_size, shuffle=False)
    # test_loader = DataLoader(testset, batch_size=batch_size, shuffle=False)
    # Load the adjacency matrix of the selected case from file.
    file_path = os.path.join(data_dir, "raw", f"case{grid_case}_adjacency_matrix.npy")
    adjacency_matrix = np.load(file_path)
    print(adjacency_matrix.shape)
    num_of_nodes = adjacency_matrix.shape[0]
    print(f'Number of nodes: {num_of_nodes}')

    # Create a pygsp graph from the adjacency matrix.
    G = graphs.Graph(adjacency_matrix)
    # Incidence (differential) matrix.
    G.compute_differential_operator()
    B = G.D.toarray()
    print(f'B: {B.shape}')
    # Laplacian matrix.
    L = G.L.toarray()
    print(f'Laplacian: {L.shape}')
    # Get the data.
    # x_gt holds the ground-truth node features.
    x_gt = trainset.y[:num_of_nodes, :4].numpy()
    print("x_gt: ", x_gt.shape, x_gt[0, :])
    # y holds the observations, i.e. the features with missing values.
    y = trainset.x[:num_of_nodes, 4:8]
    print("y: ", y.shape, y[0, :])
    # Mask of the values to be predicted.
    mask = trainset.x[:num_of_nodes, 10:14]

    # Estimate the missing feature values from the observations y.
    print("constructing the collaborative filtering problem...")
    f = x_gt.shape[1]
    print("f: ", f)
    collaborative_filtering_testing(y, mask, B, x_gt, f)

    # Baseline: Tikhonov regularizer evaluated with the same masked loss.
    eval_loss_fn = Masked_L2_loss(regularize=False)
    result = tikhonov_regularizer(1.25, L, y, mask)
    loss = eval_loss_fn(torch.tensor(result), torch.tensor(x_gt), mask)
    print("loss: ", loss.item())