-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathproject.py
71 lines (67 loc) · 2.11 KB
/
project.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# -*- coding: utf-8 -*-
"""
@author: Chinmay Jain
Main file used to create and execute the MVME algorithm
"""
from gensim.models import word2vec
import numpy
# Cache of loaded word2vec models keyed by file path, so that repeated
# calls to mvme() do not reload the model from disk every time.
_MODEL_CACHE = {}


def _load_model(path):
    """Return the word2vec model saved at ``path``, loading it at most once."""
    if path not in _MODEL_CACHE:
        _MODEL_CACHE[path] = word2vec.Word2Vec.load(path)
    return _MODEL_CACHE[path]


def _similarity_matrix(model, t1Tokens, t2Tokens):
    """Return the len(t2Tokens) x len(t1Tokens) matrix of word similarities.

    Entry ``[j, i]`` holds the similarity between ``t1Tokens[i]`` and
    ``t2Tokens[j]``; pairs for which the lookup fails are scored 0.
    """
    # Newer gensim releases expose similarity() only on the model's ``wv``
    # keyed vectors; fall back to the model itself when ``wv`` is absent.
    vectors = getattr(model, 'wv', model)
    mat = numpy.zeros((len(t2Tokens), len(t1Tokens)))
    for i, u in enumerate(t1Tokens):
        for j, v in enumerate(t2Tokens):
            try:
                mat[j, i] = vectors.similarity(u, v)
            except KeyError:
                # Presumably a word missing from the model vocabulary.
                mat[j, i] = 0
            except AttributeError:
                print('Error')
                mat[j, i] = 0
    return mat


def _greedy_match_sum(mat):
    """Sum the entries picked by greedy one-to-one matching over ``mat``.

    Repeatedly takes the largest remaining entry, adds it to the total and
    removes its row and column, until no rows or no columns are left.
    """
    total = 0.0
    while mat.size:
        # argmax over the transpose scans column by column, so ties resolve
        # to the lowest column index first, then the lowest row index.
        transposed = mat.T
        col, row = numpy.unravel_index(numpy.argmax(transposed),
                                       transposed.shape)
        total += mat[row, col]
        mat = numpy.delete(mat, row, axis=0)
        mat = numpy.delete(mat, col, axis=1)
    return total


# Method used to implement the MVME algorithm
def mvme(t1, t2):
    """Return the MVME similarity score between two tokenized texts.

    Builds a matrix of pairwise word2vec similarities between the tokens
    of the two texts, then greedily pairs tokens one-to-one by always
    taking the highest remaining similarity and sums the chosen values.

    Parameters:
        t1: indexable whose first element (``t1[0]``) is the token
            sequence of the first text; other elements are ignored.
        t2: same layout as ``t1``, for the second text.

    Returns:
        The summed similarity of the matched token pairs, or ``0.0`` when
        either text has no tokens.
    """
    t1Tokens = t1[0]
    t2Tokens = t2[0]
    # Nothing can be paired when either side is empty, so skip the
    # (expensive) model load entirely.
    if len(t1Tokens) == 0 or len(t2Tokens) == 0:
        return 0.0
    # Initialization - use the pre-trained word2vec vectors
    model = _load_model('modelWord2Vec_mature')
    # Calculate the similarity matrix
    mat = _similarity_matrix(model, t1Tokens, t2Tokens)
    # Calculate the similarity between t1 and t2
    return _greedy_match_sum(mat)
# Reports whether every entry of the similarity matrix is zero
def isMatEmpty(mat):
    """Return True when ``mat`` contains no entry different from 0.0.

    ``mat`` is treated as a sequence of rows. A matrix with no rows, or
    with only empty rows, counts as empty. 'Empty' is printed on every
    call, regardless of the result.
    """
    print('Empty')
    return all(cell == 0.0 for line in mat for cell in line)
#t1 = [(['I', 'think']),2];
#t2 = [(["can't", 'admit', 'when']),2];
#print(mvme(t2,t2));