-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathanswer_bin.py
170 lines (144 loc) · 5.4 KB
/
answer_bin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
from nltk.tree import Tree as Tree
from parse_sentence import *
import nltk, string
from nltk.corpus import wordnet
from nltk.corpus import stopwords
from binary import Binary
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
# Module-level singletons shared by every Answer_Bin method below.
# NOTE: this rebinds the name ``Binary`` from the class imported from
# ``binary`` to an instance of it, so the class itself is no longer reachable
# under that name after this line.
Binary = Binary()
# ``StanfordNLP`` is not imported by name in this file; presumably it is
# provided by ``from parse_sentence import *`` -- TODO confirm.
sNLP = StanfordNLP()
class Answer_Bin:
    """Answer binary (yes/no) questions against the sentences of a text file.

    The entry point is :meth:`main`: it picks the sentence of the file that
    best fuzzy-matches the question, compares the constituency parses of the
    question and of that sentence, and returns ``"Yes."`` or ``"No."``.

    The methods rely on the module-level ``sNLP`` parser and ``Binary``
    instance, and on the NLTK / fuzzywuzzy names imported at file level.
    """

    def __init__(self):
        # Placeholder attributes; no method of this class reads them.
        self.stemmer = 0
        self.remove_punctuation_map = 0
        self.vectorizer = 0

    @staticmethod
    def parse_text(filename):
        """Split the file at ``filename`` into single-line ASCII sentences.

        Sentences that contain a newline are dropped, and non-ASCII
        characters are removed from the remaining ones.

        :param filename: path of the text file to read.
        :return: list of sentence strings.
        """
        tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
        # Context manager so the handle is closed even if reading fails.
        with open(filename) as fp:
            data = fp.read()
        sentences = []
        for sentence in tokenizer.tokenize(data):
            if "\n" in sentence:
                continue
            cleaned = sentence.encode('ascii', 'ignore')
            # On Python 3 ``encode`` yields ``bytes``; convert back to text so
            # the sentences can be matched, parsed and printed as strings.
            # Where ``encode`` already returns ``str`` this is a no-op.
            if not isinstance(cleaned, str):
                cleaned = cleaned.decode('ascii')
            sentences.append(cleaned)
        return sentences

    @staticmethod
    def easy_find(text, sentences):
        """Return the entry of ``sentences`` that best fuzzy-matches ``text``.

        Matching uses fuzzywuzzy's ``token_sort_ratio`` scorer.
        """
        best_match = process.extractOne(text, sentences, scorer=fuzz.token_sort_ratio)
        return best_match[0]

    @staticmethod
    def is_bin_q(tree):
        """Return True when ``tree`` contains at least one ``SQ`` subtree."""
        return any(True for _ in tree.subtrees(lambda t: t.label() == "SQ"))

    @staticmethod
    def positive_bin_answer(text):
        """Return 0 if ``text`` looks negated, otherwise 1.

        ``text`` is parsed with ``sNLP``; the sentence counts as negated when
        a ``VP`` directly under the first child of the parse root has an
        ``RB`` node as a direct child.
        """
        tree = Tree.fromstring(str(sNLP.parse(text)))
        for phrase in tree[0]:
            if phrase.label() != "VP":
                continue
            for child in phrase:
                if child.label() == "RB":
                    return 0
        return 1

    def answer_bin(self, text, filename):
        """Debug helper: print the sentence of ``filename`` closest to ``text``.

        A notice is printed when the parse of ``text`` has no ``SQ`` node, but
        the lookup is still performed. Always returns ``None``.
        """
        tree = Tree.fromstring(str(sNLP.parse(text)))
        if not self.is_bin_q(tree):
            print("This question is not binary.")
        sentences = self.parse_text(filename)
        sent = self.easy_find(text, sentences)
        print(sent)
        return

    def get_raw_answer(self, question, answer):
        """Return True when ``answer`` structurally matches ``question``.

        ``answer`` is first passed through ``Binary.main`` (presumably turning
        the statement into question form -- confirm against ``binary``), then
        both texts are parsed. For every top-level constituent label of the
        question's ``SQ`` clause, the first constituent with the same label in
        the answer must exist and pass :meth:`partial_matching`.

        :return: ``False`` as soon as a label is missing (after printing
            ``"label not found"``) or a constituent fails to match,
            ``True`` otherwise.
        """
        q_tree = Tree.fromstring(str(sNLP.parse(question)))
        a_tree = Tree.fromstring(str(sNLP.parse(Binary.main(answer))))
        q_labels, q_words = self.get_top_level_structure(q_tree)
        a_labels, a_words = self.get_top_level_structure(a_tree)
        for q_label, q_part in zip(q_labels, q_words):
            if q_label not in a_labels:
                print("label not found")
                return False
            # ``index`` picks the first answer constituent with this label.
            a_part = a_words[a_labels.index(q_label)]
            if not self.partial_matching(q_part, a_part):
                return False
        return True

    @staticmethod
    def get_top_level_structure(tree):
        """Collect labels and leaves of the direct children of each ``SQ`` node.

        :return: ``(labels, leaves)`` -- two parallel lists with one entry per
            direct child of every ``SQ`` subtree, in traversal order.
        """
        top_level_structure = []
        parse_by_structure = []
        for clause in tree.subtrees(lambda t: t.label() == "SQ"):
            for constituent in clause:
                top_level_structure.append(constituent.label())
                parse_by_structure.append(constituent.leaves())
        return top_level_structure, parse_by_structure

    @staticmethod
    def get_synonyms_antonyms(word):
        """Look ``word`` up in WordNet and return ``(synonyms, antonyms)``.

        Synonyms are the lemma names of every synset of ``word`` plus the base
        names of those synsets' hypernyms; antonyms are the first antonym of
        each lemma that has one. Both lists may contain duplicates.
        """
        synonyms = []
        antonyms = []
        for syn in wordnet.synsets(word):
            for lemma in syn.lemmas():
                synonyms.append(lemma.name())
                lemma_antonyms = lemma.antonyms()
                if lemma_antonyms:
                    antonyms.append(lemma_antonyms[0].name())
            for hypernym in syn.hypernyms():
                # Synset names look like "dog.n.01"; keep only the word part.
                synonyms.append(hypernym.name().split(".")[0])
        return synonyms, antonyms

    def partial_matching(self, question_list, answer_list):
        """Check that every answer word is covered by the question words.

        English stop words are removed from both lists. An answer word is
        accepted when it equals a question word or one of its WordNet
        synonyms/hypernyms and is not an antonym of any question word.

        :param question_list: words of one question constituent.
        :param answer_list: words of the matching answer constituent.
        :return: ``True`` when all remaining answer words are accepted.
        """
        # Load the stop-word list once instead of once per word.
        stop_words = set(stopwords.words('english'))
        question_words = [w for w in question_list if w not in stop_words]
        answer_words = [w for w in answer_list if w not in stop_words]

        accepted = set(question_words)
        rejected = set()
        for word in question_words:
            synonyms, antonyms = self.get_synonyms_antonyms(word)
            accepted.update(synonyms)
            rejected.update(antonyms)

        return all(w in accepted and w not in rejected for w in answer_words)

    def main(self, question, filename):
        """Answer ``question`` with ``"Yes."`` or ``"No."`` using ``filename``.

        The closest sentence of the file is selected with :meth:`easy_find`;
        the question and that sentence are printed. A notice is printed when
        the question's parse has no ``SQ`` node, but an answer is still
        produced.

        :return: ``"Yes."`` when the structural match and the polarity of the
            sentence agree (match and not negated, or no match and negated),
            ``"No."`` otherwise.
        """
        tree = Tree.fromstring(str(sNLP.parse(question)))
        if not self.is_bin_q(tree):
            print("This question is not binary.")
        sentences = self.parse_text(filename)
        answer = self.easy_find(question, sentences)
        positive = self.positive_bin_answer(answer)
        res = self.get_raw_answer(question, answer)
        print(question)
        print(answer)
        return "Yes." if bool(res) == bool(positive) else "No."
"""
Debug: please keep the following tests here for future convenience
"""
# Answer_Bin = Answer_Bin()
# # print Answer_Bin.answer_bin("On November 27, 1995, did Dempsey loose his then 16-year-old sister Jennifer to a brain aneurysm?", "data/set1/a1.txt")
# # print Binary.main("On November 27, 1995, Dempsey lost his then 16-year-old sister Jennifer to a brain aneurysm.")
# # print Answer_Bin.main("On November 27, 1995, did Dempsey lose his then 16-year-old sister Jennifer to a brain aneurysm?", "data/set1/a1.txt")
# # print Answer_Bin.get_raw_answer("On November 27, 1995, did Dempsey lose his then 16-year-old sister Jennifer to a brain aneurysm?", "On November 27, 1995, Dempsey lost his then 16-year-old sister Jennifer to a brain aneurysm.")
# # print Answer_Bin.partial_matching(['lose', 'his', 'then', '16-year-old', 'sister', 'Jennifer', 'to', 'a', 'brain', 'aneurysm'],
# # ['lose', 'his', 'then', '16-year-old', 'sister', 'Jennifer', 'to', 'a', 'brain', 'aneurysm'])
# print Answer_Bin.main("On November 27, 1995, did Dempsey lose his then 16-year-old sister Jennifer to a brain aneurysm?", "data/set1/a1.txt")
# print Answer_Bin.main("On November 27, 1995, did Dempsey lose his then 16-year-old sister Jennifer to a fever?", "data/set1/a1.txt")