-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsummarize.py
80 lines (49 loc) · 1.65 KB
/
summarize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from collections import Counter

import nltk
nltk.download('punkt')
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.probability import FreqDist
def get_summary(full_text: str, pes: float) -> str:
    """Return an extractive summary made of the highest-scoring sentences.

    Every non-stopword token of *full_text* is counted (case-insensitively).
    Each sentence is then scored as the sum of the counts of all counted
    tokens that occur in its lowercased text, and a sentence is kept when its
    score is strictly greater than ``pes`` times the (integer-truncated)
    average sentence score.

    Args:
        full_text: The text to summarize.
        pes: Multiplier applied to the average sentence score to obtain the
            selection threshold; larger values keep fewer sentences.

    Returns:
        The string ``"Summary: "`` followed by the selected sentences in
        their original order, each preceded by a single space.  When no
        sentence receives a score (for example, empty input) the result is
        just ``"Summary: "``.
    """
    stop_words = set(stopwords.words("english"))
    freq_table = Counter(
        token
        for token in map(str.lower, word_tokenize(full_text))
        if token not in stop_words
    )

    sentences = sent_tokenize(full_text)

    # Scores are keyed by sentence text, so a sentence that appears more than
    # once accumulates the score of every occurrence.
    sentence_value = {}
    for sentence in sentences:
        lowered = sentence.lower()  # hoisted: invariant across the word loop
        # NOTE: this is a substring test against the whole sentence, not a
        # token match (e.g. "art" also matches inside "party"); kept as-is so
        # existing scores do not change.
        score = sum(freq for word, freq in freq_table.items() if word in lowered)
        if score:  # every count is >= 1, so 0 means no word matched
            sentence_value[sentence] = sentence_value.get(sentence, 0) + score

    # Nothing was scored (e.g. empty text): avoid dividing by zero below.
    if not sentence_value:
        return "Summary: "

    average = int(sum(sentence_value.values()) / len(sentence_value))
    threshold = pes * average

    summary = "".join(
        " " + sentence
        for sentence in sentences
        if sentence in sentence_value and sentence_value[sentence] > threshold
    )
    return "Summary: " + summary