-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.py
38 lines (33 loc) · 1.12 KB
/
index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
import app.parser.getData as importArticles
import app.parser.articleRetrieval.getArticles as getContent
import app.parser.getChunks as gc
import app.analytics.tag as tag
import app.parser.articleRetrieval.wikipediaParse as wp
import app.parser.sentences as sent
import app.analytics.sentenceFiltering.actionSentences as action
import app.analytics.functions.hasDate as hd
import app.analytics.functions.synonym as sn
import app.analytics.getFeatures as ft
# Driver script: for each of the first ten records returned by
# importArticles.getData(), extract the tags, look up the Wikipedia article
# for the tagged subject, split it into sentences and compute features.
articles = importArticles.getData()
sentences = []  # remains empty if no record yields any tags

for article in articles[:10]:
    # Single-argument parenthesised print produces the same output under the
    # Python 2 print statement this file is written for.
    print(article)

    text = article[1]  # presumably the article text -- confirm against getData()
    chunks = gc.getChunks(text)
    tags = tag.getTags(text, chunks)
    if not tags:
        # No tags were produced for this record; skip to the next one.
        # A truthiness test also covers other empty results (None, {}),
        # which would otherwise fail on the key lookups below.
        continue

    # The Stanford Open IE tags
    subject = tags['subject']
    relation = tags['relation']
    objects = tags['object'].split()
    print(objects)
    print(relation)
    print(subject)

    # Keep the fetched page under its own name so the loop variable
    # `article` is not overwritten mid-iteration.
    wiki_article = wp.getArticle(subject)
    sentences = sent.getSentences(wiki_article)
    features = ft.getFeatures(subject, objects, relation, sentences)