-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathserver.py
290 lines (252 loc) · 8.65 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# -*- coding: utf-8 -*-
from flask import Flask, request, render_template, redirect
import urllib2
from bs4 import BeautifulSoup
import os
import random
from getWikiSubset import *
from journey import *
# Templates used to build Wikipedia URLs.
mainWikiURL = "https://en.wikipedia.org%s"  # filled with a site-relative path
template_wikiURL = "/wiki/%s"  # filled with an underscore-separated page name
# Wikipedia page URL: mainWikiURL % (template_wikiURL % "Search_terms_like_this").
# Chaining without parentheses gives the same string, because the first
# substitution leaves the second "%s" in place.
serverURL = "http://aguimaraesduarte.pythonanywhere.com/links/%s"
# to be updated if necessary
# NOTE(review): serverURL is not referenced by the code visible in this file --
# confirm it is still needed.

# Module-level browsing state: reset by the "/" route, updated by "/links/".
currentPage = ""  # heading text of the most recently fetched Wikipedia page
previousPage = ""  # value of currentPage after the previous /links/ request
visitedPages = {}  # dictionary: page title -> list of page titles visited from it
visitedLinks = []  # list: page titles in the order they were visited
counter = 0  # bumped on every /links/ request, in order to "refresh" the index.html page
# getLinksFromURL: get all article links inside the body of a Wikipedia page.
# Side effect: stores the page's heading text in the global currentPage.
# @param URL: full Wikipedia page URL
# @return links: list of (wikiURL, title) tuples, one per distinct
#   '/wiki/<page>' link found in the main body, in document order.
#   title is the anchor's title attribute, or its visible text when the
#   attribute is missing.
# Raises whatever urllib2.urlopen raises for an unreachable URL, and
# IndexError when the page lacks the expected heading or content <div>.
def getLinksFromURL(URL):
    global currentPage
    response = urllib2.urlopen(URL)
    html = BeautifulSoup(response, "html.parser")
    currentPage = html.find_all('h1', {'id': 'firstHeading'})[0].text
    content = html.find_all('div', {'class': 'mw-content-ltr'})[0]

    links = []
    seen = set()  # terms already collected; links holds tuples, so test membership here
    for link in content.find_all('a'):
        # Anchors without an href are skipped instead of raising KeyError
        # and aborting the whole page.
        href = link.get('href')
        if not href or not href.startswith('/wiki/') or ":" in href:
            continue
        term = href.split("#")[0]  # remove trailing "#fragment" if it has one
        # Re-quote so existing %xx codes are kept literally ("%" becomes "%25").
        term = urllib2.quote(term, ':/')
        if term in seen:
            continue
        seen.add(term)
        title = link.get('title') or link.get_text()
        links.append((term, title))
    return links
# transformTerms: build the Wikipedia-style page name for a search string.
# @param searchString: input string, possibly containing whitespace
# @return the whitespace-separated words of searchString joined with
#   underscores (leading and trailing whitespace is dropped)
def transformTerms(searchString):
    words = searchString.split()
    wikiName = "_".join(words)  # Something_like_this
    return wikiName
# getLinksFromSearchString: convert input search terms as a string into
# a potentially valid Wikipedia URL, and get the list of links in that page.
# @param searchString: input search terms as string (already underscore-joined)
# @return the links from getLinksFromURL(URL).
# If the lookup fails for any reason (e.g. not a valid Wikipedia URL), return
# a single-element list holding an error string instead of raising.
def getLinksFromSearchString(searchString):
    URL = mainWikiURL % (template_wikiURL % searchString)
    try:
        return getLinksFromURL(URL)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        return ["%s is not a valid search term" % searchString]
# htmlList: from a list of links from a Wikipedia page (using terms from <searchString>),
# create and return html code for an organized table of said links.
# @param listOfLinks: list of (partial Wikipedia link, title) tuples, or a
#   list holding an error string when the page lookup failed
# @param searchString: input search terms as string
# @return html: html code for the table.
#   If searchString is empty, a fixed "does not exist" message is returned.
#   If listOfLinks carries an error string, that message is returned
#   (HTML-escaped) instead of a table.
def htmlList(listOfLinks, searchString):
    from xml.sax.saxutils import escape

    if searchString == "":
        return "This page does not exist on Wikipedia"

    # Values end up inside single-quoted attributes, so escape both quote
    # characters in addition to &, < and >.
    quoteEntities = {'"': "&quot;", "'": "&#39;"}

    # A failed lookup is reported as a plain string rather than a
    # (link, title) tuple; show it instead of trying to index into it.
    for link in listOfLinks:
        if not isinstance(link, (tuple, list)):
            return escape(link, quoteEntities)

    href = mainWikiURL % (template_wikiURL % searchString)
    html_header = "<tr><th><a href='%s'>%s</a>: %d links</th></tr>" % (
        escape(href, quoteEntities),
        escape(searchString, quoteEntities),
        len(listOfLinks),
    )

    html_entries_template = "<tr><td><a href='%s'>%s</a></td></tr>"
    rows = []
    for link in listOfLinks:
        # Keep only the page name so the href resolves relative to /links/.
        linkHref = link[0].split("/wiki/", 1)[-1]
        # Fall back to the page name when no usable title was supplied.
        linkTitle = link[1] if len(link) > 1 and link[1] else linkHref
        rows.append(html_entries_template % (
            escape(linkHref, quoteEntities),
            escape(linkTitle, quoteEntities),
        ))

    return """
<table>
%s
%s
</table>""" % (html_header, "".join(rows))
# updateVisited: record the previousPage -> currentPage step in the global
# visitedPages dictionary.
#   - previousPage already a key: append currentPage to its list (once only)
#   - previousPage unknown and non-empty: start its list with currentPage
#   - previousPage unknown and empty: register currentPage with an empty list
def updateVisited():
    successors = visitedPages.get(previousPage)
    if successors is not None:
        if currentPage not in successors:
            successors.append(currentPage)
        return
    if previousPage == "":
        visitedPages[currentPage] = []
    else:
        visitedPages[previousPage] = [currentPage]
# dictToDot: transform dictionary <d> into a dot string that is saved into
# static/graph.txt. graph.txt is converted into a graph.png image using the
# command line dot function.
# @param d: input directed dictionary (node -> list of successor nodes)
# @return dot: string of connected graph for debugging purposes
# Node names are reduced to their ASCII characters, and double quotes in them
# are escaped so they cannot terminate the quoted DOT identifier.
def dictToDot(d):
    def dotLabel(text):
        # Drop non-ASCII characters, then decode back to text so the result
        # concatenates with other strings whatever type encode() returned.
        asciiText = text.encode('ascii', 'ignore').decode('ascii')
        return '"' + asciiText.replace('"', '\\"') + '"'

    lines = ["digraph g {", "\trankdir=LR;"]
    for key in d:
        for value in d[key]:
            lines.append("\t%s -> %s;" % (dotLabel(key), dotLabel(value)))
    lines.append("}")
    dot = "\n".join(lines)

    # "with" closes the file even if the write fails.
    with open("static/graph.txt", "w") as graphFile:
        graphFile.write(dot)
    os.system('dot -Tpng static/graph.txt > static/graph.png')
    return dot
# dictToDotPath: transform path <path> into a dot string that is saved into
# static/path_graph.txt. path_graph.txt is converted into a path_graph.png
# image using the command line dot function.
# @param path: input list of visited pages (URL-quoted names), in order
# @return dot: string of connected graph for debugging purposes
# Each consecutive pair of pages becomes one edge. Names are URL-unquoted, and
# double quotes in them are escaped so they cannot terminate the quoted DOT
# identifier.
def dictToDotPath(path):
    def dotLabel(node):
        return '"' + urllib2.unquote(node).replace('"', '\\"') + '"'

    lines = ["digraph g {", "\trankdir=LR;"]
    for source, target in zip(path, path[1:]):
        lines.append("\t%s -> %s;" % (dotLabel(source), dotLabel(target)))
    lines.append("}")
    dot = "\n".join(lines)

    # "with" closes the file even if the write fails.
    with open("static/path_graph.txt", "w") as graphFile:
        graphFile.write(dot)
    os.system('dot -Tpng static/path_graph.txt > static/path_graph.png')
    return dot
############################# Flask stuff
# Flask application object; the @app.route decorators in this file register
# their view functions on it.
app = Flask(__name__)
# home: landing page. Resets every piece of module-level browsing state
# (visited pages/links, current and previous page, refresh counter) and
# serves the combined selection page.
@app.route("/")
def home():
    global currentPage, previousPage
    global visitedPages, visitedLinks
    global counter
    currentPage = previousPage = ""
    visitedPages, visitedLinks = {}, []  # fresh dictionary / fresh list
    counter = 0
    return render_template('combined_page.html')
# post_links: form entry point for a search. Reads the "searchString" form
# field and hands it to links(), returning whatever links() returns.
@app.route("/links/", methods=['POST'])
def post_links():
    return links(request.form["searchString"])
# links: fetch the Wikipedia page for <searchString>, record the visit, and
# render the traversal graph image, the trail of visited pages and a table of
# the page's links.
# @param searchString: search terms taken from the URL (or from post_links)
# @return the assembled html page as a string
# When the page lookup fails, the visit is NOT recorded and the table area
# shows htmlList's "does not exist" message; the existing trail is still shown.
@app.route("/links/<searchString>")
def links(searchString):
    from xml.sax.saxutils import escape

    global previousPage
    global counter
    html = """
<html>
<head>
</head>
<body>
<br>
<form action="/">
Click here to go back to traversal selection <input type="submit" value="Go back" class="tfbutton">
</form>
<header>
<img src="%s">
</header>
<br>
%s
%s
</body>
</html>
"""
    # transform into wiki format
    trSearchString = transformTerms(searchString)
    # get list of links
    listOfLinks = getLinksFromSearchString(trSearchString)
    # A failed lookup comes back as a list holding an error string instead of
    # (link, title) tuples.
    lookupFailed = any(not isinstance(entry, (tuple, list)) for entry in listOfLinks)
    if lookupFailed:
        # currentPage does not describe this request, so neither render a
        # table under its name nor record it as a visit.
        table = htmlList(listOfLinks, "")
    else:
        # create html table
        table = htmlList(listOfLinks, currentPage)
        # update visited links and dictionary, then redraw the graph image
        updateVisited()
        visitedLinks.append(currentPage)
        dictToDot(visitedPages)
        # update previous page
        previousPage = currentPage
    # assemble final html; the random query string defeats image caching
    imageURL = "/static/graph.png?r=%s" % random.randint(0,10000)
    trail = " --> ".join(escape(page) for page in visitedLinks)
    returnHtml = html % (imageURL, trail, table)
    # update counter for index.html to refresh
    counter += 1
    return returnHtml
# render_path: show the shortest path between the start and end nodes stored
# in the 'start_node' / 'end_node' cookies, as a rendered graph image.
# @return html page as a string; a redirect to "/" when either cookie is
#   missing; a plain message when no path is found.
# NOTE(review): assumes get_all_paths returns a list of paths (each a list of
# page names), as the original indexing implied -- confirm against journey.
@app.route("/show_path", methods = ['GET', 'POST'])
def render_path():
    from xml.sax.saxutils import escape

    html = """
<html>
<body>
<header>
The shortest path from %s to %s is:
<img src="%s">
</header>
<br>
<form action="/take_journey" method="post">
<input type="submit" value="Take The Journey" class="tfbutton">
</form>
<br>
<br>
</body>
</html>
"""
    start_node = request.cookies.get('start_node')
    end_node = request.cookies.get('end_node')
    if start_node is None or end_node is None:
        # Nothing selected yet (e.g. the URL was opened directly).
        return redirect('/')
    # Cookie values originate from user input: escape before embedding in html.
    start_label = escape(urllib2.unquote(start_node))
    end_label = escape(urllib2.unquote(end_node))

    with open('trees/Full_Graph_400.txt', 'r') as f:
        graph = f.read().split('\t')
    paths = get_all_paths(graph, start_node, end_node, 5)
    if not paths:
        return "No path was found from %s to %s" % (start_label, end_label)
    # min() keeps the first of several equally short paths.
    shortest_path = min(paths, key=len)
    dictToDotPath(shortest_path)
    # the random query string defeats image caching
    imageURL = "/static/path_graph.png?r=%s" % random.randint(0, 10000)
    return html % (start_label, end_label, imageURL)
# set_nodes: store the submitted 'startlocation' / 'endlocation' form fields
# in the 'start_node' / 'end_node' cookies and redirect to /show_path.
@app.route("/set_nodes", methods = ['GET', 'POST'])
def set_nodes():
    # Read both fields up front, before the response object is built.
    cookies = (
        ('start_node', request.form['startlocation']),
        ('end_node', request.form['endlocation']),
    )
    response = redirect('/show_path')
    for cookieName, cookieValue in cookies:
        response.set_cookie(cookieName, value=cookieValue)
    return response
# take_journey: run takeJourney() between the nodes stored in the
# 'start_node' / 'end_node' cookies and show a thank-you page.
# @return html page as a string, or a redirect to "/" when either cookie is
#   missing.
# The journey itself is best-effort: a failure is logged and the thank-you
# page is still shown.
@app.route("/take_journey", methods = ['GET', 'POST'])
def take_journey():
    from xml.sax.saxutils import escape

    html = """
<html>
<body>
<header>
Thank you for taking the journey from %s to %s
<br>
<br>
<form action="/start_over" method="post">
Go back to traversal selection
<input type="submit" value="Go back" class="tfbutton">
</form>
</header>
</body>
</html>
"""
    start_node = request.cookies.get('start_node')
    end_node = request.cookies.get('end_node')
    if start_node is None or end_node is None:
        # Nothing selected yet (e.g. the URL was opened directly).
        return redirect('/')
    try:
        takeJourney(start_node, end_node)
    except Exception:
        # Keep the page working, but leave a trace instead of hiding the error.
        app.logger.exception("takeJourney failed for %r -> %r", start_node, end_node)
    # Cookie values originate from user input: escape before embedding in html.
    return html % (escape(urllib2.unquote(start_node)), escape(urllib2.unquote(end_node)))
# start_over: send the browser back to the landing page ("/").
@app.route("/start_over", methods = ['GET', 'POST'])
def start_over():
    return redirect('/')
# Start the development server only when this file is executed directly, so
# that importing the module (e.g. from a WSGI host) does not launch it.
if __name__ == "__main__":
    app.run() # kickstart your flask server