-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpagelist.py
173 lines (158 loc) · 5.41 KB
/
pagelist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# -*- coding: utf-8 -*-
# Copyright 2008, 2009 Mr.Z-man
# This file is part of wikitools.
# wikitools is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# wikitools is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with wikitools. If not, see <http://www.gnu.org/licenses/>.
import api
import page
import category
import wikifile
import math
def listFromQuery(site, queryresult):
    """Build a list of page objects from the page entries of an API query

    queryresult is the collection of page entries from a list or generator
    query
    e.g. - for a list=categorymembers query, pass result['query']['categorymembers']
    for a generator query, pass result['query']['pages']

    Entries in namespace 14 become category.Category objects, entries in
    namespace 6 become wikifile.File objects and everything else becomes a
    page.Page. Entries that have 'name' and 'url' but no 'title' are treated
    as files: their 'title' and 'ns' keys are filled in on the entry itself.
    """
    # A list query gives a list of entries, a generator query gives a dict
    # keyed by pageid; only the entries themselves are needed.
    if isinstance(queryresult, list):
        entries = queryresult
    else:
        entries = queryresult.values()
    pages = []
    for entry in entries:
        if 'title' not in entry and 'name' in entry and 'url' in entry:
            entry['title'] = u'File:' + entry['name']
            entry['ns'] = 6
        pageid = entry.get('pageid', False)
        namespace = entry.get('ns')
        if namespace == 14:
            factory = category.Category
        elif namespace == 6:
            factory = wikifile.File
        else:
            factory = page.Page
        pages.append(factory(site, title=entry['title'], check=False,
                             followRedir=False, pageid=pageid))
    return pages
def listFromTitles(site, titles, check=True, followRedir=False):
    """Create a list of page objects from a list of titles

    check and followRedir have the same meaning as in page.Page

    With check=False no API request is made and every title is wrapped in a
    plain page.Page. With check=True the titles are queried in batches of
    site.limit/10 titles and every returned page entry is converted with
    makePage, so the result holds whatever makePage returns for each entry.
    An empty titles argument gives an empty list without contacting the API.
    """
    # Work on a list so any iterable of titles can be measured and sliced
    titles = list(titles)
    if not titles:
        return []
    if not check:
        return [page.Page(site, title=title, check=False) for title in titles]
    # Floor division keeps the batch size an integer, and it must never
    # drop to zero when the limit is smaller than 10.
    batchsize = max(1, int(site.limit) // 10)
    pages = {}
    for start in range(0, len(titles), batchsize):
        tlist = '|'.join(titles[start:start + batchsize])
        if not isinstance(tlist, unicode):
            tlist = unicode(tlist, 'utf8')
        params = {'action':'query',
            'titles':tlist,
        }
        if followRedir:
            params['redirects'] = ''
        req = api.APIRequest(site, params)
        res = req.query(False)
        # A response without any page entries contributes nothing
        found = res.get('query', {}).get('pages', {})
        for key in found:
            # The api gives non-existent titles negative placeholder ids
            # that start over in every request, so they are stored per
            # batch instead of being merged by id. Real pageids are merged
            # by id; a later batch replaces an earlier duplicate.
            if str(key).startswith('-'):
                pages[(start, key)] = (key, found[key])
            else:
                pages[key] = (key, found[key])
    return [makePage(key, info, site) for key, info in pages.values()]
def listFromPageids(site, pageids, check=True, followRedir=False):
    """Create a list of page objects from a list of pageids

    check and followRedir have the same meaning as in page.Page

    With check=False no API request is made and every pageid is wrapped in
    a plain page.Page. With check=True the pageids are queried in batches of
    site.limit/10 ids, the responses are merged with api.resultCombine and
    every page entry of the merged result is converted with makePage.
    An empty pageids argument gives an empty list without contacting the API.
    """
    # Work on a list so any iterable of pageids can be measured and sliced
    pageids = list(pageids)
    if not pageids:
        return []
    if not check:
        return [page.Page(site, pageid=pageid, check=False) for pageid in pageids]
    # Floor division keeps the batch size an integer, and it must never
    # drop to zero when the limit is smaller than 10.
    batchsize = max(1, int(site.limit) // 10)
    response = False
    for start in range(0, len(pageids), batchsize):
        batch = pageids[start:start + batchsize]
        idlist = '|'.join([str(pageid) for pageid in batch])
        params = {'action':'query',
            'pageids':idlist,
        }
        if followRedir:
            params['redirects'] = ''
        req = api.APIRequest(site, params)
        res = req.query()
        if not response:
            response = res
        else:
            response = api.resultCombine('', response, res)
    found = response['query']['pages']
    return [makePage(key, found[key], site) for key in found]
def makePage(key, result, site):
    """Create a page object from one entry of a query's page results

    key is the key the entry was stored under in the query result; it is
    passed on as the pageid of the new object
    result is the entry itself
    site is the site object the new page object belongs to

    Returns False if the entry is marked 'invalid'. Otherwise returns a
    category.Category for namespace 14, a wikifile.File for namespace 6 and
    a page.Page for anything else. Entries marked 'missing' get
    exists = False.
    """
    # Nothing can be built for an invalid entry. Returning here also keeps
    # the namespace step below from being applied to False.
    if 'invalid' in result:
        return False
    title = result.get('title', False)
    namespace = result.get('ns')
    if namespace == 14:
        factory = category.Category
    elif namespace == 6:
        factory = wikifile.File
    else:
        factory = page.Page
    item = factory(site, title=title, check=False, followRedir=False, pageid=key)
    if 'missing' in result:
        item.exists = False
    if 'ns' in result:
        item.setNamespace(int(result['ns']))
    return item