Skip to content

Commit

Permalink
腾讯修改了算法,ipad UA请求时始终跳转为pad.ac.qq.com,故相应修改
Browse files Browse the repository at this point in the history
  • Loading branch information
abcfy2 committed Nov 30, 2014
1 parent 9b963e9 commit 0ee1863
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 30 deletions.
14 changes: 4 additions & 10 deletions getComic-gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,12 +142,10 @@ def download(self):

path = self.pathLine.text()
comicName = self.comicName

forbiddenRE = re.compile(r'[\\/":*?<>|]') #windows下文件名非法字符\ / : * ? " < > |
comicName = re.sub(forbiddenRE, '_', comicName) #将windows下的非法字符一律替换为_
comicPath = os.path.join(path, comicName)

if not os.path.isdir(path):
os.makedirs(path)

if not os.path.isdir(comicPath):
os.makedirs(comicPath)

Expand Down Expand Up @@ -175,13 +173,9 @@ def run(self):
outputString = '正在下载第{0:0>4}话: {1}...'.format(i+1, self.contentNameList[i])
print(outputString)
self.output.emit(outputString)
contentPath = os.path.join(self.comicPath, '第{0:0>4}话'.format(i+1))

#如果章节名有左右斜杠时,不创建带有章节名的目录,因为这是路径分隔符
forbiddenRE = re.compile(r'[\\/":*?<>|]') #windows下文件名非法字符\ / : * ? " < > |
if not forbiddenRE.search(self.contentNameList[i]):
contentPath = os.path.join(self.comicPath, '第{0:0>4}话-{1}'.format(i+1, self.contentNameList[i]))

self.contentNameList[i] = re.sub(forbiddenRE, '_', self.contentNameList[i])
contentPath = os.path.join(self.comicPath, '第{0:0>4}话-{1}'.format(i+1, self.contentNameList[i]))
if not os.path.isdir(contentPath):
os.mkdir(contentPath)
imgList = getComic.getImgList(self.contentList[i], self.id)
Expand Down
40 changes: 20 additions & 20 deletions getComic.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,17 @@ def __str__(self):
return repr(self.code)

def isLegelUrl(url):
    """Return True if *url* is one of the supported Tencent comic URL forms.

    Accepted forms (http only, matching the site as of this commit):
      - http://ac.qq.com/Comic/comicInfo/id/<digits>[/]
      - http://m.ac.qq.com/Comic/comicInfo/id/<digits>[/]
      - http://pad.ac.qq.com/Comic/comicInfo/id/<digits>[/]   (iPad UA redirect target)
      - http://ac.qq.com/<word>[/]                            (comic short name)

    Note: dots in the host names are escaped — previously the bare `.`
    matched any character, so hosts like `acxqq.com` were wrongly accepted.
    """
    legal_url_list = [
        re.compile(r'^http://ac\.qq\.com/Comic/[Cc]omicInfo/id/\d+/?$'),
        re.compile(r'^http://m\.ac\.qq\.com/Comic/[Cc]omicInfo/id/\d+/?$'),
        re.compile(r'^http://ac\.qq\.com/\w+/?$'),
        re.compile(r'^http://pad\.ac\.qq\.com/Comic/[Cc]omicInfo/id/\d+/?$')
    ]

    # Any single pattern matching makes the URL legal.
    for legal_url in legal_url_list:
        if legal_url.match(url):
            return True
    return False

def getId(url):
if not isLegelUrl(url):
Expand All @@ -62,13 +61,13 @@ def getId(url):
return id[0]

def getContent(id):
getComicInfoUrl = 'http://m.ac.qq.com/GetData/getComicInfo?id={}'.format(id)
getComicInfoUrl = 'http://pad.ac.qq.com/GetData/getComicInfo?id={}'.format(id)
getComicInfo = requestSession.get(getComicInfoUrl)
comicInfoJson = getComicInfo.text
comicInfo = json.loads(comicInfoJson)
comicName = comicInfo['title']
comicIntrd = comicInfo['brief_intrd']
getChapterListUrl = 'http://m.ac.qq.com/GetData/getChapterList?id={}'.format(id)
getChapterListUrl = 'http://pad.ac.qq.com/GetData/getChapterList?id={}'.format(id)
getChapterList = requestSession.get(getChapterListUrl)
contentJson = json.loads(getChapterList.text)
count = contentJson['length']
Expand All @@ -82,7 +81,7 @@ def getContent(id):

def getImgList(contentJson, id):
cid = list(contentJson.keys())[0]
getPicHashURL = 'http://m.ac.qq.com/View/mGetPicHash?id={}&cid={}'.format(id, cid)
getPicHashURL = 'http://pad.ac.qq.com/View/mGetPicHash?id={}&cid={}'.format(id, cid)
picJsonPage = requestSession.get(getPicHashURL).text
picJson = json.loads(picJsonPage)
count = picJson['pCount'] #统计图片数量
Expand Down Expand Up @@ -183,9 +182,12 @@ def main(url, path, lst=None):
print('\n'.join(contentNameList))
except Exception:
print('章节列表包含无法解析的特殊字符\n')

forbiddenRE = re.compile(r'[\\/":*?<>|]') #windows下文件名非法字符\ / : * ? " < > |
comicName = re.sub(forbiddenRE, '_', comicName) #将windows下的非法字符一律替换为_
comicPath = os.path.join(path, comicName)
if not os.path.isdir(comicPath):
os.mkdir(comicPath)
os.makedirs(comicPath)
print()

if not lst:
Expand All @@ -200,14 +202,11 @@ def main(url, path, lst=None):
'自动忽略'.format(len(contentList)))
break

contentPath = os.path.join(comicPath, '第{0:0>4}话'.format(i))
contentNameList[i - 1] = re.sub(forbiddenRE, '_', contentNameList[i - 1]) #将windows下的非法字符一律替换为_
contentPath = os.path.join(comicPath, '第{0:0>4}话-{1}'.format(i, contentNameList[i - 1]))

try:
print('正在下载第{0:0>4}话: {1}'.format(i, contentNameList[i -1]))
#如果章节名有左右斜杠时,不创建带有章节名的目录,因为这是路径分隔符
forbiddenRE = re.compile(r'[\\/":*?<>|]') #windows下文件名非法字符\ / : * ? " < > |
if not forbiddenRE.search(contentNameList[i - 1]):
contentPath = os.path.join(comicPath, '第{0:0>4}话-{1}'.format(i, contentNameList[i - 1]))
except Exception:
print('正在下载第{0:0>4}话: {1}'.format(i))

Expand All @@ -229,6 +228,7 @@ def main(url, path, lst=None):
parser.add_argument('-u', '--url', help='要下载的漫画的首页,可以下载以下类型的url: \n'
'http://ac.qq.com/Comic/comicInfo/id/511915\n'
'http://m.ac.qq.com/Comic/comicInfo/id/505430\n'
'http://pad.ac.qq.com/Comic/comicInfo/id/505430\n'
'http://ac.qq.com/naruto')
parser.add_argument('-p', '--path', help='漫画下载路径。 默认: {}'.format(defaultPath),
default=defaultPath)
Expand Down

0 comments on commit 0ee1863

Please sign in to comment.