-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpc6.py
72 lines (66 loc) · 2.37 KB
/
pc6.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from bs4 import BeautifulSoup
import re
from queue import Queue
from time import sleep
import requests
import string
from urllib.parse import urlparse
from urllib.parse import parse_qs
from urllib.request import urlretrieve
import time
import urllib
import sys
from urllib import request
import wget
import json
def link2content(link, timeout=10):
    """Fetch a page and return its HTML source as text.

    Args:
        link: URL of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawler forever.

    Returns:
        The response body decoded as UTF-8, or an empty string when the
        request fails.
    """
    try:
        # `headers` is the module-level request-header dict created by the
        # script entry point.
        res = requests.get(url=link, headers=headers, timeout=timeout)
        res.encoding = 'utf-8'
        return res.text
    except requests.RequestException:
        # Best effort: pause briefly and hand back an empty document so the
        # caller simply finds nothing to parse.
        sleep(0.1)
        return ""
def parse_link(url):
    """Collect the links behind every ``<a class="btn">`` element of a page.

    The page at *url* is downloaded with ``link2content`` and parsed; each
    matching anchor's ``href`` is prefixed with the site root and pushed onto
    the module-level ``queue_link`` queue.

    Args:
        url: Address of the listing page to scan.
    """
    soup = BeautifulSoup(link2content(url), 'html.parser')
    for bt in soup.find_all("a", class_='btn'):
        href = bt.get('href')
        if href is None:
            # Anchor without a target: nothing to queue.
            continue
        queue_link.put('http://www.pc6.com' + href)
    print("parse link complete")
if __name__ == '__main__':
    import os

    # Queue of links collected by parse_link(), consumed by the download loop.
    queue_link = Queue()
    # HTTP request headers sent with every page fetch.
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7',
        'Connection': 'keep-alive',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36'
    }
    num = 1  # Running counter used to name the downloaded files.
    url = 'http://www.pc6.com/android/588_1.html'
    type_url = []

    # Downloads are written to ./apk; create it up front so the first
    # download does not fail on a missing directory.
    os.makedirs('./apk', exist_ok=True)

    sp = BeautifulSoup(link2content(url), 'html.parser')
    ty = sp.find("p", {'class': 'clearfix'})
    if ty is None or ty.a is None:
        # link2content() returns "" on failure, which leaves nothing to parse.
        raise SystemExit('category list not found on start page: ' + url)

    # Every node following the first anchor is treated as a category link;
    # its address is taken from the first double-quoted value in its markup.
    for st in ty.a.next_siblings:
        if st != '\n':
            s = str(st).split('\"', 3)
            if len(s) < 2:
                # Plain text / whitespace node without any quoted attribute.
                continue
            type_url.append('http://www.pc6.com' + s[1])

    for t in type_url:
        # Walk pages 1..20 of each category by rewriting the page suffix.
        for page in range(1, 21):
            link = t.split('_', 2)[0] + '_' + str(page) + '.html'
            parse_link(link)
            # Drain the links gathered from this page before moving on.
            while not queue_link.empty():
                try:
                    u = queue_link.get()  # Next link from the queue.
                    print(u)
                    # Download the apk: arguments are the download link and
                    # the destination path.
                    wget.download(u, './apk/' + str(num) + '.apk')
                    print('第',num,'个apk下载完成')
                    num = num + 1
                    sleep(1)
                except Exception as err:
                    # Skip the failed download but keep Ctrl-C working
                    # (KeyboardInterrupt is not an Exception subclass).
                    print('出现错误,直接跳过,当前页面为: ',link, err)
                    continue