-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathduckduckgo.py
88 lines (77 loc) · 2.88 KB
/
duckduckgo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
"""
Based on https://github.com/deepanprabhu/duckduckgo-images-api/blob/master/duckduckgo_images_api/api.py
"""
from typing import *
import requests
import re
import json
import logging
def search(keywords: str, max_results: int = 10) -> Optional[List[Dict[str, Any]]]:
    """Search DuckDuckGo images for keywords.

    Two steps are involved: a POST to the DuckDuckGo front page to obtain the
    ``vqd`` token, then paginated GETs against ``i.js`` that follow the
    ``next`` link of each response until enough results are collected.

    Args:
        keywords (str): Keywords to search for.
        max_results (int, optional): Requested number of search results.
            Defaults to 10. Values <= 0 yield an empty list without any
            network traffic.

    Returns:
        list: A list of at most ``max_results`` dictionaries as returned by
            DuckDuckGo, containing the following fields:
            "image"     : image URL
            "url"       : URL of page where image was found
            "height"    : height of image
            "width"     : width of image
            "title"     : title of page
            "source"    : origin of the result (meaning unclear, often "Bing")
            "thumbnail" : URL of thumbnail
            The list may be shorter (or empty) if DuckDuckGo runs out of
            results, answers a result page with a non-200 status, or keeps
            sending unparsable JSON.
        None: if the initial token request fails or the token cannot be
            parsed from the response.
    """
    # Nothing was asked for: skip the network round-trips entirely.
    if max_results <= 0:
        return []

    url = 'https://duckduckgo.com/'
    headers = {
        'authority': 'duckduckgo.com',
        'accept': 'application/json, text/javascript, */*; q=0.01',
        'sec-fetch-dest': 'empty',
        'x-requested-with': 'XMLHttpRequest',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36',
        'sec-fetch-site': 'same-origin',
        'sec-fetch-mode': 'cors',
        'referer': 'https://duckduckgo.com/',
        'accept-language': 'en-US,en;q=0.9',
    }

    # First make a request to above URL, and parse out the 'vqd'.
    # This is a special token, which should be used in the subsequent request.
    res = requests.post(url, headers=headers, data={'q': keywords})
    if res.status_code != 200:
        logging.error("DuckDuckGo responded with %d", res.status_code)
        return None

    search_obj = re.search(r'vqd=([\d-]+)\&', res.text, re.M | re.I)
    if not search_obj:
        logging.error("Token parsing failed")
        return None

    params = (
        ('l', 'us-en'),
        ('o', 'json'),
        ('q', keywords),
        ('vqd', search_obj.group(1)),
        ('f', ',,,'),
        ('p', '1'),
        ('v7exp', 'a'),
    )

    request_url = url + "i.js"
    search_results = []

    # Bound the retries on unparsable responses so a persistently broken
    # page cannot spin this loop forever.
    max_parse_retries = 3
    failed_parses = 0

    while True:
        res = requests.get(request_url, headers=headers, params=params)
        if res.status_code != 200:
            logging.error("DuckDuckGo responded with %d", res.status_code)
            return search_results

        # Only the JSON decoding is expected to raise ValueError.
        try:
            data = json.loads(res.text)
        except ValueError:
            logging.error("Caught exception", exc_info=True)
            failed_parses += 1
            if failed_parses >= max_parse_retries:
                return search_results
            continue
        failed_parses = 0

        # A page without a "results" key is treated as an empty page.
        for result in data.get("results", []):
            search_results.append(result)
            if len(search_results) >= max_results:
                return search_results

        # No further page advertised: return whatever was collected.
        if "next" not in data:
            return search_results
        request_url = url + data["next"]