From 6a77750a72368c41084a86730d471d260e26d1fd Mon Sep 17 00:00:00 2001
From: sbmzhcn
Date: Mon, 12 Jan 2015 06:52:11 -0800
Subject: [PATCH 1/3] fix for phantomjs send_keys

---
 GoogleScraper/scraping.py | 4 ++++
 GoogleScraper/selenium.py | 5 ++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/GoogleScraper/scraping.py b/GoogleScraper/scraping.py
index 672ab03d..7e23253e 100644
--- a/GoogleScraper/scraping.py
+++ b/GoogleScraper/scraping.py
@@ -275,7 +275,11 @@ def blocking_search(self, callback, *args, **kwargs):
                 # Leave search when search engines detected us
                 # add the rest of the keywords as missed one
                 logger.critical(e)
+<<<<<<< HEAD
                 self.missed_keywords.add(self.keywords[i:])
+=======
+                self.missed_keywords.add(self.keywords[i])
+>>>>>>> some fix for http scraping
                 continue
 
     @abc.abstractmethod
diff --git a/GoogleScraper/selenium.py b/GoogleScraper/selenium.py
index 980703fb..33bca0ce 100644
--- a/GoogleScraper/selenium.py
+++ b/GoogleScraper/selenium.py
@@ -364,7 +364,10 @@ def search(self):
             if self.search_input:
                 self.search_input.clear()
                 time.sleep(.25)
-                self.search_input.send_keys(self.current_keyword + Keys.ENTER)
+                self.search_input.send_keys(self.current_keyword)
+                if self.browser_type == 'phantomjs':
+                    time.sleep(1) # Phantomjs are much faster than firefox, chrome
+                self.search_input.send_keys(Keys.ENTER)
                 self.current_request_time = datetime.datetime.utcnow()
             else:
                 logger.warning('Cannot get handle to the input form for keyword {}.'.format(self.current_keyword))

From ec54e7fa3c399372833e66564a1cc2ccd5ba0923 Mon Sep 17 00:00:00 2001
From: sbmzhcn
Date: Mon, 12 Jan 2015 06:58:10 -0800
Subject: [PATCH 2/3] some fix

---
 GoogleScraper/scraping.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/GoogleScraper/scraping.py b/GoogleScraper/scraping.py
index 7e23253e..1c97a985 100644
--- a/GoogleScraper/scraping.py
+++ b/GoogleScraper/scraping.py
@@ -275,11 +275,7 @@ def blocking_search(self, callback, *args, **kwargs):
                 # Leave search when search engines detected us
                 # add the rest of the keywords as missed one
                 logger.critical(e)
-<<<<<<< HEAD
-                self.missed_keywords.add(self.keywords[i:])
-=======
                 self.missed_keywords.add(self.keywords[i])
->>>>>>> some fix for http scraping
                 continue
 
     @abc.abstractmethod

From 3448cc6d1f018281412c3be9202371f8dd7598cc Mon Sep 17 00:00:00 2001
From: sbmzhcn
Date: Mon, 12 Jan 2015 07:11:32 -0800
Subject: [PATCH 3/3] http requests timeout support

---
 GoogleScraper/config.cfg  | 3 +++
 GoogleScraper/core.py     | 5 +++++
 GoogleScraper/http.py     | 3 ++-
 GoogleScraper/scraping.py | 6 +++++-
 4 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/GoogleScraper/config.cfg b/GoogleScraper/config.cfg
index ea217da5..3ced8f11 100644
--- a/GoogleScraper/config.cfg
+++ b/GoogleScraper/config.cfg
@@ -99,6 +99,9 @@ use_own_ip: True
 ; Whether to check proxies before starting the scrape
 check_proxies: True
 
+; Set HTTP requests to stop waiting for a response after a given number of seconds
+timeout: 10
+
 ; Global configuration parameters that apply on all modes.
 [GLOBAL]
 ; The proxy file. If this is a valid file path, each line will represent a proxy.
diff --git a/GoogleScraper/core.py b/GoogleScraper/core.py
index 2ac70ad1..b6c6eef3 100755
--- a/GoogleScraper/core.py
+++ b/GoogleScraper/core.py
@@ -259,6 +259,10 @@ def main(return_results=False, parse_cmd_line=True):
 
     if Config['SCRAPING'].getboolean('use_own_ip'):
         proxies.append(None)
+
+    request_timeout = Config['SCRAPING'].getint('timeout', 10)
+    if request_timeout < 10:
+        request_timeout = 10
 
     if not proxies:
         raise InvalidConfigurationException("No proxies available and using own IP is prohibited by configuration. Turning down.")
@@ -398,6 +402,7 @@ def main(return_results=False, parse_cmd_line=True):
                         db_lock=db_lock,
                         proxy=proxy_to_use,
                         progress_queue=q,
+                        request_timeout=request_timeout
                     )
                 )
diff --git a/GoogleScraper/http.py b/GoogleScraper/http.py
index dbcb1dd7..88f2036b 100644
--- a/GoogleScraper/http.py
+++ b/GoogleScraper/http.py
@@ -250,7 +250,8 @@ def search(self, *args, rand=False, **kwargs):
         super().detection_prevention_sleep()
         super().keyword_info()
 
-        request = self.requests.get(self.base_search_url + urlencode(self.search_params), headers=self.headers, timeout=5)
+        request = self.requests.get(self.base_search_url + urlencode(self.search_params), headers=self.headers,
+                                    timeout=self.request_timeout)
         self.current_request_time = datetime.datetime.utcnow()
 
         self.html = request.text
diff --git a/GoogleScraper/scraping.py b/GoogleScraper/scraping.py
index 1c97a985..7eda989b 100644
--- a/GoogleScraper/scraping.py
+++ b/GoogleScraper/scraping.py
@@ -136,7 +136,8 @@ class SearchEngineScrape(metaclass=abc.ABCMeta):
     }
 
     def __init__(self, keywords=None, scraper_search=None, session=None, db_lock=None, cache_lock=None,
-                 start_page_pos=1, search_engine=None, search_type=None, proxy=None, progress_queue=None):
+                 start_page_pos=1, search_engine=None, search_type=None, proxy=None, progress_queue=None,
+                 request_timeout=10):
         """Instantiate an SearchEngineScrape object.
 
         Args:
@@ -240,6 +241,9 @@ def __init__(self, keywords=None, scraper_search=None, session=None, db_lock=Non
         # the default timeout
        self.timeout = 5
 
+        # http request timeout
+        self.request_timeout = request_timeout
+
     @abc.abstractmethod
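
For reference, a minimal standalone sketch of the behaviour the timeout support in PATCH 3/3 relies on (not part of the patch series; the inline config text and the example URL are illustrative): a configparser section is read with a fallback, clamped to the same 10-second floor used in core.py, and passed to requests.get, which raises requests.exceptions.Timeout when no response arrives in time.

    import configparser

    import requests

    # Illustrative inline config mirroring the [SCRAPING] option added to config.cfg
    config = configparser.ConfigParser()
    config.read_string("[SCRAPING]\ntimeout: 10\n")

    # Read the option with a fallback and apply the same 10 second floor as core.py
    request_timeout = config['SCRAPING'].getint('timeout', 10)
    if request_timeout < 10:
        request_timeout = 10

    try:
        # requests stops waiting and raises Timeout after request_timeout seconds
        response = requests.get('https://www.google.com/search?q=example',
                                timeout=request_timeout)
        print(response.status_code)
    except requests.exceptions.Timeout:
        print('no response within {} seconds'.format(request_timeout))

Note that requests applies a single numeric timeout to both the connect and read phases of a request, so it bounds how long each phase may stall rather than the total duration of the whole transfer.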