Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mergetest #96

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions GoogleScraper/config.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,6 @@ check_proxies: True
; response when something fails.
raise_exceptions_while_scraping: False

; The following two options only make sense when search_engine is set to "googleimg"
; do NOT use them unless you are sure what you are going to do
image_type: None
image_size: None

; Global configuration parameters that apply on all modes.
[GLOBAL]
; The proxy file. If this is a valid file path, each line will represent a proxy.
Expand Down
4 changes: 2 additions & 2 deletions GoogleScraper/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -962,15 +962,15 @@ def get_parser_by_search_engine(search_engine):
Raises:
NoParserForSearchEngineException if no parser could be found for the name.
"""
if search_engine == 'google' or search_engine == 'googleimg':
if search_engine == 'google':
return GoogleParser
elif search_engine == 'yandex':
return YandexParser
elif search_engine == 'bing':
return BingParser
elif search_engine == 'yahoo':
return YahooParser
elif search_engine == 'baidu' or search_engine == 'baiduimg':
elif search_engine == 'baidu':
return BaiduParser
elif search_engine == 'duckduckgo':
return DuckduckgoParser
Expand Down
84 changes: 2 additions & 82 deletions GoogleScraper/selenium_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,7 @@ class SelScrape(SearchEngineScrape, threading.Thread):
'baidu': '.n',
'ask': '#paging div a.txt3.l_nu',
'blekko': '',
'duckduckgo': '',
'googleimg': '#pnnext',
'baiduimg': '.n',
'duckduckgo': ''
}

input_field_selectors = {
Expand All @@ -76,23 +74,6 @@ class SelScrape(SearchEngineScrape, threading.Thread):
'duckduckgo': (By.NAME, 'q'),
'ask': (By.NAME, 'q'),
'blekko': (By.NAME, 'q'),
'google': (By.NAME, 'q'),
'googleimg': (By.NAME, 'as_q'),
'baiduimg': (By.NAME, 'word'),
}

param_field_selectors = {
'googleimg': {
'image_type': (By.ID, 'imgtype_input'),
'image_size': (By.ID, 'imgsz_input'),
},
}

search_params = {
'googleimg': {
'image_type': None,
'image_size': None,
},
}

normal_search_locations = {
Expand All @@ -103,7 +84,7 @@ class SelScrape(SearchEngineScrape, threading.Thread):
'baidu': 'http://baidu.com/',
'duckduckgo': 'https://duckduckgo.com/',
'ask': 'http://ask.com/',
'blekko': 'http://blekko.com/',
'blekko': 'http://blekko.com/'
}

image_search_locations = {
Expand All @@ -115,8 +96,6 @@ class SelScrape(SearchEngineScrape, threading.Thread):
'duckduckgo': None, # duckduckgo doesn't support direct image search
'ask': 'http://www.ask.com/pictures/',
'blekko': None,
'googleimg':'https://www.google.com/advanced_image_search',
'baiduimg': 'http://image.baidu.com/',
}

def __init__(self, *args, captcha_lock=None, browser_num=1, **kwargs):
Expand All @@ -139,8 +118,6 @@ def __init__(self, *args, captcha_lock=None, browser_num=1, **kwargs):

self.xvfb_display = Config['SELENIUM'].get('xvfb_display', None)

self.search_param_values = self._get_search_param_values()

# get the base search url based on the search engine.
self.base_search_url = get_base_search_url_by_search_engine(self.search_engine_name, self.scrape_method)
super().instance_creation_info(self.__class__.__name__)
Expand Down Expand Up @@ -325,15 +302,6 @@ def build_search(self):

self.webdriver.get(starting_point)

def _get_search_param_values(self):
    """Collect configured values for this engine's extra search parameters.

    Looks up every parameter key registered for the current search engine
    in ``search_params`` and, when the SCRAPING config section provides a
    truthy value for that key, records it.

    Returns:
        dict: Mapping of parameter key to its configured value; empty when
        the engine has no extra parameters or none are configured.
    """
    values = {}
    # Fall back to an empty tuple so engines without extra params yield {}.
    for key in self.search_params.get(self.search_engine_name, ()):
        configured = Config['SCRAPING'].get(key, None)
        if configured:
            values[key] = configured
    return values

def _get_search_input_field(self):
    """Look up the locator of the search input box for the active engine.

    Returns:
        The locator registered for ``self.search_engine_name`` in
        ``input_field_selectors``.
    """
    engine = self.search_engine_name
    return self.input_field_selectors[engine]

def _get_search_param_fields(self):
    """Return the extra parameter field locators for the active engine.

    Returns:
        dict: Locators keyed by parameter name, or an empty dict when the
        current engine has no extra parameter fields registered.
    """
    # dict.get with a default collapses the original if/else into one lookup.
    return self.param_field_selectors.get(self.search_engine_name, {})

def _wait_until_search_input_field_appears(self, max_wait=5):
"""Waits until the search input field can be located for the current search engine

Expand All @@ -369,26 +331,6 @@ def find_visible_search_input(driver):
logger.error('{}: TimeoutException waiting for search input field: {}'.format(self.name, e))
return False

def _wait_until_search_param_fields_appears(self, max_wait=5):
    """Waits until the search input field contains the query.

    Polls (via WebDriverWait) until every extra search-parameter field
    registered for the current engine can be located in the page.

    Args:
        max_wait: How long to wait maximally before returning False.

    Returns:
        The truthy result of the wait on success, False on timeout.
    """
    def find_visible_search_param(driver):
        # NOTE(review): when there are no param fields the loop body never
        # runs and this returns True immediately — presumably intentional,
        # since callers only invoke this when param fields exist; confirm.
        for param, field in self._get_search_param_fields().items():
            # NOTE(review): Selenium's find_element typically raises
            # NoSuchElementException instead of returning None, so this
            # falsy check may be dead and a missing element would propagate
            # out of the wait predicate — TODO confirm intended behavior.
            input_field = driver.find_element(*field)
            if not input_field:
                return False
        return True

    try:
        # WebDriverWait re-invokes the predicate until it returns truthy
        # or max_wait seconds elapse.
        fields = WebDriverWait(self.webdriver, max_wait).until(find_visible_search_param)
        return fields
    except TimeoutException as e:
        # Only timeouts are swallowed; other exceptions bubble up.
        logger.error('{}: TimeoutException waiting for search param field: {}'.format(self.name, e))
        return False

def _goto_next_page(self):
"""Click the next page element.
"""
Expand Down Expand Up @@ -492,28 +434,6 @@ def search(self):
self.search_input.clear()
time.sleep(.25)

self.search_param_fields = self._get_search_param_fields()

if self.search_param_fields:
wait_res = self._wait_until_search_param_fields_appears()
if wait_res is False:
raise Exception('Waiting search param input fields time exceeds')
for param, field in self.search_param_fields.items():
if field[0] == By.ID:
js_tpl = '''
var field = document.getElementById("%s");
field.setAttribute("value", "%s");
'''
elif field[0] == By.NAME:
js_tpl = '''
var fields = document.getElementsByName("%s");
for (var f in fields) {
f.setAttribute("value", "%s");
}
'''
js_str = js_tpl % (field[1], self.search_param_values[param])
self.webdriver.execute_script(js_str)

try:
self.search_input.send_keys(self.query + Keys.ENTER)
except ElementNotVisibleException:
Expand Down