diff --git a/changedetectionio/content_fetcher.py b/changedetectionio/content_fetcher.py index be426c886c0..e9477270139 100644 --- a/changedetectionio/content_fetcher.py +++ b/changedetectionio/content_fetcher.py @@ -91,19 +91,20 @@ def __init__(self, status_code, url, screenshot=None, has_filters=False, html_co class Fetcher(): + browser_connection_is_custom = None + browser_connection_url = None browser_steps = None browser_steps_screenshot_path = None content = None error = None fetcher_description = "No description" - browser_connection_url = None headers = {} + instock_data = None + instock_data_js = "" status_code = None webdriver_js_execute_code = None xpath_data = None xpath_element_js = "" - instock_data = None - instock_data_js = "" # Will be needed in the future by the VisualSelector, always get this where possible. screenshot = False @@ -252,16 +253,19 @@ class base_html_playwright(Fetcher): proxy = None - def __init__(self, proxy_override=None, browser_connection_url=None): + def __init__(self, proxy_override=None, custom_browser_connection_url=None): super().__init__() self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"') - # .strip('"') is going to save someone a lot of time when they accidently wrap the env value - if not browser_connection_url: - self.browser_connection_url = os.getenv("PLAYWRIGHT_DRIVER_URL", 'ws://playwright-chrome:3000').strip('"') + if custom_browser_connection_url: + self.browser_connection_is_custom = True + self.browser_connection_url = custom_browser_connection_url else: - self.browser_connection_url = browser_connection_url + # Fallback to fetching from system + # .strip('"') is going to save someone a lot of time when they accidentally wrap the env value + self.browser_connection_url = os.getenv("PLAYWRIGHT_DRIVER_URL", 'ws://playwright-chrome:3000').strip('"') + # If any proxy settings are enabled, then we should setup the proxy object proxy_args = {} @@ -421,8 +425,10 @@ def run(self, 
current_include_filters=None, is_binary=False): + # For now, USE_EXPERIMENTAL_PUPPETEER_FETCH is not supported by watches with BrowserSteps (for now!) - if not self.browser_steps and os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH'): + # browser_connection_is_custom doesn't work with puppeteer-style fetch (use playwright native too in this case) + if not self.browser_connection_is_custom and not self.browser_steps and os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH'): if strtobool(os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH')): # Temporary backup solution until we rewrite the playwright code return self.run_fetch_browserless_puppeteer( @@ -569,15 +575,16 @@ class base_html_webdriver(Fetcher): 'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword'] proxy = None - def __init__(self, proxy_override=None, browser_connection_url=None): + def __init__(self, proxy_override=None, custom_browser_connection_url=None): super().__init__() from selenium.webdriver.common.proxy import Proxy as SeleniumProxy # .strip('"') is going to save someone a lot of time when they accidently wrap the env value - if not browser_connection_url: + if not custom_browser_connection_url: self.browser_connection_url = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"') else: - self.browser_connection_url = browser_connection_url + self.browser_connection_is_custom = True + self.browser_connection_url = custom_browser_connection_url # If any proxy settings are enabled, then we should setup the proxy object proxy_args = {} @@ -674,7 +681,7 @@ def quit(self): class html_requests(Fetcher): fetcher_description = "Basic fast Plaintext/HTTP Client" - def __init__(self, proxy_override=None, browser_connection_url=None): + def __init__(self, proxy_override=None, custom_browser_connection_url=None): super().__init__() self.proxy_override = proxy_override # browser_connection_url is none because its always 'launched locally' diff --git a/changedetectionio/processors/__init__.py 
b/changedetectionio/processors/__init__.py index efccea499e9..7aa8994a5d6 100644 --- a/changedetectionio/processors/__init__.py +++ b/changedetectionio/processors/__init__.py @@ -43,14 +43,14 @@ def call_browser(self): # In the case that the preferred fetcher was a browser config with custom connection URL.. # @todo - on save watch, if its extra_browser_ then it should be obvious it will use playwright (like if its requests now..) - browser_connection_url = None + custom_browser_connection_url = None if prefer_fetch_backend.startswith('extra_browser_'): (t, key) = prefer_fetch_backend.split('extra_browser_') connection = list( filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', []))) if connection: prefer_fetch_backend = 'base_html_playwright' - browser_connection_url = connection[0].get('browser_connection_url') + custom_browser_connection_url = connection[0].get('browser_connection_url') # PDF should be html_requests because playwright will serve it up (so far) in a embedded page # @todo https://github.com/dgtlmoon/changedetection.io/issues/2019 @@ -74,7 +74,7 @@ def call_browser(self): # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need. # When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc) self.fetcher = fetcher_obj(proxy_override=proxy_url, - browser_connection_url=browser_connection_url + custom_browser_connection_url=custom_browser_connection_url ) if self.watch.has_browser_steps: