diff --git a/lib/crawler/api/config.rb b/lib/crawler/api/config.rb index 5bb8d1b..5611d5f 100644 --- a/lib/crawler/api/config.rb +++ b/lib/crawler/api/config.rb @@ -344,7 +344,7 @@ def document_mapper # Receives a crawler event object and outputs it into relevant systems def output_event(event) # Log the event - # event_logger << "#{event.to_json}\n" + event_logger << "#{event.to_json}\n" # Count stats for the crawl stats.update_from_event(event) diff --git a/lib/crawler/url_validator.rb b/lib/crawler/url_validator.rb index 4013e42..bb78a8c 100644 --- a/lib/crawler/url_validator.rb +++ b/lib/crawler/url_validator.rb @@ -67,6 +67,10 @@ class InvalidCrawlConfigError < Error; end attr_reader :raw_url, :checks, :results, :url_crawl_result def initialize(url:, crawl_config:, checks: nil) + if configuration && configuration.crawler_domains.empty? + raise InvalidCrawlConfigError, 'Please configure at least one domain in the crawl config file.' + end + @crawl_config = crawl_config # Default to running all checks for the given context checks ||= valid_checks diff --git a/lib/crawler/url_validator/url_request_check_concern.rb b/lib/crawler/url_validator/url_request_check_concern.rb index 468b26e..0b9c076 100644 --- a/lib/crawler/url_validator/url_request_check_concern.rb +++ b/lib/crawler/url_validator/url_request_check_concern.rb @@ -123,7 +123,7 @@ def redirect_validation_result(details) # rubocop:disable Metrics/AbcSize end # If we're running in a domain context, this is an inter-domain redirect that we cannot follow - unless @crawl_config + unless configuration return validation_fail(:url_request, <<~MESSAGE, details) The web server at #{url} redirected us to a different domain URL (#{location}). If you want to crawl this site, please use #{location.domain_name} as the domain name.