# Patch summary: report bot-challenge responses as a warning instead of a
# "requires login" error. Everything below the first diff header is the
# unmodified patch text.
#
# app/lib/link_checker/uri_checker/http_checker.rb
#   - Adds PageBlocksBots, a subclass of LinkChecker::UriChecker::Warning.
#     Its initializer forwards summary: :page_blocks_bots and
#     message: :page_blocked_bots to super together with the caller's options.
#   - In check_request, a 401 or 403 response whose "cf-mitigated" header
#     equals "challenge" now records PageBlocksBots; any other 401/403 still
#     records PageRequiresLogin.
#   NOTE(review): the header value is compared with ==, so only the exact
#     string "challenge" matches. Whether the header-name lookup is
#     case-insensitive depends on the response object, which this patch does
#     not show - confirm.
#   NOTE(review): PageBlocksBots spells out its superclass in full while the
#     neighbouring PageRequiresLogin uses the bare name Error; presumably
#     deliberate - confirm.
#
# app/lib/link_checker/uri_checker/problem.rb
#   - Inserts PageBlocksBots between PageNotFound and PageRequiresLogin in a
#     list of problem class names. The code that consumes the list is outside
#     the hunk; presumably the position matters - confirm.
#
# config/locales/en.yml
#   - Adds the page_blocks_bots summary string and the page_blocked_bots
#     message with singular and redirect variants. These keys match the
#     symbols passed to super in PageBlocksBots.
#
# spec/lib/link_checker_spec.rb
#   - The "has errors" and "has warnings" shared examples accept an optional
#     argument (default nil); when one is given they additionally expect
#     subject.errors / subject.warnings to include it.
#   - The existing 401/403/404/410/418/500 contexts now pass full message
#     sentences instead of the previous short labels.
#   - The 403 case is split in two: with the "cf-mitigated" => "challenge"
#     header it expects no errors and the new warning text; without the
#     header it expects the login-required error and no warnings.
diff --git a/app/lib/link_checker/uri_checker/http_checker.rb b/app/lib/link_checker/uri_checker/http_checker.rb index c4558320..4da6df66 100644 --- a/app/lib/link_checker/uri_checker/http_checker.rb +++ b/app/lib/link_checker/uri_checker/http_checker.rb @@ -53,6 +53,12 @@ def initialize(options = {}) end end + class PageBlocksBots < LinkChecker::UriChecker::Warning + def initialize(options = {}) + super(summary: :page_blocks_bots, message: :page_blocked_bots, **options) + end + end + class PageRequiresLogin < Error def initialize(options = {}) super(summary: :page_requires_login, message: :login_required_to_view, **options) @@ -172,7 +178,11 @@ def check_request if response.status == 404 || response.status == 410 add_problem(PageNotFound.new(from_redirect: from_redirect?)) elsif response.status == 401 || response.status == 403 - add_problem(PageRequiresLogin.new(from_redirect: from_redirect?)) + if response.headers["cf-mitigated"] == "challenge" + add_problem(PageBlocksBots.new(from_redirect: from_redirect?)) + else + add_problem(PageRequiresLogin.new(from_redirect: from_redirect?)) + end elsif response.status >= 400 && response.status < 500 add_problem(PageIsUnavailable.new(from_redirect: from_redirect?, status: response.status)) elsif response.status >= 500 && response.status < 600 diff --git a/app/lib/link_checker/uri_checker/problem.rb b/app/lib/link_checker/uri_checker/problem.rb index c32072c6..29bf4158 100644 --- a/app/lib/link_checker/uri_checker/problem.rb +++ b/app/lib/link_checker/uri_checker/problem.rb @@ -47,6 +47,7 @@ def get_string(symbol) NoHost HttpCommunicationError PageNotFound + PageBlocksBots PageRequiresLogin PageIsUnavailable PageRespondsWithError diff --git a/config/locales/en.yml b/config/locales/en.yml index a408432e..bab6c1e8 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -64,6 +64,11 @@ en: redirect: This redirects to a page not found (404). 
find_content_now: Find where the content is now hosted and link to that instead. + page_blocks_bots: Page blocks robots + page_blocked_bots: + singular: Our link checker was blocked from accessing the website. + redirect: This redirects to a page that blocked our link checker from accessing the website. + page_requires_login: Page requires login login_required_to_view: singular: A login is required to view this page. diff --git a/spec/lib/link_checker_spec.rb b/spec/lib/link_checker_spec.rb index 4c767613..764cb2c3 100644 --- a/spec/lib/link_checker_spec.rb +++ b/spec/lib/link_checker_spec.rb @@ -10,9 +10,10 @@ end end - shared_examples "has errors" do + shared_examples "has errors" do |error = nil| it "should have errors" do expect(subject.errors).to_not be_empty + expect(subject.errors).to include(error) if error end end @@ -22,9 +23,10 @@ end end - shared_examples "has warnings" do + shared_examples "has warnings" do |warning = nil| it "should have warnings" do expect(subject.warnings).to_not be_empty + expect(subject.warnings).to include(warning) if warning end end @@ -202,42 +204,56 @@ context "401 status code" do let(:uri) { "http://www.not-gov.uk/401" } before { stub_request(:get, uri).to_return(status: 401) } - include_examples "has errors", "401 error (page requires login)" + include_examples "has errors", "A login is required to view this page." include_examples "has no warnings" end - context "403 status code" do + context "403 status code with Cloudflare challenge header" do + let(:uri) { "http://www.not-gov.uk/403" } + before do + stub_request(:get, uri).to_return( + status: 403, + headers: { + "cf-mitigated" => "challenge", + }, + ) + end + include_examples "has no errors" + include_examples "has warnings", "Our link checker was blocked from accessing the website." 
+ end + + context "403 status code without Cloudflare challenge header" do let(:uri) { "http://www.not-gov.uk/403" } before { stub_request(:get, uri).to_return(status: 403) } - include_examples "has errors", "403 error (page requires login)" + include_examples "has errors", "A login is required to view this page." include_examples "has no warnings" end context "404 status code" do let(:uri) { "http://www.not-gov.uk/404" } before { stub_request(:get, uri).to_return(status: 404) } - include_examples "has errors", "404 error (page not found)" + include_examples "has errors", "This page was not found (404)." include_examples "has no warnings" end context "410 status code" do let(:uri) { "http://www.not-gov.uk/410" } before { stub_request(:get, uri).to_return(status: 410) } - include_examples "has errors", "410 error (page not found)" + include_examples "has errors", "This page was not found (404)." include_examples "has no warnings" end context "an unspecified 4xx status code" do let(:uri) { "http://www.not-gov.uk/418" } before { stub_request(:get, uri).to_return(status: 418) } - include_examples "has errors", "418 error (page is unavailable)" + include_examples "has errors", "This page is unavailable (418)." include_examples "has no warnings" end context "5xx status code" do let(:uri) { "http://www.not-gov.uk/500" } before { stub_request(:get, uri).to_return(status: 500) } - include_examples "has errors", "500 (server error)" + include_examples "has errors", "This page is responding with an error (500) and won't work for users." include_examples "has no warnings" end