From e8a6ffe65e00b436aef2e4c3de26bc85bf9268e2 Mon Sep 17 00:00:00 2001 From: Richard Towers Date: Wed, 29 Nov 2023 17:21:54 +0000 Subject: [PATCH] Check links and redirects against a domain list Currently this only includes a very small list of suspicious domains which we want to check as a sort of one-off. We could consider using a third party list of domains (or IP ranges) instead, if this is considered to be valuable. --- .../link_checker/uri_checker/http_checker.rb | 14 ++++++++++++++ app/lib/link_checker/uri_checker/problem.rb | 1 + config/domains.yml | 19 +++++++++++++++++++ config/locales/en.yml | 4 ++++ spec/lib/link_checker_spec.rb | 8 ++++++++ 5 files changed, 46 insertions(+) create mode 100644 config/domains.yml diff --git a/app/lib/link_checker/uri_checker/http_checker.rb b/app/lib/link_checker/uri_checker/http_checker.rb index 101b7d1c..c4558320 100644 --- a/app/lib/link_checker/uri_checker/http_checker.rb +++ b/app/lib/link_checker/uri_checker/http_checker.rb @@ -35,6 +35,12 @@ def initialize(options = {}) end end + class SuspiciousDomain < LinkChecker::UriChecker::Warning + def initialize(options = {}) + super(summary: :suspicious_destination, message: :website_on_list_of_suspicious_domains, **options) + end + end + class SlowResponse < LinkChecker::UriChecker::Warning def initialize(options = {}) super(summary: :slow_page, message: :page_is_slow, suggested_fix: :contact_site_administrator, **options) @@ -104,6 +110,7 @@ def call check_redirects check_credentials_in_uri check_top_level_domain + check_suspicious_domains check_request return report if report.has_errors? 
@@ -119,6 +126,7 @@ def call attr_reader :response INVALID_TOP_LEVEL_DOMAINS = %w[xxx adult dating porn sex sexy singles].freeze + SUSPICIOUS_DOMAINS = Rails.application.config_for(:domains).suspicious_domains.freeze REDIRECT_STATUS_CODES = [301, 302, 303, 307, 308].freeze REDIRECT_LIMIT = 8 REDIRECT_LOOP_LIMIT = 5 @@ -145,6 +153,12 @@ def check_top_level_domain end end + def check_suspicious_domains + if SUSPICIOUS_DOMAINS.any? { |d| uri.host.ends_with? d } + add_problem(SuspiciousDomain.new(from_redirect: from_redirect?)) + end + end + def check_request start_time = Time.zone.now @response = make_request(:get) diff --git a/app/lib/link_checker/uri_checker/problem.rb b/app/lib/link_checker/uri_checker/problem.rb index 2629cd90..c32072c6 100644 --- a/app/lib/link_checker/uri_checker/problem.rb +++ b/app/lib/link_checker/uri_checker/problem.rb @@ -56,6 +56,7 @@ def get_string(symbol) TooManyRedirectsSlowly CredentialsInUri SuspiciousTld + SuspiciousDomain SlowResponse PageWithRating PageContainsThreat diff --git a/config/domains.yml b/config/domains.yml new file mode 100644 index 00000000..c61b4338 --- /dev/null +++ b/config/domains.yml @@ -0,0 +1,19 @@ +default: &default + suspicious_domains: + # NOTE: These domains are considered suspicious by GDS corporate IT; + # making requests to them from the corporate network may raise flags. + - nostringsng.com + - www.becauseiamagirl.org + - www.bilebrizoua.ci + # NOTE: This is not a comprehensive list of all suspicious domains on the internet. + +test: + <<: *default + suspicious_domains: + - malicious.example.com + +development: + <<: *default + +production: + <<: *default diff --git a/config/locales/en.yml b/config/locales/en.yml index 8763d9bd..a408432e 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -49,6 +49,10 @@ en: singular: This link is hosted on a website meant for adult content. redirect: This redirects to websites meant for adult content. 
+ website_on_list_of_suspicious_domains: + singular: This link is hosted on a domain which is on our list of suspicious domains + redirect: This redirects to a website which is on our list of suspicious domains + slow_page: Slow page page_is_slow: singular: This page is slow loading and may frustrate users. diff --git a/spec/lib/link_checker_spec.rb b/spec/lib/link_checker_spec.rb index b5a4db59..4c767613 100644 --- a/spec/lib/link_checker_spec.rb +++ b/spec/lib/link_checker_spec.rb @@ -96,6 +96,14 @@ include_examples "has no errors" end + context "domain is risky" do + let(:uri) { "https://malicious.example.com" } + before { stub_request(:get, uri).to_return(status: 200) } + include_examples "has a problem summary", "Suspicious Destination" + include_examples "has warnings" + include_examples "has no errors" + end + context "there are credentials in the URI" do let(:uri) { "https://username:password@www.gov.uk/ok" } include_examples "has a problem summary", "Login details in URL"