From aae01c1e57d85052aae29a4f0c8a5f51be205f8a Mon Sep 17 00:00:00 2001
From: bpmcdevitt
Date: Wed, 28 Sep 2022 15:58:41 -0500
Subject: [PATCH] method to pull security advisory urls from index_url now works

---
 .../mozilla_security_advisory_scraper.rb | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/mozilla/security_advisory_scraper/mozilla_security_advisory_scraper.rb b/tools/mozilla/security_advisory_scraper/mozilla_security_advisory_scraper.rb
index a60cd01..a9198d0 100644
--- a/tools/mozilla/security_advisory_scraper/mozilla_security_advisory_scraper.rb
+++ b/tools/mozilla/security_advisory_scraper/mozilla_security_advisory_scraper.rb
@@ -13,12 +13,17 @@ class MozillaSecurityAdvisoryScraper
   end
 
   def get_index
+    headers = {
+      :accept => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
+      :user_agent => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:105.0) Gecko/20100101 Firefox/105.0'
+    }
     response = RestClient::Request.execute(
       :method => :get,
-      :url => index_url
+      :url => index_url,
+      :headers => headers
     )
     if response.code == 200
-      r.body
+      response.body
     else
       puts "HTTP Status code: #{r.code}"
     end
@@ -29,7 +34,7 @@ class MozillaSecurityAdvisoryScraper
   end
 
   def advisory_urls(html_doc)
-    html_doc.xpath('//li[@class="level-item"]/a').map {|link| link['href']}
+    html_doc.xpath('//li[@class="level-item"]/a').map {|link| relative_url = link['href']; "https://www.mozilla.org#{relative_url}"}
   end
 
   def get_advisory_urls