# Methods of MozillaSecurityAdvisoryScraper touched by the change to
# tools/mozilla/security_advisory_scraper/mozilla_security_advisory_scraper.rb,
# shown in their post-change form. Other members of the class (including the
# #get and #parse_html helpers called below) are defined elsewhere.
class MozillaSecurityAdvisoryScraper
  # Downloads one advisory page and returns the parsed document.
  #
  # advisory_url - URL handed to #get unchanged.
  #
  # Returns whatever #parse_html returns for the fetched body.
  def get_and_parse_advisory(advisory_url)
    parse_html(get(advisory_url))
  end

  # Collects the advisory links of a listing page.
  #
  # html_doc - document responding to #xpath.
  #
  # Returns an Array of absolute URLs: the href of every <a> directly under an
  # <li class="level-item"> is appended to https://www.mozilla.org.
  def advisory_urls_in_html(html_doc)
    html_doc.xpath('//li[@class="level-item"]/a').map do |link|
      "https://www.mozilla.org#{link['href']}"
    end
  end

  # Finds the value cells of the advisory's summary list.
  #
  # advisory_doc - parsed advisory page.
  #
  # Returns the <dd> nodes of the first <dl class="summary">, or an empty
  # Array when the page has no such list (instead of failing on nil).
  def parse_advisory_summary(advisory_doc)
    summary_list = advisory_doc.xpath('//dl[@class="summary"]').first
    summary_list ? summary_list.css('dd') : []
  end

  # Returns every <section class="cve"> node of the advisory page.
  def parse_advisory_cves(advisory_doc)
    advisory_doc.xpath('//section[@class="cve"]')
  end

  # Builds the Hash describing one advisory page.
  #
  # advisory_doc - parsed advisory page.
  #
  # Returns a Hash with :title, :header, :announced, :impact, :products,
  # :fixed_in and :cves (one #cve_data_hash result per CVE section).
  # The four summary fields are read by position from the summary <dd> cells;
  # a field whose cell is absent is nil.
  # NOTE(review): the announced/impact/products/fixed-in ordering is assumed
  # from the page layout - confirm against a live advisory.
  def data_hash(advisory_doc)
    announced, impact, products, fixed_in =
      parse_advisory_summary(advisory_doc).map { |cell| cell.text }
    cves = parse_advisory_cves(advisory_doc).map { |cve_node| cve_data_hash(cve_node) }

    {
      :title => advisory_doc.title,
      :header => advisory_doc.css('h2').text,
      :announced => announced,
      :impact => impact,
      :products => products,
      :fixed_in => fixed_in,
      :cves => cves
    }
  end

  # Builds the Hash describing one CVE section of an advisory.
  #
  # advisory_cve_doc - a node as returned by #parse_advisory_cves.
  #
  # Returns a Hash with :cve_id, :title, :reporter, :impact, :description and
  # :references. Fields whose source element is missing are nil.
  def cve_data_hash(advisory_cve_doc)
    # The heading reads "<id>: <title>" once newlines and '#' are dropped.
    # Split on the first ": " only, so a title that itself contains ": " is
    # kept whole. A heading without the separator yields the same string for
    # both id and title.
    heading = advisory_cve_doc.css('h4').text.delete("\n#").strip
    heading_parts = heading.split(': ', 2)

    detail_cells = advisory_cve_doc.css('dl').css('dd')

    # Only the last link of the section is recorded. Read its href by name
    # rather than relying on href being the first attribute of the tag.
    last_link = advisory_cve_doc.css('a').last

    {
      :cve_id => heading_parts.first,
      :title => heading_parts.last,
      :reporter => node_text(detail_cells.first),
      :impact => node_text(detail_cells.last),
      :description => advisory_cve_doc.css('p').text,
      :references => last_link && last_link['href']
    }
  end

  private

  # Returns the text of +node+, or nil when +node+ itself is nil.
  def node_text(node)
    node && node.text
  end
end