got data hash working
This commit is contained in:
parent
db264a3f00
commit
43429f643f
1 changed files with 55 additions and 6 deletions
|
@ -25,6 +25,11 @@ class MozillaSecurityAdvisoryScraper
|
||||||
Nokogiri::HTML(response_body)
|
Nokogiri::HTML(response_body)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Fetches a single advisory page and parses it.
#
# advisory_url - location of the advisory, handed unchanged to #get.
#
# Returns whatever #parse_html produces for the value returned by #get.
def get_and_parse_advisory(advisory_url)
  parse_html(get(advisory_url))
end
|
||||||
|
|
||||||
# Collects the advisory links found in html_doc.
#
# Every element matched by the XPath //li[@class="level-item"]/a has its
# href attribute read and prefixed with "https://www.mozilla.org", so the
# result of the map is one absolute URL string per matched link.
def advisory_urls_in_html(html_doc)
|
def advisory_urls_in_html(html_doc)
|
||||||
html_doc.xpath('//li[@class="level-item"]/a').map {|link| relative_url = link['href']; "https://www.mozilla.org#{relative_url}"}
|
html_doc.xpath('//li[@class="level-item"]/a').map {|link| relative_url = link['href']; "https://www.mozilla.org#{relative_url}"}
|
||||||
end
|
end
|
||||||
|
@ -48,14 +53,58 @@ class MozillaSecurityAdvisoryScraper
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# We know it's the very first dl tag with the class name "summary".
|
|
||||||
# Looks up the first element matching //dl[@class="summary"] in advisory_doc.
# Two bodies are shown: one assigns that element's children to `nodes`, the
# other returns only its dd descendants (via css('dd')).
# NOTE(review): if no dl matches, `.first` is presumably nil and the chained
# call would raise - confirm against the pages being scraped.
def parse_advisory_summary(advisory_doc)
|
def parse_advisory_summary(advisory_doc)
|
||||||
nodes = advisory_doc.xpath('//dl[@class="summary"]').first.children
|
advisory_doc.xpath('//dl[@class="summary"]').first.css('dd')
|
||||||
# The dt and dd tags hold the info we care about: each dt tag is a key and the dd
|
|
||||||
# tag is its value.
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# Returns the result of an XPath query for the CVE elements of advisory_doc.
# Two definitions are shown: parse_advisory_cve queries //dl[@class="cve"],
# while parse_advisory_cves queries //section[@class="cve"].
def parse_advisory_cve(advisory_doc)
|
|
||||||
advisory_doc.xpath('//dl[@class="cve"]')
|
def parse_advisory_cves(advisory_doc)
|
||||||
|
advisory_doc.xpath('//section[@class="cve"]')
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Builds a hash describing a whole advisory page.
#
# advisory_doc - parsed advisory document; it must respond to #title and
#                #css, and is also passed to parse_advisory_summary and
#                parse_advisory_cves.
#
# The summary values are read by position from parse_advisory_summary:
# index 0 is :announced, 1 is :impact, 2 is :products, 3 is :fixed_in.
# A position that is missing yields nil instead of raising, so a page with a
# shorter summary still produces a hash.
#
# Returns a Hash with the keys :title, :header, :announced, :impact,
# :products, :fixed_in and :cves (an Array with one cve_data_hash result per
# node returned by parse_advisory_cves).
def data_hash(advisory_doc)
  summary = parse_advisory_summary(advisory_doc)
  # Safe navigation keeps a short summary from blowing up on nil.text.
  announced, impact, products, fixed_in = (0..3).map { |index| summary[index]&.text }

  {
    title: advisory_doc.title,
    # Text of every h2 in the document, concatenated.
    header: advisory_doc.css('h2').text,
    announced: announced,
    impact: impact,
    products: products,
    fixed_in: fixed_in,
    cves: parse_advisory_cves(advisory_doc).map { |cve_node| cve_data_hash(cve_node) }
  }
end
|
||||||
|
|
||||||
|
# Builds a hash describing one CVE entry of an advisory.
#
# advisory_cve_doc - node for a single CVE; it must respond to #css.
#                    NOTE(review): presumably one of the nodes returned by
#                    parse_advisory_cves - confirm against the caller.
#
# The h4 text has newlines and "#" characters removed and is then read as
# "ID: title". The first and last dd under a dl supply the reporter and the
# impact, all p text is the description, and the href of the last link is
# the reference.
#
# Returns a Hash with the keys :cve_id, :title, :reporter, :impact,
# :description and :references. :reporter, :impact and :references are nil
# when the corresponding element is absent.
def cve_data_hash(advisory_cve_doc)
  heading = advisory_cve_doc.css('h4').text.delete("\n#").strip
  # Split on the first ": " only, so a title that itself contains ": " stays whole.
  id_and_title = heading.split(': ', 2)

  details = advisory_cve_doc.css('dl').css('dd')
  last_link = advisory_cve_doc.css('a').last

  {
    cve_id: id_and_title.first,
    title: id_and_title.last,
    reporter: details.first&.text,
    impact: details.last&.text,
    description: advisory_cve_doc.css('p').text,
    # Read href by name instead of assuming it is the link's first attribute.
    references: last_link && last_link['href']
  }
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
Loading…
Add table
Reference in a new issue