data_importer/lib/cna_scrapers/adobe_scraper.rb

79 lines
No EOL
2 KiB
Ruby

# Scrapes Adobe's security bulletin index page and extracts the advisory
# identifiers (APSB / APSA) found in the links of that page.
#
# Relies on RestClient and Nokogiri being loaded by the surrounding
# application; this file does not require them itself.
class AdobeScraper
  attr_accessor :index_url, :doc

  # Matches an Adobe Product Security Bulletin id such as "apsb17-01".
  APSB_ID_MATCHER = /apsb\d{2}-\d{2,3}/i
  # Matches an Adobe Product Security Advisory id such as "apsa16-03".
  APSA_ID_MATCHER = /apsa\d{2}-\d{2,3}/i

  # Fetches and parses the bulletin index page.
  # Note: constructing an instance performs an HTTP request.
  def initialize
    @index_url = 'https://helpx.adobe.com/security/security-bulletin.html'
    @doc = read_html(get_html(index_url))
  end

  # Performs an HTTP GET on +url+.
  #
  # @param url [String] address to fetch
  # @return [String, nil] the response body on HTTP 200; nil otherwise
  #   (the status code is printed to stdout in that case)
  def get_html(url)
    response = RestClient::Request.execute(
      :method => :get,
      :url => url
    )
    return response.body if response.code == 200

    # NOTE(review): RestClient is documented to raise on most error statuses,
    # so this path presumably only covers other non-200 responses - confirm.
    puts "HTTP Code #{response.code}"
    nil
  end

  # Parses an HTML string into a Nokogiri document.
  #
  # @param doc [String] raw HTML
  # @return [Object] the document returned by Nokogiri::HTML
  def read_html(doc)
    Nokogiri::HTML(doc)
  end

  # Selects the anchors in table cells of the index document whose href
  # starts with "https://".
  #
  # @return [Object] the node set returned by the XPath query
  def a_with_href
    doc.xpath("//tr/td/a[starts-with(@href, 'https://')]")
  end

  # Extracts the upper-cased advisory id from an advisory page URL.
  #
  # @param url [String] link target to inspect
  # @return [String, nil] the id (e.g. "APSB17-01"); 'None' when the URL
  #   ends with .html but holds no well-formed id; nil when the URL does not
  #   end with .html
  def advisory_id_from_url(url)
    # Adobe's advisory pages end with .html. String#end_with? is core Ruby,
    # so this works with or without ActiveSupport loaded.
    return unless url.end_with?('.html')

    # Bulletin ids take precedence over advisory ids. Matching with the
    # regexes directly avoids calling upcase on nil for malformed ids and
    # keeps the check case-insensitive like the patterns themselves.
    advisory_id = url[APSB_ID_MATCHER] || url[APSA_ID_MATCHER]
    advisory_id ? advisory_id.upcase : 'None'
  end

  # Builds one entry per index-page link that carries an advisory id.
  # Links without a recognizable id are omitted; :index is the position of
  # the link among all links returned by #a_with_href.
  #
  # @return [Array<Hash>] hashes with the keys :index, :advisory_id and :url
  def index_hash
    entries = a_with_href.map.with_index do |a, index|
      url = a.attributes['href'].value
      advisory_id = advisory_id_from_url(url)
      # Skip both "not an .html page" (nil) and "no id found" ('None').
      next if advisory_id.nil? || advisory_id == 'None'

      {
        :index => index,
        :advisory_id => advisory_id,
        :url => url
      }
    end
    # Drop the nil placeholders left behind by the skipped links.
    entries.compact
  end

  # Fetches and parses a single advisory page.
  #
  # @param url [String] advisory page address
  # @return [Object] the parsed document (see #read_html)
  def get_advisory(url)
    read_html(get_html(url))
  end

  # Returns an empty template describing the fields of one advisory.
  # Every leaf value is nil; a fresh hash is built on each call.
  #
  # @return [Hash]
  def advisory_hash
    {
      :bulletin_id => nil,
      :date_published => nil,
      :priority => nil,
      :summary => nil,
      :affected_versions => {
        :product => nil,
        :version => nil,
        :platform => nil
      },
      :solution => {
        :product => nil,
        :updated_version => nil,
        :platform => nil,
        :priority_rating => nil,
        :installation_instructions => nil
      },
      :vulnerability_details => {
        :vulnerability_category => nil,
        :vulnerability_impact => nil,
        :severity => nil,
        :authentication_required_to_exploit? => nil,
        :exploit_requires_admin_privileges? => nil,
        :cvss_base_score => nil,
        :cvss_vector => nil,
        :cve_number => nil
      }
    }
  end
end