61 lines
1.3 KiB
Ruby
61 lines
1.3 KiB
Ruby
|
# Scrapes Adobe's security bulletin index page and extracts advisory
# identifiers (APSB / APSA style ids) from the links found in its tables.
#
# Relies on RestClient (HTTP) and Nokogiri (HTML parsing) being loaded by the
# surrounding application; this file does not require them itself.
class AdobeScraper
  attr_accessor :index_url, :doc

  # Matches ids such as "apsb17-02" / "APSB17-021" (case-insensitive).
  APSB_ID_MATCHER = /apsb\d{2}-\d{2,3}/i
  # Matches ids such as "apsa16-02" / "APSA16-123" (case-insensitive).
  APSA_ID_MATCHER = /apsa\d{2}-\d{2,3}/i

  # Sets the index URL, then immediately downloads and parses the page.
  # Note that constructing an instance therefore performs an HTTP request.
  def initialize
    @index_url = 'https://helpx.adobe.com/security/security-bulletin.html'
    @doc = read_html(get_html)
  end

  # Performs a GET request against +index_url+.
  #
  # @return [String, nil] the response body when the status code is 200;
  #   otherwise the code is printed to stdout and nil is returned.
  def get_html
    r = RestClient::Request.execute(
      :method => :get,
      :url => index_url
    )
    return r.body if r.code == 200

    puts "HTTP Code #{r.code}"
    # Explicit nil: previously this was the implicit return value of `puts`.
    nil
  end

  # Parses the given markup with Nokogiri.
  #
  # @param doc [String, nil] markup as returned by #get_html
  # @return the object produced by Nokogiri::HTML
  def read_html(doc)
    Nokogiri::HTML(doc)
  end

  # Selects every <a> nested directly in a table cell (tr/td/a) whose href
  # starts with "https://".
  #
  # @return the result of the XPath query on +doc+
  def a_with_href
    doc.xpath("//tr/td/a[starts-with(@href, 'https://')]")
  end

  # Extracts the upper-cased advisory id from a bulletin URL.
  #
  # @param url [String] link target taken from the index page
  # @return [String, nil] the id (e.g. "APSB17-02"); the string 'None' when
  #   the URL ends with ".html" but carries no well-formed id; nil when the
  #   URL does not end with ".html" at all.
  def advisory_id_from_url(url)
    # adobes advisory ids end with .html
    # `end_with?` is core Ruby; `ends_with?` only exists with ActiveSupport.
    return unless url.end_with?('.html')

    matcher =
      if url.include?('apsb')
        # adobe product security bulletin
        APSB_ID_MATCHER
      elsif url.include?('apsa')
        APSA_ID_MATCHER
      end

    # The keyword may be present without a well-formed id following it
    # (e.g. ".../apsb-overview.html"); report 'None' instead of raising.
    advisory_id = matcher && url[matcher]
    advisory_id ? advisory_id.upcase : 'None'
  end

  # Builds one entry per link returned by #a_with_href.
  #
  # @return [Array<Hash, nil>] for each link, in order, either a hash with
  #   the keys :index (position of the link), :advisory_id and :url, or nil
  #   when no advisory id could be extracted from the link's href.
  def index_hash
    a_with_href.map.with_index do |a, index|
      url = a.attributes['href'].value
      advisory_id = advisory_id_from_url(url)
      # Skip both "no id" signals of #advisory_id_from_url (nil and 'None');
      # the slot stays nil so positions line up with #a_with_href.
      next if advisory_id.nil? || advisory_id == 'None'

      {
        :index => index,
        :advisory_id => advisory_id,
        :url => url
      }
    end
  end
end
|