2022-05-04 01:00:46 -05:00
|
|
|
# Scrapes Adobe's security bulletin index page and lists the advisory links
# found in it.
#
# Constructing an instance performs the HTTP request immediately: the index
# page is downloaded with RestClient and parsed with Nokogiri, and the parsed
# document is stored in +doc+.
class AdobeIndexScraper
  attr_accessor :index_url, :doc

  # Adobe Product Security Bulletin IDs, e.g. "apsb21-111" (case-insensitive).
  APSB_ID_MATCHER = /apsb\d{2}-\d{2,3}/i
  # Adobe Product Security Advisory IDs, e.g. "apsa21-01" (case-insensitive).
  APSA_ID_MATCHER = /apsa\d{2}-\d{2,3}/i

  # Sentinel returned by #advisory_id_from_url when a URL carries no advisory ID.
  NO_ADVISORY_ID = 'None'.freeze

  # Index entries that are deliberately omitted from #index_hash.
  # The apsb21-111 page was returning 404 when this was written.
  SKIPPED_URLS = [
    'https://helpx.adobe.com/security/products/creative-cloud/apsb21-111.html'
  ].freeze

  # Downloads and parses the bulletin index page.
  def initialize
    @index_url = 'https://helpx.adobe.com/security/security-bulletin.html'
    @doc = read_html(get_html(index_url))
  end

  # Fetches +url+ with an HTTP GET.
  #
  # @param url [String] the address to download
  # @return [String, nil] the response body when the status code is 200;
  #   otherwise the code is printed to stdout and nil is returned
  # NOTE(review): RestClient presumably raises on 4xx/5xx responses before the
  # status check below is reached - confirm against the rest-client version in use.
  def get_html(url)
    r = RestClient::Request.execute(method: :get, url: url)
    return r.body if r.code == 200

    puts "HTTP Code #{r.code}"
  end

  # Parses an HTML string into a Nokogiri document.
  #
  # @param doc [String] raw HTML
  # @return the object returned by Nokogiri::HTML
  def read_html(doc)
    Nokogiri::HTML(doc)
  end

  # Selects every <a> that sits directly inside a <td> of a <tr> and whose
  # href starts with "https://".
  #
  # @return the result of the XPath query against +doc+
  def a_with_href
    doc.xpath("//tr/td/a[starts-with(@href, 'https://')]")
  end

  # Extracts the upper-cased advisory ID (APSB.. or APSA..) from a URL.
  #
  # Only URLs ending in ".html" are considered. A bulletin (APSB) ID takes
  # precedence over an advisory (APSA) ID when both appear in the URL.
  #
  # @param url [String] link taken from the index page
  # @return [String] the ID, e.g. "APSB21-111", or 'None' when the URL does not
  #   end in ".html" or contains no well-formed ID
  def advisory_id_from_url(url)
    # String#end_with? is core Ruby; the ends_with? alias exists only with ActiveSupport.
    return NO_ADVISORY_ID unless url.end_with?('.html')

    # String#[] with a regexp returns nil on no match, so a malformed ID falls
    # through to the sentinel instead of raising on nil.upcase.
    id = url[APSB_ID_MATCHER] || url[APSA_ID_MATCHER]
    id ? id.upcase : NO_ADVISORY_ID
  end

  # Builds the list of advisories linked from the index page.
  #
  # Links without an advisory ID and links listed in SKIPPED_URLS are left
  # out. +:index+ is the position of the link among all links returned by
  # #a_with_href, so the values may have gaps.
  #
  # @return [Array<Hash>] hashes with the keys :index, :advisory_id and :url
  def index_hash
    a_with_href.map.with_index do |a, index|
      url = a.attributes['href'].value
      next if SKIPPED_URLS.include?(url)

      advisory_id = advisory_id_from_url(url)
      next if advisory_id == NO_ADVISORY_ID

      { index: index, advisory_id: advisory_id, url: url }
    end.compact
  end
end
|