# Scrapes the Adobe security bulletin index page and extracts advisory
# identifiers (APSB / APSA) from the links found in its table cells.
#
# NOTE(review): relies on RestClient and Nokogiri being loaded by the
# surrounding application; this file does not require them itself.
class AdobeScraper
  attr_accessor :index_url, :doc

  # Matches bulletin ids such as "apsb21-09" (case-insensitive).
  APSB_ID_MATCHER = /apsb\d{2}-\d{2,3}/i
  # Matches advisory ids such as "apsa18-01" (case-insensitive).
  APSA_ID_MATCHER = /apsa\d{2}-\d{2,3}/i

  # Sets the index URL, then downloads and parses the index page right away.
  def initialize
    @index_url = 'https://helpx.adobe.com/security/security-bulletin.html'
    @doc = read_html(get_html)
  end

  # Performs a GET request against +index_url+.
  #
  # @return [String, nil] the response body when the status code is 200;
  #   otherwise the status is printed to stdout and nil is returned
  def get_html
    response = RestClient::Request.execute(method: :get, url: index_url)
    return response.body if response.code == 200

    # `puts` returns nil, so callers get nil for any non-200 response.
    puts "HTTP Code #{response.code}"
  end

  # Parses an HTML string with Nokogiri.
  #
  # @param doc [String, nil] raw HTML
  # @return [Object] whatever Nokogiri::HTML returns for the input
  def read_html(doc)
    Nokogiri::HTML(doc)
  end

  # Selects every <a> that sits directly inside a <td> of a <tr> and whose
  # href starts with "https://".
  #
  # @return [Object] the result of the XPath query on +doc+
  def a_with_href
    doc.xpath("//tr/td/a[starts-with(@href, 'https://')]")
  end

  # Extracts the upper-cased advisory id from a bulletin URL.
  #
  # The bulletin pattern (APSB) is tried before the advisory pattern (APSA).
  #
  # @param url [String] link target to inspect
  # @return [String, nil] the id (e.g. "APSB21-09"), the string 'None' when
  #   the URL ends with ".html" but contains no recognisable id, or nil when
  #   the URL does not end with ".html"
  def advisory_id_from_url(url)
    # Adobe's advisory pages end with .html; anything else yields nil.
    # `end_with?` is the core String method (`ends_with?` exists only with
    # ActiveSupport loaded).
    return unless url.end_with?('.html')

    # Match the patterns directly so a URL that merely contains "apsb"/"apsa"
    # without a well-formed id falls through to 'None' instead of raising.
    advisory_id = url[APSB_ID_MATCHER] || url[APSA_ID_MATCHER]
    advisory_id ? advisory_id.upcase : 'None'
  end

  # Builds one entry per link returned by +a_with_href+.
  #
  # @return [Array<Hash, nil>] for each link, a hash with :index (position
  #   among the links), :advisory_id and :url; links whose id is 'None'
  #   produce nil at their position, so positions line up with the links
  def index_hash
    a_with_href.map.with_index do |a, index|
      url = a.attributes['href'].value
      advisory_id = advisory_id_from_url(url)
      next if advisory_id == 'None'

      { index: index, advisory_id: advisory_id, url: url }
    end
  end
end