# data_importer/lib/cna_scrapers/adobe_scraper.rb

# Scrapes the Adobe security bulletin index page and extracts advisory ids
# (APSB = product security bulletin, APSA = product security advisory) from
# the links found on it.
#
# Uses RestClient for HTTP and Nokogiri for HTML parsing; both are expected
# to be loaded by the surrounding application.
class AdobeScraper
  attr_accessor :index_url, :doc

  # Bulletin id as it appears in a URL, e.g. "apsb21-09" (case-insensitive).
  APSB_ID_MATCHER = /apsb\d{2}-\d{2,3}/i
  # Advisory id as it appears in a URL, e.g. "apsa18-01" (case-insensitive).
  APSA_ID_MATCHER = /apsa\d{2}-\d{2,3}/i

  # Downloads and parses the bulletin index page. Performs a network request.
  def initialize
    @index_url = 'https://helpx.adobe.com/security/security-bulletin.html'
    @doc = read_html(get_html(index_url))
  end

  # Performs a GET request against +url+.
  #
  # @param url [String] address to fetch
  # @return [String, nil] the response body when the status code is 200;
  #   otherwise the code is printed to stdout and nil is returned
  def get_html(url)
    response = RestClient::Request.execute(:method => :get, :url => url)
    return response.body if response.code == 200

    puts "HTTP Code #{response.code}"
    nil
  end

  # Parses an HTML string with Nokogiri.
  #
  # @param doc [String] raw HTML
  # @return the value of Nokogiri::HTML(doc)
  def read_html(doc)
    Nokogiri::HTML(doc)
  end

  # Selects the <a> elements that sit directly inside a table cell of the
  # parsed index page and whose href starts with "https://".
  def a_with_href
    doc.xpath("//tr/td/a[starts-with(@href, 'https://')]")
  end

  # Extracts the upper-cased advisory id from an advisory URL.
  #
  # @param url [String] link taken from the index page
  # @return [String, nil] the id (e.g. "APSB21-09"); 'None' when the URL ends
  #   with ".html" but carries no well-formed id; nil when the URL does not
  #   end with ".html"
  def advisory_id_from_url(url)
    # Adobe's advisory pages end with .html. String#end_with? is core Ruby,
    # so this works with or without ActiveSupport (which adds ends_with?).
    return unless url.end_with?('.html')

    matcher =
      if url.include?('apsb')
        # adobe product security bulletin
        APSB_ID_MATCHER
      elsif url.include?('apsa')
        # adobe product security advisory
        APSA_ID_MATCHER
      end

    # A URL can contain "apsb"/"apsa" without a well-formed id; report that
    # as 'None' instead of calling upcase on nil.
    id = matcher && url[matcher]
    id ? id.upcase : 'None'
  end

  # Builds one entry per anchor returned by #a_with_href.
  #
  # @return [Array<Hash, nil>] for each anchor, either a hash with :index
  #   (position of the anchor in the list), :advisory_id and :url, or nil
  #   when no advisory id could be derived from the link
  def index_hash
    a_with_href.map.with_index do |a, index|
      url = a.attributes['href'].value
      advisory_id = advisory_id_from_url(url)
      # nil (not an .html link) and 'None' (no id in the link) both mean
      # "not an advisory"; neither should yield an entry.
      next if advisory_id.nil? || advisory_id == 'None'

      {
        :index => index,
        :advisory_id => advisory_id,
        :url => url
      }
    end
  end

  # Fetches and parses a single advisory page.
  #
  # @param url [String] advisory page address
  # @return the parsed document produced by #read_html
  def get_advisory(url)
    read_html(get_html(url))
  end

  # Empty template describing the fields of a single advisory; every leaf
  # value is nil.
  #
  # @return [Hash]
  def advisory_hash
    {
      :bulletin_id => nil,
      :date_published => nil,
      :priority => nil,
      :summary => nil,
      :affected_versions => {
        :product => nil,
        :version => nil,
        :platform => nil
      },
      :solution => {
        :product => nil,
        :updated_version => nil,
        :platform => nil,
        :priority_rating => nil,
        :installation_instructions => nil
      },
      :vulnerability_details => {
        :vulnerability_category => nil,
        :vulnerability_impact => nil,
        :severity => nil,
        :authentication_required_to_exploit? => nil,
        :exploit_requires_admin_privileges? => nil,
        :cvss_base_score => nil,
        :cvss_vector => nil,
        :cve_number => nil
      }
    }
  end
end