# Scrapes Adobe's security bulletin index page and extracts the advisory
# links (id + URL) found in its tables.
#
# Relies on RestClient (HTTP) and Nokogiri (HTML parsing) being loaded by the
# surrounding application.
class AdobeIndexScraper
  attr_accessor :index_url, :doc

  # Matches an Adobe product security bulletin id, e.g. "apsb21-09".
  APSB_ID_MATCHER = /apsb\d{2}-\d{2,3}/i
  # Matches an Adobe product security advisory id, e.g. "apsa16-03".
  APSA_ID_MATCHER = /apsa\d{2}-\d{2,3}/i

  # Sentinel returned by #advisory_id_from_url for .html URLs that carry no
  # recognizable advisory id.
  NO_ADVISORY_ID = 'None'

  # URLs that are deliberately left out of the index. The apsb21-111 page was
  # returning a 404 when this scraper was written, so it is omitted.
  SKIPPED_URLS = [
    'https://helpx.adobe.com/security/products/creative-cloud/apsb21-111.html'
  ].freeze

  # Fetches and parses the bulletin index page (performs an HTTP request).
  def initialize
    @index_url = 'https://helpx.adobe.com/security/security-bulletin.html'
    @doc = read_html(get_html(index_url))
  end

  # Performs a GET request against +url+.
  #
  # @param url [String] the address to fetch
  # @return [String, nil] the response body on HTTP 200; otherwise the status
  #   code is printed to stdout and nil is returned
  def get_html(url)
    response = RestClient::Request.execute(method: :get, url: url)
    return response.body if response.code == 200

    puts "HTTP Code #{response.code}"
    nil
  end

  # Parses an HTML string into a Nokogiri document.
  #
  # @param doc [String, nil] raw HTML
  # @return [Nokogiri::HTML::Document]
  def read_html(doc)
    Nokogiri::HTML(doc)
  end

  # @return [Nokogiri::XML::NodeSet] every <a> directly inside a table cell
  #   whose href starts with "https://"
  def a_with_href
    doc.xpath("//tr/td/a[starts-with(@href, 'https://')]")
  end

  # Extracts the upper-cased advisory id from an advisory URL.
  #
  # @param url [String] candidate advisory URL
  # @return [String, nil] the id (e.g. "APSB21-09"); 'None' when the URL ends
  #   with ".html" but contains no well-formed id; nil when the URL does not
  #   end with ".html" at all
  def advisory_id_from_url(url)
    # Adobe's advisory pages end with .html. Core String#end_with? is used
    # rather than the ActiveSupport-only ends_with? alias.
    return unless url.end_with?('.html')

    matcher =
      if url.include?('apsb')    # Adobe product security bulletin
        APSB_ID_MATCHER
      elsif url.include?('apsa') # Adobe product security advisory
        APSA_ID_MATCHER
      end

    # A URL may mention "apsb"/"apsa" without carrying a well-formed id; fall
    # back to the sentinel instead of calling upcase on nil.
    advisory_id = matcher && url[matcher]
    advisory_id ? advisory_id.upcase : NO_ADVISORY_ID
  end

  # Builds the list of advisories linked from the index page.
  #
  # Links without a recognizable advisory id (including non-.html links) and
  # links listed in SKIPPED_URLS are left out.
  #
  # @return [Array<Hash>] one hash per advisory with keys :index (position of
  #   the link among all links returned by #a_with_href, before filtering),
  #   :advisory_id and :url
  def index_hash
    a_with_href.map.with_index do |a, index|
      url = a['href']
      next if SKIPPED_URLS.include?(url)

      advisory_id = advisory_id_from_url(url)
      next if advisory_id.nil? || advisory_id == NO_ADVISORY_ID

      { index: index, advisory_id: advisory_id, url: url }
    end.compact
  end
end