# Downloads an Adobe security bulletin page and extracts parts of it
# (header table, summary, affected versions, solution, vulnerability details)
# into plain Ruby hashes.
#
# The class uses RestClient for HTTP and Nokogiri for HTML parsing, and calls
# String#squish on extracted text (assumes ActiveSupport, or an equivalent
# String#squish, has been loaded by the surrounding application).
class AdobeBulletinScraper
  # Keys of the XPath hash that may hold the "Summary" heading, in lookup order.
  SUMMARY_HEADING_KEYS = %i[
    summary_h1_upper summary_h1_lower summary_h2_upper summary_h2_lower
  ].freeze

  # Keys of the XPath hash that may hold the summary text, in lookup order.
  SUMMARY_TEXT_KEYS = %i[summary_text_p summary_text_span].freeze

  attr_accessor :url

  # url - address of the bulletin page to scrape.
  def initialize(url)
    @url = url
  end

  # Performs a GET request against +url+.
  #
  # Returns the response body when the status code is 200. For any other
  # status code the code is printed and nil is returned.
  def get_html
    response = RestClient::Request.execute(method: :get, url: url)
    return response.body if response.code == 200

    puts "HTTP Code #{response.code}"
    nil
  end

  # Parses an HTML string with Nokogiri and returns the resulting document.
  def read_html(html)
    Nokogiri::HTML(html)
  end

  # Fetches the bulletin page and returns it as a parsed document.
  def get_advisory
    read_html(get_html)
  end

  # Builds an XPath that matches +xpath_driver+ nodes whose text contains +text+.
  #
  # example:
  # xpath_driver: "//p"
  # text: "update"
  # => "//p[contains(text(), 'update')]"
  def xpath_contains_text(xpath_driver:, text:)
    "#{xpath_driver}[contains(text(), '#{text}')]"
  end

  # Builds an XPath that matches +xpath_driver+ nodes with the given id.
  #
  # example:
  # xpath_driver: "//h1"
  # id_name: "summary"
  # => "//h1[@id='summary']"
  def xpath_id_search(xpath_driver:, id_name:)
    "#{xpath_driver}[@id='#{id_name}']"
  end

  # Builds an XPath that matches +xpath_driver+ nodes whose class attribute is
  # exactly +class_name+.
  def xpath_class_search(xpath_driver:, class_name:)
    "#{xpath_driver}[@class='#{class_name}']"
  end

  # XPath expressions assembled with the xpath_* helper methods.
  #
  # The keys mirror #adv_xpaths. The *_table entries are nil here (no
  # helper-built expression exists for them), and the summary text entries
  # search for 'update' whereas #adv_xpaths searches for 'updates'.
  # #get_advisory_xpaths only uses the keys of this hash.
  def adv_xpaths_methods
    {
      header_table: xpath_class_search(xpath_driver: "//table", class_name: "dexter-Table"),
      second_header_table: xpath_class_search(
        xpath_driver: "//table",
        class_name: "text aem-GridColumn aem-GridColumn--default--12 overflowScroll"
      ),
      summary_h1_upper: xpath_id_search(xpath_driver: "//h1", id_name: "Summary"),
      summary_h1_lower: xpath_id_search(xpath_driver: "//h1", id_name: "summary"),
      summary_h2_upper: xpath_id_search(xpath_driver: "//h2", id_name: "Summary"),
      summary_h2_lower: xpath_id_search(xpath_driver: "//h2", id_name: "summary"),
      summary_text_p: xpath_contains_text(xpath_driver: "//p", text: 'update'),
      summary_text_span: xpath_contains_text(xpath_driver: "//p/span", text: 'update'),
      affected_versions: xpath_id_search(xpath_driver: "//h1", id_name: 'AffectedVersions'),
      affected_versions_table: nil,
      solution: xpath_id_search(xpath_driver: "//h1", id_name: 'solution'),
      solution_table: nil,
      vulnerability_details: xpath_id_search(xpath_driver: "//h1", id_name: "Vulnerabilitydetails"),
      vulnerability_details_table: nil
    }
  end

  # Literal XPath expressions that are actually evaluated against the page.
  # The *_table entries are positional paths and therefore tied to the page
  # layout they were written against.
  def adv_xpaths
    {
      header_table: "//table[@class='dexter-Table']",
      second_header_table: "//table[@class='text aem-GridColumn aem-GridColumn--default--12 overflowScroll']",
      summary_h1_upper: "//h1[@id='Summary']",
      summary_h1_lower: "//h1[@id='summary']",
      summary_h2_upper: "//h2[@id='Summary']",
      summary_h2_lower: "//h2[@id='summary']",
      summary_text_p: "//p[contains(text(), 'updates')]",
      summary_text_span: "//p/span[contains(text(), 'updates')]",
      affected_versions: "//h1[@id='AffectedVersions']",
      affected_versions_table: "/html/body/div[2]/div/div[2]/div/div[3]/div/div/div[1]/div/div/div[7]/div/table",
      solution: "//h1[@id='solution']",
      solution_table: "//div[11]",
      vulnerability_details: "//h1[@id='Vulnerabilitydetails']",
      vulnerability_details_table: "//div[14]//div[1]//table[1]"
    }
  end

  # Fetches the page, evaluates every XPath from #adv_xpaths (for the keys
  # listed by #adv_xpaths_methods) and returns the merged header-table and
  # summary information:
  #
  #   { bulletin_id: ..., date_published: ..., priority: ..., summary: ... }
  #
  # TODO: affected_versions_hash, solution_hash and vulnerability_details_hash
  # are not merged into the result yet.
  def get_advisory_xpaths
    doc = get_advisory
    xpath_hash = adv_xpaths_methods.keys.each_with_object({}) do |key, found|
      found[key] = doc.xpath(adv_xpaths[key])
    end

    header_table_hash(xpath_hash).merge(summary_hash(xpath_hash))
  end

  # Extracts bulletin id, publication date and priority from the header table.
  #
  # After the column captions are removed, the remaining text is split on
  # spaces and read positionally: id, month, day, year, priority.
  def header_table_hash(xpath_hash)
    header_text = xpath_hash[:header_table].inner_text.squish
    values = header_text.gsub("Bulletin ID Date Published Priority", "").squish.split(" ")
    adv_id, month, day, year, priority = values

    {
      bulletin_id: adv_id,
      date_published: "#{month} #{day} #{year}",
      priority: priority
    }
  end

  # Returns true when any of the summary heading lookups (h1/h2, upper- or
  # lower-case id) matched at least one node, false otherwise.
  def has_summary_heading?(xpath_hash)
    SUMMARY_HEADING_KEYS.any? { |key| !xpath_hash[key].empty? }
  end

  # Returns the squished summary text, or '' when nothing matched.
  # Sometimes the text sits in //p tags, sometimes in nested //p/span tags, so
  # both lookups are tried in that order.
  def find_summary_text(xpath_hash)
    SUMMARY_TEXT_KEYS.each do |key|
      nodes = xpath_hash[key]
      return nodes.text.squish unless nodes.empty?
    end
    ''
  end

  # Returns { summary: text }, with nil as the text when the page has no
  # summary heading.
  def summary_hash(xpath_hash)
    { summary: has_summary_heading?(xpath_hash) ? find_summary_text(xpath_hash) : nil }
  end

  # Returns the tbody rows found below +table+.
  def get_table_rows(table)
    table.xpath(".//tbody/tr")
  end

  # Removes the first (header) row from +table_rows+ and returns the same,
  # now shortened, collection. Note that the argument is modified in place.
  def table_rows_drop_header(table_rows)
    table_rows.shift
    table_rows
  end

  # Collects every cell that carries a rowspan attribute, in document order,
  # as { product_name: <cell text>, rowspan: <attribute value as String> }.
  def products_and_rowspans(table_rows)
    table_rows.flat_map do |tr|
      spanning_cells = tr.children.select { |td| td.has_attribute?('rowspan') }
      spanning_cells.map do |td|
        { product_name: td.children.text.squish, rowspan: td['rowspan'] }
      end
    end
  end

  # Turns the data rows of +table+ into a flat list of
  # { product_name:, version:, platform: } hashes.
  #
  # Each product cell announces through its rowspan how many consecutive rows
  # belong to it, so the rows are sliced consecutively using every product's
  # own rowspan. This assumes every data row is covered by some rowspan cell.
  # Returns [] when no rowspan cell is present.
  def product_version_platform(table)
    rows = table_rows_drop_header(get_table_rows(table)).to_a
    offset = 0

    products_and_rowspans(rows).flat_map do |product|
      name = product[:product_name]
      span = [product[:rowspan].to_i, 0].max
      group = rows[offset, span] || []
      offset += span

      group.map { |tr| { product_name: name }.merge(version_and_platform(tr, name)) }
    end
  end

  # Splits the text of one table row into version and platform.
  #
  # The product name (if any) is removed from the row text; the last remaining
  # word is taken as the platform and everything before it as the version.
  # An empty row yields { version: "", platform: nil }.
  def version_and_platform(tr_node, product_name)
    row_text = tr_node.text.squish
    # Substituting an empty pattern would insert a space between every
    # character, so the removal is skipped for a blank product name.
    row_text = row_text.gsub(product_name, " ").squish unless product_name.to_s.empty?

    words = row_text.split(" ")
    { version: words[0...-1].join(" "), platform: words.last }
  end

  # Returns { affected_versions: [...] } built from the first node matched by
  # the affected-versions table XPath; the list is empty when nothing matched.
  def affected_versions_hash(xpath_hash)
    table = xpath_hash[:affected_versions_table].first
    { affected_versions: table ? product_version_platform(table) : [] }
  end

  # Returns { solution_info: [...] } with one { solution: {...} } entry per
  # data row of the solution table. The cell text of a row is split on
  # newlines and read positionally: product, updated version, platform,
  # priority rating. The links found in the first data row are attached to
  # every entry. The list is empty when the table or its data rows are missing.
  def solution_hash(xpath_hash)
    table = xpath_hash[:solution_table].first
    return { solution_info: [] } unless table

    rows = table_rows_drop_header(get_table_rows(table))
    cells_per_row = rows.map { |tr| tr.xpath('./td') }
    return { solution_info: [] } if cells_per_row.empty?

    installation_instruction_urls = cells_per_row.first.children.xpath(".//a/@href").map(&:value)

    entries = cells_per_row.map do |cells|
      product_name, updated_version, platform, priority_rating = cells.children.text.split("\n")
      {
        solution: {
          product: product_name,
          updated_version: updated_version,
          platform: platform,
          priority_rating: priority_rating,
          installation_instruction_urls: installation_instruction_urls
        }
      }
    end

    { solution_info: entries }
  end

  # Returns the squished header captions (th cells) of the vulnerability
  # details table, or [] when the table was not found.
  # TODO: the table body is not parsed yet.
  def vulnerability_details_hash(xpath_hash)
    table = xpath_hash[:vulnerability_details_table].first
    return [] unless table

    get_table_rows(table).xpath("./th").map { |th| th.text.squish }
  end

  # Planned shape of the complete advisory hash (not implemented yet):
  #
  #  {
  #    :bulletin_id => nil,
  #    :date_published => nil,
  #    :priority => nil,
  #    :summary => nil,
  #    :affected_versions => [{:product => nil, :versions => [{:version => nil, :platform => nil}]}],
  #    :solution => [{:product => nil, :updated_version => nil, :platform => nil, :priority_rating => nil, :installation_instructions => nil }],
  #    :vulnerability_details => [{:vulnerability_category => nil, :vulnerability_impact => nil, :severity => nil, :authentication_required_to_exploit? => nil, :exploit_requires_admin_privileges? => nil, :cvss_base_score => nil, :cvss_vector => nil, :cve_number => nil}]
  #  }
end