# frozen_string_literal: true

# Scrapes one Adobe security bulletin page and extracts structured data
# (bulletin header, summary, affected versions, solution, vulnerability
# details) from its HTML.
#
# The class calls RestClient, Nokogiri and String#squish; none of them is
# required here, so the host application must have loaded them already.
class AdobeBulletinScraper
  # Keys of the XPath hash under which a "Summary" heading may be found.
  SUMMARY_HEADING_KEYS = %i[
    summary_h1_upper summary_h1_lower summary_h2_upper summary_h2_lower
  ].freeze

  attr_accessor :url

  # @param url [String] address of the bulletin page to scrape
  def initialize(url)
    @url = url
  end

  # Downloads the bulletin page.
  #
  # @return [String, nil] the response body on HTTP 200; otherwise the status
  #   code is printed to stdout and nil is returned
  def get_html
    response = RestClient::Request.execute(method: :get, url: url)
    return response.body if response.code == 200

    puts "HTTP Code #{response.code}"
    nil
  end

  # Parses an HTML string into a Nokogiri document.
  def read_html(html)
    Nokogiri::HTML(html)
  end

  # Downloads and parses the bulletin page in one step.
  def get_advisory
    read_html(get_html)
  end

  # Builds an XPath matching nodes whose own text contains +text+.
  def xpath_contains_text(xpath_driver:, text:)
    "#{xpath_driver}[contains(text(), '#{text}')]"
  end

  # Builds an XPath matching nodes by exact id.
  #
  # example:
  #   xpath_driver: "//h1"
  #   id_name:      "summary"
  def xpath_id_search(xpath_driver:, id_name:)
    "#{xpath_driver}[@id='#{id_name}']"
  end

  # Builds an XPath matching nodes by exact class attribute value.
  def xpath_class_search(xpath_driver:, class_name:)
    "#{xpath_driver}[@class='#{class_name}']"
  end

  # XPath table assembled with the xpath_* builder helpers. It carries the
  # same keys as #adv_xpaths; the table XPaths are still nil placeholders.
  # #get_advisory_xpaths does not read this table; it uses #adv_xpaths.
  #
  # NOTE(review): the summary text entries search for 'update' here but for
  # 'updates' in #adv_xpaths - confirm which one is intended.
  def adv_xpaths_methods
    {
      header_table: xpath_class_search(xpath_driver: '//table', class_name: 'dexter-Table'),
      # Fixed typo: the class name ended in "overflowScrol", disagreeing with #adv_xpaths.
      second_header_table: xpath_class_search(
        xpath_driver: '//table',
        class_name: 'text aem-GridColumn aem-GridColumn--default--12 overflowScroll'
      ),
      summary_h1_upper: xpath_id_search(xpath_driver: '//h1', id_name: 'Summary'),
      summary_h1_lower: xpath_id_search(xpath_driver: '//h1', id_name: 'summary'),
      summary_h2_upper: xpath_id_search(xpath_driver: '//h2', id_name: 'Summary'),
      summary_h2_lower: xpath_id_search(xpath_driver: '//h2', id_name: 'summary'),
      summary_text_p: xpath_contains_text(xpath_driver: '//p', text: 'update'),
      summary_text_span: xpath_contains_text(xpath_driver: '//p/span', text: 'update'),
      affected_versions: xpath_id_search(xpath_driver: '//h1', id_name: 'AffectedVersions'),
      affected_versions_table: nil,
      solution: xpath_id_search(xpath_driver: '//h1', id_name: 'solution'),
      solution_table: nil,
      vulnerability_details: xpath_id_search(xpath_driver: '//h1', id_name: 'Vulnerabilitydetails'),
      vulnerability_details_table: nil
    }
  end

  # Literal XPath for every section of a bulletin page that the scraper
  # looks up. This is the table #get_advisory_xpaths evaluates.
  def adv_xpaths
    {
      header_table: "//table[@class='dexter-Table']",
      second_header_table: "//table[@class='text aem-GridColumn aem-GridColumn--default--12 overflowScroll']",
      summary_h1_upper: "//h1[@id='Summary']",
      summary_h1_lower: "//h1[@id='summary']",
      summary_h2_upper: "//h2[@id='Summary']",
      summary_h2_lower: "//h2[@id='summary']",
      summary_text_p: "//p[contains(text(), 'updates')]",
      summary_text_span: "//p/span[contains(text(), 'updates')]",
      affected_versions: "//h1[@id='AffectedVersions']",
      affected_versions_table: '/html/body/div[2]/div/div[2]/div/div[3]/div/div/div[1]/div/div/div[7]/div/table',
      solution: "//h1[@id='solution']",
      solution_table: '//div[11]',
      vulnerability_details: "//h1[@id='Vulnerabilitydetails']",
      vulnerability_details_table: '//div[14]//div[1]//table[1]'
    }
  end

  # Fetches the page, evaluates every XPath from #adv_xpaths against it and
  # merges the header-table and summary data into a single hash.
  #
  # The affected-versions, solution and vulnerability-details extractors
  # exist below but are not wired in yet.
  #
  # @return [Hash] :bulletin_id, :date_published, :priority and :summary
  def get_advisory_xpaths
    doc = get_advisory
    xpath_hash = adv_xpaths.each_with_object({}) do |(key, xpath), found|
      found[key] = doc.xpath(xpath)
    end

    [header_table_hash(xpath_hash), summary_hash(xpath_hash)].inject(&:merge)
  end

  # Reads bulletin id, publication date and priority from the header table.
  #
  # The table text is flattened, the column captions are stripped, and the
  # remaining whitespace-separated tokens are read positionally as
  # id, month, day, year, priority.
  def header_table_hash(xpath_hash)
    tokens = xpath_hash[:header_table].inner_text.squish
                                      .gsub('Bulletin ID Date Published Priority', '')
                                      .squish
                                      .split(' ')
    adv_id, month, day, year, priority = tokens

    {
      bulletin_id: adv_id,
      date_published: "#{month} #{day} #{year}",
      priority: priority
    }
  end

  # @return [Boolean] true when any of the four summary-heading lookups
  #   (h1/h2, id "Summary"/"summary") found at least one node
  def has_summary_heading?(xpath_hash)
    SUMMARY_HEADING_KEYS.any? { |key| !xpath_hash[key].empty? }
  end

  # Returns the whitespace-normalized summary text. The <p> match is
  # preferred, the <p><span> match is the fallback, and '' means neither
  # lookup found anything.
  def find_summary_text(xpath_hash)
    match = [xpath_hash[:summary_text_p], xpath_hash[:summary_text_span]].find do |nodes|
      !nodes.empty?
    end
    match ? match.text.squish : ''
  end

  # @return [Hash] { summary: String } when the page has a summary heading,
  #   { summary: nil } otherwise
  def summary_hash(xpath_hash)
    { summary: has_summary_heading?(xpath_hash) ? find_summary_text(xpath_hash) : nil }
  end

  # Returns the <tr> nodes inside the table's <tbody>.
  def get_table_rows(table)
    table.xpath('.//tbody/tr')
  end

  # Returns the rows without the leading header row. Works on a copy so the
  # caller's collection is left untouched (the previous version shifted the
  # argument in place).
  def table_rows_drop_header(table_rows)
    remaining = table_rows.dup
    remaining.shift
    remaining
  end

  # Collects every cell that carries a rowspan attribute, in document order.
  #
  # @return [Array<Hash>] one { product_name:, rowspan: } hash per such cell;
  #   :rowspan is the raw attribute string
  def products_and_rowspans(table_rows)
    table_rows.flat_map do |tr|
      tr.children.select { |cell| cell.has_attribute?('rowspan') }.map do |cell|
        { product_name: cell.children.text.squish, rowspan: cell['rowspan'] }
      end
    end
  end

  # Expands the affected-versions table into one hash per data row.
  #
  # Every rowspan cell names a product; that product owns the next `rowspan`
  # rows. Rows are sliced per product using that product's own rowspan. The
  # previous implementation used `in_groups(first_rowspan)`, which creates
  # `rowspan` groups rather than groups of `rowspan` rows, pads with nil,
  # and failed when no rowspan cell was present.
  #
  # @return [Array<Hash>] { product_name:, version:, platform: } per row
  def product_version_platform(table)
    rows = table_rows_drop_header(get_table_rows(table)).to_a
    offset = 0

    products_and_rowspans(rows).flat_map do |product|
      name = product[:product_name]
      span = product[:rowspan].to_i
      group = rows[offset, span] || []
      offset += span
      group.map { |tr| { product_name: name }.merge(version_and_platform(tr, name)) }
    end
  end

  # Splits a row's text into version and platform after removing the product
  # name. The last whitespace-separated token is taken as the platform and
  # everything before it as the version.
  #
  # Only the trailing token is split off (the previous gsub removed every
  # occurrence of the platform string), and an empty row yields
  # { version: '', platform: nil } instead of raising.
  def version_and_platform(tr_node, product_name)
    tokens = tr_node.text.gsub("\n", ' ').squish.gsub(product_name, ' ').squish.split(' ')
    { version: tokens[0...-1].join(' '), platform: tokens.last }
  end

  # @return [Hash] { affected_versions: Array<Hash> } built from the first
  #   node matched by the affected-versions table XPath
  def affected_versions_hash(xpath_hash)
    table = xpath_hash[:affected_versions_table].first
    { affected_versions: product_version_platform(table) }
  end

  # Extracts the solution table: one entry per data row, read positionally
  # from the newline-separated cell text as product, updated version,
  # platform, priority rating.
  #
  # NOTE(review): the installation-instruction links are read from the first
  # data row only and attached to every entry, as before - confirm whether
  # they should be collected per row instead.
  def solution_hash(xpath_hash)
    table = xpath_hash[:solution_table].first
    rows = table_rows_drop_header(get_table_rows(table))
    cells_per_row = rows.map { |tr| tr.xpath('./td') }

    first_row = cells_per_row.first
    # Guard against a table without data rows.
    installation_instruction_urls =
      first_row ? first_row.children.xpath('.//a/@href').map(&:value) : []

    solutions = cells_per_row.map do |cells|
      product_name, updated_version, platform, priority_rating = cells.children.text.split("\n")
      {
        solution: {
          product: product_name,
          updated_version: updated_version,
          platform: platform,
          priority_rating: priority_rating,
          installation_instruction_urls: installation_instruction_urls
        }
      }
    end

    { solution_info: solutions }
  end

  # Work in progress: currently returns only the whitespace-normalized
  # column captions (<th> texts) of the vulnerability-details table.
  def vulnerability_details_hash(xpath_hash)
    table = xpath_hash[:vulnerability_details_table].first
    get_table_rows(table).xpath('./th').map { |th| th.text.gsub("\n", ' ').squish }
  end

  # Planned shape of the complete advisory hash (not implemented yet):
  #
  #   {
  #     :bulletin_id => nil,
  #     :date_published => nil,
  #     :priority => nil,
  #     :summary => nil,
  #     :affected_versions => [{:product => nil, :version => nil, :platform => nil}],
  #     :solution => [{:product => nil, :updated_version => nil, :platform => nil,
  #                    :priority_rating => nil, :installation_instructions => nil}],
  #     :vulnerability_details => [{:vulnerability_category => nil, :vulnerability_impact => nil,
  #                                 :severity => nil, :authentication_required_to_exploit? => nil,
  #                                 :exploit_requires_admin_privileges? => nil, :cvss_base_score => nil,
  #                                 :cvss_vector => nil, :cve_number => nil}]
  #   }
end

# ---------------------------------------------------------------------------
# The patch this class came from also contained the following change to a
# second file. Only the diff hunks are available, so it is kept here verbatim
# for reference rather than reconstructed as code.
#
#   diff --git a/lib/cna_scrapers/adobe_scraper.rb b/lib/cna_scrapers/adobe/adobe_index_scraper.rb
#   similarity index 64%
#   rename from lib/cna_scrapers/adobe_scraper.rb
#   rename to lib/cna_scrapers/adobe/adobe_index_scraper.rb
#   index c2d35d9..f10051a 100644
#   --- a/lib/cna_scrapers/adobe_scraper.rb
#   +++ b/lib/cna_scrapers/adobe/adobe_index_scraper.rb
#   @@ -1,4 +1,4 @@
#   -class AdobeScraper
#   +class AdobeIndexScraper
#      attr_accessor :index_url, :doc
#      APSB_ID_MATCHER = /apsb\d{2}-\d{2,3}/i
#      APSA_ID_MATCHER = /apsa\d{2}-\d{2,3}/i
#   @@ -45,11 +45,14 @@ class AdobeScraper
#      end
#
#      def index_hash
#   +    # https://helpx.adobe.com/security/products/creative-cloud/apsb21-111.html: this was 404ing. we might have to omit this one.
#        a_with_href.map.with_index do |a, index|
#          url = a.attributes['href'].value
#          advisory_id = advisory_id_from_url(url)
#          if advisory_id == 'None'
#            next
#   +      elsif url == 'https://helpx.adobe.com/security/products/creative-cloud/apsb21-111.html'
#   +        next
#          else
#            {
#              :index => index,
#   @@ -57,23 +60,6 @@ class AdobeScraper
#              :url => url
#            }
#          end
#   -    end
#   +    end.compact
#      end
#   -
#   -  def get_advisory(url)
#   -    html = get_html(url)
#   -    doc = read_html(html)
#   -  end
#   -
#   -  def advisory_hash
#   -    {
#   -      :bulletin_id => nil,
#   -      :date_published => nil,
#   -      :priority => nil,
#   -      :summary => nil,
#   -      :affected_versions => {:product => nil, :version => nil, :platform => nil},
#   -      :solution => {:product => nil, :updated_version => nil, :platform => nil, :priority_rating => nil, :installation_instructions => nil },
#   -      :vulnerability_details => {:vulnerability_category => nil, :vulnerability_impact => nil, :severity => nil, :authentication_required_to_exploit? => nil, :exploit_requires_admin_privileges? => nil, :cvss_base_score => nil, :cvss_vector => nil, :cve_number => nil}
#   -    }
#   -  end
#   -end
#   \ No newline at end of file
#   +end