273 lines
8.6 KiB
Ruby
273 lines
8.6 KiB
Ruby
|
# Scrapes one Adobe security bulletin page and turns parts of it into hashes.
#
# The page is fetched with RestClient, parsed with Nokogiri and queried with
# the XPath expressions listed in #adv_xpaths. #get_advisory_xpaths is the
# entry point; it currently reports the bulletin header fields and the
# summary text. Parsers for the affected-versions, solution and
# vulnerability-details tables are defined below but are not merged into
# that result yet.
#
# NOTE(review): String#squish is not core Ruby - presumably ActiveSupport is
# loaded by the host application; confirm before using this class standalone.
class AdobeBulletinScraper
  attr_accessor :url

  # @param url [String] address of the bulletin page to scrape
  def initialize(url)
    @url = url
  end

  # Performs a GET request against #url.
  #
  # @return [String, nil] the response body when the status code is 200;
  #   otherwise the code is printed to stdout and nil is returned
  def get_html
    response = RestClient::Request.execute(
      :method => :get,
      :url => url
    )
    if response.code == 200
      response.body
    else
      puts "HTTP Code #{response.code}"
    end
  end

  # @param html [String] raw HTML markup
  # @return the document produced by Nokogiri::HTML for that markup
  def read_html(html)
    Nokogiri::HTML(html)
  end

  # Downloads the bulletin and returns it as a parsed document.
  def get_advisory
    read_html(get_html)
  end

  # Builds an XPath that selects nodes whose own text contains +text+.
  #
  # example:
  #   xpath_driver: "//p"
  #   text: "updates"
  #   => "//p[contains(text(), 'updates')]"
  def xpath_contains_text(xpath_driver:, text:)
    "#{xpath_driver}[contains(text(), '#{text}')]"
  end

  # Builds an XPath that selects nodes by their exact id attribute.
  #
  # example:
  #   xpath_driver: "//h1"
  #   id_name: "summary"
  #   => "//h1[@id='summary']"
  def xpath_id_search(xpath_driver:, id_name:)
    "#{xpath_driver}[@id='#{id_name}']"
  end

  # Builds an XPath that selects nodes whose class attribute equals
  # +class_name+ exactly (the whole attribute value, not a single token).
  def xpath_class_search(xpath_driver:, class_name:)
    "#{xpath_driver}[@class='#{class_name}']"
  end

  # The same keys as #adv_xpaths, with the expressions assembled through the
  # xpath_* builder methods. The nil entries are the tables for which
  # #adv_xpaths only has a positional path. No method in this class consumes
  # these values.
  #
  # The second header table class previously read "...overflowScrol"; because
  # the class builder does an exact attribute match, that truncated value
  # could not select the same table as #adv_xpaths, so it is aligned here.
  def adv_xpaths_methods
    {
      :header_table => xpath_class_search(xpath_driver: "//table", class_name: "dexter-Table"),
      :second_header_table => xpath_class_search(xpath_driver: "//table", class_name: "text aem-GridColumn aem-GridColumn--default--12 overflowScroll"),
      :summary_h1_upper => xpath_id_search(xpath_driver: "//h1", id_name: "Summary"),
      :summary_h1_lower => xpath_id_search(xpath_driver: "//h1", id_name: "summary"),
      :summary_h2_upper => xpath_id_search(xpath_driver: "//h2", id_name: "Summary"),
      :summary_h2_lower => xpath_id_search(xpath_driver: "//h2", id_name: "summary"),
      :summary_text_p => xpath_contains_text(xpath_driver: "//p", text: 'update'),
      :summary_text_span => xpath_contains_text(xpath_driver: "//p/span", text: 'update'),
      :affected_versions => xpath_id_search(xpath_driver: "//h1", id_name: 'AffectedVersions'),
      :affected_versions_table => nil,
      :solution => xpath_id_search(xpath_driver: "//h1", id_name: 'solution'),
      :solution_table => nil,
      :vulnerability_details => xpath_id_search(xpath_driver: "//h1", id_name: "Vulnerabilitydetails"),
      :vulnerability_details_table => nil
    }
  end

  # XPath expressions evaluated against the bulletin, keyed by the name the
  # result is stored under in the hash built by #get_advisory_xpaths.
  def adv_xpaths
    {
      :header_table => "//table[@class='dexter-Table']",
      :second_header_table => "//table[@class='text aem-GridColumn aem-GridColumn--default--12 overflowScroll']",
      :summary_h1_upper => "//h1[@id='Summary']",
      :summary_h1_lower => "//h1[@id='summary']",
      :summary_h2_upper => "//h2[@id='Summary']",
      :summary_h2_lower => "//h2[@id='summary']",
      :summary_text_p => "//p[contains(text(), 'updates')]",
      :summary_text_span => "//p/span[contains(text(), 'updates')]",
      :affected_versions => "//h1[@id='AffectedVersions']",
      :affected_versions_table => "/html/body/div[2]/div/div[2]/div/div[3]/div/div/div[1]/div/div/div[7]/div/table",
      :solution => "//h1[@id='solution']",
      :solution_table => "//div[11]",
      :vulnerability_details => "//h1[@id='Vulnerabilitydetails']",
      :vulnerability_details_table => "//div[14]//div[1]//table[1]"
    }
  end

  # Fetches the bulletin, evaluates every expression from #adv_xpaths and
  # returns the parsed header and summary fields.
  #
  # A leftover debugger breakpoint that fired for one specific bulletin URL
  # used to sit in this method; it has been removed so every URL is handled
  # the same way.
  #
  # TODO: merge #affected_versions_hash, #solution_hash and
  # #vulnerability_details_hash into the result once they are ready.
  #
  # @return [Hash] :bulletin_id, :date_published, :priority and :summary
  def get_advisory_xpaths
    doc = get_advisory
    xpath_hash = adv_xpaths.each_with_object({}) do |(key, expression), found|
      found[key] = doc.xpath(expression)
    end

    [header_table_hash(xpath_hash), summary_hash(xpath_hash)].inject(&:merge)
  end

  # Extracts bulletin id, publication date and priority from the header table.
  #
  # After the column captions are removed, the remaining text is split on
  # whitespace and read positionally: id, month, day, year, priority.
  #
  # @param xpath_hash [Hash] must hold the header table nodes under :header_table
  # @return [Hash] :bulletin_id, :date_published ("month day year"), :priority
  def header_table_hash(xpath_hash)
    header_text = xpath_hash[:header_table].inner_text.squish
    values = header_text.gsub("Bulletin ID Date Published Priority", "").squish.split(" ")
    adv_id, month, day, year, priority = values

    {
      :bulletin_id => adv_id,
      :date_published => "#{month} #{day} #{year}",
      :priority => priority
    }
  end

  # Tells whether any of the four summary heading lookups (h1/h2, id
  # "Summary"/"summary") matched something. They are checked in that order
  # and the search stops at the first non-empty result.
  #
  # @return [Boolean]
  def has_summary_heading?(xpath_hash)
    heading_keys = [:summary_h1_upper, :summary_h1_lower, :summary_h2_upper, :summary_h2_lower]
    heading_keys.any? { |key| !xpath_hash[key].empty? }
  end

  # Returns the squished summary text, preferring the plain //p match and
  # falling back to the //p/span match. Returns '' when neither matched.
  def find_summary_text(xpath_hash)
    # sometimes the text sits directly in //p, sometimes in a nested //p/span
    summary = [:summary_text_p, :summary_text_span].lazy
                                                    .map { |key| xpath_hash[key] }
                                                    .find { |nodes| !nodes.empty? }
    summary ? summary.text.squish : ''
  end

  # @return [Hash] { :summary => text } when a summary heading exists on the
  #   page, { :summary => nil } otherwise
  def summary_hash(xpath_hash)
    summary_text = has_summary_heading?(xpath_hash) ? find_summary_text(xpath_hash) : nil
    { :summary => summary_text }
  end

  # @return the tr nodes found under any tbody inside +table+
  def get_table_rows(table)
    table.xpath(".//tbody/tr")
  end

  # Removes the first (header) row from +table_rows+ IN PLACE and returns the
  # same collection.
  def table_rows_drop_header(table_rows)
    table_rows.shift
    table_rows
  end

  # Collects every cell that carries a rowspan attribute.
  #
  # NOTE(review): such cells are treated as product-name cells - confirm no
  # other column of the bulletin tables uses rowspan.
  #
  # @return [Array<Hash>] one { :product_name, :rowspan } hash per such cell,
  #   in document order; :rowspan is the raw attribute string
  def products_and_rowspans(table_rows)
    table_rows.flat_map do |tr|
      spanning_cells = tr.children.select { |td| td.has_attribute?('rowspan') }
      spanning_cells.map do |td|
        {
          :product_name => td.children.text.squish,
          :rowspan => td['rowspan']
        }
      end
    end
  end

  # Flattens a product table into one hash per data row.
  #
  # Rows are handed out to products in document order, each product taking
  # as many rows as its own rowspan says. The previous implementation split
  # the rows into "rowspan" groups using only the first product's value,
  # which mis-assigned or crashed on tables whose row count per product did
  # not happen to equal the number of products.
  #
  # @param table a table node (anything #get_table_rows accepts)
  # @return [Array<Hash>] hashes with :product_name, :version and :platform;
  #   empty when no rowspan cell is found
  def product_version_platform(table)
    rows = table_rows_drop_header(get_table_rows(table)).to_a
    next_row = 0

    products_and_rowspans(rows).flat_map do |product|
      span = product[:rowspan].to_i
      product_rows = rows[next_row, span] || []
      next_row += span

      product_rows.map do |tr|
        name = product[:product_name]
        { :product_name => name }.merge(version_and_platform(tr, name))
      end
    end
  end

  # Splits a row's text into version and platform.
  #
  # The product name is removed from the text, the last whitespace-separated
  # token is taken as the platform and everything before it is the version.
  # Only the trailing platform token is stripped, so a version string that
  # happens to contain the same word is left intact.
  #
  # @return [Hash] :version and :platform; for a row without any text the
  #   version is '' and the platform is nil
  def version_and_platform(tr_node, product_name)
    row_text = tr_node.text.gsub("\n", " ").squish.gsub(product_name, " ").squish
    platform = row_text.split(" ").last
    return { :version => '', :platform => nil } if platform.nil?

    version = row_text.sub(/#{Regexp.escape(platform)}\z/, " ").squish
    { :version => version, :platform => platform }
  end

  # @return [Hash] { :affected_versions => rows from #product_version_platform }
  def affected_versions_hash(xpath_hash)
    table = xpath_hash[:affected_versions_table].first
    { :affected_versions => product_version_platform(table) }
  end

  # Parses the solution table into one entry per data row.
  #
  # Each row's cell text is split on newlines and read positionally as
  # product, updated version, platform and priority rating. The link targets
  # found in the FIRST data row are attached to every entry.
  #
  # @return [Hash] { :solution_info => [{ :solution => {...} }, ...] };
  #   the list is empty when the table has no data rows
  def solution_hash(xpath_hash)
    table = xpath_hash[:solution_table].first
    data_rows = table_rows_drop_header(get_table_rows(table))
    cells_per_row = data_rows.map { |tr| tr.xpath('./td') }
    return { :solution_info => [] } if cells_per_row.empty?

    installation_instruction_urls = cells_per_row.first.children.xpath(".//a/@href").map(&:value)

    solutions = cells_per_row.map do |cells|
      product_name, updated_version, platform, priority_rating = cells.children.text.split("\n")
      {
        :solution => {
          :product => product_name,
          :updated_version => updated_version,
          :platform => platform,
          :priority_rating => priority_rating,
          :installation_instruction_urls => installation_instruction_urls
        }
      }
    end

    { :solution_info => solutions }
  end

  # Returns the column captions (th texts, newlines collapsed) of the
  # vulnerability details table. Only the header is parsed so far.
  #
  # @return [Array<String>]
  def vulnerability_details_hash(xpath_hash)
    table = xpath_hash[:vulnerability_details_table].first
    get_table_rows(table).xpath("./th").map { |th| th.text.gsub("\n", " ").squish }
  end

  # Sketch of the advisory structure kept from the original source (not
  # produced by any method yet):
  #
  # {
  #   :bulletin_id => nil,
  #   :date_published => nil,
  #   :priority => nil,
  #   :summary => nil,
  #   :affected_versions => [{:product => nil, :version => nil, :platform => nil}],
  #   :solution => [{:product => nil, :updated_version => nil, :platform => nil, :priority_rating => nil, :installation_instructions => nil }],
  #   :vulnerability_details => [{:vulnerability_category => nil, :vulnerability_impact => nil, :severity => nil, :authentication_required_to_exploit? => nil, :exploit_requires_admin_privileges? => nil, :cvss_base_score => nil, :cvss_vector => nil, :cve_number => nil}]
  # }
end
|