Add more to the scraper; now trying to pack the data from each advisory into a hash.
This commit is contained in:
parent
aae01c1e57
commit
db264a3f00
1 changed files with 40 additions and 24 deletions
|
@ -1,45 +1,61 @@
|
|||
require 'rest-client'
|
||||
require 'nokogiri'
|
||||
|
||||
|
||||
|
||||
## CURRENT ISSUE: 502 BAD GATEWAY WHEN TESTING GET_ADVISORY_URLS METHOD.
|
||||
## TODO: COPY THE EXACT HEADERS THAT THE WEB BROWSER SENDS
|
||||
## AND SEND THEM WITH THIS REQUEST AND TEST AGAIN.
|
||||
# Scrapes the Mozilla security advisories index page and the individual
# advisory pages it links to.
#
# NOTE: constructing an instance performs an HTTP GET of the index page,
# because +initialize+ populates +advisory_urls+ right away.
class MozillaSecurityAdvisoryScraper
  # Scheme and host that the relative advisory links are resolved against.
  BASE_URL = "https://www.mozilla.org".freeze

  attr_accessor :index_url, :default_headers, :advisory_urls

  # Sets the index URL and the browser-like request headers, then fetches the
  # index page to collect the advisory URLs.
  def initialize
    @index_url = "#{BASE_URL}/en-US/security/advisories"
    @default_headers = {
      :accept => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
      :user_agent => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:105.0) Gecko/20100101 Firefox/105.0'
    }
    # Nothing can have assigned @advisory_urls before this point, so a plain
    # assignment is equivalent to the former `||=`.
    @advisory_urls = get_advisory_urls
  end

  # Fetches the advisories index page.
  #
  # @return [String, nil] the response body, or nil on a non-200 status
  def get_index
    get(index_url)
  end

  # Fetches a single advisory page.
  #
  # @param advisory_url [String] absolute URL of the advisory page
  # @return [String, nil] the response body, or nil on a non-200 status
  def get_advisory(advisory_url)
    get(advisory_url)
  end

  # Parses an HTML string into a Nokogiri document.
  #
  # @param response_body [String] raw HTML
  # @return [Object] whatever Nokogiri::HTML returns for the given body
  def parse_html(response_body)
    Nokogiri::HTML(response_body)
  end

  # Collects the absolute URLs of every advisory linked from the index page.
  #
  # @param html_doc [#xpath] parsed index document
  # @return [Array<String>] absolute advisory URLs
  def advisory_urls_in_html(html_doc)
    html_doc.xpath('//li[@class="level-item"]/a')
            .map { |link| link['href'] }
            .compact # an anchor without href would otherwise yield a bare BASE_URL
            .map { |relative_url| "#{BASE_URL}#{relative_url}" }
  end

  # Downloads the index page and extracts the advisory URLs from it.
  #
  # @return [Array<String>] absolute advisory URLs
  def get_advisory_urls
    body = get_index
    doc = parse_html(body)
    advisory_urls_in_html(doc)
  end

  # Performs a GET request with the default headers.
  #
  # @param url [String] URL to fetch
  # @return [String, nil] the body on HTTP 200; otherwise the status is
  #   printed and nil is returned
  def get(url)
    response = RestClient::Request.execute(
      :method => :get,
      :url => url,
      :headers => default_headers
    )
    return response.body if response.code == 200

    # NOTE(review): rest-client is documented to raise for most non-2xx
    # statuses, so this is presumably only reached for other success codes
    # (e.g. 204) -- confirm against the rest-client version in use.
    puts "HTTP Status: #{response.code}"
    nil
  end

  # Returns the child nodes of the first <dl class="summary"> element.
  # Within it, each dt tag is the key and the following dd tag is the value.
  # TODO: pack the dt/dd pairs into a hash.
  #
  # @param advisory_doc [#xpath] parsed advisory document
  # @return [Object, nil] the children of the summary list, or nil when the
  #   page has no such element
  def parse_advisory_summary(advisory_doc)
    advisory_doc.xpath('//dl[@class="summary"]').first&.children
  end

  # Returns every <dl class="cve"> element of the advisory.
  #
  # @param advisory_doc [#xpath] parsed advisory document
  # @return [Object] the result of the XPath query
  def parse_advisory_cve(advisory_doc)
    advisory_doc.xpath('//dl[@class="cve"]')
  end
end
|
||||
|
|
Loading…
Add table
Reference in a new issue