Added more to the scraper. Now trying to pack the data from each advisory into a hash.
This commit is contained in:
parent
aae01c1e57
commit
db264a3f00
1 changed files with 40 additions and 24 deletions
|
@ -1,45 +1,61 @@
|
||||||
require 'rest-client'
|
require 'rest-client'
|
||||||
require 'nokogiri'
|
require 'nokogiri'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## CURRENT ISSUE: 502 BAD GATEWAY WHEN TESTING GET_ADVISORY_URLS METHOD.
|
|
||||||
## TODO: COPY THE EXACT HEADERS THAT YOU ARE GIVING FROM THE WEB BROWSER
|
|
||||||
## AND SEND THEM WITH THIS REQUEST AND TEST AGAIN.
|
|
||||||
class MozillaSecurityAdvisoryScraper
|
class MozillaSecurityAdvisoryScraper
|
||||||
attr_accessor :index_url
|
attr_accessor :index_url, :default_headers, :advisory_urls
|
||||||
|
|
||||||
def initialize()
|
def initialize()
|
||||||
@index_url = "https://www.mozilla.org/en-US/security/advisories"
|
@index_url = "https://www.mozilla.org/en-US/security/advisories"
|
||||||
end
|
@default_headers = {
|
||||||
|
|
||||||
def get_index
|
|
||||||
headers = {
|
|
||||||
:accept => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
:accept => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
||||||
:user_agent => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:105.0) Gecko/20100101 Firefox/105.0'
|
:user_agent => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:105.0) Gecko/20100101 Firefox/105.0'
|
||||||
}
|
}
|
||||||
response = RestClient::Request.execute(
|
@advisory_urls ||= get_advisory_urls
|
||||||
:method => :get,
|
|
||||||
:url => index_url,
|
|
||||||
:headers => headers
|
|
||||||
)
|
|
||||||
if response.code == 200
|
|
||||||
response.body
|
|
||||||
else
|
|
||||||
puts "HTTP Status code: #{r.code}"
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def parse_index(response_body)
|
def get_index
|
||||||
|
get(index_url)
|
||||||
|
end
|
||||||
|
|
||||||
|
def get_advisory(advisory_url)
|
||||||
|
get(advisory_url)
|
||||||
|
end
|
||||||
|
|
||||||
|
def parse_html(response_body)
|
||||||
Nokogiri::HTML(response_body)
|
Nokogiri::HTML(response_body)
|
||||||
end
|
end
|
||||||
|
|
||||||
def advisory_urls(html_doc)
|
def advisory_urls_in_html(html_doc)
|
||||||
html_doc.xpath('//li[@class="level-item"]/a').map {|link| relative_url = link['href']; "https://www.mozilla.org#{relative_url}"}
|
html_doc.xpath('//li[@class="level-item"]/a').map {|link| relative_url = link['href']; "https://www.mozilla.org#{relative_url}"}
|
||||||
end
|
end
|
||||||
|
|
||||||
def get_advisory_urls
|
def get_advisory_urls
|
||||||
body = get_index
|
body = get_index
|
||||||
doc = parse_index(body)
|
doc = parse_html(body)
|
||||||
advisory_urls(doc)
|
advisory_urls_in_html(doc)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Performs an HTTP GET on +url+ using the scraper's default headers.
#
# rest-client raises RestClient::ExceptionWithResponse for error statuses
# (e.g. 4xx/5xx) instead of returning a response, so a plain status check
# never sees them. The rescue below reports those statuses the same way as
# any other non-200 response.
#
# @param url [String] the address to fetch
# @return [String, nil] the response body on HTTP 200, otherwise nil
def get(url)
  response = RestClient::Request.execute(
    :method => :get,
    :url => url,
    :headers => default_headers
  )
  return response.body if response.code == 200

  # Other successful statuses (201-207) land here.
  puts "HTTP Status: #{response.code}"
  nil
rescue RestClient::ExceptionWithResponse => e
  puts "HTTP Status: #{e.http_code}"
  nil
end
|
||||||
|
|
||||||
|
# Returns the child nodes of the first <dl class="summary"> element in the
# advisory document.
#
# The dt and dd tags among those nodes hold the information we care about:
# each dt tag is a key and the dd tag is its value.
# TODO: pack the dt/dd pairs into a hash.
#
# @param advisory_doc [Nokogiri::HTML::Document] a parsed advisory page
# @return [Nokogiri::XML::NodeSet] the children of the first summary list;
#   empty when the page has no such element
def parse_advisory_summary(advisory_doc)
  # Selecting the children inside the XPath itself yields an empty NodeSet
  # when no summary list exists, rather than calling #children on nil.
  advisory_doc.xpath('(//dl[@class="summary"])[1]/node()')
end
|
||||||
|
|
||||||
|
def parse_advisory_cve(advisory_doc)
|
||||||
|
advisory_doc.xpath('//dl[@class="cve"]')
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Add table
Reference in a new issue