Added the basic framework for what's going to be a Mozilla security advisory scraping utility
This commit is contained in:
parent
226546bc48
commit
6e588a3578
1 changed files with 40 additions and 0 deletions
|
@ -0,0 +1,40 @@
|
|||
require 'rest-client'
|
||||
require 'nokogiri'
|
||||
|
||||
|
||||
|
||||
## CURRENT ISSUE: 502 BAD GATEWAY WHEN TESTING GET_ADVISORY_URLS METHOD.
|
||||
## TODO: COPY THE EXACT HEADERS THAT THE WEB BROWSER SENDS
|
||||
## AND SEND THEM WITH THIS REQUEST AND TEST AGAIN.
|
||||
# Scrapes the Mozilla security advisories index page for advisory links.
#
# Uses RestClient for the HTTP request and Nokogiri for HTML parsing; both
# are expected to be required by the enclosing file.
class MozillaSecurityAdvisoryScraper
  # URL of the advisories index page. Set in #initialize; writable so a
  # caller can point the scraper at a different page.
  attr_accessor :index_url

  def initialize
    @index_url = "https://www.mozilla.org/en-US/security/advisories"
  end

  # Fetches the index page with an HTTP GET.
  #
  # NOTE(review): rest-client is documented to raise for most non-2xx
  # statuses, so the non-200 branch below may only see other 2xx codes --
  # confirm against the rest-client version in use.
  #
  # @return [String, nil] the response body when the status code is 200;
  #   otherwise the status code is printed and nil is returned
  def get_index
    response = RestClient::Request.execute(method: :get, url: index_url)

    if response.code == 200
      response.body
    else
      puts "HTTP Status code: #{response.code}"
      nil
    end
  end

  # Parses an HTML string into a Nokogiri document.
  #
  # @param response_body [String] raw HTML
  # @return the value returned by Nokogiri::HTML
  def parse_index(response_body)
    Nokogiri::HTML(response_body)
  end

  # Collects the href attribute of every <a> that is a direct child of an
  # <li> whose class attribute is exactly "level-item".
  #
  # @param html_doc [#xpath] a parsed document
  # @return [Array] one href value per matching link
  def advisory_urls(html_doc)
    html_doc.xpath('//li[@class="level-item"]/a').map { |link| link['href'] }
  end

  # Fetches the index page, parses it and returns the advisory links.
  #
  # @return [Array] advisory hrefs; empty when the index page could not be
  #   fetched (get_index returned nil)
  def get_advisory_urls
    body = get_index
    # Nothing to parse when the fetch did not yield a body.
    return [] if body.nil?

    advisory_urls(parse_index(body))
  end
end
|
Loading…
Add table
Reference in a new issue