2019-05-11 17:33:04 -05:00
|
|
|
require 'json'
require 'stringio'
require 'zlib'

require 'rest-client'
#require '../modules/nvd_tools'
|
2019-05-11 17:33:04 -05:00
|
|
|
|
2019-05-12 20:34:52 -05:00
|
|
|
module NvdTools
  # Builds the names of the gzipped JSON CVE feed files (one per year plus
  # the "recent" and "modified" feeds) and downloads them through a client.
  #
  # The client is any object that responds to +version+ (used to build the
  # filename prefix) and +get(filename)+, whose return value responds to
  # +body+ with the raw gzip payload.
  class NvdDownloader
    attr_accessor :version, :base_url, :base_filename, :filenames_json, :client
    # Only the writer is generated here; the reader is defined explicitly
    # below and always derives its value from AVAILABLE_YEARS.
    attr_writer :years

    MIN_YEAR = '2002'.freeze
    MAX_YEAR = '2019'.freeze
    # String range, so this expands via String#succ: "2002", "2003", ...
    AVAILABLE_YEARS = (MIN_YEAR..MAX_YEAR).to_a.freeze
    GZIPPED_JSON_EXTENSION = 'json.gz'.freeze
    # Directory (relative to the current working directory) that
    # one_time_import writes downloaded feeds into.
    DATA_DIR = '../data/cve'.freeze

    # @param client [#version, #get] feed client; defaults to NvdClient.new
    def initialize(client: NvdClient.new)
      @client = client
      @base_filename = "nvdcve-#{client.version}-"
      @years = years
      @filenames_json = filenames(GZIPPED_JSON_EXTENSION)
    end

    # Maps each available year as an Integer to the same year as a String.
    #
    # @return [Hash{Integer => String}] e.g. { 2002 => "2002", ... }
    def years
      AVAILABLE_YEARS.map { |year| [year.to_i, year] }.to_h
    end

    # Builds the feed filenames for every available year, followed by the
    # "recent" and "modified" feeds, in that order.
    #
    # @param extension [String] file extension without the leading dot
    # @return [Array<String>]
    def filenames(extension)
      labels = years.values + %w[recent modified]
      labels.map { |label| "#{base_filename}#{label}.#{extension}" }
    end

    # Decompresses an in-memory gzip payload.
    #
    # @param gzip_stream [String] raw gzip bytes
    # @return [String] the decompressed content
    # @raise [Zlib::GzipFile::Error] if the payload is not valid gzip data
    def read_gzip_stream(gzip_stream)
      # The block form closes the reader (and the wrapped StringIO) even
      # when reading raises.
      Zlib::GzipReader.wrap(StringIO.new(gzip_stream), &:read)
    end

    # Writes data to file_path and returns the number of bytes written.
    #
    # @param parsed_json [#to_s] content to write; one_time_import passes the
    #   raw gzip response body here, hence the binary write
    # @param file_path [String]
    # @return [Integer]
    def write_to_file(parsed_json, file_path)
      File.binwrite(file_path, parsed_json)
    end

    # @param json_string [String]
    # @return [Object] the parsed JSON document
    # @raise [JSON::ParserError] on malformed input
    def parse_json(json_string)
      JSON.parse(json_string)
    end

    # Downloads every file in filenames_json (each year plus the recent and
    # modified feeds), decompresses it, and parses it.
    #
    # @param to_file [Boolean] when true, also stores each raw gzip body
    #   under DATA_DIR using the feed's filename
    # @return [Array<Hash>] one { filename:, json: } hash per feed
    def one_time_import(to_file = false)
      filenames_json.map do |filename|
        response = client.get(filename)
        parsed = parse_json(read_gzip_stream(response.body))

        write_to_file(response.body, File.join(DATA_DIR, filename)) if to_file

        # Must be the block's last expression: previously the trailing
        # `if to_file` swallowed this hash and the method returned nils.
        { filename: filename, json: parsed }
      end
    end

    # TODO: build the "modified" feed filename with the meta file extension.
    # Not implemented yet; currently returns nil.
    def modified_meta
    end

    # TODO: open the metafile, build a hash of the key/value pairs inside it,
    # check each pair against the file on disk, and return a new hash with
    # the same keys whose values are true/false depending on whether the
    # pair matched (presumably; the original note was cut off mid-sentence).
    # Not implemented yet; currently returns nil.
    def check_metafile(metafile)
    end

    # TODO: detect changes in the metafile by running check_metafile against
    # the current metafile on disk; return true if there is a change, false
    # otherwise. Not implemented yet; currently returns nil.
    def detect_changes(metafile)
    end
  end
end
|