2019-05-11 17:33:04 -05:00
|
|
|
require 'rest-client'
|
2019-05-11 18:07:09 -05:00
|
|
|
require 'zlib'
|
|
|
|
require 'json'
|
2019-08-08 01:10:50 -05:00
|
|
|
require 'active_support/all'
|
2019-05-19 19:12:26 -05:00
|
|
|
#require '../modules/nvd_tools'
|
2019-05-11 17:33:04 -05:00
|
|
|
|
2019-05-12 20:34:52 -05:00
|
|
|
module NvdTools
  # Builds the filenames of the NVD CVE feeds (one per year plus the "recent"
  # and "modified" feeds) and downloads / decodes them through an injected
  # client.
  #
  # The client is any object that responds to +version+ and to
  # +get(filename)+, where the value returned by +get+ responds to +body+.
  class NvdDownloader
    attr_accessor :version, :base_url, :base_filename, :filenames_json, :filenames_meta, :client
    # The reader for +years+ is hand-written below; only the writer is generated
    # here so the public +years=+ setter keeps existing.
    attr_writer :years

    CURRENT_TIME = Time.now
    MIN_YEAR = '2002'
    MAX_YEAR = '2019'
    # Every year (as a String) for which a yearly feed filename is generated.
    AVAILABLE_YEARS = (MIN_YEAR..MAX_YEAR).to_a.freeze

    # @param client [#version, #get] feed client; its +version+ becomes part of
    #   every generated filename. The default is only instantiated when no
    #   client is passed.
    def initialize(client: NvdClient.new)
      @client = client
      @base_filename = "nvdcve-#{client.version}-"
      @years = years
      @filenames_json = filenames('json.gz')
      @filenames_meta = filenames('meta')
    end

    # Maps each available year as an Integer to the same year as a String,
    # e.g. +{ 2002 => '2002', ... }+. The hash is built once and then reused.
    #
    # @return [Hash{Integer => String}]
    def years
      @years ||= NvdDownloader::AVAILABLE_YEARS.map { |year| [year.to_i, year] }.to_h
    end

    # Builds the feed filenames for the given extension: one per available year
    # followed by the "recent" and "modified" feeds.
    #
    # @param extension [String] file extension without a leading dot
    # @return [Array<String>]
    def filenames(extension)
      year_filenames = years.values.map { |year| "#{base_filename}#{year}.#{extension}" }
      other_filenames = [
        "#{base_filename}recent.#{extension}",
        "#{base_filename}modified.#{extension}"
      ]

      year_filenames + other_filenames
    end

    # Decompresses an in-memory gzip payload.
    #
    # @param gzip_stream [String] gzip-compressed bytes
    # @return [String] the decompressed content
    def read_gzip_stream(gzip_stream)
      # The block form of +wrap+ closes the reader even if +read+ raises.
      Zlib::GzipReader.wrap(StringIO.new(gzip_stream), &:read)
    end

    # Writes +parsed_json+ to +file_path+ without any newline or encoding
    # translation. The only caller in this class passes the raw gzip response
    # body, so the write must be binary-safe.
    #
    # @param parsed_json [String] content to write
    # @param file_path [String] destination path
    # @return [Integer] number of bytes written
    def write_to_file(parsed_json, file_path)
      File.binwrite(file_path, parsed_json)
    end

    # @param json_string [String] a JSON document
    # @return [Object] the parsed document
    # @raise [JSON::ParserError] when the string is not valid JSON
    def parse_json(json_string)
      JSON.parse(json_string)
    end

    # Fetches every +json.gz+ feed (each year + recent + modified), decompresses
    # it and parses it.
    #
    # @param to_file [Boolean] when truthy, also stores each raw gzip response
    #   body under +../data/cve/+
    # @return [Array<Hash>] one +{ filename:, json: }+ hash per feed
    def one_time_import(to_file=false)
      filenames_json.map do |filename|
        r = client.get(filename)
        parsed_json = parse_json(read_gzip_stream(r.body))

        # Write first so the hash below is the block's value; previously the
        # trailing +if+ was the last expression and the hash was discarded.
        write_to_file(r.body, "../data/cve/#{filename}") if to_file

        { :filename => filename, :json => parsed_json }
      end
    end

    # Fetches one of the known +.meta+ files and returns its lines.
    #
    # NOTE(review): an older note here said this method should build the
    # "modified" filename with the meta extension itself; as written, the
    # caller must pass the full filename.
    #
    # @param filename [String] one of the names produced by +filenames('meta')+
    # @return [Array<String>] lines of the response body, without line endings
    # @raise [ArgumentError] when +filename+ is not a known meta filename
    def get_metadata_file(filename)
      unless filenames('meta').include?(filename)
        raise ArgumentError, "unknown metadata file: #{filename.inspect}"
      end

      # Accept both CRLF and bare LF line endings.
      client.get(filename).body.split(/\r?\n/)
    end

    # Fetches a +.meta+ file and turns its +key:value+ lines into a hash.
    # Each line is split on the first colon only, so values may themselves
    # contain colons. Lines without a colon (e.g. blank lines) are skipped.
    #
    # @param metadata [String] meta filename, see {#get_metadata_file}
    # @return [Hash{Symbol => String}]
    def extract_metadata(metadata)
      get_metadata_file(metadata).each_with_object({}) do |line, parsed|
        key, value = line.split(':', 2)
        next if value.nil?

        parsed[key.to_sym] = value
      end
    end

    # TODO: not implemented yet; currently returns nil.
    # Intended behavior (from the original notes): check each key/value pair
    # against the file on disk and return a new hash with the same keys whose
    # values are booleans. The original note was cut off, so what exactly each
    # boolean should indicate still needs to be confirmed.
    def check_metadata(metadata_file_to_check, metadata_to_check_against)
    end

    # TODO: not implemented yet; currently returns nil.
    # Intended behavior (from the original notes): run check_metadata against
    # the current metadata and return true if there is a change, false if not.
    def detect_changes(metadata)
    end
  end
end
|