# misc_rbtools/classes/nvd_downloader.rb

require 'rest-client'
require 'zlib'
require 'json'
require 'active_support/all'
require 'stringio'
#require '../modules/nvd_tools'

module NvdTools
  class NvdDownloader
    attr_accessor :version, :base_url, :base_filename, :years, :filenames_json, :filenames_meta, :client

    CURRENT_TIME = Time.now
    MIN_YEAR = '2002'
    MAX_YEAR = '2019'
    AVAILABLE_YEARS = (MIN_YEAR..MAX_YEAR).to_a

    def initialize(client: NvdClient.new)
      @client = client
      @base_filename = "nvdcve-#{client.version}-"
      @years = years
      @filenames_json = filenames('json.gz')
      @filenames_meta = filenames('meta')
    end

    # Map each available year to its string form, e.g. { 2002 => '2002', ... }.
    def years
      AVAILABLE_YEARS.map { |year| [year.to_i, year] }.to_h
    end

    # Build the feed filenames for every year plus the "recent" and "modified"
    # feeds, e.g. "nvdcve-<version>-2019.json.gz".
    def filenames(extension)
      year_filenames = years.map do |_year_int, year|
        "#{base_filename}#{year}.#{extension}"
      end
      other_filenames = [
        "#{base_filename}recent.#{extension}",
        "#{base_filename}modified.#{extension}"
      ]
      year_filenames + other_filenames
    end

    # Decompress a gzipped HTTP response body into a plain JSON string.
    def read_gzip_stream(gzip_stream)
      io_stream = StringIO.new(gzip_stream)
      gz = Zlib::GzipReader.new(io_stream)
      gz.read
    end

    # Write raw feed contents (or any string) to the given path.
    def write_to_file(contents, file_path)
      File.write(file_path, contents)
    end

    def parse_json(json_string)
      JSON.parse(json_string)
    end

    # One-time import of every json.gz feed (each year plus the "recent" and
    # "modified" feeds): download, decompress, and parse each one, optionally
    # writing the raw download to disk. Returns an array of
    # { :filename => ..., :json => ... } hashes.
    def one_time_import(to_file = false)
      filenames_json.map do |filename|
        r = client.get(filename)
        json_string = read_gzip_stream(r.body)
        parsed_json = parse_json(json_string)

        if to_file
          filepath = "../data/cve/#{filename}"
          write_to_file(r.body, filepath)
        end

        # Return the parsed feed as the block value so map collects it.
        { :filename => filename, :json => parsed_json }
      end
    end

    # Fetch the .meta file whose name matches `filename` (e.g. the "modified"
    # feed's metadata) and return its lines.
    def get_metadata_file(filename)
      file = filenames('meta').find { |name| name == filename }
      r = client.get(file)
      r.body.split("\r\n")
    end

    # Parse a .meta file's "key:value" lines (lastModifiedDate, size, sha256,
    # etc.) into a hash with symbol keys and string values.
    def extract_metadata(metadata)
      metaf = get_metadata_file(metadata)
      keys_and_values = metaf.map do |line|
        line.split(":", 2)
      end
      json = Hash[keys_and_values].to_json
      JSON.parse(json, { symbolize_names: true })
    end

    def check_metadata(metadata_file_to_check, metadata_to_check_against)
      # Check each key/value pair against the metadata already stored on disk
      # and return a hash with the same keys, where each value is true if the
      # pair matches and false if it does not.
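      # A minimal sketch of that comparison (the parameter meanings are an
      # assumption): `metadata_file_to_check` is a .meta filename as produced
      # by filenames('meta'), and `metadata_to_check_against` is a hash of the
      # shape returned by extract_metadata, e.g. one saved from an earlier run.
      fresh = extract_metadata(metadata_file_to_check)
      metadata_to_check_against.each_with_object({}) do |(key, value), result|
        result[key] = (fresh[key] == value)
      end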
    end

    def detect_changes(metadata)
      # Detect changes in the metadata: run check_metadata against the current
      # metadata and return true if anything changed, false otherwise.
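      # A minimal sketch under the same assumptions as check_metadata above:
      # `metadata` is a previously saved hash from extract_metadata, and the
      # "modified" feed's .meta file is the one checked (an assumption; any
      # feed name could be passed instead).
      checks = check_metadata("#{base_filename}modified.meta", metadata)
      checks.values.any? { |matched| !matched }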
    end
  end
end
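
# A hedged usage sketch (not part of the original file): drive the downloader
# with the default NvdClient (assumed to be loaded elsewhere, e.g. via the
# commented-out nvd_tools require), import every feed to ../data/cve/, and
# read the "modified" feed's metadata.
if __FILE__ == $PROGRAM_NAME
  downloader = NvdTools::NvdDownloader.new
  feeds = downloader.one_time_import(true)
  puts "Imported #{feeds.size} feeds"

  meta = downloader.extract_metadata("#{downloader.base_filename}modified.meta")
  puts "Modified feed last updated: #{meta[:lastModifiedDate]}"
end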