# frozen_string_literal: true

require 'git'
require 'json'
require 'date'
require 'set'
require 'bulk_insert'

# Imports CVE JSON records from MITRE's cvelist GitHub repository.
#
# The repository is cloned (or updated when a checkout already exists) and
# the per-year JSON files are read and handed to +Cve.bulk_insert+.
#
# NOTE(review): +Cve+ is not defined in this file; it is presumably an
# ActiveRecord model extended by the bulk_insert gem -- confirm.
class CveListImporter
  DEFAULT_REPO_URL = 'https://github.com/CVEProject/cvelist.git'
  DEFAULT_REPO_PATH = '/data_importer/data/cve_list'

  # First year directory that is scanned; scanning runs up to the current year.
  FIRST_YEAR = 1999

  # Top-level JSON keys copied unchanged into an attribute of the same name.
  PASSTHROUGH_KEYS = %w[
    affects data_format data_type data_version description
    impact problemtype references source
  ].freeze

  attr_accessor :repo_url, :repo_path

  # @param repo_url [String] git URL to clone from
  # @param repo_path [String] local directory holding the checkout
  def initialize(repo_url: DEFAULT_REPO_URL, repo_path: DEFAULT_REPO_PATH)
    @repo_url = repo_url
    @repo_path = repo_path
  end

  # Clones +repo_url+ into +repo_path+ using the git gem.
  def git_clone_repo
    Git.clone(repo_url, repo_path)
  end

  # Runs +git pull+ inside +repo_path+.
  #
  # The command is passed as an argument vector, so the path never goes
  # through a shell, and +-C+ makes git fail when the directory is unusable
  # instead of pulling in whatever the current directory happens to be.
  # Git's stdout is discarded; a failing pull does not raise.
  #
  # @return [nil]
  def pull_latest_changes
    puts "Now pulling latest changes from #{repo_path}"
    system('git', '-C', repo_path.to_s, 'pull', out: File::NULL)
    nil
  end

  # @param filename [String] path of a JSON file
  # @return [Object] the parsed JSON document
  def read_json(filename)
    JSON.parse(File.read(filename))
  end

  # Lists the JSON files stored one directory level below the year directory.
  #
  # @param year [String, Integer]
  # @return [Array<String>] matching paths, sorted for a deterministic order
  def list_jsons_for_year(year)
    Dir["#{repo_path}/#{year}/*/*.json"].sort
  end

  # @param year [String, Integer]
  # @return [Array<Object>] parsed content of every JSON file for +year+
  def read_jsons_for_year(year)
    list_jsons_for_year(year).map { |filename| read_json(filename) }
  end

  # @return [Array<Array<Object>>] one array of parsed documents per year
  def read_all_jsons
    import_years.map { |year| read_jsons_for_year(year.to_s) }
  end

  # Builds the attribute hash for one parsed CVE document.
  #
  # @param json [Hash] parsed CVE document
  # @return [Hash{Symbol => Object}]
  # @raise [NoMethodError] when the document has no 'CVE_data_meta' entry
  def cve_attrs_from_item(json)
    meta = json['CVE_data_meta']
    attrs = { cve_data_meta: meta, cve_id: meta['ID'] }
    PASSTHROUGH_KEYS.each { |key| attrs[key.to_sym] = json[key] }
    attrs
  end

  # Attribute hashes for every CVE of +year+, ready for bulk inserting.
  #
  # @param year [String, Integer]
  # @return [Array<Hash{Symbol => Object}>]
  def cves_for_year(year)
    read_jsons_for_year(year).map { |json| cve_attrs_from_item(json) }
  end

  # Brings the local checkout up to date, then inserts, year by year, every
  # CVE whose id is not yet returned by +Cve.where(cve_id: ...)+.
  #
  # @return [Array] the result of #bulk_insert for each year
  def import
    sync_repo
    import_years.map do |year|
      new_cves = without_known(cves_for_year(year))
      puts "Importing any new CVEs from #{year}"
      bulk_insert(new_cves)
    end
  end

  # Adds every attribute hash to a single +Cve.bulk_insert+ worker.
  #
  # @param cves [Array<Hash>]
  def bulk_insert(cves)
    Cve.bulk_insert do |worker|
      cves.each { |attrs| worker.add(attrs) }
    end
  end

  private

  # Years whose directories are scanned.
  def import_years
    FIRST_YEAR..Date.today.year
  end

  # Pulls when the checkout directory exists, clones otherwise.
  def sync_repo
    if Dir.exist?(repo_path)
      pull_latest_changes
    else
      git_clone_repo
    end
  end

  # Drops the records whose cve_id is already stored.
  # A Set keeps the membership test constant-time per record.
  def without_known(cves)
    ids = cves.map { |cve| cve[:cve_id] }
    known_ids = Cve.where(cve_id: ids).pluck(:cve_id).to_set
    cves.reject { |cve| known_ids.include?(cve[:cve_id]) }
  end
end