From 44be58eeb3db932fd27c775cbfeab78cec53c010 Mon Sep 17 00:00:00 2001 From: Brendan McDevitt Date: Mon, 18 Apr 2022 22:07:22 -0500 Subject: [PATCH] add timestamps and some tricks to make sure i have all of my json keys before upsert_all is run --- db/migrate/20220401173431_create_cves.rb | 1 + db/migrate/20220407223152_create_cnas.rb | 1 + db/schema.rb | 4 ++ lib/importers/cna_importer.rb | 21 ++++++- lib/importers/cve_list_importer.rb | 73 ++++++++++-------------- lib/importers/github_repo.rb | 2 +- lib/json_helper.rb | 2 + 7 files changed, 58 insertions(+), 46 deletions(-) diff --git a/db/migrate/20220401173431_create_cves.rb b/db/migrate/20220401173431_create_cves.rb index f8ab256..c4e4ad9 100644 --- a/db/migrate/20220401173431_create_cves.rb +++ b/db/migrate/20220401173431_create_cves.rb @@ -13,6 +13,7 @@ class CreateCves < ActiveRecord::Migration[5.2] t.jsonb :problemtype t.jsonb :references t.jsonb :source + t.timestamps end end end diff --git a/db/migrate/20220407223152_create_cnas.rb b/db/migrate/20220407223152_create_cnas.rb index 4cf6a5a..32c3feb 100644 --- a/db/migrate/20220407223152_create_cnas.rb +++ b/db/migrate/20220407223152_create_cnas.rb @@ -12,6 +12,7 @@ class CreateCnas < ActiveRecord::Migration[7.0] t.string :resources, array: true t.jsonb :cna t.string :country + t.timestamps end end end diff --git a/db/schema.rb b/db/schema.rb index a6ae81c..c654d45 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -25,6 +25,8 @@ ActiveRecord::Schema[7.0].define(version: 2022_04_11_181501) do t.string "resources", array: true t.jsonb "cna" t.string "country" + t.datetime "created_at", null: false + t.datetime "updated_at", null: false t.index ["cna_id"], name: "index_cnas_on_cna_id", unique: true end @@ -56,6 +58,8 @@ ActiveRecord::Schema[7.0].define(version: 2022_04_11_181501) do t.jsonb "problemtype" t.jsonb "references" t.jsonb "source" + t.datetime "created_at", precision: nil, null: false + t.datetime "updated_at", precision: nil, null: false t.index ["cve_id"], name: "index_cves_on_cve_id", unique: true end diff --git a/lib/importers/cna_importer.rb b/lib/importers/cna_importer.rb index 3e1bfad..4f35d6c 100644 --- a/lib/importers/cna_importer.rb +++ b/lib/importers/cna_importer.rb @@ -2,7 +2,23 @@ require 'json' require '/data_importer/lib/json_helper.rb' require 'rest-client' -class CnaImporter +class CnaImporter + + EXPECTED_KEYS = [ + :short_name, + :cna_id, + :organization_name, + :scope, + :contact, + :disclosure_policy, + :security_advisories, + :resources, + :cna, + :country + ].freeze + + EMPTY_HASH = EXPECTED_KEYS.map {|k| [k, nil] }.to_h.freeze + attr_accessor :url def initialize @url = 'https://raw.githubusercontent.com/CVEProject/cve-website/dev/src/assets/data/CNAsList.json' @@ -31,8 +47,9 @@ class CnaImporter def import jsons = get_json + merged_hashes = jsons.map {|h| h.slice(*EXPECTED_KEYS).reverse_merge(EMPTY_HASH) } puts "Now importing CNAs." - Cna.upsert_all(jsons, unique_by: :cna_id) + Cna.upsert_all(merged_hashes, unique_by: :cna_id) end end diff --git a/lib/importers/cve_list_importer.rb b/lib/importers/cve_list_importer.rb index 27f0cf7..dce6e1b 100644 --- a/lib/importers/cve_list_importer.rb +++ b/lib/importers/cve_list_importer.rb @@ -1,11 +1,26 @@ require 'git' require 'json' require 'date' -require 'bulk_insert' require '/data_importer/lib/importers/github_repo.rb' +require '/data_importer/lib/json_helper.rb' # This class can be used to import cvelist json data from mitre from their github repo class CveListImporter < GithubRepo + EXPECTED_KEYS = [ + :cve_data_meta, + :cve_id, + :affects, + :data_format, + :data_type, + :data_version, + :description, + :impact, + :problemtype, + :references, + :source +].freeze + +EMPTY_HASH = EXPECTED_KEYS.map {|k| [k, nil] }.to_h.freeze def initialize super(repo_url='https://github.com/CVEProject/cvelist.git', repo_path='/data_importer/data/cve_list') @@ -18,37 +33,17 @@ class CveListImporter < GithubRepo def read_jsons_for_year(year) filenames = list_jsons_for_year(year) - filenames.map { |filename| read_json(filename) } - end - - def read_all_jsons - (1999..Date.today.year).map do |year| - read_jsons_for_year(year.to_s) + hashes = filenames.map do |filename| + json = read_json(filename) + json_transformed = JsonHelper.deep_transform_keys(json) + add_cve_id_to_json_key(json_transformed) end + hashes.map {|h| h.slice(*EXPECTED_KEYS).reverse_merge(EMPTY_HASH) } end - def cve_attrs_from_item(json) - cve_attrs = {} - cve_attrs[:cve_data_meta] = json['CVE_data_meta'] - cve_attrs[:cve_id] = json['CVE_data_meta']['ID'] - cve_attrs[:affects] = json['affects'] - cve_attrs[:data_format] = json['data_format'] - cve_attrs[:data_type] = json['data_type'] - cve_attrs[:data_version] = json['data_version'] - cve_attrs[:description] = json['description'] - cve_attrs[:impact] = json['impact'] - cve_attrs[:problemtype] = json['problemtype'] - cve_attrs[:references] = json['references'] - cve_attrs[:source] = json['source'] - cve_attrs - end - - # for bulk inserting - def cves_for_year(year) - json_data = read_jsons_for_year(year) - json_data.map do |json_f| - cve_attrs_from_item(json_f) - end + def add_cve_id_to_json_key(json) + json[:cve_id] = json[:cve_data_meta][:id] + json end def import @@ -56,24 +51,16 @@ class CveListImporter < GithubRepo puts "Now starting import for #{repo_url}." puts '----------' * 12 (1999..Date.today.year).map do |year| - cves_from_json = cves_for_year(year) + cves = read_jsons_for_year(year) - ids = cves_from_json.map { |cve| cve[:cve_id] } - cve_ids_in_db = Cve.where(:cve_id => ids).pluck(:cve_id) + #ids = cves.map { |cve| cve[:cve_id] } + #cve_ids_in_db = Cve.where(:cve_id => ids).pluck(:cve_id) - new_cve_ids = ids - cve_ids_in_db - new_cves = cves_from_json.select { |cve| cve if new_cve_ids.include?(cve[:cve_id]) } + #new_cve_ids = ids - cve_ids_in_db + #new_cves = cves.select { |cve| cve if new_cve_ids.include?(cve[:cve_id]) } puts "Importing any new CVEs from #{year}" - bulk_insert(new_cves) - end - end - - def bulk_insert(cves) - Cve.bulk_insert do |worker| - cves.each do |attrs| - worker.add(attrs) - end + Cve.upsert_all(cves, unique_by: :cve_id) end end end \ No newline at end of file diff --git a/lib/importers/github_repo.rb b/lib/importers/github_repo.rb index 0f30b42..17370c0 100644 --- a/lib/importers/github_repo.rb +++ b/lib/importers/github_repo.rb @@ -20,7 +20,7 @@ class GithubRepo end def read_json(filename) - JSON.parse(File.read(filename)) + JSON.parse(File.read(filename), symbolize_names: true) end def pull_or_clone diff --git a/lib/json_helper.rb b/lib/json_helper.rb index 88fe8d6..61281dd 100644 --- a/lib/json_helper.rb +++ b/lib/json_helper.rb @@ -1,4 +1,6 @@ class JsonHelper + + def self.deep_transform_keys(json_hash) if json_hash.is_a? Array json_hash.map {|jh| symbolize_names_snake_case(jh) }