Add timestamps and some tricks to make sure I have all of my JSON keys before upsert_all is run
This commit is contained in:
parent
9fd17c606a
commit
44be58eeb3
7 changed files with 58 additions and 46 deletions
|
@ -13,6 +13,7 @@ class CreateCves < ActiveRecord::Migration[5.2]
|
||||||
t.jsonb :problemtype
|
t.jsonb :problemtype
|
||||||
t.jsonb :references
|
t.jsonb :references
|
||||||
t.jsonb :source
|
t.jsonb :source
|
||||||
|
t.timestamps
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -12,6 +12,7 @@ class CreateCnas < ActiveRecord::Migration[7.0]
|
||||||
t.string :resources, array: true
|
t.string :resources, array: true
|
||||||
t.jsonb :cna
|
t.jsonb :cna
|
||||||
t.string :country
|
t.string :country
|
||||||
|
t.timestamps
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -25,6 +25,8 @@ ActiveRecord::Schema[7.0].define(version: 2022_04_11_181501) do
|
||||||
t.string "resources", array: true
|
t.string "resources", array: true
|
||||||
t.jsonb "cna"
|
t.jsonb "cna"
|
||||||
t.string "country"
|
t.string "country"
|
||||||
|
t.datetime "created_at", null: false
|
||||||
|
t.datetime "updated_at", null: false
|
||||||
t.index ["cna_id"], name: "index_cnas_on_cna_id", unique: true
|
t.index ["cna_id"], name: "index_cnas_on_cna_id", unique: true
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -56,6 +58,8 @@ ActiveRecord::Schema[7.0].define(version: 2022_04_11_181501) do
|
||||||
t.jsonb "problemtype"
|
t.jsonb "problemtype"
|
||||||
t.jsonb "references"
|
t.jsonb "references"
|
||||||
t.jsonb "source"
|
t.jsonb "source"
|
||||||
|
t.datetime "created_at", precision: nil, null: false
|
||||||
|
t.datetime "updated_at", precision: nil, null: false
|
||||||
t.index ["cve_id"], name: "index_cves_on_cve_id", unique: true
|
t.index ["cve_id"], name: "index_cves_on_cve_id", unique: true
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,22 @@ require '/data_importer/lib/json_helper.rb'
|
||||||
require 'rest-client'
|
require 'rest-client'
|
||||||
|
|
||||||
class CnaImporter
|
class CnaImporter
|
||||||
|
|
||||||
|
EXPECTED_KEYS = [
|
||||||
|
:short_name,
|
||||||
|
:cna_id,
|
||||||
|
:organization_name,
|
||||||
|
:scope,
|
||||||
|
:contact,
|
||||||
|
:disclosure_policy,
|
||||||
|
:security_advisories,
|
||||||
|
:resources,
|
||||||
|
:cna,
|
||||||
|
:country
|
||||||
|
].freeze
|
||||||
|
|
||||||
|
EMPTY_HASH = EXPECTED_KEYS.map {|k| [k, nil] }.to_h.freeze
|
||||||
|
|
||||||
attr_accessor :url
|
attr_accessor :url
|
||||||
def initialize
|
def initialize
|
||||||
@url = 'https://raw.githubusercontent.com/CVEProject/cve-website/dev/src/assets/data/CNAsList.json'
|
@url = 'https://raw.githubusercontent.com/CVEProject/cve-website/dev/src/assets/data/CNAsList.json'
|
||||||
|
@ -31,8 +47,9 @@ class CnaImporter
|
||||||
|
|
||||||
def import
|
def import
|
||||||
jsons = get_json
|
jsons = get_json
|
||||||
|
merged_hashes = jsons.map {|h| h.slice(*EXPECTED_KEYS).reverse_merge(EMPTY_HASH) }
|
||||||
puts "Now importing CNAs."
|
puts "Now importing CNAs."
|
||||||
Cna.upsert_all(jsons, unique_by: :cna_id)
|
Cna.upsert_all(merged_hashes, unique_by: :cna_id)
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -1,11 +1,26 @@
|
||||||
require 'git'
|
require 'git'
|
||||||
require 'json'
|
require 'json'
|
||||||
require 'date'
|
require 'date'
|
||||||
require 'bulk_insert'
|
|
||||||
require '/data_importer/lib/importers/github_repo.rb'
|
require '/data_importer/lib/importers/github_repo.rb'
|
||||||
|
require '/data_importer/lib/json_helper.rb'
|
||||||
|
|
||||||
# This class can be used to import cvelist json data from mitre from their github repo
|
# This class can be used to import cvelist json data from mitre from their github repo
|
||||||
class CveListImporter < GithubRepo
|
class CveListImporter < GithubRepo
|
||||||
|
EXPECTED_KEYS = [
|
||||||
|
:cve_data_meta,
|
||||||
|
:cve_id,
|
||||||
|
:affects,
|
||||||
|
:data_format,
|
||||||
|
:data_type,
|
||||||
|
:data_version,
|
||||||
|
:description,
|
||||||
|
:impact,
|
||||||
|
:problemtype,
|
||||||
|
:references,
|
||||||
|
:source
|
||||||
|
].freeze
|
||||||
|
|
||||||
|
EMPTY_HASH = EXPECTED_KEYS.map {|k| [k, nil] }.to_h.freeze
|
||||||
|
|
||||||
def initialize
|
def initialize
|
||||||
super(repo_url='https://github.com/CVEProject/cvelist.git', repo_path='/data_importer/data/cve_list')
|
super(repo_url='https://github.com/CVEProject/cvelist.git', repo_path='/data_importer/data/cve_list')
|
||||||
|
@ -18,37 +33,17 @@ class CveListImporter < GithubRepo
|
||||||
|
|
||||||
def read_jsons_for_year(year)
|
def read_jsons_for_year(year)
|
||||||
filenames = list_jsons_for_year(year)
|
filenames = list_jsons_for_year(year)
|
||||||
filenames.map { |filename| read_json(filename) }
|
hashes = filenames.map do |filename|
|
||||||
|
json = read_json(filename)
|
||||||
|
json_transformed = JsonHelper.deep_transform_keys(json)
|
||||||
|
add_cve_id_to_json_key(json_transformed)
|
||||||
|
end
|
||||||
|
hashes.map {|h| h.slice(*EXPECTED_KEYS).reverse_merge(EMPTY_HASH) }
|
||||||
end
|
end
|
||||||
|
|
||||||
def read_all_jsons
|
def add_cve_id_to_json_key(json)
|
||||||
(1999..Date.today.year).map do |year|
|
json[:cve_id] = json[:cve_data_meta][:id]
|
||||||
read_jsons_for_year(year.to_s)
|
json
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def cve_attrs_from_item(json)
|
|
||||||
cve_attrs = {}
|
|
||||||
cve_attrs[:cve_data_meta] = json['CVE_data_meta']
|
|
||||||
cve_attrs[:cve_id] = json['CVE_data_meta']['ID']
|
|
||||||
cve_attrs[:affects] = json['affects']
|
|
||||||
cve_attrs[:data_format] = json['data_format']
|
|
||||||
cve_attrs[:data_type] = json['data_type']
|
|
||||||
cve_attrs[:data_version] = json['data_version']
|
|
||||||
cve_attrs[:description] = json['description']
|
|
||||||
cve_attrs[:impact] = json['impact']
|
|
||||||
cve_attrs[:problemtype] = json['problemtype']
|
|
||||||
cve_attrs[:references] = json['references']
|
|
||||||
cve_attrs[:source] = json['source']
|
|
||||||
cve_attrs
|
|
||||||
end
|
|
||||||
|
|
||||||
# for bulk inserting
|
|
||||||
def cves_for_year(year)
|
|
||||||
json_data = read_jsons_for_year(year)
|
|
||||||
json_data.map do |json_f|
|
|
||||||
cve_attrs_from_item(json_f)
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def import
|
def import
|
||||||
|
@ -56,24 +51,16 @@ class CveListImporter < GithubRepo
|
||||||
puts "Now starting import for #{repo_url}."
|
puts "Now starting import for #{repo_url}."
|
||||||
puts '----------' * 12
|
puts '----------' * 12
|
||||||
(1999..Date.today.year).map do |year|
|
(1999..Date.today.year).map do |year|
|
||||||
cves_from_json = cves_for_year(year)
|
cves = read_jsons_for_year(year)
|
||||||
|
|
||||||
ids = cves_from_json.map { |cve| cve[:cve_id] }
|
#ids = cves.map { |cve| cve[:cve_id] }
|
||||||
cve_ids_in_db = Cve.where(:cve_id => ids).pluck(:cve_id)
|
#cve_ids_in_db = Cve.where(:cve_id => ids).pluck(:cve_id)
|
||||||
|
|
||||||
new_cve_ids = ids - cve_ids_in_db
|
#new_cve_ids = ids - cve_ids_in_db
|
||||||
new_cves = cves_from_json.select { |cve| cve if new_cve_ids.include?(cve[:cve_id]) }
|
#new_cves = cves.select { |cve| cve if new_cve_ids.include?(cve[:cve_id]) }
|
||||||
puts "Importing any new CVEs from #{year}"
|
puts "Importing any new CVEs from #{year}"
|
||||||
|
|
||||||
bulk_insert(new_cves)
|
Cve.upsert_all(cves, unique_by: :cve_id)
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def bulk_insert(cves)
|
|
||||||
Cve.bulk_insert do |worker|
|
|
||||||
cves.each do |attrs|
|
|
||||||
worker.add(attrs)
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
|
@ -20,7 +20,7 @@ class GithubRepo
|
||||||
end
|
end
|
||||||
|
|
||||||
def read_json(filename)
|
def read_json(filename)
|
||||||
JSON.parse(File.read(filename))
|
JSON.parse(File.read(filename), symbolize_names: true)
|
||||||
end
|
end
|
||||||
|
|
||||||
def pull_or_clone
|
def pull_or_clone
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
class JsonHelper
|
class JsonHelper
|
||||||
|
|
||||||
|
|
||||||
def self.deep_transform_keys(json_hash)
|
def self.deep_transform_keys(json_hash)
|
||||||
if json_hash.is_a? Array
|
if json_hash.is_a? Array
|
||||||
json_hash.map {|jh| symbolize_names_snake_case(jh) }
|
json_hash.map {|jh| symbolize_names_snake_case(jh) }
|
||||||
|
|
Loading…
Add table
Reference in a new issue