make trickest use upsert_all

This commit is contained in:
Brendan McDevitt 2022-04-22 12:12:45 -05:00
parent 8261a53800
commit 3c6828f0fd
4 changed files with 43 additions and 83 deletions

View file

@ -4,6 +4,7 @@ class CreateTrickestPocCves < ActiveRecord::Migration[7.0]
def change def change
create_table :trickest_poc_cves do |t| create_table :trickest_poc_cves do |t|
t.string :cve_id t.string :cve_id
t.index :cve_id, unique: true
t.string :cve_url t.string :cve_url
t.string :description t.string :description
t.string :poc_links, array: true t.string :poc_links, array: true

View file

@ -138,6 +138,7 @@ ActiveRecord::Schema[7.0].define(version: 2022_04_19_203353) do
t.string "cve_url" t.string "cve_url"
t.string "description" t.string "description"
t.string "poc_links", array: true t.string "poc_links", array: true
t.index ["cve_id"], name: "index_trickest_poc_cves_on_cve_id", unique: true
end end
end end

View file

@ -1,35 +1,40 @@
# frozen_string_literal: true
# Base helper for importers whose data lives in a git repository.
#
# Holds the remote URL and the local checkout path, keeps the checkout
# up to date (clone on first use, pull afterwards) and offers small
# readers for files inside the checkout.
class GithubRepo
  attr_accessor :repo_url, :repo_path

  # @param repo_url [String, nil] remote URL handed to Git.clone
  # @param repo_path [String, nil] local directory of the checkout
  def initialize(repo_url = nil, repo_path = nil)
    @repo_url = repo_url
    @repo_path = repo_path
  end

  # Clones repo_url into repo_path.
  # Prints a hint instead of cloning when either value is missing.
  def git_clone_repo
    if repo_url.nil? || repo_path.nil?
      puts 'Please provide a repo url and repo_path'
    else
      Git.clone(repo_url, repo_path)
    end
  end

  # Runs `git pull` inside repo_path.
  #
  # The command is passed to Kernel#system as an argument list, so repo_path
  # is never parsed by a shell (spaces or metacharacters are safe) and git
  # cannot end up pulling in the current directory because a `cd` failed.
  # git's stdout is discarded; a failed pull is reported on stderr.
  #
  # @return [nil]
  def pull_latest_changes
    puts "Now pulling latest changes from #{repo_path}"
    pulled = system('git', '-C', repo_path.to_s, 'pull', out: File::NULL)
    warn "git pull failed in #{repo_path}" unless pulled
  end

  # Parses a JSON file.
  #
  # @param filename [String] path of the JSON file
  # @return [Hash, Array] parsed document with symbolized keys
  def read_json(filename)
    JSON.parse(File.read(filename), symbolize_names: true)
  end

  # Parses a markdown file with RDoc and renders it through RDoc's
  # ToHtml formatter.
  #
  # @param filename [String] path of the markdown file
  # @return the formatter's output (expected to be the HTML document)
  def read_markdown(filename)
    data = File.read(filename)
    formatter = RDoc::Markup::ToHtml.new(RDoc::Options.new, nil)
    RDoc::Markdown.parse(data).accept(formatter)
  end

  # Pulls when the checkout directory already exists, clones otherwise.
  # An unset repo_path falls through to git_clone_repo, which prints the
  # configuration hint; Dir.exist?(nil) would raise a TypeError.
  def pull_or_clone
    if repo_path && Dir.exist?(repo_path)
      pull_latest_changes
    else
      git_clone_repo
    end
  end
end

View file

@ -3,38 +3,12 @@
require 'git' require 'git'
require 'json' require 'json'
require 'date' require 'date'
require 'bulk_insert' require '/data_importer/lib/importers/github_repo.rb'
class TrickestPocCveImporter class TrickestPocCveImporter < GithubRepo
attr_accessor :repo_url, :repo_path
def initialize def initialize
@repo_url = 'https://github.com/trickest/cve.git' super(repo_url = 'https://github.com/trickest/cve.git', repo_path = '/data_importer/data/trickest_cve')
@repo_path = '/data_importer/data/trickest_cve'
end
def git_clone_repo
Git.clone(repo_url, repo_path)
end
def pull_latest_changes
`cd #{repo_path}; git pull;`
puts "Now pulling latest changes from #{repo_path}"
end
def pull_or_clone(repo_path)
if Dir.exist?(repo_path)
pull_latest_changes
else
git_clone_repo
end
end
def read_markdown(filename)
data = File.read(filename)
formatter = RDoc::Markup::ToHtml.new(RDoc::Options.new, nil)
# should give us the html doc
RDoc::Markdown.parse(data).accept(formatter)
end end
def html_to_hash(html) def html_to_hash(html)
@ -86,7 +60,6 @@ class TrickestPocCveImporter
cve_attrs cve_attrs
end end
# for bulk inserting
def cves_for_year(year) def cves_for_year(year)
htmls = read_mds_for_year(year) htmls = read_mds_for_year(year)
htmls.map do |html| htmls.map do |html|
@ -96,33 +69,13 @@ class TrickestPocCveImporter
end end
def import def import
if Dir.exist?(repo_path) pull_or_clone
pull_latest_changes
else
git_clone_repo
end
puts "Now starting import for #{repo_url}." puts "Now starting import for #{repo_url}."
puts '----------' * 12 puts '----------' * 12
(1999..Date.today.year).map do |year| (1999..Date.today.year).map do |year|
cves_from_markdown = cves_for_year(year) cves_from_markdown = cves_for_year(year)
ids = cves_from_markdown.map { |cve| cve[:cve_id] }
cve_ids_in_db = TrickestPocCve.where(cve_id: ids).pluck(:cve_id)
new_cve_ids = ids - cve_ids_in_db
new_cves = cves_from_markdown.select { |cve| cve if new_cve_ids.include?(cve[:cve_id]) }
puts "Importing any new CVEs from #{year}" puts "Importing any new CVEs from #{year}"
TrickestPocCve.upsert_all(cves_from_markdown, unique_by: :cve_id)
bulk_insert(new_cves)
end
end
def bulk_insert(cves)
TrickestPocCve.bulk_insert do |worker|
cves.each do |attrs|
worker.add(attrs)
end
end end
end end
end end