make trickest use upsert_all

This commit is contained in:
Brendan McDevitt 2022-04-22 12:12:45 -05:00
parent 8261a53800
commit 3c6828f0fd
4 changed files with 43 additions and 83 deletions

View file

@ -4,6 +4,7 @@ class CreateTrickestPocCves < ActiveRecord::Migration[7.0]
def change
create_table :trickest_poc_cves do |t|
t.string :cve_id
t.index :cve_id, unique: true
t.string :cve_url
t.string :description
t.string :poc_links, array: true

View file

@ -138,6 +138,7 @@ ActiveRecord::Schema[7.0].define(version: 2022_04_19_203353) do
t.string "cve_url"
t.string "description"
t.string "poc_links", array: true
t.index ["cve_id"], name: "index_trickest_poc_cves_on_cve_id", unique: true
end
end

View file

@ -1,35 +1,40 @@
# frozen_string_literal: true
class GithubRepo
attr_accessor :repo_url, :repo_path
attr_accessor :repo_url, :repo_path
# Stores the optional repo URL and checkout path on the instance.
#
# @param repo_url [String, nil] kept in @repo_url
# @param repo_path [String, nil] kept in @repo_path
def initialize(repo_url = nil, repo_path = nil)
  @repo_url, @repo_path = repo_url, repo_path
end
def git_clone_repo
if repo_url.nil? || repo_path.nil?
puts 'Please provide a repo url and repo_path'
else
Git.clone(repo_url, repo_path)
# Stores the optional repo URL and checkout path on the instance.
#
# @param repo_url [String, nil] kept in @repo_url
# @param repo_path [String, nil] kept in @repo_path
def initialize(repo_url = nil, repo_path = nil)
  @repo_url, @repo_path = repo_url, repo_path
end
end
# Runs `git pull` inside the checkout at +repo_path+.
#
# git is invoked through `system` with an argument list, so no shell parses
# the path, and `git -C` fails outright when the directory cannot be entered
# (the previous `cd path; git pull` kept going and pulled in whatever the
# current directory was).
#
# @return [nil]
def pull_latest_changes
  # An unset/empty path would make `git -C ""` operate on the current
  # directory, so refuse to run at all in that case.
  if repo_path.to_s.empty?
    warn 'Cannot pull latest changes: repo_path is not set'
    return
  end

  puts "Now pulling latest changes from #{repo_path}"
  # stdout is discarded, matching the old backtick call whose output was unused.
  pulled = system('git', '-C', repo_path.to_s, 'pull', out: File::NULL)
  warn "git pull failed for #{repo_path}" unless pulled
  nil
end
# Reads +filename+ and parses its contents as JSON.
#
# @param filename [String] path of the file to read
# @return [Object] the parsed document, with object keys symbolized
def read_json(filename)
  raw = File.read(filename)
  JSON.parse(raw, symbolize_names: true)
end
def pull_or_clone
if Dir.exist?(repo_path)
pull_latest_changes
else
git_clone_repo
# Clones +repo_url+ into +repo_path+ with Git.clone.
# When either value is nil, nothing is cloned and a hint is printed instead.
def git_clone_repo
  if [repo_url, repo_path].none?(&:nil?)
    Git.clone(repo_url, repo_path)
  else
    puts 'Please provide a repo url and repo_path'
  end
end
end
end
# Runs `git pull` inside the checkout at +repo_path+.
#
# git is invoked through `system` with an argument list, so no shell parses
# the path, and `git -C` fails outright when the directory cannot be entered
# (the previous `cd path; git pull` kept going and pulled in whatever the
# current directory was).
#
# @return [nil]
def pull_latest_changes
  # An unset/empty path would make `git -C ""` operate on the current
  # directory, so refuse to run at all in that case.
  if repo_path.to_s.empty?
    warn 'Cannot pull latest changes: repo_path is not set'
    return
  end

  puts "Now pulling latest changes from #{repo_path}"
  # stdout is discarded, matching the old backtick call whose output was unused.
  pulled = system('git', '-C', repo_path.to_s, 'pull', out: File::NULL)
  warn "git pull failed for #{repo_path}" unless pulled
  nil
end
# Reads +filename+ and parses its contents as JSON.
#
# @param filename [String] path of the file to read
# @return [Object] the parsed document, with object keys symbolized
def read_json(filename)
  raw = File.read(filename)
  JSON.parse(raw, symbolize_names: true)
end
# Reads a Markdown file and renders it through RDoc's HTML formatter.
#
# @param filename [String] path of the Markdown file to read
# @return the value produced by the ToHtml formatter (the HTML document)
def read_markdown(filename)
  document = RDoc::Markdown.parse(File.read(filename))
  # Walking the parsed document with the formatter produces the HTML output.
  document.accept(RDoc::Markup::ToHtml.new(RDoc::Options.new, nil))
end
# Brings the local checkout up to date: pulls when the directory at
# +repo_path+ already exists, otherwise clones the repo.
def pull_or_clone
  return pull_latest_changes if Dir.exist?(repo_path)

  git_clone_repo
end
end

View file

@ -3,38 +3,12 @@
require 'git'
require 'json'
require 'date'
require 'bulk_insert'
require '/data_importer/lib/importers/github_repo.rb'
class TrickestPocCveImporter
attr_accessor :repo_url, :repo_path
class TrickestPocCveImporter < GithubRepo
def initialize
@repo_url = 'https://github.com/trickest/cve.git'
@repo_path = '/data_importer/data/trickest_cve'
end
# Clones repo_url into repo_path via Git.clone; no nil check is done here.
def git_clone_repo
Git.clone(repo_url, repo_path)
end
# Runs `git pull` inside the checkout at +repo_path+.
#
# git is invoked through `system` with an argument list, so no shell parses
# the path, and `git -C` fails outright when the directory cannot be entered
# (the previous `cd path; git pull` kept going and pulled in whatever the
# current directory was).
#
# @return [nil]
def pull_latest_changes
  # An unset/empty path would make `git -C ""` operate on the current
  # directory, so refuse to run at all in that case.
  if repo_path.to_s.empty?
    warn 'Cannot pull latest changes: repo_path is not set'
    return
  end

  puts "Now pulling latest changes from #{repo_path}"
  # stdout is discarded, matching the old backtick call whose output was unused.
  pulled = system('git', '-C', repo_path.to_s, 'pull', out: File::NULL)
  warn "git pull failed for #{repo_path}" unless pulled
  nil
end
# Pulls when the directory given as +repo_path+ exists, otherwise clones.
#
# @param repo_path [String] directory whose existence decides pull vs. clone
def pull_or_clone(repo_path)
  return pull_latest_changes if Dir.exist?(repo_path)

  git_clone_repo
end
def read_markdown(filename)
data = File.read(filename)
formatter = RDoc::Markup::ToHtml.new(RDoc::Options.new, nil)
# should give us the html doc
RDoc::Markdown.parse(data).accept(formatter)
super(repo_url = 'https://github.com/trickest/cve.git', repo_path = '/data_importer/data/trickest_cve')
end
def html_to_hash(html)
@ -86,7 +60,6 @@ class TrickestPocCveImporter
cve_attrs
end
# for bulk inserting
def cves_for_year(year)
htmls = read_mds_for_year(year)
htmls.map do |html|
@ -96,33 +69,13 @@ class TrickestPocCveImporter
end
def import
if Dir.exist?(repo_path)
pull_latest_changes
else
git_clone_repo
end
pull_or_clone
puts "Now starting import for #{repo_url}."
puts '----------' * 12
(1999..Date.today.year).map do |year|
cves_from_markdown = cves_for_year(year)
ids = cves_from_markdown.map { |cve| cve[:cve_id] }
cve_ids_in_db = TrickestPocCve.where(cve_id: ids).pluck(:cve_id)
new_cve_ids = ids - cve_ids_in_db
new_cves = cves_from_markdown.select { |cve| cve if new_cve_ids.include?(cve[:cve_id]) }
puts "Importing any new CVEs from #{year}"
bulk_insert(new_cves)
end
end
def bulk_insert(cves)
TrickestPocCve.bulk_insert do |worker|
cves.each do |attrs|
worker.add(attrs)
end
TrickestPocCve.upsert_all(cves_from_markdown, unique_by: :cve_id)
end
end
end