Add a unique index on trickest_poc_cves.cve_id, move the shared repository
helpers into GithubRepo, and switch the Trickest importer to upsert_all.

Review notes:
* Line breaks and indentation were rebuilt from a whitespace-collapsed copy of
  this patch; confirm the context lines against the tree before applying.
* lib/importers/github_repo.rb keeps its frozen_string_literal comment,
  two-space indentation and final newline; only read_markdown is added.
* The importer loads GithubRepo with require_relative instead of an absolute
  path, and passes plain arguments to super.
* Empty yearly batches are skipped before upsert_all is called.
* The index lines of the two lib/importers sections are omitted because their
  post-image content changed during review.
* NOTE(review): the unique index is added by editing an existing migration;
  databases that already ran it would not pick the index up - confirm whether
  a follow-up migration is needed.

diff --git a/db/migrate/20220407051821_create_trickest_poc_cves.rb b/db/migrate/20220407051821_create_trickest_poc_cves.rb
index d20d07d..107a92e 100644
--- a/db/migrate/20220407051821_create_trickest_poc_cves.rb
+++ b/db/migrate/20220407051821_create_trickest_poc_cves.rb
@@ -4,6 +4,7 @@ class CreateTrickestPocCves < ActiveRecord::Migration[7.0]
   def change
     create_table :trickest_poc_cves do |t|
       t.string :cve_id
+      t.index :cve_id, unique: true
       t.string :cve_url
       t.string :description
       t.string :poc_links, array: true
diff --git a/db/schema.rb b/db/schema.rb
index 458b6e1..3305715 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -138,6 +138,7 @@ ActiveRecord::Schema[7.0].define(version: 2022_04_19_203353) do
     t.string "cve_url"
     t.string "description"
     t.string "poc_links", array: true
+    t.index ["cve_id"], name: "index_trickest_poc_cves_on_cve_id", unique: true
   end
 
 end
diff --git a/lib/importers/github_repo.rb b/lib/importers/github_repo.rb
--- a/lib/importers/github_repo.rb
+++ b/lib/importers/github_repo.rb
@@ -25,6 +25,16 @@ class GithubRepo
     JSON.parse(File.read(filename), symbolize_names: true)
   end
 
+  # Reads a Markdown file and converts it with RDoc's ToHtml formatter.
+  #
+  # @param filename [String] path of the Markdown file to read
+  # @return [String] the HTML produced by the formatter
+  def read_markdown(filename)
+    data = File.read(filename)
+    formatter = RDoc::Markup::ToHtml.new(RDoc::Options.new, nil)
+    RDoc::Markdown.parse(data).accept(formatter)
+  end
+
   def pull_or_clone
     if Dir.exist?(repo_path)
       pull_latest_changes
diff --git a/lib/importers/trickest_poc_cve_importer.rb b/lib/importers/trickest_poc_cve_importer.rb
--- a/lib/importers/trickest_poc_cve_importer.rb
+++ b/lib/importers/trickest_poc_cve_importer.rb
@@ -3,38 +3,12 @@
 require 'git'
 require 'json'
 require 'date'
-require 'bulk_insert'
+require_relative 'github_repo'
 
-class TrickestPocCveImporter
-  attr_accessor :repo_url, :repo_path
-
+# Imports CVE entries from the trickest/cve GitHub repository into TrickestPocCve.
+class TrickestPocCveImporter < GithubRepo
   def initialize
-    @repo_url = 'https://github.com/trickest/cve.git'
-    @repo_path = '/data_importer/data/trickest_cve'
-  end
-
-  def git_clone_repo
-    Git.clone(repo_url, repo_path)
-  end
-
-  def pull_latest_changes
-    `cd #{repo_path}; git pull;`
-    puts "Now pulling latest changes from #{repo_path}"
-  end
-
-  def pull_or_clone(repo_path)
-    if Dir.exist?(repo_path)
-      pull_latest_changes
-    else
-      git_clone_repo
-    end
-  end
-
-  def read_markdown(filename)
-    data = File.read(filename)
-    formatter = RDoc::Markup::ToHtml.new(RDoc::Options.new, nil)
-    # should give us the html doc
-    RDoc::Markdown.parse(data).accept(formatter)
+    super('https://github.com/trickest/cve.git', '/data_importer/data/trickest_cve')
   end
 
   def html_to_hash(html)
@@ -86,7 +60,6 @@ class TrickestPocCveImporter
     cve_attrs
   end
 
-  # for bulk inserting
   def cves_for_year(year)
     htmls = read_mds_for_year(year)
     htmls.map do |html|
@@ -96,33 +69,17 @@ class TrickestPocCveImporter
   end
 
+  # Refreshes the local checkout, then upserts each year's CVEs keyed on cve_id.
   def import
-    if Dir.exist?(repo_path)
-      pull_latest_changes
-    else
-      git_clone_repo
-    end
-
+    pull_or_clone
     puts "Now starting import for #{repo_url}."
     puts '----------' * 12
     (1999..Date.today.year).map do |year|
       cves_from_markdown = cves_for_year(year)
-
-      ids = cves_from_markdown.map { |cve| cve[:cve_id] }
-      cve_ids_in_db = TrickestPocCve.where(cve_id: ids).pluck(:cve_id)
-
-      new_cve_ids = ids - cve_ids_in_db
-      new_cves = cves_from_markdown.select { |cve| cve if new_cve_ids.include?(cve[:cve_id]) }
+      # Rails 7.0's upsert_all raises ArgumentError when given an empty list.
+      next if cves_from_markdown.blank?
+
       puts "Importing any new CVEs from #{year}"
-
-      bulk_insert(new_cves)
-    end
-  end
-
-  def bulk_insert(cves)
-    TrickestPocCve.bulk_insert do |worker|
-      cves.each do |attrs|
-        worker.add(attrs)
-      end
+      TrickestPocCve.upsert_all(cves_from_markdown, unique_by: :cve_id)
     end
   end
 end