diff --git a/Dockerfile b/Dockerfile
index 9fb1371..76e4595 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,10 +1,11 @@
 # syntax=docker/dockerfile:1
 FROM ruby:2.7.0
-RUN apt-get update -qq && apt-get install -y nodejs postgresql-client
+RUN apt-get update -qq && apt-get install -y nodejs postgresql-client less
 WORKDIR /data_importer
 COPY Gemfile /data_importer/Gemfile
 COPY Gemfile.lock /data_importer/Gemfile.lock
 RUN bundle install
+ENV PAGER=less
 
 # Add a script to be executed every time the container starts.
 COPY entrypoint.sh /usr/bin/
diff --git a/Gemfile b/Gemfile
index 175f470..cd56dce 100644
--- a/Gemfile
+++ b/Gemfile
@@ -7,6 +7,9 @@ ruby '2.7.0'
 gem 'rails', '~> 5.2.3'
 # Use postgres as the database for Active Record
 gem 'pg'
+gem 'pry' # Console with powerful introspection capabilities; declared once, for all groups
+gem 'bulk_insert'
+gem 'git'
 # Use Puma as the app server
 gem 'puma', '~> 3.11'
 # Use SCSS for stylesheets
@@ -39,6 +42,12 @@ gem 'bootsnap', '>= 1.1.0', require: false
 group :development, :test do
   # Call 'byebug' anywhere in the code to stop execution and get a debugger console
   gem 'byebug', platforms: [:mri, :mingw, :x64_mingw]
+  gem 'awesome_print' # pretty print ruby objects
+  gem 'pry-byebug' # Integrates pry with byebug
+  gem 'pry-doc' # Provide MRI Core documentation
+  gem 'pry-rails' # Causes rails console to open pry. `DISABLE_PRY_RAILS=1 rails c` can still open with IRB
+  gem 'pry-rescue' # Start a pry session whenever something goes wrong.
+  gem 'pry-theme' # An easy way to customize Pry colors via theme files
 end
 
 group :development do
diff --git a/app/models/cve.rb b/app/models/cve.rb
new file mode 100644
index 0000000..b6336f8
--- /dev/null
+++ b/app/models/cve.rb
@@ -0,0 +1,25 @@
+# A CVE record, looked up by its cve_id and by the state stored in cve_data_meta.
+class Cve < ActiveRecord::Base
+  # State scopes compare the 'STATE' key of the cve_data_meta jsonb column.
+  # NOTE(review): the without_* scopes negate an equality test, so rows whose
+  # STATE is missing (SQL NULL) are presumably excluded as well - confirm that
+  # this is intended.
+  scope :with_reserved, -> { where("cve_data_meta->>'STATE' = 'RESERVED'") }
+  scope :without_reserved, -> { where.not("cve_data_meta->>'STATE' = 'RESERVED'") }
+  scope :with_rejected, -> { where("cve_data_meta->>'STATE' = 'REJECT'") }
+  scope :without_rejected, -> { where.not("cve_data_meta->>'STATE' = 'REJECT'") }
+  scope :with_public, -> { where("cve_data_meta->>'STATE' = 'PUBLIC'") }
+  scope :without_public, -> { where.not("cve_data_meta->>'STATE' = 'PUBLIC'") }
+
+  # Looks a record up by its CVE identifier (the cve_id column).
+  # NOTE(review): this shadows ActiveRecord's dynamic find_by_id finder, which
+  # would otherwise search the primary key.
+  def self.find_by_id(id)
+    find_by(:cve_id => id)
+  end
+
+  # Records whose cve_id starts with "CVE-<year>-".
+  def self.from_year(year)
+    where("cve_id LIKE ?", "CVE-#{year}-%")
+  end
+end
diff --git a/db/migrate/20220401173431_create_cves.rb b/db/migrate/20220401173431_create_cves.rb
new file mode 100644
index 0000000..733c11a
--- /dev/null
+++ b/db/migrate/20220401173431_create_cves.rb
@@ -0,0 +1,21 @@
+class CreateCves < ActiveRecord::Migration[5.2]
+  def change
+    create_table :cves do |t|
+      # CVE identifier (e.g. "CVE-2020-0001"). The Cve model queries this
+      # column, so it has to exist; it is indexed for those lookups.
+      t.string :cve_id
+      t.jsonb :cve_data_meta
+      t.jsonb :affects
+      t.string :data_format
+      t.string :data_type
+      t.string :data_version
+      t.jsonb :description
+      t.jsonb :impact
+      t.jsonb :problemtype
+      t.jsonb :references
+      t.jsonb :source
+
+      t.index :cve_id
+    end
+  end
+end
diff --git a/db/seeds.rb b/db/seeds.rb
index 1beea2a..58db0c2 100644
--- a/db/seeds.rb
+++ b/db/seeds.rb
@@ -5,3 +5,8 @@
 #
 #   movies = Movie.create([{ name: 'Star Wars' }, { name: 'Lord of the Rings' }])
 #   Character.create(name: 'Luke', movie: movies.first)
+
+# Import any new CVEs into the db. The importer is resolved relative to the
+# application root rather than a hard-coded container path.
+require Rails.root.join('lib', 'cve_list_importer').to_s
+CveListImporter.new.import
diff --git a/lib/cve_list_importer.rb b/lib/cve_list_importer.rb
new file mode 100644
index 0000000..8c27811
--- /dev/null
+++ 
b/lib/cve_list_importer.rb
@@ -0,0 +1,123 @@
+require 'git'
+require 'json'
+require 'date'
+require 'set'
+require 'bulk_insert'
+
+# Imports MITRE's cvelist JSON records from their GitHub repository into the
+# cves table.
+#
+# The working copy is cloned to repo_path on first use and refreshed with
+# `git pull` afterwards. Each year directory is then scanned and records whose
+# cve_id is not stored yet are bulk-inserted.
+class CveListImporter
+  DEFAULT_REPO_URL = 'https://github.com/CVEProject/cvelist.git'
+  DEFAULT_REPO_PATH = '/data_importer/data/cve_list'
+  # Oldest year directory scanned by #import and #read_all_jsons.
+  FIRST_YEAR = 1999
+  # Top-level JSON keys copied unchanged into the attribute of the same name.
+  PASSTHROUGH_KEYS = %w[
+    affects data_format data_type data_version description
+    impact problemtype references source
+  ].freeze
+
+  attr_accessor :repo_url, :repo_path
+
+  # @param repo_url [String] git URL the CVE list is cloned from
+  # @param repo_path [String] local directory of the working copy
+  def initialize(repo_url: DEFAULT_REPO_URL, repo_path: DEFAULT_REPO_PATH)
+    @repo_url = repo_url
+    @repo_path = repo_path
+  end
+
+  # Clones repo_url into repo_path.
+  def git_clone_repo
+    Git.clone(repo_url, repo_path)
+  end
+
+  # Runs `git pull` inside repo_path.
+  #
+  # git is started with an argument list rather than an interpolated shell
+  # string, so a repo_path containing spaces or shell metacharacters cannot
+  # change the command. A failed pull is reported on stderr and not raised.
+  #
+  # @return [nil]
+  def pull_latest_changes
+    puts "Now pulling latest changes from #{repo_path}"
+    warn "git pull failed in #{repo_path}" unless system('git', '-C', repo_path, 'pull')
+  end
+
+  # @param filename [String] path of a JSON file
+  # @return [Object] the parsed document
+  def read_json(filename)
+    JSON.parse(File.read(filename))
+  end
+
+  # @param year [Integer, String]
+  # @return [Array<String>] paths matching repo_path/<year>/*/*.json
+  def list_jsons_for_year(year)
+    Dir.glob(File.join(repo_path, year.to_s, '*', '*.json'))
+  end
+
+  # @param year [Integer, String]
+  # @return [Array] parsed documents of every JSON file found for the year
+  def read_jsons_for_year(year)
+    list_jsons_for_year(year).map { |filename| read_json(filename) }
+  end
+
+  # @return [Array<Array>] one array of parsed documents per year
+  def read_all_jsons
+    years.map { |year| read_jsons_for_year(year.to_s) }
+  end
+
+  # Maps one parsed CVE document onto the attribute hash used for inserting.
+  #
+  # @param json [Hash] parsed document; must contain 'CVE_data_meta'
+  # @return [Hash{Symbol => Object}]
+  def cve_attrs_from_item(json)
+    meta = json['CVE_data_meta']
+    attrs = { cve_data_meta: meta, cve_id: meta['ID'] }
+    PASSTHROUGH_KEYS.each { |key| attrs[key.to_sym] = json[key] }
+    attrs
+  end
+
+  # for bulk inserting
+  # @return [Array<Hash>] attribute hashes for every document of the year
+  def cves_for_year(year)
+    read_jsons_for_year(year).map { |document| cve_attrs_from_item(document) }
+  end
+
+  # Updates (or clones) the working copy, then inserts every CVE that is not
+  # in the database yet, one year at a time.
+  def import
+    Dir.exist?(repo_path) ? pull_latest_changes : git_clone_repo
+
+    years.map do |year|
+      cves_from_json = cves_for_year(year)
+      ids = cves_from_json.map { |cve| cve[:cve_id] }
+      # A Set keeps the membership test below constant-time; scanning an
+      # Array for every record is quadratic in the size of the year.
+      known_ids = Cve.where(cve_id: ids).pluck(:cve_id).to_set
+      new_cves = cves_from_json.reject { |cve| known_ids.include?(cve[:cve_id]) }
+      puts "Importing any new CVEs from #{year}"
+
+      bulk_insert(new_cves)
+    end
+  end
+
+  # Adds every attribute hash to a single Cve.bulk_insert worker.
+  #
+  # @param cves [Array<Hash>] hashes as built by #cve_attrs_from_item
+  def bulk_insert(cves)
+    Cve.bulk_insert do |worker|
+      cves.each { |attrs| worker.add(attrs) }
+    end
+  end
+
+  private
+
+  # @return [Range] FIRST_YEAR through the current calendar year
+  def years
+    FIRST_YEAR..Date.today.year
+  end
+end