diff --git a/app/controllers/gsds_controller.rb b/app/controllers/gsds_controller.rb
new file mode 100644
index 0000000..a4c0938
--- /dev/null
+++ b/app/controllers/gsds_controller.rb
@@ -0,0 +1,24 @@
+# frozen_string_literal: true
+
+# JSON endpoints for Gsd records.
+class GsdsController < ApplicationController
+  # Renders every Gsd record as JSON.
+  def index
+    @gsds = Gsd.all
+    render json: @gsds.to_json
+  end
+
+  # Renders the record whose gsd_id column equals params[:gsd_id].
+  # The lookup goes through the gsd_id column; find_by_id would compare the
+  # value against the integer primary key. Renders JSON null when nothing matches.
+  def show
+    @gsd = Gsd.find_by(gsd_id: params[:gsd_id])
+    render json: @gsd.to_json
+  end
+
+  # Renders the records returned by Gsd.from_year for params[:year].
+  def show_year
+    @gsds_for_year = Gsd.from_year(params[:year])
+    render json: @gsds_for_year.to_json
+  end
+end
diff --git a/app/models/gsd.rb b/app/models/gsd.rb
new file mode 100644
index 0000000..e450956
--- /dev/null
+++ b/app/models/gsd.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+# ActiveRecord model for rows of the gsds table.
+class Gsd < ActiveRecord::Base
+  # Returns the first record whose cve_id equals +id+, or nil.
+  def self.find_by_cve_id(id)
+    find_by(cve_id: id)
+  end
+
+  # Returns a relation of the records whose cve_id starts with "CVE-<year>-".
+  # +year+ is escaped with sanitize_sql_like so that LIKE wildcards (% and _)
+  # in caller-supplied input are matched literally.
+  def self.from_year(year)
+    where('cve_id LIKE ?', "CVE-#{sanitize_sql_like(year.to_s)}-%")
+  end
+end
diff --git a/app/views/gsds/index.html.erb b/app/views/gsds/index.html.erb
new file mode 100644
index 0000000..86a08ee
--- /dev/null
+++ b/app/views/gsds/index.html.erb
@@ -0,0 +1 @@
+

Gsds#index

\ No newline at end of file diff --git a/app/views/gsds/show.html.erb b/app/views/gsds/show.html.erb new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/app/views/gsds/show.html.erb @@ -0,0 +1 @@ + diff --git a/app/views/gsds/show_years.html.erb b/app/views/gsds/show_years.html.erb new file mode 100644 index 0000000..768ad17 --- /dev/null +++ b/app/views/gsds/show_years.html.erb @@ -0,0 +1,2 @@ +

params[:year]

+

<%= @gsds_for_year %>

\ No newline at end of file
diff --git a/app/workers/gsd_importer_worker.rb b/app/workers/gsd_importer_worker.rb
new file mode 100644
index 0000000..536a87b
--- /dev/null
+++ b/app/workers/gsd_importer_worker.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+require '/data_importer/lib/importers/gsd_importer'
+
+# Faktory job that runs a GsdImporter import.
+class GsdImporterWorker
+  include Faktory::Job
+
+  # Prints the job id and the received arguments, then runs the import.
+  # The arguments are only printed; they are not handed to the importer.
+  def perform(*args)
+    puts "Hello, I am #{jid} with args #{args}"
+    importer = GsdImporter.new
+    importer.import
+  end
+end
diff --git a/crontab.yaml b/crontab.yaml
index b5026d0..0656b18 100644
--- a/crontab.yaml
+++ b/crontab.yaml
@@ -53,4 +53,10 @@ jobs:
     schedule: "@hourly"
     retries: 1
     queue: default
+    priority: 5
+  - job: GsdImporterWorker
+    args: []
+    schedule: "@every 4h00m00s"
+    retries: 1
+    queue: default
     priority: 5
\ No newline at end of file
diff --git a/db/migrate/20220419203353_create_gsds.rb b/db/migrate/20220419203353_create_gsds.rb
new file mode 100644
index 0000000..d372e05
--- /dev/null
+++ b/db/migrate/20220419203353_create_gsds.rb
@@ -0,0 +1,12 @@
+class CreateGsds < ActiveRecord::Migration[7.0]
+  def change
+    create_table :gsds do |t|
+      t.string :gsd_id
+      t.index :gsd_id, unique: true
+      t.string :cve_id
+      t.jsonb :gsd
+      t.jsonb :namespaces
+      t.timestamps
+    end
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 2f5d988..458b6e1 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema[7.0].define(version: 2022_04_11_181501) do
+ActiveRecord::Schema[7.0].define(version: 2022_04_19_203353) do
   # These are extensions that must be enabled in order to support this database
   enable_extension "plpgsql"
 
@@ -118,6 +118,16 @@ ActiveRecord::Schema[7.0].define(version: 2022_04_11_181501) do
     t.jsonb "repositories"
   end
 
+  create_table "gsds", force: :cascade do |t|
+    t.string "gsd_id"
+    t.string "cve_id"
+    t.jsonb "gsd"
+    t.jsonb "namespaces"
+    t.datetime "created_at", null: false
+    t.datetime "updated_at", null: false
+    t.index ["gsd_id"], name: "index_gsds_on_gsd_id", unique: true
+  end
+
   create_table "inthewild_cve_exploits", force: :cascade do |t|
     t.string "cve_id"
     t.string "earliest_report"
diff --git a/db/seeds.rb b/db/seeds.rb
index d2c3195..3f2dad8 100644
--- a/db/seeds.rb
+++ b/db/seeds.rb
@@ -17,6 +17,7 @@ require '/data_importer/lib/importers/cvemon_cve_importer'
 require '/data_importer/lib/importers/cna_importer'
 require '/data_importer/lib/importers/github_advisory_importer'
 require '/data_importer/lib/importers/github_user_importer'
+require '/data_importer/lib/importers/gsd_importer'
 
 def line_sep
   puts '----------' * 12
@@ -24,6 +25,7 @@ end
 
 def perform
   import_cves
+  import_gsds
   import_github_pocs
   import_trickest_poc_cves
   import_inthewild_cve_exploits
@@ -44,6 +46,11 @@ def import_cpes
   CpeImporter.download_and_import
 end
 
+def import_gsds
+  line_sep
+  GsdImporter.new.import
+end
+
 def import_github_pocs
   line_sep
   PocInGithubImporter.new.import
diff --git a/lib/importers/gsd_importer.rb b/lib/importers/gsd_importer.rb
new file mode 100644
index 0000000..1b54326
--- /dev/null
+++ b/lib/importers/gsd_importer.rb
@@ -0,0 +1,89 @@
+# frozen_string_literal: true
+
+require 'git'
+require 'json'
+require 'date'
+require '/data_importer/lib/importers/github_repo'
+require '/data_importer/lib/json_helper'
+
+# Imports GSD JSON files from a local checkout of the gsd-database repository
+# into the gsds table.
+class GsdImporter < GithubRepo
+  # Attributes written for each imported record.
+  EXPECTED_KEYS = %i[
+    cve_id
+    gsd_id
+    gsd
+    namespaces
+  ].freeze
+
+  # Every expected key mapped to nil; merged underneath each parsed record so
+  # that all rows carry the same set of keys.
+  EMPTY_HASH = EXPECTED_KEYS.to_h { |k| [k, nil] }.freeze
+
+  # Key of the namespace whose advisories get sanitized.
+  GITLAB_NAMESPACE = :'gitlab.com'
+
+  # First year directory scanned by #import.
+  FIRST_YEAR = 1999
+
+  # Hands the upstream repository URL and the local checkout path to GithubRepo.
+  def initialize
+    super('https://github.com/cloudsecurityalliance/gsd-database.git', '/data_importer/data/gsd_database')
+  end
+
+  # Returns the paths matching <repo_path>/<year>/*/*.json.
+  def list_jsons_for_year(year)
+    Dir["#{repo_path}/#{year}/*/*.json"]
+  end
+
+  # Parses every JSON file for +year+ and returns one hash per file, reduced to
+  # EXPECTED_KEYS with nil for any key the file did not provide.
+  def read_jsons_for_year(year)
+    list_jsons_for_year(year).map do |filename|
+      json = JsonHelper.deep_transform_keys(read_json(filename))
+      append_ids_to_hash(json).slice(*EXPECTED_KEYS).reverse_merge(EMPTY_HASH)
+    end
+  end
+
+  # Copies json[:gsd][:alias] and json[:gsd][:id] to the top-level :cve_id and
+  # :gsd_id keys. Mutates and returns +json+.
+  def append_ids_to_hash(json)
+    json[:cve_id] = json.dig(:gsd, :alias)
+    json[:gsd_id] = json.dig(:gsd, :id)
+    json
+  end
+
+  # Upstream descriptions sometimes contain \u0000, which PostgreSQL rejects
+  # (per the original author's note). Runs JsonHelper.fix_null_byte over the
+  # description of every gitlab.com advisory. Records without namespaces,
+  # without a gitlab.com namespace or without an advisories array are returned
+  # untouched. Mutates and returns +json+.
+  def sanitize_gitlab_advisories(json)
+    advisories = json.dig(:namespaces, GITLAB_NAMESPACE, :advisories)
+    return json unless advisories.is_a?(Array)
+
+    advisories.each do |advisory|
+      description = advisory[:description]
+      # Nothing to sanitize when the description is absent; gsub on nil would raise.
+      advisory[:description] = JsonHelper.fix_null_byte(description) if description.is_a?(String)
+    end
+    json
+  end
+
+  # Calls pull_or_clone (defined outside this file; presumably refreshes the
+  # checkout), then upserts (keyed on gsd_id) every record of every year from
+  # FIRST_YEAR through the current year.
+  def import
+    pull_or_clone
+    puts "Now starting import for #{repo_url}."
+    puts '----------' * 12
+    (FIRST_YEAR..Date.today.year).each do |year|
+      puts "Now importing GSDs for #{year}"
+      read_jsons_for_year(year).each do |record|
+        # Records without namespaces (e.g. a SUSE CVE from 2009) pass through
+        # sanitize_gitlab_advisories unchanged, so no separate nil branch is needed.
+        Gsd.upsert_all([sanitize_gitlab_advisories(record)], unique_by: :gsd_id)
+      end
+    end
+  end
+end
diff --git a/lib/json_helper.rb b/lib/json_helper.rb
index 98922b8..014b1f1 100644
--- a/lib/json_helper.rb
+++ b/lib/json_helper.rb
@@ -12,4 +12,13 @@ class JsonHelper
   def self.symbolize_names_snake_case(json_hash)
     json_hash.deep_transform_keys { |k| k.to_s.underscore.to_sym }
   end
+
+  # Replaces each backtick-quoted NUL character (`\u0000`) in +s+ with the text
+  # "null_byte" and deletes any remaining bare NUL characters.
+  # Returns +s+ unchanged when it is not a String (e.g. nil).
+  def self.fix_null_byte(s)
+    return s unless s.is_a?(String)
+
+    s.gsub("`\u0000`", 'null_byte').delete("\u0000")
+  end
 end