77 lines
2.2 KiB
Ruby
77 lines
2.2 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require 'git'
|
|
require 'json'
|
|
require 'date'
|
|
require '/data_importer/lib/importers/github_repo'
|
|
require '/data_importer/lib/json_helper'
|
|
|
|
# Imports GSD records from the cloudsecurityalliance/gsd-database git
# repository and upserts them through the Gsd model.
#
# Relies on the GithubRepo superclass for `repo_url`, `repo_path`,
# `pull_or_clone` and (presumably) `read_json` -- none of them are defined in
# this file; confirm against GithubRepo if their contract matters.
class GsdImporter < GithubRepo
  # Remote repository that holds the GSD JSON files.
  REPO_URL = 'https://github.com/cloudsecurityalliance/gsd-database.git'
  # Local checkout location handed to GithubRepo.
  REPO_PATH = '/data_importer/data/gsd_database'
  # First year directory that the import walks.
  FIRST_YEAR = 1999
  # Namespace key whose advisories need null-byte sanitizing.
  GITLAB_NAMESPACE = :'gitlab.com'

  # Keys that every row handed to Gsd.upsert_all carries.
  EXPECTED_KEYS = %i[
    cve_id
    gsd_id
    gsd
    namespaces
  ].freeze

  # Template used to fill in any expected key that a record is missing, so
  # all rows share the same set of keys.
  EMPTY_HASH = EXPECTED_KEYS.map { |k| [k, nil] }.to_h.freeze

  # Passes the repository URL and local path positionally, exactly as before,
  # without the stray local-variable assignments in the argument list.
  def initialize
    super(REPO_URL, REPO_PATH)
  end

  # Lists the JSON files stored for a given year.
  #
  # @param year [Integer, String] year directory name inside the checkout
  # @return [Array<String>] paths matching "<repo_path>/<year>/*/*.json"
  def list_jsons_for_year(year)
    Dir["#{repo_path}/#{year}/*/*.json"]
  end

  # Reads every JSON file of a year and normalizes it into a row hash that
  # contains exactly EXPECTED_KEYS (missing keys are filled with nil).
  #
  # @param year [Integer, String] year directory name inside the checkout
  # @return [Array<Hash>] one normalized hash per JSON file
  def read_jsons_for_year(year)
    list_jsons_for_year(year).map do |filename|
      # NOTE(review): deep_transform_keys presumably symbolizes the keys, as
      # the code below indexes with symbols -- confirm against JsonHelper.
      record = JsonHelper.deep_transform_keys(read_json(filename))
      append_ids_to_hash(record).slice(*EXPECTED_KEYS).reverse_merge(EMPTY_HASH)
    end
  end

  # Copies the nested ids to the top level of the record (mutates `json`).
  #
  # @param json [Hash] record with an optional :gsd hash
  # @return [Hash] the same hash with :cve_id (from gsd.alias) and :gsd_id
  #   (from gsd.id) set; either is nil when the nested value is absent
  def append_ids_to_hash(json)
    json[:cve_id] = json.dig(:gsd, :alias)
    json[:gsd_id] = json.dig(:gsd, :id)
    json
  end

  # GitLab advisory descriptions sometimes contain \u0000, which Postgres
  # does not accept, so every description is passed through
  # JsonHelper.fix_null_byte. Advisories are updated in place.
  #
  # Records without namespaces, without a gitlab.com namespace, or without an
  # advisories list are returned untouched instead of raising.
  #
  # @param json [Hash] normalized record (mutated in place)
  # @return [Hash] the same record
  def sanitize_gitlab_advisories(json)
    advisories = json.dig(:namespaces, GITLAB_NAMESPACE, :advisories)
    return json unless advisories.is_a?(Array)

    advisories.each do |advisory|
      advisory[:description] = JsonHelper.fix_null_byte(advisory[:description])
    end
    json
  end

  # Refreshes the checkout and upserts every GSD record, year by year, from
  # FIRST_YEAR up to the current year. Progress is written to stdout.
  #
  # The return value is not meaningful; the years are walked with `each` so
  # that a year's records can be released before the next year is read.
  def import
    pull_or_clone
    puts "Now starting import for #{repo_url}."
    puts '----------' * 12
    (FIRST_YEAR..Date.today.year).each do |year|
      puts "Now importing GSDs for #{year}"
      read_jsons_for_year(year).each do |gsd|
        # Some records (e.g. a suse cve from 2009) have no namespaces set in
        # the json; sanitize_gitlab_advisories passes those through as-is.
        # Rows are upserted one at a time, keyed on gsd_id.
        Gsd.upsert_all([sanitize_gitlab_advisories(gsd)], unique_by: :gsd_id)
      end
    end
  end
end
|