# frozen_string_literal: true

require 'git'
require 'json'
require 'date'
require '/data_importer/lib/importers/github_repo'
require '/data_importer/lib/json_helper'

# Imports GSD (Global Security Database) JSON records from a checkout of the
# cloudsecurityalliance/gsd-database repository and upserts them through the
# Gsd model, one record at a time.
class GsdImporter < GithubRepo
  # Remote repository the records are pulled from.
  GSD_REPO_URL = 'https://github.com/cloudsecurityalliance/gsd-database.git'

  # Local directory the repository is cloned into.
  GSD_REPO_PATH = '/data_importer/data/gsd_database'

  # Keys every record handed to Gsd.upsert_all carries.
  EXPECTED_KEYS = %i[
    cve_id
    gsd_id
    gsd
    namespaces
  ].freeze

  # Template that fills any missing expected key with nil.
  EMPTY_HASH = EXPECTED_KEYS.map { |k| [k, nil] }.to_h.freeze

  # Hands the GSD repository URL and local path to GithubRepo as positional
  # arguments, in that order.
  def initialize
    super(GSD_REPO_URL, GSD_REPO_PATH)
  end

  # Lists the JSON files stored two directory levels below the given year
  # (<repo_path>/<year>/<any directory>/<file>.json).
  #
  # @param year [Integer, String] year directory name inside the repository
  # @return [Array<String>] paths of the matching JSON files
  def list_jsons_for_year(year)
    Dir["#{repo_path}/#{year}/*/*.json"]
  end

  # Reads every JSON file for a year and normalises each one into a hash that
  # holds exactly EXPECTED_KEYS; keys absent from the file are set to nil.
  #
  # @param year [Integer, String] year directory name inside the repository
  # @return [Array<Hash>] one normalised record per JSON file
  def read_jsons_for_year(year)
    list_jsons_for_year(year).map do |filename|
      # read_json is not defined in this file; presumably provided by
      # GithubRepo or a mixin -- confirm.
      record = append_ids_to_hash(JsonHelper.deep_transform_keys(read_json(filename)))
      # reverse_merge is not core Ruby (presumably ActiveSupport); values
      # already present in the record win over the nil defaults.
      record.slice(*EXPECTED_KEYS).reverse_merge(EMPTY_HASH)
    end
  end

  # Copies the identifiers nested under :gsd to the top level of the record.
  # Mutates and returns the hash it was given.
  #
  # @param json [Hash] key-transformed GSD record
  # @return [Hash] the same hash with :cve_id and :gsd_id set (nil if absent)
  def append_ids_to_hash(json)
    json[:cve_id] = json.dig(:gsd, :alias)
    json[:gsd_id] = json.dig(:gsd, :id)
    json
  end

  # they like to post descriptions with \u0000 and it doesnt make postgres happy
  #
  # Passes the :description of every gitlab.com advisory through
  # JsonHelper.fix_null_byte, mutating the record in place. Records without
  # namespaces (e.g. a suse cve from 2009), without a gitlab.com namespace or
  # without advisories are returned untouched.
  #
  # @param json [Hash] normalised GSD record
  # @return [Hash] the same hash, with advisory descriptions sanitized
  def sanitize_gitlab_advisories(json)
    advisories = json.dig(:namespaces, :"gitlab.com", :advisories)
    return json if advisories.nil?

    advisories.each do |advisory|
      advisory[:description] = JsonHelper.fix_null_byte(advisory[:description])
    end
    json
  end

  # Pulls or clones the repository, then upserts every GSD record from 1999
  # up to the current year, keyed on :gsd_id.
  #
  # @return [Array<Array<Hash>>] the records read for each year; map is kept
  #   (rather than each) only so the return value stays what callers got before
  def import
    pull_or_clone
    puts "Now starting import for #{repo_url}."
    puts '----------' * 12

    (1999..Date.today.year).map do |year|
      puts "Now importing GSDs for #{year}"
      read_jsons_for_year(year).each do |record|
        # The sanitizer is a no-op for records without namespaces, so a
        # single code path covers both cases.
        Gsd.upsert_all([sanitize_gitlab_advisories(record)], unique_by: :gsd_id)
      end
    end
  end
end