# frozen_string_literal: true

require 'git'
require 'json'
require 'date'
require '/data_importer/lib/importers/github_repo'
require '/data_importer/lib/json_helper'

# Imports GSD (Global Security Database) JSON records from a local checkout of
# the gsd-database repository and upserts them into the `Gsd` model.
#
# NOTE(review): `repo_path`, `repo_url`, `read_json` and `pull_or_clone` are
# not defined in this file; presumably they come from GithubRepo — confirm.
class GsdImporter < GithubRepo
  # Top-level keys kept from each parsed record; every other key is dropped.
  EXPECTED_KEYS = %i[
    cve_id
    gsd_id
    gsd
    namespaces
  ].freeze

  # Template with every expected key set to nil, merged underneath each record
  # so that all rows handed to `upsert_all` carry the same set of keys.
  EMPTY_HASH = EXPECTED_KEYS.map { |k| [k, nil] }.to_h.freeze

  # First year directory that `import` walks.
  FIRST_YEAR = 1999

  # Passes the upstream repository URL and the local checkout path to the
  # superclass as two positional arguments.
  def initialize
    super(
      'https://github.com/cloudsecurityalliance/gsd-database.git',
      '/data_importer/data/gsd_database'
    )
  end

  # Lists the JSON files stored one directory level below the year directory,
  # i.e. everything matching `<repo_path>/<year>/*/*.json`.
  #
  # @param year [Integer, String] name of the year directory
  # @return [Array<String>] matching file paths
  def list_jsons_for_year(year)
    Dir["#{repo_path}/#{year}/*/*.json"]
  end

  # Reads every JSON file for the given year and normalizes each one into a
  # hash that contains exactly EXPECTED_KEYS (missing keys are set to nil).
  #
  # NOTE(review): JsonHelper.deep_transform_keys presumably symbolizes keys,
  # since the code below looks them up as symbols — confirm.
  #
  # @param year [Integer, String] name of the year directory
  # @return [Array<Hash>] one normalized hash per file
  def read_jsons_for_year(year)
    list_jsons_for_year(year).map do |filename|
      json = JsonHelper.deep_transform_keys(read_json(filename))
      append_ids_to_hash(json).slice(*EXPECTED_KEYS).reverse_merge(EMPTY_HASH)
    end
  end

  # Copies the nested `gsd.alias` and `gsd.id` values up to the top-level
  # `:cve_id` and `:gsd_id` keys (nil when the nested value is absent).
  # Mutates and returns the given hash.
  #
  # @param json [Hash] parsed record with symbol keys
  # @return [Hash] the same hash, with `:cve_id` and `:gsd_id` set
  def append_ids_to_hash(json)
    json[:cve_id] = json.dig(:gsd, :alias)
    json[:gsd_id] = json.dig(:gsd, :id)
    json
  end

  # Runs the description of every gitlab.com advisory through
  # JsonHelper.fix_null_byte: upstream descriptions can contain \u0000, which
  # Postgres does not accept. Mutates the advisories in place.
  #
  # Records without namespaces, without a gitlab.com namespace, or without
  # advisories are returned untouched instead of raising.
  #
  # @param json [Hash] normalized record with symbol keys
  # @return [Hash] the same hash
  def sanitize_gitlab_advisories(json)
    advisories = json.dig(:namespaces, :"gitlab.com", :advisories)
    return json unless advisories

    advisories.each do |advisory|
      advisory[:description] = JsonHelper.fix_null_byte(advisory[:description])
    end
    json
  end

  # Updates the local checkout, then upserts every record year by year,
  # from FIRST_YEAR through the current year, keyed on `gsd_id`.
  #
  # Records are written one at a time. Some records have no namespaces at all
  # (e.g. a SUSE CVE from 2009); the sanitizer passes those through unchanged.
  #
  # Years are iterated with `each` so a year's parsed records can be released
  # once written; the return value of this method is not meaningful.
  def import
    pull_or_clone
    puts "Now starting import for #{repo_url}."
    puts '----------' * 12
    (FIRST_YEAR..Date.today.year).each do |year|
      puts "Now importing GSDs for #{year}"
      read_jsons_for_year(year).each do |gsd|
        Gsd.upsert_all([sanitize_gitlab_advisories(gsd)], unique_by: :gsd_id)
      end
    end
  end
end