add timestamps to github advisories so upsert_all works best

This commit is contained in:
Brendan McDevitt 2022-04-19 14:50:18 -05:00
parent 9154a9cb9c
commit ef3a454a26
3 changed files with 21 additions and 1 deletions

View file

@ -15,6 +15,7 @@ class CreateGithubAdvisories < ActiveRecord::Migration[7.0]
t.jsonb :affected
t.jsonb :references
t.jsonb :database_specific
t.timestamps
end
end
end

View file

@ -77,6 +77,8 @@ ActiveRecord::Schema[7.0].define(version: 2022_04_11_181501) do
t.jsonb "affected"
t.jsonb "references"
t.jsonb "database_specific"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["ghsa_id"], name: "index_github_advisories_on_ghsa_id", unique: true
end

View file

@ -7,6 +7,22 @@ class GithubAdvisoryImporter < GithubRepo
# Years of advisory data to import. The upstream repo's year directories
# begin with 2017, the year of the first GHSA.
# NOTE(review): the upper bound is computed once, when this constant is
# defined, so a process that stays up across New Year keeps the old range.
YEAR_RANGE = (2017..Date.today.year).freeze
# Attribute keys retained from each advisory hash before it is upserted;
# any other keys in the parsed JSON are dropped by the importer's slice.
EXPECTED_KEYS = %i[
  schema_version
  ghsa_id
  modified
  published
  aliases
  summary
  details
  severity
  affected
  references
  database_specific
].freeze
# Template mapping every expected key to nil. The importer reverse-merges it
# into each advisory hash so that all rows carry the same set of keys.
EMPTY_HASH = EXPECTED_KEYS.to_h { |key| [key, nil] }.freeze
# Builds the importer by handing two fixed positional arguments to the
# GithubRepo initializer: the advisory-database clone URL and the local
# path for the checkout.
# NOTE(review): argument order is presumably (repo_url, repo_path) — confirm
# against GithubRepo#initialize, which is not visible here.
def initialize
  # Plain positional arguments: the previous `repo_url = ...` / `repo_path = ...`
  # forms only assigned unused locals and were not keyword arguments.
  super('https://github.com/github/advisory-database.git', '/data_importer/data/github_advisories')
end
@ -56,7 +72,8 @@ class GithubAdvisoryImporter < GithubRepo
YEAR_RANGE.each do |year|
puts "Importing advisory data from #{year}"
jsons = read_jsons_for_year(year)
GithubAdvisory.upsert_all(jsons, unique_by: :ghsa_id)
hashes = jsons.map { |h| h.slice(*EXPECTED_KEYS).reverse_merge(EMPTY_HASH) }
GithubAdvisory.upsert_all(hashes, unique_by: :ghsa_id)
end
end
end