Added model and working importer that loads nomi-sec PoC-in-GitHub JSON data into Postgres
This commit is contained in:
parent
8d40ec1665
commit
b91923a9e4
3 changed files with 160 additions and 0 deletions
2
app/models/github_poc.rb
Normal file
2
app/models/github_poc.rb
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
# ActiveRecord model for one row of the github_pocs table.
# It defines no behavior of its own; everything comes from ActiveRecord::Base.
class GithubPoc < ActiveRecord::Base
end
|
28
db/migrate/20220405230622_github_pocs.rb
Normal file
28
db/migrate/20220405230622_github_pocs.rb
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
# Creates the github_pocs table, which stores one row per GitHub repository
# entry together with the CVE id it was listed under.
class GithubPocs < ActiveRecord::Migration[7.0]
  def change
    create_table :github_pocs do |t|
      # GitHub documents repository ids as 64-bit integers, so a 4-byte
      # integer column would eventually overflow; bigint holds the full range.
      t.bigint :github_poc_id
      t.index :github_poc_id, unique: true

      t.string :cve_id, default: "None"
      t.string :name
      t.string :full_name
      t.jsonb :owner
      t.string :html_url
      t.string :description
      t.boolean :fork

      # NOTE(review): these are date columns, so any time-of-day component in
      # the imported values is dropped -- confirm that day precision is enough.
      t.date :created_at
      t.date :updated_at
      t.date :pushed_at

      t.integer :stargazers_count
      t.integer :watchers_count
      t.integer :forks_count
      t.boolean :allow_forking
      t.boolean :is_template
      t.string :topics, array: true
      t.string :visibility
      t.integer :forks
      t.integer :watchers
      t.integer :score
    end
  end
end
|
130
lib/poc_in_github_importer.rb
Normal file
130
lib/poc_in_github_importer.rb
Normal file
|
@ -0,0 +1,130 @@
|
||||||
|
require 'git'
|
||||||
|
require 'json'
|
||||||
|
require 'date'
|
||||||
|
require 'bulk_insert'
|
||||||
|
|
||||||
|
# Imports PoC repository metadata from the nomi-sec PoC-in-GitHub repository
# into the GithubPoc model.
#
# The upstream repository is cloned into +repo_path+ (or pulled when that
# directory already exists). Files are read from "<repo_path>/<year>/*.json";
# each file is named after a CVE and is parsed as an array of repository
# entries.
class PocInGithubImporter
  # Matches a CVE id anywhere in a string. Case-insensitive so mixed-case
  # spellings such as "Cve-2021-1234" are recognised as well.
  CVE_MATCHER = /cve-\d{4}-\d{4,7}/i

  DEFAULT_REPO_URL = 'https://github.com/nomi-sec/PoC-in-GitHub.git'
  DEFAULT_REPO_PATH = '/data_importer/data/poc_in_github'

  # First year directory scanned by #import and #read_all_jsons.
  FIRST_YEAR = 1999

  # JSON keys that are copied verbatim into attributes of the same name.
  ITEM_FIELDS = %w[
    name full_name owner html_url description fork created_at updated_at
    pushed_at stargazers_count watchers_count forks_count allow_forking
    is_template topics visibility forks watchers score
  ].freeze

  attr_accessor :repo_url, :repo_path

  # @param repo_url [String] remote to clone from
  # @param repo_path [String] local checkout directory
  def initialize(repo_url: DEFAULT_REPO_URL, repo_path: DEFAULT_REPO_PATH)
    @repo_url = repo_url
    @repo_path = repo_path
  end

  # Clones +repo_url+ into +repo_path+.
  def git_clone_repo
    Git.clone(repo_url, repo_path)
  end

  # Runs `git pull` inside +repo_path+.
  #
  # The command is passed as an argument vector with `git -C`, so the path is
  # never interpreted by a shell and a missing directory cannot cause the pull
  # to run in the current working directory instead. A failed pull is reported
  # on stderr but does not abort the caller.
  def pull_latest_changes
    puts "Now pulling latest changes from #{repo_path}"
    pulled = system('git', '-C', repo_path.to_s, 'pull', out: File::NULL)
    warn "git pull failed in #{repo_path}" unless pulled
  end

  # @param filename [String] path of a JSON file
  # @return [Object] the parsed JSON document
  def read_json(filename)
    JSON.parse(File.read(filename))
  end

  # All the files are named CVE-year-1234.json in this repo, so the CVE id is
  # the file name without its extension.
  def cve_from_filename(filename)
    File.basename(filename, File.extname(filename))
  end

  # Extracts a CVE id from the 'name', 'full_name' or 'description' entry of a
  # repository hash, checked in that order.
  #
  # @param json [Hash] repository entry with string keys
  # @return [String, nil] the upper-cased CVE id, or nil when no field
  #   contains one (missing or nil fields are skipped)
  def cve_from_json_names(json)
    %w[name full_name description].each do |key|
      found = json[key].to_s[CVE_MATCHER]
      return found.upcase if found
    end
    nil
  end

  # @param year [Integer, String] year directory name
  # @return [Array<String>] sorted paths of the JSON files for that year
  def list_jsons_for_year(year)
    Dir["#{repo_path}/#{year}/*.json"].sort
  end

  # @param year [Integer, String]
  # @return [Array<Hash>] one { cve_id:, file_data: } hash per JSON file
  def read_jsons_for_year(year)
    list_jsons_for_year(year).map do |path|
      { cve_id: cve_from_filename(path), file_data: read_json(path) }
    end
  end

  # @return [Array<Array<Hash>>] the result of #read_jsons_for_year for every
  #   year from FIRST_YEAR up to the current year, one inner array per year
  def read_all_jsons
    (FIRST_YEAR..Date.today.year).map { |year| read_jsons_for_year(year.to_s) }
  end

  # Copies the fields of one repository entry into +cve_attrs+.
  #
  # @param json [Hash] repository entry with string keys
  # @param cve_attrs [Hash] hash to fill in; it is mutated and returned
  # @return [Hash] +cve_attrs+ with :github_poc_id (taken from 'id') and one
  #   symbol key per ITEM_FIELDS entry
  def cve_attrs_from_item(json, cve_attrs = {})
    cve_attrs[:github_poc_id] = json['id']
    ITEM_FIELDS.each { |field| cve_attrs[field.to_sym] = json[field] }
    cve_attrs
  end

  # Builds the attribute hashes used for bulk inserting.
  #
  # @param year [Integer, String]
  # @return [Array<Hash>] one attribute hash per repository entry of the year,
  #   each tagged with the :cve_id of the file it came from
  def cves_for_year(year)
    read_jsons_for_year(year).flat_map do |info|
      info[:file_data].map do |entry|
        cve_attrs_from_item(entry, { cve_id: info[:cve_id] })
      end
    end
  end

  # Refreshes the local checkout and inserts every repository entry that is not
  # stored yet, one year at a time.
  def import
    if Dir.exist?(repo_path)
      pull_latest_changes
    else
      git_clone_repo
    end

    puts "Now starting import for PocInGithub."
    (FIRST_YEAR..Date.today.year).each do |year|
      puts "Importing any new CVEs from #{year}"
      new_cves = new_cves_for_year(year)
      bulk_insert(new_cves) unless new_cves.empty?
    end
  end

  # @param cves [Array<Hash>] attribute hashes to insert through GithubPoc
  def bulk_insert(cves)
    GithubPoc.bulk_insert do |worker|
      cves.each { |attrs| worker.add(attrs) }
    end
  end

  private

  # Attribute hashes for +year+ whose github_poc_id is not stored yet.
  #
  # The same repository can be listed under several CVE files; only its first
  # occurrence is kept, because github_poc_id carries a unique index and a
  # duplicate inside one batch would make the insert fail.
  def new_cves_for_year(year)
    candidates = cves_for_year(year).uniq { |cve| cve[:github_poc_id] }
    return [] if candidates.empty?

    ids = candidates.map { |cve| cve[:github_poc_id] }
    # A hash is used as a lookup table so each membership test is constant time.
    stored = GithubPoc.where(github_poc_id: ids).pluck(:github_poc_id).to_h { |id| [id, true] }
    candidates.reject { |cve| stored.key?(cve[:github_poc_id]) }
  end
end
|
Loading…
Add table
Reference in a new issue