add base support for trickest poc cves to github
This commit is contained in:
parent
d81c31febe
commit
752aef6392
6 changed files with 230 additions and 4 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -29,3 +29,6 @@
|
|||
|
||||
# Ignore master key for decrypting credentials and more.
|
||||
/config/master.key
|
||||
|
||||
# Add any files holding API keys or env vars that we don't want to commit here.
|
||||
/twitter_credentials.env
|
||||
|
|
3
Gemfile
3
Gemfile
|
@ -9,6 +9,9 @@ gem 'actionpack'
|
|||
gem 'sass-rails'
|
||||
gem 'railties'
|
||||
gem 'rest-client'
|
||||
gem 'twitter'
|
||||
gem 'tweetkit', github: 'julianfssen/tweetkit' # for twitter v2 api support
|
||||
gem 'nokogiri'
|
||||
|
||||
# Use postgres as the database for Active Record
|
||||
gem 'pg'
|
||||
|
|
77
Gemfile.lock
77
Gemfile.lock
|
@ -1,3 +1,12 @@
|
|||
GIT
|
||||
remote: https://github.com/julianfssen/tweetkit.git
|
||||
revision: e9ff2e807089547548a3caeea24b06cbdb1defd3
|
||||
specs:
|
||||
tweetkit (0.2.0)
|
||||
faraday (~> 1.9.3)
|
||||
faraday_middleware (~> 1.2.0)
|
||||
simple_oauth (~> 0.3.0)
|
||||
|
||||
GEM
|
||||
remote: https://rubygems.org/
|
||||
specs:
|
||||
|
@ -74,6 +83,7 @@ GEM
|
|||
bindex (0.8.1)
|
||||
bootsnap (1.11.1)
|
||||
msgpack (~> 1.2)
|
||||
buftok (0.2.0)
|
||||
builder (3.2.4)
|
||||
bulk_insert (1.9.0)
|
||||
activerecord (>= 3.2.0)
|
||||
|
@ -104,16 +114,54 @@ GEM
|
|||
digest (3.1.0)
|
||||
domain_name (0.5.20190701)
|
||||
unf (>= 0.0.5, < 1.0.0)
|
||||
equalizer (0.0.11)
|
||||
erubi (1.10.0)
|
||||
execjs (2.8.1)
|
||||
faraday (1.9.3)
|
||||
faraday-em_http (~> 1.0)
|
||||
faraday-em_synchrony (~> 1.0)
|
||||
faraday-excon (~> 1.1)
|
||||
faraday-httpclient (~> 1.0)
|
||||
faraday-multipart (~> 1.0)
|
||||
faraday-net_http (~> 1.0)
|
||||
faraday-net_http_persistent (~> 1.0)
|
||||
faraday-patron (~> 1.0)
|
||||
faraday-rack (~> 1.0)
|
||||
faraday-retry (~> 1.0)
|
||||
ruby2_keywords (>= 0.0.4)
|
||||
faraday-em_http (1.0.0)
|
||||
faraday-em_synchrony (1.0.0)
|
||||
faraday-excon (1.1.0)
|
||||
faraday-httpclient (1.0.1)
|
||||
faraday-multipart (1.0.3)
|
||||
multipart-post (>= 1.2, < 3)
|
||||
faraday-net_http (1.0.1)
|
||||
faraday-net_http_persistent (1.2.0)
|
||||
faraday-patron (1.0.0)
|
||||
faraday-rack (1.0.0)
|
||||
faraday-retry (1.0.3)
|
||||
faraday_middleware (1.2.0)
|
||||
faraday (~> 1.0)
|
||||
ffi (1.15.5)
|
||||
ffi-compiler (1.0.1)
|
||||
ffi (>= 1.0.0)
|
||||
rake
|
||||
git (1.10.2)
|
||||
rchardet (~> 1.8)
|
||||
globalid (1.0.0)
|
||||
activesupport (>= 5.0)
|
||||
http (4.4.1)
|
||||
addressable (~> 2.3)
|
||||
http-cookie (~> 1.0)
|
||||
http-form_data (~> 2.2)
|
||||
http-parser (~> 1.2.0)
|
||||
http-accept (1.7.0)
|
||||
http-cookie (1.0.4)
|
||||
domain_name (~> 0.5)
|
||||
http-form_data (2.3.0)
|
||||
http-parser (1.2.3)
|
||||
ffi-compiler (>= 1.0, < 2.0)
|
||||
http_parser.rb (0.6.0)
|
||||
i18n (1.10.0)
|
||||
concurrent-ruby (~> 1.0)
|
||||
interception (0.5)
|
||||
|
@ -132,6 +180,8 @@ GEM
|
|||
mini_mime (>= 0.1.1)
|
||||
marcel (1.0.2)
|
||||
matrix (0.4.2)
|
||||
memoizable (0.4.2)
|
||||
thread_safe (~> 0.3, >= 0.3.1)
|
||||
method_source (1.0.0)
|
||||
mime-types (3.4.1)
|
||||
mime-types-data (~> 3.2015)
|
||||
|
@ -139,7 +189,9 @@ GEM
|
|||
mini_mime (1.1.2)
|
||||
mini_portile2 (2.8.0)
|
||||
minitest (5.15.0)
|
||||
msgpack (1.4.5)
|
||||
msgpack (1.5.0)
|
||||
multipart-post (2.1.1)
|
||||
naught (1.1.0)
|
||||
net-imap (0.2.3)
|
||||
digest
|
||||
net-protocol
|
||||
|
@ -160,12 +212,12 @@ GEM
|
|||
mini_portile2 (~> 2.8.0)
|
||||
racc (~> 1.4)
|
||||
pg (1.3.5)
|
||||
pry (0.13.1)
|
||||
pry (0.14.1)
|
||||
coderay (~> 1.1)
|
||||
method_source (~> 1.0)
|
||||
pry-byebug (3.9.0)
|
||||
pry-byebug (3.8.0)
|
||||
byebug (~> 11.0)
|
||||
pry (~> 0.13.0)
|
||||
pry (~> 0.10)
|
||||
pry-doc (1.3.0)
|
||||
pry (~> 0.11)
|
||||
yard (~> 0.9.11)
|
||||
|
@ -220,6 +272,7 @@ GEM
|
|||
mime-types (>= 1.16, < 4.0)
|
||||
netrc (~> 0.8)
|
||||
rexml (3.2.5)
|
||||
ruby2_keywords (0.0.5)
|
||||
ruby_dep (1.5.0)
|
||||
rubyzip (2.3.2)
|
||||
sass-rails (6.0.0)
|
||||
|
@ -236,6 +289,7 @@ GEM
|
|||
childprocess (>= 0.5, < 5.0)
|
||||
rexml (~> 3.2, >= 3.2.5)
|
||||
rubyzip (>= 1.2.2)
|
||||
simple_oauth (0.3.1)
|
||||
spring (2.1.1)
|
||||
spring-watcher-listen (2.0.1)
|
||||
listen (>= 2.7, < 4.0)
|
||||
|
@ -249,11 +303,23 @@ GEM
|
|||
sprockets (>= 3.0.0)
|
||||
strscan (3.0.1)
|
||||
thor (1.2.1)
|
||||
thread_safe (0.3.6)
|
||||
tilt (2.0.10)
|
||||
timeout (0.2.0)
|
||||
turbolinks (5.2.1)
|
||||
turbolinks-source (~> 5.2)
|
||||
turbolinks-source (5.2.0)
|
||||
twitter (7.0.0)
|
||||
addressable (~> 2.3)
|
||||
buftok (~> 0.2.0)
|
||||
equalizer (~> 0.0.11)
|
||||
http (~> 4.0)
|
||||
http-form_data (~> 2.0)
|
||||
http_parser.rb (~> 0.6.0)
|
||||
memoizable (~> 0.4.0)
|
||||
multipart-post (~> 2.0)
|
||||
naught (~> 1.0)
|
||||
simple_oauth (~> 0.3.0)
|
||||
tzinfo (2.0.4)
|
||||
concurrent-ruby (~> 1.0)
|
||||
uglifier (4.2.0)
|
||||
|
@ -291,6 +357,7 @@ DEPENDENCIES
|
|||
git
|
||||
jbuilder (~> 2.5)
|
||||
listen (>= 3.0.5, < 3.2)
|
||||
nokogiri
|
||||
pg
|
||||
pry
|
||||
pry-byebug
|
||||
|
@ -307,6 +374,8 @@ DEPENDENCIES
|
|||
spring
|
||||
spring-watcher-listen (~> 2.0.0)
|
||||
turbolinks (~> 5)
|
||||
tweetkit!
|
||||
twitter
|
||||
tzinfo-data
|
||||
uglifier (>= 1.3.0)
|
||||
web-console (>= 3.3.0)
|
||||
|
|
15
config/initializers/twitter_config.rb
Normal file
15
config/initializers/twitter_config.rb
Normal file
|
@ -0,0 +1,15 @@
|
|||
require 'twitter'
|
||||
require 'tweetkit'
|
||||
|
||||
# Twitter API clients shared by the application.
#
# The clients are bound to constants on purpose: a local variable assigned at
# the top level of an initializer goes out of scope as soon as the file has
# finished loading, which would leave the configured client unreachable from
# the rest of the app. All credentials are read from environment variables.

# REST client provided by the twitter gem.
TWITTER_CLIENT = Twitter::REST::Client.new do |config|
  config.consumer_key = ENV['twitter_api_key']
  config.consumer_secret = ENV['twitter_api_key_secret']
  config.access_token = ENV['twitter_access_token']
  config.access_token_secret = ENV['twitter_access_token_secret']
end

# Client provided by the tweetkit gem (pulled in for Twitter v2 API support).
TWEETKIT_CLIENT = Tweetkit::Client.new do |config|
  config.bearer_token = ENV['twitter_bearer_token']
  config.consumer_key = ENV['twitter_api_key']
  config.consumer_secret = ENV['twitter_api_key_secret']
end
|
|
@ -8,6 +8,8 @@ services:
|
|||
POSTGRES_PASSWORD: password
|
||||
web:
|
||||
build: .
|
||||
env_file:
|
||||
- twitter_credentials.env
|
||||
command: bash -c "rm -f tmp/pids/server.pid && bundle exec rails s -p 3000 -b '0.0.0.0'"
|
||||
volumes:
|
||||
- .:/data_importer
|
||||
|
|
134
lib/trickest_poc_cve_importer.rb
Normal file
134
lib/trickest_poc_cve_importer.rb
Normal file
|
@ -0,0 +1,134 @@
|
|||
require 'git'
|
||||
require 'json'
|
||||
require 'date'
|
||||
require 'bulk_insert'
|
||||
|
||||
# Imports proof-of-concept (PoC) references for CVEs from the trickest/cve
# GitHub repository, whose markdown files are grouped into per-year
# directories.
#
# Workflow: clone (or update) the repository, render each markdown file to
# HTML, extract the fields of interest from the HTML and bulk insert the CVEs
# that are not in the database yet.
class TrickestPocCveImporter
  # First year directory scanned by #read_all_mds and #import.
  FIRST_YEAR = 1999

  attr_accessor :repo_url, :repo_path

  def initialize
    @repo_url = 'https://github.com/trickest/cve.git'
    @repo_path = '/data_importer/data/trickest_cve'
  end

  # Clones +repo_url+ into +repo_path+ using the git gem.
  def git_clone_repo
    Git.clone(repo_url, repo_path)
  end

  # Runs `git pull` inside the local checkout at +repo_path+.
  #
  # The command is started without a shell (argument list plus +chdir+), so
  # the path is never interpolated into a command string. A failed pull is
  # reported on stderr instead of being silently ignored.
  #
  # @return [Boolean, nil] result of Kernel#system (true on success)
  def pull_latest_changes
    puts "Now pulling latest changes from #{repo_path}"
    pulled = system('git', 'pull', chdir: repo_path)
    warn "git pull failed in #{repo_path}" unless pulled
    pulled
  end

  # Updates the checkout when the directory exists, clones it otherwise.
  #
  # NOTE: the argument only decides which branch is taken; the pull/clone
  # itself always operates on the instance's own +repo_path+ accessor.
  #
  # @param repo_path [String] directory whose existence is checked
  #   (defaults to the instance's repo_path)
  def pull_or_clone(repo_path = self.repo_path)
    if Dir.exist?(repo_path)
      pull_latest_changes
    else
      git_clone_repo
    end
  end

  # Renders a markdown file to HTML with RDoc.
  #
  # @param filename [String] path of the markdown file
  # @return [String] the generated HTML
  def read_markdown(filename)
    # Loaded lazily: nothing else in this class needs RDoc and it is not
    # guaranteed to be loaded by the surrounding application.
    require 'rdoc'

    data = File.read(filename)
    formatter = RDoc::Markup::ToHtml.new(RDoc::Options.new, nil)
    RDoc::Markdown.parse(data).accept(formatter)
  end

  # Extracts the CVE id, its URL, the description and the PoC links from the
  # HTML produced by #read_markdown.
  #
  # The returned Hash has three entries:
  # * the CVE id (text of the first child of the first <h3>) mapped to the
  #   href of the first link found inside an <h3>
  # * 'Description' mapped to the text of the second <p>
  # * 'POC' mapped to the hrefs of every link found inside a <p>
  #
  # @param html [String] rendered markdown
  # @return [Hash]
  # @raise [ArgumentError] when the document has no <h3> linking to the CVE
  def html_to_hash(html)
    doc = Nokogiri::HTML5.parse(html)

    # The CVE id is always the first heading, and its link is the CVE URL.
    heading_child = doc.at_xpath('//h3')&.children&.first
    cve_url = doc.at_xpath('//h3/a')&.[]('href')
    if heading_child.nil? || cve_url.nil?
      raise ArgumentError, 'expected an <h3> heading linking to the CVE entry'
    end

    paragraphs = doc.xpath('//p').map(&:text)
    # Links listed under the POC and Github headings are hard to tell apart,
    # so they are normalized into one list; only the URL matters here.
    poc_links = doc.xpath('//p/a').map { |anchor| anchor['href'] }.compact

    {
      heading_child.text.to_s => cve_url,
      # paragraphs[0] is always a blank string, the description comes next.
      'Description' => paragraphs[1],
      'POC' => poc_links
    }
  end

  # Lists the markdown files stored for one year.
  #
  # @param year [String, Integer] name of the year directory
  # @return [Array<String>] sorted paths of the *.md files
  def list_mds_for_year(year)
    Dir.glob(File.join(repo_path, year.to_s, '*.md')).sort
  end

  # Renders every markdown file of the given year to HTML.
  #
  # @param year [String, Integer]
  # @return [Array<String>] one HTML document per file
  def read_mds_for_year(year)
    list_mds_for_year(year).map { |filename| read_markdown(filename) }
  end

  # Renders the markdown files of every year from FIRST_YEAR up to today.
  #
  # @return [Array<Array<String>>] HTML documents grouped by year
  def read_all_mds
    (FIRST_YEAR..Date.today.year).map { |year| read_mds_for_year(year.to_s) }
  end

  # Builds the attribute Hash that is inserted for one parsed CVE.
  #
  # TODO: the mapping is not implemented yet, so an empty Hash is returned
  # for every item. #import looks entries up by +:cve_id+, so the mapping has
  # to provide at least that key before anything gets inserted.
  #
  # @param json [Hash] parsed CVE item (currently unused)
  # @return [Hash] attributes for the TrickestPocCve model
  def cve_attrs_from_item(json)
    cve_attrs = {}
    cve_attrs
  end

  # Collects the attribute Hashes for every CVE of a year (for bulk inserting).
  #
  # Reads the year's markdown files through #read_mds_for_year and
  # #html_to_hash; the reader this method used to call is not defined in this
  # class.
  #
  # @param year [String, Integer]
  # @return [Array<Hash>]
  def cves_for_year(year)
    read_mds_for_year(year).map do |html|
      cve_attrs_from_item(html_to_hash(html))
    end
  end

  # Updates the local checkout and inserts, year by year, the CVEs whose
  # +:cve_id+ is not stored in TrickestPocCve yet.
  def import
    pull_or_clone(repo_path)

    puts "Now starting import for TrickestPocCve."
    (FIRST_YEAR..Date.today.year).map do |year|
      new_cves = new_cves_from(cves_for_year(year))
      puts "Importing any new CVEs from #{year}"

      bulk_insert(new_cves)
    end
  end

  # Inserts the given attribute Hashes through the bulk_insert gem.
  #
  # @param cves [Array<Hash>]
  def bulk_insert(cves)
    TrickestPocCve.bulk_insert do |worker|
      cves.each { |attrs| worker.add(attrs) }
    end
  end

  private

  # Keeps the entries that carry a +:cve_id+ which is not in the database yet.
  # Entries without an id are dropped so they can neither be looked up as nil
  # nor inserted as id-less rows; duplicates within the batch are collapsed.
  def new_cves_from(cves)
    identified = cves.select { |cve| cve[:cve_id] }.uniq { |cve| cve[:cve_id] }
    return [] if identified.empty?

    ids = identified.map { |cve| cve[:cve_id] }
    known = TrickestPocCve.where(cve_id: ids).pluck(:cve_id)
    # Hash lookup instead of Array#include? for every candidate.
    seen = known.each_with_object({}) { |id, memo| memo[id] = true }
    identified.reject { |cve| seen.key?(cve[:cve_id]) }
  end
end
|
Loading…
Add table
Reference in a new issue