enforce utf-8 some more

This commit is contained in:
kenna-bmcdevitt 2024-08-22 14:00:44 -05:00
parent 3b517dba43
commit 14b167e255
7 changed files with 40 additions and 5 deletions

View file

@ -7,6 +7,7 @@ COPY Gemfile /data_importer/Gemfile
RUN bundle update RUN bundle update
RUN bundle install RUN bundle install
ENV PAGER=less ENV PAGER=less
ENV LANG='UTF-8'
# Add a script to be executed every time the container starts. # Add a script to be executed every time the container starts.
COPY entrypoint.sh /usr/bin/ COPY entrypoint.sh /usr/bin/

View file

@ -18,12 +18,15 @@ gem 'retryable'
gem 'rubocop' gem 'rubocop'
gem 'rubocop-graphql' gem 'rubocop-graphql'
gem 'rubocop-rails' gem 'rubocop-rails'
gem 'rdoc'
gem 'rexml', '~> 3.2.4'
gem 'sass-rails' gem 'sass-rails'
gem 'tweetkit', github: 'julianfssen/tweetkit' # for twitter v2 api support gem 'tweetkit', github: 'julianfssen/tweetkit' # for twitter v2 api support
gem 'twitter' gem 'twitter'
gem 'mime-types-data', '~> 3.2024.0820' gem 'mime-types-data', '~> 3.2024.0820'
gem 'listen', '3.0.8' gem 'listen', '3.0.8'
gem 'mutex_m' gem 'mutex_m'
gem 'bigdecimal'
# Use postgres as the database for Active Record # Use postgres as the database for Active Record
gem 'bulk_insert' gem 'bulk_insert'
gem 'git' gem 'git'

View file

@ -82,6 +82,7 @@ GEM
ast (2.4.2) ast (2.4.2)
awesome_print (1.9.2) awesome_print (1.9.2)
base64 (0.2.0) base64 (0.2.0)
bigdecimal (3.1.8)
bindex (0.8.1) bindex (0.8.1)
bootsnap (1.18.4) bootsnap (1.18.4)
msgpack (~> 1.2) msgpack (~> 1.2)
@ -211,6 +212,7 @@ GEM
minitest (5.25.1) minitest (5.25.1)
msgpack (1.7.2) msgpack (1.7.2)
multipart-post (2.4.1) multipart-post (2.4.1)
mutex_m (0.2.0)
naught (1.1.0) naught (1.1.0)
net-imap (0.4.14) net-imap (0.4.14)
date date
@ -247,6 +249,8 @@ GEM
pry (>= 0.12.0) pry (>= 0.12.0)
pry-theme (1.3.1) pry-theme (1.3.1)
coderay (~> 1.1) coderay (~> 1.1)
psych (5.1.2)
stringio
public_suffix (6.0.1) public_suffix (6.0.1)
puma (3.12.6) puma (3.12.6)
racc (1.8.1) racc (1.8.1)
@ -287,6 +291,8 @@ GEM
rb-inotify (0.11.1) rb-inotify (0.11.1)
ffi (~> 1.0) ffi (~> 1.0)
rchardet (1.8.0) rchardet (1.8.0)
rdoc (6.7.0)
psych (>= 4.0.0)
regexp_parser (2.9.2) regexp_parser (2.9.2)
rest-client (2.1.0) rest-client (2.1.0)
http-accept (>= 1.7.0, < 2.0) http-accept (>= 1.7.0, < 2.0)
@ -294,7 +300,7 @@ GEM
mime-types (>= 1.16, < 4.0) mime-types (>= 1.16, < 4.0)
netrc (~> 0.8) netrc (~> 0.8)
retryable (3.0.5) retryable (3.0.5)
rexml (3.3.5) rexml (3.2.9)
strscan strscan
rubocop (1.65.1) rubocop (1.65.1)
json (~> 2.3) json (~> 2.3)
@ -347,6 +353,7 @@ GEM
actionpack (>= 6.1) actionpack (>= 6.1)
activesupport (>= 6.1) activesupport (>= 6.1)
sprockets (>= 3.0.0) sprockets (>= 3.0.0)
stringio (3.1.1)
strscan (3.1.0) strscan (3.1.0)
thor (1.3.1) thor (1.3.1)
thread_safe (0.3.6) thread_safe (0.3.6)
@ -391,6 +398,7 @@ PLATFORMS
DEPENDENCIES DEPENDENCIES
actionpack actionpack
awesome_print awesome_print
bigdecimal
bootsnap (>= 1.1.0) bootsnap (>= 1.1.0)
bulk_insert bulk_insert
byebug byebug
@ -405,6 +413,7 @@ DEPENDENCIES
jbuilder (~> 2.5) jbuilder (~> 2.5)
listen (= 3.0.8) listen (= 3.0.8)
mime-types-data (~> 3.2024.0820) mime-types-data (~> 3.2024.0820)
mutex_m
nokogiri nokogiri
pg pg
pry pry
@ -416,8 +425,10 @@ DEPENDENCIES
puma (~> 3.11) puma (~> 3.11)
rails (~> 7.0.0) rails (~> 7.0.0)
railties railties
rdoc
rest-client rest-client
retryable retryable
rexml (~> 3.2.4)
rubocop rubocop
rubocop-graphql rubocop-graphql
rubocop-rails rubocop-rails

View file

@ -8,6 +8,9 @@ Rails.application.configure do
# since you don't have to restart the web server when you make code changes. # since you don't have to restart the web server when you make code changes.
config.cache_classes = false config.cache_classes = false
# Make the web console work with Docker by allowing requests from any IPv4 address
config.web_console.permissions = "0.0.0.0/0"
# Do not eager load code on boot. # Do not eager load code on boot.
config.eager_load = false config.eager_load = false

View file

@ -26,7 +26,7 @@ end
def perform def perform
import_cves import_cves
import_gsds # import_gsds
import_github_pocs import_github_pocs
import_trickest_poc_cves import_trickest_poc_cves
import_inthewild_cve_exploits import_inthewild_cve_exploits

View file

@ -24,8 +24,14 @@ class CveListImporter < GithubRepo
EMPTY_HASH = EXPECTED_KEYS.map { |k| [k, nil] }.to_h.freeze EMPTY_HASH = EXPECTED_KEYS.map { |k| [k, nil] }.to_h.freeze
# Old CVE list URL
# def initialize
# super(repo_url = 'https://github.com/CVEProject/cvelist.git', repo_path = '/data_importer/data/cve_list')
# end
# New CVE list v5 URL
def initialize def initialize
super(repo_url = 'https://github.com/CVEProject/cvelist.git', repo_path = '/data_importer/data/cve_list') super(repo_url = 'https://github.com/CVEProject/cvelistV5.git', repo_path = '/data_importer/data/cve_list')
end end
def list_jsons_for_year(year) def list_jsons_for_year(year)

View file

@ -15,12 +15,23 @@ class GithubRepo
end end
def pull_latest_changes def pull_latest_changes
`cd #{repo_path}; git pull;` `cd #{repo_path}; git stash; git pull;`
puts "Now pulling latest changes from #{repo_path}" puts "Now pulling latest changes from #{repo_path}"
end end
def read_json(filename) def read_json(filename)
JSON.parse(File.read(filename), symbolize_names: true) begin
file = File.read(filename, encoding: 'utf-8')
# Force the content to valid UTF-8; NOTE: transcoding from 'binary' with these options also drops every non-ASCII byte
file.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
JSON.parse(file, symbolize_names: true)
rescue JSON::ParserError => e
puts "Error parsing JSON: #{e}"
rescue Encoding::InvalidByteSequenceError => e
puts "Invalid byte sequence in file: #{e}"
rescue JSON::GeneratorError => e
puts "Error generating JSON: #{e}"
end
end end
def read_markdown(filename) def read_markdown(filename)