enforce utf-8 some more

This commit is contained in:
kenna-bmcdevitt 2024-08-22 14:00:44 -05:00
parent 3b517dba43
commit 14b167e255
7 changed files with 40 additions and 5 deletions

View file

@ -7,6 +7,7 @@ COPY Gemfile /data_importer/Gemfile
RUN bundle update
RUN bundle install
ENV PAGER=less
# LANG has to name a locale, not just a charset. A bare "UTF-8" is not a
# locale name glibc recognises, so processes would fall back to the "C"
# locale. C.UTF-8 is assumed to be available in the base image (not visible
# in this hunk) - TODO confirm.
ENV LANG=C.UTF-8
# Add a script to be executed every time the container starts.
COPY entrypoint.sh /usr/bin/

View file

@ -18,12 +18,15 @@ gem 'retryable'
gem 'rubocop'
gem 'rubocop-graphql'
gem 'rubocop-rails'
gem 'rdoc'
gem 'rexml', '~> 3.2.4'
gem 'sass-rails'
gem 'tweetkit', github: 'julianfssen/tweetkit' # for twitter v2 api support
gem 'twitter'
gem 'mime-types-data', '~> 3.2024.0820'
gem 'listen', '3.0.8'
gem 'mutex_m'
gem 'bigdecimal'
# Use postgres as the database for Active Record
gem 'bulk_insert'
gem 'git'

View file

@ -82,6 +82,7 @@ GEM
ast (2.4.2)
awesome_print (1.9.2)
base64 (0.2.0)
bigdecimal (3.1.8)
bindex (0.8.1)
bootsnap (1.18.4)
msgpack (~> 1.2)
@ -211,6 +212,7 @@ GEM
minitest (5.25.1)
msgpack (1.7.2)
multipart-post (2.4.1)
mutex_m (0.2.0)
naught (1.1.0)
net-imap (0.4.14)
date
@ -247,6 +249,8 @@ GEM
pry (>= 0.12.0)
pry-theme (1.3.1)
coderay (~> 1.1)
psych (5.1.2)
stringio
public_suffix (6.0.1)
puma (3.12.6)
racc (1.8.1)
@ -287,6 +291,8 @@ GEM
rb-inotify (0.11.1)
ffi (~> 1.0)
rchardet (1.8.0)
rdoc (6.7.0)
psych (>= 4.0.0)
regexp_parser (2.9.2)
rest-client (2.1.0)
http-accept (>= 1.7.0, < 2.0)
@ -294,7 +300,7 @@ GEM
mime-types (>= 1.16, < 4.0)
netrc (~> 0.8)
retryable (3.0.5)
rexml (3.3.5)
rexml (3.2.9)
strscan
rubocop (1.65.1)
json (~> 2.3)
@ -347,6 +353,7 @@ GEM
actionpack (>= 6.1)
activesupport (>= 6.1)
sprockets (>= 3.0.0)
stringio (3.1.1)
strscan (3.1.0)
thor (1.3.1)
thread_safe (0.3.6)
@ -391,6 +398,7 @@ PLATFORMS
DEPENDENCIES
actionpack
awesome_print
bigdecimal
bootsnap (>= 1.1.0)
bulk_insert
byebug
@ -405,6 +413,7 @@ DEPENDENCIES
jbuilder (~> 2.5)
listen (= 3.0.8)
mime-types-data (~> 3.2024.0820)
mutex_m
nokogiri
pg
pry
@ -416,8 +425,10 @@ DEPENDENCIES
puma (~> 3.11)
rails (~> 7.0.0)
railties
rdoc
rest-client
retryable
rexml (~> 3.2.4)
rubocop
rubocop-graphql
rubocop-rails

View file

@ -8,6 +8,9 @@ Rails.application.configure do
# since you don't have to restart the web server when you make code changes.
config.cache_classes = false
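# Make the web console work with Docker. NOTE: 0.0.0.0/0 matches every IPv4 client, so keep this setting out of any non-development environment.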
config.web_console.permissions = "0.0.0.0/0"
# Do not eager load code on boot.
config.eager_load = false

View file

@ -26,7 +26,7 @@ end
def perform
import_cves
import_gsds
# import_gsds
import_github_pocs
import_trickest_poc_cves
import_inthewild_cve_exploits

View file

@ -24,8 +24,14 @@ class CveListImporter < GithubRepo
EMPTY_HASH = EXPECTED_KEYS.map { |k| [k, nil] }.to_h.freeze
# Old Cve list url
# def initialize
# super(repo_url = 'https://github.com/CVEProject/cvelist.git', repo_path = '/data_importer/data/cve_list')
# end
# New Cve list v5 url
# Configures this importer to use the CVE Project's v5 list repository.
#
# The two values are forwarded positionally to the GithubRepo initializer,
# exactly as before; presumably they are the clone URL and the local checkout
# path - verify against GithubRepo#initialize. The superseded cvelist.git URL
# is no longer passed, so the parent initializer runs only once.
def initialize
  super('https://github.com/CVEProject/cvelistV5.git', '/data_importer/data/cve_list')
end
def list_jsons_for_year(year)

View file

@ -15,12 +15,23 @@ class GithubRepo
end
# Stashes local modifications in the checkout at +repo_path+ and then pulls.
#
# Git is invoked with an explicit argument list and `-C`, so the path is never
# interpreted by a shell and a missing directory makes git fail instead of
# running the commands in the current working directory.
#
# @return [nil]
def pull_latest_changes
  puts "Now pulling latest changes from #{repo_path}"
  checkout = repo_path.to_s
  # Standard output is discarded, matching the previous backtick invocation
  # whose captured output was thrown away.
  system('git', '-C', checkout, 'stash', out: File::NULL)
  system('git', '-C', checkout, 'pull', out: File::NULL)
  nil
end
# Reads +filename+ and parses its content as JSON with symbolized keys.
#
# The file is read as UTF-8 and only byte sequences that are invalid in UTF-8
# are removed (String#scrub), so valid multi-byte characters survive intact.
# A parse failure is reported on standard output rather than raised.
#
# @param filename [String] path of the JSON file to read
# @return [Hash, Array, nil] the parsed document, or nil when the content is
#   not valid JSON
def read_json(filename)
  content = File.read(filename, encoding: 'UTF-8').scrub('')
  JSON.parse(content, symbolize_names: true)
rescue JSON::ParserError => e
  puts "Error parsing JSON: #{e}"
  nil
end
def read_markdown(filename)