enforce utf-8 some more
This commit is contained in:
parent
3b517dba43
commit
14b167e255
7 changed files with 40 additions and 5 deletions
|
@ -7,6 +7,7 @@ COPY Gemfile /data_importer/Gemfile
|
||||||
RUN bundle update
|
RUN bundle update
|
||||||
RUN bundle install
|
RUN bundle install
|
||||||
ENV PAGER=less
|
ENV PAGER=less
|
||||||
|
ENV LANG='UTF-8'
|
||||||
|
|
||||||
# Add a script to be executed every time the container starts.
|
# Add a script to be executed every time the container starts.
|
||||||
COPY entrypoint.sh /usr/bin/
|
COPY entrypoint.sh /usr/bin/
|
||||||
|
|
3
Gemfile
3
Gemfile
|
@ -18,12 +18,15 @@ gem 'retryable'
|
||||||
gem 'rubocop'
|
gem 'rubocop'
|
||||||
gem 'rubocop-graphql'
|
gem 'rubocop-graphql'
|
||||||
gem 'rubocop-rails'
|
gem 'rubocop-rails'
|
||||||
|
gem 'rdoc'
|
||||||
|
gem 'rexml', '~> 3.2.4'
|
||||||
gem 'sass-rails'
|
gem 'sass-rails'
|
||||||
gem 'tweetkit', github: 'julianfssen/tweetkit' # for twitter v2 api support
|
gem 'tweetkit', github: 'julianfssen/tweetkit' # for twitter v2 api support
|
||||||
gem 'twitter'
|
gem 'twitter'
|
||||||
gem 'mime-types-data', '~> 3.2024.0820'
|
gem 'mime-types-data', '~> 3.2024.0820'
|
||||||
gem 'listen', '3.0.8'
|
gem 'listen', '3.0.8'
|
||||||
gem 'mutex_m'
|
gem 'mutex_m'
|
||||||
|
gem 'bigdecimal'
|
||||||
# Use postgres as the database for Active Record
|
# Use postgres as the database for Active Record
|
||||||
gem 'bulk_insert'
|
gem 'bulk_insert'
|
||||||
gem 'git'
|
gem 'git'
|
||||||
|
|
13
Gemfile.lock
13
Gemfile.lock
|
@ -82,6 +82,7 @@ GEM
|
||||||
ast (2.4.2)
|
ast (2.4.2)
|
||||||
awesome_print (1.9.2)
|
awesome_print (1.9.2)
|
||||||
base64 (0.2.0)
|
base64 (0.2.0)
|
||||||
|
bigdecimal (3.1.8)
|
||||||
bindex (0.8.1)
|
bindex (0.8.1)
|
||||||
bootsnap (1.18.4)
|
bootsnap (1.18.4)
|
||||||
msgpack (~> 1.2)
|
msgpack (~> 1.2)
|
||||||
|
@ -211,6 +212,7 @@ GEM
|
||||||
minitest (5.25.1)
|
minitest (5.25.1)
|
||||||
msgpack (1.7.2)
|
msgpack (1.7.2)
|
||||||
multipart-post (2.4.1)
|
multipart-post (2.4.1)
|
||||||
|
mutex_m (0.2.0)
|
||||||
naught (1.1.0)
|
naught (1.1.0)
|
||||||
net-imap (0.4.14)
|
net-imap (0.4.14)
|
||||||
date
|
date
|
||||||
|
@ -247,6 +249,8 @@ GEM
|
||||||
pry (>= 0.12.0)
|
pry (>= 0.12.0)
|
||||||
pry-theme (1.3.1)
|
pry-theme (1.3.1)
|
||||||
coderay (~> 1.1)
|
coderay (~> 1.1)
|
||||||
|
psych (5.1.2)
|
||||||
|
stringio
|
||||||
public_suffix (6.0.1)
|
public_suffix (6.0.1)
|
||||||
puma (3.12.6)
|
puma (3.12.6)
|
||||||
racc (1.8.1)
|
racc (1.8.1)
|
||||||
|
@ -287,6 +291,8 @@ GEM
|
||||||
rb-inotify (0.11.1)
|
rb-inotify (0.11.1)
|
||||||
ffi (~> 1.0)
|
ffi (~> 1.0)
|
||||||
rchardet (1.8.0)
|
rchardet (1.8.0)
|
||||||
|
rdoc (6.7.0)
|
||||||
|
psych (>= 4.0.0)
|
||||||
regexp_parser (2.9.2)
|
regexp_parser (2.9.2)
|
||||||
rest-client (2.1.0)
|
rest-client (2.1.0)
|
||||||
http-accept (>= 1.7.0, < 2.0)
|
http-accept (>= 1.7.0, < 2.0)
|
||||||
|
@ -294,7 +300,7 @@ GEM
|
||||||
mime-types (>= 1.16, < 4.0)
|
mime-types (>= 1.16, < 4.0)
|
||||||
netrc (~> 0.8)
|
netrc (~> 0.8)
|
||||||
retryable (3.0.5)
|
retryable (3.0.5)
|
||||||
rexml (3.3.5)
|
rexml (3.2.9)
|
||||||
strscan
|
strscan
|
||||||
rubocop (1.65.1)
|
rubocop (1.65.1)
|
||||||
json (~> 2.3)
|
json (~> 2.3)
|
||||||
|
@ -347,6 +353,7 @@ GEM
|
||||||
actionpack (>= 6.1)
|
actionpack (>= 6.1)
|
||||||
activesupport (>= 6.1)
|
activesupport (>= 6.1)
|
||||||
sprockets (>= 3.0.0)
|
sprockets (>= 3.0.0)
|
||||||
|
stringio (3.1.1)
|
||||||
strscan (3.1.0)
|
strscan (3.1.0)
|
||||||
thor (1.3.1)
|
thor (1.3.1)
|
||||||
thread_safe (0.3.6)
|
thread_safe (0.3.6)
|
||||||
|
@ -391,6 +398,7 @@ PLATFORMS
|
||||||
DEPENDENCIES
|
DEPENDENCIES
|
||||||
actionpack
|
actionpack
|
||||||
awesome_print
|
awesome_print
|
||||||
|
bigdecimal
|
||||||
bootsnap (>= 1.1.0)
|
bootsnap (>= 1.1.0)
|
||||||
bulk_insert
|
bulk_insert
|
||||||
byebug
|
byebug
|
||||||
|
@ -405,6 +413,7 @@ DEPENDENCIES
|
||||||
jbuilder (~> 2.5)
|
jbuilder (~> 2.5)
|
||||||
listen (= 3.0.8)
|
listen (= 3.0.8)
|
||||||
mime-types-data (~> 3.2024.0820)
|
mime-types-data (~> 3.2024.0820)
|
||||||
|
mutex_m
|
||||||
nokogiri
|
nokogiri
|
||||||
pg
|
pg
|
||||||
pry
|
pry
|
||||||
|
@ -416,8 +425,10 @@ DEPENDENCIES
|
||||||
puma (~> 3.11)
|
puma (~> 3.11)
|
||||||
rails (~> 7.0.0)
|
rails (~> 7.0.0)
|
||||||
railties
|
railties
|
||||||
|
rdoc
|
||||||
rest-client
|
rest-client
|
||||||
retryable
|
retryable
|
||||||
|
rexml (~> 3.2.4)
|
||||||
rubocop
|
rubocop
|
||||||
rubocop-graphql
|
rubocop-graphql
|
||||||
rubocop-rails
|
rubocop-rails
|
||||||
|
|
|
@ -8,6 +8,9 @@ Rails.application.configure do
|
||||||
# since you don't have to restart the web server when you make code changes.
|
# since you don't have to restart the web server when you make code changes.
|
||||||
config.cache_classes = false
|
config.cache_classes = false
|
||||||
|
|
||||||
|
# make web console work with docker
|
||||||
|
config.web_console.permissions = "0.0.0.0/0"
|
||||||
|
|
||||||
# Do not eager load code on boot.
|
# Do not eager load code on boot.
|
||||||
config.eager_load = false
|
config.eager_load = false
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,7 @@ end
|
||||||
|
|
||||||
def perform
|
def perform
|
||||||
import_cves
|
import_cves
|
||||||
import_gsds
|
# import_gsds
|
||||||
import_github_pocs
|
import_github_pocs
|
||||||
import_trickest_poc_cves
|
import_trickest_poc_cves
|
||||||
import_inthewild_cve_exploits
|
import_inthewild_cve_exploits
|
||||||
|
|
|
@ -24,8 +24,14 @@ class CveListImporter < GithubRepo
|
||||||
|
|
||||||
EMPTY_HASH = EXPECTED_KEYS.map { |k| [k, nil] }.to_h.freeze
|
EMPTY_HASH = EXPECTED_KEYS.map { |k| [k, nil] }.to_h.freeze
|
||||||
|
|
||||||
|
# Old CVE list URL (legacy cvelist repository)
|
||||||
|
# def initialize
|
||||||
|
# super(repo_url = 'https://github.com/CVEProject/cvelist.git', repo_path = '/data_importer/data/cve_list')
|
||||||
|
# end
|
||||||
|
|
||||||
|
# New CVE list URL (cvelistV5 repository)
|
||||||
# Configures the importer to use the CVE Project's cvelistV5 repository.
#
# Both values are forwarded positionally to the superclass initializer
# (presumably the clone URL and the local checkout path, going by the
# `repo_url` / `repo_path` names used in the previous call - confirm against
# GithubRepo#initialize).
#
# NOTE(review): the local path is the same one used for the legacy cvelist
# repository; an existing checkout there may still point at the old remote.
def initialize
  # Plain positional arguments: writing `repo_url = '...'` inside the call only
  # assigned throwaway locals, it did not pass keyword arguments.
  super('https://github.com/CVEProject/cvelistV5.git', '/data_importer/data/cve_list')
end
|
||||||
|
|
||||||
def list_jsons_for_year(year)
|
def list_jsons_for_year(year)
|
||||||
|
|
|
@ -15,12 +15,23 @@ class GithubRepo
|
||||||
end
|
end
|
||||||
|
|
||||||
# Stashes any local modifications in the checkout at +repo_path+ and then
# pulls from its remote.
#
# +repo_path+ is defined outside this method (presumably a reader on this
# class - confirm). Failures of either git command are not raised; git's own
# error output goes to stderr, as before.
#
# @return [nil]
def pull_latest_changes
  puts "Now pulling latest changes from #{repo_path}"

  # `git -C <dir>` confines each command to the checkout. The previous
  # `cd dir; git stash; git pull` kept going after a failed `cd` and would
  # then stash/pull whatever repository the process was started in.
  # Passing an argv list also avoids interpolating the path into a shell.
  %w[stash pull].each do |subcommand|
    # stdout is discarded, matching the old backtick call whose captured
    # output was never used.
    system('git', '-C', repo_path.to_s, subcommand, out: File::NULL)
  end

  nil
end
|
||||||
|
|
||||||
# Reads +filename+ as UTF-8 text and parses it as JSON.
#
# A leading UTF-8 byte-order mark is skipped and malformed byte sequences are
# removed before parsing; well-formed multi-byte characters are kept intact.
#
# @param filename [String] path of the JSON file to read
# @return [Hash, Array, nil] the parsed document with symbolized keys, or nil
#   when the content is not valid JSON (the parser error is printed to stdout)
def read_json(filename)
  # String#scrub drops only invalid sequences. Re-encoding from 'binary' with
  # undef: :replace (the previous approach) removed every byte >= 0x80, which
  # silently deleted all legitimate non-ASCII characters.
  contents = File.read(filename, encoding: 'bom|utf-8').scrub('')
  JSON.parse(contents, symbolize_names: true)
rescue JSON::ParserError => e
  puts "Error parsing JSON: #{e}"
  nil
end
|
||||||
|
|
||||||
def read_markdown(filename)
|
def read_markdown(filename)
|
||||||
|
|
Loading…
Add table
Reference in a new issue