From 14b167e255426ad8ad1ce39df59b178b5d06364c Mon Sep 17 00:00:00 2001 From: kenna-bmcdevitt Date: Thu, 22 Aug 2024 14:00:44 -0500 Subject: [PATCH] enforce utf-8 some more --- Dockerfile | 1 + Gemfile | 3 +++ Gemfile.lock | 13 ++++++++++++- config/environments/development.rb | 3 +++ db/seeds.rb | 2 +- lib/importers/cve_list_importer.rb | 8 +++++++- lib/importers/github_repo.rb | 15 +++++++++++++-- 7 files changed, 40 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index fd32d50..b8dced8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,6 +7,7 @@ COPY Gemfile /data_importer/Gemfile RUN bundle update RUN bundle install ENV PAGER=less +ENV LANG='UTF-8' # Add a script to be executed every time the container starts. COPY entrypoint.sh /usr/bin/ diff --git a/Gemfile b/Gemfile index 5fc7c36..2753e51 100644 --- a/Gemfile +++ b/Gemfile @@ -18,12 +18,15 @@ gem 'retryable' gem 'rubocop' gem 'rubocop-graphql' gem 'rubocop-rails' +gem 'rdoc' +gem 'rexml', '~> 3.2.4' gem 'sass-rails' gem 'tweetkit', github: 'julianfssen/tweetkit' # for twitter v2 api support gem 'twitter' gem 'mime-types-data', '~> 3.2024.0820' gem 'listen', '3.0.8' gem 'mutex_m' +gem 'bigdecimal' # Use postgres as the database for Active Record gem 'bulk_insert' gem 'git' diff --git a/Gemfile.lock b/Gemfile.lock index 0aad24b..80aeb1d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -82,6 +82,7 @@ GEM ast (2.4.2) awesome_print (1.9.2) base64 (0.2.0) + bigdecimal (3.1.8) bindex (0.8.1) bootsnap (1.18.4) msgpack (~> 1.2) @@ -211,6 +212,7 @@ GEM minitest (5.25.1) msgpack (1.7.2) multipart-post (2.4.1) + mutex_m (0.2.0) naught (1.1.0) net-imap (0.4.14) date @@ -247,6 +249,8 @@ GEM pry (>= 0.12.0) pry-theme (1.3.1) coderay (~> 1.1) + psych (5.1.2) + stringio public_suffix (6.0.1) puma (3.12.6) racc (1.8.1) @@ -287,6 +291,8 @@ GEM rb-inotify (0.11.1) ffi (~> 1.0) rchardet (1.8.0) + rdoc (6.7.0) + psych (>= 4.0.0) regexp_parser (2.9.2) rest-client (2.1.0) http-accept (>= 1.7.0, < 2.0) @@ -294,7 +300,7 @@ GEM mime-types (>= 1.16, < 4.0) netrc (~> 0.8) retryable (3.0.5) - rexml (3.3.5) + rexml (3.2.9) strscan rubocop (1.65.1) json (~> 2.3) @@ -347,6 +353,7 @@ GEM actionpack (>= 6.1) activesupport (>= 6.1) sprockets (>= 3.0.0) + stringio (3.1.1) strscan (3.1.0) thor (1.3.1) thread_safe (0.3.6) @@ -391,6 +398,7 @@ PLATFORMS DEPENDENCIES actionpack awesome_print + bigdecimal bootsnap (>= 1.1.0) bulk_insert byebug @@ -405,6 +413,7 @@ DEPENDENCIES jbuilder (~> 2.5) listen (= 3.0.8) mime-types-data (~> 3.2024.0820) + mutex_m nokogiri pg pry @@ -416,8 +425,10 @@ DEPENDENCIES puma (~> 3.11) rails (~> 7.0.0) railties + rdoc rest-client retryable + rexml (~> 3.2.4) rubocop rubocop-graphql rubocop-rails diff --git a/config/environments/development.rb b/config/environments/development.rb index 12ff6ac..6e44a38 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -8,6 +8,9 @@ Rails.application.configure do # since you don't have to restart the web server when you make code changes. config.cache_classes = false + # make web console work with docker + config.web_console.permissions = "0.0.0.0/0" + # Do not eager load code on boot. config.eager_load = false diff --git a/db/seeds.rb b/db/seeds.rb index ab6f60f..8ee2423 100644 --- a/db/seeds.rb +++ b/db/seeds.rb @@ -26,7 +26,7 @@ end def perform import_cves - import_gsds +# import_gsds import_github_pocs import_trickest_poc_cves import_inthewild_cve_exploits diff --git a/lib/importers/cve_list_importer.rb b/lib/importers/cve_list_importer.rb index d00ade3..2040487 100644 --- a/lib/importers/cve_list_importer.rb +++ b/lib/importers/cve_list_importer.rb @@ -24,8 +24,14 @@ class CveListImporter < GithubRepo EMPTY_HASH = EXPECTED_KEYS.map { |k| [k, nil] }.to_h.freeze + # Old Cve list url + # def initialize + # super(repo_url = 'https://github.com/CVEProject/cvelist.git', repo_path = '/data_importer/data/cve_list') + # end + + # New Cve list v5 url def initialize - super(repo_url = 'https://github.com/CVEProject/cvelist.git', repo_path = '/data_importer/data/cve_list') + super(repo_url = 'https://github.com/CVEProject/cvelistV5.git', repo_path = '/data_importer/data/cve_list') end def list_jsons_for_year(year) diff --git a/lib/importers/github_repo.rb b/lib/importers/github_repo.rb index 4f32697..6ffcea2 100644 --- a/lib/importers/github_repo.rb +++ b/lib/importers/github_repo.rb @@ -15,12 +15,23 @@ class GithubRepo end def pull_latest_changes - `cd #{repo_path}; git pull;` + `cd #{repo_path}; git stash; git pull;` puts "Now pulling latest changes from #{repo_path}" end def read_json(filename) - JSON.parse(File.read(filename), symbolize_names: true) + begin + file = File.read(filename, encoding: 'utf-8') + # Ensure the file content is valid UTF-8 + file.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '') + JSON.parse(file, symbolize_names: true) + rescue JSON::ParserError => e + puts "Error parsing JSON: #{e}" + rescue Encoding::InvalidByteSequenceError => e + puts "Invalid byte sequence in file: #{e}" + rescue JSON::GeneratorError => e + puts "Error generating JSON: #{e}" + end end def read_markdown(filename)