Starting to add CNA scrapers; the first one is Adobe. Implemented everything up to the index_hash method, which lets me pull a hash of all URLs for each advisory ID.

2022-04-28 13:38:05 -05:00 · 2022-04-28 13:38:05 -05:00 · 9925e0d28b
commit 9925e0d28b
parent 3a32d6086f
4 changed files with 65 additions and 3 deletions
--- a/app/models/cisa_known_exploit.rb
+++ b/app/models/cisa_known_exploit.rb
@ -11,7 +11,8 @@ class CisaKnownExploit < ActiveRecord::Base
  end

  def self.cves_from_year(year)
-    last.vulnerabilities.select { |vuln| vuln if vuln.dig('cve_id') =~ /CVE-#{year}-\d{4,7}/ }
+    # `vulnerabilities` is a jsonb column, presumably loaded as an Array of Hashes (no #where): filter in Ruby.
+    (last&.vulnerabilities || []).select { |vuln| vuln['cve_id'].to_s.match?(/CVE-#{year}-\d{4,7}/) }
  end

  def self.by_product(product_name)
--- a/db/migrate/20220427043126_create_cisa_known_exploits.rb
+++ b/db/migrate/20220427043126_create_cisa_known_exploits.rb
@ -6,7 +6,7 @@ class CreateCisaKnownExploits < ActiveRecord::Migration[7.0]
      t.date :date_released
      t.index :date_released, unique: true
      t.integer :count
-      t.jsonb :vulnerabilities
+      t.jsonb :vulnerabilities, array: true
      t.timestamps
    end
  end
--- a/db/schema.rb
+++ b/db/schema.rb
@ -19,7 +19,7 @@ ActiveRecord::Schema[7.0].define(version: 2022_04_27_043126) do
    t.string "catalog_version"
    t.date "date_released"
    t.integer "count"
-    t.jsonb "vulnerabilities"
+    t.jsonb "vulnerabilities", array: true
    t.datetime "created_at", null: false
    t.datetime "updated_at", null: false
    t.index ["date_released"], name: "index_cisa_known_exploits_on_date_released", unique: true
--- a/lib/cna_scrapers/adobe_scraper.rb
+++ b/lib/cna_scrapers/adobe_scraper.rb
@ -0,0 +1,61 @@
+class AdobeScraper
+  attr_accessor :index_url, :doc
+  APSB_ID_MATCHER = /apsb\d{2}-\d{2,3}/i
+  APSA_ID_MATCHER = /apsa\d{2}-\d{2,3}/i
+
+  def initialize
+    @index_url = 'https://helpx.adobe.com/security/security-bulletin.html'
+    @doc = read_html(get_html)
+  end
+
+  def get_html
+    r = RestClient::Request.execute(
+      :method => :get,
+      :url => index_url
+    )
+    if r.code == 200
+      r.body
+    else
+      puts "HTTP Code #{r.code}"
+    end
+  end
+
+  def read_html(doc)
+    Nokogiri::HTML(doc)
+  end
+
+  def a_with_href
+    doc.xpath("//tr/td/a[starts-with(@href, 'https://')]")
+  end
+
+  def advisory_id_from_url(url)
+    # adobes advisory ids end with .html
+    if url.ends_with?'.html'
+      # adobe product security bulletin
+      if url.include? 'apsb'
+        result = url.scan(APSB_ID_MATCHER).first.upcase
+      elsif url.include? 'apsa'
+        result = url.scan(APSA_ID_MATCHER).first.upcase
+      else
+        result = 'None'
+      end
+      result
+    end
+  end
+
+  def index_hash
+    a_with_href.map.with_index do |a, index|
+      url = a.attributes['href'].value
+      advisory_id = advisory_id_from_url(url) 
+      if advisory_id == 'None' 
+        next
+      else 
+        {
+          :index => index,
+          :advisory_id => advisory_id,
+          :url => url
+        }
+      end
+    end
+  end
+end