From 1d88c3cfbb0dc24ceedec2a30a6cefcf33e1b0cc Mon Sep 17 00:00:00 2001 From: Brendan McDevitt Date: Tue, 4 Aug 2020 00:27:55 -0400 Subject: [PATCH] added crawler --- crawler/.gitignore | 11 ++++++ crawler/.gitmodules | 3 ++ crawler/.rspec | 3 ++ crawler/.travis.yml | 6 ++++ crawler/Gemfile | 9 +++++ crawler/Gemfile.lock | 64 ++++++++++++++++++++++++++++++++++ crawler/README.md | 36 +++++++++++++++++++ crawler/Rakefile | 6 ++++ crawler/bin/console | 20 +++++++++++ crawler/bin/setup | 8 +++++ crawler/crawler.gemspec | 27 ++++++++++++++ crawler/intrigue-ident | 1 + crawler/lib/crawler.rb | 6 ++++ crawler/lib/crawler/version.rb | 3 ++ crawler/spec/crawler_spec.rb | 9 +++++ crawler/spec/spec_helper.rb | 14 ++++++++ 16 files changed, 226 insertions(+) create mode 100644 crawler/.gitignore create mode 100644 crawler/.gitmodules create mode 100644 crawler/.rspec create mode 100644 crawler/.travis.yml create mode 100644 crawler/Gemfile create mode 100644 crawler/Gemfile.lock create mode 100644 crawler/README.md create mode 100644 crawler/Rakefile create mode 100755 crawler/bin/console create mode 100755 crawler/bin/setup create mode 100644 crawler/crawler.gemspec create mode 160000 crawler/intrigue-ident create mode 100644 crawler/lib/crawler.rb create mode 100644 crawler/lib/crawler/version.rb create mode 100644 crawler/spec/crawler_spec.rb create mode 100644 crawler/spec/spec_helper.rb diff --git a/crawler/.gitignore b/crawler/.gitignore new file mode 100644 index 0000000..b04a8c8 --- /dev/null +++ b/crawler/.gitignore @@ -0,0 +1,11 @@ +/.bundle/ +/.yardoc +/_yardoc/ +/coverage/ +/doc/ +/pkg/ +/spec/reports/ +/tmp/ + +# rspec failure tracking +.rspec_status diff --git a/crawler/.gitmodules b/crawler/.gitmodules new file mode 100644 index 0000000..4825aa2 --- /dev/null +++ b/crawler/.gitmodules @@ -0,0 +1,3 @@ +[submodule "intrigue-ident"] + path = intrigue-ident + url = https://github.com/intrigueio/intrigue-ident.git diff --git a/crawler/.rspec b/crawler/.rspec new file 
mode 100644 index 0000000..34c5164 --- /dev/null +++ b/crawler/.rspec @@ -0,0 +1,3 @@ +--format documentation +--color +--require spec_helper diff --git a/crawler/.travis.yml b/crawler/.travis.yml new file mode 100644 index 0000000..e91174f --- /dev/null +++ b/crawler/.travis.yml @@ -0,0 +1,6 @@ +--- +language: ruby +cache: bundler +rvm: + - 2.6.5 +before_install: gem install bundler -v 2.1.4 diff --git a/crawler/Gemfile b/crawler/Gemfile new file mode 100644 index 0000000..5b459e8 --- /dev/null +++ b/crawler/Gemfile @@ -0,0 +1,9 @@ +source "https://rubygems.org" + +# Specify your gem's dependencies in crawler.gemspec +gemspec + +gem "rake", "~> 12.0" +gem "rspec", "~> 3.0" +gem "pry" +gem "intrigue-ident", path: "./intrigue-ident" diff --git a/crawler/Gemfile.lock b/crawler/Gemfile.lock new file mode 100644 index 0000000..1881982 --- /dev/null +++ b/crawler/Gemfile.lock @@ -0,0 +1,64 @@ +PATH + remote: . + specs: + crawler (0.1.0) + +PATH + remote: intrigue-ident + specs: + intrigue-ident (1.0.7) + dnsruby + recog-intrigue + snmp + socketry + +GEM + remote: https://rubygems.org/ + specs: + addressable (2.7.0) + public_suffix (>= 2.0.2, < 5.0) + coderay (1.1.3) + diff-lcs (1.4.4) + dnsruby (1.61.3) + addressable (~> 2.5) + hitimes (1.3.1) + method_source (1.0.0) + mini_portile2 (2.4.0) + nokogiri (1.10.10) + mini_portile2 (~> 2.4.0) + pry (0.13.1) + coderay (~> 1.1) + method_source (~> 1.0) + public_suffix (4.0.5) + rake (12.3.3) + recog-intrigue (2.3.7) + nokogiri + rspec (3.9.0) + rspec-core (~> 3.9.0) + rspec-expectations (~> 3.9.0) + rspec-mocks (~> 3.9.0) + rspec-core (3.9.2) + rspec-support (~> 3.9.3) + rspec-expectations (3.9.2) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.9.0) + rspec-mocks (3.9.1) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.9.0) + rspec-support (3.9.3) + snmp (1.3.2) + socketry (0.5.1) + hitimes (~> 1.2) + +PLATFORMS + ruby + +DEPENDENCIES + crawler! + intrigue-ident! 
+ pry + rake (~> 12.0) + rspec (~> 3.0) + +BUNDLED WITH + 2.1.4 diff --git a/crawler/README.md b/crawler/README.md new file mode 100644 index 0000000..e5ff937 --- /dev/null +++ b/crawler/README.md @@ -0,0 +1,36 @@ +# Crawler + +Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/crawler.rb`. To experiment with that code, run `bin/console` for an interactive prompt. + +TODO: Delete this and the text above, and describe your gem + +## Installation + +Add this line to your application's Gemfile: + +```ruby +gem 'crawler' +``` + +And then execute: + + $ bundle install + +Or install it yourself as: + + $ gem install crawler + +## Usage + +TODO: Write usage instructions here + +## Development + +After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment. + +To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). + +## Contributing + +Bug reports and pull requests are welcome at https://git.mcdevitt.tech/bpmcdevitt/misc_rbtools. 
+
diff --git a/crawler/Rakefile b/crawler/Rakefile
new file mode 100644
index 0000000..b7e9ed5
--- /dev/null
+++ b/crawler/Rakefile
@@ -0,0 +1,6 @@
+require "bundler/gem_tasks"
+require "rspec/core/rake_task"
+
+RSpec::Core::RakeTask.new(:spec)
+
+task default: :spec
diff --git a/crawler/bin/console b/crawler/bin/console
new file mode 100755
index 0000000..92588fa
--- /dev/null
+++ b/crawler/bin/console
@@ -0,0 +1,20 @@
+#!/usr/bin/env ruby
+
+require "bundler/setup"
+require "crawler"
+
+# You can add fixtures and/or initialization code here to make experimenting
+# with your gem easier. You can also use a different console, if you like.
+
+# (If you use this, don't forget to add pry to your Gemfile!)
+require "pry"
+
+# Load ident from the intrigue-ident submodule checkout; the top-level includes expose its modules in the session.
+require_relative '../intrigue-ident/lib/ident'
+include Intrigue::Ident
+include Intrigue::Ident::Utils
+
+Pry.start
+
+#require "irb"
+#IRB.start(__FILE__)
diff --git a/crawler/bin/setup b/crawler/bin/setup
new file mode 100755
index 0000000..dce67d8
--- /dev/null
+++ b/crawler/bin/setup
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+set -euo pipefail
+IFS=$'\n\t'
+set -vx
+
+bundle install
+
+# Do any other automated setup that you need to do here
diff --git a/crawler/crawler.gemspec b/crawler/crawler.gemspec
new file mode 100644
index 0000000..2c6fcd2
--- /dev/null
+++ b/crawler/crawler.gemspec
@@ -0,0 +1,27 @@
+require_relative 'lib/crawler/version'
+
+Gem::Specification.new do |spec|
+  spec.name          = "crawler"
+  spec.version       = Crawler::VERSION
+  spec.authors       = ["Brendan McDevitt"]
+  spec.email         = ["brendan@mcdevitt.tech"]
+
+  spec.summary       = %q{A webcrawler in ruby}
+  spec.description   = %q{A webcrawler in ruby}
+  spec.homepage      = "https://git.mcdevitt.tech/bpmcdevitt/misc_rbtools/crawler"
+  spec.required_ruby_version = Gem::Requirement.new(">= 2.6.5")
+
+  spec.metadata["allowed_push_host"] = "https://git.mcdevitt.tech/bpmcdevitt/misc_rbtools"
+
+  spec.metadata["homepage_uri"] = spec.homepage
+  spec.metadata["source_code_uri"] = "https://git.mcdevitt.tech/bpmcdevitt/misc_rbtools"
+
+  # Specify which files should be added to the gem when it is released.
+  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
+  spec.files         = Dir.chdir(File.expand_path(__dir__)) do
+    `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{^(test|spec|features)/}) }
+  end
+  spec.bindir        = "exe"
+  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+  spec.require_paths = ["lib"]
+end
diff --git a/crawler/intrigue-ident b/crawler/intrigue-ident
new file mode 160000
index 0000000..3e5b77c
--- /dev/null
+++ b/crawler/intrigue-ident
@@ -0,0 +1 @@
+Subproject commit 3e5b77ca46c2a1bcf973a80c3440bf670e61b069
diff --git a/crawler/lib/crawler.rb b/crawler/lib/crawler.rb
new file mode 100644
index 0000000..ab749e4
--- /dev/null
+++ b/crawler/lib/crawler.rb
@@ -0,0 +1,6 @@
+require "crawler/version"
+
+module Crawler
+  # Base error class for the Crawler namespace.
+  class Error < StandardError; end
+end
diff --git a/crawler/lib/crawler/version.rb b/crawler/lib/crawler/version.rb
new file mode 100644
index 0000000..e8f03ce
--- /dev/null
+++ b/crawler/lib/crawler/version.rb
@@ -0,0 +1,3 @@
+module Crawler
+  VERSION = "0.1.0"
+end
diff --git a/crawler/spec/crawler_spec.rb b/crawler/spec/crawler_spec.rb
new file mode 100644
index 0000000..88aded1
--- /dev/null
+++ b/crawler/spec/crawler_spec.rb
@@ -0,0 +1,9 @@
+RSpec.describe Crawler do
+  it "has a version number" do
+    expect(Crawler::VERSION).not_to be nil
+  end
+
+  it "defines a namespaced error class" do
+    expect(Crawler::Error.ancestors).to include(StandardError)
+  end
+end
diff --git a/crawler/spec/spec_helper.rb b/crawler/spec/spec_helper.rb
new file mode 100644
index 0000000..cad8119
--- /dev/null
+++ b/crawler/spec/spec_helper.rb
@@ -0,0 +1,14 @@
+require "bundler/setup"
+require "crawler"
+
+RSpec.configure do |config|
+  # Enable flags like --only-failures and --next-failure
+  config.example_status_persistence_file_path = ".rspec_status"
+
+  # Disable RSpec exposing methods globally on `Module` and `main`
+  config.disable_monkey_patching!
+
+  config.expect_with :rspec do |c|
+    c.syntax = :expect
+  end
+end