Improve the Elasticsearch helper and make Pastebinner serialize pastes to JSON

This commit is contained in:
booboy 2019-02-01 22:19:36 -06:00
parent ba20e8dc41
commit 50b052a9bd
14 changed files with 256 additions and 296 deletions

View file

@ -1,6 +1,6 @@
source "https://rubygems.org"
source 'https://rubygems.org'
git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
# Specify your gem's dependencies in pastebinner.gemspec
gemspec

View file

@ -1,6 +1,6 @@
require "bundler/gem_tasks"
require "rspec/core/rake_task"
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)
task :default => :spec
task default: :spec

View file

@ -1,11 +1,12 @@
#!/usr/bin/env ruby
require "../lib/pastebinner"
require '../lib/pastebinner'
require '../lib/elastic_search_helper'
# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.
# (If you use this, don't forget to add pry to your Gemfile!)
require "pry"
require 'pry'
pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
binding.pry

View file

@ -11,8 +11,8 @@ pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pa
def download_pastes(pb)
pub_pastes = pb.scrape_public_pastes
keys = pb.get_unique_paste_keys(pub_pastes)
data_dir = "../data/"
filename = "pastebin_paste_key"
data_dir = '../data/'
filename = 'pastebin_paste_key'
binding.pry
keys.map do |id|
if File.exist?(data_dir + filename + "_#{id}")
@ -27,7 +27,7 @@ options = OptionParser.parse!
if options[:s]
puts pb.scrape_public_pastes
elsif options[:r] and options[:k]
elsif options[:r] && options[:k]
key = options[:k]
puts pb.raw_paste_data(key)
elsif options[:t]
@ -36,9 +36,9 @@ elsif options[:g]
r = pb.scrape_public_pastes
puts pb.get_unique_paste_keys(r)
elsif options[:d]
puts "Downloading paste data into the data directory..."
puts 'Downloading paste data into the data directory...'
download_pastes(pb)
puts "Complete."
puts 'Complete.'
elsif options[:k]
puts '-k or --key= requires -r,--raw'
exit

View file

@ -1,53 +1,33 @@
require 'elasticsearch'
class ElasticSearchHelper
attr_accessor :server_uri, :index
attr_accessor :server_uri, :index, :pastebinner, :doctype
DEFAULT_METHOD = :post
def initialize(server_uri, index)
def initialize(server_uri, index, doctype='_doc')
@server_uri = server_uri
@index = index
@doctype = doctype
@pastebinner = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
end
# Builds one encoded document per paste key, ready to ship to Elasticsearch.
#
# For each key, the paste body and its metadata are fetched through +pb+
# and both are handed to +pb.encode_json+.
#
# @param pb [#raw_paste_data, #raw_paste_metadata, #encode_json] paste client
# @param keys [Enumerable] paste keys to look up
# @return [Array] the +encode_json+ result for each key, in key order
def build_json_array(pb, keys)
  keys.map do |paste_key|
    body = pb.raw_paste_data(paste_key)
    metadata = pb.raw_paste_metadata(paste_key)
    pb.encode_json(body, metadata)
  end
end
def puts_to_es(payload, increment_num)
header = { 'Content-type': 'application/json' }
def create_index
response = RestClient::Request.execute(
method: :put,
url: "#{server_uri}/#{index}/#{index}s/#{increment_num}",
headers: header,
payload: payload)
url: "#{server_uri}/#{index}")
end
def data_mappings
# metadata mappings
# send a PUT
{
"mappings": {
"_doc": {
"properties": {
"type": { "type": "keyword" },
"paste_metadata": { "type": "nested" },
"properties": {
"scrape_url": { "type": "string" },
"full_url": { "type": "string" },
"date": { "type": "string" },
"size": { "type": "string" },
"expire": { "type": "string" },
"title": { "type": "string" },
"syntax": { "type": "string" },
"user": { "type": "string" },
"hits": { "type": "string" }
}
"paste_text": { "type": "string" }
}
}
}
}
# Sends a single document to Elasticsearch.
#
# The request targets "<server_uri>/<index>/<doctype>" and carries a JSON
# content-type header.
#
# @param paste_json [Object] request payload (presumably a JSON string for
#   one paste - confirm against callers)
# @param method [Symbol, nil] HTTP verb; DEFAULT_METHOD is used when nil
# @return the value returned by RestClient::Request.execute
def json_to_es(paste_json, method = nil)
  http_verb = method || DEFAULT_METHOD
  target_url = "#{server_uri}/#{index}/#{doctype}"
  RestClient::Request.execute(
    method: http_verb,
    url: target_url,
    headers: { 'Content-type': 'application/json' },
    payload: paste_json
  )
end
# Sends every document in the collection to Elasticsearch, one request per
# document, by delegating to #json_to_es with its default HTTP method.
#
# @param array_of_paste_json [Array] payloads to send, one per request
# @return [Array] the same collection that was passed in
def json_to_es_bulk(array_of_paste_json)
  array_of_paste_json.each do |paste_json|
    # json_to_es is the single-document sender; there is no `to_es` method.
    json_to_es(paste_json)
  end
end
end

2
lib/examples/examples.rb Normal file → Executable file
View file

@ -15,7 +15,7 @@ api_dev_key = ENV['pastebin_api_key']
# prepare some sample paste data to send
paste_data = 'this is a test paste two two two.'
# prepare our paste params
params = { "api_dev_key": api_dev_key, "api_option": "paste", "api_paste_code": paste_data }
params = { "api_dev_key": api_dev_key, "api_option": 'paste', "api_paste_code": paste_data }
puts pb.create_paste(params)
#### SCRAPE PUBLIC PASTES

View file

@ -1,16 +1,13 @@
module PastebinnerError
class ArgumentError < StandardError
def message
"Invalid argument"
'Invalid argument'
end
end
class ConfigError < StandardError
def message
"Invalid configuration"
'Invalid configuration'
end
end
end

View file

@ -17,12 +17,12 @@ class Pastebinner
end
# this should be a hash of { endpoint_name: '/url_endpoint.php'}
ENDPOINTS = { :login => '/api_login.php',
:post => '/api_post.php',
:raw => '/api_raw.php',
:scraping => '/api_scraping.php',
:scrape_item => '/api_scrape_item.php',
:scrape_item_meta => '/api_scrape_item_meta.php' }
ENDPOINTS = { login: '/api_login.php',
post: '/api_post.php',
raw: '/api_raw.php',
scraping: '/api_scraping.php',
scrape_item: '/api_scrape_item.php',
scrape_item_meta: '/api_scrape_item_meta.php' }.freeze
# basic example hash for creating a paste:
# params = { 'api_dev_key': @api_dev_key, 'api_option': 'paste', 'api_paste_code': paste_data }
@ -46,27 +46,26 @@ class Pastebinner
def api_user_key
# returns a user session key that can be used as the api_user_key param
@api_user_key ||= RestClient::Request.execute({
@api_user_key ||= RestClient::Request.execute(
method: :post,
url: @base_api_url + ENDPOINTS[:login],
payload: { 'api_dev_key': @api_dev_key,
'api_user_name': @username,
'api_user_password': @password }})
'api_user_password': @password }
)
end
def list_user_pastes
params = { 'api_dev_key': api_dev_key,
'api_user_key': api_user_key,
'api_results_limit': '100',
'api_option': 'list'
}
'api_option': 'list' }
execute_query(:api_post, params)
end
def list_trending_pastes
params = { 'api_dev_key': api_dev_key,
'api_option': 'trends'
}
'api_option': 'trends' }
execute_query(:api_post, params)
end
@ -74,8 +73,7 @@ class Pastebinner
params = { 'api_dev_key': api_dev_key,
'api_user_key': api_user_key,
'api_paste_key': api_paste_key,
'api_option': 'show_paste'
}
'api_option': 'show_paste' }
execute_query(:api_post, params)
end
@ -84,67 +82,54 @@ class Pastebinner
params = { 'api_dev_key': api_dev_key,
'api_user_key': api_user_key,
'api_paste_key': api_paste_key,
'api_option': 'delete'
}
'api_option': 'delete' }
execute_query(:api_post, params)
end
def get_user_info
params = { 'api_dev_key': api_dev_key,
}
params = { 'api_dev_key': api_dev_key }
end
def api_post(params)
response = RestClient::Request.execute(
method: :post,
url: @base_api_url + ENDPOINTS[:post],
payload: params)
payload: params
)
end
# params is optional for now; it is meant for query options, e.g. ?lang=ruby to query a specific language
def scrape_public_pastes(params = nil)
def scrape_public_pastes(_params = nil)
response = RestClient::Request.execute(
method: :get,
url: @scraping_api_url + ENDPOINTS[:scraping])
url: @scraping_api_url + ENDPOINTS[:scraping]
)
end
# will extract just the keys from recent public pastes
def get_unique_paste_keys(public_pastes)
pp = JSON.parse(public_pastes)
pp.map {|p| p['key']}
pp.map { |p| p['key'] }
end
def raw_paste_data(unique_paste_key)
response = RestClient::Request.execute(
method: :get,
url: @scraping_api_url + ENDPOINTS[:scrape_item] + "?i=#{unique_paste_key}")
url: @scraping_api_url + ENDPOINTS[:scrape_item] + "?i=#{unique_paste_key}"
)
end
def raw_paste_metadata(unique_paste_key)
response = RestClient::Request.execute(
method: :get,
url: @scraping_api_url + ENDPOINTS[:scrape_item_meta] + "?i=#{unique_paste_key}")
url: @scraping_api_url + ENDPOINTS[:scrape_item_meta] + "?i=#{unique_paste_key}"
)
response
end
##### PREPARING THE PASTES FOR SERIALIZATION FOR ES CONFORMING TO PER INDEX SEARCHING
##### SEE - https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html#_custom_type_field
# Issues an HTTP request with a JSON content-type header to the given URI.
#
# @param es_uri [Object] URL the request is sent to
# @param payload [Object] request body
# @param method [Symbol] HTTP verb handed to RestClient (e.g. :post)
# @return the value returned by RestClient::Request.execute
def http_to_es(es_uri, payload, method)
  json_headers = { 'Content-type': 'application/json' }
  RestClient::Request.execute(
    method: method,
    url: es_uri,
    headers: json_headers,
    payload: payload
  )
end
# Sends a PUT request to the URL formed by appending +index+ to +es_uri+.
#
# The two values are joined with +, so no "/" separator is inserted here.
# NOTE(review): presumably callers pass an es_uri ending in "/" - confirm.
#
# @param es_uri [String] base URL
# @param index [String] index name appended to the base URL
# @return the value returned by RestClient::Request.execute
def create_index_es(es_uri, index)
  index_url = es_uri + index
  RestClient::Request.execute(
    method: :put,
    url: index_url
  )
end
def hash_pastes(keys)
keys.map do |key|
raw_paste = self.raw_paste_data(key).body
@ -153,16 +138,19 @@ class Pastebinner
end
end
# Returns the JSON serialization of each paste hash built by #hash_pastes.
#
# @param keys [Array] paste keys, passed straight through to #hash_pastes
# @return [Array<String>] one JSON string per element returned by #hash_pastes
def json_pastes(keys)
  hash_pastes(keys).map(&:to_json)
end
# Combines a paste's text and its metadata into a single hash.
#
# @param raw_paste_data [Object] stored under :paste_text
# @param raw_paste_metadata [Object] stored under :paste_metadata
# @return [Hash] { paste_metadata: ..., paste_text: ... }
def hash_paste(raw_paste_data, raw_paste_metadata)
  {
    paste_metadata: raw_paste_metadata,
    paste_text: raw_paste_data
  }
end
def send_es_bulk(esi_uri, json_data)
method = :post
json_data.each do |payload|
self.http_to_es(esi_uri, payload, method)
end
# Serializes one paste (text plus metadata) to a JSON string by building the
# hash with #hash_paste and calling #to_json on it.
#
# @param raw_paste_data [Object] paste text, passed to #hash_paste
# @param raw_paste_metadata [Object] paste metadata, passed to #hash_paste
# @return [String] JSON form of the hash returned by #hash_paste
def json_paste(raw_paste_data, raw_paste_metadata)
  paste = hash_paste(raw_paste_data, raw_paste_metadata)
  paste.to_json
end
def data_mappings
@ -172,20 +160,20 @@ class Pastebinner
"mappings": {
"_doc": {
"properties": {
"type": { "type": "keyword" },
"paste_metadata": { "type": "nested" },
"properties": [ {
"scrape_url": { "type": "string" },
"full_url": { "type": "string" },
"date": { "type": "string" },
"size": { "type": "string" },
"expire": { "type": "string" },
"title": { "type": "string" },
"syntax": { "type": "string" },
"user": { "type": "string" },
"hits": { "type": "string" }
} ],
"paste_text": { "type": "string" }
"type": { "type": 'keyword' },
"paste_metadata": { "type": 'nested' },
"properties": [{
"scrape_url": { "type": 'string' },
"full_url": { "type": 'string' },
"date": { "type": 'string' },
"size": { "type": 'string' },
"expire": { "type": 'string' },
"title": { "type": 'string' },
"syntax": { "type": 'string' },
"user": { "type": 'string' },
"hits": { "type": 'string' }
}],
"paste_text": { "type": 'string' }
}
}
}
@ -194,14 +182,13 @@ class Pastebinner
# keep this method private so we are not letting anyone run any method in our program
private
# this will be the main way to execute any of these methods. this has the exception handling taken care of.
# Invokes the method named by +selector+ on this object with +args+.
#
# A RestClient::ExceptionWithResponse raised by the call is rescued here:
# its message is printed with puts and nil is returned instead.
#
# @param selector [Symbol, String] name of the method to invoke via send
# @param args [Array] arguments forwarded to that method
# @return the invoked method's result, or nil when the error was rescued
def execute_query(selector, *args)
  send(selector, *args)
rescue RestClient::ExceptionWithResponse => error
  puts error.message
end
# make my own exception class
# inherit ruby standard error class
end

View file

@ -1,9 +1,5 @@
class PastebinnerError < StandardError
def InvalidArgument; end
def InvalidArgument
end
def ConfigError
end
def ConfigError; end
end

View file

@ -6,22 +6,22 @@ class OptionParser
OptParse.new do |opts|
opts.default_argv = argv
opts.banner = "Usage: pastebinner [options]"
opts.banner = 'Usage: pastebinner [options]'
opts.on('-h', '--help', 'Show this help messae') do ||
opts.on('-h', '--help', 'Show this help messae') do
puts opts
exit
end
opts.on('-s', '--scrape_public', 'Scrape public pastes') do |s|
opts.on('-s', '--scrape_public', 'Scrape public pastes') do |_s|
options[:s] = true
end
opts.on('-r', '--raw', 'Raw paste. Requires --key passed with a valid key') do |r|
opts.on('-r', '--raw', 'Raw paste. Requires --key passed with a valid key') do |_r|
options[:r] = true
end
opts.on('-g', '--get_keys', 'Get unique paste keys from public pastes') do |g|
opts.on('-g', '--get_keys', 'Get unique paste keys from public pastes') do |_g|
options[:g] = true
end
@ -29,11 +29,11 @@ class OptionParser
options[:k] = k
end
opts.on('-d', '--download', 'Download all public pastes to data directory') do |d|
opts.on('-d', '--download', 'Download all public pastes to data directory') do |_d|
options[:d] = true
end
opts.on('-t', '--trending', 'Trending pastes') do |t|
opts.on('-t', '--trending', 'Trending pastes') do |_t|
options[:t] = true
end
opts.parse!

View file

@ -1,3 +1,3 @@
module Pastebinner
VERSION = "0.1.0"
VERSION = '0.1.0'.freeze
end

View file

@ -1,40 +1,39 @@
lib = File.expand_path("../lib", __FILE__)
lib = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "pastebinner/version"
require 'pastebinner/version'
Gem::Specification.new do |spec|
spec.name = "pastebinner"
spec.name = 'pastebinner'
spec.version = Pastebinner::VERSION
spec.authors = ["Brendan McDevitt"]
spec.email = ["brendan@mcdevitt.tech"]
spec.authors = ['Brendan McDevitt']
spec.email = ['brendan@mcdevitt.tech']
spec.summary = "A ruby client library for interacting with the pastebin API."
spec.homepage = "https://git.mcdevitt.tech/bpmcdevitt/pastebinner"
spec.summary = 'A ruby client library for interacting with the pastebin API.'
spec.homepage = 'https://git.mcdevitt.tech/bpmcdevitt/pastebinner'
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
# to allow pushing to a single host or delete this section to allow pushing to any host.
if spec.respond_to?(:metadata)
spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
else
raise "RubyGems 2.0 or newer is required to protect against " \
"public gem pushes."
raise 'RubyGems 2.0 or newer is required to protect against ' \
'public gem pushes.'
end
# Specify which files should be added to the gem when it is released.
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
spec.files = Dir.chdir(File.expand_path(__dir__)) do
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
end
spec.bindir = "bin"
spec.bindir = 'bin'
spec.executables = ['pastebinner']
spec.require_paths = ["lib"]
spec.require_paths = ['lib']
spec.add_development_dependency "bundler", "~> 2.0"
spec.add_development_dependency "rake", "~> 10.0"
spec.add_development_dependency "rspec", "~> 3.0"
spec.add_runtime_dependency "rest-client", "~> 2.0"
spec.add_runtime_dependency "json", "~> 2.0"
spec.add_runtime_dependency "pry", "~> 0.11"
spec.add_development_dependency 'bundler', '~> 2.0'
spec.add_development_dependency 'rake', '~> 10.0'
spec.add_development_dependency 'rspec', '~> 3.0'
spec.add_runtime_dependency 'json', '~> 2.0'
spec.add_runtime_dependency 'pry', '~> 0.11'
spec.add_runtime_dependency 'rest-client', '~> 2.0'
end

View file

@ -1,9 +1,9 @@
RSpec.describe Pastebinner do
it "has a version number" do
it 'has a version number' do
expect(Pastebinner::VERSION).not_to be nil
end
it "does something useful" do
it 'does something useful' do
expect(false).to eq(true)
end
end

View file

@ -1,9 +1,9 @@
require "bundler/setup"
require "pastebinner"
require 'bundler/setup'
require 'pastebinner'
RSpec.configure do |config|
# Enable flags like --only-failures and --next-failure
config.example_status_persistence_file_path = ".rspec_status"
config.example_status_persistence_file_path = '.rspec_status'
# Disable RSpec exposing methods globally on `Module` and `main`
config.disable_monkey_patching!