Improve the Elasticsearch helper and make Pastebinner serialize pastes to JSON
This commit is contained in:
parent
ba20e8dc41
commit
50b052a9bd
14 changed files with 256 additions and 296 deletions
4
Gemfile
4
Gemfile
|
@ -1,6 +1,6 @@
|
|||
source "https://rubygems.org"
|
||||
source 'https://rubygems.org'
|
||||
|
||||
git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
|
||||
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
||||
|
||||
# Specify your gem's dependencies in pastebinner.gemspec
|
||||
gemspec
|
||||
|
|
6
Rakefile
6
Rakefile
|
@ -1,6 +1,6 @@
|
|||
require "bundler/gem_tasks"
|
||||
require "rspec/core/rake_task"
|
||||
require 'bundler/gem_tasks'
|
||||
require 'rspec/core/rake_task'
|
||||
|
||||
RSpec::Core::RakeTask.new(:spec)
|
||||
|
||||
task :default => :spec
|
||||
task default: :spec
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
#!/usr/bin/env ruby
|
||||
|
||||
require "../lib/pastebinner"
|
||||
require '../lib/pastebinner'
|
||||
require '../lib/elastic_search_helper'
|
||||
|
||||
# You can add fixtures and/or initialization code here to make experimenting
|
||||
# with your gem easier. You can also use a different console, if you like.
|
||||
|
||||
# (If you use this, don't forget to add pry to your Gemfile!)
|
||||
require "pry"
|
||||
require 'pry'
|
||||
pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
||||
binding.pry
|
||||
|
|
|
@ -11,8 +11,8 @@ pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pa
|
|||
def download_pastes(pb)
|
||||
pub_pastes = pb.scrape_public_pastes
|
||||
keys = pb.get_unique_paste_keys(pub_pastes)
|
||||
data_dir = "../data/"
|
||||
filename = "pastebin_paste_key"
|
||||
data_dir = '../data/'
|
||||
filename = 'pastebin_paste_key'
|
||||
binding.pry
|
||||
keys.map do |id|
|
||||
if File.exist?(data_dir + filename + "_#{id}")
|
||||
|
@ -27,7 +27,7 @@ options = OptionParser.parse!
|
|||
|
||||
if options[:s]
|
||||
puts pb.scrape_public_pastes
|
||||
elsif options[:r] and options[:k]
|
||||
elsif options[:r] && options[:k]
|
||||
key = options[:k]
|
||||
puts pb.raw_paste_data(key)
|
||||
elsif options[:t]
|
||||
|
@ -36,9 +36,9 @@ elsif options[:g]
|
|||
r = pb.scrape_public_pastes
|
||||
puts pb.get_unique_paste_keys(r)
|
||||
elsif options[:d]
|
||||
puts "Downloading paste data into the data directory..."
|
||||
puts 'Downloading paste data into the data directory...'
|
||||
download_pastes(pb)
|
||||
puts "Complete."
|
||||
puts 'Complete.'
|
||||
elsif options[:k]
|
||||
puts '-k or --key= requires -r,--raw'
|
||||
exit
|
||||
|
|
|
@ -1,53 +1,33 @@
|
|||
require 'elasticsearch'
|
||||
|
||||
class ElasticSearchHelper
|
||||
attr_accessor :server_uri, :index
|
||||
attr_accessor :server_uri, :index, :pastebinner, :doctype
|
||||
DEFAULT_METHOD = :post
|
||||
|
||||
def initialize(server_uri, index)
|
||||
def initialize(server_uri, index, doctype='_doc')
|
||||
@server_uri = server_uri
|
||||
@index = index
|
||||
@doctype = doctype
|
||||
@pastebinner = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
||||
end
|
||||
|
||||
# will build an array of 50 pastes to ship to es
|
||||
def build_json_array(pb, keys)
|
||||
json_for_es = keys.map do |k|
|
||||
pb.encode_json(pb.raw_paste_data(k), pb.raw_paste_metadata(k))
|
||||
end
|
||||
end
|
||||
|
||||
def puts_to_es(payload, increment_num)
|
||||
header = { 'Content-type': 'application/json' }
|
||||
def create_index
|
||||
response = RestClient::Request.execute(
|
||||
method: :put,
|
||||
url: "#{server_uri}/#{index}/#{index}s/#{increment_num}",
|
||||
headers: header,
|
||||
payload: payload)
|
||||
url: "#{server_uri}/#{index}")
|
||||
end
|
||||
|
||||
def data_mappings
|
||||
# metadata mappings
|
||||
# send a PUT
|
||||
{
|
||||
"mappings": {
|
||||
"_doc": {
|
||||
"properties": {
|
||||
"type": { "type": "keyword" },
|
||||
"paste_metadata": { "type": "nested" },
|
||||
"properties": {
|
||||
"scrape_url": { "type": "string" },
|
||||
"full_url": { "type": "string" },
|
||||
"date": { "type": "string" },
|
||||
"size": { "type": "string" },
|
||||
"expire": { "type": "string" },
|
||||
"title": { "type": "string" },
|
||||
"syntax": { "type": "string" },
|
||||
"user": { "type": "string" },
|
||||
"hits": { "type": "string" }
|
||||
}
|
||||
"paste_text": { "type": "string" }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
# Sends one JSON document to "#{server_uri}/#{index}/#{doctype}".
#
# paste_json - request payload; passed to RestClient unchanged and labelled
#              with a JSON content type header.
# method     - HTTP verb to use; DEFAULT_METHOD is used when this is nil.
#
# Returns the value of RestClient::Request.execute.
def json_to_es(paste_json, method = nil)
  verb = method || DEFAULT_METHOD
  RestClient::Request.execute(
    method: verb,
    url: "#{server_uri}/#{index}/#{doctype}",
    headers: { 'Content-type': 'application/json' },
    payload: paste_json
  )
end
|
||||
|
||||
# Sends every JSON document in +array_of_paste_json+ to Elasticsearch, one
# request per document, by delegating to #json_to_es with its default method.
#
# The previous body called +to_es+, which this helper does not define, so any
# call raised NoMethodError; the single-document sender is #json_to_es.
#
# array_of_paste_json - enumerable of JSON payloads.
#
# Returns the value of +each+ on the given collection.
def json_to_es_bulk(array_of_paste_json)
  array_of_paste_json.each do |paste_json|
    json_to_es(paste_json)
  end
end
|
||||
|
||||
end
|
||||
|
|
2
lib/examples/examples.rb
Normal file → Executable file
2
lib/examples/examples.rb
Normal file → Executable file
|
@ -15,7 +15,7 @@ api_dev_key = ENV['pastebin_api_key']
|
|||
# prepare some sample paste data to send
|
||||
paste_data = 'this is a test paste two two two.'
|
||||
# prepare our paste params
|
||||
params = { "api_dev_key": api_dev_key, "api_option": "paste", "api_paste_code": paste_data }
|
||||
params = { "api_dev_key": api_dev_key, "api_option": 'paste', "api_paste_code": paste_data }
|
||||
puts pb.create_paste(params)
|
||||
|
||||
#### SCRAPE PUBLIC PASTES
|
||||
|
|
|
@ -1,16 +1,13 @@
|
|||
module PastebinnerError
|
||||
|
||||
class ArgumentError < StandardError
|
||||
def message
|
||||
"Invalid argument"
|
||||
'Invalid argument'
|
||||
end
|
||||
end
|
||||
|
||||
class ConfigError < StandardError
|
||||
def message
|
||||
"Invalid configuration"
|
||||
'Invalid configuration'
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
|
|
@ -17,12 +17,12 @@ class Pastebinner
|
|||
end
|
||||
|
||||
# this should be a hash of { endpoint_name: '/url_endpoint.php'}
|
||||
ENDPOINTS = { :login => '/api_login.php',
|
||||
:post => '/api_post.php',
|
||||
:raw => '/api_raw.php',
|
||||
:scraping => '/api_scraping.php',
|
||||
:scrape_item => '/api_scrape_item.php',
|
||||
:scrape_item_meta => '/api_scrape_item_meta.php' }
|
||||
ENDPOINTS = { login: '/api_login.php',
|
||||
post: '/api_post.php',
|
||||
raw: '/api_raw.php',
|
||||
scraping: '/api_scraping.php',
|
||||
scrape_item: '/api_scrape_item.php',
|
||||
scrape_item_meta: '/api_scrape_item_meta.php' }.freeze
|
||||
|
||||
# basic example hash for creating a paste:
|
||||
# params = { 'api_dev_key': @api_dev_key, 'api_option': 'paste', 'api_paste_code': paste_data }
|
||||
|
@ -46,27 +46,26 @@ class Pastebinner
|
|||
|
||||
def api_user_key
|
||||
# returns a user session key that can be used as the api_user_key param
|
||||
@api_user_key ||= RestClient::Request.execute({
|
||||
@api_user_key ||= RestClient::Request.execute(
|
||||
method: :post,
|
||||
url: @base_api_url + ENDPOINTS[:login],
|
||||
payload: { 'api_dev_key': @api_dev_key,
|
||||
'api_user_name': @username,
|
||||
'api_user_password': @password }})
|
||||
'api_user_password': @password }
|
||||
)
|
||||
end
|
||||
|
||||
def list_user_pastes
|
||||
params = { 'api_dev_key': api_dev_key,
|
||||
'api_user_key': api_user_key,
|
||||
'api_results_limit': '100',
|
||||
'api_option': 'list'
|
||||
}
|
||||
'api_option': 'list' }
|
||||
execute_query(:api_post, params)
|
||||
end
|
||||
|
||||
def list_trending_pastes
|
||||
params = { 'api_dev_key': api_dev_key,
|
||||
'api_option': 'trends'
|
||||
}
|
||||
'api_option': 'trends' }
|
||||
execute_query(:api_post, params)
|
||||
end
|
||||
|
||||
|
@ -74,8 +73,7 @@ class Pastebinner
|
|||
params = { 'api_dev_key': api_dev_key,
|
||||
'api_user_key': api_user_key,
|
||||
'api_paste_key': api_paste_key,
|
||||
'api_option': 'show_paste'
|
||||
}
|
||||
'api_option': 'show_paste' }
|
||||
execute_query(:api_post, params)
|
||||
end
|
||||
|
||||
|
@ -84,67 +82,54 @@ class Pastebinner
|
|||
params = { 'api_dev_key': api_dev_key,
|
||||
'api_user_key': api_user_key,
|
||||
'api_paste_key': api_paste_key,
|
||||
'api_option': 'delete'
|
||||
}
|
||||
'api_option': 'delete' }
|
||||
execute_query(:api_post, params)
|
||||
end
|
||||
|
||||
def get_user_info
|
||||
params = { 'api_dev_key': api_dev_key,
|
||||
}
|
||||
params = { 'api_dev_key': api_dev_key }
|
||||
end
|
||||
|
||||
def api_post(params)
|
||||
response = RestClient::Request.execute(
|
||||
method: :post,
|
||||
url: @base_api_url + ENDPOINTS[:post],
|
||||
payload: params)
|
||||
payload: params
|
||||
)
|
||||
end
|
||||
|
||||
# params is optional for now. to query specific language ?lang=ruby as an example
|
||||
def scrape_public_pastes(params = nil)
|
||||
def scrape_public_pastes(_params = nil)
|
||||
response = RestClient::Request.execute(
|
||||
method: :get,
|
||||
url: @scraping_api_url + ENDPOINTS[:scraping])
|
||||
url: @scraping_api_url + ENDPOINTS[:scraping]
|
||||
)
|
||||
end
|
||||
|
||||
# will extract just the keys from recent public pastes
|
||||
def get_unique_paste_keys(public_pastes)
|
||||
pp = JSON.parse(public_pastes)
|
||||
pp.map {|p| p['key']}
|
||||
pp.map { |p| p['key'] }
|
||||
end
|
||||
|
||||
def raw_paste_data(unique_paste_key)
|
||||
response = RestClient::Request.execute(
|
||||
method: :get,
|
||||
url: @scraping_api_url + ENDPOINTS[:scrape_item] + "?i=#{unique_paste_key}")
|
||||
url: @scraping_api_url + ENDPOINTS[:scrape_item] + "?i=#{unique_paste_key}"
|
||||
)
|
||||
end
|
||||
|
||||
def raw_paste_metadata(unique_paste_key)
|
||||
response = RestClient::Request.execute(
|
||||
method: :get,
|
||||
url: @scraping_api_url + ENDPOINTS[:scrape_item_meta] + "?i=#{unique_paste_key}")
|
||||
url: @scraping_api_url + ENDPOINTS[:scrape_item_meta] + "?i=#{unique_paste_key}"
|
||||
)
|
||||
response
|
||||
end
|
||||
|
||||
##### PREPARING THE PASTES FOR SERIALIZATION FOR ES CONFORMING TO PER INDEX SEARCHING
|
||||
##### SEE - https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html#_custom_type_field
|
||||
|
||||
def http_to_es(es_uri, payload, method)
|
||||
header = { 'Content-type': 'application/json' }
|
||||
response = RestClient::Request.execute(
|
||||
method: method,
|
||||
url: es_uri,
|
||||
headers: header,
|
||||
payload: payload)
|
||||
end
|
||||
|
||||
def create_index_es(es_uri, index)
|
||||
response = RestClient::Request.execute(
|
||||
method: :put,
|
||||
url: es_uri + index)
|
||||
end
|
||||
|
||||
def hash_pastes(keys)
|
||||
keys.map do |key|
|
||||
raw_paste = self.raw_paste_data(key).body
|
||||
|
@ -153,16 +138,19 @@ class Pastebinner
|
|||
end
|
||||
end
|
||||
|
||||
# Builds the paste hashes for +keys+ via #hash_pastes and converts each one
# with +to_json+.
#
# keys - paste keys, forwarded unchanged to #hash_pastes.
#
# Returns an array holding the +to_json+ result for every paste hash.
def json_pastes(keys)
  hash_pastes(keys).map(&:to_json)
end
|
||||
|
||||
# Pairs a paste's text with its metadata in a single hash.
#
# raw_paste_data     - stored under :paste_text, unchanged.
# raw_paste_metadata - stored under :paste_metadata, unchanged.
#
# Returns a Hash with the symbol keys :paste_metadata and :paste_text.
def hash_paste(raw_paste_data, raw_paste_metadata)
  {
    paste_metadata: raw_paste_metadata,
    paste_text: raw_paste_data
  }
end
|
||||
|
||||
def send_es_bulk(esi_uri, json_data)
|
||||
method = :post
|
||||
json_data.each do |payload|
|
||||
self.http_to_es(esi_uri, payload, method)
|
||||
end
|
||||
# Converts a single paste to JSON by building its hash with #hash_paste and
# calling +to_json+ on the result.
#
# raw_paste_data     - paste text, forwarded to #hash_paste.
# raw_paste_metadata - paste metadata, forwarded to #hash_paste.
#
# Returns the +to_json+ result for the combined hash.
def json_paste(raw_paste_data, raw_paste_metadata)
  hash_paste(raw_paste_data, raw_paste_metadata).to_json
end
|
||||
|
||||
def data_mappings
|
||||
|
@ -172,20 +160,20 @@ class Pastebinner
|
|||
"mappings": {
|
||||
"_doc": {
|
||||
"properties": {
|
||||
"type": { "type": "keyword" },
|
||||
"paste_metadata": { "type": "nested" },
|
||||
"properties": [ {
|
||||
"scrape_url": { "type": "string" },
|
||||
"full_url": { "type": "string" },
|
||||
"date": { "type": "string" },
|
||||
"size": { "type": "string" },
|
||||
"expire": { "type": "string" },
|
||||
"title": { "type": "string" },
|
||||
"syntax": { "type": "string" },
|
||||
"user": { "type": "string" },
|
||||
"hits": { "type": "string" }
|
||||
} ],
|
||||
"paste_text": { "type": "string" }
|
||||
"type": { "type": 'keyword' },
|
||||
"paste_metadata": { "type": 'nested' },
|
||||
"properties": [{
|
||||
"scrape_url": { "type": 'string' },
|
||||
"full_url": { "type": 'string' },
|
||||
"date": { "type": 'string' },
|
||||
"size": { "type": 'string' },
|
||||
"expire": { "type": 'string' },
|
||||
"title": { "type": 'string' },
|
||||
"syntax": { "type": 'string' },
|
||||
"user": { "type": 'string' },
|
||||
"hits": { "type": 'string' }
|
||||
}],
|
||||
"paste_text": { "type": 'string' }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -194,14 +182,13 @@ class Pastebinner
|
|||
|
||||
# keep this method private so we are not letting anyone run any method in our program
|
||||
private
|
||||
|
||||
# this will be the main way to execute any of these methods. this has the exception handling taken care of.
|
||||
# Invokes the method named by +selector+ on this object, forwarding +args+.
#
# selector - name of the method to call (dispatched with +send+).
# args     - arguments passed through to that method.
#
# Returns the called method's result. When the call raises
# RestClient::ExceptionWithResponse, the exception message is printed with
# +puts+ and nil is returned instead; other exceptions propagate.
def execute_query(selector, *args)
  send(selector, *args)
rescue RestClient::ExceptionWithResponse => e
  # Best-effort reporting: print the failure rather than re-raising.
  puts e.message
end
|
||||
# make my own exception class
|
||||
# inherit ruby standard error class
|
||||
end
|
||||
|
|
|
@ -1,9 +1,5 @@
|
|||
class PastebinnerError < StandardError
|
||||
def InvalidArgument; end
|
||||
|
||||
def InvalidArgument
|
||||
end
|
||||
|
||||
def ConfigError
|
||||
end
|
||||
|
||||
def ConfigError; end
|
||||
end
|
||||
|
|
|
@ -6,22 +6,22 @@ class OptionParser
|
|||
OptParse.new do |opts|
|
||||
opts.default_argv = argv
|
||||
|
||||
opts.banner = "Usage: pastebinner [options]"
|
||||
opts.banner = 'Usage: pastebinner [options]'
|
||||
|
||||
opts.on('-h', '--help', 'Show this help messae') do ||
|
||||
opts.on('-h', '--help', 'Show this help messae') do
|
||||
puts opts
|
||||
exit
|
||||
end
|
||||
|
||||
opts.on('-s', '--scrape_public', 'Scrape public pastes') do |s|
|
||||
opts.on('-s', '--scrape_public', 'Scrape public pastes') do |_s|
|
||||
options[:s] = true
|
||||
end
|
||||
|
||||
opts.on('-r', '--raw', 'Raw paste. Requires --key passed with a valid key') do |r|
|
||||
opts.on('-r', '--raw', 'Raw paste. Requires --key passed with a valid key') do |_r|
|
||||
options[:r] = true
|
||||
end
|
||||
|
||||
opts.on('-g', '--get_keys', 'Get unique paste keys from public pastes') do |g|
|
||||
opts.on('-g', '--get_keys', 'Get unique paste keys from public pastes') do |_g|
|
||||
options[:g] = true
|
||||
end
|
||||
|
||||
|
@ -29,11 +29,11 @@ class OptionParser
|
|||
options[:k] = k
|
||||
end
|
||||
|
||||
opts.on('-d', '--download', 'Download all public pastes to data directory') do |d|
|
||||
opts.on('-d', '--download', 'Download all public pastes to data directory') do |_d|
|
||||
options[:d] = true
|
||||
end
|
||||
|
||||
opts.on('-t', '--trending', 'Trending pastes') do |t|
|
||||
opts.on('-t', '--trending', 'Trending pastes') do |_t|
|
||||
options[:t] = true
|
||||
end
|
||||
opts.parse!
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
module Pastebinner
|
||||
VERSION = "0.1.0"
|
||||
VERSION = '0.1.0'.freeze
|
||||
end
|
||||
|
|
|
@ -1,40 +1,39 @@
|
|||
lib = File.expand_path("../lib", __FILE__)
|
||||
lib = File.expand_path('lib', __dir__)
|
||||
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
||||
require "pastebinner/version"
|
||||
require 'pastebinner/version'
|
||||
|
||||
Gem::Specification.new do |spec|
|
||||
spec.name = "pastebinner"
|
||||
spec.name = 'pastebinner'
|
||||
spec.version = Pastebinner::VERSION
|
||||
spec.authors = ["Brendan McDevitt"]
|
||||
spec.email = ["brendan@mcdevitt.tech"]
|
||||
spec.authors = ['Brendan McDevitt']
|
||||
spec.email = ['brendan@mcdevitt.tech']
|
||||
|
||||
spec.summary = "A ruby client library for interacting with the pastebin API."
|
||||
spec.homepage = "https://git.mcdevitt.tech/bpmcdevitt/pastebinner"
|
||||
spec.summary = 'A ruby client library for interacting with the pastebin API.'
|
||||
spec.homepage = 'https://git.mcdevitt.tech/bpmcdevitt/pastebinner'
|
||||
|
||||
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
||||
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
||||
if spec.respond_to?(:metadata)
|
||||
spec.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"
|
||||
spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
|
||||
else
|
||||
raise "RubyGems 2.0 or newer is required to protect against " \
|
||||
"public gem pushes."
|
||||
raise 'RubyGems 2.0 or newer is required to protect against ' \
|
||||
'public gem pushes.'
|
||||
end
|
||||
|
||||
# Specify which files should be added to the gem when it is released.
|
||||
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
||||
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
||||
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
||||
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
||||
end
|
||||
spec.bindir = "bin"
|
||||
spec.bindir = 'bin'
|
||||
spec.executables = ['pastebinner']
|
||||
|
||||
spec.require_paths = ["lib"]
|
||||
spec.require_paths = ['lib']
|
||||
|
||||
spec.add_development_dependency "bundler", "~> 2.0"
|
||||
spec.add_development_dependency "rake", "~> 10.0"
|
||||
spec.add_development_dependency "rspec", "~> 3.0"
|
||||
spec.add_runtime_dependency "rest-client", "~> 2.0"
|
||||
spec.add_runtime_dependency "json", "~> 2.0"
|
||||
spec.add_runtime_dependency "pry", "~> 0.11"
|
||||
spec.add_development_dependency 'bundler', '~> 2.0'
|
||||
spec.add_development_dependency 'rake', '~> 10.0'
|
||||
spec.add_development_dependency 'rspec', '~> 3.0'
|
||||
spec.add_runtime_dependency 'json', '~> 2.0'
|
||||
spec.add_runtime_dependency 'pry', '~> 0.11'
|
||||
spec.add_runtime_dependency 'rest-client', '~> 2.0'
|
||||
end
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
RSpec.describe Pastebinner do
|
||||
it "has a version number" do
|
||||
it 'has a version number' do
|
||||
expect(Pastebinner::VERSION).not_to be nil
|
||||
end
|
||||
|
||||
it "does something useful" do
|
||||
it 'does something useful' do
|
||||
expect(false).to eq(true)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
require "bundler/setup"
|
||||
require "pastebinner"
|
||||
require 'bundler/setup'
|
||||
require 'pastebinner'
|
||||
|
||||
RSpec.configure do |config|
|
||||
# Enable flags like --only-failures and --next-failure
|
||||
config.example_status_persistence_file_path = ".rspec_status"
|
||||
config.example_status_persistence_file_path = '.rspec_status'
|
||||
|
||||
# Disable RSpec exposing methods globally on `Module` and `main`
|
||||
config.disable_monkey_patching!
|
||||
|
|
Loading…
Add table
Reference in a new issue