This is now namespaced correctly, or at least much better than it was before.
This commit is contained in:
parent
f8d790e6df
commit
238d770d5e
14 changed files with 315 additions and 341 deletions
|
@ -11,7 +11,7 @@ pastebin_password
|
||||||
```
|
```
|
||||||
|
|
||||||
If you want to scrape pastes, you can view a json response of the latest pastes by using the ```-s``` or ```--scrape_public``` options.
|
If you want to scrape pastes, you can view a JSON response of the latest pastes by using the ```-s``` or ```--scrape_public``` option.
|
||||||
Creating pastes is built in, check ```lib/pastebinner.rb```, I am still working on adding in the functionality to use it on the command line.
|
Creating pastes is built in; see ```lib/pastebinner/api_client.rb```. I am still working on adding the functionality to use it from the command line.
|
||||||
The command line app can be used as follows:
|
The command line app can be used as follows:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
|
|
17
bin/console
17
bin/console
|
@ -2,21 +2,12 @@
|
||||||
|
|
||||||
require 'sidekiq'
|
require 'sidekiq'
|
||||||
require 'sidekiq/api'
|
require 'sidekiq/api'
|
||||||
require 'pry'
|
|
||||||
|
|
||||||
|
require 'bundler/setup'
|
||||||
require '../lib/pastebinner'
|
require 'pastebinner'
|
||||||
require '../helpers/elastic_search_helper'
|
|
||||||
require '../workers/paste_to_es'
|
|
||||||
require '../config/initializers/sidekiq'
|
|
||||||
|
|
||||||
require 'pry'
|
require 'pry'
|
||||||
|
pb = Pastebinner::ApiClient.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
||||||
# set restclient logging and setup a pb object
|
es = Pastebinner::ElasticSearchHelper.new(ENV['elastic_search_url'], 'pastes')
|
||||||
RestClient.log ='stdout'
|
|
||||||
|
|
||||||
# setup a pastebinner object and elastic search objects so we have them to work with right when we launch console
|
|
||||||
pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
|
||||||
es = ElasticSearchHelper.new(ENV['elastic_search_url'], 'pastes')
|
|
||||||
|
|
||||||
binding.pry
|
binding.pry
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
#!/usr/bin/env ruby
|
#!/usr/bin/env ruby
|
||||||
require '../lib/pastebinner'
|
require 'sidekiq'
|
||||||
require '../lib/pastebinner/option_parser'
|
require 'sidekiq/api'
|
||||||
require 'pry'
|
require 'bundler/setup'
|
||||||
|
require 'pastebinner'
|
||||||
|
|
||||||
# setup our object and grab a session key
|
# setup our object and grab a session key
|
||||||
pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
pb = Pastebinner::ApiClient.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
||||||
|
|
||||||
# set the commandline client to grab 50 pastes by default. this should be an option to config though once we add configuration methods
|
# set the commandline client to grab 50 pastes by default. this should be an option to config though once we add configuration methods
|
||||||
paste_max = 50
|
paste_max = 50
|
||||||
|
@ -39,10 +40,10 @@ def download_pastes_raw(pb, paste_max)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
options = OptionParser.parse!
|
options = Pastebinner::OptionParser.parse!
|
||||||
|
|
||||||
if options[:s]
|
if options[:s]
|
||||||
puts pb.scrape_public_paste(paste_max)
|
puts pb.scrape_public_pastes(paste_max)
|
||||||
elsif options[:r] && options[:k]
|
elsif options[:r] && options[:k]
|
||||||
key = options[:k]
|
key = options[:k]
|
||||||
puts pb.raw_paste_data(key)
|
puts pb.raw_paste_data(key)
|
||||||
|
|
14
examples/examples.rb
Executable file → Normal file
14
examples/examples.rb
Executable file → Normal file
|
@ -1,14 +1,7 @@
|
||||||
#!/usr/bin/env ruby
|
|
||||||
|
|
||||||
require '../lib/pastebinner'
|
|
||||||
|
|
||||||
######################## TESTING ####################################################
|
|
||||||
#####################################################################################
|
|
||||||
|
|
||||||
#### INITIAL STEPS
|
#### INITIAL STEPS
|
||||||
|
#
|
||||||
# setup our object and grab a session key
|
# setup our object and grab a session key
|
||||||
pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
pb = Pastebinner::ApiClient.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
||||||
api_dev_key = ENV['pastebin_api_key']
|
api_dev_key = ENV['pastebin_api_key']
|
||||||
|
|
||||||
#### CREATE PASTE
|
#### CREATE PASTE
|
||||||
|
@ -29,3 +22,6 @@ puts pb.raw_paste_data('Gkb4ukK9')
|
||||||
|
|
||||||
#### SCRAPE RAW METADATA OF A PASTE KEY (WORKS WITH WHITELISTED IP ONLY)
|
#### SCRAPE RAW METADATA OF A PASTE KEY (WORKS WITH WHITELISTED IP ONLY)
|
||||||
puts pb.raw_paste_metadata('Gkb4ukK9')
|
puts pb.raw_paste_metadata('Gkb4ukK9')
|
||||||
|
|
||||||
|
|
||||||
|
### MORE EXAMPLES TO COME
|
||||||
|
|
|
@ -1,47 +0,0 @@
|
||||||
class ElasticSearchHelper
|
|
||||||
attr_accessor :server_uri, :index, :pastebinner, :doctype
|
|
||||||
DEFAULT_METHOD = :post
|
|
||||||
|
|
||||||
def initialize(server_uri, index, doctype='paste')
|
|
||||||
@server_uri = server_uri
|
|
||||||
@index = index
|
|
||||||
@doctype = doctype
|
|
||||||
@pastebinner = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
|
||||||
end
|
|
||||||
|
|
||||||
def delete_index
|
|
||||||
response = RestClient::Request.execute(
|
|
||||||
method: :delete,
|
|
||||||
url: "#{server_uri}/#{index}")
|
|
||||||
end
|
|
||||||
|
|
||||||
def get_mappings
|
|
||||||
response = RestClient::Request.execute(
|
|
||||||
method: :get,
|
|
||||||
url: "#{server_uri}/#{index}/_mappings?pretty")
|
|
||||||
end
|
|
||||||
|
|
||||||
def update_mapping(mapping_json)
|
|
||||||
header = { 'Content-type': 'application/json' }
|
|
||||||
response = RestClient::Request.execute(
|
|
||||||
method: :put,
|
|
||||||
url: "#{server_uri}/#{index}/_mapping/#{doctype}",
|
|
||||||
payload: mapping_json,
|
|
||||||
headers: header)
|
|
||||||
end
|
|
||||||
|
|
||||||
def json_to_es(paste_json, method=nil)
|
|
||||||
header = { 'Content-type': 'application/json' }
|
|
||||||
response = RestClient::Request.execute(
|
|
||||||
method: method ||= DEFAULT_METHOD,
|
|
||||||
url: "#{server_uri}/#{index}/#{doctype}",
|
|
||||||
headers: header,
|
|
||||||
payload: paste_json)
|
|
||||||
end
|
|
||||||
|
|
||||||
def json_to_es_bulk(array_of_paste_json)
|
|
||||||
array_of_paste_json.each do |paste_json|
|
|
||||||
self.json_to_es(paste_json)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -1,12 +0,0 @@
|
||||||
require 'sidekiq'
|
|
||||||
require 'sidekiq/api'
|
|
||||||
|
|
||||||
# load up the redis cfg
|
|
||||||
require '../config/initializers/sidekiq'
|
|
||||||
|
|
||||||
class SidekiqHelper
|
|
||||||
|
|
||||||
def initialize
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
|
|
@ -1,13 +0,0 @@
|
||||||
module PastebinnerError
|
|
||||||
class ArgumentError < StandardError
|
|
||||||
def message
|
|
||||||
'Invalid argument'
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
class ConfigError < StandardError
|
|
||||||
def message
|
|
||||||
'Invalid configuration'
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
|
@ -1,188 +1,16 @@
|
||||||
#!/usr/bin/env ruby
|
# change this to a config module soon:
|
||||||
# author: brendan mcdevitt
|
require '../config/initializers/sidekiq'
|
||||||
# a ruby wrapper around all of the methods pastebin provides with its api
|
|
||||||
# official docs from pastebin on their api can be found at https://pastebin.com/api
|
|
||||||
require 'rest-client'
|
require 'rest-client'
|
||||||
require 'json'
|
require 'json'
|
||||||
require 'yaml'
|
require 'yaml'
|
||||||
|
require 'optparse'
|
||||||
|
require 'pastebinner/version'
|
||||||
|
require 'pastebinner/option_parser'
|
||||||
|
require 'pastebinner/api_client'
|
||||||
|
require 'pastebinner/helpers/elastic_search_helper'
|
||||||
|
require 'pastebinner/workers/paste_to_es'
|
||||||
|
|
||||||
class Pastebinner
|
module Pastebinner
|
||||||
attr_accessor :api_dev_key, :username, :password
|
class Error < StandardError; end
|
||||||
|
# your code goes here
|
||||||
def initialize(api_dev_key, username, password)
|
|
||||||
@api_dev_key = api_dev_key
|
|
||||||
@username = username
|
|
||||||
@password = password
|
|
||||||
@base_api_url = 'https://pastebin.com/api'
|
|
||||||
@scraping_api_url = 'https://scrape.pastebin.com'
|
|
||||||
end
|
|
||||||
|
|
||||||
# this should be a hash of { endpoint_name: '/url_endpoint.php'}
|
|
||||||
ENDPOINTS = { login: '/api_login.php',
|
|
||||||
post: '/api_post.php',
|
|
||||||
raw: '/api_raw.php',
|
|
||||||
scraping: '/api_scraping.php',
|
|
||||||
scrape_item: '/api_scrape_item.php',
|
|
||||||
scrape_item_meta: '/api_scrape_item_meta.php' }.freeze
|
|
||||||
|
|
||||||
# basic example hash for creating a paste:
|
|
||||||
# params = { 'api_dev_key': @api_dev_key, 'api_option': 'paste'. 'api_paste_code': paste_data}
|
|
||||||
|
|
||||||
# required params:
|
|
||||||
# api_dev_key - your unique developer api key
|
|
||||||
# api_option - set as paste, this will indicate you want to create a new paste
|
|
||||||
# api_paste_code - this is the text that will be written inside of your paste
|
|
||||||
|
|
||||||
# optional params:
|
|
||||||
# api_user_key - this parameter is part of the login system, which is explained further down the page
|
|
||||||
# api_paste_name - this will be the name / title of your paste
|
|
||||||
# api_paste_format - this will be the syntax highlighting value, which is explained in detail further down the page
|
|
||||||
# api_paste_private - this makes a paste public, unlisted, or private, public = 0, unlisted = 1, private = 2
|
|
||||||
# api_paste_expire_date - this sets the expiration date of your paste, the values are explained further down the page
|
|
||||||
|
|
||||||
# example - params = { "api_dev_key": api_dev_key, "api_option": "paste", "api_paste_code": paste_data }
|
|
||||||
def create_paste(params)
|
|
||||||
execute_query(:api_post, params)
|
|
||||||
end
|
|
||||||
|
|
||||||
def api_user_key
|
|
||||||
# returns a user session key that can be used as the api_user_key param
|
|
||||||
@api_user_key ||= RestClient::Request.execute(
|
|
||||||
method: :post,
|
|
||||||
url: @base_api_url + ENDPOINTS[:login],
|
|
||||||
payload: { 'api_dev_key': @api_dev_key,
|
|
||||||
'api_user_name': @username,
|
|
||||||
'api_user_password': @password }
|
|
||||||
)
|
|
||||||
end
|
|
||||||
|
|
||||||
def list_user_pastes
|
|
||||||
params = { 'api_dev_key': api_dev_key,
|
|
||||||
'api_user_key': api_user_key,
|
|
||||||
'api_results_limit': '100',
|
|
||||||
'api_option': 'list' }
|
|
||||||
execute_query(:api_post, params)
|
|
||||||
end
|
|
||||||
|
|
||||||
def list_trending_pastes
|
|
||||||
params = { 'api_dev_key': api_dev_key,
|
|
||||||
'api_option': 'trends' }
|
|
||||||
execute_query(:api_post, params)
|
|
||||||
end
|
|
||||||
|
|
||||||
def list_raw_user_paste(api_paste_key)
|
|
||||||
params = { 'api_dev_key': api_dev_key,
|
|
||||||
'api_user_key': api_user_key,
|
|
||||||
'api_paste_key': api_paste_key,
|
|
||||||
'api_option': 'show_paste' }
|
|
||||||
execute_query(:api_post, params)
|
|
||||||
end
|
|
||||||
|
|
||||||
# api_paste_key = this is the unique key of the paste data you want to delete.
|
|
||||||
def delete_user_paste(api_paste_key)
|
|
||||||
params = { 'api_dev_key': api_dev_key,
|
|
||||||
'api_user_key': api_user_key,
|
|
||||||
'api_paste_key': api_paste_key,
|
|
||||||
'api_option': 'delete' }
|
|
||||||
execute_query(:api_post, params)
|
|
||||||
end
|
|
||||||
|
|
||||||
def get_user_info
|
|
||||||
params = { 'api_dev_key': api_dev_key }
|
|
||||||
end
|
|
||||||
|
|
||||||
def api_post(params)
|
|
||||||
response = RestClient::Request.execute(
|
|
||||||
method: :post,
|
|
||||||
url: @base_api_url + ENDPOINTS[:post],
|
|
||||||
payload: params
|
|
||||||
)
|
|
||||||
end
|
|
||||||
|
|
||||||
# params is optional for now. to query specific language ?lang=ruby as an example
|
|
||||||
# right now its set to grab the max 250, default is 50. param is ?limit=value
|
|
||||||
def scrape_public_pastes(_params = nil, limit)
|
|
||||||
response = RestClient::Request.execute(
|
|
||||||
method: :get,
|
|
||||||
url: @scraping_api_url + ENDPOINTS[:scraping] + "?limit=#{limit}"
|
|
||||||
)
|
|
||||||
end
|
|
||||||
|
|
||||||
# will extract just the keys from recent public pastes
|
|
||||||
def get_unique_paste_keys(public_pastes)
|
|
||||||
pp = JSON.parse(public_pastes)
|
|
||||||
pp.map { |p| p['key'] }
|
|
||||||
end
|
|
||||||
|
|
||||||
# scraped keys difference returned
|
|
||||||
# https://stackoverflow.com/questions/8639857/rails-3-how-to-get-the-difference-between-two-arrays
|
|
||||||
# note ruby 2.6 has a method for this - https://github.com/ruby/ruby/blob/trunk/array.c#L4450-L4563
|
|
||||||
def difference(a, b)
|
|
||||||
a - b | b - a
|
|
||||||
end
|
|
||||||
|
|
||||||
def raw_paste_data(unique_paste_key)
|
|
||||||
response = RestClient::Request.execute(
|
|
||||||
method: :get,
|
|
||||||
url: @scraping_api_url + ENDPOINTS[:scrape_item] + "?i=#{unique_paste_key}"
|
|
||||||
)
|
|
||||||
end
|
|
||||||
|
|
||||||
def raw_paste_metadata(unique_paste_key)
|
|
||||||
response = RestClient::Request.execute(
|
|
||||||
method: :get,
|
|
||||||
url: @scraping_api_url + ENDPOINTS[:scrape_item_meta] + "?i=#{unique_paste_key}"
|
|
||||||
)
|
|
||||||
YAML.safe_load(response.body).first
|
|
||||||
end
|
|
||||||
|
|
||||||
def hash_pastes(keys)
|
|
||||||
if keys.is_a? String
|
|
||||||
raw_paste = self.raw_paste_data(keys)
|
|
||||||
raw_paste_metadata = self.raw_paste_metadata(keys)
|
|
||||||
hash = self.hash_paste(raw_paste, raw_paste_metadata)
|
|
||||||
else
|
|
||||||
keys.map do |key|
|
|
||||||
raw_paste = self.raw_paste_data(key).body
|
|
||||||
raw_paste_metadata = self.raw_paste_metadata(key)
|
|
||||||
hash = self.hash_paste(raw_paste, raw_paste_metadata)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def hash_paste(raw_paste_data, raw_paste_metadata)
|
|
||||||
{ "paste_metadata": raw_paste_metadata,
|
|
||||||
"paste_text": raw_paste_data }
|
|
||||||
end
|
|
||||||
|
|
||||||
def json_paste(key=nil, keys)
|
|
||||||
# if we give keys, create an array of X json pastes
|
|
||||||
if keys
|
|
||||||
self.hash_pastes(keys).map do |paste_hash|
|
|
||||||
paste_hash.to_json
|
|
||||||
end
|
|
||||||
else
|
|
||||||
# otherwise, just make a json of the 1 raw_paste_data & raw_paste_metadata
|
|
||||||
raw_paste_data = self.raw_paste_data(key)
|
|
||||||
raw_paste_metadata = self.raw_paste_metadata(key)
|
|
||||||
self.hash_paste(raw_paste_data, raw_paste_metadata).to_json
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def json_paste_from_file(raw_paste_json_file)
|
|
||||||
raw_paste_json = File.read(raw_paste_json_file)
|
|
||||||
self.hash_paste(raw_paste_json).to_json
|
|
||||||
end
|
|
||||||
|
|
||||||
# keep this method private so we are not letting anyone run any method in our program
|
|
||||||
private
|
|
||||||
|
|
||||||
# this will be the main way to execute any of these methods. this has the exception handling taken care of.
|
|
||||||
def execute_query(selector, *args)
|
|
||||||
send(selector, *args)
|
|
||||||
rescue RestClient::ExceptionWithResponse => e
|
|
||||||
puts e.message
|
|
||||||
end
|
|
||||||
# make my own exception class
|
|
||||||
# inherit ruby standard error class
|
|
||||||
end
|
end
|
||||||
|
|
184
lib/pastebinner/api_client.rb
Normal file
184
lib/pastebinner/api_client.rb
Normal file
|
@ -0,0 +1,184 @@
|
||||||
|
# author: brendan mcdevitt
|
||||||
|
# a ruby wrapper around all of the methods pastebin provides with its api
|
||||||
|
# official docs from pastebin on their api can be found at https://pastebin.com/api
|
||||||
|
|
||||||
|
module Pastebinner
  # Ruby wrapper around the methods Pastebin exposes through its HTTP API.
  # Official API documentation: https://pastebin.com/api
  #
  # All HTTP traffic goes through RestClient::Request.execute. Methods routed
  # through #execute_query print the error message and return nil when
  # RestClient raises ExceptionWithResponse; the scraping methods call
  # RestClient directly and let such errors propagate.
  class ApiClient
    # { endpoint_name: '/url_endpoint.php' } - paths appended to a base URL.
    ENDPOINTS = { login: '/api_login.php',
                  post: '/api_post.php',
                  raw: '/api_raw.php',
                  scraping: '/api_scraping.php',
                  scrape_item: '/api_scrape_item.php',
                  scrape_item_meta: '/api_scrape_item_meta.php' }.freeze

    attr_accessor :api_dev_key, :username, :password

    # @param api_dev_key [String] developer API key sent as api_dev_key
    # @param username [String] account name used by #api_user_key
    # @param password [String] account password used by #api_user_key
    def initialize(api_dev_key, username, password)
      @api_dev_key = api_dev_key
      @username = username
      @password = password
      @base_api_url = 'https://pastebin.com/api'
      @scraping_api_url = 'https://scrape.pastebin.com'
    end

    # Creates a paste by posting +params+ to the post endpoint.
    #
    # Required params:
    #   api_dev_key    - your unique developer api key
    #   api_option     - set as 'paste' to indicate you want to create a new paste
    #   api_paste_code - the text that will be written inside of your paste
    #
    # Optional params:
    #   api_user_key          - session key from the login system (see #api_user_key)
    #   api_paste_name        - name / title of your paste
    #   api_paste_format      - syntax highlighting value
    #   api_paste_private     - public = 0, unlisted = 1, private = 2
    #   api_paste_expire_date - expiration date of your paste
    #
    # Example:
    #   params = { 'api_dev_key': api_dev_key, 'api_option': 'paste', 'api_paste_code': paste_data }
    #
    # @return the RestClient response, or nil when the request raised
    #   RestClient::ExceptionWithResponse (see #execute_query)
    def create_paste(params)
      execute_query(:api_post, params)
    end

    # Logs in with the stored credentials and returns the user session key
    # that can be used as the api_user_key param. The response is memoized,
    # so the login request is issued at most once per client.
    def api_user_key
      @api_user_key ||= RestClient::Request.execute(
        method: :post,
        url: @base_api_url + ENDPOINTS[:login],
        payload: { 'api_dev_key': @api_dev_key,
                   'api_user_name': @username,
                   'api_user_password': @password }
      )
    end

    # Lists pastes of the logged-in user (results limit fixed at 100).
    def list_user_pastes
      params = { 'api_dev_key': api_dev_key,
                 'api_user_key': api_user_key,
                 'api_results_limit': '100',
                 'api_option': 'list' }
      execute_query(:api_post, params)
    end

    # Lists trending pastes; only the developer key is sent.
    def list_trending_pastes
      params = { 'api_dev_key': api_dev_key,
                 'api_option': 'trends' }
      execute_query(:api_post, params)
    end

    # Requests one paste of the logged-in user via the 'show_paste' option.
    #
    # @param api_paste_key [String] unique key of the paste to show
    def list_raw_user_paste(api_paste_key)
      params = { 'api_dev_key': api_dev_key,
                 'api_user_key': api_user_key,
                 'api_paste_key': api_paste_key,
                 'api_option': 'show_paste' }
      execute_query(:api_post, params)
    end

    # Deletes one paste of the logged-in user.
    #
    # @param api_paste_key [String] unique key of the paste you want to delete
    def delete_user_paste(api_paste_key)
      params = { 'api_dev_key': api_dev_key,
                 'api_user_key': api_user_key,
                 'api_paste_key': api_paste_key,
                 'api_option': 'delete' }
      execute_query(:api_post, params)
    end

    # TODO: unfinished - this only builds and returns the params hash; no
    # request is sent. Kept as-is so existing callers see the same value.
    def get_user_info
      { 'api_dev_key': api_dev_key }
    end

    # POSTs +params+ to the post endpoint and returns the RestClient response.
    # Errors are not rescued here; go through #execute_query for that.
    def api_post(params)
      RestClient::Request.execute(
        method: :post,
        url: @base_api_url + ENDPOINTS[:post],
        payload: params
      )
    end

    # Fetches the most recent public pastes from the scraping API.
    #
    # @param _params currently ignored; reserved for extra query options
    #   such as ?lang=ruby
    # @param limit number of pastes to request, sent as ?limit=value
    #   (author's note: the API default is 50 and the maximum is 250)
    # @return the RestClient response
    def scrape_public_pastes(_params = nil, limit)
      RestClient::Request.execute(
        method: :get,
        url: @scraping_api_url + ENDPOINTS[:scraping] + "?limit=#{limit}"
      )
    end

    # Extracts just the keys from a JSON listing of recent public pastes.
    #
    # @param public_pastes [String] JSON array of objects carrying a 'key' entry
    # @return [Array] the 'key' value of every entry, in order
    def get_unique_paste_keys(public_pastes)
      JSON.parse(public_pastes).map { |paste| paste['key'] }
    end

    # Symmetric difference of two arrays: elements present in exactly one of
    # them. Useful for comparing two batches of scraped keys.
    def difference(a, b)
      (a - b) | (b - a)
    end

    # Fetches the raw text of a paste from the scraping API.
    #
    # @param unique_paste_key [String] paste key, sent as ?i=key
    # @return the RestClient response
    def raw_paste_data(unique_paste_key)
      RestClient::Request.execute(
        method: :get,
        url: @scraping_api_url + ENDPOINTS[:scrape_item] + "?i=#{unique_paste_key}"
      )
    end

    # Fetches the metadata of a paste from the scraping API.
    #
    # @param unique_paste_key [String] paste key, sent as ?i=key
    # @return the first element of the parsed response body
    def raw_paste_metadata(unique_paste_key)
      response = RestClient::Request.execute(
        method: :get,
        url: @scraping_api_url + ENDPOINTS[:scrape_item_meta] + "?i=#{unique_paste_key}"
      )
      YAML.safe_load(response.body).first
    end

    # Builds paste hashes (see #hash_paste) for one key or for many.
    #
    # @param keys [String, Enumerable] a single paste key or a collection of keys
    # @return [Hash] when given a String, otherwise an Array of Hashes
    def hash_pastes(keys)
      return paste_hash_for(keys) if keys.is_a?(String)

      keys.map { |key| paste_hash_for(key) }
    end

    # Combines paste text and metadata into one hash.
    #
    # @param raw_paste_data paste text
    # @param raw_paste_metadata paste metadata; defaults to nil so callers
    #   that only have the text (e.g. #json_paste_from_file) can still build
    #   a hash
    # @return [Hash] with :paste_metadata and :paste_text keys
    def hash_paste(raw_paste_data, raw_paste_metadata = nil)
      { "paste_metadata": raw_paste_metadata,
        "paste_text": raw_paste_data }
    end

    # Serializes pastes to JSON.
    #
    # json_paste(keys)     - keys may be a collection or a single String;
    #                        returns an Array with one JSON document per key
    # json_paste(key, nil) - returns a single JSON document for +key+
    def json_paste(key = nil, keys)
      return paste_hash_for(key).to_json unless keys

      hashes = hash_pastes(keys)
      # hash_pastes returns a bare Hash for a single String key; wrap it so
      # we serialize the paste itself rather than its key/value pairs.
      hashes = [hashes] if hashes.is_a?(Hash)
      hashes.map(&:to_json)
    end

    # Reads a file and serializes its content as the text of a paste hash
    # (metadata is nil because the file carries none).
    #
    # @param raw_paste_json_file [String] path of the file to read
    # @return [String] JSON document
    def json_paste_from_file(raw_paste_json_file)
      hash_paste(File.read(raw_paste_json_file)).to_json
    end

    # keep the methods below private so callers cannot dispatch to arbitrary
    # methods of this object through execute_query
    private

    # Main way to execute the API methods above: dispatches to +selector+
    # with +args+. When RestClient raises ExceptionWithResponse the message
    # is printed to stdout and nil is returned.
    def execute_query(selector, *args)
      send(selector, *args)
    rescue RestClient::ExceptionWithResponse => e
      puts e.message
    end

    # Fetches text and metadata for one key and combines them. The body of
    # the text response is used so single-key and multi-key paths agree.
    def paste_hash_for(key)
      hash_paste(raw_paste_data(key).body, raw_paste_metadata(key))
    end
  end
end
|
|
@ -1,5 +0,0 @@
|
||||||
class PastebinnerError < StandardError
|
|
||||||
def InvalidArgument; end
|
|
||||||
|
|
||||||
def ConfigError; end
|
|
||||||
end
|
|
49
lib/pastebinner/helpers/elastic_search_helper.rb
Normal file
49
lib/pastebinner/helpers/elastic_search_helper.rb
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
module Pastebinner
  # Small helper for talking to an Elasticsearch server over HTTP with
  # RestClient. Every URL is built from server_uri, index and doctype.
  class ElasticSearchHelper
    DEFAULT_METHOD = :post

    attr_accessor :server_uri, :index, :api_client, :doctype

    # @param server_uri base URL of the server, used as the URL prefix
    # @param index index name, appended to server_uri
    # @param doctype document type segment used by the mapping/indexing URLs
    #
    # Also builds a Pastebinner::ApiClient from the pastebin_* environment
    # variables and exposes it as #api_client.
    def initialize(server_uri, index, doctype='paste')
      @server_uri = server_uri
      @index = index
      @doctype = doctype
      @api_client = Pastebinner::ApiClient.new(
        ENV['pastebin_api_key'],
        ENV['pastebin_username'],
        ENV['pastebin_password']
      )
    end

    # Sends DELETE to <server_uri>/<index> and returns the response.
    def delete_index
      RestClient::Request.execute(method: :delete, url: index_url)
    end

    # Sends GET to <server_uri>/<index>/_mappings?pretty and returns the response.
    def get_mappings
      RestClient::Request.execute(method: :get, url: "#{index_url}/_mappings?pretty")
    end

    # Sends +mapping_json+ with PUT to <server_uri>/<index>/_mapping/<doctype>.
    def update_mapping(mapping_json)
      RestClient::Request.execute(
        method: :put,
        url: "#{index_url}/_mapping/#{doctype}",
        payload: mapping_json,
        headers: json_headers
      )
    end

    # Sends one JSON document to <server_uri>/<index>/<doctype>.
    #
    # @param paste_json request payload
    # @param method HTTP verb; DEFAULT_METHOD is used when nil/false is given
    def json_to_es(paste_json, method=nil)
      verb = method || DEFAULT_METHOD
      RestClient::Request.execute(
        method: verb,
        url: "#{index_url}/#{doctype}",
        headers: json_headers,
        payload: paste_json
      )
    end

    # Sends each document with its own #json_to_es request (one request per
    # element) and returns the collection that was passed in.
    def json_to_es_bulk(array_of_paste_json)
      array_of_paste_json.each { |document| json_to_es(document) }
    end

    private

    # <server_uri>/<index>, the prefix shared by every request URL.
    def index_url
      "#{server_uri}/#{index}"
    end

    # Fresh header hash for requests that carry a JSON payload.
    def json_headers
      { 'Content-type': 'application/json' }
    end
  end
end
|
|
@ -1,51 +1,51 @@
|
||||||
require 'optparse'
|
module Pastebinner
|
||||||
|
class OptionParser
|
||||||
|
def self.parse!(argv = ARGV)
|
||||||
|
options = {}
|
||||||
|
OptParse.new do |opts|
|
||||||
|
opts.default_argv = argv
|
||||||
|
|
||||||
class OptionParser
|
opts.banner = 'Usage: pastebinner [options]'
|
||||||
def self.parse!(argv = ARGV)
|
|
||||||
options = {}
|
|
||||||
OptParse.new do |opts|
|
|
||||||
opts.default_argv = argv
|
|
||||||
|
|
||||||
opts.banner = 'Usage: pastebinner [options]'
|
opts.on('-h', '--help', 'Show this help messae') do
|
||||||
|
puts opts
|
||||||
opts.on('-h', '--help', 'Show this help messae') do
|
exit
|
||||||
puts opts
|
end
|
||||||
exit
|
|
||||||
end
|
|
||||||
|
|
||||||
opts.on('-v', '--verbose', 'Verbose http output (WIP)') do |_v|
|
opts.on('-v', '--verbose', 'Verbose http output (WIP)') do |_v|
|
||||||
options[:v] = true
|
options[:v] = true
|
||||||
end
|
end
|
||||||
|
|
||||||
opts.on('-s', '--scrape_public', 'Scrape public pastes') do |_s|
|
opts.on('-s', '--scrape_public', 'Scrape public pastes') do |_s|
|
||||||
options[:s] = true
|
options[:s] = true
|
||||||
end
|
end
|
||||||
|
|
||||||
opts.on('-r', '--raw', 'Raw paste. Requires --key passed with a valid key') do |_r|
|
opts.on('-r', '--raw', 'Raw paste. Requires --key passed with a valid key') do |_r|
|
||||||
options[:r] = true
|
options[:r] = true
|
||||||
end
|
end
|
||||||
|
|
||||||
opts.on('-g', '--get_keys', 'Get unique paste keys from public pastes') do |_g|
|
opts.on('-g', '--get_keys', 'Get unique paste keys from public pastes') do |_g|
|
||||||
options[:g] = true
|
options[:g] = true
|
||||||
end
|
end
|
||||||
|
|
||||||
opts.on('-k', '--key=', 'Unique paste key') do |k|
|
opts.on('-k', '--key=', 'Unique paste key') do |k|
|
||||||
options[:k] = k
|
options[:k] = k
|
||||||
end
|
end
|
||||||
|
|
||||||
opts.on('-d', '--download', 'Download all public pastes to data directory') do |_d|
|
opts.on('-d', '--download', 'Download all public pastes to data directory') do |_d|
|
||||||
options[:d] = true
|
options[:d] = true
|
||||||
end
|
end
|
||||||
|
|
||||||
opts.on('-j', '--json', 'Download all public pastes as a json into data directory') do |_j|
|
opts.on('-j', '--json', 'Download all public pastes as a json into data directory') do |_j|
|
||||||
options[:j] = true
|
options[:j] = true
|
||||||
end
|
end
|
||||||
|
|
||||||
opts.on('-t', '--trending', 'Trending pastes') do |_t|
|
opts.on('-t', '--trending', 'Trending pastes') do |_t|
|
||||||
options[:t] = true
|
options[:t] = true
|
||||||
|
end
|
||||||
|
opts.parse!
|
||||||
end
|
end
|
||||||
opts.parse!
|
options
|
||||||
end
|
end
|
||||||
options
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
17
lib/pastebinner/workers/paste_to_es.rb
Normal file
17
lib/pastebinner/workers/paste_to_es.rb
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
module Pastebinner
  # Sidekiq worker that scrapes public pastes and sends them to Elasticsearch.
  class PasteToEs
    include Sidekiq::Worker

    # Retries are disabled on purpose: a failed job is simply dropped so that
    # repeated attempts do not get us rate limited.
    sidekiq_options retry: false

    # @param es_object object responding to json_to_es_bulk
    # @param pb_object object responding to scrape_public_pastes,
    #   get_unique_paste_keys and json_paste
    # @param paste_max number of public pastes to request
    #
    # NOTE(review): Sidekiq normally serializes job arguments, so passing
    # live objects here may not survive enqueueing - confirm against callers.
    def perform(es_object, pb_object, paste_max)
      Logger.new(STDOUT).info("PasteToEs started")

      # fetch the latest public pastes, then pull out their keys
      public_pastes = pb_object.scrape_public_pastes(paste_max)
      paste_keys = pb_object.get_unique_paste_keys(public_pastes)

      # turn every paste into JSON and hand the batch to elasticsearch
      es_object.json_to_es_bulk(pb_object.json_paste(paste_keys))
    end
  end
end
|
|
@ -1,15 +0,0 @@
|
||||||
class PasteToEs
|
|
||||||
include Sidekiq::Worker
|
|
||||||
|
|
||||||
sidekiq_options retry: false # i dont want to get rate limited so im just letting this fail if their are any failures
|
|
||||||
def perform(es_object, pb_object, paste_max)
|
|
||||||
Logger.new(STDOUT).info("PasteToEs started")
|
|
||||||
# get public pastes and their keys
|
|
||||||
pastes = pb_object.scrape_public_pastes(paste_max)
|
|
||||||
keys = pb_object.get_unique_paste_keys(pastes)
|
|
||||||
|
|
||||||
# build it into json and send it to elasticsearch
|
|
||||||
json_data = pb_object.json_paste(keys)
|
|
||||||
es_object.json_to_es_bulk(json_data)
|
|
||||||
end
|
|
||||||
end
|
|
Loading…
Add table
Reference in a new issue