made a paste_max limit option so you can query how many pastes you want
This commit is contained in:
parent
8d781e2499
commit
0415c64960
4 changed files with 16 additions and 11 deletions
|
@ -6,10 +6,13 @@ require 'pry'
|
||||||
# setup our object and grab a session key
|
# setup our object and grab a session key
|
||||||
pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
||||||
|
|
||||||
|
# Set the command-line client to grab 50 pastes by default. This should become a configurable option once we add configuration methods.
|
||||||
|
paste_max = 50
|
||||||
|
|
||||||
# pass in the Pastebinner.new client.
|
# pass in the Pastebinner.new client.
|
||||||
# will download all of the raw pastes from the public scrape results, each into its own file in the data dir.
|
# will download all of the raw pastes from the public scrape results, each into its own file in the data dir.
|
||||||
def download_pastes_json(pb)
|
def download_pastes_json(pb, paste_max)
|
||||||
pub_pastes = pb.scrape_public_pastes
|
pub_pastes = pb.scrape_public_pastes(paste_max)
|
||||||
keys = pb.get_unique_paste_keys(pub_pastes)
|
keys = pb.get_unique_paste_keys(pub_pastes)
|
||||||
data_dir = '../data/'
|
data_dir = '../data/'
|
||||||
filename = 'pastebin_paste_key'
|
filename = 'pastebin_paste_key'
|
||||||
|
@ -22,8 +25,8 @@ def download_pastes_json(pb)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def download_pastes_raw(pb)
|
def download_pastes_raw(pb, paste_max)
|
||||||
pub_pastes = pb.scrape_public_pastes
|
pub_pastes = pb.scrape_public_pastes(paste_max)
|
||||||
keys = pb.get_unique_paste_keys(pub_pastes)
|
keys = pb.get_unique_paste_keys(pub_pastes)
|
||||||
data_dir = '../data/'
|
data_dir = '../data/'
|
||||||
filename = 'pastebin_paste_key'
|
filename = 'pastebin_paste_key'
|
||||||
|
@ -39,14 +42,14 @@ end
|
||||||
options = OptionParser.parse!
|
options = OptionParser.parse!
|
||||||
|
|
||||||
if options[:s]
|
if options[:s]
|
||||||
puts pb.scrape_public_pastes
|
# Print the public scrape listing, limited to paste_max entries.
puts pb.scrape_public_pastes(paste_max)
|
||||||
elsif options[:r] && options[:k]
|
elsif options[:r] && options[:k]
|
||||||
key = options[:k]
|
key = options[:k]
|
||||||
puts pb.raw_paste_data(key)
|
puts pb.raw_paste_data(key)
|
||||||
elsif options[:t]
|
elsif options[:t]
|
||||||
puts pb.list_trending_pastes
|
puts pb.list_trending_pastes
|
||||||
elsif options[:g]
|
elsif options[:g]
|
||||||
r = pb.scrape_public_pastes
|
r = pb.scrape_public_pastes(paste_max)
|
||||||
puts pb.get_unique_paste_keys(r)
|
puts pb.get_unique_paste_keys(r)
|
||||||
elsif options[:j]
|
elsif options[:j]
|
||||||
puts 'Downloading paste data as a json into the data directory...'
|
puts 'Downloading paste data as a json into the data directory...'
|
||||||
|
|
|
@ -19,7 +19,9 @@ params = { "api_dev_key": api_dev_key, "api_option": 'paste', "api_paste_code":
|
||||||
puts pb.create_paste(params)
|
puts pb.create_paste(params)
|
||||||
|
|
||||||
#### SCRAPE PUBLIC PASTES
|
#### SCRAPE PUBLIC PASTES
|
||||||
puts pb.scrape_public_pastes
|
paste_max = 50
|
||||||
|
# Set to scrape 50 pastes; the max is 250 (requests can sometimes get rate limited when near the 250 range).
|
||||||
|
puts pb.scrape_public_pastes(paste_max)
|
||||||
|
|
||||||
#### SCRAPING - WHITELISTED IP ONLY
|
#### SCRAPING - WHITELISTED IP ONLY
|
||||||
#### SCRAPE RAW PASTE DATA OF A PASTE KEY
|
#### SCRAPE RAW PASTE DATA OF A PASTE KEY
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
class PasteToEs
|
class PasteToEs
|
||||||
include Sidekiq::Worker
|
include Sidekiq::Worker
|
||||||
def perform(es_object, pb_object)
|
def perform(es_object, pb_object, paste_max)
|
||||||
Logger.new(STDOUT).info("PasteToEs started")
|
Logger.new(STDOUT).info("PasteToEs started")
|
||||||
pastes = pb_object.scrape_public_pastes
|
pastes = pb_object.scrape_public_pastes(paste_max)
|
||||||
keys = pb_object.get_unique_paste_keys(pastes)
|
keys = pb_object.get_unique_paste_keys(pastes)
|
||||||
json_data = pb_object.json_paste(keys)
|
json_data = pb_object.json_paste(keys)
|
||||||
es_object.json_to_es_bulk(json_data)
|
es_object.json_to_es_bulk(json_data)
|
||||||
|
|
|
@ -101,10 +101,10 @@ class Pastebinner
|
||||||
|
|
||||||
# params is optional for now. to query specific language ?lang=ruby as an example
|
# params is optional for now. to query specific language ?lang=ruby as an example
|
||||||
# right now its set to grab the max 250, default is 50. param is ?limit=value
|
# limit is how many pastes to grab (max is 250, default is 50); it is sent as the ?limit=value param
|
||||||
def scrape_public_pastes(_params = nil)
|
def scrape_public_pastes(_params = nil, limit)
|
||||||
response = RestClient::Request.execute(
|
response = RestClient::Request.execute(
|
||||||
method: :get,
|
method: :get,
|
||||||
url: @scraping_api_url + ENDPOINTS[:scraping] + '?limit=250'
|
url: @scraping_api_url + ENDPOINTS[:scraping] + "?limit=#{limit}"
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue