Added a paste_max limit option so callers can choose how many pastes to query
This commit is contained in:
parent
8d781e2499
commit
0415c64960
4 changed files with 16 additions and 11 deletions
|
@ -6,10 +6,13 @@ require 'pry'
|
|||
# setup our object and grab a session key
|
||||
pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
||||
|
||||
# Set the command-line client to grab 50 pastes by default. This should become a config option once we add configuration methods.
|
||||
paste_max = 50
|
||||
|
||||
# pass in the Pastebinner.new client.
|
||||
# Will download all of the raw pastes from the public scrape results, each into its own file in the data dir.
|
||||
def download_pastes_json(pb)
|
||||
pub_pastes = pb.scrape_public_pastes
|
||||
def download_pastes_json(pb, paste_max)
|
||||
pub_pastes = pb.scrape_public_pastes(paste_max)
|
||||
keys = pb.get_unique_paste_keys(pub_pastes)
|
||||
data_dir = '../data/'
|
||||
filename = 'pastebin_paste_key'
|
||||
|
@ -22,8 +25,8 @@ def download_pastes_json(pb)
|
|||
end
|
||||
end
|
||||
|
||||
def download_pastes_raw(pb)
|
||||
pub_pastes = pb.scrape_public_pastes
|
||||
def download_pastes_raw(pb, paste_max)
|
||||
pub_pastes = pb.scrape_public_pastes(paste_max)
|
||||
keys = pb.get_unique_paste_keys(pub_pastes)
|
||||
data_dir = '../data/'
|
||||
filename = 'pastebin_paste_key'
|
||||
|
@ -39,14 +42,14 @@ end
|
|||
options = OptionParser.parse!
|
||||
|
||||
if options[:s]
|
||||
puts pb.scrape_public_pastes
|
||||
puts pb.scrape_public_paste(paste_max)
|
||||
elsif options[:r] && options[:k]
|
||||
key = options[:k]
|
||||
puts pb.raw_paste_data(key)
|
||||
elsif options[:t]
|
||||
puts pb.list_trending_pastes
|
||||
elsif options[:g]
|
||||
r = pb.scrape_public_pastes
|
||||
r = pb.scrape_public_pastes(paste_max)
|
||||
puts pb.get_unique_paste_keys(r)
|
||||
elsif options[:j]
|
||||
puts 'Downloading paste data as a json into the data directory...'
|
||||
|
|
|
@ -19,7 +19,9 @@ params = { "api_dev_key": api_dev_key, "api_option": 'paste', "api_paste_code":
|
|||
puts pb.create_paste(params)
|
||||
|
||||
#### SCRAPE PUBLIC PASTES
|
||||
puts pb.scrape_public_pastes
|
||||
paste_max = 50
|
||||
# Set to scrape 50 pastes; the max is 250 (requests can sometimes get rate limited when in the 250 range).
|
||||
puts pb.scrape_public_pastes(paste_max)
|
||||
|
||||
#### SCRAPING - WHITELISTED IP ONLY
|
||||
#### SCRAPE RAW PASTE DATA OF A PASTE KEY
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
class PasteToEs
|
||||
include Sidekiq::Worker
|
||||
def perform(es_object, pb_object)
|
||||
def perform(es_object, pb_object, paste_max)
|
||||
Logger.new(STDOUT).info("PasteToEs started")
|
||||
pastes = pb_object.scrape_public_pastes
|
||||
pastes = pb_object.scrape_public_pastes(paste_max)
|
||||
keys = pb_object.get_unique_paste_keys(pastes)
|
||||
json_data = pb_object.json_paste(keys)
|
||||
es_object.json_to_es_bulk(json_data)
|
||||
|
|
|
@ -101,10 +101,10 @@ class Pastebinner
|
|||
|
||||
# params is optional for now; it is meant for querying a specific language, e.g. ?lang=ruby
|
||||
# the number of pastes to grab is sent as ?limit=value (max is 250, default is 50)
|
||||
def scrape_public_pastes(_params = nil)
|
||||
def scrape_public_pastes(_params = nil, limit)
|
||||
response = RestClient::Request.execute(
|
||||
method: :get,
|
||||
url: @scraping_api_url + ENDPOINTS[:scraping] + '?limit=250'
|
||||
url: @scraping_api_url + ENDPOINTS[:scraping] + "?limit=#{limit}"
|
||||
)
|
||||
end
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue