Add a paste_max limit option so you can choose how many pastes to query

This commit is contained in:
booboy 2019-02-04 22:45:26 -06:00
parent 8d781e2499
commit 0415c64960
4 changed files with 16 additions and 11 deletions

View file

@ -6,10 +6,13 @@ require 'pry'
# setup our object and grab a session key
pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
# Set the command-line client to grab 50 pastes by default. This should become a configurable option once we add configuration methods.
paste_max = 50
# Pass in the Pastebinner.new client.
# Downloads all of the raw pastes from the public scrape results, each into its own file in the data dir.
def download_pastes_json(pb)
pub_pastes = pb.scrape_public_pastes
def download_pastes_json(pb, paste_max)
pub_pastes = pb.scrape_public_pastes(paste_max)
keys = pb.get_unique_paste_keys(pub_pastes)
data_dir = '../data/'
filename = 'pastebin_paste_key'
@ -22,8 +25,8 @@ def download_pastes_json(pb)
end
end
def download_pastes_raw(pb)
pub_pastes = pb.scrape_public_pastes
def download_pastes_raw(pb, paste_max)
pub_pastes = pb.scrape_public_pastes(paste_max)
keys = pb.get_unique_paste_keys(pub_pastes)
data_dir = '../data/'
filename = 'pastebin_paste_key'
@ -39,14 +42,14 @@ end
options = OptionParser.parse!
if options[:s]
puts pb.scrape_public_pastes
# Use scrape_public_pastes (plural), matching the Pastebinner method and the other call sites.
puts pb.scrape_public_pastes(paste_max)
elsif options[:r] && options[:k]
key = options[:k]
puts pb.raw_paste_data(key)
elsif options[:t]
puts pb.list_trending_pastes
elsif options[:g]
r = pb.scrape_public_pastes
r = pb.scrape_public_pastes(paste_max)
puts pb.get_unique_paste_keys(r)
elsif options[:j]
puts 'Downloading paste data as a json into the data directory...'

View file

@ -19,7 +19,9 @@ params = { "api_dev_key": api_dev_key, "api_option": 'paste', "api_paste_code":
puts pb.create_paste(params)
#### SCRAPE PUBLIC PASTES
puts pb.scrape_public_pastes
paste_max = 50
# Scrape 50 pastes; the max is 250 (requests near the 250 range can sometimes get rate limited).
puts pb.scrape_public_pastes(paste_max)
#### SCRAPING - WHITELISTED IP ONLY
#### SCRAPE RAW PASTE DATA OF A PASTE KEY

View file

@ -1,8 +1,8 @@
class PasteToEs
include Sidekiq::Worker
def perform(es_object, pb_object)
def perform(es_object, pb_object, paste_max)
Logger.new(STDOUT).info("PasteToEs started")
pastes = pb_object.scrape_public_pastes
pastes = pb_object.scrape_public_pastes(paste_max)
keys = pb_object.get_unique_paste_keys(pastes)
json_data = pb_object.json_paste(keys)
es_object.json_to_es_bulk(json_data)

View file

@ -101,10 +101,10 @@ class Pastebinner
# Fetches the public paste listing from the scraping API with a GET request.
#
# _params - optional and currently unused by this method; kept for future
#           query filters (e.g. ?lang=ruby to query a specific language).
# limit   - number of pastes to request, sent as the ?limit=value query
#           parameter. The API default is 50 and the max is 250.
#
# Because the optional _params comes before the required limit, a call with a
# single argument such as scrape_public_pastes(50) binds that argument to limit.
#
# Returns whatever RestClient::Request.execute returns for the request.
def scrape_public_pastes(_params = nil, limit)
  scraping_url = @scraping_api_url + ENDPOINTS[:scraping]
  RestClient::Request.execute(
    method: :get,
    url: scraping_url + "?limit=#{limit}"
  )
end