Added the ability to download either the raw pastes or the full metadata plus paste text in a single JSON document

This commit is contained in:
booboy 2019-02-02 04:17:17 -06:00
parent e580c572f7
commit fafd8268c0
2 changed files with 31 additions and 5 deletions

View file

@ -8,16 +8,30 @@ pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pas
# Pass in the Pastebinner.new client.
# Downloads every raw paste from the public scrape results into its own file in the data dir.
def download_pastes(pb)
def download_pastes_json(pb)
pub_pastes = pb.scrape_public_pastes
keys = pb.get_unique_paste_keys(pub_pastes)
data_dir = '../data/'
filename = 'pastebin_paste_key'
keys.map do |id|
if File.exist?(data_dir + filename + "_#{id}")
puts "#{id} already exists on your filesystem, skipping..."
if File.exist?("#{data_dir}#{filename}_#{id}.json")
puts "#{data_dir}#{filename}_#{id}.json already exists on your filesystem, skipping..."
else
File.write(data_dir + filename + "_#{id}", pb.json_paste(pb.raw_paste_data(id), pb.raw_paste_metadata(id)))
File.write("#{data_dir}#{filename}_#{id}.json", pb.json_paste(pb.raw_paste_data(id), pb.raw_paste_metadata(id)))
end
end
end
# Downloads the raw text of every public paste into its own file.
#
# Asks +pb+ for the public scrape results, reduces them to unique paste keys,
# and writes each paste to ../data/pastebin_paste_key_<key>.raw (the path is
# relative to the current working directory). A key whose file already exists
# is reported on stdout and skipped, so its paste is not fetched again.
#
# @param pb client object; must respond to scrape_public_pastes,
#   get_unique_paste_keys(pastes) and raw_paste_data(key)
# @return [Array] one entry per key: the value returned by File.write for a
#   downloaded paste, or nil (from puts) for a skipped one
def download_pastes_raw(pb)
  pub_pastes = pb.scrape_public_pastes
  keys = pb.get_unique_paste_keys(pub_pastes)
  data_dir = '../data/'
  filename = 'pastebin_paste_key'
  # File.write does not create missing directories; without this a fresh
  # checkout with no data dir fails with Errno::ENOENT on the first paste.
  Dir.mkdir(data_dir) unless Dir.exist?(data_dir)
  keys.map do |id|
    # Build the target path once so the check, the message and the write agree.
    path = "#{data_dir}#{filename}_#{id}.raw"
    if File.exist?(path)
      puts "#{path} already exists on your filesystem, skipping..."
    else
      File.write(path, pb.raw_paste_data(id))
    end
  end
end
@ -34,9 +48,13 @@ elsif options[:t]
elsif options[:g]
r = pb.scrape_public_pastes
puts pb.get_unique_paste_keys(r)
elsif options[:j]
puts 'Downloading paste data as a json into the data directory...'
download_pastes_json(pb)
puts 'Complete.'
elsif options[:d]
puts 'Downloading paste data into the data directory...'
download_pastes(pb)
download_pastes_raw(pb)
puts 'Complete.'
elsif options[:k]
puts '-k or --key= requires -r,--raw'

View file

@ -12,6 +12,10 @@ class OptionParser
puts opts
exit
end
opts.on('-v', '--verbose', 'Verbose http output (WIP)') do |_v|
options[:v] = true
end
opts.on('-s', '--scrape_public', 'Scrape public pastes') do |_s|
options[:s] = true
@ -33,6 +37,10 @@ class OptionParser
options[:d] = true
end
opts.on('-j', '--json', 'Download all public pastes as a json into data directory') do |_j|
options[:j] = true
end
opts.on('-t', '--trending', 'Trending pastes') do |_t|
options[:t] = true
end