Added the ability to download either the raw pastes or the full metadata + paste text in one JSON document.
This commit is contained in:
parent
e580c572f7
commit
fafd8268c0
2 changed files with 31 additions and 5 deletions
|
@ -8,16 +8,30 @@ pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pas
|
||||||
|
|
||||||
# pass in the Pastebinner.new client.
|
# pass in the Pastebinner.new client.
|
||||||
# will download all of the raw pastes from the public scrape results into each own file in data dir.
|
# will download all of the raw pastes from the public scrape results into each own file in data dir.
|
||||||
def download_pastes(pb)
|
def download_pastes_json(pb)
|
||||||
pub_pastes = pb.scrape_public_pastes
|
pub_pastes = pb.scrape_public_pastes
|
||||||
keys = pb.get_unique_paste_keys(pub_pastes)
|
keys = pb.get_unique_paste_keys(pub_pastes)
|
||||||
data_dir = '../data/'
|
data_dir = '../data/'
|
||||||
filename = 'pastebin_paste_key'
|
filename = 'pastebin_paste_key'
|
||||||
keys.map do |id|
|
keys.map do |id|
|
||||||
if File.exist?(data_dir + filename + "_#{id}")
|
if File.exist?("#{data_dir}#{filename}_#{id}.json")
|
||||||
puts "#{id} already exists on your filesystem, skipping..."
|
puts "#{data_dir}#{filename}_#{id}.json already exists on your filesystem, skipping..."
|
||||||
else
|
else
|
||||||
File.write(data_dir + filename + "_#{id}", pb.json_paste(pb.raw_paste_data(id), pb.raw_paste_metadata(id)))
|
File.write("#{data_dir}#{filename}_#{id}.json", pb.json_paste(pb.raw_paste_data(id), pb.raw_paste_metadata(id)))
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def download_pastes_raw(pb)
|
||||||
|
pub_pastes = pb.scrape_public_pastes
|
||||||
|
keys = pb.get_unique_paste_keys(pub_pastes)
|
||||||
|
data_dir = '../data/'
|
||||||
|
filename = 'pastebin_paste_key'
|
||||||
|
keys.map do |id|
|
||||||
|
if File.exist?("#{data_dir}#{filename}_#{id}.raw")
|
||||||
|
puts "#{data_dir}#{filename}_#{id}.raw already exists on your filesystem, skipping..."
|
||||||
|
else
|
||||||
|
File.write("#{data_dir}#{filename}_#{id}.raw", pb.raw_paste_data(id))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -34,9 +48,13 @@ elsif options[:t]
|
||||||
elsif options[:g]
|
elsif options[:g]
|
||||||
r = pb.scrape_public_pastes
|
r = pb.scrape_public_pastes
|
||||||
puts pb.get_unique_paste_keys(r)
|
puts pb.get_unique_paste_keys(r)
|
||||||
|
elsif options[:j]
|
||||||
|
puts 'Downloading paste data as a json into the data directory...'
|
||||||
|
download_pastes_json(pb)
|
||||||
|
puts 'Complete.'
|
||||||
elsif options[:d]
|
elsif options[:d]
|
||||||
puts 'Downloading paste data into the data directory...'
|
puts 'Downloading paste data into the data directory...'
|
||||||
download_pastes(pb)
|
download_pastes_raw(pb)
|
||||||
puts 'Complete.'
|
puts 'Complete.'
|
||||||
elsif options[:k]
|
elsif options[:k]
|
||||||
puts '-k or --key= requires -r,--raw'
|
puts '-k or --key= requires -r,--raw'
|
||||||
|
|
|
@ -12,6 +12,10 @@ class OptionParser
|
||||||
puts opts
|
puts opts
|
||||||
exit
|
exit
|
||||||
end
|
end
|
||||||
|
|
||||||
|
opts.on('-v', '--verbose', 'Verbose http output (WIP)') do |_v|
|
||||||
|
options[:v] = true
|
||||||
|
end
|
||||||
|
|
||||||
opts.on('-s', '--scrape_public', 'Scrape public pastes') do |_s|
|
opts.on('-s', '--scrape_public', 'Scrape public pastes') do |_s|
|
||||||
options[:s] = true
|
options[:s] = true
|
||||||
|
@ -33,6 +37,10 @@ class OptionParser
|
||||||
options[:d] = true
|
options[:d] = true
|
||||||
end
|
end
|
||||||
|
|
||||||
|
opts.on('-j', '--json', 'Download all public pastes as a json into data directory') do |_j|
|
||||||
|
options[:j] = true
|
||||||
|
end
|
||||||
|
|
||||||
opts.on('-t', '--trending', 'Trending pastes') do |_t|
|
opts.on('-t', '--trending', 'Trending pastes') do |_t|
|
||||||
options[:t] = true
|
options[:t] = true
|
||||||
end
|
end
|
||||||
|
|
Loading…
Add table
Reference in a new issue