From e104b0b0153fbf4fa79d6028d1cbb303fa3c5a31 Mon Sep 17 00:00:00 2001 From: booboy Date: Sun, 3 Feb 2019 02:09:51 -0600 Subject: [PATCH] good additions, got nested object working by loading paste_metadata as an array of hash instead of a string --- bin/console | 2 + lib/elastic_search_helper.rb | 39 +------------ lib/mappings.json | 109 +++++++++++++++++++++++++++++++++-- lib/pastebinner.rb | 35 +++++++---- 4 files changed, 131 insertions(+), 54 deletions(-) diff --git a/bin/console b/bin/console index e2f9975..fa30917 100755 --- a/bin/console +++ b/bin/console @@ -8,5 +8,7 @@ require '../lib/elastic_search_helper' # (If you use this, don't forget to add pry to your Gemfile!) require 'pry' +# set restclient logging and setup a pb object +RestClient.log ='stdout' pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password']) binding.pry diff --git a/lib/elastic_search_helper.rb b/lib/elastic_search_helper.rb index d61412a..4b3957b 100644 --- a/lib/elastic_search_helper.rb +++ b/lib/elastic_search_helper.rb @@ -9,15 +9,6 @@ class ElasticSearchHelper @pastebinner = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password']) end - def create_index_with_mappings - header = { 'Content-type': 'application/json' } - response = RestClient::Request.execute( - method: :put, - url: "#{server_uri}/#{index}", - headers: header, - payload: self.mappings.to_json) - end - def delete_index response = RestClient::Request.execute( method: :delete, @@ -27,35 +18,7 @@ class ElasticSearchHelper def get_mappings response = RestClient::Request.execute( method: :get, - url: "#{server_uri}/#{index}/_mappings") - end - - - def mappings - { - "mappings": { - "paste": { - "properties": { - "paste_metadata": { - "type": "nested", - "properties": { - "scrape_url": { "type": "text" }, - "full_url": { "type": "text" }, - "date": { "type": "text" }, - "key": { "type": "text" }, - "size": { "type": "text" }, - "expire": { "type": "text" }, - "title": { "type": "text" }, - "syntax": { "type": "text" }, - "user": { "type": "text" }, - "hits": { "type": "integer" } - } - }, - "paste_text": { "type": "text" } - } - } - } - } + url: "#{server_uri}/#{index}/_mappings?pretty") end def update_mapping(mapping_json) diff --git a/lib/mappings.json b/lib/mappings.json index c758ffd..0f1a304 100644 --- a/lib/mappings.json +++ b/lib/mappings.json @@ -1,12 +1,113 @@ - { +{ + "pastes": { "mappings": { - "_doc": { + "paste": { "properties": { "paste_metadata": { - "type": "nested" + "properties": { + "date": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "expire": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "full_url": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "hits": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "key": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "scrape_url": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "size": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "syntax": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "title": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "user": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } + } }, - "paste_text": { "type": "text" } + "paste_text": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + } } } } } +} diff --git a/lib/pastebinner.rb b/lib/pastebinner.rb index a1da763..8a360af 100755 --- a/lib/pastebinner.rb +++ b/lib/pastebinner.rb @@ -4,6 +4,7 @@ # official docs from pastebin on their api can be found at https://pastebin.com/api require 'rest-client' require 'json' +require 'yaml' class Pastebinner attr_accessor :api_dev_key, :username, :password @@ -124,23 +125,23 @@ class Pastebinner method: :get, url: @scraping_api_url + ENDPOINTS[:scrape_item_meta] + "?i=#{unique_paste_key}" ) - response + YAML.load(response.body) end ##### PREPARING THE PASTES FOR SERIALIZATION FOR ES CONFORMING TO PER INDEX SEARCHING ##### SEE - https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html#_custom_type_field def hash_pastes(keys) - keys.map do |key| - raw_paste = self.raw_paste_data(key).body - raw_paste_metadata = self.raw_paste_metadata(key).body + if keys.is_a? String + raw_paste = self.raw_paste_data(keys) + raw_paste_metadata = self.raw_paste_metadata(keys) hash = self.hash_paste(raw_paste, raw_paste_metadata) - end - end - - def json_pastes(keys) - self.hash_pastes(keys).map do |paste_hash| - paste_hash.to_json + else + keys.map do |key| + raw_paste = self.raw_paste_data(key).body + raw_paste_metadata = self.raw_paste_metadata(key) + hash = self.hash_paste(raw_paste, raw_paste_metadata) + end end end @@ -149,8 +150,18 @@ class Pastebinner "paste_text": raw_paste_data } end - def json_paste(raw_paste_data, raw_paste_metadata) - self.hash_paste(raw_paste_data, raw_paste_metadata).to_json + def json_paste(key=nil, keys) + # if we give keys, create an array of 50 json pastes + if keys + self.hash_pastes(keys).map do |paste_hash| + paste_hash.to_json + end + else + # otherwise, just make a json of the 1 raw_paste_data & raw_paste_metadata + raw_paste_data = self.raw_paste_data(key) + raw_paste_metadata = self.raw_paste_metadata(key) + self.hash_paste(raw_paste_data, raw_paste_metadata).to_json + end end # keep this method private so we are not letting anyone run any method in our program