From 18c1bde0a3d9dff3af90d105673dc5fd2c9cd786 Mon Sep 17 00:00:00 2001
From: booboy
Date: Sun, 18 Nov 2018 06:04:51 -0600
Subject: [PATCH] made a bunch of hashes that merge to prepare pastes and
 metadata for ES. pastes seem to work, but metadata is still messed up. i
 think it is because of the mappings that i did in the
 elastic_search_helper.rb script

---
Review notes (added in review; not part of the commit message, placed below
the "---" marker where patch commentary conventionally goes):
  * lib/elastic_search_helper.rb: build_json_array calls pb.encode_json, but
    the lib/pastebinner.rb hunk below deletes Pastebinner#encode_json.
    Presumably the call should move to build_hash/to_json - TODO confirm.
  * lib/elastic_search_helper.rb: the file requires only 'elasticsearch', yet
    puts_to_es uses RestClient. This works only if rest-client is loaded
    elsewhere before the call - verify against the caller.
  * lib/elastic_search_helper.rb: both mapping hashes are declared under the
    "_doc" type, while puts_to_es PUTs documents to /#{index}/#{index}s/<n>.
    NOTE(review): that mismatch may be related to the metadata problem the
    subject line mentions - confirm.
  * lib/es.rb: the client URL 'http://192.168.1.9200' has "9200" as the last
    dotted component, so it is not a valid host:port. Presumably an octet or
    a ":" is missing - confirm the intended address.
  * lib/pastebinner.rb: in build_hash, when raw_paste_text and
    raw_paste_metadata are both nil the else branch only prints a message and
    `hash` stays nil; that nil is then handed to Hash#merge via
    pop_doc_type_hash. When raw_paste_text is given, raw_paste_metadata is
    never used (if/elsif).
  * lib/pastebinner.rb: Pastebinner#to_json(final_hash) takes one required
    argument, so on Pastebinner instances it replaces any inherited to_json
    that is callable without arguments - presumably unintended, verify.

 lib/elastic_search_helper.rb | 57 ++++++++++++++++++++++++++++++++++++
 lib/es.rb                    |  9 ++++++
 lib/pastebinner.rb           | 47 ++++++++++++++++++++++++++---
 3 files changed, 109 insertions(+), 4 deletions(-)
 create mode 100644 lib/elastic_search_helper.rb
 create mode 100644 lib/es.rb

diff --git a/lib/elastic_search_helper.rb b/lib/elastic_search_helper.rb
new file mode 100644
index 0000000..700938d
--- /dev/null
+++ b/lib/elastic_search_helper.rb
@@ -0,0 +1,57 @@
+require 'elasticsearch'
+
+class ElasticSearchHelper
+  attr_accessor :server_uri, :index
+
+  def initialize(server_uri, index)
+    @server_uri = server_uri
+    @index = index
+  end
+
+  # maps each paste key to pb.encode_json(paste text, paste metadata) and returns the resulting array
+  def build_json_array(pb, keys)
+    json_for_es = keys.map do |k|
+      pb.encode_json(pb.raw_paste_data(k), pb.raw_paste_metadata(k))
+    end
+  end
+
+  def puts_to_es(payload, increment_num)
+    header = { 'Content-type': 'application/json' }
+    response = RestClient::Request.execute(
+      method: :put,
+      url: "#{server_uri}/#{index}/#{index}s/#{increment_num}",
+      headers: header,
+      payload: payload)
+  end
+
+  def metadata_mappings
+    # mappings hash for metadata docs: "type" is a keyword, "paste_metadata" is nested
+    # meant to be sent with a PUT; this method itself only returns the hash
+    {
+      "mappings": {
+        "_doc": {
+          "properties": {
+            "type": { "type": "keyword" },
+            "paste_metadata": { "type": "nested" }
+          }
+        }
+      }
+    }
+  end
+
+  def set_paste_text_mappings
+    # mappings hash for paste docs: "type" is a keyword, "paste_text" is text
+    # meant to be sent with a PUT; this method itself only returns the hash
+    {
+      "mappings": {
+        "_doc": {
+          "properties": {
+            "type": {"type": "keyword" },
+            "paste_text": { "type": "text" }
+          }
+        }
+      }
+    }
+  end
+
+end
diff --git a/lib/es.rb b/lib/es.rb
new file mode 100644
index 0000000..124ab6e
--- /dev/null
+++ b/lib/es.rb
@@ -0,0 +1,9 @@
+require 'elasticsearch'
+
+client = Elasticsearch::Client.new url: 'http://192.168.1.9200', log: true
+
+client.transport.reload_connections!
+
+client.cluster.health
+
+client.index index: 'paste', type: 'pastes'
diff --git a/lib/pastebinner.rb b/lib/pastebinner.rb
index db7f9af..25e5e3c 100755
--- a/lib/pastebinner.rb
+++ b/lib/pastebinner.rb
@@ -111,7 +111,7 @@ class Pastebinner
   # will extract just the keys from recent public pastes
   def get_unique_paste_keys(public_pastes)
     pp = JSON.parse(public_pastes)
-    pp.map {|p| {'key': p['key']}}
+    pp.map {|p| p['key']}
   end
 
   def raw_paste_data(unique_paste_key)
@@ -125,10 +125,49 @@ class Pastebinner
       method: :get,
       url: @scraping_api_url + ENDPOINTS[:scrape_item_meta] + "?i=#{unique_paste_key}")
   end
+
+  ##### PREPARING THE PASTES FOR SERIALIZATION FOR ES CONFORMING TO PER INDEX SEARCHING
+  ##### SEE - https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html#_custom_type_field
+
+  def hash_paste(raw_paste_text)
+    hash_paste = { "paste_text": raw_paste_text }
+  end
 
-  def encode_json(raw_paste_text, raw_paste_metadata)
-    hashed_data = { "paste_metadata": raw_paste_metadata, "paste_text": raw_paste_text }
-    hashed_data.to_json
+  def hash_metadata(raw_paste_metadata)
+    hash_metadata = { "paste_metadata": raw_paste_metadata }
+  end
+
+  def hash_doc_type(doc_type)
+    hash_doc_type = { "type": doc_type }
+  end
+
+  def pop_doc_type_hash(doc_type_hash, hash_to_get_popped)
+    popped_doc_type_hash = doc_type_hash.merge(hash_to_get_popped)
+  end
+
+  def to_json(final_hash)
+    final_hash.to_json
+  end
+
+  def build_hash(raw_paste_text=nil, raw_paste_metadata=nil, doc_type)
+    if raw_paste_text
+      hash = self.hash_paste(raw_paste_text)
+    elsif raw_paste_metadata
+      hash = self.hash_metadata(raw_paste_metadata)
+    else
+      puts 'there is supposed to be an error here'
+    end
+    doc_type_hash = self.hash_doc_type(doc_type)
+    final_hash = self.pop_doc_type_hash(doc_type_hash, hash)
+  end
+
+  def puts_to_es(es_uri, payload)
+    header = { 'Content-type': 'application/json' }
+    response = RestClient::Request.execute(
+      method: :put,
+      url: es_uri,
+      headers: header,
+      payload: payload)
   end
 
   # keep this method private so we are not letting anyone run any method in our program