diff --git a/lib/elastic_search_helper.rb b/lib/elastic_search_helper.rb index 920a0ab..343f823 100644 --- a/lib/elastic_search_helper.rb +++ b/lib/elastic_search_helper.rb @@ -24,7 +24,7 @@ class ElasticSearchHelper payload: payload) end - def metadata_mappings + def data_mappings # metadata mappings # send a PUT { @@ -44,25 +44,10 @@ class ElasticSearchHelper "user": { "type": "string" }, "hits": { "type": "string" } } + "paste_text": { "type": "string" } } } } } end - - def set_paste_text_mappings - # paste mappings - # send a PUT - { - "mappings": { - "_doc": { - "properties": { - "type": {"type": "keyword" }, - "paste_text": { "type": "text" } - } - } - } - } - end - end diff --git a/lib/pastebinner.rb b/lib/pastebinner.rb index 835d3bf..5099b2c 100755 --- a/lib/pastebinner.rb +++ b/lib/pastebinner.rb @@ -130,38 +130,6 @@ class Pastebinner ##### PREPARING THE PASTES FOR SERIALIZATION FOR ES CONFORMING TO PER INDEX SEARCHING ##### SEE - https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html#_custom_type_field - def hash_paste(raw_paste_text) - hash_paste = { "paste_text": raw_paste_text } - end - - def hash_metadata(raw_paste_metadata) - hash_metadata = { "paste_metadata": "#{raw_paste_metadata}" } - end - - def hash_doc_type(doc_type) - hash_doc_type = { "type": doc_type } - end - - def pop_doc_type_hash(doc_type_hash, hash_to_get_popped) - popped_doc_type_hash = doc_type_hash.merge(hash_to_get_popped) - end - - def to_json(final_hash) - final_hash.to_json - end - - def build_hash(raw_paste_text=nil, raw_paste_metadata=nil, doc_type) - if raw_paste_text - hash = self.hash_paste(raw_paste_text) - elsif raw_paste_metadata - hash = self.hash_metadata(raw_paste_metadata) - else - puts 'there is supposed to be an error here' - end - doc_type_hash = self.hash_doc_type(doc_type) - final_hash = self.pop_doc_type_hash(doc_type_hash, hash) - end - def puts_to_es(es_uri, payload) header = { 'Content-type': 'application/json' } response = RestClient::Request.execute( @@ -171,17 +139,65 @@ class Pastebinner payload: payload) end - def text_and_metadata(keys) - # keys = self.get_unique_keys - raw_pastes = keys.map do |key| - self.raw_paste_data(key) + def create_index_es(es_uri, index) + response = RestClient::Request.execute( + method: :put, + url: es_uri + index) + end + + def hash_pastes(keys, pb) + keys.map do |key| + raw_paste = pb.raw_paste_data(key).body + raw_paste_metadata = pb.raw_paste_metadata(key).body + hash = pb.hash_paste(raw_paste, raw_paste_metadata) end + end + + def hash_paste(raw_paste, raw_paste_metadata) + { "paste_metadata": raw_paste_metadata, + "paste_text": raw_paste } + end + + def post_to_es(es_uri, payload) + header = { 'Content-type': 'application/json' } + response = RestClient::Request.execute( + method: :post, + url: es_uri, + headers: header, + payload: payload) + end - raw_paste_metadata = keys.map do |key| - self.raw_paste_metadata(key) + def send_es_bulk(esi_uri, json_data) + json_data.each do |data| + self.post_to_es(esi_uri, data) end + end - + def data_mappings + # metadata mappings + # send a PUT + { + "mappings": { + "_doc": { + "properties": { + "type": { "type": "keyword" }, + "paste_metadata": { "type": "nested" }, + "properties": [ { + "scrape_url": { "type": "string" }, + "full_url": { "type": "string" }, + "date": { "type": "string" }, + "size": { "type": "string" }, + "expire": { "type": "string" }, + "title": { "type": "string" }, + "syntax": { "type": "string" }, + "user": { "type": "string" }, + "hits": { "type": "string" } + } ], + "paste_text": { "type": "string" } + } + } + } + } end # keep this method private so we are not letting anyone run any method in our program