Made a bunch of hashes that merge to prepare pastes and metadata for Elasticsearch (ES). Pastes seem to work, but metadata is still messed up; I think it is because of the mappings that I defined in the elastic_search_helper.rb script.
This commit is contained in:
parent
345f826600
commit
18c1bde0a3
3 changed files with 109 additions and 4 deletions
57
lib/elastic_search_helper.rb
Normal file
57
lib/elastic_search_helper.rb
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
require 'elasticsearch'
|
||||||
|
|
||||||
|
# Helper for preparing paste documents and sending them to an Elasticsearch
# server over plain HTTP.
#
# NOTE(review): this class calls RestClient, but the only require visible in
# this file is 'elasticsearch' -- presumably 'rest-client' is loaded by the
# caller; confirm.
# NOTE(review): #puts_to_es writes documents under the type "#{index}s" while
# the mapping hashes returned below declare the type `_doc`; confirm these are
# meant to agree.
class ElasticSearchHelper
  attr_accessor :server_uri, :index

  # @param server_uri [String] base URI of the Elasticsearch server
  # @param index [String] index name; also used to derive the type segment
  #   of the document URL in #puts_to_es
  def initialize(server_uri, index)
    @server_uri = server_uri
    @index = index
  end

  # Builds one encoded document per paste key.
  #
  # For every key the paste text and paste metadata are fetched through +pb+
  # and handed to +pb.encode_json+.
  #
  # NOTE(review): the commit that adds this class also removes
  # Pastebinner#encode_json -- confirm +pb+ still responds to it.
  #
  # @param pb [#raw_paste_data, #raw_paste_metadata, #encode_json]
  # @param keys [Array, nil] paste keys; nil is treated as "no keys"
  # @return [Array] the results of +pb.encode_json+, in key order
  def build_json_array(pb, keys)
    Array(keys).map do |key|
      pb.encode_json(pb.raw_paste_data(key), pb.raw_paste_metadata(key))
    end
  end

  # PUTs a JSON payload to "<server_uri>/<index>/<index>s/<increment_num>".
  #
  # @param payload [String] request body, sent as application/json
  # @param increment_num [Integer, String] last URL segment (document id)
  # @return whatever RestClient::Request.execute returns
  def puts_to_es(payload, increment_num)
    RestClient::Request.execute(
      method: :put,
      url: "#{server_uri}/#{index}/#{index}s/#{increment_num}",
      headers: { 'Content-type': 'application/json' },
      payload: payload
    )
  end

  # Mapping definition for metadata documents. Nothing is sent here; the hash
  # is only returned so the caller can PUT it.
  #
  # NOTE(review): `nested` is meant for objects / arrays of objects; if
  # paste_metadata is indexed as a raw JSON string this mapping is likely the
  # reason metadata documents are rejected -- confirm against the caller.
  #
  # @return [Hash] symbol-keyed mapping body
  def metadata_mappings
    mappings_for(:paste_metadata, 'nested')
  end

  # Mapping definition for paste-text documents. Despite the `set_` prefix
  # nothing is sent here; the hash is only returned so the caller can PUT it.
  #
  # @return [Hash] symbol-keyed mapping body
  def set_paste_text_mappings
    mappings_for(:paste_text, 'text')
  end

  private

  # Shared shape of both mapping bodies: a keyword `type` discriminator field
  # plus one content field of the given Elasticsearch field type.
  def mappings_for(field, field_type)
    {
      mappings: {
        _doc: {
          properties: {
            type: { type: 'keyword' },
            field => { type: field_type }
          }
        }
      }
    }
  end
end
|
9
lib/es.rb
Normal file
9
lib/es.rb
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
require 'elasticsearch'
|
||||||
|
|
||||||
|
# Scratch script: builds an Elasticsearch client, reloads its connections,
# requests cluster health and issues an index call for 'paste'/'pastes'.
#
# FIXME: the default URL is kept from the original but is not a valid
# host:port ("192.168.1.9200" looks like an address with the port fused onto
# it). Set ELASTICSEARCH_URL (e.g. "http://<host>:9200") until the correct
# address is confirmed.
es_url = ENV.fetch('ELASTICSEARCH_URL', 'http://192.168.1.9200')

client = Elasticsearch::Client.new url: es_url, log: true

client.transport.reload_connections!

# The result is not used; the call only exercises the connection.
client.cluster.health

# NOTE(review): no document body is passed here -- confirm this is intended.
client.index index: 'paste', type: 'pastes'
|
|
@ -111,7 +111,7 @@ class Pastebinner
|
||||||
# will extract just the keys from recent public pastes
|
# will extract just the keys from recent public pastes
|
||||||
# Extracts the paste keys from a public-pastes listing.
#
# Returns the bare 'key' values (the post-commit behavior), not
# {'key': ...} wrapper hashes.
#
# @param public_pastes [String] JSON text; expected to decode to a list of
#   objects that each carry a 'key' entry
# @return [Array] the 'key' value of every entry, in listing order
#   (nil for an entry without a 'key')
def get_unique_paste_keys(public_pastes)
  JSON.parse(public_pastes).map { |paste| paste['key'] }
end
|
||||||
|
|
||||||
def raw_paste_data(unique_paste_key)
|
def raw_paste_data(unique_paste_key)
|
||||||
|
@ -126,9 +126,48 @@ class Pastebinner
|
||||||
url: @scraping_api_url + ENDPOINTS[:scrape_item_meta] + "?i=#{unique_paste_key}")
|
url: @scraping_api_url + ENDPOINTS[:scrape_item_meta] + "?i=#{unique_paste_key}")
|
||||||
end
|
end
|
||||||
|
|
||||||
def encode_json(raw_paste_text, raw_paste_metadata)
|
##### Preparing the pastes for serialization for ES, conforming to per-index searching (custom `type` field)
|
||||||
hashed_data = { "paste_metadata": raw_paste_metadata, "paste_text": raw_paste_text }
|
##### SEE - https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html#_custom_type_field
|
||||||
hashed_data.to_json
|
|
||||||
|
# Wraps raw paste text in the hash fragment used for a paste document.
#
# @param raw_paste_text [Object] paste body, stored as given
# @return [Hash] { paste_text: raw_paste_text }
def hash_paste(raw_paste_text)
  { paste_text: raw_paste_text }
end
|
||||||
|
|
||||||
|
# Wraps raw paste metadata in the hash fragment used for a metadata document.
#
# @param raw_paste_metadata [Object] metadata, stored as given (not parsed)
# @return [Hash] { paste_metadata: raw_paste_metadata }
def hash_metadata(raw_paste_metadata)
  { paste_metadata: raw_paste_metadata }
end
|
||||||
|
|
||||||
|
# Builds the custom `type` discriminator fragment for a document.
#
# @param doc_type [Object] value stored under the :type key
# @return [Hash] { type: doc_type }
def hash_doc_type(doc_type)
  { type: doc_type }
end
|
||||||
|
|
||||||
|
# Combines the doc-type fragment with a content fragment into one document.
#
# Neither argument is modified; on a key clash the value from
# +hash_to_get_popped+ wins (Hash#merge semantics).
#
# @param doc_type_hash [Hash] fragment carrying the :type field
# @param hash_to_get_popped [Hash] content fragment to merge in
# @return [Hash] a new merged hash
def pop_doc_type_hash(doc_type_hash, hash_to_get_popped)
  doc_type_hash.merge(hash_to_get_popped)
end
|
||||||
|
|
||||||
|
# Serializes a finished document hash to a JSON string.
#
# NOTE(review): this one-argument method has the same name as the #to_json
# that the json library provides on objects; confirm nothing tries to
# serialize an instance of this class through that protocol.
#
# @param final_hash [#to_json] document to serialize
# @return [String] JSON text
def to_json(final_hash)
  final_hash.to_json
end
|
||||||
|
|
||||||
|
# Builds a complete document hash: the :type discriminator merged with either
# the paste-text fragment or the metadata fragment.
#
# When both +raw_paste_text+ and +raw_paste_metadata+ are given, only the
# paste text is used (text takes precedence).
# NOTE(review): confirm that ignoring the metadata in that case is intended.
#
# @param raw_paste_text [Object, nil] paste body; used when truthy
# @param raw_paste_metadata [Object, nil] metadata; used when no text is given
# @param doc_type [Object] value for the :type field
# @return [Hash] merged document hash
# @raise [ArgumentError] when neither text nor metadata is supplied
#   (previously this printed a placeholder message and then failed with a
#   TypeError while merging nil)
def build_hash(raw_paste_text = nil, raw_paste_metadata = nil, doc_type)
  content =
    if raw_paste_text
      hash_paste(raw_paste_text)
    elsif raw_paste_metadata
      hash_metadata(raw_paste_metadata)
    else
      raise ArgumentError, 'build_hash requires raw_paste_text or raw_paste_metadata'
    end

  pop_doc_type_hash(hash_doc_type(doc_type), content)
end
|
||||||
|
|
||||||
|
# PUTs a JSON payload to the given Elasticsearch URI.
#
# @param es_uri [String] full target URL of the request
# @param payload [String] request body, sent as application/json
# @return whatever RestClient::Request.execute returns
def puts_to_es(es_uri, payload)
  RestClient::Request.execute(
    method: :put,
    url: es_uri,
    headers: { 'Content-type': 'application/json' },
    payload: payload
  )
end
|
||||||
|
|
||||||
# keep this method private so we are not letting anyone run any method in our program
|
# keep this method private so we are not letting anyone run any method in our program
|
||||||
|
|
Loading…
Add table
Reference in a new issue