Added some methods to better prepare data for Elasticsearch
This commit is contained in:
parent
c72f8c9a5f
commit
107c1ff559
2 changed files with 57 additions and 56 deletions
|
@ -24,7 +24,7 @@ class ElasticSearchHelper
|
||||||
payload: payload)
|
payload: payload)
|
||||||
end
|
end
|
||||||
|
|
||||||
def metadata_mappings
|
def data_mappings
|
||||||
# metadata mappings
|
# metadata mappings
|
||||||
# send a PUT
|
# send a PUT
|
||||||
{
|
{
|
||||||
|
@ -44,25 +44,10 @@ class ElasticSearchHelper
|
||||||
"user": { "type": "string" },
|
"user": { "type": "string" },
|
||||||
"hits": { "type": "string" }
|
"hits": { "type": "string" }
|
||||||
}
|
}
|
||||||
|
"paste_text": { "type": "string" }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
def set_paste_text_mappings
|
|
||||||
# paste mappings
|
|
||||||
# send a PUT
|
|
||||||
{
|
|
||||||
"mappings": {
|
|
||||||
"_doc": {
|
|
||||||
"properties": {
|
|
||||||
"type": {"type": "keyword" },
|
|
||||||
"paste_text": { "type": "text" }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
|
@ -130,38 +130,6 @@ class Pastebinner
|
||||||
##### PREPARING THE PASTES FOR SERIALIZATION FOR ES CONFORMING TO PER INDEX SEARCHING
|
##### PREPARING THE PASTES FOR SERIALIZATION FOR ES CONFORMING TO PER INDEX SEARCHING
|
||||||
##### SEE - https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html#_custom_type_field
|
##### SEE - https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html#_custom_type_field
|
||||||
|
|
||||||
def hash_paste(raw_paste_text)
|
|
||||||
hash_paste = { "paste_text": raw_paste_text }
|
|
||||||
end
|
|
||||||
|
|
||||||
def hash_metadata(raw_paste_metadata)
|
|
||||||
hash_metadata = { "paste_metadata": "#{raw_paste_metadata}" }
|
|
||||||
end
|
|
||||||
|
|
||||||
def hash_doc_type(doc_type)
|
|
||||||
hash_doc_type = { "type": doc_type }
|
|
||||||
end
|
|
||||||
|
|
||||||
def pop_doc_type_hash(doc_type_hash, hash_to_get_popped)
|
|
||||||
popped_doc_type_hash = doc_type_hash.merge(hash_to_get_popped)
|
|
||||||
end
|
|
||||||
|
|
||||||
def to_json(final_hash)
|
|
||||||
final_hash.to_json
|
|
||||||
end
|
|
||||||
|
|
||||||
def build_hash(raw_paste_text=nil, raw_paste_metadata=nil, doc_type)
|
|
||||||
if raw_paste_text
|
|
||||||
hash = self.hash_paste(raw_paste_text)
|
|
||||||
elsif raw_paste_metadata
|
|
||||||
hash = self.hash_metadata(raw_paste_metadata)
|
|
||||||
else
|
|
||||||
puts 'there is supposed to be an error here'
|
|
||||||
end
|
|
||||||
doc_type_hash = self.hash_doc_type(doc_type)
|
|
||||||
final_hash = self.pop_doc_type_hash(doc_type_hash, hash)
|
|
||||||
end
|
|
||||||
|
|
||||||
def puts_to_es(es_uri, payload)
|
def puts_to_es(es_uri, payload)
|
||||||
header = { 'Content-type': 'application/json' }
|
header = { 'Content-type': 'application/json' }
|
||||||
response = RestClient::Request.execute(
|
response = RestClient::Request.execute(
|
||||||
|
@ -171,17 +139,65 @@ class Pastebinner
|
||||||
payload: payload)
|
payload: payload)
|
||||||
end
|
end
|
||||||
|
|
||||||
def text_and_metadata(keys)
|
def create_index_es(es_uri, index)
|
||||||
# keys = self.get_unique_keys
|
response = RestClient::Request.execute(
|
||||||
raw_pastes = keys.map do |key|
|
method: :put,
|
||||||
self.raw_paste_data(key)
|
url: es_uri + index)
|
||||||
|
end
|
||||||
|
|
||||||
|
def hash_pastes(keys, pb)
|
||||||
|
keys.map do |key|
|
||||||
|
raw_paste = pb.raw_paste_data(key).body
|
||||||
|
raw_paste_metadata = pb.raw_paste_metadata(key).body
|
||||||
|
hash = pb.hash_paste(raw_paste, raw_paste_metadata)
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
raw_paste_metadata = keys.map do |key|
|
def hash_paste(raw_paste, raw_paste_metadata)
|
||||||
self.raw_paste_metadata(key)
|
{ "paste_metadata": raw_paste_metadata,
|
||||||
|
"paste_text": raw_paste }
|
||||||
|
end
|
||||||
|
|
||||||
|
def post_to_es(es_uri, payload)
|
||||||
|
header = { 'Content-type': 'application/json' }
|
||||||
|
response = RestClient::Request.execute(
|
||||||
|
method: :post,
|
||||||
|
url: es_uri,
|
||||||
|
headers: header,
|
||||||
|
payload: payload)
|
||||||
|
end
|
||||||
|
|
||||||
|
def send_es_bulk(esi_uri, json_data)
|
||||||
|
json_data.each do |data|
|
||||||
|
self.post_to_es(esi_uri, data)
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def data_mappings
|
||||||
|
# metadata mappings
|
||||||
|
# send a PUT
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"_doc": {
|
||||||
|
"properties": {
|
||||||
|
"type": { "type": "keyword" },
|
||||||
|
"paste_metadata": { "type": "nested" },
|
||||||
|
"properties": [ {
|
||||||
|
"scrape_url": { "type": "string" },
|
||||||
|
"full_url": { "type": "string" },
|
||||||
|
"date": { "type": "string" },
|
||||||
|
"size": { "type": "string" },
|
||||||
|
"expire": { "type": "string" },
|
||||||
|
"title": { "type": "string" },
|
||||||
|
"syntax": { "type": "string" },
|
||||||
|
"user": { "type": "string" },
|
||||||
|
"hits": { "type": "string" }
|
||||||
|
} ],
|
||||||
|
"paste_text": { "type": "string" }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
# keep this method private so we are not letting anyone run any method in our program
|
# keep this method private so we are not letting anyone run any method in our program
|
||||||
|
|
Loading…
Add table
Reference in a new issue