This commit is contained in:
booboy 2019-01-06 19:26:49 -06:00
commit fc82bd78d1
3 changed files with 121 additions and 10 deletions

View file

@ -0,0 +1,68 @@
require 'elasticsearch'
# Helper that shapes scraped pastes for Elasticsearch: it builds encoded
# documents from a paste source, PUTs payloads to an index over HTTP, and
# provides the mapping bodies for the paste-metadata and paste-text documents.
class ElasticSearchHelper
  attr_accessor :server_uri, :index

  # @param server_uri [String] base URI; used as the first segment of the URL built in #puts_to_es
  # @param index [String] index name; used to build the document path in #puts_to_es
  def initialize(server_uri, index)
    @server_uri = server_uri
    @index = index
  end

  # Builds one encoded document per paste key.
  # The original note says this is meant for batches of 50 pastes; nothing in
  # this method enforces a batch size.
  #
  # @param pb [#encode_json, #raw_paste_data, #raw_paste_metadata] paste source
  # @param keys [Enumerable] paste keys handed to pb one at a time
  # @return [Array] whatever pb.encode_json returns for each key, in order
  def build_json_array(pb, keys)
    keys.map do |key|
      pb.encode_json(pb.raw_paste_data(key), pb.raw_paste_metadata(key))
    end
  end

  # PUTs payload to "<server_uri>/<index>/<index>s/<increment_num>" with a JSON
  # content type.
  # NOTE(review): RestClient is not among the requires visible in this file —
  # presumably it is loaded by the caller; confirm.
  #
  # @param payload [String] request body, passed through unchanged
  # @param increment_num [Object] interpolated as the last URL segment
  # @return the value returned by RestClient::Request.execute
  def puts_to_es(payload, increment_num)
    RestClient::Request.execute(
      method: :put,
      url: "#{server_uri}/#{index}/#{index}s/#{increment_num}",
      headers: { 'Content-type': 'application/json' },
      payload: payload
    )
  end

  # Mapping body for paste-metadata documents (intended to be sent with a PUT).
  # The per-field definitions live under paste_metadata's own "properties" so
  # they describe the sub-fields of the nested object rather than declaring a
  # top-level field that is literally called "properties".
  # NOTE(review): the "string" field type was replaced by "text"/"keyword" in
  # newer Elasticsearch releases — confirm against the target server version.
  #
  # @return [Hash] mapping body with symbol keys
  def metadata_mappings
    {
      "mappings": {
        "_doc": {
          "properties": {
            "type": { "type": "keyword" },
            "paste_metadata": {
              "type": "nested",
              "properties": {
                "scrape_url": { "type": "string" },
                "full_url": { "type": "string" },
                "date": { "type": "string" },
                "size": { "type": "string" },
                "expire": { "type": "string" },
                "title": { "type": "string" },
                "syntax": { "type": "string" },
                "user": { "type": "string" },
                "hits": { "type": "string" }
              }
            }
          }
        }
      }
    }
  end

  # Mapping body for paste-text documents (intended to be sent with a PUT).
  #
  # @return [Hash] mapping body with symbol keys
  def set_paste_text_mappings
    {
      "mappings": {
        "_doc": {
          "properties": {
            "type": { "type": "keyword" },
            "paste_text": { "type": "text" }
          }
        }
      }
    }
  end
end

9
lib/es.rb Normal file
View file

@ -0,0 +1,9 @@
require 'elasticsearch'
# Scratch script: build an Elasticsearch client with logging enabled, call
# transport.reload_connections!, ask for cluster health, then issue an index
# call against index 'paste' / type 'pastes'.
# NOTE(review): the URL below looks like a host and port with a missing octet
# or colon (e.g. 192.168.1.x:9200) — confirm the intended address.
es_client = Elasticsearch::Client.new(url: 'http://192.168.1.9200', log: true)
es_client.transport.reload_connections!
es_client.cluster.health
# NOTE(review): no document body or id is passed to this call — confirm it is intentional.
es_client.index(index: 'paste', type: 'pastes')

View file

@ -111,7 +111,7 @@ class Pastebinner
# will extract just the keys from recent public pastes
# @param public_pastes [String] JSON text; parsed with JSON.parse and expected
#   to yield a list of objects that each carry a 'key' entry
# @return [Array] the 'key' value of every parsed element, in input order
#   (no de-duplication is performed here)
def get_unique_paste_keys(public_pastes)
  # The parsed list is mapped exactly once; the local is deliberately not
  # called `pp`, which would shadow Kernel#pp.
  JSON.parse(public_pastes).map { |paste| paste['key'] }
end
def raw_paste_data(unique_paste_key)
@ -124,17 +124,51 @@ class Pastebinner
response = RestClient::Request.execute(
method: :get,
url: @scraping_api_url + ENDPOINTS[:scrape_item_meta] + "?i=#{unique_paste_key}")
response
end
##### PREPARING THE PASTES FOR SERIALIZATION FOR ES CONFORMING TO PER INDEX SEARCHING
##### SEE - https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html#_custom_type_field
# Wraps the given paste text in a one-entry hash under the :paste_text key.
#
# @param raw_paste_text [Object] value stored as-is
# @return [Hash] { paste_text: raw_paste_text }
def hash_paste(raw_paste_text)
  { paste_text: raw_paste_text }
end
def encode_json(raw_paste_text)
# raw_paste_text should be a RestClient response.body or a string
time = Time.new
hashed_data =
{
"date": time.strftime("%Y-%m-%d %H:%M:%S"),
"text": raw_paste_text
}
hashed_data.to_json
# Wraps the given paste metadata in a one-entry hash under the :paste_metadata key.
#
# @param raw_paste_metadata [Object] value stored as-is
# @return [Hash] { paste_metadata: raw_paste_metadata }
def hash_metadata(raw_paste_metadata)
  { paste_metadata: raw_paste_metadata }
end
# Builds the custom type field for a document: a one-entry hash under :type.
#
# @param doc_type [Object] value stored as-is
# @return [Hash] { type: doc_type }
def hash_doc_type(doc_type)
  { type: doc_type }
end
# Combines the doc-type hash with a content hash into a new hash.
# Entries from hash_to_get_popped win on key collisions (Hash#merge semantics);
# neither argument is modified.
#
# @param doc_type_hash [Hash] receiver of the merge
# @param hash_to_get_popped [Hash] entries merged on top
# @return [Hash] the merged hash
def pop_doc_type_hash(doc_type_hash, hash_to_get_popped)
  doc_type_hash.merge(hash_to_get_popped)
end
# Serializes the given object by calling its own #to_json.
#
# NOTE(review): this method's name collides with the to_json that objects
# normally respond to, but takes a required argument — confirm nothing tries
# to serialize an instance of the enclosing class itself.
#
# @param final_hash [#to_json] object to serialize (a Hash, going by the name)
# @return whatever final_hash.to_json returns
def to_json(final_hash)
final_hash.to_json
end
# Builds the document hash for one paste: the doc-type entry merged with
# either the paste text or the paste metadata.
#
# When both raw_paste_text and raw_paste_metadata are given, the text wins and
# the metadata is ignored.
#
# @param raw_paste_text [Object, nil] paste text; used when truthy
# @param raw_paste_metadata [Object, nil] paste metadata; used only when no text is given
# @param doc_type [Object] value for the document's type entry
# @return [Hash] result of pop_doc_type_hash(hash_doc_type(doc_type), content hash)
# @raise [ArgumentError] when neither text nor metadata is supplied
def build_hash(raw_paste_text=nil, raw_paste_metadata=nil, doc_type)
  content_hash =
    if raw_paste_text
      hash_paste(raw_paste_text)
    elsif raw_paste_metadata
      hash_metadata(raw_paste_metadata)
    else
      # Fail here with a clear message instead of letting a nil content hash
      # reach the merge below and blow up with an unrelated error.
      raise ArgumentError, 'build_hash requires raw_paste_text or raw_paste_metadata'
    end

  pop_doc_type_hash(hash_doc_type(doc_type), content_hash)
end
# Sends payload to es_uri with an HTTP PUT, declaring a JSON content type.
#
# @param es_uri [String] full request URL
# @param payload [String] request body, passed through unchanged
# @return the value returned by RestClient::Request.execute
def puts_to_es(es_uri, payload)
  RestClient::Request.execute(
    method: :put,
    url: es_uri,
    headers: { 'Content-type': 'application/json' },
    payload: payload
  )
end
# keep this method private so we are not letting anyone run any method in our program