Good additions; got the nested object working by loading paste_metadata as an array of hashes instead of a string
This commit is contained in:
parent
e43e63b98a
commit
e104b0b015
4 changed files with 131 additions and 54 deletions
|
@ -8,5 +8,7 @@ require '../lib/elastic_search_helper'
|
||||||
|
|
||||||
# (If you use this, don't forget to add pry to your Gemfile!)
|
# (If you use this, don't forget to add pry to your Gemfile!)
|
||||||
require 'pry'
|
require 'pry'
|
||||||
|
# set restclient logging and setup a pb object
|
||||||
|
RestClient.log ='stdout'
|
||||||
pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
pb = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
||||||
binding.pry
|
binding.pry
|
||||||
|
|
|
@ -9,15 +9,6 @@ class ElasticSearchHelper
|
||||||
@pastebinner = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
@pastebinner = Pastebinner.new(ENV['pastebin_api_key'], ENV['pastebin_username'], ENV['pastebin_password'])
|
||||||
end
|
end
|
||||||
|
|
||||||
def create_index_with_mappings
|
|
||||||
header = { 'Content-type': 'application/json' }
|
|
||||||
response = RestClient::Request.execute(
|
|
||||||
method: :put,
|
|
||||||
url: "#{server_uri}/#{index}",
|
|
||||||
headers: header,
|
|
||||||
payload: self.mappings.to_json)
|
|
||||||
end
|
|
||||||
|
|
||||||
def delete_index
|
def delete_index
|
||||||
response = RestClient::Request.execute(
|
response = RestClient::Request.execute(
|
||||||
method: :delete,
|
method: :delete,
|
||||||
|
@ -27,35 +18,7 @@ class ElasticSearchHelper
|
||||||
def get_mappings
|
def get_mappings
|
||||||
response = RestClient::Request.execute(
|
response = RestClient::Request.execute(
|
||||||
method: :get,
|
method: :get,
|
||||||
url: "#{server_uri}/#{index}/_mappings")
|
url: "#{server_uri}/#{index}/_mappings?pretty")
|
||||||
end
|
|
||||||
|
|
||||||
|
|
||||||
def mappings
|
|
||||||
{
|
|
||||||
"mappings": {
|
|
||||||
"paste": {
|
|
||||||
"properties": {
|
|
||||||
"paste_metadata": {
|
|
||||||
"type": "nested",
|
|
||||||
"properties": {
|
|
||||||
"scrape_url": { "type": "text" },
|
|
||||||
"full_url": { "type": "text" },
|
|
||||||
"date": { "type": "text" },
|
|
||||||
"key": { "type": "text" },
|
|
||||||
"size": { "type": "text" },
|
|
||||||
"expire": { "type": "text" },
|
|
||||||
"title": { "type": "text" },
|
|
||||||
"syntax": { "type": "text" },
|
|
||||||
"user": { "type": "text" },
|
|
||||||
"hits": { "type": "integer" }
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"paste_text": { "type": "text" }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def update_mapping(mapping_json)
|
def update_mapping(mapping_json)
|
||||||
|
|
|
@ -1,12 +1,113 @@
|
||||||
{
|
{
|
||||||
|
"pastes": {
|
||||||
"mappings": {
|
"mappings": {
|
||||||
"_doc": {
|
"paste": {
|
||||||
"properties": {
|
"properties": {
|
||||||
"paste_metadata": {
|
"paste_metadata": {
|
||||||
"type": "nested"
|
"properties": {
|
||||||
|
"date": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"expire": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"full_url": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"hits": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"key": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"scrape_url": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"size": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"syntax": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"title": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"user": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"paste_text": { "type": "text" }
|
"paste_text": {
|
||||||
|
"type": "text",
|
||||||
|
"fields": {
|
||||||
|
"keyword": {
|
||||||
|
"type": "keyword",
|
||||||
|
"ignore_above": 256
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
# official docs from pastebin on their api can be found at https://pastebin.com/api
|
# official docs from pastebin on their api can be found at https://pastebin.com/api
|
||||||
require 'rest-client'
|
require 'rest-client'
|
||||||
require 'json'
|
require 'json'
|
||||||
|
require 'yaml'
|
||||||
|
|
||||||
class Pastebinner
|
class Pastebinner
|
||||||
attr_accessor :api_dev_key, :username, :password
|
attr_accessor :api_dev_key, :username, :password
|
||||||
|
@ -124,23 +125,23 @@ class Pastebinner
|
||||||
method: :get,
|
method: :get,
|
||||||
url: @scraping_api_url + ENDPOINTS[:scrape_item_meta] + "?i=#{unique_paste_key}"
|
url: @scraping_api_url + ENDPOINTS[:scrape_item_meta] + "?i=#{unique_paste_key}"
|
||||||
)
|
)
|
||||||
response
|
YAML.load(response.body)
|
||||||
end
|
end
|
||||||
|
|
||||||
##### PREPARING THE PASTES FOR SERIALIZATION FOR ES CONFORMING TO PER INDEX SEARCHING
|
##### PREPARING THE PASTES FOR SERIALIZATION FOR ES CONFORMING TO PER INDEX SEARCHING
|
||||||
##### SEE - https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html#_custom_type_field
|
##### SEE - https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html#_custom_type_field
|
||||||
|
|
||||||
def hash_pastes(keys)
|
def hash_pastes(keys)
|
||||||
keys.map do |key|
|
if keys.is_a? String
|
||||||
raw_paste = self.raw_paste_data(key).body
|
raw_paste = self.raw_paste_data(keys)
|
||||||
raw_paste_metadata = self.raw_paste_metadata(key).body
|
raw_paste_metadata = self.raw_paste_metadata(keys)
|
||||||
hash = self.hash_paste(raw_paste, raw_paste_metadata)
|
hash = self.hash_paste(raw_paste, raw_paste_metadata)
|
||||||
end
|
else
|
||||||
end
|
keys.map do |key|
|
||||||
|
raw_paste = self.raw_paste_data(key).body
|
||||||
def json_pastes(keys)
|
raw_paste_metadata = self.raw_paste_metadata(key)
|
||||||
self.hash_pastes(keys).map do |paste_hash|
|
hash = self.hash_paste(raw_paste, raw_paste_metadata)
|
||||||
paste_hash.to_json
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -149,8 +150,18 @@ class Pastebinner
|
||||||
"paste_text": raw_paste_data }
|
"paste_text": raw_paste_data }
|
||||||
end
|
end
|
||||||
|
|
||||||
def json_paste(raw_paste_data, raw_paste_metadata)
|
def json_paste(key=nil, keys)
|
||||||
self.hash_paste(raw_paste_data, raw_paste_metadata).to_json
|
# if we give keys, create an array of 50 json pastes
|
||||||
|
if keys
|
||||||
|
self.hash_pastes(keys).map do |paste_hash|
|
||||||
|
paste_hash.to_json
|
||||||
|
end
|
||||||
|
else
|
||||||
|
# otherwise, just make a json of the 1 raw_paste_data & raw_paste_metadata
|
||||||
|
raw_paste_data = self.raw_paste_data(key)
|
||||||
|
raw_paste_metadata = self.raw_paste_metadata(key)
|
||||||
|
self.hash_paste(raw_paste_data, raw_paste_metadata).to_json
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# keep this method private so we are not letting anyone run any method in our program
|
# keep this method private so we are not letting anyone run any method in our program
|
||||||
|
|
Loading…
Add table
Reference in a new issue