#!/usr/bin/env ruby

# This will scrape the following:
# https://learn.microsoft.com/en-us/exchange/new-features/build-numbers-and-release-dates

require 'nokogiri'
require 'json'
require 'rest-client'

# Downloads the Microsoft Exchange "build numbers and release dates" page and
# turns every row of every HTML table on it into a Hash.
class MicrosoftExchangeReleaseInfo
  # NO-BREAK SPACE character that is stripped from linked product names.
  NBSP = "\u00A0".freeze

  attr_accessor :url

  def initialize
    @url = 'https://learn.microsoft.com/en-us/exchange/new-features/build-numbers-and-release-dates'
  end

  # Performs an HTTP GET against +url+.
  #
  # NOTE(review): rest-client appears to raise for most non-2xx responses by
  # default, so the fallback path is presumably only reached for other
  # non-200 codes that do not raise - confirm against the rest-client docs.
  #
  # @return [String, nil] the response body when the status code is 200;
  #   otherwise the status code is printed to stdout and nil is returned
  def get
    r = RestClient::Request.execute(method: :get, url: url)
    return r.body if r.code == 200

    puts "HTTP Code: #{r.code}"
    nil
  end

  # Parses an HTML string with Nokogiri.
  #
  # @param html [String] raw HTML
  # @return the document object produced by Nokogiri::HTML
  def parse_html(html)
    Nokogiri::HTML(html)
  end

  # Extracts the column headings from the first table of the document.
  #
  # The text of the first table's header row is split on newlines and the
  # first piece is dropped (presumably the whitespace in front of the first
  # header cell - confirm against the live markup).
  #
  # @param html_doc parsed document, as returned by {#parse_html}
  # @return [Array<String>] heading strings; empty when the document
  #   contains no table
  def headings(html_doc)
    first_table = table_nodes(html_doc).first
    return [] unless first_table

    first_table.xpath('./thead/tr').text.split("\n").drop(1)
  end

  # @param html_doc parsed document, as returned by {#parse_html}
  # @return every <table> node found anywhere in the document
  def table_nodes(html_doc)
    html_doc.xpath('//table')
  end

  # @param table_node a single <table> node
  # @return the <tr> nodes directly under the table's <tbody>
  def table_records(table_node)
    table_node.xpath('./tbody/tr')
  end

  # Converts one table row into a Hash of its first four cells.
  #
  # Cells that are missing from the row yield nil values instead of raising,
  # so a row without any <td> produces a Hash whose values are all nil.
  #
  # @param tr a <tr> node
  # @return [Hash{Symbol => String, nil}] with the keys :product_name,
  #   :kb_url, :release_date, :build_num_short and :build_num_long
  def data_from_table(tr)
    tds = tr.xpath('./td')
    name_cell = tds[0]

    {
      product_name: product_name_from(name_cell),
      # `&.` does not short-circuit the rest of a call chain in Ruby, so the
      # trailing call needs its own guard.
      kb_url: name_cell&.xpath('./a/@href')&.text,
      release_date: tds[1]&.text,
      build_num_short: tds[2]&.text,
      build_num_long: tds[3]&.text
    }
  end

  # Fetches the page and converts every row of every table into a Hash.
  #
  # @return [Array<Array<Hash>>] one inner array per table, holding one Hash
  #   (see {#data_from_table}) per row
  def main
    doc = parse_html(get)
    table_nodes(doc).map do |table_node|
      table_records(table_node).map { |tr| data_from_table(tr) }
    end
  end

  private

  # Picks the product name out of the first cell of a row.
  #
  # When the cell links to a KB article the link text is used, with
  # NO-BREAK SPACE characters removed (they only show up in linked names);
  # otherwise the plain cell text is returned unchanged.
  #
  # @param cell the first <td> of a row, or nil when the row has none
  # @return [String, nil] the product name, or nil when +cell+ is nil
  def product_name_from(cell)
    return unless cell

    linked_name = cell.xpath('./a').children.text.gsub(NBSP, '')
    linked_name.empty? ? cell.text : linked_name
  end
end