"""Fetch the Ubuntu ``meta-release`` index and print each entry as JSON."""

from urllib import request
from io import TextIOWrapper
from itertools import groupby
import re
import json

# Field prefixes recognised at the start of a meta-release line.  The whole
# alternation is anchored so that a field name appearing inside a value
# (e.g. "Description: Name: foo") is not mistaken for a new field.
META_RELEASE_WORD_MATCHER = (
    r'^(Dist:\s|Name:\s|Version:\s|Date:\s|Supported:\s|Description:\s'
    r'|Release-File:\s|ReleaseNotes:\s|ReleaseNotesHtml:\s|UpgradeTool:\s'
    r'|UpgradeToolSignature:\s)'
)
META_SPACE_MATCHER = r'^\s'

# Compiled once; used for every line of the file.
_FIELD_PREFIX = re.compile(META_RELEASE_WORD_MATCHER)


def _split_field(line):
    """Return ``(key, value)`` for a recognised field line, else ``None``.

    ``line`` must already be stripped.  The key is lower-cased with the
    colon removed (``"Release-File: x"`` -> ``("release-file", "x")``).
    """
    match = _FIELD_PREFIX.match(line)
    if match is None:
        return None
    key = match.group(1).lower().replace(":", "").strip()
    return key, line[match.end():].strip()


class SourceMetaRelease:
    """Downloads and parses the meta-release file."""

    def __init__(self):
        self.meta_release_url = 'http://changelogs.ubuntu.com/meta-release'

    @staticmethod
    def parse_lines(lines):
        """Parse meta-release text into a list of dicts, one per record.

        Args:
            lines: iterable of text lines (trailing newlines allowed).

        Returns:
            A list of ``{field_name: value}`` dicts.  Records are the runs
            of non-blank lines between blank lines.  Lines that do not start
            with a recognised field prefix are skipped.  A record in which
            the same field occurs more than once is discarded.
        """
        records = []
        stripped = (line.strip() for line in lines)
        for is_blank, stanza in groupby(stripped, key=lambda text: not text):
            if is_blank:
                continue
            pairs = [pair for pair in map(_split_field, stanza)
                     if pair is not None]
            record = dict(pairs)
            # len mismatch means a duplicated key; such records are dropped.
            if pairs and len(record) == len(pairs):
                records.append(record)
        return records

    def meta_release_parse(self):
        """Fetch ``self.meta_release_url`` and return it parsed as a list of dicts.

        Raises whatever ``urllib.request.urlopen`` raises on failure.
        """
        with request.urlopen(self.meta_release_url) as response:
            # Parse inside the ``with`` block: lines are read lazily from
            # the open response.
            lines = TextIOWrapper(response, encoding='utf-8')
            return self.parse_lines(lines)


class MetaRelease(object):
    """Holds the contents of a single meta-release entry.

    Each attribute is taken from ``meta_dict`` (keys as produced by
    ``SourceMetaRelease``); when a key is absent the matching keyword
    argument is used instead, which defaults to ``None``.
    """

    def __init__(self, meta_dict, dist=None, name=None, version=None,
                 date=None, supported=None, description=None,
                 release_file=None, release_notes=None,
                 release_notes_html=None, upgrade_tool=None,
                 upgrade_tool_signature=None):
        self.dist = meta_dict.get('dist', dist)
        self.name = meta_dict.get('name', name)
        self.version = meta_dict.get('version', version)
        self.date = meta_dict.get('date', date)
        self.supported = meta_dict.get('supported', supported)
        self.description = meta_dict.get('description', description)
        self.release_file = meta_dict.get('release-file', release_file)
        self.release_notes = meta_dict.get('releasenotes', release_notes)
        self.release_notes_html = meta_dict.get(
            'releasenoteshtml', release_notes_html)
        self.upgrade_tool = meta_dict.get('upgradetool', upgrade_tool)
        self.upgrade_tool_signature = meta_dict.get(
            'upgradetoolsignature', upgrade_tool_signature)

    def to_json(self):
        """Return this entry's attributes as an indented JSON object string."""
        return json.dumps(self, default=lambda o: o.__dict__, indent=4)


class MetaReleaseJsons:
    """Converts a list of parsed meta-release dicts into JSON strings."""

    def __init__(self, meta_release):
        self.meta_release = meta_release

    def build(self):
        """Return one JSON string per entry in ``self.meta_release``."""
        return [
            MetaRelease(meta_dict).to_json()
            for meta_dict in self.meta_release
        ]


def main():
    """Download the meta-release file and print every entry as JSON."""
    meta_release = SourceMetaRelease().meta_release_parse()
    meta_release_jsons = MetaReleaseJsons(meta_release).build()
    # print jsons to stdout
    for release_info in meta_release_jsons:
        print(release_info)


if __name__ == '__main__':
    main()