"""Fetch the Ubuntu ``meta-release`` index and print each entry as JSON."""

from io import TextIOWrapper
from itertools import groupby
import json
import re
from typing import Dict, Iterable, List, Optional
from urllib import request

# Field prefixes recognised at the start of a meta-release line.  The whole
# alternation is anchored with ``^`` so that a field name occurring inside a
# value (for example "Version: " inside a description) cannot split the line
# a second time.
META_RELEASE_WORD_MATCHER = (
    r'^(Dist:\s|Name:\s|Version:\s|Date:\s|Supported:\s|Description:\s'
    r'|Release-File:\s|ReleaseNotes:\s|ReleaseNotesHtml:\s'
    r'|UpgradeTool:\s|UpgradeToolSignature:\s)'
)
# Not referenced in this module; kept because it is a module-level name.
META_SPACE_MATCHER = r'^\s'

# Compiled once; it is applied to every line of the downloaded file.
_FIELD_SPLITTER = re.compile(META_RELEASE_WORD_MATCHER)

# Parsed (lower-cased, colon-stripped) field name -> MetaRelease attribute.
_FIELD_TO_ATTRIBUTE = {
    'dist': 'dist',
    'name': 'name',
    'version': 'version',
    'date': 'date',
    'supported': 'supported',
    'description': 'description',
    'release-file': 'release_file',
    'releasenotes': 'release_notes',
    'releasenoteshtml': 'release_notes_html',
    'upgradetool': 'upgrade_tool',
    'upgradetoolsignature': 'upgrade_tool_signature',
}


class SourceMetaRelease:
    """Download and parse a meta-release file."""

    def __init__(
        self,
        meta_release_url: str = 'http://changelogs.ubuntu.com/meta-release',
    ) -> None:
        """Remember the URL that ``meta_release_parse`` will fetch.

        Args:
            meta_release_url: Location of the meta-release file.
        """
        self.meta_release_url = meta_release_url

    def meta_release_parse(self) -> List[Dict[str, str]]:
        """Return the meta-release file parsed as a list of dicts.

        Opens ``self.meta_release_url``, decodes the body as UTF-8 and hands
        the lines to ``parse_lines``.  Errors raised by ``urlopen`` or by the
        decoder are not caught here.
        """
        with request.urlopen(self.meta_release_url) as response:
            lines = TextIOWrapper(response, encoding='utf-8')
            return self.parse_lines(lines)

    @staticmethod
    def parse_lines(lines: Iterable[str]) -> List[Dict[str, str]]:
        """Turn meta-release text lines into one dict per stanza.

        A stanza is a run of consecutive lines that each start with one of
        the field names in ``META_RELEASE_WORD_MATCHER``.  Any other line
        (blank or unrecognised) ends the current stanza and is discarded.

        Args:
            lines: Iterable of text lines; surrounding whitespace is ignored.

        Returns:
            One dict per stanza.  Keys are the field names lower-cased with
            the colon removed (e.g. ``'release-file'``); values are the rest
            of the line, stripped.  If a field is repeated inside a stanza
            the last occurrence wins.
        """
        # For a recognised line the split yields ['', 'Field: ', 'value'];
        # dropping the leading '' leaves [field, value].  For every other
        # line the split yields a single element, so the slice is empty.
        split_lines = [
            _FIELD_SPLITTER.split(line.strip())[1:] for line in lines
        ]

        releases = []
        for is_gap, stanza in groupby(split_lines, key=lambda parts: not parts):
            if is_gap:
                continue
            entry = {}
            for parts in stanza:
                field = parts[0].lower().replace(':', '').strip()
                entry[field] = parts[1].strip()
            releases.append(entry)
        return releases


class MetaRelease:
    """
    MetaRelease(): an object for holding onto the contents
    of a meta_release entry
    """

    def __init__(
        self,
        dist: Optional[str] = None,
        name: Optional[str] = None,
        version: Optional[str] = None,
        date: Optional[str] = None,
        supported: Optional[str] = None,
        description: Optional[str] = None,
        release_file: Optional[str] = None,
        release_notes: Optional[str] = None,
        release_notes_html: Optional[str] = None,
        upgrade_tool: Optional[str] = None,
        upgrade_tool_signature: Optional[str] = None,
    ) -> None:
        """Store every field of one entry; missing fields stay ``None``."""
        self.dist = dist
        self.name = name
        self.version = version
        self.date = date
        self.supported = supported
        self.description = description
        self.release_file = release_file
        self.release_notes = release_notes
        self.release_notes_html = release_notes_html
        self.upgrade_tool = upgrade_tool
        self.upgrade_tool_signature = upgrade_tool_signature

    @classmethod
    def from_dict(cls, entry: Dict[str, str]) -> 'MetaRelease':
        """Build an instance from a dict produced by the parser.

        Args:
            entry: Mapping keyed by parsed field name (``'dist'``,
                ``'release-file'``, ``'releasenotes'``, ...).  Absent keys
                become ``None``; keys not in the known set are ignored.
        """
        return cls(**{
            attribute: entry.get(field)
            for field, attribute in _FIELD_TO_ATTRIBUTE.items()
        })

    def to_json(self) -> str:
        """Serialise the instance attributes as a 4-space-indented JSON object."""
        return json.dumps(self, default=lambda o: o.__dict__, indent=4)


def main() -> None:
    """Fetch the meta-release file and print every entry as JSON."""
    for entry in SourceMetaRelease().meta_release_parse():
        print(MetaRelease.from_dict(entry).to_json())


if __name__ == '__main__':
    main()