from urllib import request
from io import TextIOWrapper
from itertools import groupby
import re

# Legacy pattern, value kept unchanged for anything that imports it. It only
# matches a literal leading "w:" and is not used by the parser below.
META_RELEASE_WORD_MATCHER = r'^w\:'
# Field name at the start of a line, including the trailing colon. Hyphens
# are allowed so that names such as "Release-File:" are recognised.
META_RELEASE_FIELD_MATCHER = r'^([\w-]+:)'
# A single leading whitespace character (the separator after the colon).
META_SPACE_MATCHER = r'^\s'


class SourceUbuntuPackages:
    """Read the Ubuntu meta-release index for a given distro series."""

    def __init__(self, distro_series):
        """Store the series name and the URL of the meta-release file."""
        self.distro_series = distro_series
        self.meta_release_url = 'http://changelogs.ubuntu.com/meta-release'

    # Reference parser:
    # http://bazaar.launchpad.net/~ubuntu-branches/ubuntu/wily/update-manager/wily/view/head:/UpdateManager/Core/MetaRelease.py#L100-#L105
    def meta_release_parse(self):
        """Return the meta-release file parsed as a list of dicts.

        The file at ``self.meta_release_url`` is fetched, decoded as UTF-8
        and split into stanzas separated by blank lines. Each stanza becomes
        one dict.

        Returns:
            A list of dicts, one per stanza, in file order. Keys are the
            lower-cased field names *including* the trailing colon (for
            example ``"dist:"`` or ``"release-file:"``); values are the text
            after the colon with one leading whitespace character removed.

        Raises:
            Whatever ``urllib.request.urlopen`` raises when the URL cannot
            be opened, and ``UnicodeDecodeError`` if the body is not UTF-8.
        """
        with request.urlopen(self.meta_release_url) as response:
            lines = TextIOWrapper(response, encoding='utf-8')
            # Parse while the response is still open; the wrapper reads lazily.
            return self._parse_lines(lines)

    @staticmethod
    def _parse_lines(lines):
        """Turn an iterable of text lines into a list of per-stanza dicts."""
        stripped = (line.strip() for line in lines)
        stanzas = []
        for is_blank, group in groupby(stripped, key=lambda text: text == ''):
            if is_blank:
                # Runs of blank lines only separate stanzas.
                continue
            fields = {}
            for line in group:
                match = re.match(META_RELEASE_FIELD_MATCHER, line)
                if match is None:
                    # Skip a line without a "Name:" prefix instead of
                    # abandoning the remaining fields of the stanza.
                    continue
                key = match.group(1).lower()
                value = re.sub(META_SPACE_MATCHER, '', line[match.end():])
                fields[key] = value
            stanzas.append(fields)
        return stanzas


if __name__ == '__main__':
    s = SourceUbuntuPackages('dapper')
    print(s.meta_release_parse())