from urllib import request
from io import TextIOWrapper
from itertools import groupby
import re
import json
from meta_release import SourceMetaRelease, MetaRelease
import pdb


class Release:
    """Fetch and parse Ubuntu ``Release`` index files for a distro codename.

    Example Release file URL:
        http://old-releases.ubuntu.com/ubuntu/dists/hoary/Release
    """

    def __init__(self, distro_codename):
        """
        :param distro_codename: distro code name, e.g. ``'hoary'`` or ``'focal'``
        """
        self.distro_codename = distro_codename
        # Parsed meta-release data (list of dicts) loaded once at construction.
        self.meta_release = self.changelog_release_file()

    def changelog_release_file(self):
        """Return the parsed meta-release data from ``SourceMetaRelease``."""
        return SourceMetaRelease().meta_release_parse()

    def distro_release_file_urls(self):
        """Return a list of one-entry dicts mapping dist name -> Release file URL.

        Each element has the shape ``{<dist>: <release_file_url>}``, built from
        one ``MetaRelease`` wrapper per meta-release entry.
        """
        urls = []
        for entry in self.meta_release:
            meta = MetaRelease(entry)
            urls.append({meta.dist: meta.release_file})
        return urls

    def line_format(self, line):
        """Parse a single Release-file line into a dict.

        Two line shapes are handled:

        * checksum lines (no ``:``), e.g.
          `` 26f76... 36973 universe/debian-installer/binary-sparc/Packages``
          -> ``{'checksum': ..., 'filesize': ..., 'filepath': ...}``
        * header lines (``Key: value``) -> ``{key: value}``

        Returns ``None`` if parsing raises (best-effort, error is printed).
        """
        try:
            data = {}
            # maxsplit=1 so values that themselves contain ':' (dates such as
            # 'Date: Thu, 23 Apr 2020 17:33:17 UTC', URLs, ...) are not
            # truncated at the second colon.  (Fixes a truncation bug.)
            parts = line.split(':', 1)
            if len(parts) == 1:
                # Checksum line: whitespace-split drops empty fields for us,
                # replacing the previous quadratic remove('') loop.
                checksum, filesize, filepath = line.split()[:3]
                data['checksum'] = checksum
                data['filesize'] = filesize
                data['filepath'] = filepath
            else:
                key = parts[0].strip()
                data[key] = parts[1]
            return data
        except Exception as e:
            # Best-effort parser: report the offending line and fall through,
            # implicitly returning None (callers must tolerate None entries).
            print('failed to clean')
            print(line)
            print(e)

    def validate_parse(self):
        """Validate the parsed dictionary has the expected keys.

        TODO: not yet implemented; currently a no-op placeholder.
        """
        return

    def md5_from_line(self, line):
        """Extract the MD5 checksum from *line*.

        TODO: not yet implemented.  (``self`` added: the original signature
        was missing it, so instance calls would bind the instance to *line*.)
        """
        return

    def sha256_from_line(self, line):
        """Extract the SHA256 checksum from *line*.

        TODO: not yet implemented.  (``self`` added, as for md5_from_line.)
        """
        return

    def release_file_parse(self, release_file_url):
        """Download and parse a Release file.

        :param release_file_url: full URL of the Release file to fetch.
        :returns: list of per-line dicts as produced by :meth:`line_format`
                  (may contain ``None`` for lines that failed to parse).
        """
        with request.urlopen(f'{release_file_url}') as response:
            lines = TextIOWrapper(response, encoding='utf-8')
            return [self.line_format(l) for l in lines]


if __name__ == '__main__':
    # Ad-hoc smoke test: fetch and parse every known Release file.
    r = Release('focal')
    release_file_urls = r.distro_release_file_urls()
    for meta_release_dict in release_file_urls:
        for distro, url in meta_release_dict.items():
            try:
                results = r.release_file_parse(url)
                print(type(results))
                print(len(results))
                for d in results:
                    print(type(d))
                    print(d)
            except Exception as e:
                print(e.__doc__)
                # Python 3 exceptions have no `.message` attribute; the old
                # `print(e.message)` raised AttributeError inside the handler.
                print(e)