from urllib import request
from io import TextIOWrapper
from itertools import groupby
import re
import json
from meta_release import SourceMetaRelease, MetaRelease
import pdb

# Checksum section headers (lowercased) that may appear in a Release file.
# Older distros publish MD5Sum/SHA1; newer ones publish SHA256.
CHECKSUM_SECTIONS = ('md5sum', 'sha1', 'sha256')


class Release:
    """Fetch and parse an Ubuntu archive ``Release`` file.

    Example url: http://old-releases.ubuntu.com/ubuntu/dists/hoary/Release
    Example distro_codename: 'hoary'
    """

    def __init__(self, distro_codename):
        self.distro_codename = distro_codename
        # Parsed meta-release data: a list of dicts, one per known release.
        self.meta_release = self.changelog_release_file()

    def changelog_release_file(self):
        """Return the parsed meta-release data as a list of dicts."""
        return SourceMetaRelease().meta_release_parse()

    def distro_release_file_urls(self):
        """Return a list of single-entry ``{dist: release_file_url}`` dicts."""
        meta_release_objects = []
        for d in self.meta_release:
            meta_release_obj = MetaRelease(d)
            meta_release_objects.append(
                {meta_release_obj.dist: meta_release_obj.release_file}
            )
        return meta_release_objects

    def line_format(self, line):
        """Normalize one Release-file line into a dict.

        A ``Key: value`` line becomes ``{'key': 'value'}`` (key lowercased,
        both sides stripped); a line with no colon is assumed to be a
        checksum body line and is handed to :meth:`checksum_line_clean`.
        Returns ``None`` when the line cannot be parsed.
        """
        try:
            # Split on the FIRST colon only, so values that themselves
            # contain colons (dates such as '... 17:33:17 UTC', URLs) are
            # not truncated.
            split_line_arr = line.split(':', 1)
            if len(split_line_arr) == 1:
                return self.checksum_line_clean(split_line_arr)
            k = split_line_arr[0].strip().lower()
            v = split_line_arr[1].strip()
            return {k: v}
        except Exception as e:
            # NOTE: Exception.message was removed in Python 3 — use str(e).
            print(f'Failed to clean {line}')
            print(e.__doc__)
            print(str(e))
            return None

    def checksum_line_clean(self, checksum_line_arr):
        """Split a checksum body line into its three fields.

        in:
          [' 26f7612b...36387de 36973 universe/debian-installer/binary-sparc/Packages\n']
        out:
          {
            'checksum': '26f7612b...36387de',
            'filesize': '36973',
            'filepath': 'universe/debian-installer/binary-sparc/Packages'
          }
        """
        # str.split() with no separator strips the ends and collapses runs
        # of whitespace, replacing the manual ''-removal loop.
        fields = checksum_line_arr[0].split()
        return {
            'checksum': fields[0],
            'filesize': fields[1],
            'filepath': fields[2],
        }

    def validate_parse(self):
        """Validate the parsed dictionary has the expected keys.

        TODO: not implemented yet.
        """
        return

    def release_file_parse(self, release_file_url):
        """Download a Release file and return its SHA256 checksum entries.

        Checksum body lines are grouped under whichever section header
        ('MD5Sum:', 'SHA1:', 'SHA256:') most recently preceded them; a
        section is closed by the next ``Key: value`` line (for example
        'Acquire-By-Hash: yes').  This replaces the previous index
        arithmetic, which raised NameError when a header was absent,
        dropped the last row of each section, and never matched the
        (misspelled) 'aquire-by-hash' terminator.

        Returns a list of ``{'checksum', 'filesize', 'filepath'}`` dicts —
        empty if the file has no SHA256 section.
        """
        sections = {name: [] for name in CHECKSUM_SECTIONS}
        current_section = None
        with request.urlopen(release_file_url) as response:
            for line in TextIOWrapper(response, encoding='utf-8'):
                cleaned = self.line_format(line)
                if not cleaned:
                    continue
                if 'checksum' in cleaned:
                    # Body line: belongs to the currently open section, if any.
                    if current_section is not None:
                        sections[current_section].append(cleaned)
                    continue
                key = next(iter(cleaned))
                # A known header opens its section; any other Key: value
                # line (e.g. 'acquire-by-hash') closes the open section.
                current_section = key if key in sections else None
        return sections['sha256']


if __name__ == '__main__':
    # Smoke test: parse every known release file and print its SHA256 rows.
    r = Release('focal')
    release_file_urls = r.distro_release_file_urls()
    for meta_release_dict in release_file_urls:
        for distro, url in meta_release_dict.items():
            try:
                results = r.release_file_parse(url)
                for d in results:
                    print(d)
            except Exception as e:
                print(e.__doc__)
                print(e)