diff --git a/tools/ubuntu_package_puller/release.py b/tools/ubuntu_package_puller/release.py
index 81d8ac4..fad2e91 100644
--- a/tools/ubuntu_package_puller/release.py
+++ b/tools/ubuntu_package_puller/release.py
@@ -35,85 +35,112 @@ class Release:
         and shasums in them.
         """
         try:
             data = {}
-            #cleaned_line = re.split(RELEASE_WORD_MATCHER, line)
             split_line_arr = line.split(':')
             split_line_arr_len = len(split_line_arr)
             if split_line_arr_len == 1:
-                # example: [' 26f7612b4526f7b97b6b2f7abbdd59e5f83f879a0dbdcce799b7b91bc36387de 36973 universe/debian-installer/binary-sparc/Packages\n']
-
-                l = split_line_arr[0]
-                checksum_size_and_filepath = l.strip().split(' ')
-
-                while ('' in checksum_size_and_filepath):
-                    checksum_size_and_filepath.remove('')
-
-                data['checksum'] = checksum_size_and_filepath[0]
-                data['filesize'] = checksum_size_and_filepath[1]
-                data['filepath'] = checksum_size_and_filepath[2]
+                data = self.checksum_line_clean(split_line_arr)
             elif split_line_arr_len >= 2:
-                k = split_line_arr[0].strip()
-                v = split_line_arr[1]
+                k = split_line_arr[0].strip().lower()
+                v = split_line_arr[1].strip()
+
+                #if k == 'md5sum':
+                #    v = []
+                #elif k == 'sha256':
+                #    v = []
+
                 data[k] = v
             return data
         except Exception as e:
-            print('failed to clean')
-            print(line)
-            print(e)
+            print(f'Failed to clean {line}')
+            print(e.__doc__)
+            print(e.message)
+
+    def checksum_line_clean(self, checksum_line_arr):
+        """
+        in: [' 26f7612b4526f7b97b6b2f7abbdd59e5f83f879a0dbdcce799b7b91bc36387de 36973 universe/debian-installer/binary-sparc/Packages\n']
+        out: { 'checksum': '26f7612b4526f7b97b6b2f7abbdd59e5f83f879a0dbdcce799b7b91bc36387de', 'filesize': '36973', 'filepath': 'universe/debian-installer/binary-sparc/Packages\n' }
+        """
+        data = {}
+
+        l = checksum_line_arr[0]
+        checksum_size_and_filepath = l.strip().split(' ')
+
+        while ('' in checksum_size_and_filepath):
+            checksum_size_and_filepath.remove('')
+
+        data['checksum'] = checksum_size_and_filepath[0]
+        data['filesize'] = checksum_size_and_filepath[1]
+        data['filepath'] = checksum_size_and_filepath[2]
+
+        return data
 
     def validate_parse(self):
         """
         Use this method for validating the entire returned dictionary.
         make sure it has the expected keys we want/expect.
         """
         return
 
-    def md5_from_line(line):
-        return
-
-    def sha256_from_line(line):
-        return
-
     def release_file_parse(self, release_file_url):
         """
         Returns the parsed_release_file parsed as a list of dicts
        """
+        data = {}
+
         with request.urlopen(f'{release_file_url}') as response:
-            index_counter = 0
+            index_count = 0
             lines = TextIOWrapper(response, encoding='utf-8')
+            lines = [ l for l in lines ]
             cleaned_lines = []
 
             for l in lines:
+                index_count += 1
                 cleaned_line = self.line_format(l)
+
+                # TODO: FINISH THIS. MAKE A FUNCTION THAT HANDLES THIS STUFF.
+                if 'md5sum' in cleaned_line:
+                    # this should get the dict with md5sum
+                    # next one should be a range of each dicts checksum,
+                    # filesize, and filepath until we reach sha256.
+                    md5sum_index = index_count
+                    start_md5_checksums = md5sum_index + 1
+                # the older distros use sha1
+                elif 'sha1' in cleaned_line:
+                    sha1sum_index = index_count
+                    start_sha1_checksums = sha1sum_index + 1
+                    end_md5_checksums = sha1sum_index - 1
+                    if 'aquire-by-hash' in cleaned_line:
+                        aquire_by_hash_index = index_count
+                        end_sha1sum_checksums = aquire_by_hash_index - 1
+                # newer distros use sha256
+                elif 'sha256' in cleaned_line:
+                    sha256sum_index = index_count
+                    end_md5_checksums = sha256sum_index - 1
+                    start_sha256_checksums = sha256sum_index + 1
+                    if 'aquire-by-hash' in cleaned_line:
+                        aquire_by_hash_index = index_count
+                        end_sha256_checksums = aquire_by_hash_index - 1
+                #else:
+                    #cleaned_lines.append(cleaned_line)
 
                 cleaned_lines.append(cleaned_line)
-                #print(cleaned_line)
 
-            return cleaned_lines
-            #return parsed_release_file
-"""
-        stripped_lines = [
-                re.split(RELEASE_WORD_MATCHER, l.strip()[::1]) for l in
-                lines ]
-        print(stripped_lines)
-        grouped_lines = [list(group) for key, group in
-                groupby(stripped_lines, lambda x: x == []) if not key]
-        list_of_dicts = []
+            # can we use list slicing to extract each range? we know where
+            # the index is for each.
-        for group in grouped_lines:
-            d = {}
-            # list of each group
-            for arr in group:
-                arr_per_group = len(group)
-                k = arr[0].lower().replace(":", "").strip()
-                v = arr[1].strip()
+            md5sums = cleaned_lines[start_md5_checksums:end_md5_checksums]
+            print(f'index of md5 start: {start_md5_checksums}')
+            print(f'index of md5 end: {end_md5_checksums}')
+            sha256sums = cleaned_lines[start_sha256_checksums:end_sha256_checksums]
+            print(f'index of sha256 start: {start_sha256_checksums}')
+            print(f'index of sha256 end: {end_sha256_checksums}')
+            #sha1sums = cleaned_lines[start_sha1_checksums:end_sha1_checksums]
+            #print(f'index of sha1 start: {start_sha1_checksums}')
+            #print(f'index of sha1 end: {end_sha1_checksums}')
+            #return cleaned_lines
+            #return md5sums
+            return sha256sums
-                # this builds up our dict by adding one k,v pair per key
-                d[f"{k}"] = v
-
-                list_of_dicts.append(d) if arr_per_group == len(d.keys()) else None
-
-            return list_of_dicts
-"""
 
 
 if __name__ == '__main__':
     # testing
@@ -127,12 +154,9 @@ if __name__ == '__main__':
         url = meta_release_dict[distro]
         try:
             results = r.release_file_parse(url)
-            print(type(results))
-            print(len(results))
             for d in results:
-                print(type(d))
                 print(d)
         except Exception as e:
             print(e.__doc__)
-            print(e.message)
+            print(e)
 
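Note on the new parsing approach (not part of the patch): tracking start/end indices for each checksum section by hand is fragile, and two details are worth double-checking before merging. Python 3 exceptions have no `.message` attribute, so the `print(e.message)` added to `line_format()` will itself raise (the second hunk already drops the same call from `__main__`), and the Release file spells the field `Acquire-By-Hash`, so the `'aquire-by-hash'` checks (missing the "c") will never match. Below is a minimal sketch of the same idea written as a single pass that groups checksum lines under whichever section header (`MD5Sum:`, `SHA1:`, `SHA256:`) precedes them; the function name `parse_checksum_sections` and the example URL are illustrative only, not part of the module.

from io import TextIOWrapper
from urllib import request


def parse_checksum_sections(lines):
    """
    Group ' <checksum> <size> <path>' lines under the checksum section
    header that precedes them. Returns e.g.
    { 'sha256': [ {'checksum': ..., 'filesize': ..., 'filepath': ...}, ... ], ... }
    """
    sections = {}
    current = None
    for line in lines:
        if not line.strip():
            continue
        if not line.startswith(' '):
            # un-indented lines are headers/fields, e.g. 'SHA256:' or 'Acquire-By-Hash: yes'
            key = line.split(':', 1)[0].strip().lower()
            current = key if key in ('md5sum', 'sha1', 'sha256') else None
            if current is not None:
                sections[current] = []
            continue
        if current is None:
            # indented line that is not inside a checksum section
            continue
        parts = line.split()  # collapses the repeated-space padding
        if len(parts) == 3:
            checksum, size, path = parts
            sections[current].append(
                {'checksum': checksum, 'filesize': size, 'filepath': path})
    return sections


if __name__ == '__main__':
    # Example run against a real Release file on an Ubuntu mirror.
    url = 'http://archive.ubuntu.com/ubuntu/dists/jammy/Release'
    with request.urlopen(url) as response:
        parsed = parse_checksum_sections(TextIOWrapper(response, encoding='utf-8'))
    print(len(parsed.get('sha256', [])))

The grouping replaces only the start/end index bookkeeping in `release_file_parse()`; per-line cleaning could still go through `checksum_line_clean()` if the existing dict shape is preferred.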