from urllib import request
from io import TextIOWrapper
from itertools import groupby
import re
import json

from meta_release import SourceMetaRelease, MetaRelease

import pdb


#RELEASE_WORD_MATCHER = r'(^Origin:\s|Label:\s|Suite:\s|Version:\s|Codename:\s|Date:\s|Architecture:\s|Components:\s|Description:\s|MD5Sum:\s|SHA256:\s|Acquire-By-Hash:\s)'
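
# For reference, the top of a typical Release file looks roughly like this
# (an abbreviated, illustrative excerpt; the exact fields vary by release):
#
#   Origin: Ubuntu
#   Label: Ubuntu
#   Suite: hoary
#   Version: 5.04
#   Codename: hoary
#   Architectures: amd64 i386 ...
#   Components: main restricted universe multiverse
#   MD5Sum:
#    <checksum> <filesize> <filepath>
#    ...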


class Release:

    def __init__(self, distro_codename):
        # example url: http://old-releases.ubuntu.com/ubuntu/dists/hoary/Release
        # example distro_codename = 'hoary'
        self.distro_codename = distro_codename
        self.meta_release = self.changelog_release_file()

    def changelog_release_file(self):
        meta_release = SourceMetaRelease().meta_release_parse()
        return meta_release

    def distro_release_file_urls(self):
        meta_release_objects = []

        for d in self.meta_release:
            meta_release_obj = MetaRelease(d)
            dist_and_release_file_url = { meta_release_obj.dist: meta_release_obj.release_file }
            meta_release_objects.append(dist_and_release_file_url)

        return meta_release_objects
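
    # Illustrative shape of the list the method above returns (the URL is an
    # assumed example, following the old-releases pattern noted in __init__):
    #   [ {'hoary': 'http://old-releases.ubuntu.com/ubuntu/dists/hoary/Release'}, ... ]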

    def line_format(self, line):
        """ Use this method for cleaning a line, especially the ones with the md5
        and shasums in them. """
        try:
            data = {}
            split_line_arr = line.split(':')
            split_line_arr_len = len(split_line_arr)

            if split_line_arr_len == 1:
                data = self.checksum_line_clean(split_line_arr)

            elif split_line_arr_len >= 2:
                k = split_line_arr[0].strip().lower()
                v = split_line_arr[1].strip()

                #if k == 'md5sum':
                #    v = []
                #elif k == 'sha256':
                #    v = []

                data[k] = v

            return data
        except Exception as e:
            print(f'Failed to clean {line}')
            print(e.__doc__)
            print(e)
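
    # Example behavior, sketched from the rules above (values illustrative):
    #   line_format('Suite: hoary\n')                      -> {'suite': 'hoary'}
    #   line_format(' <checksum> 36973 main/.../Packages') -> checksum dict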

    def checksum_line_clean(self, checksum_line_arr):
        """
        in: [' 26f7612b4526f7b97b6b2f7abbdd59e5f83f879a0dbdcce799b7b91bc36387de 36973 universe/debian-installer/binary-sparc/Packages\n']
        out: { 'checksum': '26f7612b4526f7b97b6b2f7abbdd59e5f83f879a0dbdcce799b7b91bc36387de', 'filesize': '36973', 'filepath': 'universe/debian-installer/binary-sparc/Packages' }
        """
        data = {}

        l = checksum_line_arr[0]
        # split() with no argument collapses runs of whitespace, so no empty
        # strings need to be filtered out afterwards
        checksum_size_and_filepath = l.strip().split()

        data['checksum'] = checksum_size_and_filepath[0]
        data['filesize'] = checksum_size_and_filepath[1]
        data['filepath'] = checksum_size_and_filepath[2]

        return data

    def validate_parse(self):
        """ Use this method for validating the entire returned dictionary. Make
        sure it has the keys we expect. """
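        # A sketch of what the validation might check (the expected key set is
        # an assumption, mirroring the header fields a typical Release file
        # carries):
        #   expected = {'origin', 'label', 'suite', 'version', 'codename',
        #               'date', 'architectures', 'components', 'description'}
        #   return expected.issubset(parsed.keys())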
        return

    def release_file_parse(self, release_file_url):
        """ Returns the parsed Release file as a list of dicts """
        with request.urlopen(f'{release_file_url}') as response:
            lines = TextIOWrapper(response, encoding='utf-8')
            lines = [ l for l in lines ]

            cleaned_lines = []
            # the sha256 branch below needs to know whether a sha1 section
            # already closed the md5 section
            end_md5_checksums = None

            # enumerate() gives the position each cleaned line will occupy in
            # cleaned_lines, so the indices line up for the slicing below
            for index_count, l in enumerate(lines):
                cleaned_line = self.line_format(l)

                # TODO: FINISH THIS. MAKE A FUNCTION THAT HANDLES THIS STUFF.
                if cleaned_line and 'md5sum' in cleaned_line:
                    # this should get the dict with the md5sum header; each
                    # following line is a dict of checksum, filesize, and
                    # filepath until we reach the next section header
                    md5sum_index = index_count
                    start_md5_checksums = md5sum_index + 1
                # the older distros use sha1
                elif cleaned_line and 'sha1' in cleaned_line:
                    sha1sum_index = index_count
                    start_sha1_checksums = sha1sum_index + 1
                    # list slices exclude the end index, so the header's own
                    # index marks the end of the preceding section
                    end_md5_checksums = sha1sum_index
                # newer distros use sha256
                elif cleaned_line and 'sha256' in cleaned_line:
                    sha256sum_index = index_count
                    # only close the md5 section here if no sha1 section did
                    if end_md5_checksums is None:
                        end_md5_checksums = sha256sum_index
                    end_sha1_checksums = sha256sum_index
                    start_sha256_checksums = sha256sum_index + 1
                # Acquire-By-Hash follows the last checksum section, so it
                # closes the sha256 range
                elif cleaned_line and 'acquire-by-hash' in cleaned_line:
                    aquire_by_hash_index = index_count
                    end_sha256_checksums = aquire_by_hash_index

                cleaned_lines.append(cleaned_line)

            # list slicing extracts each range, since we know the index of
            # every section header
            md5sums = cleaned_lines[start_md5_checksums:end_md5_checksums]
            print(f'index of md5 start: {start_md5_checksums}')
            print(f'index of md5 end: {end_md5_checksums}')
            # releases without a sha256 section raise a NameError here, which
            # the caller's try/except reports
            sha256sums = cleaned_lines[start_sha256_checksums:end_sha256_checksums]
            print(f'index of sha256 start: {start_sha256_checksums}')
            print(f'index of sha256 end: {end_sha256_checksums}')
            #sha1sums = cleaned_lines[start_sha1_checksums:end_sha1_checksums]
            #print(f'index of sha1 start: {start_sha1_checksums}')
            #print(f'index of sha1 end: {end_sha1_checksums}')

            #return cleaned_lines
            #return md5sums
            return sha256sums
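
    # One possible shape for the TODO in release_file_parse: a helper that
    # scans the cleaned lines once and returns slice bounds per checksum
    # section, e.g. cleaned_lines[slice(*bounds['sha256'])]. A sketch only,
    # not wired in yet; the section names are the lowercased header keys
    # that line_format produces.
    def checksum_section_bounds(self, cleaned_lines):
        """ Sketch: map each checksum header ('md5sum', 'sha1', 'sha256') to
        the (start, end) slice bounds of its checksum entries. """
        bounds = {}
        open_section = None
        for i, d in enumerate(cleaned_lines):
            if not d:
                continue
            header = next((k for k in ('md5sum', 'sha1', 'sha256') if k in d), None)
            if header or 'acquire-by-hash' in d:
                if open_section:
                    # a new header (or Acquire-By-Hash) closes the open section
                    bounds[open_section] = (bounds[open_section][0], i)
                open_section = header
                if header:
                    # until something closes it, the section runs to the end
                    bounds[header] = (i + 1, len(cleaned_lines))
        return bounds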


if __name__ == '__main__':
    # testing

    r = Release('focal')
    release_file_urls = r.distro_release_file_urls()

    for meta_release_dict in release_file_urls:
        keys = meta_release_dict.keys()
        for distro in keys:
            url = meta_release_dict[distro]
            try:
                results = r.release_file_parse(url)
                for d in results:
                    print(d)
            except Exception as e:
                print(e.__doc__)
                print(e)