Make the release parser checksum-aware: record each checksum section's index range so the checksum rows can be stored as key/value pairs via list slicing.
This commit is contained in:
parent
26e55b186e
commit
fff732f932
1 changed files with 77 additions and 53 deletions
|
@ -35,85 +35,112 @@ class Release:
|
|||
and shasums in them. """
|
||||
try:
|
||||
data = {}
|
||||
#cleaned_line = re.split(RELEASE_WORD_MATCHER, line)
|
||||
split_line_arr = line.split(':')
|
||||
split_line_arr_len = len(split_line_arr)
|
||||
|
||||
if split_line_arr_len == 1:
|
||||
# example: [' 26f7612b4526f7b97b6b2f7abbdd59e5f83f879a0dbdcce799b7b91bc36387de 36973 universe/debian-installer/binary-sparc/Packages\n']
|
||||
|
||||
l = split_line_arr[0]
|
||||
checksum_size_and_filepath = l.strip().split(' ')
|
||||
|
||||
while ('' in checksum_size_and_filepath):
|
||||
checksum_size_and_filepath.remove('')
|
||||
|
||||
data['checksum'] = checksum_size_and_filepath[0]
|
||||
data['filesize'] = checksum_size_and_filepath[1]
|
||||
data['filepath'] = checksum_size_and_filepath[2]
|
||||
data = self.checksum_line_clean(split_line_arr)
|
||||
|
||||
elif split_line_arr_len >= 2:
|
||||
k = split_line_arr[0].strip()
|
||||
v = split_line_arr[1]
|
||||
k = split_line_arr[0].strip().lower()
|
||||
v = split_line_arr[1].strip()
|
||||
|
||||
#if k == 'md5sum':
|
||||
# v = []
|
||||
#elif k == 'sha256':
|
||||
# v = []
|
||||
|
||||
data[k] = v
|
||||
|
||||
return data
|
||||
except Exception as e:
|
||||
print('failed to clean')
|
||||
print(line)
|
||||
print(e)
|
||||
print(f'Failed to clean {line}')
|
||||
print(e.__doc__)
|
||||
print(e.message)
|
||||
|
||||
def checksum_line_clean(self, checksum_line_arr):
    """Parse one checksum row of a Release file into a dict.

    The row holds three whitespace-separated fields: checksum,
    file size and file path.

    in:  [' 26f7612b4526f7b97b6b2f7abbdd59e5f83f879a0dbdcce799b7b91bc36387de 36973 universe/debian-installer/binary-sparc/Packages\n']
    out: {'checksum': '26f7612b4526f7b97b6b2f7abbdd59e5f83f879a0dbdcce799b7b91bc36387de',
          'filesize': '36973',
          'filepath': 'universe/debian-installer/binary-sparc/Packages'}

    Raises IndexError if the row has fewer than three fields.
    """
    # str.split() with no argument collapses runs of whitespace and
    # never produces empty strings, so the old strip()/split(' ') plus
    # the quadratic ``while '' in ...: remove('')`` loop is unnecessary.
    # (It also discards the trailing newline, so 'filepath' never
    # carries a '\n', unlike what the old docstring example claimed.)
    fields = checksum_line_arr[0].split()
    return {
        'checksum': fields[0],
        'filesize': fields[1],
        'filepath': fields[2],
    }
|
||||
|
||||
def validate_parse(self):
    """Validate the fully parsed Release dictionary.

    Placeholder: intended to confirm the returned dictionary carries
    every key we expect. Currently a no-op that returns None.
    """
    return None
|
||||
|
||||
def md5_from_line(line):
    """Extract the MD5 checksum from *line*. Not yet implemented."""
    return None
|
||||
|
||||
def sha256_from_line(line):
    """Extract the SHA256 checksum from *line*. Not yet implemented."""
    return None
|
||||
|
||||
def release_file_parse(self, release_file_url):
    """Download a Release file and return its lines cleaned by line_format.

    Fetches *release_file_url*, runs each line through
    ``self.line_format`` and, while iterating, records the index
    ranges occupied by the MD5/SHA1/SHA256 checksum sections so the
    checksum rows can later be extracted with list slicing.

    Returns the list of cleaned lines (one entry per input line).
    """
    data = {}  # reserved for the eventual per-section result dict

    with request.urlopen(f'{release_file_url}') as response:
        # Iterate the decoded stream directly; no need to materialize
        # it into a throwaway list first. Counter is 1-based to match
        # the slicing bookkeeping below. (The old, unused
        # ``index_counter`` duplicate has been dropped.)
        cleaned_lines = []
        for index_count, raw_line in enumerate(
                TextIOWrapper(response, encoding='utf-8'), start=1):
            cleaned_line = self.line_format(raw_line)

            # TODO: FINISH THIS. MAKE A FUNCTION THAT HANDLES THIS STUFF.
            # Record where each checksum section starts and ends; the
            # rows between a section header and the next header are
            # that section's checksum entries.
            if 'md5sum' in cleaned_line:
                md5sum_index = index_count
                start_md5_checksums = md5sum_index + 1
            elif 'sha1' in cleaned_line:
                # the older distros use sha1
                sha1sum_index = index_count
                start_sha1_checksums = sha1sum_index + 1
                end_md5_checksums = sha1sum_index - 1
                # BUGFIX: key was misspelled 'aquire-by-hash', so this
                # test could never match the 'acquire-by-hash' key that
                # line_format produces from an 'Acquire-By-Hash' field.
                # NOTE(review): a line matching 'sha1' presumably cannot
                # also carry 'acquire-by-hash' — confirm whether this
                # check belongs at the outer level instead.
                if 'acquire-by-hash' in cleaned_line:
                    aquire_by_hash_index = index_count
                    end_sha1sum_checksums = aquire_by_hash_index - 1
            # newer distros use sha256
            elif 'sha256' in cleaned_line:
                sha256sum_index = index_count
                end_md5_checksums = sha256sum_index - 1
                start_sha256_checksums = sha256sum_index + 1
                # BUGFIX: same 'aquire-by-hash' misspelling as above.
                if 'acquire-by-hash' in cleaned_line:
                    aquire_by_hash_index = index_count
                    end_sha256_checksums = aquire_by_hash_index - 1

            cleaned_lines.append(cleaned_line)

        return cleaned_lines
|
||||
|
||||
if __name__ == '__main__':
|
||||
# testing
|
||||
|
@ -127,12 +154,9 @@ if __name__ == '__main__':
|
|||
url = meta_release_dict[distro]
|
||||
try:
|
||||
results = r.release_file_parse(url)
|
||||
print(type(results))
|
||||
print(len(results))
|
||||
for d in results:
|
||||
print(type(d))
|
||||
print(d)
|
||||
except Exception as e:
|
||||
print(e.__doc__)
|
||||
print(e.message)
|
||||
print(e)
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue