almost got it parsing to a dict

This commit is contained in:
Brendan McDevitt 2022-08-22 09:26:53 -05:00
parent 3e141ad886
commit 26e55b186e

View file

@ -4,6 +4,7 @@ from itertools import groupby
import re
import json
from meta_release import SourceMetaRelease, MetaRelease
import pdb
#RELEASE_WORD_MATCHER = r'(^\Origin\:\s|Label:\s|Suite:\s|Version:\s|Codename:\s|Date:\s|Architecture:\s|Components:\s|Description:\s|MD5Sum:\s|SHA256:\s|Acquire\-By\-Hash:\s)'
@ -36,19 +37,31 @@ class Release:
data = {}
#cleaned_line = re.split(RELEASE_WORD_MATCHER, line)
split_line_arr = line.split(':')
if len(split_line_arr) == 3:
for checksum, filesize, filepath:
data['checksum'] = checksum
data['filesize'] = filesize
data['filepath'] = filepath
# If the first element of the split array matches the RELEASE_WORD_MATCHER
# regular expression, we can detect that it is a key we need.
#elif len(split_line_arr)[0]
## TODO: LEFT OFF HERE
return cleaned_line
split_line_arr_len = len(split_line_arr)
if split_line_arr_len == 1:
# example: [' 26f7612b4526f7b97b6b2f7abbdd59e5f83f879a0dbdcce799b7b91bc36387de 36973 universe/debian-installer/binary-sparc/Packages\n']
l = split_line_arr[0]
checksum_size_and_filepath = l.strip().split(' ')
while ('' in checksum_size_and_filepath):
checksum_size_and_filepath.remove('')
data['checksum'] = checksum_size_and_filepath[0]
data['filesize'] = checksum_size_and_filepath[1]
data['filepath'] = checksum_size_and_filepath[2]
elif split_line_arr_len >= 2:
k = split_line_arr[0].strip()
v = split_line_arr[1]
data[k] = v
return data
except Exception as e:
print('failed to clean')
print(e.message)
print(line)
print(e)
def validate_parse(self):
""" Use this method for validating the entire returned dictionary. make
@ -72,7 +85,7 @@ class Release:
for l in lines:
cleaned_line = self.line_format(l)
cleaned_lines.append(cleaned_line)
print(cleaned_line)
#print(cleaned_line)
return cleaned_lines
#return parsed_release_file
@ -114,6 +127,11 @@ if __name__ == '__main__':
url = meta_release_dict[distro]
try:
results = r.release_file_parse(url)
print(type(results))
print(len(results))
for d in results:
print(type(d))
print(d)
except Exception as e:
print(e.__doc__)
print(e.message)