almost got it parsing to a dict

2022-08-22 09:26:53 -05:00 · 2022-08-22 09:26:53 -05:00 · 26e55b186e
commit 26e55b186e
parent 3e141ad886
1 changed file with 30 additions and 12 deletions
--- a/tools/ubuntu_package_puller/release.py
+++ b/tools/ubuntu_package_puller/release.py
@@ -4,6 +4,7 @@ from itertools import groupby
 import re
 import json
 from meta_release import SourceMetaRelease, MetaRelease
+import pdb


 #RELEASE_WORD_MATCHER = r'(^\Origin\:\s|Label:\s|Suite:\s|Version:\s|Codename:\s|Date:\s|Architecture:\s|Components:\s|Description:\s|MD5Sum:\s|SHA256:\s|Aquire\-By\-Hash:\s)'
@@ -36,19 +37,31 @@ class Release:
            data = {}
            #cleaned_line = re.split(RELEASE_WORD_MATCHER, line)
            split_line_arr = line.split(':')
-            if len(split_line_arr) == 3:
-                for checksum, filesize, filepath:
-                    data['checksum'] = checksum
-                    data['filesize'] = filesize
-                    data['filepath'] = filepath
-            # if the the first element in the array regular expression matches
-            # RELEASE_WORD_MATCHER so we can detect if its a key: we need.
-            #elif len(split_line_arr)[0] 
-            ## TODO: LEFT OFF HERE
-            return cleaned_line
+            split_line_arr_len = len(split_line_arr)
+
+            if split_line_arr_len == 1:
+                # example: [' 26f7612b4526f7b97b6b2f7abbdd59e5f83f879a0dbdcce799b7b91bc36387de            36973 universe/debian-installer/binary-sparc/Packages\n'] 
+
+                l = split_line_arr[0]
+                checksum_size_and_filepath = l.strip().split(' ')
+
+                while ('' in checksum_size_and_filepath):
+                    checksum_size_and_filepath.remove('')
+
+                data['checksum'] = checksum_size_and_filepath[0]
+                data['filesize'] = checksum_size_and_filepath[1]
+                data['filepath'] = checksum_size_and_filepath[2]
+                
+            elif split_line_arr_len >= 2:
+                k = split_line_arr[0].strip()
+                v = split_line_arr[1]
+                data[k] = v
+
+            return data 
        except Exception as e:
            print('failed to clean')
-            print(e.message)
+            print(line)
+            print(e)

    def validate_parse(self):
        """ Use this method for validating the entire returned dictionary. make
@@ -72,7 +85,7 @@ class Release:
            for l in lines:
                cleaned_line = self.line_format(l)
                cleaned_lines.append(cleaned_line)
-                print(cleaned_line)
+                #print(cleaned_line)

            return cleaned_lines
            #return parsed_release_file
@@ -114,6 +127,11 @@ if __name__ == '__main__':
            url = meta_release_dict[distro]
            try:
                results = r.release_file_parse(url)
+                print(type(results))
+                print(len(results))
+                for d in results:
+                    print(type(d))
+                    print(d)
            except Exception as e:
                print(e.__doc__)
                print(e.message)