From 51e23619d1d1b687055c647422b759459c124574 Mon Sep 17 00:00:00 2001
From: bpmcdevitt
Date: Wed, 17 Aug 2022 01:52:44 -0500
Subject: [PATCH] added ubuntu_package_puller poc code

---
 tools/ubuntu_package_puller/README.md         | 55 +++++++++++++++++
 tools/ubuntu_package_puller/export_to_json.py | 33 +++++++++++
 .../package_version_history.py                | 62 ++++++++++++++++++++
 3 files changed, 150 insertions(+)
 create mode 100644 tools/ubuntu_package_puller/README.md
 create mode 100644 tools/ubuntu_package_puller/export_to_json.py
 create mode 100644 tools/ubuntu_package_puller/package_version_history.py

diff --git a/tools/ubuntu_package_puller/README.md b/tools/ubuntu_package_puller/README.md
new file mode 100644
index 0000000..0e3051c
--- /dev/null
+++ b/tools/ubuntu_package_puller/README.md
@@ -0,0 +1,55 @@
+## PoC Ubuntu: Package Version History
+
+#### Example 1: Using the PackageVersionHistory python class
+```
+
+# The below example code showcases how to use this class to check version history for specific source_package_names and distribution_versions
+# returns: [ ( package_name, [list_of_versions] ), ... ] sorted by package_name
+
+In [1]: import package_version_history as p
+
+In [2]: kernel_checker = p.PackageVersionHistory('linux', '20.04')
+
+In [3]: python_checker = p.PackageVersionHistory('python3.8', '20.04')
+
+In [4]: kernel_results = kernel_checker.package_name_and_version_history()
+
+In [5]: python_results = python_checker.package_name_and_version_history()
+```
+
+#### Seed Data: How to pull all source package names in a target distribution (Need local shell access)
+We need local shell access to a system to run some commands to create a text file that contains every remote package source name in the repos on a target distro. The below sequence of commands should create a text file with source_package_names de-duplicated and sorted.
+
+```
+# this will create a text file with the values (export_to_json.py reads it from ./package_data/)
+grep ^Source /var/lib/apt/lists/*_Packages | awk ' {print $2}' | sort -u > source_packages_remote_ubuntu_2004.default
+
+# it is grepping through information that is stored as follows. Below is an example from the package lvm2
+Package: liblvm2cmd2.03
+Architecture: amd64
+Version: 2.03.11-2ubuntu4~ubuntu20.04.1
+Multi-Arch: same
+Priority: optional
+Section: libs
+Source: lvm2
+Origin: Ubuntu
+Maintainer: Ubuntu Developers
+Original-Maintainer: Debian LVM Team
+Bugs: https://bugs.launchpad.net/ubuntu/+filebug
+Installed-Size: 2902
+Depends: libaio1 (>= 0.3.93), libblkid1 (>= 2.24.2), libc6 (>= 2.28), libselinux1 (>= 1.32), libsystemd0 (>= 222), libudev1 (>= 183), dmeventd
+Filename: pool/main/l/lvm2/liblvm2cmd2.03_2.03.11-2ubuntu4~ubuntu20.04.1_amd64.deb
+Size: 699512
+MD5sum: 99319083dcac52e719f6066930781f9e
+SHA1: 19c40a6e3e26f7cdfb9a0931b480d3bdd2ecdcf5
+SHA256: a2fc4a87717aa81e152f0890c395c1b42b4eb07ffda4e3fbd3f83f9a70cbd95f
+SHA512: 364acc8ce9932794f7d1d59360845b6ad915d261ea206fd94088c5ebf0e0ac2d11ab8832ebfde467cfa1ae82b6f7c7d179af61de8eb0898d2c3794161d2ea39d
+Homepage: https://sourceware.org/lvm2/
+Description: LVM2 command library
+Task: server, cloud-image, ubuntu-live, kubuntu-live, xubuntu-live, lubuntu-live, ubuntustudio-dvd-live, ubuntukylin-live, ubuntu-mate-live, ubuntu-budgie-live
+Description-md5: 8f4d76592086bd210b07fd8b6370be43
+
+# below command can be used to check the output number of results
+wc -l source_packages_remote_ubuntu_2004.default
+13991 source_packages_remote_ubuntu_2004.default
+```
diff --git a/tools/ubuntu_package_puller/export_to_json.py b/tools/ubuntu_package_puller/export_to_json.py
new file mode 100644
index 0000000..83eb9bd
--- /dev/null
+++ b/tools/ubuntu_package_puller/export_to_json.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+# export job that will use existing class code to export to json
+
+import json
+
+from package_version_history import PackageVersionHistory as pvh
+
+# start with just ubuntu 20.04
+distro_version = '20.04'
+
+source_pkgs_filepath = './package_data/source_packages_remote_ubuntu_2004.default'
+output_filepath = './version_history_data/ubuntu_2004_source_package_version_history.data'
+
+# read one source package name per line, ignoring blank lines
+with open(source_pkgs_filepath, 'r') as source_file:
+    lines = [line.strip() for line in source_file if line.strip()]
+
+results = []
+# one anonymous Launchpad session is shared by every lookup
+launchpad = None
+
+for pkg in lines:
+    print(pkg)
+    history = pvh(pkg, distro_version, launchpad=launchpad)
+    launchpad = history.launchpad
+    name_and_versions = history.package_name_and_version_history()
+    results.append(name_and_versions)
+    print(name_and_versions)
+
+# write to a file; json.dump serializes the tuples as arrays on its own, so
+# the results do not need a dumps()/loads() round trip first
+with open(output_filepath, 'w') as outfile:
+    json.dump(results, outfile, indent=4)
diff --git a/tools/ubuntu_package_puller/package_version_history.py b/tools/ubuntu_package_puller/package_version_history.py
new file mode 100644
index 0000000..605daef
--- /dev/null
+++ b/tools/ubuntu_package_puller/package_version_history.py
@@ -0,0 +1,62 @@
+"""Look up the publishing history of Ubuntu source packages on Launchpad."""
+
+from collections import defaultdict
+
+from launchpadlib.launchpad import Launchpad
+
+
+class PackageVersionHistory:
+    """Version history of one source package in one Ubuntu series.
+
+    name is the source package name to search for and distro_series is the
+    series name or version (for example '20.04'). Pass an already logged-in
+    Launchpad object as launchpad to share one session between instances;
+    by default every instance performs its own anonymous login.
+    """
+
+    def __init__(self, name, distro_series, launchpad=None):
+        self.name = name
+        self.distro_series = distro_series
+        self.launchpad = self.login() if launchpad is None else launchpad
+
+    def login(self):
+        """Return a new anonymous session against production Launchpad."""
+        return Launchpad.login_anonymously('just testing', 'production')
+
+    def set_ubuntu(self):
+        """Return the Ubuntu distribution object."""
+        return self.launchpad.distributions['ubuntu']
+
+    def set_distro_series(self):
+        """Return the Ubuntu series matching self.distro_series."""
+        ubuntu = self.set_ubuntu()
+        return ubuntu.getSeries(name_or_version=self.distro_series)
+
+    def set_archive(self):
+        """Return Ubuntu's main archive."""
+        ubuntu = self.set_ubuntu()
+        return ubuntu.main_archive
+
+    def get_published_sources(self):
+        """Return the publishing records for self.name in the series.
+
+        NOTE(review): getPublishedSources also takes exact_match; it is left
+        at its default here, so records for other source packages whose names
+        merely contain self.name may be returned too - confirm if intended.
+        """
+        archive = self.set_archive()
+        series = self.set_distro_series()
+        return archive.getPublishedSources(
+            source_name=self.name, distro_series=series)
+
+    def package_name_and_version_history(self):
+        """Return a sorted list of (package_name, [versions]) tuples.
+
+        Versions are listed in the order Launchpad returned their records.
+        """
+        versions_by_name = defaultdict(list)
+        for source in self.get_published_sources():
+            versions_by_name[source.source_package_name].append(
+                source.source_package_version)
+
+        return sorted(versions_by_name.items())