Improve frozen repo tool

* Increase speed
* Only import packages which match the required architecture
* Adjust config so architecture is specified globally
* Add PPA upstream support
* Updated comments with usage examples
This commit is contained in:
Hugh Saunders 2014-09-04 11:50:57 +01:00
parent 561c6f6b44
commit 33b0dbd1f5
2 changed files with 44852 additions and 8982 deletions

View File

@ -13,62 +13,81 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ---------------------------------------------------------------------------
# Rackspace Private Cloud Frozen Repo Tool (Elsa)
# This tool does the following:
# - Read config file containing ordered list of upstreams and package lists
# - Create dist containing all the required packages
#
# TODO:
# - config file manipulation - add/remove packages
# - sync to CDN?
#
# Note: I originaly started writing this as a tool that would manually
# read remote repos and generate local repos. However aptly seems to do that
# well already, so I restarted with an aptly wrapper. This reduces the amount
# of new code needed, but is a bit messy as its scraping CLI output.
# I did investigate the python levelsdb interface but the aptly data is
# not transparent.
# - Create dist containing all the required packages at specific versions
# Example: python elsa_repo.py config.yml dist_from_config repo-123
# - Check for new/updated packages and generate a new config file
# Example: python elsa_repo.py config.yml\
# generate_updated_config --add-new newconfig.yml
#
# Requirements:
# - yaml python module
# - aptly tool installed http://www.aptly.info
# - gpg (and a private key already added for repo signing)
# - curl
# - apt_pkg module
# - space for mirrors of potentially large repos
# - Pip:
# * python-apt
# * pyyaml
# - OS tools:
# * curl (apt/curl)
# * aptly http://www.aptly.info/download/
# - Lots of space for mirrors of potentially large repos.
# The repo mirrors will be stored in ~/.aptly.
#
# Structure of a debian repo
# /Repo
# /dists
# /dist1
# Release # lists components and architectures
# /component1
# /binary-arch
# Release # metadat for this particular
# # dist/comp/arch/type
# Packages.{bz2,gz} # lists all packages in this
# # dist/comp/arch/type
# /component2
# /dist2
# /pool
# /component1
# /a
# /package_name
# package_name_version_arch.deb
# /b
# /component2
# /a
# /b
# Example YAML Config File:
# ---
# config:
# # only packages matching this arch or 'all' will be imported
# architecture: amd64
# # hash of package name: version
# packages:
# compat-libstdc: 5-1
# libargtable2-0: 12-1
# ...
# # list of upstreams to search for packages
# upstream_repos:
# - component: main
# dist: testing
# key_url: http://www.rabbitmq.com/rabbitmq-signing-key-public.asc
# name: rabbit
# url: http://www.rabbitmq.com/debian/
# - component: main
# dist: stable
# key_url: http://packages.elasticsearch.org/GPG-KEY-elasticsearch
# name: elasticsearch
# url: http://packages.elasticsearch.org/logstash/1.4/debian
# - key_id: 5234BF2B
# name: rsyslog_ppa_v8
# ppa: adiscon/v8-stable
# - key_id: 1285491434D8786F
# name: openmanage
# url: http://linux.dell.com/repo/community/deb/latest/
# - component: main
# dist: cloudmonitoring
# key_url: https://monitoring.api.rackspacecloud.com/pki/agent/linux.asc
# name: cloud_monitoring_agent
# url: >
# http://stable.packages.cloudmonitoring.rackspace.com/
# ubuntu-14.04-x86_64
# Notes: to generate a full mirror set packages: {} in config then use
# generate_updated_config --add-new
# Standard Lib Imports
import argparse
import apt_pkg
import copy
import logging
import re
import subprocess
import sys
# External imports
import apt_pkg
import yaml
# Log configuration
@ -78,6 +97,157 @@ stdout_channel = logging.StreamHandler(sys.stdout)
LOGGER.addHandler(stdout_channel)
class PackageStore(object):
"""Store a list of PackageLists and provide search"""
def __init__(self):
self.package_lists = {}
def __iter__(self):
return self.package_lists.values().__iter__()
def __contains__(self, list_name):
return list_name in self.package_lists
def __getitem__(self, item):
return self.package_lists.get(item)
def __setitem__(self, key, value):
self.package_lists[key] = value
def package_query(self, package, newer_only=False):
"""Search for a package in all package lists"""
results = []
for pl in self.package_lists.values():
results.extend(pl.package_query(package, newer_only))
return sorted(set(results))
def all_packages(self):
"""return all packages in a flat list"""
results = PackageList('all_packages')
for pl in self.package_lists.values():
for package in pl.all_packages():
results.add_package(package)
return results
def distinct_packages(self):
"""return newest version for each package in flat list"""
results = PackageList('all_packages')
for pl in self.package_lists.values():
for package in pl.distinct_packages():
results.add_package(package)
return results
class PackageList(object):
"""Store packages and versions from a single source"""
def __init__(self, name):
# data contains a hash for fast lookup
self.data = {}
self.name = name
def __iter__(self):
return self.all_packages().__iter__()
def __len__(self):
return len(self.all_packages())
def __contains__(self, other):
return other.name in self.data
def __sub__(self, other):
"""subtract another package list from this one
returns a new package list with the result"""
result_packages = set(self) - set(other)
new_list = PackageList("%s - %s" % (self.name, other.name))
for package in result_packages:
new_list.add_package(package)
return new_list
def __add__(self, other):
"""Add two package lists, return a new list"""
new_list = PackageList("%s + %s" % (self.name, other.name))
new_packages = set(self) + set(other)
for package in new_packages:
new_list.add_package(package)
def add_package(self, package):
"""Add a package to this package list, if its not already present"""
if package.name not in self.data:
self.data[package.name] = {}
versions = self.data[package.name]
if package.version not in versions:
versions[package.version] = package
package.last_list = self.name
LOGGER.debug('adding %s to %s' % (package, self.name))
def package_query(self, package, newer_only=False):
"""Search for a package in this package list
if package.version is None, multiple versions may be returned
if newer_only is true, then only versions newer than the specified
version will be returned
Always returns a list.
"""
if package.name not in self.data:
return []
versions = sorted(set(self.data[package.name].values()))
if package.version is None:
return versions
else:
if not newer_only:
if package.version in self.data[package.name]:
return [self.data[package.name].get(package.version)]
else:
return []
else:
return [p for p in versions if p > package]
def all_packages(self):
"""return flat list of all packages"""
packages = []
for versions in self.data.values():
packages.extend(versions.values())
return list(set(packages))
def distinct_packages(self):
"""return flat list of newest version of each package"""
packages = []
for versions in self.data.values():
packages.append(sorted(versions.values())[-1])
return list(set(packages))
class Package(object):
"""Represents a single package"""
def __init__(self, name, version=None, source=None):
self.name = name
self.version = version
self.source = source
def __eq__(self, other):
"""Packages are 'equal' if there name is the same"""
return self.name == other.name and self.version == other.version
def __hash__(self):
"""hash matches equality by hashing name and version"""
return hash(self.name) ^ hash(str(self.version))
def __cmp__(self, other):
""" packages are sorted by name then version """
if self.name == other.name:
return apt_pkg.version_compare(self.version, other.version)
else:
return self.name.__cmp__(other.name)
def __repr__(self):
return "Package: %s, Version: %s" % (self.name, self.version)
class Aptly(object):
"""Wrapper around the aptly tool"""
@ -103,8 +273,9 @@ class Aptly(object):
"""List aptly mirrors"""
return self.run('mirror list -raw').splitlines()
def mirror_create(self, name, url, architecture, dist="./",
component="", key_id=None, key_url=None):
def mirror_create(self, name, architecture, dist="./",
component="", url=None, key_id=None, key_url=None,
ppa=None):
"""Create a repo mirror via aptly.
These are metadata only, packages arent pulled till update
"""
@ -115,7 +286,7 @@ class Aptly(object):
# check we have a key for the mirror
if key_id is None and key_url is None:
raise ValueError("mirror create requires key_id or key_url")
raise ValueError("mirror create requires key_id, key_url")
LOGGER.info("Creating mirror: %s" % name)
@ -143,12 +314,18 @@ class Aptly(object):
raise Exception("Failed to import gpg key for mirror %s"
% name)
self.run("mirror create -architectures %(arch)s %(name)s %(url)s "
"%(dist)s %(component)s" % {'arch': architecture,
'name': name,
'url': url,
'dist': dist,
'component': component})
if ppa:
self.run("mirror create -architectures %(arch)s %(name)s "
"ppa:%(ppa)s" % {'arch': architecture,
'name': name,
'ppa': ppa})
else:
self.run("mirror create -architectures %(arch)s %(name)s %(url)s "
"%(dist)s %(component)s" % {'arch': architecture,
'name': name,
'url': url,
'dist': dist,
'component': component})
def mirror_update(self, name):
"""Update (refresh) an aptly mirror"""
@ -157,37 +334,43 @@ class Aptly(object):
% {'name': name})
LOGGER.info("Updating mirror %s" % name)
self.run("mirror update %(name)s" % {'name': name})
self.mirror_get_packages(name)
def mirror_get_packages(self, name):
"""Git list of package,version tuples from a mirror"""
def mirror_get_packages(self, name, arch):
"""Get list of packages from a mirror"""
lines = self.run("mirror show -with-packages %(name)s"
% {'name': name}).splitlines()
packages = self.parse_aptly_package_list(lines)
packages = self.parse_aptly_package_list(lines, source=name,
arch=arch)
LOGGER.debug("Found %(num_packages)s packages for mirror %(name)s"
% {'num_packages': len(packages),
'name': name})
return packages
def parse_aptly_package_list(self, lines):
""" Get list of (name,version) tuples from an aptly package list"""
def parse_aptly_package_list(self, lines, source=None, arch=None):
"""Generate a PackageList() from aptly package list output"""
# match package list from aptly ... show -with-packages name
line_re = re.compile('\s{2,}(?P<name>[^_]*)_(?P<version>[^_]*)'
'_(?P<arch>[^_]*)')
packages = []
package_list = PackageList(name=source)
for line in lines:
match = line_re.match(line)
if match:
packages.append((match.groupdict()['name'],
match.groupdict()['version']))
return packages
gd = match.groupdict()
if gd['arch'] in [arch, 'all', None]:
package_list.add_package(Package(name=gd['name'],
version=gd['version'],
source=source))
else:
LOGGER.debug('rejecting package %s %s due to invalid '
'architecture %s' % (gd['name'],
gd['version'],
gd['arch']))
return package_list
def repo_list(self):
"""Get list of repos known to aptly"""
@ -198,41 +381,48 @@ class Aptly(object):
self.run("repo create %(name)s" % {'name': name})
def repo_get_packages(self, name):
""" get list of name, version tuples from a repo"""
"""Generate PackageList representing an atply repo"""
lines = self.run('repo show -with-packages %(name)s'
% {'name': name})
return self.parse_aptly_package_list(lines)
return self.parse_aptly_package_list(lines, source=name)
def package_query(self, name, version):
""" return aptly query string for a package name & version """
return '"%(name)s (=%(version)s)"' % {'name': name,
'version': version}
def package_query(self, package):
"""return aptly query string for a package name & version"""
return '"%(name)s (=%(version)s)"' % {'name': package.name,
'version': package.version}
def repo_import_package(self, mirror, repo, package_name,
package_version):
def repo_import_package(self, mirror, repo, package):
"""Pull a package from a mirror into a repo"""
self.run('repo import %(mirror)s %(repo)s %(query)s'
% {'mirror': mirror,
'repo': repo,
'query': self.package_query(
package_name, package_version)
package.name, package.version)
}, shell=True)
def repo_import_packages(self, mirror, repo, packages, batch_size=200):
def repo_import_packages(self, repo, package_list, batch_size=200):
"""Batch import packages from a mirror into a repo"""
packages = package_list.all_packages()
packages_copy = copy.deepcopy(packages)
packages_reconstruct = []
LOGGER.info('importing %s packages from %s' % (len(packages),
package_list.name))
while packages:
batch = packages[0:batch_size]
batch = packages[:batch_size]
packages_reconstruct.extend(batch)
if not batch:
break
packages = packages[batch_size:]
query_string = " ".join([self.package_query(n, v) for
n, v in batch])
query_string = " ".join([self.package_query(p) for p in batch])
self.run('repo import %(mirror)s %(repo)s %(query)s'
% {'mirror': mirror,
% {'mirror': package_list.name,
'repo': repo,
'query': query_string},
shell=True)
assert packages_reconstruct == packages_copy
def repo_publish(self, name):
"""Create on disk distribution metata for an aptly internal repo"""
self.run('publish repo -distribution %(name)s %(name)s'
@ -241,11 +431,11 @@ class Aptly(object):
LOGGER.info("Published repo %(name)s" % {'name': name})
def snapshot_list(self):
""" List reposnapshots known to aptly"""
"""List reposnapshots known to aptly"""
self.run("-raw snapshot list").splitlines()
def snapshot_create(self, name):
""" Create an empty aptly snapshot"""
"""Create an empty aptly snapshot"""
self.run("snapshot create %(name)s empty")
@ -287,79 +477,74 @@ class AptlyOrechestrator(object):
self.args = args
self.aptly = Aptly()
self.config = Config(args.config_path)
def available_versions_for_package(self, name):
"""Find available versions for a package"""
versions = []
for mirror, packages in self.mirrors:
for package_name, version in packages:
if package_name == name:
versions.append(version)
return versions
# needed in order to use apt_pkg.version_compare
apt_pkg.init()
def ensure_mirrors(self, required_mirrors):
""" Check mirrors list, create any that are missing"""
current_mirrors = self.aptly.mirror_list()
self.mirrors = []
self.mirrors = PackageStore()
for required_mirror in required_mirrors:
# Create mirror if necessary
mirror_name = required_mirror['name']
if mirror_name not in current_mirrors:
required_mirror['architecture'] =\
self.config['config']['architecture']
self.aptly.mirror_create(**required_mirror)
# Update mirror if created or already existed
self.aptly.mirror_update(mirror_name)
# Store list of available packages for each mirror
self.mirrors.append(
(mirror_name, self.aptly.mirror_get_packages(mirror_name)))
self.aptly.mirror_update(mirror_name)
self.mirrors[mirror_name] = \
self.aptly.mirror_get_packages(
mirror_name,
self.config['config']['architecture'])
def packages_from_config(self):
"""get list of required packages from config
:returns: list of name,value tuples.
"""
return [(name, str(version)) for name, version
in self.config['packages'].iteritems()]
pl = PackageList(name='config')
for name, version in self.config['packages'].iteritems():
pl.add_package(Package(name=name, version=version,
source='config'))
return pl
def ensure_packages(self, dist_name):
"""Add all packages form config file to repo dist_name"""
"""Add all packages from config file to repo dist_name"""
# convert name: version dict to [(name,version),..]
required_packages = self.packages_from_config()
repo_packages = self.aptly.repo_get_packages(dist_name)
# packages that aren't in this repo already so need to be added
missing_packages = [p for p in required_packages
if p not in repo_packages]
missing_packages = required_packages - repo_packages
LOGGER.debug("missing packages: %s" % missing_packages)
LOGGER.debug("Packages to add: %s" % missing_packages)
# list of packages we dont find in any upstreams
unavailable_packages = []
# list of packages we don't find in any upstreams
unavailable_packages = PackageList('unavailable')
# map of mirror to package list for found packages
package_map = {}
packages_to_import = PackageStore()
for package in missing_packages:
for mirror, packages in self.mirrors:
if mirror not in package_map:
package_map[mirror] = []
found = False
if package in packages:
package_map[mirror].append(package)
found = True
LOGGER.debug("Found %(pname)s in %(mname)s"
% {'pname': package, 'mname': mirror})
break
if not found:
unavailable_packages.append(package)
result = self.mirrors.package_query(package)
if not result:
unavailable_packages.add_package(package)
LOGGER.debug("Failed to find package %(name)s %(version)s"
% {'name': package[0],
'version': package[1]})
% {'name': package.name,
'version': package.version})
else:
package = result[0]
if package.source not in packages_to_import:
packages_to_import[package.source] = \
PackageList(package.source)
packages_to_import[package.source].add_package(package)
LOGGER.debug("Found %(pname)s in %(mname)s"
% {'pname': package, 'mname': package.source})
# Batch import all the packages that are known to be available
for mirror, packages in package_map.iteritems():
LOGGER.info("importing packages from %s" % mirror)
self.aptly.repo_import_packages(mirror, dist_name, packages)
for package_list in packages_to_import:
self.aptly.repo_import_packages(dist_name, package_list)
# Return list of packages that were not found
return unavailable_packages
@ -373,72 +558,76 @@ class AptlyOrechestrator(object):
raise ValueError("dist name must be unique, %s already exists"
% dist_name)
LOGGER.info("Creating repo %(name)s" % {'name': dist_name})
self.ensure_mirrors(self.config['upstream_repos'])
self.ensure_mirrors(copy.deepcopy(self.config['upstream_repos']))
self.aptly.repo_create(dist_name)
unavailable_packages = self.ensure_packages(dist_name)
if unavailable_packages:
LOGGER.warning("The following packages are not available")
for name, version in unavailable_packages:
available_versions_str = ",".join(
self.available_versions_for_package(name))
LOGGER.warning("Not Found: %(name)s version: %(version)s. "
"Availble versions: %(avs)s"
% {'name': name,
'version': version,
'avs': available_versions_str})
for package in unavailable_packages:
LOGGER.warning("Not Found: %(package)s"
% {'package': package})
self.aptly.repo_publish(dist_name)
def add_package_to_config(self):
""" Add a package to the supplied config file"""
self.config['packages'][self.args.name] = self.args.version
self.config.write()
def delete_package_from_config(self):
""" Remove a package from the supplied config file"""
del self.config['packages'][self.args.name]
self.config.write()
def list_upstream_packages(self):
self.ensure_mirrors(self.config['upstream_repos'])
for mirror, packages in self.mirrors:
for package_name, package_version in packages:
print "%(mirror_name)s,%(package_name)s,%(package_version)s"\
% {'mirror_name': mirror,
'package_name': package_name,
'package_version': package_version}
for package in self.mirrors.all_packages():
print "%(mirror_name)s,%(package)s"\
% {'mirror_name': package.source,
'package_name': package}
def list_available_updates(self):
def check_for_new_and_updated(self):
"""Check for new and updated packages print them and optionally
generate a new config file"""
self.ensure_mirrors(self.config['upstream_repos'])
# needed in order to use apt_pkg.version_compare
apt_pkg.init()
# Count updated packages
package_updates = 0
config_packages = self.packages_from_config()
# Store newest version available for each package with updates
package_updates = {}
for package_name, package_version in self.packages_from_config():
updates = [v for v in self.available_versions_for_package(
package_name) if apt_pkg.version_compare(
package_version, v) < 0]
updates = sorted(set(updates),
cmp=apt_pkg.version_compare)
# Itterate over packages in config and check for newer versions.
for package in config_packages:
updates = self.mirrors.package_query(package, newer_only=True)
if updates:
package_updates[package_name] = updates[-1]
self.config['packages'][package.name] = updates[-1].version
package_updates += 1
LOGGER.info("Package %(name)s Config Version: %(current)s"
" Updates: %(updates)s"
% {'name': package_name,
'current': package_version,
'updates': updates})
% {'name': package.name,
'current': package.version,
'updates': [p.version for p in updates]})
# write new config file with updated versions, if requested
if package_updates and 'new_config_path' in self.args:
for package, version in package_updates.iteritems():
self.config['packages'][package] = version
# Find packages that are available upstream but not in the config file
all_upstream_packages = self.mirrors.all_packages()
new_packages = PackageList('new_packages')
for package in all_upstream_packages:
if package not in config_packages:
new_packages.add_package(package)
LOGGER.info("New %s" % (package))
self.config['packages'][package.name] = package.version
# write new config file with updated versions and new packages
# if requested
if 'new_config_path' in self.args:
self.config.write(self.args.new_config_path)
LOGGER.info("updated config written to %s"
% self.args.new_config_path)
LOGGER.info("updated config containing %s packages written to %s"
% (len(self.config['packages']),
self.args.new_config_path))
LOGGER.info("Input Config Packages: %s, Upstream Packages: %s, "
"New Packages: %s, Package Updates: %s" %
(len(config_packages),
len(self.mirrors.distinct_packages()),
len(new_packages.distinct_packages()), package_updates))
def main(args):
@ -453,27 +642,38 @@ def main(args):
subparsers = parser.add_subparsers()
# dfc = distribution from config
parser_dfc = subparsers.add_parser('dist_from_config')
parser_dfc = subparsers.add_parser(
'dist_from_config',
help="Create distribution from config file")
parser_dfc.add_argument('dist_name', help="must be unique")
parser_dfc.set_defaults(func="dfc")
# subcommand to add packages to config
parser_add_package = subparsers.add_parser('add_package_to_config')
parser_add_package = subparsers.add_parser(
'add_package_to_config',
help="Add a package to the config file")
parser_add_package.add_argument('name')
parser_add_package.add_argument('version')
parser_add_package.set_defaults(func="add_package")
# subcommand to remove packages from config
parser_del_package = subparsers.add_parser('delete_package_from_config')
parser_del_package = subparsers.add_parser(
'delete_package_from_config',
help="Delete package from config file")
parser_del_package.add_argument('name')
parser_del_package.set_defaults(func="del_package")
# List all packages available in configured upstreams
parser_list_upstream_pkgs = subparsers.add_parser('list_upstream_pkgs')
parser_list_upstream_pkgs = subparsers.add_parser(
'list_upstream_packages',
help="List all packages from upstreams in config file")
parser_list_upstream_pkgs.set_defaults(func='list_upstream')
# List all packages in the package list that have updates available
parser_list_upstream_pkgs = subparsers.add_parser('list_pkg_updates')
parser_list_upstream_pkgs = subparsers.add_parser(
'list_package_updates',
help="List packages which have newer versions available upstream"
" than listed in the config file")
parser_list_upstream_pkgs.set_defaults(func='list_updates')
# Generate new config file with updated package versions
@ -484,7 +684,11 @@ def main(args):
parser_generate_updated_config.add_argument(
'new_config_path',
help="New config with updated package verions will be generated"
" and written to this path")
" and written to this path")
parser_generate_updated_config.add_argument(
'--add-new', dest="add_new", action='store_true',
help="Add packages available upstream but not listed in"
" the specified config file to the new config file")
parser_generate_updated_config.set_defaults(func='list_updates')
args = parser.parse_args(args=args[1:])
@ -492,13 +696,14 @@ def main(args):
if args.verbose:
LOGGER.setLevel(logging.DEBUG)
apt_pkg.init() # Only needs to be done once to read the apt configs
ao = AptlyOrechestrator(args)
# Each subparser sets the func arg, call the appropriate function
{'dfc': ao.create_dist_from_package_list,
'add_package': ao.add_package_to_config,
'del_package': ao.delete_package_from_config,
'list_upstream': ao.list_upstream_packages,
'list_updates': ao.list_available_updates}[args.func]()
'list_updates': ao.check_for_new_and_updated}[args.func]()
if __name__ == "__main__":
main(sys.argv)

File diff suppressed because it is too large Load Diff