# Check which of the wheels in our AFS directory exist upstream
#
# This outputs two files
#
#  to-delete.txt : a list of files and directories that can be removed
#                  from the mirror as all contents are cached in pypi
#
#  log.txt : the leading number is the number of files left
#            in the given directory after checking upstream
#            package contents.  i.e. this is unique content in
#            our mirror volume.
#
# Needs pypi-simple

import sys
import os
import json

from pypi_simple import PyPISimple, NoSuchProjectError

BASE = '/afs/openstack.org/mirror/wheel'

# Output files, written to the current working directory.  They are kept as
# module-level handles because iterate_wheels() prints to them directly; they
# are closed once the driver loop at the bottom of the script finishes.
FILE_DEL = open('to-delete.txt', 'w')
FILE_LOG = open('log.txt', 'w')

# One entry per mirror sub-directory under BASE.  Keep the trailing comma on
# every line: adjacent string literals without a comma are silently
# concatenated into a single (non-existent) platform name.
PLATFORMS = (
    'centos-8-x86_64',
    'centos-9-x86_64',
    'debian-10-x86_64',
    'debian-11-x86_64',
    'ubuntu-18.04-aarch64',
    'ubuntu-20.04-aarch64',
    'ubuntu-22.04-aarch64',
    'centos-8-aarch64',
    'centos-9-aarch64',
    'debian-10-aarch64',
    'debian-11-aarch64',
    'ubuntu-16.04-x86_64',
    'ubuntu-18.04-x86_64',
    'ubuntu-20.04-x86_64',
    'ubuntu-22.04-x86_64',
)


def iterate_wheels(path, d):
    """Recursively walk *path*, recording it in *d* and checking it upstream.

    *d* is a tree node of the form ``{'dirs': {name: node}, 'files': [name]}``.
    A regular file is appended to ``d['files']``; a directory gets (or reuses)
    a child node in ``d['dirs']`` and is walked recursively.

    After a directory has been walked, if it has a non-empty name and directly
    contains files, its name is looked up as a project on PyPI:

    * files that also exist upstream are written to ``FILE_DEL``;
    * if no local-only files remain, the directory itself is written to
      ``FILE_DEL`` as well;
    * the count of local-only files and the directory path are written to
      ``FILE_LOG``.

    If the project does not exist upstream, every local file is written to
    ``FILE_DEL`` and nothing is logged to ``FILE_LOG``.

    Returns the node *d* that was passed in (mutated in place).
    """
    name = os.path.basename(path)

    if not os.path.isdir(path):
        d['files'].append(name)
        return d

    if name not in d['dirs']:
        d['dirs'][name] = {'dirs': {}, 'files': []}
    node = d['dirs'][name]

    for entry in os.listdir(path):
        iterate_wheels(os.path.join(path, entry), node)

    # The top level is passed with a trailing slash, so its basename is the
    # empty string; it has index.html and is skipped.  Otherwise the
    # directory name is the pypi project name.  Directories that hold only
    # sub-directories have nothing to compare and are skipped too.
    if not (name and node['files']):
        return d

    with PyPISimple() as client:
        try:
            page = client.get_project_page(name)
        except NoSuchProjectError:
            print("Removing disappeared project : %s" % name,
                  file=sys.stderr)
            for wheel in node['files']:
                print("%s/%s" % (path, wheel), file=FILE_DEL)
            return d

    upstream = {package.filename for package in page.packages}
    local = set(node['files'])
    not_upstream = local - upstream
    dups = local & upstream

    # Print files to delete, and if the directory is empty
    # put that in the list to delete too.  Sorted so that the output is
    # stable from run to run.  The loop variable must not be called ``d``:
    # that would clobber the tree node returned below.
    for dup in sorted(dups):
        print("%s/%s" % (path, dup), file=FILE_DEL)
    if not not_upstream:
        print("%s" % path, file=FILE_DEL)

    # Output the file left in the directory after pruning
    print("%4d %s" % (len(not_upstream), path), file=FILE_LOG)

    return d


try:
    for p in PLATFORMS:
        print("Processing %s" % p, file=sys.stderr)
        # Trailing slash is deliberate: it makes the platform root's basename
        # empty so iterate_wheels() does not treat it as a project.
        iterate_wheels('%s/%s/' % (BASE, p), d={'dirs': {}, 'files': []})
finally:
    # Flush and release the report files even if a platform walk fails.
    FILE_DEL.close()
    FILE_LOG.close()