tools/upstream-wheel-audit.py
This is a tool to tell us which of our on-disk wheels are duplicated upstream by PyPI. These are things we don't need to cache locally. At one time, we were downloading all dependencies of our requirements and caching them; we shouldn't be doing that any more, but anything reported by this tool can be removed from our local mirrors. Now that the number of platforms * number of branches is becoming a maintenance issue, this will help us focus on keeping a useful working set in the cache. Change-Id: I3ded6b9869598a0907d7cda9f03bf414e46885df
This commit is contained in:
parent
a24509a7d7
commit
d1548e5049
85
tools/upstream-wheel-audit.py
Normal file
85
tools/upstream-wheel-audit.py
Normal file
@ -0,0 +1,85 @@
|
||||
# Check which of the wheels in our AFS directory exist upstream
|
||||
#
|
||||
# This outputs two files
|
||||
#
|
||||
# to-delete.txt : a list of files and directories that can be removed
|
||||
# from the mirror as all contents are cached in pypi
|
||||
#
|
||||
# log.txt : the leading number is the number of files left
|
||||
# in the given directory after checking upstream
|
||||
# package contents. i.e. this is unique content in
|
||||
# our mirror volume.
|
||||
#
|
||||
# Needs pypi-simple
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
|
||||
from pypi_simple import PyPISimple, NoSuchProjectError
|
||||
|
||||
# Root of the wheel mirror volume; each audited platform is a
# sub-directory directly beneath it.
BASE = "/afs/openstack.org/mirror/wheel"

# Report files, created (and truncated) in the current working
# directory as soon as the script starts:
#   to-delete.txt receives the paths that can be removed from the mirror
#   log.txt       receives the per-directory count of files left over
FILE_DEL = open("to-delete.txt", "w")
FILE_LOG = open("log.txt", "w")
|
||||
|
||||
# Platform sub-directories of BASE to audit.  Keep one entry per line
# with a trailing comma: a missing comma silently glues two adjacent
# string literals into a single bogus platform name.
PLATFORMS = (
    'centos-8-x86_64',
    'centos-9-x86_64',
    'debian-10-x86_64',
    'debian-11-x86_64',
    'ubuntu-18.04-aarch64',
    'ubuntu-20.04-aarch64',
    'ubuntu-22.04-aarch64',
    'centos-8-aarch64',
    'centos-9-aarch64',
    'debian-10-aarch64',
    'debian-11-aarch64',
    'ubuntu-16.04-x86_64',
    'ubuntu-18.04-x86_64',
    'ubuntu-20.04-x86_64',
    'ubuntu-22.04-x86_64',
)
|
||||
|
||||
def iterate_wheels(path, d):
    """Record *path* in the tree *d* and audit wheel directories upstream.

    A non-directory path has its basename appended to ``d['files']``.
    A directory gets a ``{'dirs': {}, 'files': []}`` node under
    ``d['dirs'][basename]`` and its entries are visited recursively.
    Once a directory has been walked, and it directly contains files,
    its basename is looked up as a project name on PyPI:

    * files that also exist upstream are written to ``FILE_DEL``;
    * if no local-only file remains, the directory itself is written
      to ``FILE_DEL`` too;
    * the count of local-only files and the directory path are written
      to ``FILE_LOG``.

    If the project is not found upstream, every file in the directory
    is written to ``FILE_DEL``.

    :param path: file or directory to examine.
    :param d: tree node of the form ``{'dirs': {...}, 'files': [...]}``;
        it is updated in place.
    :returns: the same *d* that was passed in.
    """
    name = os.path.basename(path)

    if not os.path.isdir(path):
        d['files'].append(name)
        return d

    node = d['dirs'].setdefault(name, {'dirs': {}, 'files': []})
    for entry in os.listdir(path):
        iterate_wheels(os.path.join(path, entry), node)

    # A path ending in a slash has an empty basename, so a top-level
    # directory passed that way (it holds index.html) is not looked
    # up.  Otherwise the directory name is the PyPI project name;
    # directories that hold no files directly have nothing to compare.
    if not name or not node['files']:
        return d

    with PyPISimple() as client:
        try:
            page = client.get_project_page(name)
        except NoSuchProjectError:
            print("Removing disappeared project : %s" % name, file=sys.stderr)
            for wheel in node['files']:
                print("%s/%s" % (path, wheel), file=FILE_DEL)
            return d

    upstream = {package.filename for package in page.packages}
    local = set(node['files'])

    not_upstream = local - upstream
    dups = local & upstream

    # Print files to delete, and if the directory would be left empty
    # put that in the list to delete too.  Sorted so the report is
    # stable between runs; the loop variable must not reuse the name
    # of the ``d`` parameter, which is returned below.
    for dup in sorted(dups):
        print("%s/%s" % (path, dup), file=FILE_DEL)
    if not not_upstream:
        print("%s" % path, file=FILE_DEL)

    # Output the number of files left in the directory after pruning
    print("%4d %s" % (len(not_upstream), path), file=FILE_LOG)
    return d
|
||||
|
||||
# Audit each platform's wheel directory in turn.  Every platform starts
# with a fresh, empty tree.  The report files are closed explicitly once
# the run ends (or fails) so their contents are flushed to disk.
try:
    for p in PLATFORMS:
        print("Processing %s" % p, file=sys.stderr)
        # The trailing slash gives the platform directory an empty
        # basename, so iterate_wheels does not treat it as a project.
        iterate_wheels('%s/%s/' % (BASE, p),
                       d={'dirs': {}, 'files': []})
finally:
    FILE_DEL.close()
    FILE_LOG.close()
|
Loading…
Reference in New Issue
Block a user