d1548e5049
This is a tool to tell us which of our on-disk wheels are duplicated upstream by PyPI. These are things we don't need to cache locally. At one time, we were downloading all dependencies of our requirements and caching them; we shouldn't be doing that any more, but anything reported by this tool can be removed from our local mirrors. Now that the number of platforms * number of branches is becoming a maintenance issue, this will help us focus on keeping a useful working set in the cache. Change-Id: I3ded6b9869598a0907d7cda9f03bf414e46885df
86 lines
2.9 KiB
Python
86 lines
2.9 KiB
Python
# Check which of the wheels in our AFS directory exist upstream
|
|
#
|
|
# This outputs two files
|
|
#
|
|
# to-delete.txt : a list of files and directories that can be removed
|
|
# from the mirror as all contents are cached in pypi
|
|
#
|
|
# log.txt : the leading number is the number of files left
|
|
# in the given directory after checking upstream
|
|
# package contents. i.e. this is unique content in
|
|
# our mirror volume.
|
|
#
|
|
# Needs pypi-simple
|
|
|
|
import sys
|
|
import os
|
|
import json
|
|
|
|
from pypi_simple import PyPISimple, NoSuchProjectError
|
|
|
|
# Root directory of the wheel mirror that is scanned.
BASE = '/afs/openstack.org/mirror/wheel'

# Output files, opened (and truncated) once when the module is loaded.
# to-delete.txt receives paths that can be removed from the mirror;
# log.txt receives the per-directory count of files left after pruning.
FILE_DEL = open('to-delete.txt', mode='w')
FILE_LOG = open('log.txt', mode='w')
|
|
|
|
# Platform directory names to scan, in the form <distro>-<release>-<arch>.
# Every entry carries a trailing comma: a missing comma makes Python
# silently concatenate two adjacent string literals into one bogus name.
PLATFORMS = (
    'centos-8-x86_64',
    'centos-9-x86_64',
    'debian-10-x86_64',
    'debian-11-x86_64',
    'ubuntu-18.04-aarch64',
    'ubuntu-20.04-aarch64',
    'ubuntu-22.04-aarch64',
    'centos-8-aarch64',
    'centos-9-aarch64',
    'debian-10-aarch64',
    'debian-11-aarch64',
    'ubuntu-16.04-x86_64',
    'ubuntu-18.04-x86_64',
    'ubuntu-20.04-x86_64',
    'ubuntu-22.04-x86_64',
)
|
|
|
|
def iterate_wheels(path, d):
    """Record ``path`` in the tree ``d`` and report wheels that exist upstream.

    A non-directory ``path`` just has its basename appended to
    ``d['files']``.  A directory gets a child node in ``d['dirs']`` and is
    walked recursively.  Once its contents are collected, a directory that
    has a non-empty name and directly contains files is treated as a PyPI
    project named after the directory:

    * every local file whose name is also listed on the project's PyPI
      simple page is written to ``FILE_DEL``;
    * if no local file is missing upstream, the directory itself is
      written to ``FILE_DEL`` as well;
    * the number of files missing upstream is written to ``FILE_LOG``;
    * if the project no longer exists on PyPI, all of its local files are
      written to ``FILE_DEL`` and a notice goes to stderr.

    :param path: file or directory to examine.
    :param d: tree node of the form ``{'dirs': {...}, 'files': [...]}``;
        it is updated in place.
    :returns: ``d``, on every code path.
    """
    name = os.path.basename(path)

    if not os.path.isdir(path):
        d['files'].append(name)
        return d

    node = d['dirs'].setdefault(name, {'dirs': {}, 'files': []})
    for entry in os.listdir(path):
        iterate_wheels(os.path.join(path, entry), node)

    # top level has index.html; skip (basename is '' for a path ending in
    # '/', so such a directory is never looked up on PyPI).
    # otherwise the directory name is the pypi project name
    if not (name and node['files']):
        return d

    with PyPISimple() as client:
        try:
            page = client.get_project_page(name)
        except NoSuchProjectError:
            print("Removing disappeared project : %s" % name, file=sys.stderr)
            for wheel in node['files']:
                print("%s/%s" % (path, wheel), file=FILE_DEL)
            return d

    upstream = {package.filename for package in page.packages}
    local = set(node['files'])

    not_upstream = local - upstream
    dups = local & upstream

    # Print files to delete, and if the directory is empty
    # put that in the list to delete too.  The loop variable must not be
    # called ``d``: that would clobber the tree node returned below.
    # Sorting keeps the output stable between runs.
    for dup in sorted(dups):
        print("%s/%s" % (path, dup), file=FILE_DEL)
    if not not_upstream:
        print("%s" % path, file=FILE_DEL)

    # Output the file left in the directory after pruning
    print("%4d %s" % (len(not_upstream), path), file=FILE_LOG)
    return d
|
|
|
|
# Walk each platform's directory under BASE, starting every walk with its
# own empty tree.  The trailing slash gives the root an empty basename.
for p in PLATFORMS:
    print("Processing %s" % p, file=sys.stderr)
    platform_root = '%s/%s/' % (BASE, p)
    iterate_wheels(platform_root, d={'dirs': {}, 'files': []})
|