copy-wheels : generate indexes version 2

Some things have become evident when generating the indexes that
require some larger changes.

Firstly, the indexer script needs python3 on the host.  Since we're
still building CentOS 7 wheels, we need to install Python 3 from EPEL
there.

Secondly, because part of the PEP503 index page is the file hash,
reading all the files back over AFS is quite slow.  Having ansible
loop a task once per project directory is also quite slow, and
together these add up to job timeouts.

Instead, make the indexes on the local disk before we copy the results
to AFS.  This requires copying both scripts to the host for execution
(rather than relying on "script:") so the wheel-copy.sh script can
call wheel-indexer.py.

While we are here, do a small refactor of wheel-indexer.py to use
os.walk() (which makes it easier to turn this into a stand-alone
recursive script later, if something changes).  Also update the output
to use <ul><li> for the filenames, so the generated HTML looks a
little better.

Change-Id: I85f9e132bc55fd8d33583a698e15c47665e5cf8d
This commit is contained in:
Ian Wienand 2020-01-15 07:43:42 +11:00
parent dc3289235e
commit 3e9efa3a65
5 changed files with 95 additions and 60 deletions

View File

@ -22,6 +22,10 @@ for f in $WHEELHOUSE_DIR/*; do
DEST_DIR="${PACKAGENAME:0:1}/$PACKAGENAME"
# Create the index file
# NOTE(ianw) : remove temporary "--output" when working
/usr/local/bin/wheel-indexer.py --debug --output "index.html.tmp" $f
# Create the mirror directories in AFS /s/split style. This
# depends on the existence of a mod_rewrite script which unmunges
# the path, and is required because AFS has a practical folder size

View File

@ -16,22 +16,24 @@
# under the License.
#
# glob all .whl files in a directory, and make a index.html page
# Find all .whl files in a directory, and make an index.html page
# in PEP503 (https://www.python.org/dev/peps/pep-0503/) format
import argparse
import datetime
import email
import glob
import hashlib
import html
import logging
import os
import sys
import zipfile
parser = argparse.ArgumentParser()
parser.add_argument('outfile', nargs='?', default='-', help="output filename")
parser.add_argument('toplevel', help="directory to index")
parser.add_argument('-d', '--debug', dest="debug", action='store_true')
parser.add_argument('-o', '--output', dest="output",
default='index.html', help="Output filename, - for stdout")
args = parser.parse_args()
level = logging.DEBUG if args.debug else logging.INFO
@ -92,56 +94,74 @@ def get_sha256(filename):
return(sha256.hexdigest())
output = '''<html>
def create_index(path, files):
project = os.path.basename(path)
output = f'''<html>
<head>
<title>Links</title>
<title>{project}</title>
</head>
<body>
<ul>
'''
files = glob.glob('*.whl')
for f in files:
for f in files:
f_full = os.path.join(path, f)
requirements = ''
try:
logging.debug("Checking for requirements of : %s" % f_full)
requirements = get_requirements(f_full)
logging.debug("requirements are: %s" % requirements)
# NOTE(ianw): i'm not really sure if any of these should be
# terminal, as it would mean pip can't read the file anyway. Just
# log for now.
except NoMetadataException:
logging.debug("no metadata")
pass
except NoRequirementsException:
logging.debug("no python requirements")
pass
except BadFormatException:
logging.debug("Could not open")
pass
requirements = ''
try:
logging.debug("Checking for requirements of : %s" % f)
requirements = get_requirements(f)
logging.debug("requirements are: %s" % requirements)
# NOTE(ianw): i'm not really sure if any of these should be
# terminal, as it would mean pip can't read the file anyway. Just
# log for now.
except NoMetadataException:
logging.debug("no metadata")
pass
except NoRequirementsException:
logging.debug("no python requirements")
pass
except BadFormatException:
logging.debug("Could not open")
pass
sha256 = get_sha256(f_full)
logging.debug("sha256 for %s: %s" % (f_full, sha256))
sha256 = get_sha256(f)
logging.debug("sha256 for %s: %s" % (f, sha256))
output += f' <li><a href="{f}#sha256={sha256}"'
if requirements:
output += f' data-requires-python="{requirements}" '
output += f'>{f}</a></li>\n'
output += f' <a href="{f}#sha256={sha256}"'
if requirements:
output += f' data-requires-python="{requirements}" '
output += f'>{f}</a>\n'
output += ''' </body>
output += ''' </ul>
</body>
</html>
'''
now = datetime.datetime.now()
output += '<!-- last update: %s -->\n' % now.isoformat()
now = datetime.datetime.now()
output += '<!-- last update: %s -->\n' % now.isoformat()
logging.debug("Final output write")
return output
if args.outfile == '-':
outfile = sys.stdout
else:
outfile = open(args.outfile, "w")
logging.debug("Output going to: %s" % args.outfile)
outfile.write(output)
for root, dirs, files in os.walk(args.toplevel):
# sanity check we are only called from leaf directories by the
# driver script
if dirs:
print("This should only be called from leaf directories")
sys.exit(1)
logging.debug("Done!")
logging.debug("Processing %s" % root)
output = create_index(root, files)
logging.debug("Final output write")
if args.output == '-':
out_file = sys.stdout
else:
out_path = os.path.join(root, args.output)
logging.debug("Writing index file: %s" % out_path)
out_file = open(out_path, "w")
out_file.write(output)
logging.debug("Done!")

View File

@ -1,5 +1,24 @@
- name: Ensure we have python3 for indexer
include_tasks: "{{ lookup('first_found', params) }}"
vars:
params:
files:
- "py3.{{ ansible_distribution }}.{{ ansible_distribution_major_version }}.yaml"
- "default.yaml"
- name: Put copy scripts on host
copy:
src: '{{ item }}'
dest: '/usr/local/bin/{{ item }}'
owner: root
group: root
mode: '0755'
loop:
- wheel-copy.sh
- wheel-indexer.py
- name: Copy the wheels to AFS
script: wheel-copy.sh {{ wheel_dir }} {{ afs_dir }}
command: '/usr/local/bin/wheel-copy.sh {{ wheel_dir }} {{ afs_dir }}'
- name: Rebuild top-level mirror index
script: wheel-index.sh {{ afs_dir }}
@ -7,20 +26,3 @@
# the final index. All hosts should be finished copying under
# linear strategy.
run_once: True
- name: Get project directories
# the directories are laid out a/ b/ c/ ... z/ with projects
# underneath (ergo */* match). We actually use mod_rewrite to paper
# over this in the mirror apache config for external users.
shell: 'ls -d {{ afs_dir }}/*/*'
register: directories
- name: Create individual project indexes
# NOTE(ianw) .test to be removed after testing
script: wheel-indexer.py --debug index.html.test
args:
chdir: '{{ item }}'
executable: 'python3'
loop: "{{ directories.stdout.split('\n') }}"
# NOTE(ianw) remove after testing
ignore_errors: true

View File

@ -0,0 +1,5 @@
# Install python3 through the yum module with the "epel" repository
# enabled for this task, so the wheel indexer script has an interpreter.
# NOTE(review): the yum module documents the package parameter as `name`
# (alias `pkg`); confirm that `package:` is accepted here.
- name: Install Python3
yum:
package: python3
enablerepo: epel
state: present

View File

@ -0,0 +1,4 @@
# Install python3 through the generic `package` module, so the wheel
# indexer script has an interpreter.
# Presumably the fallback when no distribution-specific task file
# matches -- confirm against the first_found lookup in the caller.
- name: Install Python3
package:
name: python3
state: present