openstack-ansible/scripts/fastest-infra-wheel-mirror.py
Jesse Pretorius 7f70ca7ff5 Set AIO to use an OpenStack-Infra wheel mirror
To speed up an AIO build when *not* executed in the OpenStack-CI gate,
this patch determines the fastest available wheel mirror and includes
it as an extra pip index.

This speeds up the repo build process by ensuring that the wheels do
not need to be built locally, but can simply be downloaded from the
mirror.

The patch also includes some cleanup that was left out of
I73fa1db5210f02d6df9dc324f8b4ec21232d06ba relating to apt conf files.

Change-Id: I56f7c8520cc6cec74df5ca8611d988039184efbb
2016-05-26 08:00:40 +01:00

171 lines
5.5 KiB
Python
Executable File

#!/usr/bin/env python
#
# Copyright 2016, Rackspace US, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# (c) 2016, Jesse Pretorius <jesse.pretorius@rackspace.co.uk>
#
# Based on the mirror test script posted at
# http://code.activestate.com/recipes/284631-a-python-script-to-test-download-mirrors/
import platform
import Queue
import re
import threading
import time
import urllib
# Maximum number of seconds to wait for a mirror to respond.
HTTP_TIMEOUT = 10.0

# <title> text that a genuine mirror index page must carry; used to
# validate that the page returned really is the wheel index.
HTTP_TITLE = "Wheel Index"

# Upper bound on the number of worker threads started.
MAX_THREADS = 10

# Base URLs of the candidate wheel mirrors.
MIRROR_LIST = [
    "http://mirror.dfw.rax.openstack.org/wheel/",
    "http://mirror.ord.rax.openstack.org/wheel/",
    "http://mirror.iad.rax.openstack.org/wheel/",
    "http://mirror.gra1.ovh.openstack.org/wheel/",
    "http://mirror.bhs1.ovh.openstack.org/wheel/",
    "http://mirror.sjc1.bluebox.openstack.org/wheel/",
    "http://mirror.nyj01.internap.openstack.org/wheel/",
    "http://mirror.cloud1.osic.openstack.org/wheel/",
]
def TestUrl(workQueue, resultQueue, timeout=None, expectedTitle=None):
    '''Worker thread procedure.

    Pull URLs off workQueue until it is empty, time how long each one takes
    to return a valid index page, and put one (url, outcome) tuple per URL
    onto resultQueue.

    :param workQueue: queue of URL strings to test; it is drained without
                      blocking and the worker returns once it is empty.
    :param resultQueue: queue receiving (url, outcome) tuples, where outcome
                        is the elapsed time in whole milliseconds (int), or
                        the string "TIMEOUT" or "FAILED".
    :param timeout: seconds to wait for each URL before reporting "TIMEOUT";
                    defaults to the module-level HTTP_TIMEOUT.
    :param expectedTitle: text the page's <title> must equal for the page to
                          count as valid; defaults to the module-level
                          HTTP_TITLE.
    :returns: None
    '''

    # Resolved locally so that the worker behaves the same on Python 2 and
    # Python 3, whichever names the module-level imports provide.
    try:
        from queue import Empty
    except ImportError:
        from Queue import Empty
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    if timeout is None:
        timeout = HTTP_TIMEOUT
    if expectedTitle is None:
        expectedTitle = HTTP_TITLE

    titleRegex = re.compile("<title>(.+?)</title>")

    def CheckTitle(html):
        '''Return True if the page's HTML title is the expected value.

        This caters for a situation where a service provider may be
        redirecting DNS resolution failures to a web search page, or
        where the returned data is invalid in some other way.

        This function only reports its verdict; it deliberately does not
        touch any result list, so a slow subthread can never write into
        the list that belongs to a URL tested later.
        '''
        if isinstance(html, bytes) and not isinstance(html, str):
            # Python 3 hands back bytes from read(); the pattern is text.
            html = html.decode("utf-8", "replace")
        match = titleRegex.search(html)
        # A page without any <title> is as much a failure as a wrong one.
        return match is not None and match.group(1) == expectedTitle

    def SubthreadProc(url, result):
        '''Subthread procedure.

        Actually get the mirror index page in a subthread, so that we can
        time out using join rather than wait for a very slow server.
        Passing in a list for result lets us simulate pass-by-reference,
        since callers cannot get the return code from a Python thread.
        Exactly one item is appended: the elapsed milliseconds on success
        or -1 on any failure.
        '''
        startTime = time.time()
        try:
            response = urlopen(url)
            try:
                data = response.read()
            finally:
                # Always release the connection, even if the read fails.
                response.close()
            valid = CheckTitle(data)
        except Exception:
            # Could be a socket error or an HTTP error--either way, we
            # don't care--it's a failure to us.
            result.append(-1)
            return
        if valid:
            result.append(int((time.time() - startTime) * 1000))
        else:
            result.append(-1)

    while True:
        # Continue pulling data from the work queue until it's empty
        try:
            url = workQueue.get(False)
        except Empty:
            # work queue is empty--exit the thread proc.
            return

        # Create a single subthread to do the actual work
        result = []
        subThread = threading.Thread(target=SubthreadProc,
                                     args=(url, result))
        # Daemonize the subthread so that even if a few are hanging
        # around when the process is done, the process will exit.
        subThread.daemon = True

        # Run the subthread and wait for it to finish, or time out
        subThread.start()
        subThread.join(timeout)

        # Snapshot the list: the subthread may still append after a timeout.
        finished = list(result)
        if not finished:
            # Subthread hasn't given a result yet. Consider it timed out.
            resultQueue.put((url, "TIMEOUT"))
        elif finished[0] == -1:
            # Subthread reported a fetch or validation failure.
            resultQueue.put((url, "FAILED"))
        else:
            # Subthread returned a time. Store it.
            resultQueue.put((url, finished[0]))
# Set the number of threads to use
numThreads = min(MAX_THREADS, len(MIRROR_LIST))

# Work out the platform-specific sub-directory once, instead of querying
# the platform again for every mirror: "<distro>-<version>-<arch>/".
# NOTE: platform.linux_distribution() is deprecated since Python 3.5 and
# was removed in Python 3.8.
distro, version = platform.linux_distribution()[:2]
distro = distro.lower()
architecture = platform.machine()
platformPath = distro + "-" + version + "-" + architecture + "/"

# Build a queue of complete URLs to feed the worker threads
workQueue = Queue.Queue()
for url in MIRROR_LIST:
    workQueue.put(url + platformPath)

workers = []
resultQueue = Queue.Queue()

# Create worker threads to load-balance the retrieval
for threadNum in range(numThreads):
    worker = threading.Thread(target=TestUrl, args=(workQueue, resultQueue))
    worker.start()
    workers.append(worker)

# Wait for all the workers to finish
for w in workers:
    w.join()

# Separate the successes from failures. Successful results carry an
# integer time; "TIMEOUT" and "FAILED" results carry a string.
timings = []
failures = []
while not resultQueue.empty():
    url, result = resultQueue.get(False)
    if isinstance(result, str):
        failures.append((result, url))
    else:
        timings.append((result, url))

# Sort by increasing time or result string
timings.sort()
failures.sort()

# If all results are failed, then exit silently
if timings:
    # Print out the fastest mirror URL
    print(timings[0][1])