diff --git a/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance b/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance
index 1889b7e6f42e..7f3575199341 100755
--- a/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance
+++ b/plugins/xenserver/xenapi/etc/xapi.d/plugins/glance
@@ -26,6 +26,7 @@ try:
     import json
 except ImportError:
     import simplejson as json
+import md5
 import os
 import os.path
 import pickle
@@ -46,6 +47,10 @@ CHUNK_SIZE = 8192
 KERNEL_DIR = '/boot/guest'
 
 
+class RetryException(Exception):
+    pass
+
+
 def _copy_kernel_vdi(dest, copy_args):
     vdi_uuid = copy_args['vdi_uuid']
     vdi_size = copy_args['vdi_size']
@@ -68,10 +73,60 @@ def _copy_kernel_vdi(dest, copy_args):
     return filename
 
 
-def _download_tarball(sr_path, staging_path, image_id, glance_host,
-                      glance_port, auth_token, num_retries):
+def _download_tarball(request, staging_path):
+    """Make one attempt to download and extract the image tarball"""
+    try:
+        response = urllib2.urlopen(request)
+    except urllib2.HTTPError, error:
+        raise RetryException(error)
+    except urllib2.URLError, error:
+        raise RetryException(error)
+
+    tar_cmd = "tar -zx --directory=%(staging_path)s" % locals()
+    tar_proc = _make_subprocess(tar_cmd, stderr=True, stdin=True)
+
+    checksum = md5.new()
+    etag = response.info().getheader('etag', None)
+    if etag is None:
+        etag = response.info().getheader('x-image-meta-checksum', None)
+
+    url = request.get_full_url()
+    logging.info("Reading image data from %s" % url)
+
+    length_read = 0
+    while True:
+        chunk = response.read(CHUNK_SIZE)
+        if chunk == '':
+            break
+        length_read += len(chunk)
+        checksum.update(chunk)
+        tar_proc.stdin.write(chunk)
+
+    logging.info("Read %(length_read)s bytes from %(url)s" % locals())
+
+    try:
+        _finish_subprocess(tar_proc, tar_cmd)
+    except Exception, error:
+        raise RetryException(error)
+
+    checksum = checksum.hexdigest()
+    if etag is None:
+        msg = "No ETag found for comparison to checksum %(checksum)s"
+        logging.info(msg % locals())
+    elif checksum != etag:
+        msg = 'ETag %(etag)s does not match computed md5sum %(checksum)s'
+        raise RetryException(msg % locals())
+    else:
+        msg = "Verified image checksum %(checksum)s"
+        logging.info(msg % locals())
+
+    return
+
+
+def _download_tarball_with_retry(sr_path, image_id, glance_host,
+                                 glance_port, auth_token, num_retries):
     """Download the tarball image from Glance and extract it into the staging
-    area.
+    area. Retry if there is any failure.
     """
     # Build request headers
     headers = {}
@@ -80,50 +135,27 @@ def _download_tarball(sr_path, staging_path, image_id, glance_host,
 
     url = "http://%(glance_host)s:%(glance_port)d/v1/images/"\
           "%(image_id)s" % locals()
-
-    logging.debug("Downloading tarball from %(url)s" % locals())
+    logging.info("Downloading %s" % url)
 
     request = urllib2.Request(url, headers=headers)
-    response = None
 
+    sleep_time = 0.5
     for try_num in xrange(1, num_retries + 2):
         try:
-            response = urllib2.urlopen(request)
-            break
+            staging_path = _make_staging_area(sr_path)
+            _download_tarball(request, staging_path)
+            return staging_path
+        except RetryException, error:
+            msg = "Downloading %(url)s attempt %(try_num)d error: %(error)s"
+            logging.error(msg % locals())
 
-        except urllib2.HTTPError, error:
-            if error.code == 404:
-                msg = "Image '%s' not found in Glance" % image_id
-                logging.error(msg)
-                raise Exception(msg)
+            _cleanup_staging_area(staging_path)
+            time.sleep(sleep_time)
+            sleep_time = min(2 * sleep_time, 15)
 
-            elif try_num == (num_retries + 1):
-                msg = "Unable to retrieve image after %d attempts." % try_num
-                logging.error(msg)
-                raise Exception(msg)
-
-        except urllib2.URLError, error:
-            pass
-
-        logging.error("Download attempt %d error: %s" % (try_num, error))
-        time.sleep(1)
-
-    if response is None:
-        msg = "Unable to retrieve image: %(error)s" % locals()
-        logging.error(msg)
-        raise Exception(msg)
-
-    tar_cmd = "tar -zx --directory=%(staging_path)s" % locals()
-    tar_proc = _make_subprocess(tar_cmd, stderr=True, stdin=True)
-
-    logging.info("Reading image data from %s" % url)
-
-    chunk = response.read(CHUNK_SIZE)
-    while chunk:
-        tar_proc.stdin.write(chunk)
-        chunk = response.read(CHUNK_SIZE)
-
-    _finish_subprocess(tar_proc, tar_cmd)
+    msg = "Unable to retrieve %(url)s after %(try_num)d attempt(s)." % locals()
+    logging.error(msg)
+    raise Exception(msg)
 
 
 def _import_vhds(sr_path, staging_path, uuid_stack):
@@ -397,7 +429,8 @@ def _cleanup_staging_area(staging_path):
     it's safe to remove the staging-area because the SR will keep the link
     count > 0 (so the VHDs in the SR will not be deleted).
     """
-    shutil.rmtree(staging_path)
+    if os.path.exists(staging_path):
+        shutil.rmtree(staging_path)
 
 
 def _make_subprocess(cmdline, stdout=False, stderr=False, stdin=False):
@@ -436,15 +469,17 @@ def download_vhd(session, args):
     auth_token = params["auth_token"]
     num_retries = params["num_retries"]
 
-    staging_path = _make_staging_area(sr_path)
+    staging_path = None
     try:
-        _download_tarball(sr_path, staging_path, image_id, glance_host,
-                          glance_port, auth_token, num_retries)
+        staging_path = _download_tarball_with_retry(sr_path, image_id,
+                                                    glance_host, glance_port,
+                                                    auth_token, num_retries)
        # Right now, it's easier to return a single string via XenAPI,
         # so we'll json encode the list of VHDs.
         return json.dumps(_import_vhds(sr_path, staging_path, uuid_stack))
     finally:
-        if staging_path is not None:
-            _cleanup_staging_area(staging_path)
+        if staging_path is not None:
+            _cleanup_staging_area(staging_path)
 
 
 def upload_vhd(session, args):
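
Review note: the pattern this patch introduces is worth stating in isolation. Each attempt gets a fresh staging area, every failure mode is funnelled through the single RetryException, the sleep between attempts doubles up to a 15-second cap, and the streamed bytes are MD5-hashed and compared against the ETag header (falling back to x-image-meta-checksum). The snippet below is a minimal, self-contained sketch of that retry/verify loop, not code from the patch: it uses modern Python (hashlib and urllib.request standing in for the plugin's Python 2 md5 and urllib2), and fetch_image / fetch_with_retry are hypothetical names chosen for illustration.

import hashlib
import logging
import os
import shutil
import tempfile
import time
import urllib.error
import urllib.request

CHUNK_SIZE = 8192


class RetryException(Exception):
    """Any per-attempt failure the caller is allowed to retry."""


def fetch_image(url, staging_path):
    # One attempt: stream the body to disk, hashing as we go, then
    # compare the digest against the server-reported checksum.
    try:
        response = urllib.request.urlopen(url)
    except urllib.error.URLError as error:  # HTTPError is a subclass
        raise RetryException(error)

    checksum = hashlib.md5()
    etag = response.headers.get('etag')
    with open(os.path.join(staging_path, 'image.tgz'), 'wb') as out:
        while True:
            chunk = response.read(CHUNK_SIZE)
            if not chunk:
                break
            checksum.update(chunk)
            out.write(chunk)

    digest = checksum.hexdigest()
    # Some servers quote the ETag; strip quotes before comparing.
    if etag is not None and digest != etag.strip('"'):
        raise RetryException(
            "ETag %s does not match computed md5sum %s" % (etag, digest))


def fetch_with_retry(url, num_retries):
    # num_retries + 1 total attempts, with a fresh staging dir per
    # attempt so a half-written download never leaks into the next try.
    sleep_time = 0.5
    for try_num in range(1, num_retries + 2):
        staging_path = tempfile.mkdtemp()
        try:
            fetch_image(url, staging_path)
            return staging_path
        except RetryException as error:
            logging.error("Attempt %d for %s failed: %s", try_num, url, error)
            shutil.rmtree(staging_path, ignore_errors=True)
            time.sleep(sleep_time)
            sleep_time = min(2 * sleep_time, 15)  # capped exponential back-off
    raise Exception("Unable to retrieve %s after %d attempt(s)" % (url, try_num))

The patch applies the same loop but pipes the stream into "tar -zx" instead of a plain file, and places the staging area on the SR so the extracted VHDs can be hard-linked into place — which is why _cleanup_staging_area can safely rmtree the staging path, as its docstring explains.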