# vim: tabstop=4 shiftwidth=4 softtabstop=4 # Copyright (C) 2012 Yahoo! Inc. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. import contextlib import hashlib import os import re import tarfile import urlparse from anvil import colorizer from anvil import downloader as down from anvil import exceptions as exc from anvil import importer from anvil import log from anvil import shell as sh from anvil import utils LOG = log.getLogger(__name__) # Extensions that tarfile knows how to work with TAR_EXTS = ['.tgz', '.gzip', '.gz', '.bz2', '.tar'] # Used to attempt to produce a name for images (to see if we already have it) # And to use as the final name... # Reverse sorted so that .tar.gz replaces before .tar (and so on) NAME_CLEANUPS = [ '.tar.gz', '.img.gz', '.qcow2', '.img', ] + TAR_EXTS NAME_CLEANUPS.sort() NAME_CLEANUPS.reverse() # Used to match various file names with what could be a kernel image KERNEL_CHECKS = [ re.compile(r"(.*)vmlinuz(.*)$", re.I), re.compile(r'(.*?)aki-tty/image$', re.I), ] # Used to match various file names with what could be a root image ROOT_CHECKS = [ re.compile(r"(.*)img$", re.I), re.compile(r"(.*)qcow2$", re.I), re.compile(r'(.*?)aki-tty/image$', re.I), ] # Used to match various file names with what could be a ram disk image RAMDISK_CHECKS = [ re.compile(r"(.*)-initrd$", re.I), re.compile(r"initrd[-]?(.*)$", re.I), re.compile(r"(.*)initramfs(.*)$", re.I), re.compile(r'(.*?)ari-tty/image$', re.I), ] # Skip files that match these patterns SKIP_CHECKS = [ re.compile(r"^[.]", re.I), ] # File extensions we will skip over (typically of content hashes) BAD_EXTENSIONS = ['md5', 'sha', 'sfv'] class Unpacker(object): def _get_tar_file_members(self, arc_fn): LOG.info("Finding what exists in %s.", colorizer.quote(arc_fn)) files = [] with contextlib.closing(tarfile.open(arc_fn, 'r')) as tfh: for tmemb in tfh.getmembers(): if not tmemb.isfile(): continue files.append(tmemb.name) return files def _pat_checker(self, fn, patterns): (_root_fn, fn_ext) = os.path.splitext(fn) if utils.has_any(fn_ext.lower(), *BAD_EXTENSIONS): return False for pat in patterns: if pat.search(fn): return True return False def _find_pieces(self, files, files_location): """Match files against the patterns in KERNEL_CHECKS, RAMDISK_CHECKS, and ROOT_CHECKS to determine which files contain which image parts. """ kernel_fn = None ramdisk_fn = None img_fn = None utils.log_iterable( files, logger=LOG, header="Looking at %s files from %s to find the " "kernel/ramdisk/root images" % (len(files), colorizer.quote(files_location)) ) for fn in files: if self._pat_checker(fn, KERNEL_CHECKS): kernel_fn = fn LOG.debug("Found kernel: %r" % (fn)) elif self._pat_checker(fn, RAMDISK_CHECKS): ramdisk_fn = fn LOG.debug("Found ram disk: %r" % (fn)) elif self._pat_checker(fn, ROOT_CHECKS): img_fn = fn LOG.debug("Found root image: %r" % (fn)) else: LOG.debug("Unknown member %r - skipping" % (fn)) return (img_fn, ramdisk_fn, kernel_fn) def _unpack_tar_member(self, tarhandle, member, output_location): LOG.info("Extracting %s to %s.", colorizer.quote(member.name), colorizer.quote(output_location)) with contextlib.closing(tarhandle.extractfile(member)) as mfh: with open(output_location, "wb") as ofh: return sh.pipe_in_out(mfh, ofh) def _describe(self, root_fn, ramdisk_fn, kernel_fn): """Make an "info" dict that describes the path, disk format, and container format of each component of an image. """ info = dict() if kernel_fn: info['kernel'] = { 'file_name': kernel_fn, 'disk_format': 'aki', 'container_format': 'aki', } if ramdisk_fn: info['ramdisk'] = { 'file_name': ramdisk_fn, 'disk_format': 'ari', 'container_format': 'ari', } info['file_name'] = root_fn info['disk_format'] = 'ami' info['container_format'] = 'ami' return info def _filter_files(self, files): filtered = [] for fn in files: if self._pat_checker(fn, SKIP_CHECKS): pass else: filtered.append(fn) return filtered def _unpack_tar(self, file_name, file_location, tmp_dir): (root_name, _) = os.path.splitext(file_name) tar_members = self._filter_files(self._get_tar_file_members(file_location)) (root_img_fn, ramdisk_fn, kernel_fn) = self._find_pieces(tar_members, file_location) if not root_img_fn: msg = "Tar file %r has no root image member" % (file_name) raise IOError(msg) kernel_real_fn = None root_real_fn = None ramdisk_real_fn = None self._log_pieces_found('archive', root_img_fn, ramdisk_fn, kernel_fn) extract_dir = sh.mkdir(sh.joinpths(tmp_dir, root_name)) with contextlib.closing(tarfile.open(file_location, 'r')) as tfh: for m in tfh.getmembers(): if m.name == root_img_fn: root_real_fn = sh.joinpths(extract_dir, sh.basename(root_img_fn)) self._unpack_tar_member(tfh, m, root_real_fn) elif ramdisk_fn and m.name == ramdisk_fn: ramdisk_real_fn = sh.joinpths(extract_dir, sh.basename(ramdisk_fn)) self._unpack_tar_member(tfh, m, ramdisk_real_fn) elif kernel_fn and m.name == kernel_fn: kernel_real_fn = sh.joinpths(extract_dir, sh.basename(kernel_fn)) self._unpack_tar_member(tfh, m, kernel_real_fn) return self._describe(root_real_fn, ramdisk_real_fn, kernel_real_fn) def _log_pieces_found(self, src_type, root_fn, ramdisk_fn, kernel_fn): pieces = [] if root_fn: pieces.append("%s (root image)" % (colorizer.quote(root_fn))) if ramdisk_fn: pieces.append("%s (ramdisk image)" % (colorizer.quote(ramdisk_fn))) if kernel_fn: pieces.append("%s (kernel image)" % (colorizer.quote(kernel_fn))) if pieces: utils.log_iterable(pieces, logger=LOG, header="Found %s images from a %s" % (len(pieces), src_type)) def _unpack_dir(self, dir_path): """Pick through a directory to figure out which files are which image pieces, and create a dict that describes them. """ potential_files = set() for fn in self._filter_files(sh.listdir(dir_path)): full_fn = sh.joinpths(dir_path, fn) if sh.isfile(full_fn): potential_files.add(sh.canon_path(full_fn)) (root_fn, ramdisk_fn, kernel_fn) = self._find_pieces(potential_files, dir_path) if not root_fn: msg = "Directory %r has no root image member" % (dir_path) raise IOError(msg) self._log_pieces_found('directory', root_fn, ramdisk_fn, kernel_fn) return self._describe(root_fn, ramdisk_fn, kernel_fn) def unpack(self, file_name, file_location, tmp_dir): if sh.isdir(file_location): return self._unpack_dir(file_location) elif sh.isfile(file_location): (_, fn_ext) = os.path.splitext(file_name) fn_ext = fn_ext.lower() if fn_ext in TAR_EXTS: return self._unpack_tar(file_name, file_location, tmp_dir) elif fn_ext in ['.img', '.qcow2']: info = dict() info['file_name'] = file_location if fn_ext == '.img': info['disk_format'] = 'raw' else: info['disk_format'] = 'qcow2' info['container_format'] = 'bare' return info msg = "Currently we do not know how to unpack %r" % (file_location) raise IOError(msg) class Cache(object): """Represents an image cache.""" def __init__(self, cache_dir, url): self._cache_dir = cache_dir self._url = url hashed_url = self._hash(self._url) self._cache_path = sh.joinpths(self._cache_dir, hashed_url) self._details_path = sh.joinpths(self._cache_dir, hashed_url + ".details") @staticmethod def _hash(data, alg='md5'): """Hash data with a given algorithm.""" hasher = hashlib.new(alg) hasher.update(data) return hasher.hexdigest() def load_details(self): """Load cached image details.""" return utils.load_yaml_text(sh.load_file(self._details_path)) def save_details(self, details): """Save cached image details.""" sh.write_file(self._details_path, utils.prettify_yaml(details)) @property def path(self): return self._cache_path @property def is_valid(self): """Check if cache is valid.""" for path in (self._cache_path, self._details_path): if not sh.exists(path): return False check_files = [] try: image_details = self.load_details() check_files.append(image_details['file_name']) if 'kernel' in image_details: check_files.append(image_details['kernel']['file_name']) if 'ramdisk' in image_details: check_files.append(image_details['ramdisk']['file_name']) except Exception: return False for path in check_files: if not sh.isfile(path): return False return True class Image(object): """Represents an image with its own cache.""" def __init__(self, client, url, is_public, cache_dir): self._client = client self._url = url self._parsed_url = urlparse.urlparse(url) self._is_public = is_public self._cache = Cache(cache_dir, url) def _check_name(self, image_name): """Check if image already present in glance.""" for image in self._client.images.list(): if image_name == image.name: raise exc.DuplicateException( "Image %s already exists in glance." % colorizer.quote(image_name) ) def _create(self, file_name, **kwargs): """Create image in glance.""" with open(file_name, 'r') as fh: image = self._client.images.create(data=fh, **kwargs) return image.id def _register(self, image_name, location): """Register image in glance.""" # Upload the kernel, if we have one kernel = location.pop('kernel', None) kernel_id = '' if kernel: kernel_image_name = "%s-vmlinuz" % (image_name) self._check_name(kernel_image_name) LOG.info('Adding kernel %s to glance.', colorizer.quote(kernel_image_name)) LOG.info("Please wait installing...") conf = { 'container_format': kernel['container_format'], 'disk_format': kernel['disk_format'], 'name': kernel_image_name, 'is_public': self._is_public, } kernel_id = self._create(kernel['file_name'], **conf) # Upload the ramdisk, if we have one initrd = location.pop('ramdisk', None) initrd_id = '' if initrd: ram_image_name = "%s-initrd" % (image_name) self._check_name(ram_image_name) LOG.info('Adding ramdisk %s to glance.', colorizer.quote(ram_image_name)) LOG.info("Please wait installing...") conf = { 'container_format': initrd['container_format'], 'disk_format': initrd['disk_format'], 'name': ram_image_name, 'is_public': self._is_public, } initrd_id = self._create(initrd['file_name'], **conf) # Upload the root, we must have one LOG.info('Adding image %s to glance.', colorizer.quote(image_name)) self._check_name(image_name) conf = { 'name': image_name, 'container_format': location['container_format'], 'disk_format': location['disk_format'], 'is_public': self._is_public, 'properties': {}, } if kernel_id or initrd_id: if kernel_id: conf['properties']['kernel_id'] = kernel_id if initrd_id: conf['properties']['ramdisk_id'] = initrd_id LOG.info("Please wait installing...") image_id = self._create(location['file_name'], **conf) return image_id def _generate_image_name(self, url_fn): """Generate image name from a given url file name.""" name = url_fn for look_for in NAME_CLEANUPS: name = name.replace(look_for, '') return name def _extract_url_fn(self): """Extract filename from an image url.""" return sh.basename(self._parsed_url.path) def _is_url_local(self): """Check if image url is local.""" return sh.exists(self._url) or (self._parsed_url.scheme == '' and self._parsed_url.netloc == '') def install(self): """Process image installation.""" url_fn = self._extract_url_fn() if not url_fn: raise IOError("Can not determine file name from url: %r" % self._url) if self._cache.is_valid: LOG.info("Found valid cached image+metadata at: %s", colorizer.quote(self._cache.path)) image_details = self._cache.load_details() else: sh.mkdir(self._cache.path) if not self._is_url_local(): fetched_fn, bytes_down = down.UrlLibDownloader( self._url, sh.joinpths(self._cache.path, url_fn)).download() LOG.debug("For url %s we downloaded %s bytes to %s", self._url, bytes_down, fetched_fn) else: fetched_fn = self._url image_details = Unpacker().unpack(url_fn, fetched_fn, self._cache.path) self._cache.save_details(image_details) image_name = self._generate_image_name(url_fn) image_id = self._register(image_name, image_details) return image_name, image_id class UploadService(object): def __init__(self, glance, keystone, cache_dir='/usr/share/anvil/glance/cache', is_public=True): self._glance_params = glance self._keystone_params = keystone self._cache_dir = cache_dir self._is_public = is_public def _get_token(self, kclient_v2): LOG.info("Getting your keystone token so that image uploads may proceed.") k_params = self._keystone_params client = kclient_v2.Client(username=k_params['admin_user'], password=k_params['admin_password'], tenant_name=k_params['admin_tenant'], auth_url=k_params['endpoints']['public']['uri']) return client.auth_token def install(self, urls): am_installed = 0 try: # Done at a function level since this module may be used # before these libraries actually exist. gclient_v1 = importer.import_module('glanceclient.v1.client') gexceptions = importer.import_module('glanceclient.common.exceptions') kclient_v2 = importer.import_module('keystoneclient.v2_0.client') kexceptions = importer.import_module('keystoneclient.exceptions') except RuntimeError as e: LOG.exception("Failed at importing required client modules: %s", e) return am_installed if urls: try: # Ensure all services are up for params in (self._glance_params, self._keystone_params): utils.wait_for_url(params['endpoints']['public']['uri']) g_params = self._glance_params client = gclient_v1.Client(endpoint=g_params['endpoints']['public']['uri'], token=self._get_token(kclient_v2)) except (RuntimeError, gexceptions.ClientException, kexceptions.ClientException, IOError) as e: LOG.exception('Failed fetching needed clients for image calls due to: %s', e) return am_installed utils.log_iterable(urls, logger=LOG, header="Attempting to download+extract+upload %s images" % len(urls)) for url in urls: try: img_handle = Image(client, url, is_public=self._is_public, cache_dir=self._cache_dir) (name, img_id) = img_handle.install() LOG.info("Installed image %s with id %s.", colorizer.quote(name), colorizer.quote(img_id)) am_installed += 1 except exc.DuplicateException as e: LOG.warning(e) except (IOError, tarfile.TarError, gexceptions.ClientException, kexceptions.ClientException) as e: LOG.exception('Installing %r failed due to: %s', url, e) return am_installed def get_shared_params(ip, api_port=9292, protocol='http', reg_port=9191, **kwargs): mp = {} mp['service_host'] = ip glance_host = ip glance_port = api_port glance_protocol = protocol glance_registry_port = reg_port # Uri's of the http/https endpoints mp['endpoints'] = { 'admin': { 'uri': utils.make_url(glance_protocol, glance_host, glance_port), 'port': glance_port, 'host': glance_host, 'protocol': glance_protocol, }, 'registry': { 'uri': utils.make_url(glance_protocol, glance_host, glance_registry_port), 'port': glance_registry_port, 'host': glance_host, 'protocol': glance_protocol, } } mp['endpoints']['internal'] = dict(mp['endpoints']['admin']) mp['endpoints']['public'] = dict(mp['endpoints']['admin']) return mp