OpenStack Compute (Nova)
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

739 lines
27 KiB

# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# All Rights Reserved.
# Copyright (c) 2010 Citrix Systems, Inc.
# Copyright (c) 2011 Piston Cloud Computing, Inc
# Copyright (c) 2011 OpenStack Foundation
# (c) Copyright 2013 Hewlett-Packard Development Company, L.P.
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import errno
import grp
import os
import pwd
import re
import typing as ty
import uuid
import os_traits
from oslo_concurrency import processutils
from oslo_log import log as logging
from oslo_utils import fileutils
import nova.conf
from nova import context as nova_context
from nova import exception
from nova.i18n import _
from nova import objects
from nova.objects import fields as obj_fields
import nova.privsep.fs
import nova.privsep.idmapshift
import nova.privsep.libvirt
import nova.privsep.path
from nova.scheduler import utils as scheduler_utils
from nova import utils
from nova.virt import images
from nova.virt.libvirt import config as vconfig
from nova.virt.libvirt import guest as libvirt_guest
from nova.virt.libvirt.volume import remotefs
CONF = nova.conf.CONF
LOG = logging.getLogger(__name__)
RESIZE_SNAPSHOT_NAME = 'nova-resize'
# Mapping used to convert libvirt cpu features to traits, for more details, see
'3dnow': os_traits.HW_CPU_X86_3DNOW,
'abm': os_traits.HW_CPU_X86_ABM,
'aes': os_traits.HW_CPU_X86_AESNI,
'avx': os_traits.HW_CPU_X86_AVX,
'avx2': os_traits.HW_CPU_X86_AVX2,
'avx512bw': os_traits.HW_CPU_X86_AVX512BW,
'avx512cd': os_traits.HW_CPU_X86_AVX512CD,
'avx512dq': os_traits.HW_CPU_X86_AVX512DQ,
'avx512er': os_traits.HW_CPU_X86_AVX512ER,
'avx512f': os_traits.HW_CPU_X86_AVX512F,
'avx512pf': os_traits.HW_CPU_X86_AVX512PF,
'avx512vl': os_traits.HW_CPU_X86_AVX512VL,
'avx512vnni': os_traits.HW_CPU_X86_AVX512VNNI,
'avx512vbmi': os_traits.HW_CPU_X86_AVX512VBMI,
'avx512ifma': os_traits.HW_CPU_X86_AVX512IFMA,
'avx512vbmi2': os_traits.HW_CPU_X86_AVX512VBMI2,
'avx512bitalg': os_traits.HW_CPU_X86_AVX512BITALG,
'vaes': os_traits.HW_CPU_X86_AVX512VAES,
'gfni': os_traits.HW_CPU_X86_AVX512GFNI,
'vpclmulqdq': os_traits.HW_CPU_X86_AVX512VPCLMULQDQ,
'avx512-vpopcntdq': os_traits.HW_CPU_X86_AVX512VPOPCNTDQ,
'bmi1': os_traits.HW_CPU_X86_BMI,
'bmi2': os_traits.HW_CPU_X86_BMI2,
'pclmuldq': os_traits.HW_CPU_X86_CLMUL,
'f16c': os_traits.HW_CPU_X86_F16C,
'fma': os_traits.HW_CPU_X86_FMA3,
'fma4': os_traits.HW_CPU_X86_FMA4,
'mmx': os_traits.HW_CPU_X86_MMX,
'mpx': os_traits.HW_CPU_X86_MPX,
'sha-ni': os_traits.HW_CPU_X86_SHA,
'sse': os_traits.HW_CPU_X86_SSE,
'sse2': os_traits.HW_CPU_X86_SSE2,
'sse3': os_traits.HW_CPU_X86_SSE3,
'sse4.1': os_traits.HW_CPU_X86_SSE41,
'sse4.2': os_traits.HW_CPU_X86_SSE42,
'sse4a': os_traits.HW_CPU_X86_SSE4A,
'ssse3': os_traits.HW_CPU_X86_SSSE3,
# We have to continue to support the old (generic) trait for the
# AMD-specific svm feature.
'svm': (os_traits.HW_CPU_X86_SVM, os_traits.HW_CPU_X86_AMD_SVM),
'tbm': os_traits.HW_CPU_X86_TBM,
# We have to continue to support the old (generic) trait for the
# Intel-specific vmx feature.
'vmx': (os_traits.HW_CPU_X86_VMX, os_traits.HW_CPU_X86_INTEL_VMX),
'xop': os_traits.HW_CPU_X86_XOP
TRAITS_CPU_MAPPING = {v: k for k, v in CPU_TRAITS_MAPPING.items()}
# global directory for emulated TPM
VTPM_DIR = '/var/lib/libvirt/swtpm/'
def create_image(
disk_format: str, path: str, size: ty.Union[str, int],
) -> None:
"""Create a disk image
:param disk_format: Disk image format (as known by qemu-img)
:param path: Desired location of the disk image
:param size: Desired size of disk image. May be given as an int or
a string. If given as an int, it will be interpreted
as bytes. If it's a string, it should consist of a number
with an optional suffix ('K' for Kibibytes,
M for Mebibytes, 'G' for Gibibytes, 'T' for Tebibytes).
If no suffix is given, it will be interpreted as bytes.
processutils.execute('qemu-img', 'create', '-f', disk_format, path, size)
def create_cow_image(
backing_file: ty.Optional[str], path: str, size: ty.Optional[int] = None,
) -> None:
"""Create COW image
Creates a COW image with the given backing file
:param backing_file: Existing image on which to base the COW image
:param path: Desired location of the COW image
base_cmd = ['qemu-img', 'create', '-f', 'qcow2']
cow_opts = []
if backing_file:
base_details = images.qemu_img_info(backing_file)
cow_opts += ['backing_file=%s' % backing_file]
cow_opts += ['backing_fmt=%s' % base_details.file_format]
base_details = None
# Explicitly inherit the value of 'cluster_size' property of a qcow2
# overlay image from its backing file. This can be useful in cases
# when people create a base image with a non-default 'cluster_size'
# value or cases when images were created with very old QEMU
# versions which had a different default 'cluster_size'.
if base_details and base_details.cluster_size is not None:
cow_opts += ['cluster_size=%s' % base_details.cluster_size]
if size is not None:
cow_opts += ['size=%s' % size]
if cow_opts:
# Format as a comma separated list
csv_opts = ",".join(cow_opts)
cow_opts = ['-o', csv_opts]
cmd = base_cmd + cow_opts + [path]
def create_ploop_image(
disk_format: str, path: str, size: ty.Union[int, str], fs_type: str,
) -> None:
"""Create ploop image
:param disk_format: Disk image format (as known by ploop)
:param path: Desired location of the ploop image
:param size: Desired size of ploop image. May be given as an int or
a string. If given as an int, it will be interpreted
as bytes. If it's a string, it should consist of a number
with an optional suffix ('K' for Kibibytes,
M for Mebibytes, 'G' for Gibibytes, 'T' for Tebibytes).
If no suffix is given, it will be interpreted as bytes.
:param fs_type: Filesystem type
if not fs_type:
fs_type = CONF.default_ephemeral_format or \
disk_path = os.path.join(path, 'root.hds')
nova.privsep.libvirt.ploop_init(size, disk_format, fs_type, disk_path)
def pick_disk_driver_name(
hypervisor_version: int, is_block_dev: bool = False,
) -> ty.Optional[str]:
"""Pick the libvirt primary backend driver name
If the hypervisor supports multiple backend drivers we have to tell libvirt
which one should be used.
Xen supports the following drivers: "tap", "tap2", "phy", "file", or
"qemu", being "qemu" the preferred one. Qemu only supports "qemu".
:param is_block_dev:
:returns: driver_name or None
if CONF.libvirt.virt_type == "xen":
if is_block_dev:
return "phy"
# 4002000 == 4.2.0
if hypervisor_version >= 4002000:
except OSError as exc:
if exc.errno == errno.ENOENT:
LOG.debug("xend is not found")
# libvirt will try to use libxl toolstack
return 'qemu'
except processutils.ProcessExecutionError:
LOG.debug("xend is not started")
# libvirt will try to use libxl toolstack
return 'qemu'
# libvirt will use xend/xm toolstack
out, err = processutils.execute('tap-ctl', 'check',
if out == 'ok\n':
# 4000000 == 4.0.0
if hypervisor_version > 4000000:
return "tap2"
return "tap"
else:"tap-ctl check: %s", out)
except OSError as exc:
if exc.errno == errno.ENOENT:
LOG.debug("tap-ctl tool is not installed")
return "file"
elif CONF.libvirt.virt_type in ('kvm', 'qemu'):
return "qemu"
# UML doesn't want a driver_name set
return None
def get_disk_size(path: str, format: ty.Optional[str] = None) -> int:
"""Get the (virtual) size of a disk image
:param path: Path to the disk image
:param format: the on-disk format of path
:returns: Size (in bytes) of the given disk image as it would be seen
by a virtual machine.
size = images.qemu_img_info(path, format).virtual_size
return int(size)
def get_disk_backing_file(
path: str, basename: bool = True, format: ty.Optional[str] = None,
) -> ty.Optional[str]:
"""Get the backing file of a disk image
:param path: Path to the disk image
:returns: a path to the image's backing store
backing_file = images.qemu_img_info(path, format).backing_file
if backing_file and basename:
backing_file = os.path.basename(backing_file)
return backing_file
def copy_image(
src: str,
dest: str,
host: ty.Optional[str] = None,
receive: bool = False,
on_execute: ty.Callable = None,
on_completion: ty.Callable = None,
compression: bool = True,
) -> None:
"""Copy a disk image to an existing directory
:param src: Source image
:param dest: Destination path
:param host: Remote host
:param receive: Reverse the rsync direction
:param on_execute: Callback method to store pid of process in cache
:param on_completion: Callback method to remove pid of process from cache
:param compression: Allows to use rsync operation with or without
if not host:
# We shell out to cp because that will intelligently copy
# sparse files. I.E. holes will not be written to DEST,
# rather recreated efficiently. In addition, since
# coreutils 8.11, holes can be read efficiently too.
# we add '-r' argument because ploop disks are directories
processutils.execute('cp', '-r', src, dest)
if receive:
src = "%s:%s" % (utils.safe_ip_format(host), src)
dest = "%s:%s" % (utils.safe_ip_format(host), dest)
remote_filesystem_driver = remotefs.RemoteFilesystem()
remote_filesystem_driver.copy_file(src, dest,
on_execute=on_execute, on_completion=on_completion,
def chown_for_id_maps(
path: str, id_maps: ty.List[vconfig.LibvirtConfigGuestIDMap],
) -> None:
"""Change ownership of file or directory for an id mapped
:param path: File or directory whose ownership to change
:param id_maps: List of type LibvirtConfigGuestIDMap
uid_maps = [id_map for id_map in id_maps if
isinstance(id_map, vconfig.LibvirtConfigGuestUIDMap)]
gid_maps = [id_map for id_map in id_maps if
isinstance(id_map, vconfig.LibvirtConfigGuestGIDMap)]
nova.privsep.idmapshift.shift(path, uid_maps, gid_maps)
def extract_snapshot(
disk_path: str, source_fmt: str, out_path: str, dest_fmt: str,
) -> None:
"""Extract a snapshot from a disk image.
Note that nobody should write to the disk image during this operation.
:param disk_path: Path to disk image
:param out_path: Desired path of extracted snapshot
# NOTE(markmc): ISO is just raw to qemu-img
if dest_fmt == 'iso':
dest_fmt = 'raw'
if dest_fmt == 'ploop':
dest_fmt = 'parallels'
compress = CONF.libvirt.snapshot_compression and dest_fmt == "qcow2"
images.convert_image(disk_path, out_path, source_fmt, dest_fmt,
# TODO(stephenfin): This is dumb; remove it.
def load_file(path: str) -> str:
"""Read contents of file
:param path: File to read
with open(path, 'r') as fp:
# TODO(stephenfin): Remove this; we have suitably powerful mocking abilities
# nowadays
def file_open(*args, **kwargs):
"""Open file
see built-in open() documentation for more details
Note: The reason this is kept in a separate module is to easily
be able to provide a stub module that doesn't alter system
state at all (for unit tests)
return open(*args, **kwargs)
def find_disk(guest: libvirt_guest.Guest) -> ty.Tuple[str, ty.Optional[str]]:
"""Find root device path for instance
May be file or device
guest_config = guest.get_config()
disk_format = None
if guest_config.virt_type == 'lxc':
filesystem = next(d for d in guest_config.devices
if isinstance(d, vconfig.LibvirtConfigGuestFilesys))
disk_path = filesystem.source_dir
disk_path = disk_path[0:disk_path.rfind('rootfs')]
disk_path = os.path.join(disk_path, 'disk')
elif (guest_config.virt_type == 'parallels' and
guest_config.os_type == obj_fields.VMMode.EXE):
filesystem = next(d for d in guest_config.devices
if isinstance(d, vconfig.LibvirtConfigGuestFilesys))
disk_format = filesystem.driver_type
disk_path = filesystem.source_file
disk = next(d for d in guest_config.devices
if isinstance(d, vconfig.LibvirtConfigGuestDisk))
disk_format = disk.driver_format
disk_path = disk.source_path if disk.source_type != 'mount' else None
if not disk_path and disk.source_protocol == 'rbd':
disk_path = disk.source_name
if disk_path:
disk_path = 'rbd:' + disk_path
if not disk_path:
raise RuntimeError(_("Can't retrieve root device path "
"from instance libvirt configuration"))
# This is a legacy quirk of libvirt/xen. Everything else should
# report the on-disk format in type.
if disk_format == 'aio':
disk_format = 'raw'
return (disk_path, disk_format)
def get_disk_type_from_path(path: str) -> ty.Optional[str]:
"""Retrieve disk type (raw, qcow2, lvm, ploop) for given file."""
if path.startswith('/dev'):
return 'lvm'
elif path.startswith('rbd:'):
return 'rbd'
elif (os.path.isdir(path) and
os.path.exists(os.path.join(path, "DiskDescriptor.xml"))):
return 'ploop'
# We can't reliably determine the type from this path
return None
def get_fs_info(path: str) -> ty.Dict[str, int]:
"""Get free/used/total space info for a filesystem
:param path: Any dirent on the filesystem
:returns: A dict containing:
:free: How much space is free (in bytes)
:used: How much space is used (in bytes)
:total: How big the filesystem is (in bytes)
hddinfo = os.statvfs(path)
total = hddinfo.f_frsize * hddinfo.f_blocks
free = hddinfo.f_frsize * hddinfo.f_bavail
used = hddinfo.f_frsize * (hddinfo.f_blocks - hddinfo.f_bfree)
return {'total': total, 'free': free, 'used': used}
def fetch_image(
context: nova_context.RequestContext,
target: str,
image_id: str,
trusted_certs: ty.Optional['objects.TrustedCerts'] = None,
) -> None:
"""Grab image.
:param context: nova.context.RequestContext auth request context
:param target: target path to put the image
:param image_id: id of the image to fetch
:param trusted_certs: optional objects.TrustedCerts for image validation
images.fetch_to_raw(context, image_id, target, trusted_certs)
def fetch_raw_image(
context: nova_context.RequestContext,
target: str,
image_id: str,
trusted_certs: ty.Optional['objects.TrustedCerts'] = None,
) -> None:
"""Grab initrd or kernel image.
This function does not attempt raw conversion, as these images will
already be in raw format.
:param context: nova.context.RequestContext auth request context
:param target: target path to put the image
:param image_id: id of the image to fetch
:param trusted_certs: optional objects.TrustedCerts for image validation
images.fetch(context, image_id, target, trusted_certs)
def get_instance_path(
instance: 'objects.Instance', relative: bool = False,
) -> str:
"""Determine the correct path for instance storage.
This method determines the directory name for instance storage.
:param instance: the instance we want a path for
:param relative: if True, just the relative path is returned
:returns: a path to store information about that instance
if relative:
return instance.uuid
return os.path.join(CONF.instances_path, instance.uuid)
def get_instance_path_at_destination(
instance: 'objects.Instance',
migrate_data: ty.Optional['objects.LibvirtLiveMigrateData'] = None,
) -> str:
"""Get the instance path on destination node while live migration.
This method determines the directory name for instance storage on
destination node, while live migration.
:param instance: the instance we want a path for
:param migrate_data: if not None, it is a dict which holds data
required for live migration without shared
:returns: a path to store information about that instance
instance_relative_path = None
if migrate_data:
instance_relative_path = migrate_data.instance_relative_path
# NOTE(mikal): this doesn't use libvirt_utils.get_instance_path
# because we are ensuring that the same instance directory name
# is used as was at the source
if instance_relative_path:
instance_dir = os.path.join(CONF.instances_path,
instance_dir = get_instance_path(instance)
return instance_dir
def get_arch(image_meta: 'objects.ImageMeta') -> str:
"""Determine the architecture of the guest (or host).
This method determines the CPU architecture that must be supported by
the hypervisor. It gets the (guest) arch info from image_meta properties,
and it will fallback to the nova-compute (host) arch if no architecture
info is provided in image_meta.
:param image_meta: the metadata associated with the instance image
:returns: guest (or host) architecture
if image_meta:
image_arch ='hw_architecture')
if image_arch is not None:
return image_arch
return obj_fields.Architecture.from_host()
def is_mounted(mount_path: str, source: ty.Optional[str] = None) -> bool:
"""Check if the given source is mounted at given destination point."""
if not os.path.ismount(mount_path):
return False
if source is None:
return True
with open('/proc/mounts', 'r') as proc_mounts:
mounts = [mount.split() for mount in proc_mounts.readlines()]
return any(mnt[0] == source and mnt[1] == mount_path for mnt in mounts)
def is_valid_hostname(hostname: str) -> bool:
return bool(re.match(r"^[\w\-\.:]+$", hostname))
def version_to_string(version: ty.Tuple[int, int, int]) -> str:
"""Returns string version based on tuple"""
return '.'.join([str(x) for x in version])
def cpu_features_to_traits(features: ty.Set[str]) -> ty.Dict[str, bool]:
"""Returns this driver's CPU traits dict where keys are trait names from
CPU_TRAITS_MAPPING, values are boolean indicates whether the trait should
be set in the provider tree.
:param features: A set of feature names (short, lowercase,
traits = {}
for feature_name, val in CPU_TRAITS_MAPPING.items():
trait_tuple = val if isinstance(val, tuple) else (val,)
for trait_name in trait_tuple:
traits[trait_name] = feature_name in features
return traits
def get_cpu_model_from_arch(arch: str) -> str:
mode = 'qemu64'
if arch == obj_fields.Architecture.I686:
mode = 'qemu32'
elif arch == obj_fields.Architecture.PPC64LE:
mode = 'POWER8'
# NOTE(kevinz): In aarch64, cpu model 'max' will offer the capabilities
# that all the stuff it can currently emulate, both for "TCG" and "KVM"
elif arch == obj_fields.Architecture.AARCH64:
mode = 'max'
return mode
def get_machine_type(image_meta: 'objects.ImageMeta') -> ty.Optional[str]:
"""The guest machine type can be set as an image metadata property, or
otherwise based on architecture-specific defaults. If no defaults are
found then None will be returned. This will ultimately lead to QEMU using
its own default which is currently the 'pc' machine type.
if'hw_machine_type') is not None:
# If set in the config, use that as the default.
return get_default_machine_type(get_arch(image_meta))
def get_default_machine_type(arch: str) -> ty.Optional[str]:
# NOTE(lyarwood): Values defined in [libvirt]/hw_machine_type take
# precedence here if available for the provided arch.
for mapping in CONF.libvirt.hw_machine_type or {}:
host_arch, _, machine_type = mapping.partition('=')
if machine_type == '':
LOG.warning("Invalid hw_machine_type config value %s", mapping)
elif host_arch == arch:
return machine_type
# NOTE(kchamart): For ARMv7 and AArch64, use the 'virt' board as the
# default machine type. It is the recommended board, which is designed
# to be used with virtual machines. The 'virt' board is more flexible,
# supports PCI, 'virtio', has decent RAM limits, etc.
# NOTE(sean-k-mooney): Nova's default for x86 is still 'pc', so
# use that, not 'q35', for x86_64 and i686.
# NOTE(aspiers): If you change this, don't forget to update the
# docs and metadata for hw_machine_type in glance.
default_mtypes = {
obj_fields.Architecture.ARMV7: "virt",
obj_fields.Architecture.AARCH64: "virt",
obj_fields.Architecture.S390: "s390-ccw-virtio",
obj_fields.Architecture.S390X: "s390-ccw-virtio",
obj_fields.Architecture.I686: "pc",
obj_fields.Architecture.X86_64: "pc",
return default_mtypes.get(arch)
def mdev_name2uuid(mdev_name: str) -> str:
"""Convert an mdev name (of the form mdev_<uuid_with_underscores>) to a
uuid (of the form 8-4-4-4-12).
return str(uuid.UUID(mdev_name[5:].replace('_', '-')))
def mdev_uuid2name(mdev_uuid: str) -> str:
"""Convert an mdev uuid (of the form 8-4-4-4-12) to a name (of the form
return "mdev_" + mdev_uuid.replace('-', '_')
def get_flags_by_flavor_specs(flavor: 'objects.Flavor') -> ty.Set[str]:
req_spec = objects.RequestSpec(flavor=flavor)
resource_request = scheduler_utils.ResourceRequest.from_request_spec(
required_traits = resource_request.all_required_traits
flags = [TRAITS_CPU_MAPPING[trait] for trait in required_traits
return set(flags)
def save_and_migrate_vtpm_dir(
instance_uuid: str,
inst_base_resize: str,
inst_base: str,
dest: str,
on_execute: ty.Callable,
on_completion: ty.Callable,
) -> None:
"""Save vTPM data to instance directory and migrate to the destination.
If the instance has vTPM enabled, then we need to save its vTPM data
locally (to allow for revert) and then migrate the data to the dest node.
Do so by copying vTPM data from the swtpm data directory to a resize
working directory, $inst_base_resize, and then copying this to the remote
directory at $dest:$inst_base.
:param instance_uuid: The instance's UUID.
:param inst_base_resize: The instance's base resize working directory.
:param inst_base: The instances's base directory on the destination host.
:param dest: Destination host.
:param on_execute: Callback method to store PID of process in cache.
:param on_completion: Callback method to remove PID of process from cache.
:returns: None.
vtpm_dir = os.path.join(VTPM_DIR, instance_uuid)
if not os.path.exists(vtpm_dir):
# We likely need to create the instance swtpm directory on the dest node
# with ownership that is not the user running nova. We only have
# permissions to copy files to <instance_path> on the dest node so we need
# to get creative.
# First, make a new directory in the local instance directory
swtpm_dir = os.path.join(inst_base_resize, 'swtpm')
# Now move the per-instance swtpm persistent files into the
# local instance directory.
nova.privsep.path.move_tree(vtpm_dir, swtpm_dir)
# Now adjust ownership.
swtpm_dir, os.geteuid(), os.getegid(), recursive=True)
# Copy the swtpm subtree to the remote instance directory
swtpm_dir, inst_base, host=dest, on_execute=on_execute,
def restore_vtpm_dir(swtpm_dir: str) -> None:
"""Given a saved TPM directory, restore it where libvirt can find it.
:path swtpm_dir: Path to swtpm directory.
:returns: None
# Ensure global swtpm dir exists with suitable
# permissions/ownership
if not os.path.exists(VTPM_DIR):
nova.privsep.path.chmod(VTPM_DIR, 0o711)
elif not os.path.isdir(VTPM_DIR):
msg = _(
'Guest wants emulated TPM but host path %s is not a directory.')
raise exception.Invalid(msg % VTPM_DIR)
# These can raise KeyError but they're validated by the driver on startup.
uid = pwd.getpwnam(CONF.libvirt.swtpm_user).pw_uid
gid = grp.getgrnam(CONF.libvirt.swtpm_group).gr_gid
# Set ownership of instance-specific files
nova.privsep.path.chown(swtpm_dir, uid, gid, recursive=True)
# Move instance-specific directory to global dir
nova.privsep.path.move_tree(swtpm_dir, VTPM_DIR)