OpenStack Compute (Nova)

# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# Copyright 2011 Piston Cloud Computing, Inc.
# Copyright 2012-2013 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Handles all requests relating to compute resources (e.g. guest VMs,
networking and storage of VMs, and compute hosts on which they run)."""

import collections
import functools
import re
import string

from castellan import key_manager
from oslo_log import log as logging
from oslo_messaging import exceptions as oslo_exceptions
from oslo_serialization import base64 as base64utils
from oslo_utils import excutils
from oslo_utils import strutils
from oslo_utils import timeutils
from oslo_utils import units
from oslo_utils import uuidutils
import six
from six.moves import range

from nova import availability_zones
from nova import block_device
from nova.compute import flavors
from nova.compute import instance_actions
from nova.compute import instance_list
from nova.compute import migration_list
from nova.compute import power_state
from nova.compute import rpcapi as compute_rpcapi
from nova.compute import task_states
from nova.compute import utils as compute_utils
from nova.compute.utils import wrap_instance_event
from nova.compute import vm_states
from nova import conductor
import nova.conf
from nova import context as nova_context
from nova import crypto
from nova.db import base
from nova.db.sqlalchemy import api as db_api
from nova import exception
from nova import exception_wrapper
from nova import hooks
from nova.i18n import _
from nova import image
from nova.network import constants
from nova.network import model as network_model
from nova.network import neutron
from nova.network.security_group import openstack_driver
from nova import objects
from nova.objects import base as obj_base
from nova.objects import block_device as block_device_obj
from nova.objects import external_event as external_event_obj
from nova.objects import fields as fields_obj
from nova.objects import keypair as keypair_obj
from nova.objects import quotas as quotas_obj
from nova.pci import request as pci_request
from nova.policies import servers as servers_policies
import nova.policy
from nova import profiler
from nova import rpc
from nova.scheduler.client import query
from nova.scheduler.client import report
from nova.scheduler import utils as scheduler_utils
from nova import servicegroup
from nova import utils
from nova.virt import hardware
from nova.volume import cinder

LOG = logging.getLogger(__name__)

get_notifier = functools.partial(rpc.get_notifier, service='compute')
# NOTE(gibi): legacy notification used compute as a service but these
# calls still run on the client side of the compute service which is
# nova-api. By setting the binary to nova-api below, we can make sure
# that the new versioned notifications has the right publisher_id but the
# legacy notifications does not change.
wrap_exception = functools.partial(exception_wrapper.wrap_exception,
                                   get_notifier=get_notifier,
                                   binary='nova-api')
CONF = nova.conf.CONF

AGGREGATE_ACTION_UPDATE = 'Update'
AGGREGATE_ACTION_UPDATE_META = 'UpdateMeta'
AGGREGATE_ACTION_DELETE = 'Delete'
AGGREGATE_ACTION_ADD = 'Add'

MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED = 38
MIN_COMPUTE_CROSS_CELL_RESIZE = 47

# FIXME(danms): Keep a global cache of the cells we find the
# first time we look. This needs to be refreshed on a timer or
# trigger.
CELLS = []


def check_instance_state(vm_state=None, task_state=(None,),
                         must_have_launched=True):
    """Decorator to check VM and/or task state before entry to API functions.

    If the instance is in the wrong state, or has not been successfully
    started at least once the wrapper will raise an exception.
    """
    if vm_state is not None and not isinstance(vm_state, set):
        vm_state = set(vm_state)
    if task_state is not None and not isinstance(task_state, set):
        task_state = set(task_state)

    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            if vm_state is not None and instance.vm_state not in vm_state:
                raise exception.InstanceInvalidState(
                    attr='vm_state',
                    instance_uuid=instance.uuid,
                    state=instance.vm_state,
                    method=f.__name__)
            if (task_state is not None and
                    instance.task_state not in task_state):
                raise exception.InstanceInvalidState(
                    attr='task_state',
                    instance_uuid=instance.uuid,
                    state=instance.task_state,
                    method=f.__name__)
            if must_have_launched and not instance.launched_at:
                raise exception.InstanceInvalidState(
                    attr='launched_at',
                    instance_uuid=instance.uuid,
                    state=instance.launched_at,
                    method=f.__name__)

            return f(self, context, instance, *args, **kw)
        return inner
    return outer
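
# Illustrative usage of the decorator above (not part of the upstream file):
# API methods further down apply it with the states they allow, e.g.
#
#     @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED])
#     def resize(self, context, instance, *args, **kwargs):
#         ...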


def _set_or_none(q):
    return q if q is None or isinstance(q, set) else set(q)


def reject_instance_state(vm_state=None, task_state=None):
    """Decorator. Raise InstanceInvalidState if instance is in any of the
    given states.
    """
    vm_state = _set_or_none(vm_state)
    task_state = _set_or_none(task_state)

    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            _InstanceInvalidState = functools.partial(
                exception.InstanceInvalidState,
                instance_uuid=instance.uuid,
                method=f.__name__)

            if vm_state is not None and instance.vm_state in vm_state:
                raise _InstanceInvalidState(
                    attr='vm_state', state=instance.vm_state)

            if task_state is not None and instance.task_state in task_state:
                raise _InstanceInvalidState(
                    attr='task_state', state=instance.task_state)

            return f(self, context, instance, *args, **kw)
        return inner
    return outer


def check_instance_host(check_is_up=False):
    """Validate the instance.host before performing the operation.

    At a minimum this method will check that the instance.host is set.

    :param check_is_up: If True, check that the instance.host status is UP
        or MAINTENANCE (disabled but not down).
    :raises: InstanceNotReady if the instance.host is not set
    :raises: ServiceUnavailable if check_is_up=True and the instance.host
        compute service status is not UP or MAINTENANCE
    """
    def outer(function):
        @six.wraps(function)
        def wrapped(self, context, instance, *args, **kwargs):
            if not instance.host:
                raise exception.InstanceNotReady(instance_id=instance.uuid)
            if check_is_up:
                # Make sure the source compute service is not down otherwise we
                # cannot proceed.
                host_status = self.get_instance_host_status(instance)
                if host_status not in (fields_obj.HostStatus.UP,
                                       fields_obj.HostStatus.MAINTENANCE):
                    # ComputeServiceUnavailable would make more sense here but
                    # we do not want to leak hostnames to end users.
                    raise exception.ServiceUnavailable()
            return function(self, context, instance, *args, **kwargs)
        return wrapped
    return outer


def check_instance_lock(function):
    @six.wraps(function)
    def inner(self, context, instance, *args, **kwargs):
        if instance.locked and not context.is_admin:
            raise exception.InstanceIsLocked(instance_uuid=instance.uuid)
        return function(self, context, instance, *args, **kwargs)
    return inner


def reject_sev_instances(operation):
    """Decorator. Raise OperationNotSupportedForSEV if instance has SEV
    enabled.
    """
    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            if hardware.get_mem_encryption_constraint(instance.flavor,
                                                      instance.image_meta):
                raise exception.OperationNotSupportedForSEV(
                    instance_uuid=instance.uuid,
                    operation=operation)
            return f(self, context, instance, *args, **kw)
        return inner
    return outer


def _diff_dict(orig, new):
    """Return a dict describing how to change orig to new. The keys
    correspond to values that have changed; the value will be a list
    of one or two elements. The first element of the list will be
    either '+' or '-', indicating whether the key was updated or
    deleted; if the key was updated, the list will contain a second
    element, giving the updated value.
    """
    # Figure out what keys went away
    result = {k: ['-'] for k in set(orig.keys()) - set(new.keys())}
    # Compute the updates
    for key, value in new.items():
        if key not in orig or value != orig[key]:
            result[key] = ['+', value]
    return result
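
# Illustrative example (not part of the upstream file):
#     _diff_dict({'a': 1, 'b': 2}, {'b': 3}) == {'a': ['-'], 'b': ['+', 3]}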


def load_cells():
    global CELLS
    if not CELLS:
        CELLS = objects.CellMappingList.get_all(
            nova_context.get_admin_context())
        LOG.debug('Found %(count)i cells: %(cells)s',
                  dict(count=len(CELLS),
                       cells=','.join([c.identity for c in CELLS])))

    if not CELLS:
        LOG.error('No cells are configured, unable to continue')


def _get_image_meta_obj(image_meta_dict):
    try:
        image_meta = objects.ImageMeta.from_dict(image_meta_dict)
    except ValueError as e:
        # there must be invalid values in the image meta properties so
        # consider this an invalid request
        msg = _('Invalid image metadata. Error: %s') % six.text_type(e)
        raise exception.InvalidRequest(msg)
    return image_meta


@profiler.trace_cls("compute_api")
class API(base.Base):
    """API for interacting with the compute manager."""

    def __init__(self, image_api=None, network_api=None, volume_api=None,
                 security_group_api=None, **kwargs):
        self.image_api = image_api or image.API()
        self.network_api = network_api or neutron.API()
        self.volume_api = volume_api or cinder.API()
        self._placementclient = None  # Lazy-load on first access.
        self.security_group_api = (security_group_api or
            openstack_driver.get_openstack_security_group_driver())
        self.compute_rpcapi = compute_rpcapi.ComputeAPI()
        self.compute_task_api = conductor.ComputeTaskAPI()
        self.servicegroup_api = servicegroup.API()
        self.host_api = HostAPI(self.compute_rpcapi, self.servicegroup_api)
        self.notifier = rpc.get_notifier('compute', CONF.host)
        if CONF.ephemeral_storage_encryption.enabled:
            self.key_manager = key_manager.API()
        # Help us to record host in EventReporter
        self.host = CONF.host

        super(API, self).__init__(**kwargs)

    def _record_action_start(self, context, instance, action):
        objects.InstanceAction.action_start(context, instance.uuid,
                                            action, want_result=False)

    def _check_injected_file_quota(self, context, injected_files):
        """Enforce quota limits on injected files.

        Raises a QuotaError if any limit is exceeded.
        """
        if not injected_files:
            return

        # Check number of files first
        try:
            objects.Quotas.limit_check(context,
                                       injected_files=len(injected_files))
        except exception.OverQuota:
            raise exception.OnsetFileLimitExceeded()

        # OK, now count path and content lengths; we're looking for
        # the max...
        max_path = 0
        max_content = 0
        for path, content in injected_files:
            max_path = max(max_path, len(path))
            max_content = max(max_content, len(content))

        try:
            objects.Quotas.limit_check(context,
                                       injected_file_path_bytes=max_path,
                                       injected_file_content_bytes=max_content)
        except exception.OverQuota as exc:
            # Favor path limit over content limit for reporting
            # purposes
            if 'injected_file_path_bytes' in exc.kwargs['overs']:
                raise exception.OnsetFilePathLimitExceeded(
                    allowed=exc.kwargs['quotas']['injected_file_path_bytes'])
            else:
                raise exception.OnsetFileContentLimitExceeded(
                    allowed=exc.kwargs['quotas']['injected_file_content_bytes'])

    def _check_metadata_properties_quota(self, context, metadata=None):
        """Enforce quota limits on metadata properties."""
        if not metadata:
            return
        if not isinstance(metadata, dict):
            msg = (_("Metadata type should be dict."))
            raise exception.InvalidMetadata(reason=msg)
        num_metadata = len(metadata)
        try:
            objects.Quotas.limit_check(context, metadata_items=num_metadata)
        except exception.OverQuota as exc:
            quota_metadata = exc.kwargs['quotas']['metadata_items']
            raise exception.MetadataLimitExceeded(allowed=quota_metadata)

        # Because metadata is stored in the DB, we hard-code the size limits
        # In future, we may support more variable length strings, so we act
        # as if this is quota-controlled for forwards compatibility.
        # Those are only used in V2 API, from V2.1 API, those checks are
        # validated at API layer schema validation.
        for k, v in metadata.items():
            try:
                utils.check_string_length(v)
                utils.check_string_length(k, min_length=1)
            except exception.InvalidInput as e:
                raise exception.InvalidMetadata(reason=e.format_message())

            if len(k) > 255:
                msg = _("Metadata property key greater than 255 characters")
                raise exception.InvalidMetadataSize(reason=msg)
            if len(v) > 255:
                msg = _("Metadata property value greater than 255 characters")
                raise exception.InvalidMetadataSize(reason=msg)

    def _check_requested_secgroups(self, context, secgroups):
        """Check if the security group requested exists and belongs to
        the project.

        :param context: The nova request context.
        :type context: nova.context.RequestContext
        :param secgroups: list of requested security group names, or uuids in
            the case of Neutron.
        :type secgroups: list
        :returns: list of requested security group names unmodified if using
            nova-network. If using Neutron, the list returned is all uuids.
            Note that 'default' is a special case and will be unmodified if
            it's requested.
        """
        security_groups = []
        for secgroup in secgroups:
            # NOTE(sdague): default is handled special
            if secgroup == "default":
                security_groups.append(secgroup)
                continue
            secgroup_dict = self.security_group_api.get(context, secgroup)
            if not secgroup_dict:
                raise exception.SecurityGroupNotFoundForProject(
                    project_id=context.project_id, security_group_id=secgroup)
            security_groups.append(secgroup_dict['id'])

        return security_groups

    def _check_requested_networks(self, context, requested_networks,
                                  max_count):
        """Check if the networks requested belongs to the project
        and the fixed IP address for each network provided is within
        same the network block
        """
        if requested_networks is not None:
            if requested_networks.no_allocate:
                # If the network request was specifically 'none' meaning don't
                # allocate any networks, we just return the number of requested
                # instances since quotas don't change at all.
                return max_count

            # NOTE(danms): Temporary transition
            requested_networks = requested_networks.as_tuples()

        return self.network_api.validate_networks(context, requested_networks,
                                                   max_count)

    def _handle_kernel_and_ramdisk(self, context, kernel_id, ramdisk_id,
                                   image):
        """Choose kernel and ramdisk appropriate for the instance.

        The kernel and ramdisk can be chosen in one of two ways:

            1. Passed in with create-instance request.

            2. Inherited from image metadata.

        If inherited from image metadata, and if that image metadata value is
        set to 'nokernel', both kernel and ramdisk will default to None.
        """
        # Inherit from image if not specified
        image_properties = image.get('properties', {})

        if kernel_id is None:
            kernel_id = image_properties.get('kernel_id')

        if ramdisk_id is None:
            ramdisk_id = image_properties.get('ramdisk_id')

        # Force to None if kernel_id indicates that a kernel is not to be used
        if kernel_id == 'nokernel':
            kernel_id = None
            ramdisk_id = None

        # Verify kernel and ramdisk exist (fail-fast)
        if kernel_id is not None:
            kernel_image = self.image_api.get(context, kernel_id)
            # kernel_id could have been a URI, not a UUID, so to keep behaviour
            # from before, which leaked that implementation detail out to the
            # caller, we return the image UUID of the kernel image and ramdisk
            # image (below) and not any image URIs that might have been
            # supplied.
            # TODO(jaypipes): Get rid of this silliness once we move to a real
            # Image object and hide all of that stuff within nova.image.api.
            kernel_id = kernel_image['id']

        if ramdisk_id is not None:
            ramdisk_image = self.image_api.get(context, ramdisk_id)
            ramdisk_id = ramdisk_image['id']

        return kernel_id, ramdisk_id

    @staticmethod
    def parse_availability_zone(context, availability_zone):
        # NOTE(vish): We have a legacy hack to allow admins to specify hosts
        #             via az using az:host:node. It might be nice to expose an
        #             api to specify specific hosts to force onto, but for
        #             now it just supports this legacy hack.
        # NOTE(deva): It is also possible to specify az::node, in which case
        #             the host manager will determine the correct host.
        forced_host = None
        forced_node = None
        if availability_zone and ':' in availability_zone:
            c = availability_zone.count(':')
            if c == 1:
                availability_zone, forced_host = availability_zone.split(':')
            elif c == 2:
                if '::' in availability_zone:
                    availability_zone, forced_node = \
                        availability_zone.split('::')
                else:
                    availability_zone, forced_host, forced_node = \
                        availability_zone.split(':')
            else:
                raise exception.InvalidInput(
                    reason="Unable to parse availability_zone")

        if not availability_zone:
            availability_zone = CONF.default_schedule_zone

        return availability_zone, forced_host, forced_node
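
    # Illustrative examples of the legacy parsing above (not part of the
    # upstream file):
    #     'zone:host1:node1' -> ('zone', 'host1', 'node1')
    #     'zone::node1'      -> ('zone', None, 'node1')
    #     'zone'             -> ('zone', None, None)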

    def _ensure_auto_disk_config_is_valid(self, auto_disk_config_img,
                                          auto_disk_config, image):
        auto_disk_config_disabled = \
            utils.is_auto_disk_config_disabled(auto_disk_config_img)
        if auto_disk_config_disabled and auto_disk_config:
            raise exception.AutoDiskConfigDisabledByImage(image=image)

    def _inherit_properties_from_image(self, image, auto_disk_config):
        image_properties = image.get('properties', {})
        auto_disk_config_img = \
            utils.get_auto_disk_config_from_image_props(image_properties)
        self._ensure_auto_disk_config_is_valid(auto_disk_config_img,
                                               auto_disk_config,
                                               image.get("id"))
        if auto_disk_config is None:
            auto_disk_config = strutils.bool_from_string(auto_disk_config_img)

        return {
            'os_type': image_properties.get('os_type'),
            'architecture': image_properties.get('architecture'),
            'vm_mode': image_properties.get('vm_mode'),
            'auto_disk_config': auto_disk_config
        }

    def _check_config_drive(self, config_drive):
        if config_drive:
            try:
                bool_val = strutils.bool_from_string(config_drive,
                                                     strict=True)
            except ValueError:
                raise exception.ConfigDriveInvalidValue(option=config_drive)
        else:
            bool_val = False
        # FIXME(comstud): Bug ID 1193438 filed for this. This looks silly,
        # but this is because the config drive column is a String. False
        # is represented by using an empty string. And for whatever
        # reason, we rely on the DB to cast True to a String.
        return True if bool_val else ''

    def _validate_flavor_image(self, context, image_id, image,
                               instance_type, root_bdm, validate_numa=True):
        """Validate the flavor and image.

        This is called from the API service to ensure that the flavor
        extra-specs and image properties are self-consistent and compatible
        with each other.

        :param context: A context.RequestContext
        :param image_id: UUID of the image
        :param image: a dict representation of the image including properties,
                      enforces the image status is active.
        :param instance_type: Flavor object
        :param root_bdm: BlockDeviceMapping for root disk. Will be None for
               the resize case.
        :param validate_numa: Flag to indicate whether or not to validate
               the NUMA-related metadata.
        :raises: Many different possible exceptions. See
                 api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
                 for the full list.
        """
        if image and image['status'] != 'active':
            raise exception.ImageNotActive(image_id=image_id)
        self._validate_flavor_image_nostatus(context, image, instance_type,
                                             root_bdm, validate_numa)

    @staticmethod
    def _validate_flavor_image_nostatus(context, image, instance_type,
                                        root_bdm, validate_numa=True,
                                        validate_pci=False):
        """Validate the flavor and image.

        This is called from the API service to ensure that the flavor
        extra-specs and image properties are self-consistent and compatible
        with each other.

        :param context: A context.RequestContext
        :param image: a dict representation of the image including properties
        :param instance_type: Flavor object
        :param root_bdm: BlockDeviceMapping for root disk. Will be None for
               the resize case.
        :param validate_numa: Flag to indicate whether or not to validate
               the NUMA-related metadata.
        :param validate_pci: Flag to indicate whether or not to validate
               the PCI-related metadata.
        :raises: Many different possible exceptions. See
                 api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
                 for the full list.
        """
        if not image:
            return

        image_properties = image.get('properties', {})
        config_drive_option = image_properties.get(
            'img_config_drive', 'optional')
        if config_drive_option not in ['optional', 'mandatory']:
            raise exception.InvalidImageConfigDrive(
                config_drive=config_drive_option)

        if instance_type['memory_mb'] < int(image.get('min_ram') or 0):
            raise exception.FlavorMemoryTooSmall()

        # Image min_disk is in gb, size is in bytes. For sanity, have them both
        # in bytes.
        image_min_disk = int(image.get('min_disk') or 0) * units.Gi
        image_size = int(image.get('size') or 0)

        # Target disk is a volume. Don't check flavor disk size because it
        # doesn't make sense, and check min_disk against the volume size.
        if root_bdm is not None and root_bdm.is_volume:
            # There are 2 possibilities here:
            #
            # 1. The target volume already exists but bdm.volume_size is not
            #    yet set because this method is called before
            #    _bdm_validate_set_size_and_instance during server create.
            # 2. The target volume doesn't exist, in which case the bdm will
            #    contain the intended volume size
            #
            # Note that rebuild also calls this method with potentially a new
            # image but you can't rebuild a volume-backed server with a new
            # image (yet).
            #
            # Cinder does its own check against min_disk, so if the target
            # volume already exists this has already been done and we don't
            # need to check it again here. In this case, volume_size may not be
            # set on the bdm.
            #
            # If we're going to create the volume, the bdm will contain
            # volume_size. Therefore we should check it if it exists. This will
            # still be checked again by cinder when the volume is created, but
            # that will not happen until the request reaches a host. By
            # checking it here, the user gets an immediate and useful failure
            # indication.
            #
            # The third possibility is that we have failed to consider
            # something, and there are actually more than 2 possibilities. In
            # this case cinder will still do the check at volume creation time.
            # The behaviour will still be correct, but the user will not get an
            # immediate failure from the api, and will instead have to
            # determine why the instance is in an error state with a task of
            # block_device_mapping.
            #
            # We could reasonably refactor this check into _validate_bdm at
            # some future date, as the various size logic is already split out
            # in there.
            dest_size = root_bdm.volume_size
            if dest_size is not None:
                dest_size *= units.Gi

                if image_min_disk > dest_size:
                    raise exception.VolumeSmallerThanMinDisk(
                        volume_size=dest_size, image_min_disk=image_min_disk)

        # Target disk is a local disk whose size is taken from the flavor
        else:
            dest_size = instance_type['root_gb'] * units.Gi

            # NOTE(johannes): root_gb is allowed to be 0 for legacy reasons
            # since libvirt interpreted the value differently than other
            # drivers. A value of 0 means don't check size.
            if dest_size != 0:
                if image_size > dest_size:
                    raise exception.FlavorDiskSmallerThanImage(
                        flavor_size=dest_size, image_size=image_size)

                if image_min_disk > dest_size:
                    raise exception.FlavorDiskSmallerThanMinDisk(
                        flavor_size=dest_size, image_min_disk=image_min_disk)
            else:
                # The user is attempting to create a server with a 0-disk
                # image-backed flavor, which can lead to issues with a large
                # image consuming an unexpectedly large amount of local disk
                # on the compute host. Check to see if the deployment will
                # allow that.
                if not context.can(
                        servers_policies.ZERO_DISK_FLAVOR, fatal=False):
                    raise exception.BootFromVolumeRequiredForZeroDiskFlavor()

        API._validate_flavor_image_numa_pci(
            image, instance_type, validate_numa=validate_numa,
            validate_pci=validate_pci)

    @staticmethod
    def _validate_flavor_image_numa_pci(image, instance_type,
                                        validate_numa=True,
                                        validate_pci=False):
        """Validate the flavor and image NUMA/PCI values.

        This is called from the API service to ensure that the flavor
        extra-specs and image properties are self-consistent and compatible
        with each other.

        :param image: a dict representation of the image including properties
        :param instance_type: Flavor object
        :param validate_numa: Flag to indicate whether or not to validate
               the NUMA-related metadata.
        :param validate_pci: Flag to indicate whether or not to validate
               the PCI-related metadata.
        :raises: Many different possible exceptions. See
                 api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
                 for the full list.
        """
        image_meta = _get_image_meta_obj(image)

        API._validate_flavor_image_mem_encryption(instance_type, image_meta)

        # validate PMU extra spec and image metadata
        flavor_pmu = instance_type.extra_specs.get('hw:pmu')
        image_pmu = image_meta.properties.get('hw_pmu')
        if (flavor_pmu is not None and image_pmu is not None and
                image_pmu != strutils.bool_from_string(flavor_pmu)):
            raise exception.ImagePMUConflict()

        # Only validate values of flavor/image so the return results of
        # following 'get' functions are not used.
        hardware.get_number_of_serial_ports(instance_type, image_meta)
        if hardware.is_realtime_enabled(instance_type):
            hardware.vcpus_realtime_topology(instance_type, image_meta)
        hardware.get_cpu_topology_constraints(instance_type, image_meta)
        if validate_numa:
            hardware.numa_get_constraints(instance_type, image_meta)
        if validate_pci:
            pci_request.get_pci_requests_from_flavor(instance_type)

    @staticmethod
    def _validate_flavor_image_mem_encryption(instance_type, image):
        """Validate that the flavor and image don't make contradictory
        requests regarding memory encryption.

        :param instance_type: Flavor object
        :param image: an ImageMeta object
        :raises: nova.exception.FlavorImageConflict
        """
        # This library function will raise the exception for us if
        # necessary; if not, we can ignore the result returned.
        hardware.get_mem_encryption_constraint(instance_type, image)

    def _get_image_defined_bdms(self, instance_type, image_meta,
                                root_device_name):
        image_properties = image_meta.get('properties', {})

        # Get the block device mappings defined by the image.
        image_defined_bdms = image_properties.get('block_device_mapping', [])
        legacy_image_defined = not image_properties.get('bdm_v2', False)

        image_mapping = image_properties.get('mappings', [])

        if legacy_image_defined:
            image_defined_bdms = block_device.from_legacy_mapping(
                image_defined_bdms, None, root_device_name)
        else:
            image_defined_bdms = list(map(block_device.BlockDeviceDict,
                                          image_defined_bdms))

        if image_mapping:
            image_mapping = self._prepare_image_mapping(instance_type,
                                                        image_mapping)
            image_defined_bdms = self._merge_bdms_lists(
                image_mapping, image_defined_bdms)

        return image_defined_bdms

    def _get_flavor_defined_bdms(self, instance_type, block_device_mapping):
        flavor_defined_bdms = []

        have_ephemeral_bdms = any(filter(
            block_device.new_format_is_ephemeral, block_device_mapping))
        have_swap_bdms = any(filter(
            block_device.new_format_is_swap, block_device_mapping))

        if instance_type.get('ephemeral_gb') and not have_ephemeral_bdms:
            flavor_defined_bdms.append(
                block_device.create_blank_bdm(instance_type['ephemeral_gb']))
        if instance_type.get('swap') and not have_swap_bdms:
            flavor_defined_bdms.append(
                block_device.create_blank_bdm(instance_type['swap'], 'swap'))

        return flavor_defined_bdms

    def _merge_bdms_lists(self, overridable_mappings, overrider_mappings):
        """Override any block devices from the first list by device name

        :param overridable_mappings: list which items are overridden
        :param overrider_mappings: list which items override

        :returns: A merged list of bdms
        """
        device_names = set(bdm['device_name'] for bdm in overrider_mappings
                           if bdm['device_name'])
        return (overrider_mappings +
                [bdm for bdm in overridable_mappings
                 if bdm['device_name'] not in device_names])
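
    # Illustrative example (not part of the upstream file): merging
    #     overridable = [{'device_name': 'vda', ...}, {'device_name': 'vdb', ...}]
    #     overrider   = [{'device_name': 'vdb', ...}]
    # keeps the overrider's 'vdb' mapping and only the 'vda' entry from the
    # overridable list.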

    def _check_and_transform_bdm(self, context, base_options, instance_type,
                                 image_meta, min_count, max_count,
                                 block_device_mapping, legacy_bdm):
        # NOTE (ndipanov): Assume root dev name is 'vda' if not supplied.
        #                  It's needed for legacy conversion to work.
        root_device_name = (base_options.get('root_device_name') or 'vda')
        image_ref = base_options.get('image_ref', '')
        # If the instance is booted by image and has a volume attached,
        # the volume cannot have the same device name as root_device_name
        if image_ref:
            for bdm in block_device_mapping:
                if (bdm.get('destination_type') == 'volume' and
                        block_device.strip_dev(bdm.get(
                            'device_name')) == root_device_name):
                    msg = _('The volume cannot be assigned the same device'
                            ' name as the root device %s') % root_device_name
                    raise exception.InvalidRequest(msg)

        image_defined_bdms = self._get_image_defined_bdms(
            instance_type, image_meta, root_device_name)
        root_in_image_bdms = (
            block_device.get_root_bdm(image_defined_bdms) is not None)

        if legacy_bdm:
            block_device_mapping = block_device.from_legacy_mapping(
                block_device_mapping, image_ref, root_device_name,
                no_root=root_in_image_bdms)
        elif root_in_image_bdms:
            # NOTE (ndipanov): client will insert an image mapping into the v2
            # block_device_mapping, but if there is a bootable device in image
            # mappings - we need to get rid of the inserted image
            # NOTE (gibi): another case is when a server is booted with an
            # image to bdm mapping where the image only contains a bdm to a
            # snapshot. In this case the other image to bdm mapping
            # contains an unnecessary device with boot_index == 0.
            # Also in this case the image_ref is None as we are booting from
            # an image to volume bdm.
            def not_image_and_root_bdm(bdm):
                return not (bdm.get('boot_index') == 0 and
                            bdm.get('source_type') == 'image')

            block_device_mapping = list(
                filter(not_image_and_root_bdm, block_device_mapping))

        block_device_mapping = self._merge_bdms_lists(
            image_defined_bdms, block_device_mapping)

        if min_count > 1 or max_count > 1:
            if any(map(lambda bdm: bdm['source_type'] == 'volume',
                       block_device_mapping)):
                msg = _('Cannot attach one or more volumes to multiple'
                        ' instances')
                raise exception.InvalidRequest(msg)

        block_device_mapping += self._get_flavor_defined_bdms(
            instance_type, block_device_mapping)

        return block_device_obj.block_device_make_list_from_dicts(
            context, block_device_mapping)

    def _get_image(self, context, image_href):
        if not image_href:
            return None, {}

        image = self.image_api.get(context, image_href)
        return image['id'], image

    def _checks_for_create_and_rebuild(self, context, image_id, image,
                                       instance_type, metadata,
                                       files_to_inject, root_bdm,
                                       validate_numa=True):
        self._check_metadata_properties_quota(context, metadata)
        self._check_injected_file_quota(context, files_to_inject)
        self._validate_flavor_image(context, image_id, image,
                                    instance_type, root_bdm,
                                    validate_numa=validate_numa)

    def _validate_and_build_base_options(self, context, instance_type,
                                         boot_meta, image_href, image_id,
                                         kernel_id, ramdisk_id, display_name,
                                         display_description, key_name,
                                         key_data, security_groups,
                                         availability_zone, user_data,
                                         metadata, access_ip_v4, access_ip_v6,
                                         requested_networks, config_drive,
                                         auto_disk_config, reservation_id,
                                         max_count,
                                         supports_port_resource_request):
        """Verify all the input parameters regardless of the provisioning
        strategy being performed.
        """
        if instance_type['disabled']:
            raise exception.FlavorNotFound(flavor_id=instance_type['id'])

        if user_data:
            try:
                base64utils.decode_as_bytes(user_data)
            except TypeError:
                raise exception.InstanceUserDataMalformed()

        # When using Neutron, _check_requested_secgroups will translate and
        # return any requested security group names to uuids.
        security_groups = (
            self._check_requested_secgroups(context, security_groups))

        # Note: max_count is the number of instances requested by the user,
        # max_network_count is the maximum number of instances taking into
        # account any network quotas
        max_network_count = self._check_requested_networks(context,
            requested_networks, max_count)

        kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
            context, kernel_id, ramdisk_id, boot_meta)

        config_drive = self._check_config_drive(config_drive)

        if key_data is None and key_name is not None:
            key_pair = objects.KeyPair.get_by_name(context,
                                                   context.user_id,
                                                   key_name)
            key_data = key_pair.public_key
        else:
            key_pair = None

        root_device_name = block_device.prepend_dev(
            block_device.properties_root_device_name(
                boot_meta.get('properties', {})))

        image_meta = _get_image_meta_obj(boot_meta)
        numa_topology = hardware.numa_get_constraints(
            instance_type, image_meta)

        system_metadata = {}

        pci_numa_affinity_policy = hardware.get_pci_numa_policy_constraint(
            instance_type, image_meta)

        # PCI requests come from two sources: instance flavor and
        # requested_networks. The first call in below returns an
        # InstancePCIRequests object which is a list of InstancePCIRequest
        # objects. The second call in below creates an InstancePCIRequest
        # object for each SR-IOV port, and append it to the list in the
        # InstancePCIRequests object
        pci_request_info = pci_request.get_pci_requests_from_flavor(
            instance_type, affinity_policy=pci_numa_affinity_policy)
        result = self.network_api.create_resource_requests(
            context, requested_networks, pci_request_info,
            affinity_policy=pci_numa_affinity_policy)
        network_metadata, port_resource_requests = result

        # Creating servers with ports that have resource requests, like QoS
        # minimum bandwidth rules, is only supported in a requested minimum
        # microversion.
        if port_resource_requests and not supports_port_resource_request:
            raise exception.CreateWithPortResourceRequestOldVersion()

        base_options = {
            'reservation_id': reservation_id,
            'image_ref': image_href,
            'kernel_id': kernel_id or '',
            'ramdisk_id': ramdisk_id or '',
            'power_state': power_state.NOSTATE,
            'vm_state': vm_states.BUILDING,
            'config_drive': config_drive,
            'user_id': context.user_id,
            'project_id': context.project_id,
            'instance_type_id': instance_type['id'],
            'memory_mb': instance_type['memory_mb'],
            'vcpus': instance_type['vcpus'],
            'root_gb': instance_type['root_gb'],
            'ephemeral_gb': instance_type['ephemeral_gb'],
            'display_name': display_name,
            'display_description': display_description,
            'user_data': user_data,
            'key_name': key_name,
            'key_data': key_data,
            'locked': False,
            'metadata': metadata or {},
            'access_ip_v4': access_ip_v4,
            'access_ip_v6': access_ip_v6,
            'availability_zone': availability_zone,
            'root_device_name': root_device_name,
            'progress': 0,
            'pci_requests': pci_request_info,
            'numa_topology': numa_topology,
            'system_metadata': system_metadata,
            'port_resource_requests': port_resource_requests}

        options_from_image = self._inherit_properties_from_image(
            boot_meta, auto_disk_config)

        base_options.update(options_from_image)

        # return the validated options and maximum number of instances allowed
        # by the network quotas
        return (base_options, max_network_count, key_pair, security_groups,
                network_metadata)

    @staticmethod
    @db_api.api_context_manager.writer
    def _create_reqspec_buildreq_instmapping(context, rs, br, im):
        """Create the request spec, build request, and instance mapping in a
        single database transaction.

        The RequestContext must be passed in to this method so that the
        database transaction context manager decorator will nest properly and
        include each create() into the same transaction context.
        """
        rs.create()
        br.create()
        im.create()

    def _validate_host_or_node(self, context, host, hypervisor_hostname):
        """Check whether compute nodes exist by validating the host
        and/or the hypervisor_hostname. There are three cases:

        1. If only host is supplied, we can lookup the HostMapping in
           the API DB.
        2. If only node is supplied, we can query a resource provider
           with that name in placement.
        3. If both host and node are supplied, we can get the cell from
           HostMapping and from that lookup the ComputeNode with the
           given cell.

        :param context: The API request context.
        :param host: Target host.
        :param hypervisor_hostname: Target node.
        :raises: ComputeHostNotFound if we find no compute nodes with host
                 and/or hypervisor_hostname.
        """
        if host:
            # When host is specified.
            try:
                host_mapping = objects.HostMapping.get_by_host(context, host)
            except exception.HostMappingNotFound:
                LOG.warning('No host-to-cell mapping found for host '
                            '%(host)s.', {'host': host})
                raise exception.ComputeHostNotFound(host=host)
            # When both host and node are specified.
            if hypervisor_hostname:
                cell = host_mapping.cell_mapping
                with nova_context.target_cell(context, cell) as cctxt:
                    # Here we only do an existence check, so we don't
                    # need to store the return value into a variable.
                    objects.ComputeNode.get_by_host_and_nodename(
                        cctxt, host, hypervisor_hostname)
        elif hypervisor_hostname:
            # When only node is specified.
            try:
                self.placementclient.get_provider_by_name(
                    context, hypervisor_hostname)
            except exception.ResourceProviderNotFound:
                raise exception.ComputeHostNotFound(host=hypervisor_hostname)

    def _get_volumes_for_bdms(self, context, bdms):
        """Get the pre-existing volumes from cinder for the list of BDMs.

        :param context: nova auth RequestContext
        :param bdms: BlockDeviceMappingList which has zero or more BDMs with
            a pre-existing volume_id specified.
        :return: dict, keyed by volume id, of volume dicts
        :raises: VolumeNotFound - if a given volume does not exist
        :raises: CinderConnectionFailed - if there are problems communicating
            with the cinder API
        :raises: Forbidden - if the user token does not have authority to see
            a volume
        """
        volumes = {}
        for bdm in bdms:
            if bdm.volume_id:
                volumes[bdm.volume_id] = self.volume_api.get(
                    context, bdm.volume_id)
        return volumes

    @staticmethod
    def _validate_vol_az_for_create(instance_az, volumes):
        """Performs cross_az_attach validation for the instance and volumes.

        If [cinder]/cross_az_attach=True (default) this method is a no-op.

        If [cinder]/cross_az_attach=False, this method will validate that:

        1. All volumes are in the same availability zone.
        2. The volume AZ matches the instance AZ. If the instance is being
           created without a specific AZ (either via the user request or the
           [DEFAULT]/default_schedule_zone option), and the volume AZ matches
           [DEFAULT]/default_availability_zone for compute services, then the
           method returns the volume AZ so it can be set in the RequestSpec as
           if the user requested the zone explicitly.

        :param instance_az: Availability zone for the instance. In this case
            the host is not yet selected so the instance AZ value should come
            from one of the following cases:

            * The user requested availability zone.
            * [DEFAULT]/default_schedule_zone (defaults to None) if the request
              does not specify an AZ (see parse_availability_zone).
        :param volumes: iterable of dicts of cinder volumes to be attached to
            the server being created
        :returns: None or volume AZ to set in the RequestSpec for the instance
        :raises: MismatchVolumeAZException if the instance and volume AZ do
            not match
        """
        if CONF.cinder.cross_az_attach:
            return

        if not volumes:
            return

        # First make sure that all of the volumes are in the same zone.
        vol_zones = [vol['availability_zone'] for vol in volumes]
        if len(set(vol_zones)) > 1:
            msg = (_("Volumes are in different availability zones: %s")
                   % ','.join(vol_zones))
            raise exception.MismatchVolumeAZException(reason=msg)

        volume_az = vol_zones[0]
        # In this case the instance.host should not be set so the instance AZ
        # value should come from instance.availability_zone which will be one
        # of the following cases:
        # * The user requested availability zone.
        # * [DEFAULT]/default_schedule_zone (defaults to None) if the request
        #   does not specify an AZ (see parse_availability_zone).

        # If the instance is not being created with a specific AZ (the AZ is
        # input via the API create request *or* [DEFAULT]/default_schedule_zone
        # is not None), then check to see if we should use the default AZ
        # (which by default matches the default AZ in Cinder, i.e. 'nova').
        if instance_az is None:
            # Check if the volume AZ is the same as our default AZ for compute
            # hosts (nova) and if so, assume we are OK because the user did not
            # request an AZ and will get the same default. If the volume AZ is
            # not the same as our default, return the volume AZ so the caller
            # can put it into the request spec so the instance is scheduled
            # to the same zone as the volume. Note that we are paranoid about
            # the default here since both nova and cinder's default backend AZ
            # is "nova" and we do not want to pin the server to that AZ since
            # it's special, i.e. just like we tell users in the docs to not
            # specify availability_zone='nova' when creating a server since we
            # might not be able to migrate it later.
            if volume_az != CONF.default_availability_zone:
                return volume_az  # indication to set in request spec
            # The volume AZ is the same as the default nova AZ so we will be OK
            return

        if instance_az != volume_az:
            msg = _("Server and volumes are not in the same availability "
                    "zone. Server is in: %(instance_az)s. Volumes are in: "
                    "%(volume_az)s") % {
                'instance_az': instance_az, 'volume_az': volume_az}
            raise exception.MismatchVolumeAZException(reason=msg)
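
    # Illustrative example (not part of the upstream file): with
    # [cinder]/cross_az_attach=False, no requested AZ (instance_az=None) and a
    # volume in 'az1' (!= [DEFAULT]/default_availability_zone), the method
    # returns 'az1' so the caller pins the RequestSpec to that zone.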
  989. def _provision_instances(self, context, instance_type, min_count,
  990. max_count, base_options, boot_meta, security_groups,
  991. block_device_mapping, shutdown_terminate,
  992. instance_group, check_server_group_quota, filter_properties,
  993. key_pair, tags, trusted_certs, supports_multiattach,
  994. network_metadata=None, requested_host=None,
  995. requested_hypervisor_hostname=None):
  996. # NOTE(boxiang): Check whether compute nodes exist by validating
  997. # the host and/or the hypervisor_hostname. Pass the destination
  998. # to the scheduler with host and/or hypervisor_hostname(node).
  999. destination = None
  1000. if requested_host or requested_hypervisor_hostname:
  1001. self._validate_host_or_node(context, requested_host,
  1002. requested_hypervisor_hostname)
  1003. destination = objects.Destination()
  1004. if requested_host:
  1005. destination.host = requested_host
  1006. destination.node = requested_hypervisor_hostname
  1007. # Check quotas
  1008. num_instances = compute_utils.check_num_instances_quota(
  1009. context, instance_type, min_count, max_count)
  1010. security_groups = self.security_group_api.populate_security_groups(
  1011. security_groups)
  1012. self.security_group_api.ensure_default(context)
  1013. port_resource_requests = base_options.pop('port_resource_requests')
  1014. instances_to_build = []
  1015. # We could be iterating over several instances with several BDMs per
  1016. # instance and those BDMs could be using a lot of the same images so
  1017. # we want to cache the image API GET results for performance.
  1018. image_cache = {} # dict of image dicts keyed by image id
  1019. # Before processing the list of instances get all of the requested
  1020. # pre-existing volumes so we can do some validation here rather than
  1021. # down in the bowels of _validate_bdm.
  1022. volumes = self._get_volumes_for_bdms(context, block_device_mapping)
  1023. volume_az = self._validate_vol_az_for_create(
  1024. base_options['availability_zone'], volumes.values())
  1025. if volume_az:
  1026. # This means the instance is not being created in a specific zone
  1027. # but needs to match the zone that the volumes are in so update
  1028. # base_options to match the volume zone.
  1029. base_options['availability_zone'] = volume_az
  1030. LOG.debug("Going to run %s instances...", num_instances)
  1031. try:
  1032. for i in range(num_instances):
  1033. # Create a uuid for the instance so we can store the
  1034. # RequestSpec before the instance is created.
  1035. instance_uuid = uuidutils.generate_uuid()
  1036. # Store the RequestSpec that will be used for scheduling.
  1037. req_spec = objects.RequestSpec.from_components(context,
  1038. instance_uuid, boot_meta, instance_type,
  1039. base_options['numa_topology'],
  1040. base_options['pci_requests'], filter_properties,
  1041. instance_group, base_options['availability_zone'],
  1042. security_groups=security_groups,
  1043. port_resource_requests=port_resource_requests)
  1044. if block_device_mapping:
  1045. # Record whether or not we are a BFV instance
  1046. root = block_device_mapping.root_bdm()
  1047. req_spec.is_bfv = bool(root and root.is_volume)
  1048. else:
  1049. # If we have no BDMs, we're clearly not BFV
  1050. req_spec.is_bfv = False
  1051. # NOTE(danms): We need to record num_instances on the request
  1052. # spec as this is how the conductor knows how many were in this
  1053. # batch.
  1054. req_spec.num_instances = num_instances
  1055. # NOTE(stephenfin): The network_metadata field is not persisted
  1056. # inside RequestSpec object.
  1057. if network_metadata:
  1058. req_spec.network_metadata = network_metadata
  1059. if destination:
  1060. req_spec.requested_destination = destination
  1061. # Create an instance object, but do not store in db yet.
  1062. instance = objects.Instance(context=context)
  1063. instance.uuid = instance_uuid
  1064. instance.update(base_options)
  1065. instance.keypairs = objects.KeyPairList(objects=[])
  1066. if key_pair:
  1067. instance.keypairs.objects.append(key_pair)
  1068. instance.trusted_certs = self._retrieve_trusted_certs_object(
  1069. context, trusted_certs)
  1070. instance = self.create_db_entry_for_new_instance(context,
  1071. instance_type, boot_meta, instance, security_groups,
  1072. block_device_mapping, num_instances, i,
  1073. shutdown_terminate, create_instance=False)
  1074. block_device_mapping = (
  1075. self._bdm_validate_set_size_and_instance(context,
  1076. instance, instance_type, block_device_mapping,
  1077. image_cache, volumes, supports_multiattach))
  1078. instance_tags = self._transform_tags(tags, instance.uuid)
  1079. build_request = objects.BuildRequest(context,
  1080. instance=instance, instance_uuid=instance.uuid,
  1081. project_id=instance.project_id,
  1082. block_device_mappings=block_device_mapping,
  1083. tags=instance_tags)
  1084. # Create an instance_mapping. The null cell_mapping indicates
  1085. # that the instance doesn't yet exist in a cell, and lookups
  1086. # for it need to instead look for the RequestSpec.
  1087. # cell_mapping will be populated after scheduling, with a
  1088. # scheduling failure using the cell_mapping for the special
  1089. # cell0.
  1090. inst_mapping = objects.InstanceMapping(context=context)
  1091. inst_mapping.instance_uuid = instance_uuid
  1092. inst_mapping.project_id = context.project_id
  1093. inst_mapping.user_id = context.user_id
  1094. inst_mapping.cell_mapping = None
  1095. # Create the request spec, build request, and instance mapping
  1096. # records in a single transaction so that if a DBError is
  1097. # raised from any of them, all INSERTs will be rolled back and
  1098. # no orphaned records will be left behind.
  1099. self._create_reqspec_buildreq_instmapping(context, req_spec,
  1100. build_request,
  1101. inst_mapping)
  1102. instances_to_build.append(
  1103. (req_spec, build_request, inst_mapping))
  1104. if instance_group:
  1105. if check_server_group_quota:
  1106. try:
  1107. objects.Quotas.check_deltas(
  1108. context, {'server_group_members': 1},
  1109. instance_group, context.user_id)
  1110. except exception.OverQuota:
  1111. msg = _("Quota exceeded, too many servers in "
  1112. "group")
  1113. raise exception.QuotaError(msg)
  1114. members = objects.InstanceGroup.add_members(
  1115. context, instance_group.uuid, [instance.uuid])
  1116. # NOTE(melwitt): We recheck the quota after creating the
  1117. # object to prevent users from allocating more resources
  1118. # than their allowed quota in the event of a race. This is
  1119. # configurable because it can be expensive if strict quota
  1120. # limits are not required in a deployment.
  1121. if CONF.quota.recheck_quota and check_server_group_quota:
  1122. try:
  1123. objects.Quotas.check_deltas(
  1124. context, {'server_group_members': 0},
  1125. instance_group, context.user_id)
  1126. except exception.OverQuota:
  1127. objects.InstanceGroup._remove_members_in_db(
  1128. context, instance_group.id, [instance.uuid])
  1129. msg = _("Quota exceeded, too many servers in "
  1130. "group")
  1131. raise exception.QuotaError(msg)
  1132. # list of members added to servers group in this iteration
  1133. # is needed to check quota of server group during add next
  1134. # instance
  1135. instance_group.members.extend(members)
  1136. # In the case of any exceptions, attempt DB cleanup
  1137. except Exception:
  1138. with excutils.save_and_reraise_exception():
  1139. self._cleanup_build_artifacts(None, instances_to_build)
  1140. return instances_to_build
  1141. @staticmethod
  1142. def _retrieve_trusted_certs_object(context, trusted_certs, rebuild=False):
  1143. """Convert user-requested trusted cert IDs to TrustedCerts object
  1144. Also validates that the deployment is new enough to support trusted
  1145. image certification validation.
  1146. :param context: The user request auth context
  1147. :param trusted_certs: list of user-specified trusted cert string IDs,
  1148. may be None
  1149. :param rebuild: True if rebuilding the server, False if creating a
  1150. new server
  1151. :returns: nova.objects.TrustedCerts object or None if no user-specified
  1152. trusted cert IDs were given and nova is not configured with
  1153. default trusted cert IDs
  1154. """
  1155. # Retrieve trusted_certs parameter, or use CONF value if certificate
  1156. # validation is enabled
  1157. if trusted_certs:
  1158. certs_to_return = objects.TrustedCerts(ids=trusted_certs)
  1159. elif (CONF.glance.verify_glance_signatures and
  1160. CONF.glance.enable_certificate_validation and
  1161. CONF.glance.default_trusted_certificate_ids):
  1162. certs_to_return = objects.TrustedCerts(
  1163. ids=CONF.glance.default_trusted_certificate_ids)
  1164. else:
  1165. return None
  1166. return certs_to_return
  1167. def _get_bdm_image_metadata(self, context, block_device_mapping,
  1168. legacy_bdm=True):
  1169. """If we are booting from a volume, we need to get the
  1170. volume details from Cinder and make sure we pass the
  1171. metadata back accordingly.
  1172. """
  1173. if not block_device_mapping:
  1174. return {}
  1175. for bdm in block_device_mapping:
  1176. if (legacy_bdm and
  1177. block_device.get_device_letter(
  1178. bdm.get('device_name', '')) != 'a'):
  1179. continue
  1180. elif not legacy_bdm and bdm.get('boot_index') != 0:
  1181. continue
  1182. volume_id = bdm.get('volume_id')
  1183. snapshot_id = bdm.get('snapshot_id')
  1184. if snapshot_id:
  1185. # NOTE(alaski): A volume snapshot inherits metadata from the
  1186. # originating volume, but the API does not expose metadata
  1187. # on the snapshot itself. So we query the volume for it below.
  1188. snapshot = self.volume_api.get_snapshot(context, snapshot_id)
  1189. volume_id = snapshot['volume_id']
  1190. if bdm.get('image_id'):
  1191. try:
  1192. image_id = bdm['image_id']
  1193. image_meta = self.image_api.get(context, image_id)
  1194. return image_meta
  1195. except Exception:
  1196. raise exception.InvalidBDMImage(id=image_id)
  1197. elif volume_id:
  1198. try:
  1199. volume = self.volume_api.get(context, volume_id)
  1200. except exception.CinderConnectionFailed:
  1201. raise
  1202. except Exception:
  1203. raise exception.InvalidBDMVolume(id=volume_id)
  1204. if not volume.get('bootable', True):
  1205. raise exception.InvalidBDMVolumeNotBootable(id=volume_id)
  1206. return utils.get_image_metadata_from_volume(volume)
  1207. return {}
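# Example (illustrative sketch, not part of the module): the kind of boot
# BDM _get_bdm_image_metadata() looks at. With legacy_bdm=False only the
# entry with boot_index 0 is considered; the volume ID is made up.
#
#     block_device_mapping = [{
#         'boot_index': 0,
#         'volume_id': '11111111-2222-3333-4444-555555555555',
#         'source_type': 'volume',
#         'destination_type': 'volume'}]
#     # The volume is fetched from Cinder, checked for bootability, and
#     # image-like metadata is built from the volume's stored image
#     # metadata.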
  1208. @staticmethod
  1209. def _get_requested_instance_group(context, filter_properties):
  1210. if (not filter_properties or
  1211. not filter_properties.get('scheduler_hints')):
  1212. return
  1213. group_hint = filter_properties.get('scheduler_hints').get('group')
  1214. if not group_hint:
  1215. return
  1216. return objects.InstanceGroup.get_by_uuid(context, group_hint)
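# Example (illustrative sketch, not part of the module): the
# filter_properties shape that carries a server group hint; the UUID is
# made up.
#
#     filter_properties = {
#         'scheduler_hints': {
#             'group': 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'}}
#     # _get_requested_instance_group() loads the InstanceGroup for that
#     # UUID, or returns None when no 'group' hint is present.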
  1217. def _create_instance(self, context, instance_type,
  1218. image_href, kernel_id, ramdisk_id,
  1219. min_count, max_count,
  1220. display_name, display_description,
  1221. key_name, key_data, security_groups,
  1222. availability_zone, user_data, metadata, injected_files,
  1223. admin_password, access_ip_v4, access_ip_v6,
  1224. requested_networks, config_drive,
  1225. block_device_mapping, auto_disk_config, filter_properties,
  1226. reservation_id=None, legacy_bdm=True, shutdown_terminate=False,
  1227. check_server_group_quota=False, tags=None,
  1228. supports_multiattach=False, trusted_certs=None,
  1229. supports_port_resource_request=False,
  1230. requested_host=None, requested_hypervisor_hostname=None):
  1231. """Verify all the input parameters regardless of the provisioning
  1232. strategy being performed and schedule the instance(s) for
  1233. creation.
  1234. """
  1235. # Normalize and setup some parameters
  1236. if reservation_id is None:
  1237. reservation_id = utils.generate_uid('r')
  1238. security_groups = security_groups or ['default']
  1239. min_count = min_count or 1
  1240. max_count = max_count or min_count
  1241. block_device_mapping = block_device_mapping or []
  1242. tags = tags or []
  1243. if image_href:
  1244. image_id, boot_meta = self._get_image(context, image_href)
  1245. else:
  1246. # This is similar to the logic in _retrieve_trusted_certs_object.
  1247. if (trusted_certs or
  1248. (CONF.glance.verify_glance_signatures and
  1249. CONF.glance.enable_certificate_validation and
  1250. CONF.glance.default_trusted_certificate_ids)):
  1251. msg = _("Image certificate validation is not supported "
  1252. "when booting from volume")
  1253. raise exception.CertificateValidationFailed(message=msg)
  1254. image_id = None
  1255. boot_meta = self._get_bdm_image_metadata(
  1256. context, block_device_mapping, legacy_bdm)
  1257. self._check_auto_disk_config(image=boot_meta,
  1258. auto_disk_config=auto_disk_config)
  1259. base_options, max_net_count, key_pair, security_groups, \
  1260. network_metadata = self._validate_and_build_base_options(
  1261. context, instance_type, boot_meta, image_href, image_id,
  1262. kernel_id, ramdisk_id, display_name, display_description,
  1263. key_name, key_data, security_groups, availability_zone,
  1264. user_data, metadata, access_ip_v4, access_ip_v6,
  1265. requested_networks, config_drive, auto_disk_config,
  1266. reservation_id, max_count, supports_port_resource_request)
  1267. # max_net_count is the maximum number of instances requested by the
  1268. # user adjusted for any network quota constraints, including
  1269. # consideration of connections to each requested network
  1270. if max_net_count < min_count:
  1271. raise exception.PortLimitExceeded()
  1272. elif max_net_count < max_count:
  1273. LOG.info("max count reduced from %(max_count)d to "
  1274. "%(max_net_count)d due to network port quota",
  1275. {'max_count': max_count,
  1276. 'max_net_count': max_net_count})
  1277. max_count = max_net_count
  1278. block_device_mapping = self._check_and_transform_bdm(context,
  1279. base_options, instance_type, boot_meta, min_count, max_count,
  1280. block_device_mapping, legacy_bdm)
  1281. # We can't do this check earlier because we need bdms from all sources
  1282. # to have been merged in order to get the root bdm.
  1283. # Set validate_numa=False since numa validation is already done by
  1284. # _validate_and_build_base_options().
  1285. self._checks_for_create_and_rebuild(context, image_id, boot_meta,
  1286. instance_type, metadata, injected_files,
  1287. block_device_mapping.root_bdm(), validate_numa=False)
  1288. instance_group = self._get_requested_instance_group(context,
  1289. filter_properties)
  1290. tags = self._create_tag_list_obj(context, tags)
  1291. instances_to_build = self._provision_instances(
  1292. context, instance_type, min_count, max_count, base_options,
  1293. boot_meta, security_groups, block_device_mapping,
  1294. shutdown_terminate, instance_group, check_server_group_quota,
  1295. filter_properties, key_pair, tags, trusted_certs,
  1296. supports_multiattach, network_metadata,
  1297. requested_host, requested_hypervisor_hostname)
  1298. instances = []
  1299. request_specs = []
  1300. build_requests = []
  1301. for rs, build_request, im in instances_to_build:
  1302. build_requests.append(build_request)
  1303. instance = build_request.get_new_instance(context)
  1304. instances.append(instance)
  1305. request_specs.append(rs)
  1306. self.compute_task_api.schedule_and_build_instances(
  1307. context,
  1308. build_requests=build_requests,
  1309. request_spec=request_specs,
  1310. image=boot_meta,
  1311. admin_password=admin_password,
  1312. injected_files=injected_files,
  1313. requested_networks=requested_networks,
  1314. block_device_mapping=block_device_mapping,
  1315. tags=tags)
  1316. return instances, reservation_id
  1317. @staticmethod
  1318. def _cleanup_build_artifacts(instances, instances_to_build):
  1319. # instances_to_build is a list of tuples:
  1320. # (RequestSpec, BuildRequest, InstanceMapping)
  1321. # Be paranoid about artifacts being deleted underneath us.
  1322. for instance in instances or []:
  1323. try:
  1324. instance.destroy()
  1325. except exception.InstanceNotFound:
  1326. pass
  1327. for rs, build_request, im in instances_to_build or []:
  1328. try:
  1329. rs.destroy()
  1330. except exception.RequestSpecNotFound:
  1331. pass
  1332. try:
  1333. build_request.destroy()
  1334. except exception.BuildRequestNotFound:
  1335. pass
  1336. try:
  1337. im.destroy()
  1338. except exception.InstanceMappingNotFound:
  1339. pass
  1340. @staticmethod
  1341. def _volume_size(instance_type, bdm):
  1342. size = bdm.get('volume_size')
  1343. # NOTE (ndipanov): inherit flavor size only for swap and ephemeral
  1344. if (size is None and bdm.get('source_type') == 'blank' and
  1345. bdm.get('destination_type') == 'local'):
  1346. if bdm.get('guest_format') == 'swap':
  1347. size = instance_type.get('swap', 0)
  1348. else:
  1349. size = instance_type.get('ephemeral_gb', 0)
  1350. return size
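# Example (illustrative sketch, not part of the module): how _volume_size()
# falls back to the flavor for blank/local devices; the flavor values are
# made up.
#
#     flavor = {'swap': 1, 'ephemeral_gb': 10}
#     swap_bdm = {'source_type': 'blank', 'destination_type': 'local',
#                 'guest_format': 'swap', 'volume_size': None}
#     eph_bdm = {'source_type': 'blank', 'destination_type': 'local',
#                'volume_size': None}
#     # _volume_size(flavor, swap_bdm) -> 1   (flavor['swap'])
#     # _volume_size(flavor, eph_bdm)  -> 10  (flavor['ephemeral_gb'])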
  1351. def _prepare_image_mapping(self, instance_type, mappings):
  1352. """Extract and format blank devices from image mappings."""
  1353. prepared_mappings = []
  1354. for bdm in block_device.mappings_prepend_dev(mappings):
  1355. LOG.debug("Image bdm %s", bdm)
  1356. virtual_name = bdm['virtual']
  1357. if virtual_name == 'ami' or virtual_name == 'root':
  1358. continue
  1359. if not block_device.is_swap_or_ephemeral(virtual_name):
  1360. continue
  1361. guest_format = bdm.get('guest_format')
  1362. if virtual_name == 'swap':
  1363. guest_format = 'swap'
  1364. if not guest_format:
  1365. guest_format = CONF.default_ephemeral_format
  1366. values = block_device.BlockDeviceDict({
  1367. 'device_name': bdm['device'],
  1368. 'source_type': 'blank',
  1369. 'destination_type': 'local',
  1370. 'device_type': 'disk',
  1371. 'guest_format': guest_format,
  1372. 'delete_on_termination': True,
  1373. 'boot_index': -1})
  1374. values['volume_size'] = self._volume_size(
  1375. instance_type, values)
  1376. if values['volume_size'] == 0:
  1377. continue
  1378. prepared_mappings.append(values)
  1379. return prepared_mappings
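# Example (illustrative sketch, not part of the module): an image
# "mappings" entry and roughly the blank BDM it becomes; the device name
# and flavor are made up.
#
#     mappings = [{'virtual': 'ephemeral0', 'device': 'sdb'}]
#     # _prepare_image_mapping(flavor, mappings) yields approximately:
#     # {'device_name': '/dev/sdb', 'source_type': 'blank',
#     #  'destination_type': 'local', 'device_type': 'disk',
#     #  'guest_format': CONF.default_ephemeral_format,
#     #  'delete_on_termination': True, 'boot_index': -1,
#     #  'volume_size': flavor['ephemeral_gb']}
#     # 'ami'/'root' entries and zero-sized devices are skipped.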
  1380. def _bdm_validate_set_size_and_instance(self, context, instance,
  1381. instance_type,
  1382. block_device_mapping,
  1383. image_cache, volumes,
  1384. supports_multiattach=False):
  1385. """Ensure the bdms are valid, then set size and associate with instance
  1386. Because this method can be called multiple times when more than one
  1387. instance is booted in a single request it makes a copy of the bdm list.
  1388. :param context: nova auth RequestContext
  1389. :param instance: Instance object
  1390. :param instance_type: Flavor object - used for swap and ephemeral BDMs
  1391. :param block_device_mapping: BlockDeviceMappingList object
  1392. :param image_cache: dict of image dicts keyed by id which is used as a
  1393. cache in case there are multiple BDMs in the same request using
  1394. the same image to avoid redundant GET calls to the image service
  1395. :param volumes: dict, keyed by volume id, of volume dicts from cinder
  1396. :param supports_multiattach: True if the request supports multiattach
  1397. volumes, False otherwise
  1398. """
  1399. LOG.debug("block_device_mapping %s", list(block_device_mapping),
  1400. instance_uuid=instance.uuid)
  1401. self._validate_bdm(
  1402. context, instance, instance_type, block_device_mapping,
  1403. image_cache, volumes, supports_multiattach)
  1404. instance_block_device_mapping = block_device_mapping.obj_clone()
  1405. for bdm in instance_block_device_mapping:
  1406. bdm.volume_size = self._volume_size(instance_type, bdm)
  1407. bdm.instance_uuid = instance.uuid
  1408. return instance_block_device_mapping
  1409. @staticmethod
  1410. def _check_requested_volume_type(bdm, volume_type_id_or_name,
  1411. volume_types):
  1412. """If we are specifying a volume type, we need to get the
  1413. volume type details from Cinder and make sure the ``volume_type``
  1414. is available.
  1415. """
  1416. # NOTE(brinzhang): Verify that the specified volume type exists.
  1417. # And save the volume type name internally for consistency in the
  1418. # BlockDeviceMapping object.
  1419. for vol_type in volume_types:
  1420. if (volume_type_id_or_name == vol_type['id'] or
  1421. volume_type_id_or_name == vol_type['name']):
  1422. bdm.volume_type = vol_type['name']
  1423. break
  1424. else:
  1425. raise exception.VolumeTypeNotFound(
  1426. id_or_name=volume_type_id_or_name)
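# Example (illustrative sketch, not part of the module): matching a
# requested volume type by ID or name; the types below are made up.
#
#     volume_types = [{'id': 'b77e5e6d-1111-2222-3333-444444444444',
#                      'name': 'fast-ssd'},
#                     {'id': '0c8f1a2b-5555-6666-7777-888888888888',
#                      'name': 'bulk'}]
#     # Requesting 'fast-ssd' (or its ID) sets bdm.volume_type to
#     # 'fast-ssd'; any other value raises VolumeTypeNotFound.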
  1427. def _validate_bdm(self, context, instance, instance_type,
  1428. block_device_mappings, image_cache, volumes,
  1429. supports_multiattach=False):
  1430. """Validate requested block device mappings.
  1431. :param context: nova auth RequestContext
  1432. :param instance: Instance object
  1433. :param instance_type: Flavor object - used for swap and ephemeral BDMs
  1434. :param block_device_mappings: BlockDeviceMappingList object
  1435. :param image_cache: dict of image dicts keyed by id which is used as a
  1436. cache in case there are multiple BDMs in the same request using
  1437. the same image to avoid redundant GET calls to the image service
  1438. :param volumes: dict, keyed by volume id, of volume dicts from cinder
  1439. :param supports_multiattach: True if the request supports multiattach
  1440. volumes, False otherwise
  1441. """
  1442. # Make sure that the boot indexes make sense.
  1443. # Setting a negative value or None indicates that the device should not
  1444. # be used for booting.
  1445. boot_indexes = sorted([bdm.boot_index
  1446. for bdm in block_device_mappings
  1447. if bdm.boot_index is not None and
  1448. bdm.boot_index >= 0])
1449. # Each device capable of being used as a boot device should be
1450. # given a unique boot index, starting from 0 in ascending order, and
1451. # there must be at least one boot device.
  1452. if not boot_indexes or any(i != v for i, v in enumerate(boot_indexes)):
  1453. # Convert the BlockDeviceMappingList to a list for repr details.
  1454. LOG.debug('Invalid block device mapping boot sequence for '
  1455. 'instance: %s', list(block_device_mappings),
  1456. instance=instance)
  1457. raise exception.InvalidBDMBootSequence()
  1458. volume_types = None
  1459. for bdm in block_device_mappings:
  1460. volume_type = bdm.volume_type
  1461. if volume_type:
  1462. if not volume_types:
1463. # To reduce the number of Cinder API calls,
1464. # initialize our cache of volume types.
  1465. volume_types = self.volume_api.get_all_volume_types(
  1466. context)
  1467. # NOTE(brinzhang): Ensure the validity of volume_type.
  1468. self._check_requested_volume_type(bdm, volume_type,
  1469. volume_types)
  1470. # NOTE(vish): For now, just make sure the volumes are accessible.
  1471. # Additionally, check that the volume can be attached to this
  1472. # instance.
  1473. snapshot_id = bdm.snapshot_id
  1474. volume_id = bdm.volume_id
  1475. image_id = bdm.image_id
  1476. if image_id is not None:
  1477. if (image_id != instance.get('image_ref') and
  1478. image_id not in image_cache):
  1479. try:
  1480. # Cache the results of the image GET so we do not make
  1481. # the same request for the same image if processing
  1482. # multiple BDMs or multiple servers with the same image
  1483. image_cache[image_id] = self._get_image(
  1484. context, image_id)
  1485. except Exception:
  1486. raise exception.InvalidBDMImage(id=image_id)
  1487. if (bdm.source_type == 'image' and
  1488. bdm.destination_type == 'volume' and
  1489. not bdm.volume_size):
  1490. raise exception.InvalidBDM(message=_("Images with "
  1491. "destination_type 'volume' need to have a non-zero "
  1492. "size specified"))
  1493. elif volume_id is not None:
  1494. try:
  1495. volume = volumes[volume_id]
  1496. # We do not validate the instance and volume AZ here
  1497. # because that is done earlier by _provision_instances.
  1498. self._check_attach_and_reserve_volume(
  1499. context, volume, instance, bdm, supports_multiattach,
  1500. validate_az=False)
  1501. bdm.volume_size = volume.get('size')
  1502. except (exception.CinderConnectionFailed,
  1503. exception.InvalidVolume,
  1504. exception.MultiattachNotSupportedOldMicroversion):
  1505. raise
  1506. except exception.InvalidInput as exc:
  1507. raise exception.InvalidVolume(reason=exc.format_message())
  1508. except Exception as e:
  1509. LOG.info('Failed validating volume %s. Error: %s',
  1510. volume_id, e)
  1511. raise exception.InvalidBDMVolume(id=volume_id)
  1512. elif snapshot_id is not None:
  1513. try:
  1514. snap = self.volume_api.get_snapshot(context, snapshot_id)
  1515. bdm.volume_size = bdm.volume_size or snap.get('size')
  1516. except exception.CinderConnectionFailed:
  1517. raise
  1518. except Exception:
  1519. raise exception.InvalidBDMSnapshot(id=snapshot_id)
  1520. elif (bdm.source_type == 'blank' and
  1521. bdm.destination_type == 'volume' and
  1522. not bdm.volume_size):
  1523. raise exception.InvalidBDM(message=_("Blank volumes "
  1524. "(source: 'blank', dest: 'volume') need to have non-zero "
  1525. "size"))
  1526. ephemeral_size = sum(bdm.volume_size or instance_type['ephemeral_gb']
  1527. for bdm in block_device_mappings
  1528. if block_device.new_format_is_ephemeral(bdm))
  1529. if ephemeral_size > instance_type['ephemeral_gb']:
  1530. raise exception.InvalidBDMEphemeralSize()
  1531. # There should be only one swap
  1532. swap_list = block_device.get_bdm_swap_list(block_device_mappings)
  1533. if len(swap_list) > 1:
  1534. msg = _("More than one swap drive requested.")
  1535. raise exception.InvalidBDMFormat(details=msg)
  1536. if swap_list:
  1537. swap_size = swap_list[0].volume_size or 0
  1538. if swap_size > instance_type['swap']:
  1539. raise exception.InvalidBDMSwapSize()
  1540. max_local = CONF.max_local_block_devices
  1541. if max_local >= 0:
  1542. num_local = len([bdm for bdm in block_device_mappings
  1543. if bdm.destination_type == 'local'])
  1544. if num_local > max_local:
  1545. raise exception.InvalidBDMLocalsLimit()
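# Example (illustrative sketch, not part of the module): the boot-index
# rule enforced by _validate_bdm(). Indexes that are None or negative are
# ignored; the remaining indexes must be exactly 0, 1, 2, ... and at least
# one boot device must exist.
#
#     # valid:   [0]        or [0, 1]   (plus any None/-1 entries)
#     # invalid: []         no bootable device
#     # invalid: [1]        sequence does not start at 0
#     # invalid: [0, 0]     duplicate index
#     # invalid: [0, 2]     gap in the sequence
#     # each invalid case raises InvalidBDMBootSequence.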
  1546. def _populate_instance_names(self, instance, num_instances, index):
  1547. """Populate instance display_name and hostname.
  1548. :param instance: The instance to set the display_name, hostname for
  1549. :type instance: nova.objects.Instance
  1550. :param num_instances: Total number of instances being created in this
  1551. request
  1552. :param index: The 0-based index of this particular instance
  1553. """
  1554. # NOTE(mriedem): This is only here for test simplicity since a server
  1555. # name is required in the REST API.
  1556. if 'display_name' not in instance or instance.display_name is None:
  1557. instance.display_name = 'Server %s' % instance.uuid
  1558. # if we're booting multiple instances, we need to add an indexing
  1559. # suffix to both instance.hostname and instance.display_name. This is
  1560. # not necessary for a single instance.
  1561. if num_instances == 1:
  1562. default_hostname = 'Server-%s' % instance.uuid
  1563. instance.hostname = utils.sanitize_hostname(
  1564. instance.display_name, default_hostname)
  1565. elif num_instances > 1:
  1566. old_display_name = instance.display_name
  1567. new_display_name = '%s-%d' % (old_display_name, index + 1)
  1568. if utils.sanitize_hostname(old_display_name) == "":
  1569. instance.hostname = 'Server-%s' % instance.uuid
  1570. else:
  1571. instance.hostname = utils.sanitize_hostname(
  1572. new_display_name)
  1573. instance.display_name = new_display_name
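# Example (illustrative sketch, not part of the module): naming during a
# multi-create request with display_name 'web' (made up).
#
#     # num_instances == 1: display_name stays 'web', hostname 'web'
#     # num_instances == 3:
#     #   index 0 -> display_name 'web-1', hostname 'web-1'
#     #   index 1 -> display_name 'web-2', hostname 'web-2'
#     #   index 2 -> display_name 'web-3', hostname 'web-3'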
  1574. def _populate_instance_for_create(self, context, instance, image,
  1575. index, security_groups, instance_type,
  1576. num_instances, shutdown_terminate):
  1577. """Build the beginning of a new instance."""
  1578. instance.launch_index = index
  1579. instance.vm_state = vm_states.BUILDING
  1580. instance.task_state = task_states.SCHEDULING
  1581. info_cache = objects.InstanceInfoCache()
  1582. info_cache.instance_uuid = instance.uuid
  1583. info_cache.network_info = network_model.NetworkInfo()
  1584. instance.info_cache = info_cache
  1585. instance.flavor = instance_type
  1586. instance.old_flavor = None
  1587. instance.new_flavor = None
  1588. if CONF.ephemeral_storage_encryption.enabled:
  1589. # NOTE(kfarr): dm-crypt expects the cipher in a
  1590. # hyphenated format: cipher-chainmode-ivmode
  1591. # (ex: aes-xts-plain64). The algorithm needs
  1592. # to be parsed out to pass to the key manager (ex: aes).
  1593. cipher = CONF.ephemeral_storage_encryption.cipher
  1594. algorithm = cipher.split('-')[0] if cipher else None
  1595. instance.ephemeral_key_uuid = self.key_manager.create_key(
  1596. context,
  1597. algorithm=algorithm,
  1598. length=CONF.ephemeral_storage_encryption.key_size)
  1599. else:
  1600. instance.ephemeral_key_uuid = None
  1601. # Store image properties so we can use them later
  1602. # (for notifications, etc). Only store what we can.
  1603. if not instance.obj_attr_is_set('system_metadata'):
  1604. instance.system_metadata = {}
  1605. # Make sure we have the dict form that we need for instance_update.
  1606. instance.system_metadata = utils.instance_sys_meta(instance)
  1607. system_meta = utils.get_system_metadata_from_image(
  1608. image, instance_type)
  1609. # In case we couldn't find any suitable base_image
  1610. system_meta.setdefault('image_base_image_ref', instance.image_ref)
  1611. system_meta['owner_user_name'] = context.user_name
  1612. system_meta['owner_project_name'] = context.project_name
  1613. instance.system_metadata.update(system_meta)
  1614. # Since the removal of nova-network, we don't actually store anything
  1615. # in the database. Instead, we proxy the security groups on the
  1616. # instance from the ports attached to the instance.
  1617. instance.security_groups = objects.SecurityGroupList()
  1618. self._populate_instance_names(instance, num_instances, index)
  1619. instance.shutdown_terminate = shutdown_terminate
  1620. return instance
  1621. def _create_tag_list_obj(self, context, tags):
  1622. """Create TagList objects from simple string tags.
  1623. :param context: security context.
  1624. :param tags: simple string tags from API request.
  1625. :returns: TagList object.
  1626. """
  1627. tag_list = [objects.Tag(context=context, tag=t) for t in tags]
  1628. tag_list_obj = objects.TagList(objects=tag_list)
  1629. return tag_list_obj
  1630. def _transform_tags(self, tags, resource_id):
  1631. """Change the resource_id of the tags according to the input param.
1632. Because this method can be called multiple times when more than one
1633. instance is booted in a single request, it makes a copy of the tags
1634. list.
  1635. :param tags: TagList object.
  1636. :param resource_id: string.
  1637. :returns: TagList object.
  1638. """
  1639. instance_tags = tags.obj_clone()
  1640. for tag in instance_tags:
  1641. tag.resource_id = resource_id
  1642. return instance_tags
  1643. # This method remains because cellsv1 uses it in the scheduler
  1644. def create_db_entry_for_new_instance(self, context, instance_type, image,
  1645. instance, security_group, block_device_mapping, num_instances,
  1646. index, shutdown_terminate=False, create_instance=True):
  1647. """Create an entry in the DB for this new instance,
  1648. including any related table updates (such as security group,
  1649. etc).
  1650. This is called by the scheduler after a location for the
  1651. instance has been determined.
  1652. :param create_instance: Determines if the instance is created here or
  1653. just populated for later creation. This is done so that this code
  1654. can be shared with cellsv1 which needs the instance creation to
  1655. happen here. It should be removed and this method cleaned up when
  1656. cellsv1 is a distant memory.
  1657. """
  1658. self._populate_instance_for_create(context, instance, image, index,
  1659. security_group, instance_type,
  1660. num_instances, shutdown_terminate)
  1661. if create_instance:
  1662. instance.create()
  1663. return instance
  1664. def _check_multiple_instances_with_neutron_ports(self,
  1665. requested_networks):
  1666. """Check whether multiple instances are created from port id(s)."""
  1667. for requested_net in requested_networks:
  1668. if requested_net.port_id:
  1669. msg = _("Unable to launch multiple instances with"
  1670. " a single configured port ID. Please launch your"
  1671. " instance one by one with different ports.")
  1672. raise exception.MultiplePortsNotApplicable(reason=msg)
  1673. def _check_multiple_instances_with_specified_ip(self, requested_networks):
  1674. """Check whether multiple instances are created with specified ip."""
  1675. for requested_net in requested_networks:
  1676. if requested_net.network_id and requested_net.address:
  1677. msg = _("max_count cannot be greater than 1 if an fixed_ip "
  1678. "is specified.")
  1679. raise exception.InvalidFixedIpAndMaxCountRequest(reason=msg)
  1680. @hooks.add_hook("create_instance")
  1681. def create(self, context, instance_type,
  1682. image_href, kernel_id=None, ramdisk_id=None,
  1683. min_count=None, max_count=None,
  1684. display_name=None, display_description=None,
  1685. key_name=None, key_data=None, security_groups=None,
  1686. availability_zone=None, forced_host=None, forced_node=None,
  1687. user_data=None, metadata=None, injected_files=None,
  1688. admin_password=None, block_device_mapping=None,
  1689. access_ip_v4=None, access_ip_v6=None, requested_networks=None,
  1690. config_drive=None, auto_disk_config=None, scheduler_hints=None,
  1691. legacy_bdm=True, shutdown_terminate=False,
  1692. check_server_group_quota=False, tags=None,
  1693. supports_multiattach=False, trusted_certs=None,
  1694. supports_port_resource_request=False,
  1695. requested_host=None, requested_hypervisor_hostname=None):
  1696. """Provision instances, sending instance information to the
  1697. scheduler. The scheduler will determine where the instance(s)
  1698. go and will handle creating the DB entries.
  1699. Returns a tuple of (instances, reservation_id)
  1700. """
  1701. if requested_networks and max_count is not None and max_count > 1:
  1702. self._check_multiple_instances_with_specified_ip(
  1703. requested_networks)
  1704. self._check_multiple_instances_with_neutron_ports(
  1705. requested_networks)
  1706. if availability_zone:
  1707. available_zones = availability_zones.\
  1708. get_availability_zones(context.elevated(), self.host_api,
  1709. get_only_available=True)
  1710. if forced_host is None and availability_zone not in \
  1711. available_zones:
  1712. msg = _('The requested availability zone is not available')
  1713. raise exception.InvalidRequest(msg)
  1714. filter_properties = scheduler_utils.build_filter_properties(
  1715. scheduler_hints, forced_host, forced_node, instance_type)
  1716. return self._create_instance(
  1717. context, instance_type,
  1718. image_href, kernel_id, ramdisk_id,
  1719. min_count, max_count,
  1720. display_name, display_description,
  1721. key_name, key_data, security_groups,
  1722. availability_zone, user_data, metadata,
  1723. injected_files, admin_password,
  1724. access_ip_v4, access_ip_v6,
  1725. requested_networks, config_drive,
  1726. block_device_mapping, auto_disk_config,
  1727. filter_properties=filter_properties,
  1728. legacy_bdm=legacy_bdm,
  1729. shutdown_terminate=shutdown_terminate,
  1730. check_server_group_quota=check_server_group_quota,
  1731. tags=tags, supports_multiattach=supports_multiattach,
  1732. trusted_certs=trusted_certs,
  1733. supports_port_resource_request=supports_port_resource_request,
  1734. requested_host=requested_host,
  1735. requested_hypervisor_hostname=requested_hypervisor_hostname)
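# Example (illustrative sketch, not part of the module): a minimal create()
# call. `compute_api`, `ctxt`, `flavor` and `image_uuid` are hypothetical
# names for an instance of this class, a RequestContext, a Flavor object
# and a Glance image UUID.
#
#     instances, reservation_id = compute_api.create(
#         ctxt, flavor, image_uuid,
#         min_count=1, max_count=2,
#         display_name='web')
#     # The scheduler selects hosts and the conductor writes the cell DB
#     # records; the returned Instance objects are built from the
#     # BuildRequests.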
  1736. def _check_auto_disk_config(self, instance=None, image=None,
  1737. auto_disk_config=None):
  1738. if auto_disk_config is None:
  1739. return
  1740. if not image and not instance:
  1741. return
  1742. if image:
  1743. image_props = image.get("properties", {})
  1744. auto_disk_config_img = \
  1745. utils.get_auto_disk_config_from_image_props(image_props)
  1746. image_ref = image.get("id")
  1747. else:
  1748. sys_meta = utils.instance_sys_meta(instance)
  1749. image_ref = sys_meta.get('image_base_image_ref')
  1750. auto_disk_config_img = \
  1751. utils.get_auto_disk_config_from_instance(sys_meta=sys_meta)
  1752. self._ensure_auto_disk_config_is_valid(auto_disk_config_img,
  1753. auto_disk_config,
  1754. image_ref)
  1755. def _lookup_instance(self, context, uuid):
  1756. '''Helper method for pulling an instance object from a database.
  1757. During the transition to cellsv2 there is some complexity around
  1758. retrieving an instance from the database which this method hides. If
  1759. there is an instance mapping then query the cell for the instance, if
  1760. no mapping exists then query the configured nova database.
  1761. Once we are past the point that all deployments can be assumed to be
  1762. migrated to cellsv2 this method can go away.
  1763. '''
  1764. inst_map = None
  1765. try:
  1766. inst_map = objects.InstanceMapping.get_by_instance_uuid(
  1767. context, uuid)
  1768. except exception.InstanceMappingNotFound:
  1769. # TODO(alaski): This exception block can be removed once we're
  1770. # guaranteed everyone is using cellsv2.
  1771. pass
  1772. if inst_map is None or inst_map.cell_mapping is None:
  1773. # If inst_map is None then the deployment has not migrated to
  1774. # cellsv2 yet.
  1775. # If inst_map.cell_mapping is None then the instance is not in a
  1776. # cell yet. Until instance creation moves to the conductor the
  1777. # instance can be found in the configured database, so attempt
  1778. # to look it up.
  1779. cell = None
  1780. try:
  1781. instance = objects.Instance.get_by_uuid(context, uuid)
  1782. except exception.InstanceNotFound:
  1783. # If we get here then the conductor is in charge of writing the
  1784. # instance to the database and hasn't done that yet. It's up to
  1785. # the caller of this method to determine what to do with that
  1786. # information.
  1787. return None, None
  1788. else:
  1789. cell = inst_map.cell_mapping
  1790. with nova_context.target_cell(context, cell) as cctxt:
  1791. try:
  1792. instance = objects.Instance.get_by_uuid(cctxt, uuid)
  1793. except exception.InstanceNotFound:
  1794. # Since the cell_mapping exists we know the instance is in
  1795. # the cell, however InstanceNotFound means it's already
  1796. # deleted.
  1797. return None, None
  1798. return cell, instance
  1799. def _delete_while_booting(self, context, instance):
  1800. """Handle deletion if the instance has not reached a cell yet
  1801. Deletion before an instance reaches a cell needs to be handled
  1802. differently. What we're attempting to do is delete the BuildRequest
  1803. before the api level conductor does. If we succeed here then the boot
  1804. request stops before reaching a cell. If not then the instance will
  1805. need to be looked up in a cell db and the normal delete path taken.
  1806. """
  1807. deleted = self._attempt_delete_of_buildrequest(context, instance)
  1808. if deleted:
  1809. # If we've reached this block the successful deletion of the
  1810. # buildrequest indicates that the build process should be halted by
  1811. # the conductor.
  1812. # NOTE(alaski): Though the conductor halts the build process it
  1813. # does not currently delete the instance record. This is
  1814. # because in the near future the instance record will not be
  1815. # created if the buildrequest has been deleted here. For now we
  1816. # ensure the instance has been set to deleted at this point.
  1817. # Yes this directly contradicts the comment earlier in this
  1818. # method, but this is a temporary measure.
  1819. # Look up the instance because the current instance object was
  1820. # stashed on the buildrequest and therefore not complete enough
  1821. # to run .destroy().
  1822. try:
  1823. instance_uuid = instance.uuid
  1824. cell, instance = self._lookup_instance(context, instance_uuid)
  1825. if instance is not None:
  1826. # If instance is None it has already been deleted.
  1827. if cell:
  1828. with nova_context.target_cell(context, cell) as cctxt:
  1829. # FIXME: When the instance context is targeted,
  1830. # we can remove this
  1831. with compute_utils.notify_about_instance_delete(
  1832. self.notifier, cctxt, instance):
  1833. instance.destroy()
  1834. else:
  1835. instance.destroy()
  1836. except exception.InstanceNotFound:
  1837. pass
  1838. return True
  1839. return False
  1840. def _attempt_delete_of_buildrequest(self, context, instance):
  1841. # If there is a BuildRequest then the instance may not have been
  1842. # written to a cell db yet. Delete the BuildRequest here, which
  1843. # will indicate that the Instance build should not proceed.
  1844. try:
  1845. build_req = objects.BuildRequest.get_by_instance_uuid(
  1846. context, instance.uuid)
  1847. build_req.destroy()
  1848. except exception.BuildRequestNotFound:
  1849. # This means that conductor has deleted the BuildRequest so the
  1850. # instance is now in a cell and the delete needs to proceed
  1851. # normally.
  1852. return False
  1853. # We need to detach from any volumes so they aren't orphaned.
  1854. self._local_cleanup_bdm_volumes(
  1855. build_req.block_device_mappings, instance, context)
  1856. return True
  1857. def _delete(self, context, instance, delete_type, cb, **instance_attrs):
  1858. if instance.disable_terminate:
  1859. LOG.info('instance termination disabled', instance=instance)
  1860. return
  1861. cell = None
  1862. # If there is an instance.host (or the instance is shelved-offloaded or
  1863. # in error state), the instance has been scheduled and sent to a
  1864. # cell/compute which means it was pulled from the cell db.
  1865. # Normal delete should be attempted.
  1866. may_have_ports_or_volumes = compute_utils.may_have_ports_or_volumes(
  1867. instance)
  1868. if not instance.host and not may_have_ports_or_volumes:
  1869. try:
  1870. if self._delete_while_booting(context, instance):
  1871. return
  1872. # If instance.host was not set it's possible that the Instance
  1873. # object here was pulled from a BuildRequest object and is not
  1874. # fully populated. Notably it will be missing an 'id' field
  1875. # which will prevent instance.destroy from functioning
  1876. # properly. A lookup is attempted which will either return a
  1877. # full Instance or None if not found. If not found then it's
  1878. # acceptable to skip the rest of the delete processing.
  1879. cell, instance = self._lookup_instance(context, instance.uuid)
  1880. if cell and instance:
  1881. try:
  1882. # Now destroy the instance from the cell it lives in.
  1883. with compute_utils.notify_about_instance_delete(
  1884. self.notifier, context, instance):
  1885. instance.destroy()
  1886. except exception.InstanceNotFound:
  1887. pass
  1888. # The instance was deleted or is already gone.
  1889. return
  1890. if not instance:
  1891. # Instance is already deleted.
  1892. return
  1893. except exception.ObjectActionError:
  1894. # NOTE(melwitt): This means the instance.host changed
  1895. # under us indicating the instance became scheduled
  1896. # during the destroy(). Refresh the instance from the DB and
  1897. # continue on with the delete logic for a scheduled instance.
  1898. # NOTE(danms): If instance.host is set, we should be able to
  1899. # do the following lookup. If not, there's not much we can
  1900. # do to recover.
  1901. cell, instance = self._lookup_instance(context, instance.uuid)
  1902. if not instance:
  1903. # Instance is already deleted
  1904. return
  1905. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  1906. context, instance.uuid)
1907. # In these states the instance has an associated snapshot.
  1908. if instance.vm_state in (vm_states.SHELVED,
  1909. vm_states.SHELVED_OFFLOADED):
  1910. snapshot_id = instance.system_metadata.get('shelved_image_id')
  1911. LOG.info("Working on deleting snapshot %s "
  1912. "from shelved instance...",
  1913. snapshot_id, instance=instance)
  1914. try:
  1915. self.image_api.delete(context, snapshot_id)
  1916. except (exception.ImageNotFound,
  1917. exception.ImageNotAuthorized) as exc:
  1918. LOG.warning("Failed to delete snapshot "
  1919. "from shelved instance (%s).",
  1920. exc.format_message(), instance=instance)
  1921. except Exception:
  1922. LOG.exception("Something wrong happened when trying to "
  1923. "delete snapshot from shelved instance.",
  1924. instance=instance)
  1925. original_task_state = instance.task_state
  1926. try:
  1927. # NOTE(maoy): no expected_task_state needs to be set
  1928. instance.update(instance_attrs)
  1929. instance.progress = 0
  1930. instance.save()
  1931. if not instance.host and not may_have_ports_or_volumes:
  1932. try:
  1933. with compute_utils.notify_about_instance_delete(
  1934. self.notifier, context, instance,
  1935. delete_type
  1936. if delete_type != 'soft_delete'
  1937. else 'delete'):
  1938. instance.destroy()
  1939. LOG.info('Instance deleted and does not have host '
  1940. 'field, its vm_state is %(state)s.',
  1941. {'state': instance.vm_state},
  1942. instance=instance)
  1943. return
  1944. except exception.ObjectActionError as ex:
  1945. # The instance's host likely changed under us as
  1946. # this instance could be building and has since been
  1947. # scheduled. Continue with attempts to delete it.
  1948. LOG.debug('Refreshing instance because: %s', ex,
  1949. instance=instance)
  1950. instance.refresh()
  1951. if instance.vm_state == vm_states.RESIZED:
  1952. self._confirm_resize_on_deleting(context, instance)
  1953. # NOTE(neha_alhat): After confirm resize vm_state will become
  1954. # 'active' and task_state will be set to 'None'. But for soft
  1955. # deleting a vm, the _do_soft_delete callback requires
  1956. # task_state in 'SOFT_DELETING' status. So, we need to set
  1957. # task_state as 'SOFT_DELETING' again for soft_delete case.
  1958. # After confirm resize and before saving the task_state to
  1959. # "SOFT_DELETING", during the short window, user can submit
  1960. # soft delete vm request again and system will accept and
  1961. # process it without any errors.
  1962. if delete_type == 'soft_delete':
  1963. instance.task_state = instance_attrs['task_state']
  1964. instance.save()
  1965. is_local_delete = True
  1966. try:
  1967. # instance.host must be set in order to look up the service.
  1968. if instance.host is not None:
  1969. service = objects.Service.get_by_compute_host(
  1970. context.elevated(), instance.host)
  1971. is_local_delete = not self.servicegroup_api.service_is_up(
  1972. service)
  1973. if not is_local_delete:
  1974. if original_task_state in (task_states.DELETING,
  1975. task_states.SOFT_DELETING):
  1976. LOG.info('Instance is already in deleting state, '
  1977. 'ignoring this request',
  1978. instance=instance)
  1979. return
  1980. self._record_action_start(context, instance,
  1981. instance_actions.DELETE)
  1982. cb(context, instance, bdms)
  1983. except exception.ComputeHostNotFound:
  1984. LOG.debug('Compute host %s not found during service up check, '
  1985. 'going to local delete instance', instance.host,
  1986. instance=instance)
  1987. if is_local_delete:
  1988. # If instance is in shelved_offloaded state or compute node
  1989. # isn't up, delete instance from db and clean bdms info and
  1990. # network info
  1991. if cell is None:
  1992. # NOTE(danms): If we didn't get our cell from one of the
  1993. # paths above, look it up now.
  1994. try:
  1995. im = objects.InstanceMapping.get_by_instance_uuid(
  1996. context, instance.uuid)
  1997. cell = im.cell_mapping
  1998. except exception.InstanceMappingNotFound:
  1999. LOG.warning('During local delete, failed to find '
  2000. 'instance mapping', instance=instance)
  2001. return
  2002. LOG.debug('Doing local delete in cell %s', cell.identity,
  2003. instance=instance)
  2004. with nova_context.target_cell(context, cell) as cctxt:
  2005. self._local_delete(cctxt, instance, bdms, delete_type, cb)
  2006. except exception.InstanceNotFound:
  2007. # NOTE(comstud): Race condition. Instance already gone.
  2008. pass
  2009. def _confirm_resize_on_deleting(self, context, instance):
  2010. # If in the middle of a resize, use confirm_resize to
  2011. # ensure the original instance is cleaned up too along
  2012. # with its allocations (and migration-based allocations)
  2013. # in placement.
  2014. migration = None
  2015. for status in ('finished', 'confirming'):
  2016. try:
  2017. migration = objects.Migration.get_by_instance_and_status(
  2018. context.elevated(), instance.uuid, status)
  2019. LOG.info('Found an unconfirmed migration during delete, '
  2020. 'id: %(id)s, status: %(status)s',
  2021. {'id': migration.id,
  2022. 'status': migration.status},
  2023. instance=instance)
  2024. break
  2025. except exception.MigrationNotFoundByStatus:
  2026. pass
  2027. if not migration:
  2028. LOG.info('Instance may have been confirmed during delete',
  2029. instance=instance)
  2030. return
  2031. self._record_action_start(context, instance,
  2032. instance_actions.CONFIRM_RESIZE)
  2033. # If migration.cross_cell_move, we need to also cleanup the instance
  2034. # data from the source cell database.
  2035. if migration.cross_cell_move:
  2036. self.compute_task_api.confirm_snapshot_based_resize(
  2037. context, instance, migration, do_cast=False)
  2038. else:
  2039. self.compute_rpcapi.confirm_resize(context,
  2040. instance, migration, migration.source_compute, cast=False)
  2041. def _local_cleanup_bdm_volumes(self, bdms, instance, context):
  2042. """The method deletes the bdm records and, if a bdm is a volume, call
  2043. the terminate connection and the detach volume via the Volume API.
  2044. """
  2045. elevated = context.elevated()
  2046. for bdm in bdms:
  2047. if bdm.is_volume:
  2048. try:
  2049. if bdm.attachment_id:
  2050. self.volume_api.attachment_delete(context,
  2051. bdm.attachment_id)
  2052. else:
  2053. connector = compute_utils.get_stashed_volume_connector(
  2054. bdm, instance)
  2055. if connector:
  2056. self.volume_api.terminate_connection(context,
  2057. bdm.volume_id,
  2058. connector)
  2059. else:
  2060. LOG.debug('Unable to find connector for volume %s,'
  2061. ' not attempting terminate_connection.',
  2062. bdm.volume_id, instance=instance)
  2063. # Attempt to detach the volume. If there was no
  2064. # connection made in the first place this is just
  2065. # cleaning up the volume state in the Cinder DB.
  2066. self.volume_api.detach(elevated, bdm.volume_id,
  2067. instance.uuid)
  2068. if bdm.delete_on_termination:
  2069. self.volume_api.delete(context, bdm.volume_id)
  2070. except Exception as exc:
  2071. LOG.warning("Ignoring volume cleanup failure due to %s",
  2072. exc, instance=instance)
  2073. # If we're cleaning up volumes from an instance that wasn't yet
  2074. # created in a cell, i.e. the user deleted the server while
  2075. # the BuildRequest still existed, then the BDM doesn't actually
  2076. # exist in the DB to destroy it.
  2077. if 'id' in bdm:
  2078. bdm.destroy()
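# Example (illustrative sketch, not part of the module): the Cinder calls
# the cleanup above makes, depending on how the volume was attached.
#
#     # new-style attachment (bdm.attachment_id set):
#     #   volume_api.attachment_delete(context, bdm.attachment_id)
#     # legacy attachment with a stashed connector:
#     #   volume_api.terminate_connection(...) then volume_api.detach(...)
#     # and, when delete_on_termination is set:
#     #   volume_api.delete(context, bdm.volume_id)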
  2079. @property
  2080. def placementclient(self):
  2081. if self._placementclient is None:
  2082. self._placementclient = report.SchedulerReportClient()
  2083. return self._placementclient
  2084. def _local_delete(self, context, instance, bdms, delete_type, cb):
  2085. if instance.vm_state == vm_states.SHELVED_OFFLOADED:
  2086. LOG.info("instance is in SHELVED_OFFLOADED state, cleanup"
  2087. " the instance's info from database.",
  2088. instance=instance)
  2089. else:
  2090. LOG.warning("instance's host %s is down, deleting from "
  2091. "database", instance.host, instance=instance)
  2092. with compute_utils.notify_about_instance_delete(
  2093. self.notifier, context, instance,
  2094. delete_type if delete_type != 'soft_delete' else 'delete'):
  2095. elevated = context.elevated()
  2096. orig_host = instance.host
  2097. try:
  2098. if instance.vm_state == vm_states.SHELVED_OFFLOADED:
  2099. sysmeta = getattr(instance,
  2100. obj_base.get_attrname(
  2101. 'system_metadata'))
  2102. instance.host = sysmeta.get('shelved_host')
  2103. self.network_api.deallocate_for_instance(elevated,
  2104. instance)
  2105. finally:
  2106. instance.host = orig_host
  2107. # cleanup volumes
  2108. self._local_cleanup_bdm_volumes(bdms, instance, context)
  2109. # Cleanup allocations in Placement since we can't do it from the
  2110. # compute service.
  2111. self.placementclient.delete_allocation_for_instance(
  2112. context, instance.uuid)
  2113. cb(context, instance, bdms, local=True)
  2114. instance.destroy()
  2115. @staticmethod
  2116. def _update_queued_for_deletion(context, instance, qfd):
  2117. # NOTE(tssurya): We query the instance_mapping record of this instance
  2118. # and update the queued_for_delete flag to True (or False according to
  2119. # the state of the instance). This just means that the instance is
  2120. # queued for deletion (or is no longer queued for deletion). It does
  2121. # not guarantee its successful deletion (or restoration). Hence the
2122. # value could be stale, which is fine, considering it is only used
2123. # during a down-cell (desperate) situation.
  2124. im = objects.InstanceMapping.get_by_instance_uuid(context,
  2125. instance.uuid)
  2126. im.queued_for_delete = qfd
  2127. im.save()
  2128. def _do_delete(self, context, instance, bdms, local=False):
  2129. if local:
  2130. instance.vm_state = vm_states.DELETED
  2131. instance.task_state = None
  2132. instance.terminated_at = timeutils.utcnow()
  2133. instance.save()
  2134. else:
  2135. self.compute_rpcapi.terminate_instance(context, instance, bdms)
  2136. self._update_queued_for_deletion(context, instance, True)
  2137. def _do_soft_delete(self, context, instance, bdms, local=False):
  2138. if local:
  2139. instance.vm_state = vm_states.SOFT_DELETED
  2140. instance.task_state = None
  2141. instance.terminated_at = timeutils.utcnow()
  2142. instance.save()
  2143. else:
  2144. self.compute_rpcapi.soft_delete_instance(context, instance)
  2145. self._update_queued_for_deletion(context, instance, True)
  2146. # NOTE(maoy): we allow delete to be called no matter what vm_state says.
  2147. @check_instance_lock
  2148. @check_instance_state(vm_state=None, task_state=None,
  2149. must_have_launched=True)
  2150. def soft_delete(self, context, instance):
  2151. """Terminate an instance."""
  2152. LOG.debug('Going to try to soft delete instance',
  2153. instance=instance)
  2154. self._delete(context, instance, 'soft_delete', self._do_soft_delete,
  2155. task_state=task_states.SOFT_DELETING,
  2156. deleted_at=timeutils.utcnow())
  2157. def _delete_instance(self, context, instance):
  2158. self._delete(context, instance, 'delete', self._do_delete,
  2159. task_state=task_states.DELETING)
  2160. @check_instance_lock
  2161. @check_instance_state(vm_state=None, task_state=None,
  2162. must_have_launched=False)
  2163. def delete(self, context, instance):
  2164. """Terminate an instance."""
  2165. LOG.debug("Going to try to terminate instance", instance=instance)
  2166. self._delete_instance(context, instance)
  2167. @check_instance_lock
  2168. @check_instance_state(vm_state=[vm_states.SOFT_DELETED])
  2169. def restore(self, context, instance):
  2170. """Restore a previously deleted (but not reclaimed) instance."""
  2171. # Check quotas
  2172. flavor = instance.get_flavor()
  2173. project_id, user_id = quotas_obj.ids_from_instance(context, instance)
  2174. compute_utils.check_num_instances_quota(context, flavor, 1, 1,
  2175. project_id=project_id, user_id=user_id)
  2176. self._record_action_start(context, instance, instance_actions.RESTORE)
  2177. if instance.host:
  2178. instance.task_state = task_states.RESTORING
  2179. instance.deleted_at = None
  2180. instance.save(expected_task_state=[None])
  2181. # TODO(melwitt): We're not rechecking for strict quota here to
  2182. # guard against going over quota during a race at this time because
  2183. # the resource consumption for this operation is written to the
  2184. # database by compute.
  2185. self.compute_rpcapi.restore_instance(context, instance)
  2186. else:
  2187. instance.vm_state = vm_states.ACTIVE
  2188. instance.task_state = None
  2189. instance.deleted_at = None
  2190. instance.save(expected_task_state=[None])
  2191. self._update_queued_for_deletion(context, instance, False)
  2192. @check_instance_lock
  2193. @check_instance_state(task_state=None,
  2194. must_have_launched=False)
  2195. def force_delete(self, context, instance):
  2196. """Force delete an instance in any vm_state/task_state."""
  2197. self._delete(context, instance, 'force_delete', self._do_delete,
  2198. task_state=task_states.DELETING)
  2199. def force_stop(self, context, instance, do_cast=True, clean_shutdown=True):
  2200. LOG.debug("Going to try to stop instance", instance=instance)
  2201. instance.task_state = task_states.POWERING_OFF
  2202. instance.progress = 0
  2203. instance.save(expected_task_state=[None])
  2204. self._record_action_start(context, instance, instance_actions.STOP)
  2205. self.compute_rpcapi.stop_instance(context, instance, do_cast=do_cast,
  2206. clean_shutdown=clean_shutdown)
  2207. @check_instance_lock
  2208. @check_instance_host()
  2209. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.ERROR])
  2210. def stop(self, context, instance, do_cast=True, clean_shutdown=True):
  2211. """Stop an instance."""
  2212. self.force_stop(context, instance, do_cast, clean_shutdown)
  2213. @check_instance_lock
  2214. @check_instance_host()
  2215. @check_instance_state(vm_state=[vm_states.STOPPED])
  2216. def start(self, context, instance):
  2217. """Start an instance."""
  2218. LOG.debug("Going to try to start instance", instance=instance)
  2219. instance.task_state = task_states.POWERING_ON
  2220. instance.save(expected_task_state=[None])
  2221. self._record_action_start(context, instance, instance_actions.START)
  2222. self.compute_rpcapi.start_instance(context, instance)
  2223. @check_instance_lock
  2224. @check_instance_host()
  2225. @check_instance_state(vm_state=vm_states.ALLOW_TRIGGER_CRASH_DUMP)
  2226. def trigger_crash_dump(self, context, instance):
  2227. """Trigger crash dump in an instance."""
  2228. LOG.debug("Try to trigger crash dump", instance=instance)
  2229. self._record_action_start(context, instance,
  2230. instance_actions.TRIGGER_CRASH_DUMP)
  2231. self.compute_rpcapi.trigger_crash_dump(context, instance)
  2232. def _generate_minimal_construct_for_down_cells(self, context,
  2233. down_cell_uuids,
  2234. project, limit):
  2235. """Generate a list of minimal instance constructs for a given list of
  2236. cells that did not respond to a list operation. This will list
  2237. every instance mapping in the affected cells and return a minimal
  2238. objects.Instance for each (non-queued-for-delete) mapping.
  2239. :param context: RequestContext
  2240. :param down_cell_uuids: A list of cell UUIDs that did not respond
  2241. :param project: A project ID to filter mappings, or None
  2242. :param limit: A numeric limit on the number of results, or None
  2243. :returns: An InstanceList() of partial Instance() objects
  2244. """
  2245. unavailable_servers = objects.InstanceList()
  2246. for cell_uuid in down_cell_uuids:
  2247. LOG.warning("Cell %s is not responding and hence only "
  2248. "partial results are available from this "
  2249. "cell if any.", cell_uuid)
  2250. instance_mappings = (objects.InstanceMappingList.
  2251. get_not_deleted_by_cell_and_project(context, cell_uuid,
  2252. project, limit=limit))
  2253. for im in instance_mappings:
  2254. unavailable_servers.objects.append(
  2255. objects.Instance(
  2256. context=context,
  2257. uuid=im.instance_uuid,
  2258. project_id=im.project_id,
  2259. created_at=im.created_at
  2260. )
  2261. )
  2262. if limit is not None:
  2263. limit -= len(instance_mappings)
  2264. if limit <= 0:
  2265. break
  2266. return unavailable_servers
  2267. def _get_instance_map_or_none(self, context, instance_uuid):
  2268. try:
  2269. inst_map = objects.InstanceMapping.get_by_instance_uuid(
  2270. context, instance_uuid)
  2271. except exception.InstanceMappingNotFound:
2272. # An InstanceMapping should generally always be found. This exception
2273. # may be raised if a deployment has only partially migrated the
2274. # nova-api services.
  2275. inst_map = None
  2276. return inst_map
  2277. @staticmethod
  2278. def _save_user_id_in_instance_mapping(mapping, instance):
  2279. # TODO(melwitt): We take the opportunity to migrate user_id on the
  2280. # instance mapping if it's not yet been migrated. This can be removed
  2281. # in a future release, when all migrations are complete.
  2282. # If the instance came from a RequestSpec because of a down cell, its
  2283. # user_id could be None and the InstanceMapping.user_id field is
  2284. # non-nullable. Avoid trying to set/save the user_id in that case.
  2285. if 'user_id' not in mapping and instance.user_id is not None:
  2286. mapping.user_id = instance.user_id
  2287. mapping.save()
  2288. def _get_instance_from_cell(self, context, im, expected_attrs,
  2289. cell_down_support):
  2290. # NOTE(danms): Even though we're going to scatter/gather to the
  2291. # right cell, other code depends on this being force targeted when
  2292. # the get call returns.
  2293. nova_context.set_target_cell(context, im.cell_mapping)
  2294. uuid = im.instance_uuid
  2295. result = nova_context.scatter_gather_single_cell(context,
  2296. im.cell_mapping, objects.Instance.get_by_uuid, uuid,
  2297. expected_attrs=expected_attrs)
  2298. cell_uuid = im.cell_mapping.uuid
  2299. if not nova_context.is_cell_failure_sentinel(result[cell_uuid]):
  2300. inst = result[cell_uuid]
  2301. self._save_user_id_in_instance_mapping(im, inst)
  2302. return inst
  2303. elif isinstance(result[cell_uuid], exception.InstanceNotFound):
  2304. raise exception.InstanceNotFound(instance_id=uuid)
  2305. elif cell_down_support:
  2306. if im.queued_for_delete:
  2307. # should be treated like deleted instance.
  2308. raise exception.InstanceNotFound(instance_id=uuid)
  2309. # instance in down cell, return a minimal construct
  2310. LOG.warning("Cell %s is not responding and hence only "
  2311. "partial results are available from this "
  2312. "cell.", cell_uuid)
  2313. try:
  2314. rs = objects.RequestSpec.get_by_instance_uuid(context,
  2315. uuid)
  2316. # For BFV case, we could have rs.image but rs.image.id might
  2317. # still not be set. So we check the existence of both image
  2318. # and its id.
  2319. image_ref = (rs.image.id if rs.image and
  2320. 'id' in rs.image else None)
  2321. inst = objects.Instance(context=context, power_state=0,
  2322. uuid=uuid,
  2323. project_id=im.project_id,
  2324. created_at=im.created_at,
  2325. user_id=rs.user_id,
  2326. flavor=rs.flavor,
  2327. image_ref=image_ref,
  2328. availability_zone=rs.availability_zone)
  2329. self._save_user_id_in_instance_mapping(im, inst)
  2330. return inst
  2331. except exception.RequestSpecNotFound:
  2332. # could be that a deleted instance whose request
  2333. # spec has been archived is being queried.
  2334. raise exception.InstanceNotFound(instance_id=uuid)
  2335. else:
  2336. raise exception.NovaException(
  2337. _("Cell %s is not responding and hence instance "
  2338. "info is not available.") % cell_uuid)
  2339. def _get_instance(self, context, instance_uuid, expected_attrs,
  2340. cell_down_support=False):
  2341. inst_map = self._get_instance_map_or_none(context, instance_uuid)
  2342. if inst_map and (inst_map.cell_mapping is not None):
  2343. instance = self._get_instance_from_cell(context, inst_map,
  2344. expected_attrs, cell_down_support)
  2345. elif inst_map and (inst_map.cell_mapping is None):
  2346. # This means the instance has not been scheduled and put in
  2347. # a cell yet. For now it also may mean that the deployer
  2348. # has not created their cell(s) yet.
  2349. try:
  2350. build_req = objects.BuildRequest.get_by_instance_uuid(
  2351. context, instance_uuid)
  2352. instance = build_req.instance
  2353. except exception.BuildRequestNotFound:
  2354. # Instance was mapped and the BuildRequest was deleted
  2355. # while fetching. Try again.
  2356. inst_map = self._get_instance_map_or_none(context,
  2357. instance_uuid)
  2358. if inst_map and (inst_map.cell_mapping is not None):
  2359. instance = self._get_instance_from_cell(context, inst_map,
  2360. expected_attrs, cell_down_support)
  2361. else:
  2362. raise exception.InstanceNotFound(instance_id=instance_uuid)
  2363. else:
  2364. # If we got here, we don't have an instance mapping, but we aren't
  2365. # sure why. The instance mapping might be missing because the
  2366. # upgrade is incomplete (map_instances wasn't run). Or because the
  2367. # instance was deleted and the DB was archived at which point the
  2368. # mapping is deleted. The former case is bad, but because of the
  2369. # latter case we can't really log any kind of warning/error here
  2370. # since it might be normal.
  2371. raise exception.InstanceNotFound(instance_id=instance_uuid)
  2372. return instance
  2373. def get(self, context, instance_id, expected_attrs=None,
  2374. cell_down_support=False):
  2375. """Get a single instance with the given instance_id.
  2376. :param cell_down_support: True if the API (and caller) support
  2377. returning a minimal instance
  2378. construct if the relevant cell is
  2379. down. If False, an error is raised
  2380. since the instance cannot be retrieved
  2381. due to the cell being down.
  2382. """
  2383. if not expected_attrs:
  2384. expected_attrs = []
  2385. expected_attrs.extend(['metadata', 'system_metadata',
  2386. 'security_groups', 'info_cache'])
  2387. # NOTE(ameade): we still need to support integer ids for ec2
  2388. try:
  2389. if uuidutils.is_uuid_like(instance_id):
  2390. LOG.debug("Fetching instance by UUID",
  2391. instance_uuid=instance_id)
  2392. instance = self._get_instance(context, instance_id,
  2393. expected_attrs, cell_down_support=cell_down_support)
  2394. else:
  2395. LOG.debug("Failed to fetch instance by id %s", instance_id)
  2396. raise exception.InstanceNotFound(instance_id=instance_id)
  2397. except exception.InvalidID:
  2398. LOG.debug("Invalid instance id %s", instance_id)
  2399. raise exception.InstanceNotFound(instance_id=instance_id)
  2400. return instance
  2401. def get_all(self, context, search_opts=None, limit=None, marker=None,
  2402. expected_attrs=None, sort_keys=None, sort_dirs=None,
  2403. cell_down_support=False, all_tenants=False):
  2404. """Get all instances filtered by one of the given parameters.
  2405. If there is no filter and the context is an admin, it will retrieve
  2406. all instances in the system.
  2407. Deleted instances will be returned by default, unless there is a
  2408. search option that says otherwise.
  2409. The results will be sorted based on the list of sort keys in the
  2410. 'sort_keys' parameter (first value is primary sort key, second value is
2411. secondary sort key, etc.). For each sort key, the associated sort
  2412. direction is based on the list of sort directions in the 'sort_dirs'
  2413. parameter.
  2414. :param cell_down_support: True if the API (and caller) support
  2415. returning a minimal instance
  2416. construct if the relevant cell is
  2417. down. If False, instances from
  2418. unreachable cells will be omitted.
  2419. :param all_tenants: True if the "all_tenants" filter was passed.
  2420. """
  2421. if search_opts is None:
  2422. search_opts = {}
  2423. LOG.debug("Searching by: %s", str(search_opts))
  2424. # Fixups for the DB call
  2425. filters = {}
  2426. def _remap_flavor_filter(flavor_id):
  2427. flavor = objects.Flavor.get_by_flavor_id(context, flavor_id)
  2428. filters['instance_type_id'] = flavor.id
  2429. def _remap_fixed_ip_filter(fixed_ip):
  2430. # Turn fixed_ip into a regexp match. Since '.' matches
  2431. # any character, we need to use regexp escaping for it.
  2432. filters['ip'] = '^%s$' % fixed_ip.replace('.', '\\.')
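# e.g. a fixed_ip of '10.0.0.1' becomes the anchored pattern
# '^10\.0\.0\.1$' so the DB layer matches the exact address.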
  2433. # search_option to filter_name mapping.
  2434. filter_mapping = {
  2435. 'image': 'image_ref',
  2436. 'name': 'display_name',
  2437. 'tenant_id': 'project_id',
  2438. 'flavor': _remap_flavor_filter,
  2439. 'fixed_ip': _remap_fixed_ip_filter}
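# e.g. search_opts {'name': 'vm-1', 'flavor': '42'} is remapped to
# filters {'display_name': 'vm-1', 'instance_type_id': <internal id of
# flavor '42'>}; options not listed above are copied through unchanged.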
  2440. # copy from search_opts, doing various remappings as necessary
  2441. for opt, value in search_opts.items():
  2442. # Do remappings.
  2443. # Values not in the filter_mapping table are copied as-is.
  2444. # If remapping is None, option is not copied
  2445. # If the remapping is a string, it is the filter_name to use
  2446. try:
  2447. remap_object = filter_mapping[opt]
  2448. except KeyError:
  2449. filters[opt] = value
  2450. else:
  2451. # Remaps are strings to translate to, or functions to call
  2452. # to do the translating as defined by the table above.
  2453. if isinstance(remap_object, six.string_types):
  2454. filters[remap_object] = value
  2455. else:
  2456. try:
  2457. remap_object(value)
  2458. # We already know we can't match the filter, so
  2459. # return an empty list
  2460. except ValueError:
  2461. return objects.InstanceList()
  2462. # IP address filtering cannot be applied at the DB layer, remove any DB
  2463. # limit so that it can be applied after the IP filter.
  2464. filter_ip = 'ip6' in filters or 'ip' in filters
  2465. skip_build_request = False
  2466. orig_limit = limit
  2467. if filter_ip:
2468. # We cannot skip build requests if there is a marker since
2469. # the marker could be a build request.
  2470. skip_build_request = marker is None
  2471. if self.network_api.has_substr_port_filtering_extension(context):
  2472. # We're going to filter by IP using Neutron so set filter_ip
  2473. # to False so we don't attempt post-DB query filtering in
  2474. # memory below.
  2475. filter_ip = False
  2476. instance_uuids = self._ip_filter_using_neutron(context,
  2477. filters)
  2478. if instance_uuids:
  2479. # Note that 'uuid' is not in the 2.1 GET /servers query
  2480. # parameter schema, however, we allow additionalProperties
  2481. # so someone could filter instances by uuid, which doesn't
  2482. # make a lot of sense but we have to account for it.
  2483. if 'uuid' in filters and filters['uuid']:
  2484. filter_uuids = filters['uuid']
  2485. if isinstance(filter_uuids, list):
  2486. instance_uuids.extend(filter_uuids)
  2487. else:
  2488. # Assume a string. If it's a dict or tuple or
  2489. # something, well...that's too bad. This is why
  2490. # we have query parameter schema definitions.
  2491. if filter_uuids not in instance_uuids:
  2492. instance_uuids.append(filter_uuids)
  2493. filters['uuid'] = instance_uuids
  2494. else:
  2495. # No matches on the ip filter(s), return an empty list.
  2496. return objects.InstanceList()
  2497. elif limit:
  2498. LOG.debug('Removing limit for DB query due to IP filter')
  2499. limit = None
  2500. # Skip get BuildRequest if filtering by IP address, as building
  2501. # instances will not have IP addresses.
  2502. if skip_build_request:
  2503. build_requests = objects.BuildRequestList()
  2504. else:
  2505. # The ordering of instances will be
  2506. # [sorted instances with no host] + [sorted instances with host].
  2507. # This means BuildRequest and cell0 instances first, then cell
  2508. # instances
  2509. try:
  2510. build_requests = objects.BuildRequestList.get_by_filters(
  2511. context, filters, limit=limit, marker=marker,
  2512. sort_keys=sort_keys, sort_dirs=sort_dirs)
2513. # If we found the marker in the build requests, we need to set it
2514. # to None so we don't expect to find it in the cells below.
  2515. marker = None
  2516. except exception.MarkerNotFound:
  2517. # If we didn't find the marker in the build requests then keep
  2518. # looking for it in the cells.
  2519. build_requests = objects.BuildRequestList()
  2520. build_req_instances = objects.InstanceList(
  2521. objects=[build_req.instance for build_req in build_requests])
  2522. # Only subtract from limit if it is not None
  2523. limit = (limit - len(build_req_instances)) if limit else limit
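# e.g. limit=10 with 3 matching build requests leaves at most 7 rows
# to be fetched from the cell databases below.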
  2524. # We could arguably avoid joining on security_groups if we're using
  2525. # neutron (which is the default) but if you're using neutron then the
  2526. # security_group_instance_association table should be empty anyway
  2527. # and the DB should optimize out that join, making it insignificant.
  2528. fields = ['metadata', 'info_cache', 'security_groups']
  2529. if expected_attrs:
  2530. fields.extend(expected_attrs)
  2531. insts, down_cell_uuids = instance_list.get_instance_objects_sorted(
  2532. context, filters, limit, marker, fields, sort_keys, sort_dirs,
  2533. cell_down_support=cell_down_support)
  2534. def _get_unique_filter_method():
  2535. seen_uuids = set()
  2536. def _filter(instance):
  2537. # During a cross-cell move operation we could have the instance
2538. # in more than one cell database, so we not only have to filter
2539. # out duplicates but also want to make sure we only return the
  2540. # "current" one which should also be the one that the instance
  2541. # mapping points to, but we don't want to do that expensive
  2542. # lookup here. The DB API will filter out hidden instances by
  2543. # default but there is a small window where two copies of an
  2544. # instance could be hidden=False in separate cell DBs.
  2545. # NOTE(mriedem): We could make this better in the case that we
  2546. # have duplicate instances that are both hidden=False by
  2547. # showing the one with the newer updated_at value, but that
  2548. # could be tricky if the user is filtering on
  2549. # changes-since/before or updated_at, or sorting on updated_at,
  2550. # but technically that was already potentially broken with this
  2551. # _filter method if we return an older BuildRequest.instance,
  2552. # and given the window should be very small where we have
  2553. # duplicates, it's probably not worth the complexity.
  2554. if instance.uuid in seen_uuids:
  2555. return False
  2556. seen_uuids.add(instance.uuid)
  2557. return True
  2558. return _filter
  2559. filter_method = _get_unique_filter_method()
  2560. # Only subtract from limit if it is not None
  2561. limit = (limit - len(insts)) if limit else limit
  2562. # TODO(alaski): Clean up the objects concatenation when List objects
  2563. # support it natively.
  2564. instances = objects.InstanceList(
  2565. objects=list(filter(filter_method,
  2566. build_req_instances.objects +
  2567. insts.objects)))
  2568. if filter_ip:
  2569. instances = self._ip_filter(instances, filters, orig_limit)
  2570. if cell_down_support:
  2571. # API and client want minimal construct instances for any cells
  2572. # that didn't return, so generate and prefix those to the actual
  2573. # results.
  2574. project = search_opts.get('project_id', context.project_id)
  2575. if all_tenants:
  2576. # NOTE(tssurya): The only scenario where project has to be None
  2577. # is when using "all_tenants" in which case we do not want
  2578. # the query to be restricted based on the project_id.
  2579. project = None
  2580. limit = (orig_limit - len(instances)) if limit else limit
  2581. return (self._generate_minimal_construct_for_down_cells(context,
  2582. down_cell_uuids, project, limit) + instances)
  2583. return instances
  2584. @staticmethod
  2585. def _ip_filter(inst_models, filters, limit):
  2586. ipv4_f = re.compile(str(filters.get('ip')))
  2587. ipv6_f = re.compile(str(filters.get('ip6')))
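# str() guards against an absent filter: filters.get() returns None and
# the resulting pattern 'None' will not match any real address.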
  2588. def _match_instance(instance):
  2589. nw_info = instance.get_network_info()
  2590. for vif in nw_info:
  2591. for fixed_ip in vif.fixed_ips():
  2592. address = fixed_ip.get('address')
  2593. if not address:
  2594. continue
  2595. version = fixed_ip.get('version')
  2596. if ((version == 4 and ipv4_f.match(address)) or
  2597. (version == 6 and ipv6_f.match(address))):
  2598. return True
  2599. return False
  2600. result_objs = []
  2601. for instance in inst_models:
  2602. if _match_instance(instance):
  2603. result_objs.append(instance)
  2604. if limit and len(result_objs) == limit:
  2605. break
  2606. return objects.InstanceList(objects=result_objs)
  2607. def _ip_filter_using_neutron(self, context, filters):
  2608. ip4_address = filters.get('ip')
  2609. ip6_address = filters.get('ip6')
  2610. addresses = [ip4_address, ip6_address]
  2611. uuids = []
  2612. for address in addresses:
  2613. if address:
  2614. try:
  2615. ports = self.network_api.list_ports(
  2616. context, fixed_ips='ip_address_substr=' + address,
  2617. fields=['device_id'])['ports']
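# Neutron performs the substring match server-side; only device_id
# (the uuid of the instance that owns the port) is fetched for each
# matching port.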
  2618. for port in ports:
  2619. uuids.append(port['device_id'])
  2620. except Exception as e:
  2621. LOG.error('An error occurred while listing ports '
  2622. 'with an ip_address filter value of "%s". '
  2623. 'Error: %s',
  2624. address, six.text_type(e))
  2625. return uuids
  2626. def update_instance(self, context, instance, updates):
  2627. """Updates a single Instance object with some updates dict.
  2628. Returns the updated instance.
  2629. """
  2630. # NOTE(sbauza): Given we only persist the Instance object after we
  2631. # create the BuildRequest, we are sure that if the Instance object
  2632. # has an ID field set, then it was persisted in the right Cell DB.
  2633. if instance.obj_attr_is_set('id'):
  2634. instance.update(updates)
  2635. instance.save()
  2636. else:
  2637. # Instance is not yet mapped to a cell, so we need to update
  2638. # BuildRequest instead
  2639. # TODO(sbauza): Fix the possible race conditions where BuildRequest
  2640. # could be deleted because of either a concurrent instance delete
  2641. # or because the scheduler just returned a destination right
  2642. # after we called the instance in the API.
  2643. try:
  2644. build_req = objects.BuildRequest.get_by_instance_uuid(
  2645. context, instance.uuid)
  2646. instance = build_req.instance
  2647. instance.update(updates)
2648. # FIXME(sbauza): Here we are updating the current
2649. # thread-related BuildRequest object. Given that another worker
2650. # could have looked up that BuildRequest in the API and passed
2651. # it down to the conductor without making sure that it was not
2652. # updated, we could have a race condition where it would be
2653. # missing the updated fields, but that's something we could
2654. # discuss once the instance record is persisted
2655. # by the conductor.
  2656. build_req.save()
  2657. except exception.BuildRequestNotFound:
  2658. # Instance was mapped and the BuildRequest was deleted
  2659. # while fetching (and possibly the instance could have been
  2660. # deleted as well). We need to lookup again the Instance object
  2661. # in order to correctly update it.
  2662. # TODO(sbauza): Figure out a good way to know the expected
  2663. # attributes by checking which fields are set or not.
  2664. expected_attrs = ['flavor', 'pci_devices', 'numa_topology',
  2665. 'tags', 'metadata', 'system_metadata',
  2666. 'security_groups', 'info_cache']
  2667. inst_map = self._get_instance_map_or_none(context,
  2668. instance.uuid)
  2669. if inst_map and (inst_map.cell_mapping is not None):
  2670. with nova_context.target_cell(
  2671. context,
  2672. inst_map.cell_mapping) as cctxt:
  2673. instance = objects.Instance.get_by_uuid(
  2674. cctxt, instance.uuid,
  2675. expected_attrs=expected_attrs)
  2676. instance.update(updates)
  2677. instance.save()
  2678. else:
  2679. # Conductor doesn't delete the BuildRequest until after the
  2680. # InstanceMapping record is created, so if we didn't get
  2681. # that and the BuildRequest doesn't exist, then the
  2682. # instance is already gone and we need to just error out.
  2683. raise exception.InstanceNotFound(instance_id=instance.uuid)
  2684. return instance
  2685. # NOTE(melwitt): We don't check instance lock for backup because lock is
  2686. # intended to prevent accidental change/delete of instances
  2687. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2688. vm_states.PAUSED, vm_states.SUSPENDED])
  2689. def backup(self, context, instance, name, backup_type, rotation,
  2690. extra_properties=None):
  2691. """Backup the given instance
  2692. :param instance: nova.objects.instance.Instance object
  2693. :param name: name of the backup
  2694. :param backup_type: 'daily' or 'weekly'
  2695. :param rotation: int representing how many backups to keep around;
  2696. None if rotation shouldn't be used (as in the case of snapshots)
  2697. :param extra_properties: dict of extra image properties to include
  2698. when creating the image.
  2699. :returns: A dict containing image metadata
  2700. """
2701. props_copy = dict(extra_properties or {}, backup_type=backup_type)
  2702. if compute_utils.is_volume_backed_instance(context, instance):
  2703. LOG.info("It's not supported to backup volume backed "
  2704. "instance.", instance=instance)
  2705. raise exception.InvalidRequest(
  2706. _('Backup is not supported for volume-backed instances.'))
  2707. else:
  2708. image_meta = compute_utils.create_image(
  2709. context, instance, name, 'backup', self.image_api,
  2710. extra_properties=props_copy)
  2711. instance.task_state = task_states.IMAGE_BACKUP
  2712. instance.save(expected_task_state=[None])
  2713. self._record_action_start(context, instance,
  2714. instance_actions.BACKUP)
  2715. self.compute_rpcapi.backup_instance(context, instance,
  2716. image_meta['id'],
  2717. backup_type,
  2718. rotation)
  2719. return image_meta
  2720. # NOTE(melwitt): We don't check instance lock for snapshot because lock is
  2721. # intended to prevent accidental change/delete of instances
  2722. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2723. vm_states.PAUSED, vm_states.SUSPENDED])
  2724. def snapshot(self, context, instance, name, extra_properties=None):
  2725. """Snapshot the given instance.
  2726. :param instance: nova.objects.instance.Instance object
  2727. :param name: name of the snapshot
  2728. :param extra_properties: dict of extra image properties to include
  2729. when creating the image.
  2730. :returns: A dict containing image metadata
  2731. """
  2732. image_meta = compute_utils.create_image(
  2733. context, instance, name, 'snapshot', self.image_api,
  2734. extra_properties=extra_properties)
  2735. instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING
  2736. try:
  2737. instance.save(expected_task_state=[None])
  2738. except (exception.InstanceNotFound,
  2739. exception.UnexpectedDeletingTaskStateError) as ex:
2740. # Determine the attribute and state to use when raising the
2741. # InstanceInvalidState exception below.
  2742. LOG.debug('Instance disappeared during snapshot.',
  2743. instance=instance)
  2744. try:
  2745. image_id = image_meta['id']
  2746. self.image_api.delete(context, image_id)
  2747. LOG.info('Image %s deleted because instance '
  2748. 'deleted before snapshot started.',
  2749. image_id, instance=instance)
  2750. except exception.ImageNotFound:
  2751. pass
  2752. except Exception as exc:
  2753. LOG.warning("Error while trying to clean up image %(img_id)s: "
  2754. "%(error_msg)s",
  2755. {"img_id": image_meta['id'],
  2756. "error_msg": six.text_type(exc)})
  2757. attr = 'task_state'
  2758. state = task_states.DELETING
  2759. if type(ex) == exception.InstanceNotFound:
  2760. attr = 'vm_state'
  2761. state = vm_states.DELETED
  2762. raise exception.InstanceInvalidState(attr=attr,
  2763. instance_uuid=instance.uuid,
  2764. state=state,
  2765. method='snapshot')
  2766. self._record_action_start(context, instance,
  2767. instance_actions.CREATE_IMAGE)
  2768. self.compute_rpcapi.snapshot_instance(context, instance,
  2769. image_meta['id'])
  2770. return image_meta
  2771. # NOTE(melwitt): We don't check instance lock for snapshot because lock is
  2772. # intended to prevent accidental change/delete of instances
  2773. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2774. vm_states.SUSPENDED])
  2775. def snapshot_volume_backed(self, context, instance, name,
  2776. extra_properties=None):
  2777. """Snapshot the given volume-backed instance.
  2778. :param instance: nova.objects.instance.Instance object
  2779. :param name: name of the backup or snapshot
  2780. :param extra_properties: dict of extra image properties to include
  2781. :returns: the new image metadata
  2782. """
  2783. image_meta = compute_utils.initialize_instance_snapshot_metadata(
  2784. context, instance, name, extra_properties)
  2785. # the new image is simply a bucket of properties (particularly the
  2786. # block device mapping, kernel and ramdisk IDs) with no image data,
  2787. # hence the zero size
  2788. image_meta['size'] = 0
  2789. for attr in ('container_format', 'disk_format'):
  2790. image_meta.pop(attr, None)
  2791. properties = image_meta['properties']
  2792. # clean properties before filling
  2793. for key in ('block_device_mapping', 'bdm_v2', 'root_device_name'):
  2794. properties.pop(key, None)
  2795. if instance.root_device_name:
  2796. properties['root_device_name'] = instance.root_device_name
  2797. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  2798. context, instance.uuid)
  2799. mapping = [] # list of BDM dicts that can go into the image properties
  2800. # Do some up-front filtering of the list of BDMs from
  2801. # which we are going to create snapshots.
  2802. volume_bdms = []
  2803. for bdm in bdms:
  2804. if bdm.no_device:
  2805. continue
  2806. if bdm.is_volume:
  2807. # These will be handled below.
  2808. volume_bdms.append(bdm)
  2809. else:
  2810. mapping.append(bdm.get_image_mapping())
  2811. # Check limits in Cinder before creating snapshots to avoid going over
  2812. # quota in the middle of a list of volumes. This is a best-effort check
  2813. # but concurrently running snapshot requests from the same project
  2814. # could still fail to create volume snapshots if they go over limit.
  2815. if volume_bdms:
  2816. limits = self.volume_api.get_absolute_limits(context)
  2817. total_snapshots_used = limits['totalSnapshotsUsed']
  2818. max_snapshots = limits['maxTotalSnapshots']
  2819. # -1 means there is unlimited quota for snapshots
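# e.g. with 8 snapshots already used against a limit of 10 and 3
# volume BDMs to snapshot: 3 + 8 > 10, so the request is rejected
# before any snapshot is created.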
  2820. if (max_snapshots > -1 and
  2821. len(volume_bdms) + total_snapshots_used > max_snapshots):
  2822. LOG.debug('Unable to create volume snapshots for instance. '
  2823. 'Currently has %s snapshots, requesting %s new '
  2824. 'snapshots, with a limit of %s.',
  2825. total_snapshots_used, len(volume_bdms),
  2826. max_snapshots, instance=instance)
  2827. raise exception.OverQuota(overs='snapshots')
  2828. quiesced = False
  2829. if instance.vm_state == vm_states.ACTIVE:
  2830. try:
  2831. LOG.info("Attempting to quiesce instance before volume "
  2832. "snapshot.", instance=instance)
  2833. self.compute_rpcapi.quiesce_instance(context, instance)
  2834. quiesced = True
  2835. except (exception.InstanceQuiesceNotSupported,
  2836. exception.QemuGuestAgentNotEnabled,
  2837. exception.NovaException, NotImplementedError) as err:
  2838. if strutils.bool_from_string(instance.system_metadata.get(
  2839. 'image_os_require_quiesce')):
  2840. raise
  2841. if isinstance(err, exception.NovaException):
  2842. LOG.info('Skipping quiescing instance: %(reason)s.',
  2843. {'reason': err.format_message()},
  2844. instance=instance)
  2845. else:
  2846. LOG.info('Skipping quiescing instance because the '
  2847. 'operation is not supported by the underlying '
  2848. 'compute driver.', instance=instance)
  2849. # NOTE(tasker): discovered that an uncaught exception could occur
  2850. # after the instance has been frozen. catch and thaw.
  2851. except Exception as ex:
  2852. with excutils.save_and_reraise_exception():
  2853. LOG.error("An error occurred during quiesce of instance. "
  2854. "Unquiescing to ensure instance is thawed. "
  2855. "Error: %s", six.text_type(ex),
  2856. instance=instance)
  2857. self.compute_rpcapi.unquiesce_instance(context, instance,
  2858. mapping=None)
  2859. @wrap_instance_event(prefix='api')
  2860. def snapshot_instance(self, context, instance, bdms):
  2861. try:
  2862. for bdm in volume_bdms:
  2863. # create snapshot based on volume_id
  2864. volume = self.volume_api.get(context, bdm.volume_id)
  2865. # NOTE(yamahata): Should we wait for snapshot creation?
2866. # Linux LVM snapshot creation completes in a short time,
2867. # so it doesn't matter for now.
  2868. name = _('snapshot for %s') % image_meta['name']
  2869. LOG.debug('Creating snapshot from volume %s.',
  2870. volume['id'], instance=instance)
  2871. snapshot = self.volume_api.create_snapshot_force(
  2872. context, volume['id'],
  2873. name, volume['display_description'])
  2874. mapping_dict = block_device.snapshot_from_bdm(
  2875. snapshot['id'], bdm)
  2876. mapping_dict = mapping_dict.get_image_mapping()
  2877. mapping.append(mapping_dict)
  2878. return mapping
  2879. # NOTE(tasker): No error handling is done in the above for loop.
  2880. # This means that if the snapshot fails and throws an exception
  2881. # the traceback will skip right over the unquiesce needed below.
  2882. # Here, catch any exception, unquiesce the instance, and raise the
  2883. # error so that the calling function can do what it needs to in
  2884. # order to properly treat a failed snap.
  2885. except Exception:
  2886. with excutils.save_and_reraise_exception():
  2887. if quiesced:
  2888. LOG.info("Unquiescing instance after volume snapshot "
  2889. "failure.", instance=instance)
  2890. self.compute_rpcapi.unquiesce_instance(
  2891. context, instance, mapping)
  2892. self._record_action_start(context, instance,
  2893. instance_actions.CREATE_IMAGE)
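# snapshot_instance above is a local function, not a bound method, so
# 'self' is passed explicitly; the wrap_instance_event decorator
# records the volume snapshot loop as an instance action event.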
  2894. mapping = snapshot_instance(self, context, instance, bdms)
  2895. if quiesced:
  2896. self.compute_rpcapi.unquiesce_instance(context, instance, mapping)
  2897. if mapping:
  2898. properties['block_device_mapping'] = mapping
  2899. properties['bdm_v2'] = True
  2900. return self.image_api.create(context, image_meta)
  2901. @check_instance_lock
  2902. def reboot(self, context, instance, reboot_type):
  2903. """Reboot the given instance."""
  2904. if reboot_type == 'SOFT':
  2905. self._soft_reboot(context, instance)
  2906. else:
  2907. self._hard_reboot(context, instance)
  2908. @check_instance_state(vm_state=set(vm_states.ALLOW_SOFT_REBOOT),
  2909. task_state=[None])
  2910. def _soft_reboot(self, context, instance):
  2911. expected_task_state = [None]
  2912. instance.task_state = task_states.REBOOTING
  2913. instance.save(expected_task_state=expected_task_state)
  2914. self._record_action_start(context, instance, instance_actions.REBOOT)
  2915. self.compute_rpcapi.reboot_instance(context, instance=instance,
  2916. block_device_info=None,
  2917. reboot_type='SOFT')
  2918. @check_instance_state(vm_state=set(vm_states.ALLOW_HARD_REBOOT),
  2919. task_state=task_states.ALLOW_REBOOT)
  2920. def _hard_reboot(self, context, instance):
  2921. instance.task_state = task_states.REBOOTING_HARD
  2922. instance.save(expected_task_state=task_states.ALLOW_REBOOT)
  2923. self._record_action_start(context, instance, instance_actions.REBOOT)
  2924. self.compute_rpcapi.reboot_instance(context, instance=instance,
  2925. block_device_info=None,
  2926. reboot_type='HARD')
  2927. # TODO(stephenfin): We should expand kwargs out to named args
  2928. @check_instance_lock
  2929. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2930. vm_states.ERROR])
  2931. def rebuild(self, context, instance, image_href, admin_password,
  2932. files_to_inject=None, **kwargs):
  2933. """Rebuild the given instance with the provided attributes."""
  2934. files_to_inject = files_to_inject or []
  2935. metadata = kwargs.get('metadata', {})
  2936. preserve_ephemeral = kwargs.get('preserve_ephemeral', False)
  2937. auto_disk_config = kwargs.get('auto_disk_config')
  2938. if 'key_name' in kwargs:
  2939. key_name = kwargs.pop('key_name')
  2940. if key_name:
  2941. # NOTE(liuyulong): we are intentionally using the user_id from
  2942. # the request context rather than the instance.user_id because
  2943. # users own keys but instances are owned by projects, and
  2944. # another user in the same project can rebuild an instance
  2945. # even if they didn't create it.
  2946. key_pair = objects.KeyPair.get_by_name(context,
  2947. context.user_id,
  2948. key_name)
  2949. instance.key_name = key_pair.name
  2950. instance.key_data = key_pair.public_key
  2951. instance.keypairs = objects.KeyPairList(objects=[key_pair])
  2952. else:
  2953. instance.key_name = None
  2954. instance.key_data = None
  2955. instance.keypairs = objects.KeyPairList(objects=[])
  2956. # Use trusted_certs value from kwargs to create TrustedCerts object
  2957. trusted_certs = None
  2958. if 'trusted_certs' in kwargs:
  2959. # Note that the user can set, change, or unset / reset trusted
  2960. # certs. If they are explicitly specifying
  2961. # trusted_image_certificates=None, that means we'll either unset
  2962. # them on the instance *or* reset to use the defaults (if defaults
  2963. # are configured).
  2964. trusted_certs = kwargs.pop('trusted_certs')
  2965. instance.trusted_certs = self._retrieve_trusted_certs_object(
  2966. context, trusted_certs, rebuild=True)
  2967. image_id, image = self._get_image(context, image_href)
  2968. self._check_auto_disk_config(image=image,
  2969. auto_disk_config=auto_disk_config)
  2970. flavor = instance.get_flavor()
  2971. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  2972. context, instance.uuid)
  2973. root_bdm = compute_utils.get_root_bdm(context, instance, bdms)
  2974. # Check to see if the image is changing and we have a volume-backed
  2975. # server. The compute doesn't support changing the image in the
  2976. # root disk of a volume-backed server, so we need to just fail fast.
  2977. is_volume_backed = compute_utils.is_volume_backed_instance(
  2978. context, instance, bdms)
  2979. if is_volume_backed:
  2980. if trusted_certs:
  2981. # The only way we can get here is if the user tried to set
  2982. # trusted certs or specified trusted_image_certificates=None
  2983. # and default_trusted_certificate_ids is configured.
  2984. msg = _("Image certificate validation is not supported "
  2985. "for volume-backed servers.")
  2986. raise exception.CertificateValidationFailed(message=msg)
  2987. # For boot from volume, instance.image_ref is empty, so we need to
  2988. # query the image from the volume.
  2989. if root_bdm is None:
2990. # This shouldn't happen and is an error; we need to fail. This
2991. # is not the user's fault, it's an internal error. Without a
  2992. # root BDM we have no way of knowing the backing volume (or
  2993. # image in that volume) for this instance.
  2994. raise exception.NovaException(
  2995. _('Unable to find root block device mapping for '
  2996. 'volume-backed instance.'))
  2997. volume = self.volume_api.get(context, root_bdm.volume_id)
  2998. volume_image_metadata = volume.get('volume_image_metadata', {})
  2999. orig_image_ref = volume_image_metadata.get('image_id')
  3000. if orig_image_ref != image_href:
  3001. # Leave a breadcrumb.
  3002. LOG.debug('Requested to rebuild instance with a new image %s '
  3003. 'for a volume-backed server with image %s in its '
  3004. 'root volume which is not supported.', image_href,
  3005. orig_image_ref, instance=instance)
  3006. msg = _('Unable to rebuild with a different image for a '
  3007. 'volume-backed server.')
  3008. raise exception.ImageUnacceptable(
  3009. image_id=image_href, reason=msg)
  3010. else:
  3011. orig_image_ref = instance.image_ref
  3012. request_spec = objects.RequestSpec.get_by_instance_uuid(
  3013. context, instance.uuid)
  3014. self._checks_for_create_and_rebuild(context, image_id, image,
  3015. flavor, metadata, files_to_inject, root_bdm)
  3016. # NOTE(sean-k-mooney): When we rebuild with a new image we need to
  3017. # validate that the NUMA topology does not change as we do a NOOP claim
  3018. # in resource tracker. As such we cannot allow the resource usage or
  3019. # assignment to change as a result of a new image altering the
  3020. # numa constraints.
  3021. if orig_image_ref != image_href:
  3022. self._validate_numa_rebuild(instance, image, flavor)
  3023. kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
  3024. context, None, None, image)
  3025. def _reset_image_metadata():
  3026. """Remove old image properties that we're storing as instance
  3027. system metadata. These properties start with 'image_'.
  3028. Then add the properties for the new image.
  3029. """
  3030. # FIXME(comstud): There's a race condition here in that if
  3031. # the system_metadata for this instance is updated after
  3032. # we do the previous save() and before we update.. those
  3033. # other updates will be lost. Since this problem exists in
  3034. # a lot of other places, I think it should be addressed in
  3035. # a DB layer overhaul.
  3036. orig_sys_metadata = dict(instance.system_metadata)
  3037. # Remove the old keys
  3038. for key in list(instance.system_metadata.keys()):
  3039. if key.startswith(utils.SM_IMAGE_PROP_PREFIX):
  3040. del instance.system_metadata[key]
  3041. # Add the new ones
  3042. new_sys_metadata = utils.get_system_metadata_from_image(
  3043. image, flavor)
  3044. instance.system_metadata.update(new_sys_metadata)
  3045. instance.save()
  3046. return orig_sys_metadata
  3047. # Since image might have changed, we may have new values for
  3048. # os_type, vm_mode, etc
  3049. options_from_image = self._inherit_properties_from_image(
  3050. image, auto_disk_config)
  3051. instance.update(options_from_image)
  3052. instance.task_state = task_states.REBUILDING
  3053. # An empty instance.image_ref is currently used as an indication
  3054. # of BFV. Preserve that over a rebuild to not break users.
  3055. if not is_volume_backed:
  3056. instance.image_ref = image_href
  3057. instance.kernel_id = kernel_id or ""
  3058. instance.ramdisk_id = ramdisk_id or ""
  3059. instance.progress = 0
  3060. instance.update(kwargs)
  3061. instance.save(expected_task_state=[None])
  3062. # On a rebuild, since we're potentially changing images, we need to
  3063. # wipe out the old image properties that we're storing as instance
  3064. # system metadata... and copy in the properties for the new image.
  3065. orig_sys_metadata = _reset_image_metadata()
  3066. self._record_action_start(context, instance, instance_actions.REBUILD)
  3067. # NOTE(sbauza): The migration script we provided in Newton should make
  3068. # sure that all our instances are currently migrated to have an
  3069. # attached RequestSpec object but let's consider that the operator only
  3070. # half migrated all their instances in the meantime.
  3071. host = instance.host
  3072. # If a new image is provided on rebuild, we will need to run
  3073. # through the scheduler again, but we want the instance to be
  3074. # rebuilt on the same host it's already on.
  3075. if orig_image_ref != image_href:
  3076. # We have to modify the request spec that goes to the scheduler
  3077. # to contain the new image. We persist this since we've already
  3078. # changed the instance.image_ref above so we're being
  3079. # consistent.
  3080. request_spec.image = objects.ImageMeta.from_dict(image)
  3081. request_spec.save()
  3082. if 'scheduler_hints' not in request_spec:
  3083. request_spec.scheduler_hints = {}
  3084. # Nuke the id on this so we can't accidentally save
  3085. # this hint hack later
  3086. del request_spec.id
  3087. # NOTE(danms): Passing host=None tells conductor to
  3088. # call the scheduler. The _nova_check_type hint
  3089. # requires that the scheduler returns only the same
  3090. # host that we are currently on and only checks
  3091. # rebuild-related filters.
  3092. request_spec.scheduler_hints['_nova_check_type'] = ['rebuild']
  3093. request_spec.force_hosts = [instance.host]
  3094. request_spec.force_nodes = [instance.node]
  3095. host = None
  3096. self.compute_task_api.rebuild_instance(context, instance=instance,
  3097. new_pass=admin_password, injected_files=files_to_inject,
  3098. image_ref=image_href, orig_image_ref=orig_image_ref,
  3099. orig_sys_metadata=orig_sys_metadata, bdms=bdms,
  3100. preserve_ephemeral=preserve_ephemeral, host=host,
  3101. request_spec=request_spec)
  3102. @staticmethod
  3103. def _validate_numa_rebuild(instance, image, flavor):
  3104. """validates that the NUMA constraints do not change on rebuild.
  3105. :param instance: nova.objects.instance.Instance object
  3106. :param image: the new image the instance will be rebuilt with.
  3107. :param flavor: the flavor of the current instance.
  3108. :raises: nova.exception.ImageNUMATopologyRebuildConflict
  3109. """
  3110. # NOTE(sean-k-mooney): currently it is not possible to express
  3111. # a PCI NUMA affinity policy via flavor or image but that will
3112. # change in the future. We pull out the image metadata into a
3113. # separate variable to make future testing of this easier.
  3114. old_image_meta = instance.image_meta
  3115. new_image_meta = objects.ImageMeta.from_dict(image)
  3116. old_constraints = hardware.numa_get_constraints(flavor, old_image_meta)
  3117. new_constraints = hardware.numa_get_constraints(flavor, new_image_meta)
  3118. # early out for non NUMA instances
  3119. if old_constraints is None and new_constraints is None:
  3120. return
3121. # if only one of the constraints is non-None (or 'set') then the
3122. # constraints changed, so raise an exception.
  3123. if old_constraints is None or new_constraints is None:
  3124. action = "removing" if old_constraints else "introducing"
  3125. LOG.debug("NUMA rebuild validation failed. The requested image "
  3126. "would alter the NUMA constraints by %s a NUMA "
  3127. "topology.", action, instance=instance)
  3128. raise exception.ImageNUMATopologyRebuildConflict()
3129. # otherwise, since both the old and new constraints are non-None,
3130. # compare them as dictionaries.
  3131. old = old_constraints.obj_to_primitive()
  3132. new = new_constraints.obj_to_primitive()
  3133. if old != new:
  3134. LOG.debug("NUMA rebuild validation failed. The requested image "
  3135. "conflicts with the existing NUMA constraints.",
  3136. instance=instance)
  3137. raise exception.ImageNUMATopologyRebuildConflict()
  3138. # TODO(sean-k-mooney): add PCI NUMA affinity policy check.
  3139. @staticmethod
  3140. def _check_quota_for_upsize(context, instance, current_flavor, new_flavor):
  3141. project_id, user_id = quotas_obj.ids_from_instance(context,
  3142. instance)
  3143. # Deltas will be empty if the resize is not an upsize.
  3144. deltas = compute_utils.upsize_quota_delta(new_flavor,
  3145. current_flavor)
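# e.g. resizing from a 2 VCPU / 4096 MB flavor to a 4 VCPU / 8192 MB
# one yields deltas {'cores': 2, 'ram': 4096}; a downsize yields an
# empty dict and the quota check below is skipped.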
  3146. if deltas:
  3147. try:
  3148. res_deltas = {'cores': deltas.get('cores', 0),
  3149. 'ram': deltas.get('ram', 0)}
  3150. objects.Quotas.check_deltas(context, res_deltas,
  3151. project_id, user_id=user_id,
  3152. check_project_id=project_id,
  3153. check_user_id=user_id)
  3154. except exception.OverQuota as exc:
  3155. quotas = exc.kwargs['quotas']
  3156. overs = exc.kwargs['overs']
  3157. usages = exc.kwargs['usages']
  3158. headroom = compute_utils.get_headroom(quotas, usages,
  3159. deltas)
  3160. (overs, reqs, total_alloweds,
  3161. useds) = compute_utils.get_over_quota_detail(headroom,
  3162. overs,
  3163. quotas,
  3164. deltas)
  3165. LOG.info("%(overs)s quota exceeded for %(pid)s,"
  3166. " tried to resize instance.",
  3167. {'overs': overs, 'pid': context.project_id})
  3168. raise exception.TooManyInstances(overs=overs,
  3169. req=reqs,
  3170. used=useds,
  3171. allowed=total_alloweds)
  3172. @check_instance_lock
  3173. @check_instance_state(vm_state=[vm_states.RESIZED])
  3174. def revert_resize(self, context, instance):
  3175. """Reverts a resize or cold migration, deleting the 'new' instance in
  3176. the process.
  3177. """
  3178. elevated = context.elevated()
  3179. migration = objects.Migration.get_by_instance_and_status(
  3180. elevated, instance.uuid, 'finished')
  3181. # If this is a resize down, a revert might go over quota.
  3182. self._check_quota_for_upsize(context, instance, instance.flavor,
  3183. instance.old_flavor)
  3184. # The AZ for the server may have changed when it was migrated so while
  3185. # we are in the API and have access to the API DB, update the
  3186. # instance.availability_zone before casting off to the compute service.
  3187. # Note that we do this in the API to avoid an "up-call" from the
  3188. # compute service to the API DB. This is not great in case something
  3189. # fails during revert before the instance.host is updated to the
  3190. # original source host, but it is good enough for now. Long-term we
  3191. # could consider passing the AZ down to compute so it can set it when
  3192. # the instance.host value is set in finish_revert_resize.
  3193. instance.availability_zone = (
  3194. availability_zones.get_host_availability_zone(
  3195. context, migration.source_compute))
  3196. # Conductor updated the RequestSpec.flavor during the initial resize
  3197. # operation to point at the new flavor, so we need to update the
  3198. # RequestSpec to point back at the original flavor, otherwise
  3199. # subsequent move operations through the scheduler will be using the
  3200. # wrong flavor.
  3201. reqspec = objects.RequestSpec.get_by_instance_uuid(
  3202. context, instance.uuid)
  3203. reqspec.flavor = instance.old_flavor
  3204. reqspec.save()
  3205. # NOTE(gibi): This is a performance optimization. If the network info
  3206. # cache does not have ports with allocations in the binding profile
  3207. # then we can skip reading port resource request from neutron below.
3208. # If a port has a resource request then the finish_resize call
3209. # would have already put an allocation in the binding profile
3210. # during the resize.
  3211. if instance.get_network_info().has_port_with_allocation():
  3212. # TODO(gibi): do not directly overwrite the
3213. # RequestSpec.requested_resources as others like cyborg might have
3214. # already added things there
  3215. # NOTE(gibi): We need to collect the requested resource again as it
  3216. # is intentionally not persisted in nova. Note that this needs to
  3217. # be done here as the nova API code directly calls revert on the
  3218. # dest compute service skipping the conductor.
  3219. port_res_req = (
  3220. self.network_api.get_requested_resource_for_instance(
  3221. context, instance.uuid))
  3222. reqspec.requested_resources = port_res_req
  3223. instance.task_state = task_states.RESIZE_REVERTING
  3224. instance.save(expected_task_state=[None])
  3225. migration.status = 'reverting'
  3226. migration.save()
  3227. self._record_action_start(context, instance,
  3228. instance_actions.REVERT_RESIZE)
  3229. if migration.cross_cell_move:
  3230. # RPC cast to conductor to orchestrate the revert of the cross-cell
  3231. # resize.
  3232. self.compute_task_api.revert_snapshot_based_resize(
  3233. context, instance, migration)
  3234. else:
  3235. # TODO(melwitt): We're not rechecking for strict quota here to
  3236. # guard against going over quota during a race at this time because
  3237. # the resource consumption for this operation is written to the
  3238. # database by compute.
  3239. self.compute_rpcapi.revert_resize(context, instance,
  3240. migration,
  3241. migration.dest_compute,
  3242. reqspec)
  3243. @check_instance_lock
  3244. @check_instance_state(vm_state=[vm_states.RESIZED])
  3245. def confirm_resize(self, context, instance, migration=None):
  3246. """Confirms a migration/resize and deletes the 'old' instance."""
  3247. elevated = context.elevated()
  3248. # NOTE(melwitt): We're not checking quota here because there isn't a
  3249. # change in resource usage when confirming a resize. Resource
  3250. # consumption for resizes are written to the database by compute, so
  3251. # a confirm resize is just a clean up of the migration objects and a
  3252. # state change in compute.
  3253. if migration is None:
  3254. migration = objects.Migration.get_by_instance_and_status(
  3255. elevated, instance.uuid, 'finished')
  3256. migration.status = 'confirming'
  3257. migration.save()
  3258. self._record_action_start(context, instance,
  3259. instance_actions.CONFIRM_RESIZE)
  3260. # Check to see if this was a cross-cell resize, in which case the
  3261. # resized instance is in the target cell (the migration and instance
  3262. # came from the target cell DB in this case), and we need to cleanup
  3263. # the source host and source cell database records.
  3264. if migration.cross_cell_move:
  3265. self.compute_task_api.confirm_snapshot_based_resize(
  3266. context, instance, migration)
  3267. else:
  3268. # It's a traditional resize within a single cell, so RPC cast to
  3269. # the source compute host to cleanup the host since the instance
  3270. # is already on the target host.
  3271. self.compute_rpcapi.confirm_resize(context,
  3272. instance,
  3273. migration,
  3274. migration.source_compute)
  3275. @staticmethod
  3276. def _allow_cross_cell_resize(context, instance):
  3277. """Determine if the request can perform a cross-cell resize on this
  3278. instance.
  3279. :param context: nova auth request context for the resize operation
  3280. :param instance: Instance object being resized
  3281. :returns: True if cross-cell resize is allowed, False otherwise
  3282. """
  3283. # First check to see if the requesting project/user is allowed by
  3284. # policy to perform cross-cell resize.
  3285. allowed = context.can(
  3286. servers_policies.CROSS_CELL_RESIZE,
  3287. target={'user_id': instance.user_id,
  3288. 'project_id': instance.project_id},
  3289. fatal=False)
  3290. # If the user is allowed by policy, check to make sure the deployment
  3291. # is upgraded to the point of supporting cross-cell resize on all
  3292. # compute services.
  3293. if allowed:
  3294. # TODO(mriedem): We can remove this minimum compute version check
  3295. # in the 22.0.0 "V" release.
  3296. min_compute_version = (
  3297. objects.service.get_minimum_version_all_cells(
  3298. context, ['nova-compute']))
  3299. if min_compute_version < MIN_COMPUTE_CROSS_CELL_RESIZE:
  3300. LOG.debug('Request is allowed by policy to perform cross-cell '
  3301. 'resize but the minimum nova-compute service '
  3302. 'version in the deployment %s is less than %s so '
  3303. 'cross-cell resize is not allowed at this time.',
  3304. min_compute_version, MIN_COMPUTE_CROSS_CELL_RESIZE)
  3305. allowed = False
  3306. return allowed
  3307. @staticmethod
  3308. def _validate_host_for_cold_migrate(
  3309. context, instance, host_name, allow_cross_cell_resize):
  3310. """Validates a host specified for cold migration.
  3311. :param context: nova auth request context for the cold migration
  3312. :param instance: Instance object being cold migrated
  3313. :param host_name: User-specified compute service hostname for the
  3314. desired destination of the instance during the cold migration
  3315. :param allow_cross_cell_resize: If True, cross-cell resize is allowed
  3316. for this operation and the host could be in a different cell from
3317. the one that the instance is currently in. If False, the specified
  3318. host must be in the same cell as the instance.
  3319. :returns: ComputeNode object of the requested host
  3320. :raises: CannotMigrateToSameHost if the host is the same as the
  3321. current instance.host
  3322. :raises: ComputeHostNotFound if the specified host cannot be found
  3323. """
  3324. # Cannot migrate to the host where the instance exists
  3325. # because it is useless.
  3326. if host_name == instance.host:
  3327. raise exception.CannotMigrateToSameHost()
  3328. # Check whether host exists or not. If a cross-cell resize is
  3329. # allowed, the host could be in another cell from the one the
  3330. # instance is currently in, so we need to lookup the HostMapping
  3331. # to get the cell and lookup the ComputeNode in that cell.
  3332. if allow_cross_cell_resize:
  3333. try:
  3334. hm = objects.HostMapping.get_by_host(context, host_name)
  3335. except exception.HostMappingNotFound:
  3336. LOG.info('HostMapping not found for host: %s', host_name)
  3337. raise exception.ComputeHostNotFound(host=host_name)
  3338. with nova_context.target_cell(context, hm.cell_mapping) as cctxt:
  3339. node = objects.ComputeNode.\
  3340. get_first_node_by_host_for_old_compat(
  3341. cctxt, host_name, use_slave=True)
  3342. else:
  3343. node = objects.ComputeNode.get_first_node_by_host_for_old_compat(
  3344. context, host_name, use_slave=True)
  3345. return node
  3346. @check_instance_lock
  3347. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED])
  3348. @check_instance_host(check_is_up=True)
  3349. def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
  3350. host_name=None, auto_disk_config=None):
  3351. """Resize (ie, migrate) a running instance.
  3352. If flavor_id is None, the process is considered a migration, keeping
  3353. the original flavor_id. If flavor_id is not None, the instance should
  3354. be migrated to a new host and resized to the new flavor_id.
  3355. host_name is always None in the resize case.
  3356. host_name can be set in the cold migration case only.
  3357. """
  3358. allow_cross_cell_resize = self._allow_cross_cell_resize(
  3359. context, instance)
  3360. if host_name is not None:
  3361. node = self._validate_host_for_cold_migrate(
  3362. context, instance, host_name, allow_cross_cell_resize)
  3363. self._check_auto_disk_config(
  3364. instance, auto_disk_config=auto_disk_config)
  3365. current_instance_type = instance.get_flavor()
  3366. # If flavor_id is not provided, only migrate the instance.
  3367. volume_backed = None
  3368. if not flavor_id:
  3369. LOG.debug("flavor_id is None. Assuming migration.",
  3370. instance=instance)
  3371. new_instance_type = current_instance_type
  3372. else:
  3373. new_instance_type = flavors.get_flavor_by_flavor_id(
  3374. flavor_id, read_deleted="no")
  3375. # Check to see if we're resizing to a zero-disk flavor which is
  3376. # only supported with volume-backed servers.
  3377. if (new_instance_type.get('root_gb') == 0 and
  3378. current_instance_type.get('root_gb') != 0):
  3379. volume_backed = compute_utils.is_volume_backed_instance(
  3380. context, instance)
  3381. if not volume_backed:
  3382. reason = _('Resize to zero disk flavor is not allowed.')
  3383. raise exception.CannotResizeDisk(reason=reason)
  3384. current_instance_type_name = current_instance_type['name']
  3385. new_instance_type_name = new_instance_type['name']
  3386. LOG.debug("Old instance type %(current_instance_type_name)s, "
  3387. "new instance type %(new_instance_type_name)s",
  3388. {'current_instance_type_name': current_instance_type_name,
  3389. 'new_instance_type_name': new_instance_type_name},
  3390. instance=instance)
  3391. same_instance_type = (current_instance_type['id'] ==
  3392. new_instance_type['id'])
  3393. # NOTE(sirp): We don't want to force a customer to change their flavor
  3394. # when Ops is migrating off of a failed host.
  3395. if not same_instance_type and new_instance_type.get('disabled'):
  3396. raise exception.FlavorNotFound(flavor_id=flavor_id)
  3397. if same_instance_type and flavor_id:
  3398. raise exception.CannotResizeToSameFlavor()
  3399. # ensure there is sufficient headroom for upsizes
  3400. if flavor_id:
  3401. self._check_quota_for_upsize(context, instance,
  3402. current_instance_type,
  3403. new_instance_type)
  3404. if not same_instance_type:
  3405. image = utils.get_image_from_system_metadata(
  3406. instance.system_metadata)
  3407. # Figure out if the instance is volume-backed but only if we didn't
  3408. # already figure that out above (avoid the extra db hit).
  3409. if volume_backed is None:
  3410. volume_backed = compute_utils.is_volume_backed_instance(
  3411. context, instance)
  3412. # If the server is volume-backed, we still want to validate numa
  3413. # and pci information in the new flavor, but we don't call
  3414. # _validate_flavor_image_nostatus because how it handles checking
  3415. # disk size validation was not intended for a volume-backed
  3416. # resize case.
  3417. if volume_backed:
  3418. self._validate_flavor_image_numa_pci(
  3419. image, new_instance_type, validate_pci=True)
  3420. else:
  3421. self._validate_flavor_image_nostatus(
  3422. context, image, new_instance_type, root_bdm=None,
  3423. validate_pci=True)
  3424. filter_properties = {'ignore_hosts': []}
  3425. if not CONF.allow_resize_to_same_host:
  3426. filter_properties['ignore_hosts'].append(instance.host)
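# allow_resize_to_same_host is False by default, in which case the
# current host is added to ignore_hosts so the scheduler must pick a
# different one.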
  3427. request_spec = objects.RequestSpec.get_by_instance_uuid(
  3428. context, instance.uuid)
  3429. request_spec.ignore_hosts = filter_properties['ignore_hosts']
  3430. instance.task_state = task_states.RESIZE_PREP
  3431. instance.progress = 0
  3432. instance.auto_disk_config = auto_disk_config or False
  3433. instance.save(expected_task_state=[None])
  3434. if not flavor_id:
  3435. self._record_action_start(context, instance,
  3436. instance_actions.MIGRATE)
  3437. else:
  3438. self._record_action_start(context, instance,
  3439. instance_actions.RESIZE)
  3440. # TODO(melwitt): We're not rechecking for strict quota here to guard
  3441. # against going over quota during a race at this time because the
  3442. # resource consumption for this operation is written to the database
  3443. # by compute.
  3444. scheduler_hint = {'filter_properties': filter_properties}
  3445. if host_name is None:
  3446. # If 'host_name' is not specified,
  3447. # clear the 'requested_destination' field of the RequestSpec
3448. # except for setting the allow_cross_cell_move flag since conductor
3449. # uses
  3449. # it prior to scheduling.
  3450. request_spec.requested_destination = objects.Destination(
  3451. allow_cross_cell_move=allow_cross_cell_resize)
  3452. else:
  3453. # Set the host and the node so that the scheduler will
  3454. # validate them.
  3455. request_spec.requested_destination = objects.Destination(
  3456. host=node.host, node=node.hypervisor_hostname,
  3457. allow_cross_cell_move=allow_cross_cell_resize)
  3458. # Asynchronously RPC cast to conductor so the response is not blocked
  3459. # during scheduling. If something fails the user can find out via
  3460. # instance actions.
  3461. self.compute_task_api.resize_instance(context, instance,
  3462. scheduler_hint=scheduler_hint,
  3463. flavor=new_instance_type,
  3464. clean_shutdown=clean_shutdown,
  3465. request_spec=request_spec,
  3466. do_cast=True)
  3467. @check_instance_lock
  3468. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  3469. vm_states.PAUSED, vm_states.SUSPENDED])
  3470. def shelve(self, context, instance, clean_shutdown=True):
  3471. """Shelve an instance.
  3472. Shuts down an instance and frees it up to be removed from the
  3473. hypervisor.
  3474. """
  3475. instance.task_state = task_states.SHELVING
  3476. instance.save(expected_task_state=[None])
  3477. self._record_action_start(context, instance, instance_actions.SHELVE)
  3478. if not compute_utils.is_volume_backed_instance(context, instance):
  3479. name = '%s-shelved' % instance.display_name
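# e.g. shelving an instance named 'web01' creates a snapshot image
# called 'web01-shelved'.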
  3480. image_meta = compute_utils.create_image(
  3481. context, instance, name, 'snapshot', self.image_api)
  3482. image_id = image_meta['id']
  3483. self.compute_rpcapi.shelve_instance(context, instance=instance,
  3484. image_id=image_id, clean_shutdown=clean_shutdown)
  3485. else:
  3486. self.compute_rpcapi.shelve_offload_instance(context,
  3487. instance=instance, clean_shutdown=clean_shutdown)
  3488. @check_instance_lock
  3489. @check_instance_state(vm_state=[vm_states.SHELVED])
  3490. def shelve_offload(self, context, instance, clean_shutdown=True):
  3491. """Remove a shelved instance from the hypervisor."""
  3492. instance.task_state = task_states.SHELVING_OFFLOADING
  3493. instance.save(expected_task_state=[None])
  3494. self._record_action_start(context, instance,
  3495. instance_actions.SHELVE_OFFLOAD)
  3496. self.compute_rpcapi.shelve_offload_instance(context, instance=instance,
  3497. clean_shutdown=clean_shutdown)
  3498. def _validate_unshelve_az(self, context, instance, availability_zone):
  3499. """Verify the specified availability_zone during unshelve.
  3500. Verifies that the server is shelved offloaded, the AZ exists and
  3501. if [cinder]/cross_az_attach=False, that any attached volumes are in
  3502. the same AZ.
  3503. :param context: nova auth RequestContext for the unshelve action
  3504. :param instance: Instance object for the server being unshelved
  3505. :param availability_zone: The user-requested availability zone in
  3506. which to unshelve the server.
  3507. :raises: UnshelveInstanceInvalidState if the server is not shelved
  3508. offloaded
  3509. :raises: InvalidRequest if the requested AZ does not exist
  3510. :raises: MismatchVolumeAZException if [cinder]/cross_az_attach=False
  3511. and any attached volumes are not in the requested AZ
  3512. """
  3513. if instance.vm_state != vm_states.SHELVED_OFFLOADED:
  3514. # NOTE(brinzhang): If the server status is 'SHELVED', it still
3515. # belongs to a host, so the availability_zone has not changed.
  3516. # Unshelving a shelved offloaded server will go through the
  3517. # scheduler to find a new host.
  3518. raise exception.UnshelveInstanceInvalidState(
  3519. state=instance.vm_state, instance_uuid=instance.uuid)
  3520. available_zones = availability_zones.get_availability_zones(
  3521. context, self.host_api, get_only_available=True)
  3522. if availability_zone not in available_zones:
  3523. msg = _('The requested availability zone is not available')
  3524. raise exception.InvalidRequest(msg)
3525. # NOTE(brinzhang): When specifying an availability zone to unshelve
3526. # a shelved offloaded server with [cinder]/cross_az_attach=False, we
3527. # need to verify that each attached volume's AZ matches the user-specified AZ.
  3528. if not CONF.cinder.cross_az_attach:
  3529. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  3530. context, instance.uuid)
  3531. for bdm in bdms:
  3532. if bdm.is_volume and bdm.volume_id:
  3533. volume = self.volume_api.get(context, bdm.volume_id)
  3534. if availability_zone != volume['availability_zone']:
  3535. msg = _("The specified availability zone does not "
  3536. "match the volume %(vol_id)s attached to the "
  3537. "server. Specified availability zone is "
  3538. "%(az)s. Volume is in %(vol_zone)s.") % {
  3539. "vol_id": volume['id'],
  3540. "az": availability_zone,
  3541. "vol_zone": volume['availability_zone']}
  3542. raise exception.MismatchVolumeAZException(reason=msg)
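# Illustrative sketch (not part of the Nova API; the helper name and dict
# shapes are hypothetical): the [cinder]/cross_az_attach check above reduces
# to comparing the requested AZ against the AZ of every attached volume. A
# standalone version of that comparison over plain cinder-like dicts:
def _sketch_find_az_mismatches(requested_az, volumes):
    """Return volumes whose availability zone differs from requested_az.

    :param requested_az: AZ the user asked to unshelve into, e.g. 'az-2'
    :param volumes: iterable of dicts with 'id' and 'availability_zone' keys
    """
    return [vol for vol in volumes
            if vol['availability_zone'] != requested_az]

# With cross_az_attach=False, a non-empty result corresponds to the
# MismatchVolumeAZException raised by the method above, e.g.:
# _sketch_find_az_mismatches('az-2', [{'id': 'v1', 'availability_zone': 'az-1'}])
# -> [{'id': 'v1', 'availability_zone': 'az-1'}]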
  3543. @check_instance_lock
  3544. @check_instance_state(vm_state=[vm_states.SHELVED,
  3545. vm_states.SHELVED_OFFLOADED])
  3546. def unshelve(self, context, instance, new_az=None):
  3547. """Restore a shelved instance."""
  3548. request_spec = objects.RequestSpec.get_by_instance_uuid(
  3549. context, instance.uuid)
  3550. if new_az:
  3551. self._validate_unshelve_az(context, instance, new_az)
  3552. LOG.debug("Replace the old AZ %(old_az)s in RequestSpec "
  3553. "with a new AZ %(new_az)s of the instance.",
  3554. {"old_az": request_spec.availability_zone,
  3555. "new_az": new_az}, instance=instance)
  3556. # Unshelving a shelved offloaded server will go through the
  3557. # scheduler to pick a new host, so we update the
  3558. # RequestSpec.availability_zone here. Note that if scheduling
  3559. # fails the RequestSpec will remain updated, which is not great,
  3560. # but if we want to change that we need to defer updating the
  3561. # RequestSpec until conductor which probably means RPC changes to
  3562. # pass the new_az variable to conductor. This is likely low
  3563. # priority since the RequestSpec.availability_zone on a shelved
  3564. # offloaded server does not mean much anyway and clearly the user
  3565. # is trying to put the server in the target AZ.
  3566. request_spec.availability_zone = new_az
  3567. request_spec.save()
  3568. instance.task_state = task_states.UNSHELVING
  3569. instance.save(expected_task_state=[None])
  3570. self._record_action_start(context, instance, instance_actions.UNSHELVE)
  3571. self.compute_task_api.unshelve_instance(context, instance,
  3572. request_spec)
  3573. @check_instance_lock
  3574. def add_fixed_ip(self, context, instance, network_id):
  3575. """Add fixed_ip from specified network to given instance."""
  3576. self.compute_rpcapi.add_fixed_ip_to_instance(context,
  3577. instance=instance, network_id=network_id)
  3578. @check_instance_lock
  3579. def remove_fixed_ip(self, context, instance, address):
  3580. """Remove fixed_ip from specified network to given instance."""
  3581. self.compute_rpcapi.remove_fixed_ip_from_instance(context,
  3582. instance=instance, address=address)
  3583. @check_instance_lock
  3584. @check_instance_state(vm_state=[vm_states.ACTIVE])
  3585. def pause(self, context, instance):
  3586. """Pause the given instance."""
  3587. instance.task_state = task_states.PAUSING
  3588. instance.save(expected_task_state=[None])
  3589. self._record_action_start(context, instance, instance_actions.PAUSE)
  3590. self.compute_rpcapi.pause_instance(context, instance)
  3591. @check_instance_lock
  3592. @check_instance_state(vm_state=[vm_states.PAUSED])
  3593. def unpause(self, context, instance):
  3594. """Unpause the given instance."""
  3595. instance.task_state = task_states.UNPAUSING
  3596. instance.save(expected_task_state=[None])
  3597. self._record_action_start(context, instance, instance_actions.UNPAUSE)
  3598. self.compute_rpcapi.unpause_instance(context, instance)
  3599. @check_instance_host()
  3600. def get_diagnostics(self, context, instance):
  3601. """Retrieve diagnostics for the given instance."""
  3602. return self.compute_rpcapi.get_diagnostics(context, instance=instance)
  3603. @check_instance_host()
  3604. def get_instance_diagnostics(self, context, instance):
  3605. """Retrieve diagnostics for the given instance."""
  3606. return self.compute_rpcapi.get_instance_diagnostics(context,
  3607. instance=instance)
  3608. @reject_sev_instances(instance_actions.SUSPEND)
  3609. @check_instance_lock
  3610. @check_instance_state(vm_state=[vm_states.ACTIVE])
  3611. def suspend(self, context, instance):
  3612. """Suspend the given instance."""
  3613. instance.task_state = task_states.SUSPENDING
  3614. instance.save(expected_task_state=[None])
  3615. self._record_action_start(context, instance, instance_actions.SUSPEND)
  3616. self.compute_rpcapi.suspend_instance(context, instance)
  3617. @check_instance_lock
  3618. @check_instance_state(vm_state=[vm_states.SUSPENDED])
  3619. def resume(self, context, instance):
  3620. """Resume the given instance."""
  3621. instance.task_state = task_states.RESUMING
  3622. instance.save(expected_task_state=[None])
  3623. self._record_action_start(context, instance, instance_actions.RESUME)
  3624. self.compute_rpcapi.resume_instance(context, instance)
  3625. @check_instance_lock
  3626. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  3627. vm_states.ERROR])
  3628. def rescue(self, context, instance, rescue_password=None,
  3629. rescue_image_ref=None, clean_shutdown=True):
  3630. """Rescue the given instance."""
  3631. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  3632. context, instance.uuid)
  3633. for bdm in bdms:
  3634. if bdm.volume_id:
  3635. vol = self.volume_api.get(context, bdm.volume_id)
  3636. self.volume_api.check_attached(context, vol)
  3637. if compute_utils.is_volume_backed_instance(context, instance, bdms):
  3638. reason = _("Cannot rescue a volume-backed instance")
  3639. raise exception.InstanceNotRescuable(instance_id=instance.uuid,
  3640. reason=reason)
  3641. instance.task_state = task_states.RESCUING
  3642. instance.save(expected_task_state=[None])
  3643. self._record_action_start(context, instance, instance_actions.RESCUE)
  3644. self.compute_rpcapi.rescue_instance(context, instance=instance,
  3645. rescue_password=rescue_password, rescue_image_ref=rescue_image_ref,
  3646. clean_shutdown=clean_shutdown)
  3647. @check_instance_lock
  3648. @check_instance_state(vm_state=[vm_states.RESCUED])
  3649. def unrescue(self, context, instance):
  3650. """Unrescue the given instance."""
  3651. instance.task_state = task_states.UNRESCUING
  3652. instance.save(expected_task_state=[None])
  3653. self._record_action_start(context, instance, instance_actions.UNRESCUE)
  3654. self.compute_rpcapi.unrescue_instance(context, instance=instance)
  3655. @check_instance_lock
  3656. @check_instance_state(vm_state=[vm_states.ACTIVE])
  3657. def set_admin_password(self, context, instance, password):
  3658. """Set the root/admin password for the given instance.
3659. :param context: Nova auth context.
3660. :param instance: Nova instance object.
3661. :param password: The admin password for the instance.
  3662. """
  3663. instance.task_state = task_states.UPDATING_PASSWORD
  3664. instance.save(expected_task_state=[None])
  3665. self._record_action_start(context, instance,
  3666. instance_actions.CHANGE_PASSWORD)
  3667. self.compute_rpcapi.set_admin_password(context,
  3668. instance=instance,
  3669. new_pass=password)
  3670. @check_instance_host()
  3671. @reject_instance_state(
  3672. task_state=[task_states.DELETING, task_states.MIGRATING])
  3673. def get_vnc_console(self, context, instance, console_type):
  3674. """Get a url to an instance Console."""
  3675. connect_info = self.compute_rpcapi.get_vnc_console(context,
  3676. instance=instance, console_type=console_type)
  3677. return {'url': connect_info['access_url']}
  3678. @check_instance_host()
  3679. @reject_instance_state(
  3680. task_state=[task_states.DELETING, task_states.MIGRATING])
  3681. def get_spice_console(self, context, instance, console_type):
  3682. """Get a url to an instance Console."""
  3683. connect_info = self.compute_rpcapi.get_spice_console(context,
  3684. instance=instance, console_type=console_type)
  3685. return {'url': connect_info['access_url']}
  3686. @check_instance_host()
  3687. @reject_instance_state(
  3688. task_state=[task_states.DELETING, task_states.MIGRATING])
  3689. def get_rdp_console(self, context, instance, console_type):
  3690. """Get a url to an instance Console."""
  3691. connect_info = self.compute_rpcapi.get_rdp_console(context,
  3692. instance=instance, console_type=console_type)
  3693. return {'url': connect_info['access_url']}
  3694. @check_instance_host()
  3695. @reject_instance_state(
  3696. task_state=[task_states.DELETING, task_states.MIGRATING])
  3697. def get_serial_console(self, context, instance, console_type):
  3698. """Get a url to a serial console."""
  3699. connect_info = self.compute_rpcapi.get_serial_console(context,
  3700. instance=instance, console_type=console_type)
  3701. return {'url': connect_info['access_url']}
  3702. @check_instance_host()
  3703. @reject_instance_state(
  3704. task_state=[task_states.DELETING, task_states.MIGRATING])
  3705. def get_mks_console(self, context, instance, console_type):
  3706. """Get a url to a MKS console."""
  3707. connect_info = self.compute_rpcapi.get_mks_console(context,
  3708. instance=instance, console_type=console_type)
  3709. return {'url': connect_info['access_url']}
  3710. @check_instance_host()
  3711. def get_console_output(self, context, instance, tail_length=None):
  3712. """Get console output for an instance."""
  3713. return self.compute_rpcapi.get_console_output(context,
  3714. instance=instance, tail_length=tail_length)
  3715. def lock(self, context, instance, reason=None):
  3716. """Lock the given instance."""
  3717. # Only update the lock if we are an admin (non-owner)
  3718. is_owner = instance.project_id == context.project_id
  3719. if instance.locked and is_owner:
  3720. return
  3721. context = context.elevated()
  3722. self._record_action_start(context, instance,
  3723. instance_actions.LOCK)
  3724. @wrap_instance_event(prefix='api')
  3725. def lock(self, context, instance, reason=None):
  3726. LOG.debug('Locking', instance=instance)
  3727. instance.locked = True
  3728. instance.locked_by = 'owner' if is_owner else 'admin'
  3729. if reason:
  3730. instance.system_metadata['locked_reason'] = reason
  3731. instance.save()
  3732. lock(self, context, instance, reason=reason)
  3733. compute_utils.notify_about_instance_action(
  3734. context, instance, CONF.host,
  3735. action=fields_obj.NotificationAction.LOCK,
  3736. source=fields_obj.NotificationSource.API)
  3737. def is_expected_locked_by(self, context, instance):
  3738. is_owner = instance.project_id == context.project_id
  3739. expect_locked_by = 'owner' if is_owner else 'admin'
  3740. locked_by = instance.locked_by
  3741. if locked_by and locked_by != expect_locked_by:
  3742. return False
  3743. return True
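# Illustrative sketch (not part of the Nova API; the helper name is
# hypothetical): lock() records the lock holder as either 'owner' or 'admin',
# and is_expected_locked_by() compares that against the caller. The same rule
# as a standalone function:
def _sketch_expected_locked_by(caller_is_owner, locked_by):
    """Return True if the caller matches the recorded lock holder.

    :param caller_is_owner: True if the request project owns the instance
    :param locked_by: 'owner', 'admin' or None as stored on the instance
    """
    expected = 'owner' if caller_is_owner else 'admin'
    # An unlocked instance (locked_by is None) always matches.
    return locked_by is None or locked_by == expected

# e.g. _sketch_expected_locked_by(True, 'admin') -> False: the owner is not
# treated as the holder of an admin-placed lock.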
  3744. def unlock(self, context, instance):
  3745. """Unlock the given instance."""
  3746. context = context.elevated()
  3747. self._record_action_start(context, instance,
  3748. instance_actions.UNLOCK)
  3749. @wrap_instance_event(prefix='api')
  3750. def unlock(self, context, instance):
  3751. LOG.debug('Unlocking', instance=instance)
  3752. instance.locked = False
  3753. instance.locked_by = None
  3754. instance.system_metadata.pop('locked_reason', None)
  3755. instance.save()
  3756. unlock(self, context, instance)
  3757. compute_utils.notify_about_instance_action(
  3758. context, instance, CONF.host,
  3759. action=fields_obj.NotificationAction.UNLOCK,
  3760. source=fields_obj.NotificationSource.API)
  3761. @check_instance_lock
  3762. def reset_network(self, context, instance):
  3763. """Reset networking on the instance."""
  3764. self.compute_rpcapi.reset_network(context, instance=instance)
  3765. @check_instance_lock
  3766. def inject_network_info(self, context, instance):
  3767. """Inject network info for the instance."""
  3768. self.compute_rpcapi.inject_network_info(context, instance=instance)
  3769. def _create_volume_bdm(self, context, instance, device, volume,
  3770. disk_bus, device_type, is_local_creation=False,
  3771. tag=None, delete_on_termination=False):
  3772. volume_id = volume['id']
  3773. if is_local_creation:
  3774. # when the creation is done locally we can't specify the device
  3775. # name as we do not have a way to check that the name specified is
  3776. # a valid one.
3777. # We defer setting that value until the actual attach happens
3778. # on the compute manager.
  3779. # NOTE(artom) Local attach (to a shelved-offload instance) cannot
  3780. # support device tagging because we have no way to call the compute
  3781. # manager to check that it supports device tagging. In fact, we
3782. # don't even know which compute manager the instance will
  3783. # eventually end up on when it's unshelved.
  3784. volume_bdm = objects.BlockDeviceMapping(
  3785. context=context,
  3786. source_type='volume', destination_type='volume',
  3787. instance_uuid=instance.uuid, boot_index=None,
  3788. volume_id=volume_id,
  3789. device_name=None, guest_format=None,
  3790. disk_bus=disk_bus, device_type=device_type,
  3791. delete_on_termination=delete_on_termination)
  3792. volume_bdm.create()
  3793. else:
  3794. # NOTE(vish): This is done on the compute host because we want
  3795. # to avoid a race where two devices are requested at
  3796. # the same time. When db access is removed from
  3797. # compute, the bdm will be created here and we will
  3798. # have to make sure that they are assigned atomically.
  3799. volume_bdm = self.compute_rpcapi.reserve_block_device_name(
  3800. context, instance, device, volume_id, disk_bus=disk_bus,
  3801. device_type=device_type, tag=tag,
  3802. multiattach=volume['multiattach'])
  3803. volume_bdm.delete_on_termination = delete_on_termination
  3804. volume_bdm.save()
  3805. return volume_bdm
  3806. def _check_volume_already_attached_to_instance(self, context, instance,
  3807. volume_id):
  3808. """Avoid attaching the same volume to the same instance twice.
3809. The new Cinder flow (microversion 3.44) handles the checks
3810. differently and allows attaching the same volume to the same
3811. instance twice (to enable live migration), so for the new flow we
3812. check whether a BDM already exists for this combination and fail
3813. if it does.
  3814. """
  3815. try:
  3816. objects.BlockDeviceMapping.get_by_volume_and_instance(
  3817. context, volume_id, instance.uuid)
  3818. msg = _("volume %s already attached") % volume_id
  3819. raise exception.InvalidVolume(reason=msg)
  3820. except exception.VolumeBDMNotFound:
  3821. pass
  3822. def _check_attach_and_reserve_volume(self, context, volume, instance,
  3823. bdm, supports_multiattach=False,
  3824. validate_az=True):
  3825. """Perform checks against the instance and volume before attaching.
  3826. If validation succeeds, the bdm is updated with an attachment_id which
  3827. effectively reserves it during the attach process in cinder.
  3828. :param context: nova auth RequestContext
  3829. :param volume: volume dict from cinder
  3830. :param instance: Instance object
  3831. :param bdm: BlockDeviceMapping object
  3832. :param supports_multiattach: True if the request supports multiattach
  3833. volumes, i.e. microversion >= 2.60, False otherwise
  3834. :param validate_az: True if the instance and volume availability zones
  3835. should be validated for cross_az_attach, False to not validate AZ
  3836. """
  3837. volume_id = volume['id']
  3838. if validate_az:
  3839. self.volume_api.check_availability_zone(context, volume,
  3840. instance=instance)
  3841. # If volume.multiattach=True and the microversion to
  3842. # support multiattach is not used, fail the request.
  3843. if volume['multiattach'] and not supports_multiattach:
  3844. raise exception.MultiattachNotSupportedOldMicroversion()
  3845. attachment_id = self.volume_api.attachment_create(
  3846. context, volume_id, instance.uuid)['id']
  3847. bdm.attachment_id = attachment_id
  3848. # NOTE(ildikov): In case of boot from volume the BDM at this
  3849. # point is not yet created in a cell database, so we can't
  3850. # call save(). When attaching a volume to an existing
  3851. # instance, the instance is already in a cell and the BDM has
  3852. # been created in that same cell so updating here in that case
  3853. # is "ok".
  3854. if bdm.obj_attr_is_set('id'):
  3855. bdm.save()
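# Illustrative sketch (not part of the Nova API; the helper and the fake
# volume_api signature below are simplified stand-ins, not the real Cinder
# client calls): the "reservation" performed above is simply "create a Cinder
# attachment without a host connector and remember its id on the BDM".
def _sketch_reserve_volume(volume_api, volume, instance_uuid, bdm,
                           supports_multiattach=False):
    """Reserve a volume for attach by creating a connector-less attachment.

    :param volume_api: any object exposing attachment_create(volume_id,
        instance_uuid) -> dict with an 'id' key (fake shape for this sketch)
    :param bdm: plain dict standing in for the BlockDeviceMapping object
    """
    if volume['multiattach'] and not supports_multiattach:
        raise ValueError('multiattach volumes need a newer microversion')
    # No connector is passed, so this only reserves the volume; the real
    # connection is made later by the compute host.
    attachment = volume_api.attachment_create(volume['id'], instance_uuid)
    bdm['attachment_id'] = attachment['id']
    return bdm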
  3856. # TODO(stephenfin): Fold this back in now that cells v1 no longer needs to
  3857. # override it.
  3858. def _attach_volume(self, context, instance, volume, device,
  3859. disk_bus, device_type, tag=None,
  3860. supports_multiattach=False,
  3861. delete_on_termination=False):
  3862. """Attach an existing volume to an existing instance.
  3863. This method is separated to make it possible for cells version
  3864. to override it.
  3865. """
  3866. volume_bdm = self._create_volume_bdm(
  3867. context, instance, device, volume, disk_bus=disk_bus,
  3868. device_type=device_type, tag=tag,
  3869. delete_on_termination=delete_on_termination)
  3870. try:
  3871. self._check_attach_and_reserve_volume(context, volume, instance,
  3872. volume_bdm,
  3873. supports_multiattach)
  3874. self._record_action_start(
  3875. context, instance, instance_actions.ATTACH_VOLUME)
  3876. self.compute_rpcapi.attach_volume(context, instance, volume_bdm)
  3877. except Exception:
  3878. with excutils.save_and_reraise_exception():
  3879. volume_bdm.destroy()
  3880. return volume_bdm.device_name
  3881. def _attach_volume_shelved_offloaded(self, context, instance, volume,
  3882. device, disk_bus, device_type,
  3883. delete_on_termination):
  3884. """Attach an existing volume to an instance in shelved offloaded state.
3885. Attaching a volume to an instance in the shelved offloaded state
3886. requires performing the regular checks to see if we can attach and
3887. reserve the volume, and then calling the attach method on the
3888. volume API to mark the volume as 'in-use'.
3889. The instance at this stage is not managed by a compute manager,
3890. therefore the actual attachment will be performed once the
3891. instance is unshelved.
  3892. """
  3893. volume_id = volume['id']
  3894. @wrap_instance_event(prefix='api')
  3895. def attach_volume(self, context, v_id, instance, dev, attachment_id):
  3896. if attachment_id:
  3897. # Normally we wouldn't complete an attachment without a host
  3898. # connector, but we do this to make the volume status change
  3899. # to "in-use" to maintain the API semantics with the old flow.
  3900. # When unshelving the instance, the compute service will deal
  3901. # with this disconnected attachment.
  3902. self.volume_api.attachment_complete(context, attachment_id)
  3903. else:
  3904. self.volume_api.attach(context,
  3905. v_id,
  3906. instance.uuid,
  3907. dev)
  3908. volume_bdm = self._create_volume_bdm(
  3909. context, instance, device, volume, disk_bus=disk_bus,
  3910. device_type=device_type, is_local_creation=True,
  3911. delete_on_termination=delete_on_termination)
  3912. try:
  3913. self._check_attach_and_reserve_volume(context, volume, instance,
  3914. volume_bdm)
  3915. self._record_action_start(
  3916. context, instance,
  3917. instance_actions.ATTACH_VOLUME)
  3918. attach_volume(self, context, volume_id, instance, device,
  3919. volume_bdm.attachment_id)
  3920. except Exception:
  3921. with excutils.save_and_reraise_exception():
  3922. volume_bdm.destroy()
  3923. return volume_bdm.device_name
  3924. @check_instance_lock
  3925. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  3926. vm_states.STOPPED, vm_states.RESIZED,
  3927. vm_states.SOFT_DELETED, vm_states.SHELVED,
  3928. vm_states.SHELVED_OFFLOADED])
  3929. def attach_volume(self, context, instance, volume_id, device=None,
  3930. disk_bus=None, device_type=None, tag=None,
  3931. supports_multiattach=False,
  3932. delete_on_termination=False):
  3933. """Attach an existing volume to an existing instance."""
  3934. # NOTE(vish): Fail fast if the device is not going to pass. This
  3935. # will need to be removed along with the test if we
  3936. # change the logic in the manager for what constitutes
  3937. # a valid device.
  3938. if device and not block_device.match_device(device):
  3939. raise exception.InvalidDevicePath(path=device)
  3940. # Make sure the volume isn't already attached to this instance
  3941. # because we'll use the v3.44 attachment flow in
  3942. # _check_attach_and_reserve_volume and Cinder will allow multiple
  3943. # attachments between the same volume and instance but the old flow
  3944. # API semantics don't allow that so we enforce it here.
  3945. self._check_volume_already_attached_to_instance(context,
  3946. instance,
  3947. volume_id)
  3948. volume = self.volume_api.get(context, volume_id)
  3949. is_shelved_offloaded = instance.vm_state == vm_states.SHELVED_OFFLOADED
  3950. if is_shelved_offloaded:
  3951. if tag:
  3952. # NOTE(artom) Local attach (to a shelved-offload instance)
  3953. # cannot support device tagging because we have no way to call
  3954. # the compute manager to check that it supports device tagging.
3955. # In fact, we don't even know which compute manager the
  3956. # instance will eventually end up on when it's unshelved.
  3957. raise exception.VolumeTaggedAttachToShelvedNotSupported()
  3958. if volume['multiattach']:
  3959. # NOTE(mriedem): Similar to tagged attach, we don't support
  3960. # attaching a multiattach volume to shelved offloaded instances
  3961. # because we can't tell if the compute host (since there isn't
  3962. # one) supports it. This could possibly be supported in the
  3963. # future if the scheduler was made aware of which computes
  3964. # support multiattach volumes.
  3965. raise exception.MultiattachToShelvedNotSupported()
  3966. return self._attach_volume_shelved_offloaded(context,
  3967. instance,
  3968. volume,
  3969. device,
  3970. disk_bus,
  3971. device_type,
  3972. delete_on_termination)
  3973. return self._attach_volume(context, instance, volume, device,
  3974. disk_bus, device_type, tag=tag,
  3975. supports_multiattach=supports_multiattach,
  3976. delete_on_termination=delete_on_termination)
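# Illustrative sketch (not part of the Nova API; the helper name is
# hypothetical and the device check is simplified to a prefix test):
# attach_volume() rejects a few combinations up front before doing any work.
# The same guards as a pure function over plain values:
def _sketch_validate_attach_request(device, is_shelved_offloaded, tag,
                                    volume_is_multiattach):
    """Return the reasons, if any, the attach request would be rejected."""
    reasons = []
    if device and not device.startswith('/dev/'):
        reasons.append('invalid device path')
    if is_shelved_offloaded and tag:
        reasons.append('tagged attach not supported while shelved offloaded')
    if is_shelved_offloaded and volume_is_multiattach:
        reasons.append('multiattach not supported while shelved offloaded')
    return reasons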
  3977. # TODO(stephenfin): Fold this back in now that cells v1 no longer needs to
  3978. # override it.
  3979. def _detach_volume(self, context, instance, volume):
  3980. """Detach volume from instance.
  3981. This method is separated to make it easier for cells version
  3982. to override.
  3983. """
  3984. try:
  3985. self.volume_api.begin_detaching(context, volume['id'])
  3986. except exception.InvalidInput as exc:
  3987. raise exception.InvalidVolume(reason=exc.format_message())
  3988. attachments = volume.get('attachments', {})
  3989. attachment_id = None
  3990. if attachments and instance.uuid in attachments:
  3991. attachment_id = attachments[instance.uuid]['attachment_id']
  3992. self._record_action_start(
  3993. context, instance, instance_actions.DETACH_VOLUME)
  3994. self.compute_rpcapi.detach_volume(context, instance=instance,
  3995. volume_id=volume['id'], attachment_id=attachment_id)
  3996. def _detach_volume_shelved_offloaded(self, context, instance, volume):
  3997. """Detach a volume from an instance in shelved offloaded state.
3998. If the instance is shelved offloaded we just need to clean up the
3999. volume by calling the volume API detach and terminate_connection
4000. methods and deleting the BDM record.
4001. If the volume has the delete_on_termination option set then we
4002. call the volume API delete as well.
  4003. """
  4004. @wrap_instance_event(prefix='api')
  4005. def detach_volume(self, context, instance, bdms):
  4006. self._local_cleanup_bdm_volumes(bdms, instance, context)
  4007. bdms = [objects.BlockDeviceMapping.get_by_volume_id(
  4008. context, volume['id'], instance.uuid)]
  4009. # The begin_detaching() call only works with in-use volumes,
  4010. # which will not be the case for volumes attached to a shelved
  4011. # offloaded server via the attachments API since those volumes
  4012. # will have `reserved` status.
  4013. if not bdms[0].attachment_id:
  4014. try:
  4015. self.volume_api.begin_detaching(context, volume['id'])
  4016. except exception.InvalidInput as exc:
  4017. raise exception.InvalidVolume(reason=exc.format_message())
  4018. self._record_action_start(
  4019. context, instance,
  4020. instance_actions.DETACH_VOLUME)
  4021. detach_volume(self, context, instance, bdms)
  4022. @check_instance_lock
  4023. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4024. vm_states.STOPPED, vm_states.RESIZED,
  4025. vm_states.SOFT_DELETED, vm_states.SHELVED,
  4026. vm_states.SHELVED_OFFLOADED])
  4027. def detach_volume(self, context, instance, volume):
  4028. """Detach a volume from an instance."""
  4029. if instance.vm_state == vm_states.SHELVED_OFFLOADED:
  4030. self._detach_volume_shelved_offloaded(context, instance, volume)
  4031. else:
  4032. self._detach_volume(context, instance, volume)
  4033. def _count_attachments_for_swap(self, ctxt, volume):
  4034. """Counts the number of attachments for a swap-related volume.
  4035. Attempts to only count read/write attachments if the volume attachment
4036. records exist, otherwise simply counts the number of attachments
  4037. regardless of attach mode.
  4038. :param ctxt: nova.context.RequestContext - user request context
  4039. :param volume: nova-translated volume dict from nova.volume.cinder.
  4040. :returns: count of attachments for the volume
  4041. """
  4042. # This is a dict, keyed by server ID, to a dict of attachment_id and
  4043. # mountpoint.
  4044. attachments = volume.get('attachments', {})
  4045. # Multiattach volumes can have more than one attachment, so if there
  4046. # is more than one attachment, attempt to count the read/write
  4047. # attachments.
  4048. if len(attachments) > 1:
  4049. count = 0
  4050. for attachment in attachments.values():
  4051. attachment_id = attachment['attachment_id']
  4052. # Get the attachment record for this attachment so we can
  4053. # get the attach_mode.
  4054. # TODO(mriedem): This could be optimized if we had
  4055. # GET /attachments/detail?volume_id=volume['id'] in Cinder.
  4056. try:
  4057. attachment_record = self.volume_api.attachment_get(
  4058. ctxt, attachment_id)
  4059. # Note that the attachment record from Cinder has
  4060. # attach_mode in the top-level of the resource but the
  4061. # nova.volume.cinder code translates it and puts the
  4062. # attach_mode in the connection_info for some legacy
  4063. # reason...
  4064. if attachment_record['attach_mode'] == 'rw':
  4065. count += 1
  4066. except exception.VolumeAttachmentNotFound:
  4067. # attachments are read/write by default so count it
  4068. count += 1
  4069. else:
  4070. count = len(attachments)
  4071. return count
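# Illustrative sketch (not part of the Nova API; the helper name and the
# KeyError convention are hypothetical): the read/write counting above walks
# the per-server attachment dict, asks a lookup for each attachment's
# attach_mode, and treats lookup failures as read/write.
def _sketch_count_rw_attachments(attachments, get_attachment):
    """Count read/write attachments for a (possibly multiattach) volume.

    :param attachments: dict keyed by server id whose values contain an
        'attachment_id' key
    :param get_attachment: callable returning a dict with 'attach_mode', or
        raising KeyError when the record is gone (stand-in for
        VolumeAttachmentNotFound)
    """
    if len(attachments) <= 1:
        return len(attachments)
    count = 0
    for attachment in attachments.values():
        try:
            record = get_attachment(attachment['attachment_id'])
            if record['attach_mode'] == 'rw':
                count += 1
        except KeyError:
            # Attachments are read/write by default, so count it.
            count += 1
    return count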
  4072. @check_instance_lock
  4073. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4074. vm_states.RESIZED])
  4075. def swap_volume(self, context, instance, old_volume, new_volume):
  4076. """Swap volume attached to an instance."""
  4077. # The caller likely got the instance from volume['attachments']
  4078. # in the first place, but let's sanity check.
  4079. if not old_volume.get('attachments', {}).get(instance.uuid):
  4080. msg = _("Old volume is attached to a different instance.")
  4081. raise exception.InvalidVolume(reason=msg)
  4082. if new_volume['attach_status'] == 'attached':
  4083. msg = _("New volume must be detached in order to swap.")
  4084. raise exception.InvalidVolume(reason=msg)
  4085. if int(new_volume['size']) < int(old_volume['size']):
  4086. msg = _("New volume must be the same size or larger.")
  4087. raise exception.InvalidVolume(reason=msg)
  4088. self.volume_api.check_availability_zone(context, new_volume,
  4089. instance=instance)
  4090. try:
  4091. self.volume_api.begin_detaching(context, old_volume['id'])
  4092. except exception.InvalidInput as exc:
  4093. raise exception.InvalidVolume(reason=exc.format_message())
  4094. # Disallow swapping from multiattach volumes that have more than one
  4095. # read/write attachment. We know the old_volume has at least one
  4096. # attachment since it's attached to this server. The new_volume
  4097. # can't have any attachments because of the attach_status check above.
  4098. # We do this count after calling "begin_detaching" to lock against
  4099. # concurrent attachments being made while we're counting.
  4100. try:
  4101. if self._count_attachments_for_swap(context, old_volume) > 1:
  4102. raise exception.MultiattachSwapVolumeNotSupported()
  4103. except Exception: # This is generic to handle failures while counting
  4104. # We need to reset the detaching status before raising.
  4105. with excutils.save_and_reraise_exception():
  4106. self.volume_api.roll_detaching(context, old_volume['id'])
  4107. # Get the BDM for the attached (old) volume so we can tell if it was
  4108. # attached with the new-style Cinder 3.44 API.
  4109. bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
  4110. context, old_volume['id'], instance.uuid)
  4111. new_attachment_id = None
  4112. if bdm.attachment_id is None:
  4113. # This is an old-style attachment so reserve the new volume before
  4114. # we cast to the compute host.
  4115. self.volume_api.reserve_volume(context, new_volume['id'])
  4116. else:
  4117. try:
  4118. self._check_volume_already_attached_to_instance(
  4119. context, instance, new_volume['id'])
  4120. except exception.InvalidVolume:
  4121. with excutils.save_and_reraise_exception():
  4122. self.volume_api.roll_detaching(context, old_volume['id'])
  4123. # This is a new-style attachment so for the volume that we are
  4124. # going to swap to, create a new volume attachment.
  4125. new_attachment_id = self.volume_api.attachment_create(
  4126. context, new_volume['id'], instance.uuid)['id']
  4127. self._record_action_start(
  4128. context, instance, instance_actions.SWAP_VOLUME)
  4129. try:
  4130. self.compute_rpcapi.swap_volume(
  4131. context, instance=instance,
  4132. old_volume_id=old_volume['id'],
  4133. new_volume_id=new_volume['id'],
  4134. new_attachment_id=new_attachment_id)
  4135. except Exception:
  4136. with excutils.save_and_reraise_exception():
  4137. self.volume_api.roll_detaching(context, old_volume['id'])
  4138. if new_attachment_id is None:
  4139. self.volume_api.unreserve_volume(context, new_volume['id'])
  4140. else:
  4141. self.volume_api.attachment_delete(
  4142. context, new_attachment_id)
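# Illustrative sketch (not part of the Nova API; the helper name is
# hypothetical): before recording the action and casting to the compute host,
# swap_volume() enforces three cheap invariants on the two volume dicts. The
# same checks as a standalone function:
def _sketch_validate_swap(old_volume, new_volume, instance_uuid):
    if instance_uuid not in old_volume.get('attachments', {}):
        raise ValueError('old volume is attached to a different instance')
    if new_volume['attach_status'] == 'attached':
        raise ValueError('new volume must be detached in order to swap')
    if int(new_volume['size']) < int(old_volume['size']):
        raise ValueError('new volume must be the same size or larger')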
  4143. @check_instance_lock
  4144. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4145. vm_states.STOPPED],
  4146. task_state=[None])
  4147. def attach_interface(self, context, instance, network_id, port_id,
  4148. requested_ip, tag=None):
  4149. """Use hotplug to add an network adapter to an instance."""
  4150. self._record_action_start(
  4151. context, instance, instance_actions.ATTACH_INTERFACE)
  4152. # NOTE(gibi): Checking if the requested port has resource request as
  4153. # such ports are currently not supported as they would at least
  4154. # need resource allocation manipulation in placement but might also
  4155. # need a new scheduling if resource on this host is not available.
  4156. if port_id:
  4157. port = self.network_api.show_port(context, port_id)
  4158. if port['port'].get(constants.RESOURCE_REQUEST):
  4159. raise exception.AttachInterfaceWithQoSPolicyNotSupported(
  4160. instance_uuid=instance.uuid)
  4161. return self.compute_rpcapi.attach_interface(context,
  4162. instance=instance, network_id=network_id, port_id=port_id,
  4163. requested_ip=requested_ip, tag=tag)
  4164. @check_instance_lock
  4165. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4166. vm_states.STOPPED],
  4167. task_state=[None])
  4168. def detach_interface(self, context, instance, port_id):
  4169. """Detach an network adapter from an instance."""
  4170. self._record_action_start(
  4171. context, instance, instance_actions.DETACH_INTERFACE)
  4172. self.compute_rpcapi.detach_interface(context, instance=instance,
  4173. port_id=port_id)
  4174. def get_instance_metadata(self, context, instance):
  4175. """Get all metadata associated with an instance."""
  4176. return self.db.instance_metadata_get(context, instance.uuid)
  4177. @check_instance_lock
  4178. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4179. vm_states.SUSPENDED, vm_states.STOPPED],
  4180. task_state=None)
  4181. def delete_instance_metadata(self, context, instance, key):
  4182. """Delete the given metadata item from an instance."""
  4183. instance.delete_metadata_key(key)
  4184. self.compute_rpcapi.change_instance_metadata(context,
  4185. instance=instance,
  4186. diff={key: ['-']})
  4187. @check_instance_lock
  4188. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4189. vm_states.SUSPENDED, vm_states.STOPPED],
  4190. task_state=None)
  4191. def update_instance_metadata(self, context, instance,
  4192. metadata, delete=False):
  4193. """Updates or creates instance metadata.
  4194. If delete is True, metadata items that are not specified in the
  4195. `metadata` argument will be deleted.
  4196. """
  4197. orig = dict(instance.metadata)
  4198. if delete:
  4199. _metadata = metadata
  4200. else:
  4201. _metadata = dict(instance.metadata)
  4202. _metadata.update(metadata)
  4203. self._check_metadata_properties_quota(context, _metadata)
  4204. instance.metadata = _metadata
  4205. instance.save()
  4206. diff = _diff_dict(orig, instance.metadata)
  4207. self.compute_rpcapi.change_instance_metadata(context,
  4208. instance=instance,
  4209. diff=diff)
  4210. return _metadata
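# Illustrative sketch (not part of the Nova API; the helper name and the
# ['+', value] convention for additions are assumptions based on the
# {key: ['-']} deletion diff used in delete_instance_metadata() above): the
# _diff_dict() call produces a per-key change description that the compute
# manager applies to the guest.
def _sketch_diff_metadata(old, new):
    """Return {key: ['-']} for removed keys and {key: ['+', value]} for
    added or changed keys (assumed format for this sketch)."""
    diff = {key: ['-'] for key in old if key not in new}
    for key, value in new.items():
        if old.get(key) != value:
            diff[key] = ['+', value]
    return diff

# e.g. _sketch_diff_metadata({'a': '1', 'b': '2'}, {'b': '3', 'c': '4'})
# -> {'a': ['-'], 'b': ['+', '3'], 'c': ['+', '4']}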
  4211. @reject_sev_instances(instance_actions.LIVE_MIGRATION)
  4212. @check_instance_lock
  4213. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED])
  4214. def live_migrate(self, context, instance, block_migration,
  4215. disk_over_commit, host_name, force=None, async_=False):
  4216. """Migrate a server lively to a new host."""
  4217. LOG.debug("Going to try to live migrate instance to %s",
  4218. host_name or "another host", instance=instance)
  4219. if host_name:
  4220. # Validate the specified host before changing the instance task
  4221. # state.
  4222. nodes = objects.ComputeNodeList.get_all_by_host(context, host_name)
  4223. request_spec = objects.RequestSpec.get_by_instance_uuid(
  4224. context, instance.uuid)
  4225. instance.task_state = task_states.MIGRATING
  4226. instance.save(expected_task_state=[None])
  4227. self._record_action_start(context, instance,
  4228. instance_actions.LIVE_MIGRATION)
  4229. # NOTE(sbauza): Force is a boolean by the new related API version
  4230. if force is False and host_name:
  4231. # Unset the host to make sure we call the scheduler
  4232. # from the conductor LiveMigrationTask. Yes this is tightly-coupled
  4233. # to behavior in conductor and not great.
  4234. host_name = None
  4235. # FIXME(sbauza): Since only Ironic driver uses more than one
  4236. # compute per service but doesn't support live migrations,
  4237. # let's provide the first one.
  4238. target = nodes[0]
  4239. destination = objects.Destination(
  4240. host=target.host,
  4241. node=target.hypervisor_hostname
  4242. )
  4243. # This is essentially a hint to the scheduler to only consider
  4244. # the specified host but still run it through the filters.
  4245. request_spec.requested_destination = destination
  4246. try:
  4247. self.compute_task_api.live_migrate_instance(context, instance,
  4248. host_name, block_migration=block_migration,
  4249. disk_over_commit=disk_over_commit,
  4250. request_spec=request_spec, async_=async_)
  4251. except oslo_exceptions.MessagingTimeout as messaging_timeout:
  4252. with excutils.save_and_reraise_exception():
  4253. # NOTE(pkoniszewski): It is possible that MessagingTimeout
  4254. # occurs, but LM will still be in progress, so write
  4255. # instance fault to database
  4256. compute_utils.add_instance_fault_from_exc(context,
  4257. instance,
  4258. messaging_timeout)
  4259. @check_instance_lock
  4260. @check_instance_state(vm_state=[vm_states.ACTIVE],
  4261. task_state=[task_states.MIGRATING])
  4262. def live_migrate_force_complete(self, context, instance, migration_id):
  4263. """Force live migration to complete.
  4264. :param context: Security context
  4265. :param instance: The instance that is being migrated
  4266. :param migration_id: ID of ongoing migration
  4267. """
  4268. LOG.debug("Going to try to force live migration to complete",
  4269. instance=instance)
  4270. # NOTE(pkoniszewski): Get migration object to check if there is ongoing
  4271. # live migration for particular instance. Also pass migration id to
  4272. # compute to double check and avoid possible race condition.
  4273. migration = objects.Migration.get_by_id_and_instance(
  4274. context, migration_id, instance.uuid)
  4275. if migration.status != 'running':
  4276. raise exception.InvalidMigrationState(migration_id=migration_id,
  4277. instance_uuid=instance.uuid,
  4278. state=migration.status,
  4279. method='force complete')
  4280. self._record_action_start(
  4281. context, instance, instance_actions.LIVE_MIGRATION_FORCE_COMPLETE)
  4282. self.compute_rpcapi.live_migration_force_complete(
  4283. context, instance, migration)
  4284. @check_instance_lock
  4285. @check_instance_state(task_state=[task_states.MIGRATING])
  4286. def live_migrate_abort(self, context, instance, migration_id,
  4287. support_abort_in_queue=False):
  4288. """Abort an in-progress live migration.
  4289. :param context: Security context
  4290. :param instance: The instance that is being migrated
  4291. :param migration_id: ID of in-progress live migration
  4292. :param support_abort_in_queue: Flag indicating whether we can support
  4293. abort migrations in "queued" or "preparing" status.
  4294. """
  4295. migration = objects.Migration.get_by_id_and_instance(context,
  4296. migration_id, instance.uuid)
  4297. LOG.debug("Going to cancel live migration %s",
  4298. migration.id, instance=instance)
4299. # If the microversion does not support aborting a queued migration,
4300. # we are only able to abort migrations with `running` status;
  4301. # if it is supported, we are able to also abort migrations in
  4302. # `queued` and `preparing` status.
  4303. allowed_states = ['running']
  4304. queued_states = ['queued', 'preparing']
  4305. if support_abort_in_queue:
  4306. # The user requested a microversion that supports aborting a queued
  4307. # or preparing live migration. But we need to check that the
  4308. # compute service hosting the instance is new enough to support
  4309. # aborting a queued/preparing live migration, so we check the
  4310. # service version here.
  4311. allowed_states.extend(queued_states)
  4312. if migration.status not in allowed_states:
  4313. raise exception.InvalidMigrationState(migration_id=migration_id,
  4314. instance_uuid=instance.uuid,
  4315. state=migration.status,
  4316. method='abort live migration')
  4317. self._record_action_start(context, instance,
  4318. instance_actions.LIVE_MIGRATION_CANCEL)
  4319. self.compute_rpcapi.live_migration_abort(context,
  4320. instance, migration.id)
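# Illustrative sketch (not part of the Nova API; the helper name is
# hypothetical): whether an abort is allowed depends only on the migration
# status and on whether the requested microversion supports aborting queued
# migrations.
def _sketch_can_abort_live_migration(status, support_abort_in_queue):
    allowed = ['running']
    if support_abort_in_queue:
        allowed += ['queued', 'preparing']
    return status in allowed

# e.g. _sketch_can_abort_live_migration('queued', False) -> False, which maps
# to the InvalidMigrationState error raised above.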
  4321. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  4322. vm_states.ERROR])
  4323. def evacuate(self, context, instance, host, on_shared_storage,
  4324. admin_password=None, force=None):
  4325. """Running evacuate to target host.
  4326. Checking vm compute host state, if the host not in expected_state,
  4327. raising an exception.
  4328. :param instance: The instance to evacuate
4329. :param host: Target host. If not set, the scheduler will pick one
  4330. :param on_shared_storage: True if instance files on shared storage
  4331. :param admin_password: password to set on rebuilt instance
  4332. :param force: Force the evacuation to the specific host target
  4333. """
  4334. LOG.debug('vm evacuation scheduled', instance=instance)
  4335. inst_host = instance.host
  4336. service = objects.Service.get_by_compute_host(context, inst_host)
  4337. if self.servicegroup_api.service_is_up(service):
  4338. LOG.error('Instance compute service state on %s '
  4339. 'expected to be down, but it was up.', inst_host)
  4340. raise exception.ComputeServiceInUse(host=inst_host)
  4341. request_spec = objects.RequestSpec.get_by_instance_uuid(
  4342. context, instance.uuid)
  4343. instance.task_state = task_states.REBUILDING
  4344. instance.save(expected_task_state=[None])
  4345. self._record_action_start(context, instance, instance_actions.EVACUATE)
  4346. # NOTE(danms): Create this as a tombstone for the source compute
  4347. # to find and cleanup. No need to pass it anywhere else.
  4348. migration = objects.Migration(context,
  4349. source_compute=instance.host,
  4350. source_node=instance.node,
  4351. instance_uuid=instance.uuid,
  4352. status='accepted',
  4353. migration_type='evacuation')
  4354. if host:
  4355. migration.dest_compute = host
  4356. migration.create()
  4357. compute_utils.notify_about_instance_usage(
  4358. self.notifier, context, instance, "evacuate")
  4359. compute_utils.notify_about_instance_action(
  4360. context, instance, CONF.host,
  4361. action=fields_obj.NotificationAction.EVACUATE,
  4362. source=fields_obj.NotificationSource.API)
  4363. # NOTE(sbauza): Force is a boolean by the new related API version
  4364. # TODO(stephenfin): Any reason we can't use 'not force' here to handle
  4365. # the pre-v2.29 API microversion, which wouldn't set force
  4366. if force is False and host:
  4367. nodes = objects.ComputeNodeList.get_all_by_host(context, host)
  4368. # NOTE(sbauza): Unset the host to make sure we call the scheduler
  4369. host = None
  4370. # FIXME(sbauza): Since only Ironic driver uses more than one
  4371. # compute per service but doesn't support evacuations,
  4372. # let's provide the first one.
  4373. target = nodes[0]
  4374. destination = objects.Destination(
  4375. host=target.host,
  4376. node=target.hypervisor_hostname
  4377. )
  4378. request_spec.requested_destination = destination
  4379. return self.compute_task_api.rebuild_instance(context,
  4380. instance=instance,
  4381. new_pass=admin_password,
  4382. injected_files=None,
  4383. image_ref=None,
  4384. orig_image_ref=None,
  4385. orig_sys_metadata=None,
  4386. bdms=None,
  4387. recreate=True,
  4388. on_shared_storage=on_shared_storage,
  4389. host=host,
  4390. request_spec=request_spec,
  4391. )
  4392. def get_migrations(self, context, filters):
  4393. """Get all migrations for the given filters."""
  4394. load_cells()
  4395. migrations = []
  4396. for cell in CELLS:
  4397. if cell.uuid == objects.CellMapping.CELL0_UUID:
  4398. continue
  4399. with nova_context.target_cell(context, cell) as cctxt:
  4400. migrations.extend(objects.MigrationList.get_by_filters(
  4401. cctxt, filters).objects)
  4402. return objects.MigrationList(objects=migrations)
  4403. def get_migrations_sorted(self, context, filters, sort_dirs=None,
  4404. sort_keys=None, limit=None, marker=None):
  4405. """Get all migrations for the given parameters."""
  4406. mig_objs = migration_list.get_migration_objects_sorted(
  4407. context, filters, limit, marker, sort_keys, sort_dirs)
  4408. # Due to cross-cell resize, we could have duplicate migration records
  4409. # while the instance is in VERIFY_RESIZE state in the destination cell
  4410. # but the original migration record still exists in the source cell.
  4411. # Filter out duplicate migration records here based on which record
  4412. # is newer (last updated).
  4413. def _get_newer_obj(obj1, obj2):
  4414. # created_at will always be set.
  4415. created_at1 = obj1.created_at
  4416. created_at2 = obj2.created_at
  4417. # updated_at might be None
  4418. updated_at1 = obj1.updated_at
  4419. updated_at2 = obj2.updated_at
  4420. # If both have updated_at, compare using that field.
  4421. if updated_at1 and updated_at2:
  4422. if updated_at1 > updated_at2:
  4423. return obj1
  4424. return obj2
  4425. # Compare created_at versus updated_at.
  4426. if updated_at1:
  4427. if updated_at1 > created_at2:
  4428. return obj1
  4429. return obj2
  4430. if updated_at2:
  4431. if updated_at2 > created_at1:
  4432. return obj2
  4433. return obj1
  4434. # Compare created_at only.
  4435. if created_at1 > created_at2:
  4436. return obj1
  4437. return obj2
  4438. # TODO(mriedem): This could be easier if we leveraged the "hidden"
  4439. # field on the Migration record and then just did like
  4440. # _get_unique_filter_method in the get_all() method for instances.
  4441. migrations_by_uuid = collections.OrderedDict() # maintain sort order
  4442. for migration in mig_objs:
  4443. if migration.uuid not in migrations_by_uuid:
  4444. migrations_by_uuid[migration.uuid] = migration
  4445. else:
  4446. # We have a collision, keep the newer record.
  4447. # Note that using updated_at could be wrong if changes-since or
  4448. # changes-before filters are being used but we have the same
  4449. # issue in _get_unique_filter_method for instances.
  4450. doppelganger = migrations_by_uuid[migration.uuid]
  4451. newer = _get_newer_obj(doppelganger, migration)
  4452. migrations_by_uuid[migration.uuid] = newer
  4453. return objects.MigrationList(objects=list(migrations_by_uuid.values()))
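# Illustrative sketch (not part of the Nova API; helper names are
# hypothetical and tie-breaking is simplified): the de-duplication above
# keeps, per migration UUID, whichever record looks more recently touched,
# preferring updated_at and falling back to created_at, while preserving the
# order of first appearance.
def _sketch_newer(obj1, obj2):
    ts1 = obj1.updated_at or obj1.created_at
    ts2 = obj2.updated_at or obj2.created_at
    return obj1 if ts1 > ts2 else obj2

def _sketch_dedupe_by_uuid(records):
    """Collapse records sharing a uuid, keeping the newer of each pair."""
    import collections  # repeated here so the sketch is self-contained
    by_uuid = collections.OrderedDict()
    for record in records:
        if record.uuid in by_uuid:
            by_uuid[record.uuid] = _sketch_newer(by_uuid[record.uuid], record)
        else:
            by_uuid[record.uuid] = record
    return list(by_uuid.values())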
  4454. def get_migrations_in_progress_by_instance(self, context, instance_uuid,
  4455. migration_type=None):
  4456. """Get all migrations of an instance in progress."""
  4457. return objects.MigrationList.get_in_progress_by_instance(
  4458. context, instance_uuid, migration_type)
  4459. def get_migration_by_id_and_instance(self, context,
  4460. migration_id, instance_uuid):
  4461. """Get the migration of an instance by id."""
  4462. return objects.Migration.get_by_id_and_instance(
  4463. context, migration_id, instance_uuid)
  4464. def _get_bdm_by_volume_id(self, context, volume_id, expected_attrs=None):
  4465. """Retrieve a BDM without knowing its cell.
  4466. .. note:: The context will be targeted to the cell in which the
  4467. BDM is found, if any.
  4468. :param context: The API request context.
  4469. :param volume_id: The ID of the volume.
  4470. :param expected_attrs: list of any additional attributes that should
  4471. be joined when the BDM is loaded from the database.
  4472. :raises: nova.exception.VolumeBDMNotFound if not found in any cell
  4473. """
  4474. load_cells()
  4475. for cell in CELLS:
  4476. nova_context.set_target_cell(context, cell)
  4477. try:
  4478. return objects.BlockDeviceMapping.get_by_volume(
  4479. context, volume_id, expected_attrs=expected_attrs)
  4480. except exception.NotFound:
  4481. continue
  4482. raise exception.VolumeBDMNotFound(volume_id=volume_id)
  4483. def volume_snapshot_create(self, context, volume_id, create_info):
  4484. bdm = self._get_bdm_by_volume_id(
  4485. context, volume_id, expected_attrs=['instance'])
  4486. # We allow creating the snapshot in any vm_state as long as there is
  4487. # no task being performed on the instance and it has a host.
  4488. @check_instance_host()
  4489. @check_instance_state(vm_state=None)
  4490. def do_volume_snapshot_create(self, context, instance):
  4491. self.compute_rpcapi.volume_snapshot_create(context, instance,
  4492. volume_id, create_info)
  4493. snapshot = {
  4494. 'snapshot': {
  4495. 'id': create_info.get('id'),
  4496. 'volumeId': volume_id
  4497. }
  4498. }
  4499. return snapshot
  4500. return do_volume_snapshot_create(self, context, bdm.instance)
  4501. def volume_snapshot_delete(self, context, volume_id, snapshot_id,
  4502. delete_info):
  4503. bdm = self._get_bdm_by_volume_id(
  4504. context, volume_id, expected_attrs=['instance'])
  4505. # We allow deleting the snapshot in any vm_state as long as there is
  4506. # no task being performed on the instance and it has a host.
  4507. @check_instance_host()
  4508. @check_instance_state(vm_state=None)
  4509. def do_volume_snapshot_delete(self, context, instance):
  4510. self.compute_rpcapi.volume_snapshot_delete(context, instance,
  4511. volume_id, snapshot_id, delete_info)
  4512. do_volume_snapshot_delete(self, context, bdm.instance)
  4513. def external_instance_event(self, api_context, instances, events):
  4514. # NOTE(danms): The external API consumer just provides events,
  4515. # but doesn't know where they go. We need to collate lists
  4516. # by the host the affected instance is on and dispatch them
  4517. # according to host
  4518. instances_by_host = collections.defaultdict(list)
  4519. events_by_host = collections.defaultdict(list)
  4520. hosts_by_instance = collections.defaultdict(list)
  4521. cell_contexts_by_host = {}
  4522. for instance in instances:
  4523. # instance._context is used here since it's already targeted to
  4524. # the cell that the instance lives in, and we need to use that
  4525. # cell context to lookup any migrations associated to the instance.
  4526. hosts, cross_cell_move = self._get_relevant_hosts(
  4527. instance._context, instance)
  4528. for host in hosts:
  4529. # NOTE(danms): All instances on a host must have the same
  4530. # mapping, so just use that
  4531. if host not in cell_contexts_by_host:
  4532. # NOTE(mriedem): If the instance is being migrated across
  4533. # cells then we have to get the host mapping to determine
  4534. # which cell a given host is in.
  4535. if cross_cell_move:
  4536. hm = objects.HostMapping.get_by_host(api_context, host)
  4537. ctxt = nova_context.get_admin_context()
  4538. nova_context.set_target_cell(ctxt, hm.cell_mapping)
  4539. cell_contexts_by_host[host] = ctxt
  4540. else:
  4541. # The instance is not migrating across cells so just
  4542. # use the cell-targeted context already in the
  4543. # instance since the host has to be in that same cell.
  4544. cell_contexts_by_host[host] = instance._context
  4545. instances_by_host[host].append(instance)
  4546. hosts_by_instance[instance.uuid].append(host)
  4547. for event in events:
  4548. if event.name == 'volume-extended':
  4549. # Volume extend is a user-initiated operation starting in the
  4550. # Block Storage service API. We record an instance action so
  4551. # the user can monitor the operation to completion.
  4552. host = hosts_by_instance[event.instance_uuid][0]
  4553. cell_context = cell_contexts_by_host[host]
  4554. objects.InstanceAction.action_start(
  4555. cell_context, event.instance_uuid,
  4556. instance_actions.EXTEND_VOLUME, want_result=False)
  4557. elif event.name == 'power-update':
  4558. host = hosts_by_instance[event.instance_uuid][0]
  4559. cell_context = cell_contexts_by_host[host]
  4560. if event.tag == external_event_obj.POWER_ON:
  4561. inst_action = instance_actions.START
  4562. elif event.tag == external_event_obj.POWER_OFF:
  4563. inst_action = instance_actions.STOP
  4564. else:
  4565. LOG.warning("Invalid power state %s. Cannot process "
  4566. "the event %s. Skipping it.", event.tag,
  4567. event)
  4568. continue
  4569. objects.InstanceAction.action_start(
  4570. cell_context, event.instance_uuid, inst_action,
  4571. want_result=False)
  4572. for host in hosts_by_instance[event.instance_uuid]:
  4573. events_by_host[host].append(event)
  4574. for host in instances_by_host:
  4575. cell_context = cell_contexts_by_host[host]
  4576. # TODO(salv-orlando): Handle exceptions raised by the rpc api layer
  4577. # in order to ensure that a failure in processing events on a host
  4578. # will not prevent processing events on other hosts
  4579. self.compute_rpcapi.external_instance_event(
  4580. cell_context, instances_by_host[host], events_by_host[host],
  4581. host=host)
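# Illustrative sketch (not part of the Nova API; the helper name is
# hypothetical): the dispatch above is essentially a "group by host" of
# events, where an instance that is mid-migration fans its events out to both
# the source and destination hosts.
def _sketch_collate_events_by_host(events, hosts_by_instance):
    """Map host name -> list of events destined for that host.

    :param events: iterable of objects with an instance_uuid attribute
    :param hosts_by_instance: dict of instance_uuid -> list of host names
    """
    import collections  # repeated here so the sketch is self-contained
    events_by_host = collections.defaultdict(list)
    for event in events:
        for host in hosts_by_instance.get(event.instance_uuid, []):
            events_by_host[host].append(event)
    return events_by_host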
  4582. def _get_relevant_hosts(self, context, instance):
  4583. """Get the relevant hosts for an external server event on an instance.
  4584. :param context: nova auth request context targeted at the same cell
  4585. that the instance lives in
  4586. :param instance: Instance object which is the target of an external
  4587. server event
  4588. :returns: 2-item tuple of:
  4589. - set of at least one host (the host where the instance lives); if
  4590. the instance is being migrated the source and dest compute
  4591. hostnames are in the returned set
  4592. - boolean indicating if the instance is being migrated across cells
  4593. """
  4594. hosts = set()
  4595. hosts.add(instance.host)
  4596. cross_cell_move = False
  4597. if instance.migration_context is not None:
  4598. migration_id = instance.migration_context.migration_id
  4599. migration = objects.Migration.get_by_id(context, migration_id)
  4600. cross_cell_move = migration.cross_cell_move
  4601. hosts.add(migration.dest_compute)
  4602. hosts.add(migration.source_compute)
  4603. cells_msg = (
  4604. 'across cells' if cross_cell_move else 'within the same cell')
  4605. LOG.debug('Instance %(instance)s is migrating %(cells_msg)s, '
  4606. 'copying events to all relevant hosts: '
  4607. '%(hosts)s', {'cells_msg': cells_msg,
  4608. 'instance': instance.uuid,
  4609. 'hosts': hosts})
  4610. return hosts, cross_cell_move
  4611. def get_instance_host_status(self, instance):
  4612. if instance.host:
  4613. try:
  4614. service = [service for service in instance.services if
  4615. service.binary == 'nova-compute'][0]
  4616. if service.forced_down:
  4617. host_status = fields_obj.HostStatus.DOWN
  4618. elif service.disabled:
  4619. host_status = fields_obj.HostStatus.MAINTENANCE
  4620. else:
  4621. alive = self.servicegroup_api.service_is_up(service)
  4622. host_status = ((alive and fields_obj.HostStatus.UP) or
  4623. fields_obj.HostStatus.UNKNOWN)
  4624. except IndexError:
  4625. host_status = fields_obj.HostStatus.NONE
  4626. else:
  4627. host_status = fields_obj.HostStatus.NONE
  4628. return host_status
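# Illustrative sketch (not part of the Nova API; the helper name is
# hypothetical and the returned strings merely stand in for the
# fields_obj.HostStatus values): the host status computed above is a simple
# priority check: no host / no compute service -> NONE, forced_down -> DOWN,
# disabled -> MAINTENANCE, otherwise UP or UNKNOWN depending on liveness.
def _sketch_host_status(has_compute_service, forced_down, disabled, is_up):
    if not has_compute_service:
        return 'NONE'
    if forced_down:
        return 'DOWN'
    if disabled:
        return 'MAINTENANCE'
    return 'UP' if is_up else 'UNKNOWN'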
  4629. def get_instances_host_statuses(self, instance_list):
  4630. host_status_dict = dict()
  4631. host_statuses = dict()
  4632. for instance in instance_list:
  4633. if instance.host:
  4634. if instance.host not in host_status_dict:
  4635. host_status = self.get_instance_host_status(instance)
  4636. host_status_dict[instance.host] = host_status
  4637. else:
  4638. host_status = host_status_dict[instance.host]
  4639. else:
  4640. host_status = fields_obj.HostStatus.NONE
  4641. host_statuses[instance.uuid] = host_status
  4642. return host_statuses
  4643. def target_host_cell(fn):
  4644. """Target a host-based function to a cell.
  4645. Expects to wrap a function of signature:
  4646. func(self, context, host, ...)
  4647. """
  4648. @functools.wraps(fn)
  4649. def targeted(self, context, host, *args, **kwargs):
  4650. mapping = objects.HostMapping.get_by_host(context, host)
  4651. nova_context.set_target_cell(context, mapping.cell_mapping)
  4652. return fn(self, context, host, *args, **kwargs)
  4653. return targeted


def _get_service_in_cell_by_host(context, host_name):
    # validates the host; ComputeHostNotFound is raised if invalid
    try:
        mapping = objects.HostMapping.get_by_host(context, host_name)
        nova_context.set_target_cell(context, mapping.cell_mapping)
        service = objects.Service.get_by_compute_host(context, host_name)
    except exception.HostMappingNotFound:
        try:
            # NOTE(danms): This targets our cell
            service = _find_service_in_cell(context, service_host=host_name)
        except exception.NotFound:
            raise exception.ComputeHostNotFound(host=host_name)
    return service


def _find_service_in_cell(context, service_id=None, service_host=None):
    """Find a service by id or hostname by searching all cells.

    If one matching service is found, return it. If none or multiple
    are found, raise an exception.

    :param context: A context.RequestContext
    :param service_id: If not None, the DB ID of the service to find
    :param service_host: If not None, the hostname of the service to find
    :returns: An objects.Service
    :raises: ServiceNotUnique if multiple matching IDs are found
    :raises: NotFound if no matches are found
    :raises: NovaException if called with neither search option
    """
    load_cells()
    service = None
    found_in_cell = None

    is_uuid = False
    if service_id is not None:
        is_uuid = uuidutils.is_uuid_like(service_id)
        if is_uuid:
            lookup_fn = lambda c: objects.Service.get_by_uuid(c, service_id)
        else:
            lookup_fn = lambda c: objects.Service.get_by_id(c, service_id)
    elif service_host is not None:
        lookup_fn = lambda c: (
            objects.Service.get_by_compute_host(c, service_host))
    else:
        LOG.exception('_find_service_in_cell called with no search parameters')
        # This is intentionally cryptic so we don't leak implementation details
        # out of the API.
        raise exception.NovaException()

    for cell in CELLS:
        # NOTE(danms): Services can be in cell0, so don't skip it here
        try:
            with nova_context.target_cell(context, cell) as cctxt:
                cell_service = lookup_fn(cctxt)
        except exception.NotFound:
            # NOTE(danms): Keep looking in other cells
            continue
        if service and cell_service:
            raise exception.ServiceNotUnique()
        service = cell_service
        found_in_cell = cell
        if service and is_uuid:
            break

    if service:
        # NOTE(danms): Set the cell on the context so it remains
        # when we return to our caller
        nova_context.set_target_cell(context, found_in_cell)
        return service
    else:
        raise exception.NotFound()
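

# Illustrative usage sketch for the helper above: either lookup raises
# exception.NotFound when nothing matches, and on success the passed context
# is left targeted at the cell that contains the service. ``ctxt`` and the
# hostname are assumptions for the example.
#
#     service = _find_service_in_cell(ctxt, service_id=service_id_or_uuid)
#     service = _find_service_in_cell(ctxt, service_host='compute-01')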


class HostAPI(base.Base):
    """Sub-set of the Compute Manager API for managing host operations."""

    def __init__(self, rpcapi=None, servicegroup_api=None):
        self.rpcapi = rpcapi or compute_rpcapi.ComputeAPI()
        self.servicegroup_api = servicegroup_api or servicegroup.API()
        super(HostAPI, self).__init__()

    def _assert_host_exists(self, context, host_name, must_be_up=False):
        """Raise HostNotFound if compute host doesn't exist."""
        service = objects.Service.get_by_compute_host(context, host_name)
        if not service:
            raise exception.HostNotFound(host=host_name)
        if must_be_up and not self.servicegroup_api.service_is_up(service):
            raise exception.ComputeServiceUnavailable(host=host_name)
        return service['host']

    @wrap_exception()
    @target_host_cell
    def set_host_enabled(self, context, host_name, enabled):
        """Sets the specified host's ability to accept new instances."""
        host_name = self._assert_host_exists(context, host_name)
        payload = {'host_name': host_name, 'enabled': enabled}
        compute_utils.notify_about_host_update(context,
                                               'set_enabled.start',
                                               payload)
        result = self.rpcapi.set_host_enabled(context, enabled=enabled,
                                              host=host_name)
        compute_utils.notify_about_host_update(context,
                                               'set_enabled.end',
                                               payload)
        return result

    @target_host_cell
    def get_host_uptime(self, context, host_name):
        """Returns the result of calling "uptime" on the target host."""
        host_name = self._assert_host_exists(context, host_name,
                                             must_be_up=True)
        return self.rpcapi.get_host_uptime(context, host=host_name)

    @wrap_exception()
    @target_host_cell
    def host_power_action(self, context, host_name, action):
        """Reboots, shuts down or powers up the host."""
        host_name = self._assert_host_exists(context, host_name)
        payload = {'host_name': host_name, 'action': action}
        compute_utils.notify_about_host_update(context,
                                               'power_action.start',
                                               payload)
        result = self.rpcapi.host_power_action(context, action=action,
                                               host=host_name)
        compute_utils.notify_about_host_update(context,
                                               'power_action.end',
                                               payload)
        return result

    @wrap_exception()
    @target_host_cell
    def set_host_maintenance(self, context, host_name, mode):
        """Start/Stop host maintenance window. On start, it triggers the
        evacuation of guest VMs.
        """
        host_name = self._assert_host_exists(context, host_name)
        payload = {'host_name': host_name, 'mode': mode}
        compute_utils.notify_about_host_update(context,
                                               'set_maintenance.start',
                                               payload)
        result = self.rpcapi.host_maintenance_mode(context,
            host_param=host_name, mode=mode, host=host_name)
        compute_utils.notify_about_host_update(context,
                                               'set_maintenance.end',
                                               payload)
        return result

    def service_get_all(self, context, filters=None, set_zones=False,
                        all_cells=False, cell_down_support=False):
        """Returns a list of services, optionally filtering the results.

        If specified, 'filters' should be a dictionary containing service
        attributes and matching values. For example, to get a list of
        services for the 'compute' topic, use filters={'topic': 'compute'}.

        If all_cells=True, then scan all cells and merge the results.

        If cell_down_support=True then return minimal service records
        for cells that do not respond based on what we have in the
        host mappings. These will have only 'binary' and 'host' set.
        """
        if filters is None:
            filters = {}
        disabled = filters.pop('disabled', None)
        if 'availability_zone' in filters:
            set_zones = True

        # NOTE(danms): Eventually this all_cells nonsense should go away
        # and we should always iterate over the cells. However, certain
        # callers need the legacy behavior for now.
        if all_cells:
            services = []
            service_dict = nova_context.scatter_gather_all_cells(context,
                objects.ServiceList.get_all, disabled, set_zones=set_zones)
            for cell_uuid, service in service_dict.items():
                if not nova_context.is_cell_failure_sentinel(service):
                    services.extend(service)
                elif cell_down_support:
                    unavailable_services = objects.ServiceList()
                    cid = [cm.id for cm in nova_context.CELLS
                           if cm.uuid == cell_uuid]
                    # We know cid[0] is in the list because we are using the
                    # same list that scatter_gather_all_cells used
                    hms = objects.HostMappingList.get_by_cell_id(context,
                                                                 cid[0])
                    for hm in hms:
                        unavailable_services.objects.append(objects.Service(
                            binary='nova-compute', host=hm.host))
                    LOG.warning("Cell %s is not responding and hence only "
                                "partial results are available from this "
                                "cell.", cell_uuid)
                    services.extend(unavailable_services)
                else:
                    LOG.warning("Cell %s is not responding and hence skipped "
                                "from the results.", cell_uuid)
        else:
            services = objects.ServiceList.get_all(context, disabled,
                                                   set_zones=set_zones)

        ret_services = []
        for service in services:
            for key, val in filters.items():
                if service[key] != val:
                    break
            else:
                # All filters matched.
                ret_services.append(service)
        return ret_services
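
    # Illustrative usage sketch, following the docstring above: list the
    # compute services across all cells. ``ctxt`` is an assumed RequestContext.
    #
    #     host_api = HostAPI()
    #     computes = host_api.service_get_all(
    #         ctxt, filters={'topic': 'compute'}, all_cells=True)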

    def service_get_by_id(self, context, service_id):
        """Get service entry for the given service id or uuid."""
        try:
            return _find_service_in_cell(context, service_id=service_id)
        except exception.NotFound:
            raise exception.ServiceNotFound(service_id=service_id)

    @target_host_cell
    def service_get_by_compute_host(self, context, host_name):
        """Get service entry for the given compute hostname."""
        return objects.Service.get_by_compute_host(context, host_name)

    def _update_compute_provider_status(self, context, service):
        """Calls the compute service to sync the COMPUTE_STATUS_DISABLED trait.

        There are two cases where the API will not call the compute service:

        * The compute service is down. In this case the trait is synchronized
          when the compute service is restarted.
        * The compute service is old. In this case the trait is synchronized
          when the compute service is upgraded and restarted.

        :param context: nova auth RequestContext
        :param service: nova.objects.Service object which has been enabled
            or disabled (see ``service_update``).
        """
        # Make sure the service is up so we can make the RPC call.
        if not self.servicegroup_api.service_is_up(service):
            LOG.info('Compute service on host %s is down. The '
                     'COMPUTE_STATUS_DISABLED trait will be synchronized '
                     'when the service is restarted.', service.host)
            return

        # Make sure the compute service is new enough for the trait sync
        # behavior.
        # TODO(mriedem): Remove this compat check in the U release.
        if service.version < MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED:
            LOG.info('Compute service on host %s is too old to sync the '
                     'COMPUTE_STATUS_DISABLED trait in Placement. The '
                     'trait will be synchronized when the service is '
                     'upgraded and restarted.', service.host)
            return

        enabled = not service.disabled
        # Avoid leaking errors out of the API.
        try:
            LOG.debug('Calling the compute service on host %s to sync the '
                      'COMPUTE_STATUS_DISABLED trait.', service.host)
            self.rpcapi.set_host_enabled(context, service.host, enabled)
        except Exception:
            LOG.exception('An error occurred while updating the '
                          'COMPUTE_STATUS_DISABLED trait on compute node '
                          'resource providers managed by host %s. The trait '
                          'will be synchronized automatically by the compute '
                          'service when the update_available_resource '
                          'periodic task runs.', service.host)

    def service_update(self, context, service):
        """Performs the actual service update operation.

        If the "disabled" field is changed, potentially calls the compute
        service to sync the COMPUTE_STATUS_DISABLED trait on the compute node
        resource providers managed by this compute service.

        :param context: nova auth RequestContext
        :param service: nova.objects.Service object with changes already
            set on the object
        """
        # Before persisting changes and resetting the changed fields on the
        # Service object, determine if the disabled field changed.
        update_placement = 'disabled' in service.obj_what_changed()
        # Persist the Service object changes to the database.
        service.save()
        # If the disabled field changed, potentially call the compute service
        # to sync the COMPUTE_STATUS_DISABLED trait.
        if update_placement:
            self._update_compute_provider_status(context, service)
        return service
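
    # Illustrative usage sketch: disabling a compute service, which triggers
    # the COMPUTE_STATUS_DISABLED trait sync described above. ``ctxt`` and the
    # hostname are assumptions for the example.
    #
    #     service = host_api.service_get_by_compute_host(ctxt, 'compute-01')
    #     service.disabled = True
    #     service.disabled_reason = 'maintenance'
    #     host_api.service_update(ctxt, service)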

    @target_host_cell
    def service_update_by_host_and_binary(self, context, host_name, binary,
                                          params_to_update):
        """Enable / Disable a service.

        Determines the cell that the service is in using the HostMapping.

        For compute services, this stops new builds and migrations going to
        the host.

        See also ``service_update``.

        :param context: nova auth RequestContext
        :param host_name: hostname of the service
        :param binary: service binary (really only supports "nova-compute")
        :param params_to_update: dict of changes to make to the Service object
        :raises: HostMappingNotFound if the host is not mapped to a cell
        :raises: HostBinaryNotFound if a services table record is not found
            with the given host_name and binary
        """
        # TODO(mriedem): Service.get_by_args is deprecated; we should use
        # get_by_compute_host here (remember to update the "raises" docstring).
        service = objects.Service.get_by_args(context, host_name, binary)
        service.update(params_to_update)
        return self.service_update(context, service)

    @target_host_cell
    def instance_get_all_by_host(self, context, host_name):
        """Return all instances on the given host."""
        return objects.InstanceList.get_by_host(context, host_name)

    def task_log_get_all(self, context, task_name, period_beginning,
                         period_ending, host=None, state=None):
        """Return the task logs within a given range, optionally
        filtering by host and/or state.
        """
        return self.db.task_log_get_all(context, task_name,
                                        period_beginning,
                                        period_ending,
                                        host=host,
                                        state=state)

    def compute_node_get(self, context, compute_id):
        """Return compute node entry for particular integer ID or UUID."""
        load_cells()

        # NOTE(danms): Unfortunately this API exposes database identifiers
        # which means we really can't do something efficient here
        is_uuid = uuidutils.is_uuid_like(compute_id)
        for cell in CELLS:
            if cell.uuid == objects.CellMapping.CELL0_UUID:
                continue
            with nova_context.target_cell(context, cell) as cctxt:
                try:
                    if is_uuid:
                        return objects.ComputeNode.get_by_uuid(cctxt,
                                                               compute_id)
                    return objects.ComputeNode.get_by_id(cctxt,
                                                         int(compute_id))
                except exception.ComputeHostNotFound:
                    # NOTE(danms): Keep looking in other cells
                    continue

        raise exception.ComputeHostNotFound(host=compute_id)

    def compute_node_get_all(self, context, limit=None, marker=None):
        load_cells()

        computes = []
        uuid_marker = marker and uuidutils.is_uuid_like(marker)
        for cell in CELLS:
            if cell.uuid == objects.CellMapping.CELL0_UUID:
                continue
            with nova_context.target_cell(context, cell) as cctxt:

                # If we have a marker and it's a uuid, see if the compute node
                # is in this cell.
                if marker and uuid_marker:
                    try:
                        compute_marker = objects.ComputeNode.get_by_uuid(
                            cctxt, marker)
                        # we found the marker compute node, so use its id
                        # for the actual marker for paging in this cell's db
                        marker = compute_marker.id
                    except exception.ComputeHostNotFound:
                        # The marker node isn't in this cell so keep looking.
                        continue

                try:
                    cell_computes = objects.ComputeNodeList.get_by_pagination(
                        cctxt, limit=limit, marker=marker)
                except exception.MarkerNotFound:
                    # NOTE(danms): Keep looking through cells
                    continue

                computes.extend(cell_computes)
                # NOTE(danms): We must have found the marker, so continue on
                # without one
                marker = None
                if limit:
                    limit -= len(cell_computes)
                    if limit <= 0:
                        break

        if marker is not None and len(computes) == 0:
            # NOTE(danms): If we did not find the marker in any cell,
            # mimic the db_api behavior here.
            raise exception.MarkerNotFound(marker=marker)

        return objects.ComputeNodeList(objects=computes)
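
    # Illustrative usage sketch: paging through compute nodes across cells by
    # passing the UUID of the last node of the previous page as the marker.
    # ``ctxt`` is an assumed RequestContext.
    #
    #     page = host_api.compute_node_get_all(ctxt, limit=50)
    #     while page:
    #         last = page[-1]
    #         page = host_api.compute_node_get_all(
    #             ctxt, limit=50, marker=last.uuid)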

    def compute_node_search_by_hypervisor(self, context, hypervisor_match):
        load_cells()

        computes = []
        for cell in CELLS:
            if cell.uuid == objects.CellMapping.CELL0_UUID:
                continue
            with nova_context.target_cell(context, cell) as cctxt:
                cell_computes = objects.ComputeNodeList.get_by_hypervisor(
                    cctxt, hypervisor_match)
            computes.extend(cell_computes)
        return objects.ComputeNodeList(objects=computes)

    def compute_node_statistics(self, context):
        load_cells()

        cell_stats = []
        for cell in CELLS:
            if cell.uuid == objects.CellMapping.CELL0_UUID:
                continue
            with nova_context.target_cell(context, cell) as cctxt:
                cell_stats.append(self.db.compute_node_statistics(cctxt))

        if cell_stats:
            keys = cell_stats[0].keys()
            return {k: sum(stats[k] for stats in cell_stats)
                    for k in keys}
        else:
            return {}
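
    # Illustrative usage sketch: the returned dict sums each per-cell DB
    # statistic across all non-cell0 cells; the exact keys come from the DB
    # layer, so the key below is an assumption for the example.
    #
    #     stats = host_api.compute_node_statistics(ctxt)
    #     total_vcpus = stats.get('vcpus', 0)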


class InstanceActionAPI(base.Base):
    """Sub-set of the Compute Manager API for managing instance actions."""

    def actions_get(self, context, instance, limit=None, marker=None,
                    filters=None):
        return objects.InstanceActionList.get_by_instance_uuid(
            context, instance.uuid, limit, marker, filters)

    def action_get_by_request_id(self, context, instance, request_id):
        return objects.InstanceAction.get_by_request_id(
            context, instance.uuid, request_id)

    def action_events_get(self, context, instance, action_id):
        return objects.InstanceActionEventList.get_by_action(
            context, action_id)


class AggregateAPI(base.Base):
    """Sub-set of the Compute Manager API for managing host aggregates."""

    def __init__(self, **kwargs):
        self.compute_rpcapi = compute_rpcapi.ComputeAPI()
        self.query_client = query.SchedulerQueryClient()
        self._placement_client = None  # Lazy-load on first access.
        super(AggregateAPI, self).__init__(**kwargs)

    @property
    def placement_client(self):
        if self._placement_client is None:
            self._placement_client = report.SchedulerReportClient()
        return self._placement_client

    @wrap_exception()
    def create_aggregate(self, context, aggregate_name, availability_zone):
        """Creates the model for the aggregate."""
        aggregate = objects.Aggregate(context=context)
        aggregate.name = aggregate_name
        if availability_zone:
            aggregate.metadata = {'availability_zone': availability_zone}
        aggregate.create()
        self.query_client.update_aggregates(context, [aggregate])
        return aggregate

    def get_aggregate(self, context, aggregate_id):
        """Get an aggregate by id."""
        return objects.Aggregate.get_by_id(context, aggregate_id)

    def get_aggregate_list(self, context):
        """Get all the aggregates."""
        return objects.AggregateList.get_all(context)

    def get_aggregates_by_host(self, context, compute_host):
        """Get all the aggregates in which the given host is present."""
        return objects.AggregateList.get_by_host(context, compute_host)

    @wrap_exception()
    def update_aggregate(self, context, aggregate_id, values):
        """Update the properties of an aggregate."""
        aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
        if 'name' in values:
            aggregate.name = values.pop('name')
            aggregate.save()
        self.is_safe_to_update_az(context, values, aggregate=aggregate,
                                  action_name=AGGREGATE_ACTION_UPDATE,
                                  check_no_instances_in_az=True)
        if values:
            aggregate.update_metadata(values)
            aggregate.updated_at = timeutils.utcnow()
        self.query_client.update_aggregates(context, [aggregate])
        # If updated values include availability_zones, then the cache
        # which stored availability_zones and host need to be reset
        if values.get('availability_zone'):
            availability_zones.reset_cache()
        return aggregate

    @wrap_exception()
    def update_aggregate_metadata(self, context, aggregate_id, metadata):
        """Updates the aggregate metadata."""
        aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
        self.is_safe_to_update_az(context, metadata, aggregate=aggregate,
                                  action_name=AGGREGATE_ACTION_UPDATE_META,
                                  check_no_instances_in_az=True)
        aggregate.update_metadata(metadata)
        self.query_client.update_aggregates(context, [aggregate])
        # If updated metadata include availability_zones, then the cache
        # which stored availability_zones and host need to be reset
        if metadata and metadata.get('availability_zone'):
            availability_zones.reset_cache()
        aggregate.updated_at = timeutils.utcnow()
        return aggregate

    @wrap_exception()
    def delete_aggregate(self, context, aggregate_id):
        """Deletes the aggregate."""
        aggregate_payload = {'aggregate_id': aggregate_id}
        compute_utils.notify_about_aggregate_update(context,
                                                    "delete.start",
                                                    aggregate_payload)
        aggregate = objects.Aggregate.get_by_id(context, aggregate_id)

        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.DELETE,
            phase=fields_obj.NotificationPhase.START)

        if len(aggregate.hosts) > 0:
            msg = _("Host aggregate is not empty")
            raise exception.InvalidAggregateActionDelete(
                aggregate_id=aggregate_id, reason=msg)
        aggregate.destroy()
        self.query_client.delete_aggregate(context, aggregate)
        compute_utils.notify_about_aggregate_update(context,
                                                    "delete.end",
                                                    aggregate_payload)
        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.DELETE,
            phase=fields_obj.NotificationPhase.END)

    def is_safe_to_update_az(self, context, metadata, aggregate,
                             hosts=None,
                             action_name=AGGREGATE_ACTION_ADD,
                             check_no_instances_in_az=False):
        """Determine if updates alter an aggregate's availability zone.

        :param context: local context
        :param metadata: Target metadata for updating aggregate
        :param aggregate: Aggregate to update
        :param hosts: Hosts to check. If None, aggregate.hosts is used
        :type hosts: list
        :param action_name: Calling method for logging purposes
        :param check_no_instances_in_az: if True, check that there are no
            instances on any host of the aggregate
        """
        if 'availability_zone' in metadata:
            if not metadata['availability_zone']:
                msg = _("Aggregate %s does not support empty named "
                        "availability zone") % aggregate.name
                self._raise_invalid_aggregate_exc(action_name, aggregate.id,
                                                  msg)
            _hosts = hosts or aggregate.hosts
            host_aggregates = objects.AggregateList.get_by_metadata_key(
                context, 'availability_zone', hosts=_hosts)
            conflicting_azs = [
                agg.availability_zone for agg in host_aggregates
                if agg.availability_zone != metadata['availability_zone'] and
                agg.id != aggregate.id]
            if conflicting_azs:
                msg = _("One or more hosts already in availability zone(s) "
                        "%s") % conflicting_azs
                self._raise_invalid_aggregate_exc(action_name, aggregate.id,
                                                  msg)
            same_az_name = (aggregate.availability_zone ==
                            metadata['availability_zone'])
            if check_no_instances_in_az and not same_az_name:
                instance_count_by_cell = (
                    nova_context.scatter_gather_skip_cell0(
                        context,
                        objects.InstanceList.get_count_by_hosts,
                        _hosts))
                if any(cnt for cnt in instance_count_by_cell.values()):
                    msg = _("One or more hosts contain instances in this zone")
                    self._raise_invalid_aggregate_exc(
                        action_name, aggregate.id, msg)

    def _raise_invalid_aggregate_exc(self, action_name, aggregate_id, reason):
        if action_name == AGGREGATE_ACTION_ADD:
            raise exception.InvalidAggregateActionAdd(
                aggregate_id=aggregate_id, reason=reason)
        elif action_name == AGGREGATE_ACTION_UPDATE:
            raise exception.InvalidAggregateActionUpdate(
                aggregate_id=aggregate_id, reason=reason)
        elif action_name == AGGREGATE_ACTION_UPDATE_META:
            raise exception.InvalidAggregateActionUpdateMeta(
                aggregate_id=aggregate_id, reason=reason)
        elif action_name == AGGREGATE_ACTION_DELETE:
            raise exception.InvalidAggregateActionDelete(
                aggregate_id=aggregate_id, reason=reason)

        raise exception.NovaException(
            _("Unexpected aggregate action %s") % action_name)

    def _update_az_cache_for_host(self, context, host_name, aggregate_meta):
        # Update the availability_zone cache to avoid getting wrong
        # availability_zone in cache retention time when add/remove
        # host to/from aggregate.
        if aggregate_meta and aggregate_meta.get('availability_zone'):
            availability_zones.update_host_availability_zone_cache(context,
                                                                   host_name)

    @wrap_exception()
    def add_host_to_aggregate(self, context, aggregate_id, host_name):
        """Adds the host to an aggregate."""
        aggregate_payload = {'aggregate_id': aggregate_id,
                             'host_name': host_name}
        compute_utils.notify_about_aggregate_update(context,
                                                    "addhost.start",
                                                    aggregate_payload)

        service = _get_service_in_cell_by_host(context, host_name)
        if service.host != host_name:
            # NOTE(danms): If we found a service but it is not an
            # exact match, we may have a case-insensitive backend
            # database (like mysql) which will end up with us
            # adding the host-aggregate mapping with a
            # non-matching hostname.
            raise exception.ComputeHostNotFound(host=host_name)

        aggregate = objects.Aggregate.get_by_id(context, aggregate_id)

        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.ADD_HOST,
            phase=fields_obj.NotificationPhase.START)

        self.is_safe_to_update_az(context, aggregate.metadata,
                                  hosts=[host_name], aggregate=aggregate)

        aggregate.add_host(host_name)
        self.query_client.update_aggregates(context, [aggregate])
        try:
            self.placement_client.aggregate_add_host(
                context, aggregate.uuid, host_name=host_name)
        except (exception.ResourceProviderNotFound,
                exception.ResourceProviderAggregateRetrievalFailed,
                exception.ResourceProviderUpdateFailed,
                exception.ResourceProviderUpdateConflict) as err:
            # NOTE(jaypipes): We don't want a failure to perform the mirroring
            # action in the placement service to be returned to the user (they
            # probably don't know anything about the placement service and
            # would just be confused). So, we just log a warning here, noting
            # that on the next run of nova-manage placement sync_aggregates
            # things will go back to normal
            LOG.warning("Failed to associate %s with a placement "
                        "aggregate: %s. This may be corrected after running "
                        "nova-manage placement sync_aggregates.",
                        host_name, err)
        self._update_az_cache_for_host(context, host_name, aggregate.metadata)
        # NOTE(jogo): Send message to host to support resource pools
        self.compute_rpcapi.add_aggregate_host(context,
            aggregate=aggregate, host_param=host_name, host=host_name)
        aggregate_payload.update({'name': aggregate.name})
        compute_utils.notify_about_aggregate_update(context,
                                                    "addhost.end",
                                                    aggregate_payload)
        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.ADD_HOST,
            phase=fields_obj.NotificationPhase.END)

        return aggregate

    @wrap_exception()
    def remove_host_from_aggregate(self, context, aggregate_id, host_name):
        """Removes host from the aggregate."""
        aggregate_payload = {'aggregate_id': aggregate_id,
                             'host_name': host_name}
        compute_utils.notify_about_aggregate_update(context,
                                                    "removehost.start",
                                                    aggregate_payload)
        _get_service_in_cell_by_host(context, host_name)
        aggregate = objects.Aggregate.get_by_id(context, aggregate_id)

        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.REMOVE_HOST,
            phase=fields_obj.NotificationPhase.START)

        # Remove the resource provider from the provider aggregate first
        # before we change anything on the nova side because if we did the
        # nova stuff first we can't re-attempt this from the compute API if
        # cleaning up placement fails.
        try:
            # Anything else this raises is handled in the route handler as
            # either a 409 (ResourceProviderUpdateConflict) or 500.
            self.placement_client.aggregate_remove_host(
                context, aggregate.uuid, host_name)
        except exception.ResourceProviderNotFound as err:
            # If the resource provider is not found then it's likely not part
            # of the aggregate anymore anyway since provider aggregates are
            # not resources themselves with metadata like nova aggregates,
            # they are just a grouping concept around resource providers.
            # Log and continue.
            LOG.warning("Failed to remove association of %s with a placement "
                        "aggregate: %s.", host_name, err)

        aggregate.delete_host(host_name)
        self.query_client.update_aggregates(context, [aggregate])
        self._update_az_cache_for_host(context, host_name, aggregate.metadata)
        self.compute_rpcapi.remove_aggregate_host(context,
            aggregate=aggregate, host_param=host_name, host=host_name)
        compute_utils.notify_about_aggregate_update(context,
                                                    "removehost.end",
                                                    aggregate_payload)
        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.REMOVE_HOST,
            phase=fields_obj.NotificationPhase.END)

        return aggregate
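
    # Illustrative usage sketch of the aggregate/availability-zone workflow
    # built from the methods above; ``ctxt`` and the names are assumptions.
    # The host is removed before deletion because delete_aggregate rejects
    # non-empty aggregates.
    #
    #     agg_api = AggregateAPI()
    #     agg = agg_api.create_aggregate(ctxt, 'rack-1',
    #                                    availability_zone='az1')
    #     agg_api.add_host_to_aggregate(ctxt, agg.id, 'compute-01')
    #     agg_api.remove_host_from_aggregate(ctxt, agg.id, 'compute-01')
    #     agg_api.delete_aggregate(ctxt, agg.id)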


class KeypairAPI(base.Base):
    """Subset of the Compute Manager API for managing key pairs."""

    get_notifier = functools.partial(rpc.get_notifier, service='api')
    wrap_exception = functools.partial(exception_wrapper.wrap_exception,
                                       get_notifier=get_notifier,
                                       binary='nova-api')

    def _notify(self, context, event_suffix, keypair_name):
        payload = {
            'tenant_id': context.project_id,
            'user_id': context.user_id,
            'key_name': keypair_name,
        }
        notify = self.get_notifier()
        notify.info(context, 'keypair.%s' % event_suffix, payload)

    def _validate_new_key_pair(self, context, user_id, key_name, key_type):
        safe_chars = "_- " + string.digits + string.ascii_letters
        clean_value = "".join(x for x in key_name if x in safe_chars)
        if clean_value != key_name:
            raise exception.InvalidKeypair(
                reason=_("Keypair name contains unsafe characters"))

        try:
            utils.check_string_length(key_name, min_length=1, max_length=255)
        except exception.InvalidInput:
            raise exception.InvalidKeypair(
                reason=_('Keypair name must be string and between '
                         '1 and 255 characters long'))

        try:
            objects.Quotas.check_deltas(context, {'key_pairs': 1}, user_id)
        except exception.OverQuota:
            raise exception.KeypairLimitExceeded()

    @wrap_exception()
    def import_key_pair(self, context, user_id, key_name, public_key,
                        key_type=keypair_obj.KEYPAIR_TYPE_SSH):
        """Import a key pair using an existing public key."""
        self._validate_new_key_pair(context, user_id, key_name, key_type)

        self._notify(context, 'import.start', key_name)

        keypair = objects.KeyPair(context)
        keypair.user_id = user_id
        keypair.name = key_name
        keypair.type = key_type
        keypair.fingerprint = None
        keypair.public_key = public_key

        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.IMPORT,
            phase=fields_obj.NotificationPhase.START)

        fingerprint = self._generate_fingerprint(public_key, key_type)

        keypair.fingerprint = fingerprint
        keypair.create()

        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.IMPORT,
            phase=fields_obj.NotificationPhase.END)

        self._notify(context, 'import.end', key_name)

        return keypair

    @wrap_exception()
    def create_key_pair(self, context, user_id, key_name,
                        key_type=keypair_obj.KEYPAIR_TYPE_SSH):
        """Create a new key pair."""
        self._validate_new_key_pair(context, user_id, key_name, key_type)

        keypair = objects.KeyPair(context)
        keypair.user_id = user_id
        keypair.name = key_name
        keypair.type = key_type
        keypair.fingerprint = None
        keypair.public_key = None

        self._notify(context, 'create.start', key_name)
        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.CREATE,
            phase=fields_obj.NotificationPhase.START)

        private_key, public_key, fingerprint = self._generate_key_pair(
            user_id, key_type)

        keypair.fingerprint = fingerprint
        keypair.public_key = public_key
        keypair.create()

        # NOTE(melwitt): We recheck the quota after creating the object to
        # prevent users from allocating more resources than their allowed
        # quota in the event of a race. This is configurable because it can
        # be expensive if strict quota limits are not required in a
        # deployment.
        if CONF.quota.recheck_quota:
            try:
                objects.Quotas.check_deltas(context, {'key_pairs': 0},
                                            user_id)
            except exception.OverQuota:
                keypair.destroy()
                raise exception.KeypairLimitExceeded()

        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.CREATE,
            phase=fields_obj.NotificationPhase.END)

        self._notify(context, 'create.end', key_name)

        return keypair, private_key
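
    # Illustrative usage sketch: generating a new keypair and importing an
    # existing one. ``ctxt`` is an assumed RequestContext and ``public_key``
    # an assumed SSH public key string.
    #
    #     kp_api = KeypairAPI()
    #     keypair, private_key = kp_api.create_key_pair(
    #         ctxt, ctxt.user_id, 'mykey')
    #     kp_api.import_key_pair(ctxt, ctxt.user_id, 'imported-key',
    #                            public_key)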

    def _generate_fingerprint(self, public_key, key_type):
        if key_type == keypair_obj.KEYPAIR_TYPE_SSH:
            return crypto.generate_fingerprint(public_key)
        elif key_type == keypair_obj.KEYPAIR_TYPE_X509:
            return crypto.generate_x509_fingerprint(public_key)

    def _generate_key_pair(self, user_id, key_type):
        if key_type == keypair_obj.KEYPAIR_TYPE_SSH:
            return crypto.generate_key_pair()
        elif key_type == keypair_obj.KEYPAIR_TYPE_X509:
            return crypto.generate_winrm_x509_cert(user_id)

    @wrap_exception()
    def delete_key_pair(self, context, user_id, key_name):
        """Delete a keypair by name."""
        self._notify(context, 'delete.start', key_name)
        keypair = self.get_key_pair(context, user_id, key_name)

        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.DELETE,
            phase=fields_obj.NotificationPhase.START)

        objects.KeyPair.destroy_by_name(context, user_id, key_name)

        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.DELETE,
            phase=fields_obj.NotificationPhase.END)
        self._notify(context, 'delete.end', key_name)

    def get_key_pairs(self, context, user_id, limit=None, marker=None):
        """List key pairs."""
        return objects.KeyPairList.get_by_user(
            context, user_id, limit=limit, marker=marker)

    def get_key_pair(self, context, user_id, key_name):
        """Get a keypair by name."""
        return objects.KeyPair.get_by_name(context, user_id, key_name)