OpenStack Compute (Nova)

6175 lines · 287 KB

# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# Copyright 2011 Piston Cloud Computing, Inc.
# Copyright 2012-2013 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Handles all requests relating to compute resources (e.g. guest VMs,
networking and storage of VMs, and compute hosts on which they run)."""

import collections
import functools
import re
import string

from castellan import key_manager
from oslo_log import log as logging
from oslo_messaging import exceptions as oslo_exceptions
from oslo_serialization import base64 as base64utils
from oslo_utils import excutils
from oslo_utils import strutils
from oslo_utils import timeutils
from oslo_utils import units
from oslo_utils import uuidutils
import six
from six.moves import range

from nova import availability_zones
from nova import block_device
from nova.compute import flavors
from nova.compute import instance_actions
from nova.compute import instance_list
from nova.compute import migration_list
from nova.compute import power_state
from nova.compute import rpcapi as compute_rpcapi
from nova.compute import task_states
from nova.compute import utils as compute_utils
from nova.compute.utils import wrap_instance_event
from nova.compute import vm_states
from nova import conductor
import nova.conf
from nova import context as nova_context
from nova import crypto
from nova.db import base
from nova.db.sqlalchemy import api as db_api
from nova import exception
from nova import exception_wrapper
from nova import hooks
from nova.i18n import _
from nova import image
from nova.network import constants
from nova.network import model as network_model
from nova.network import neutron
from nova.network import security_group_api
from nova import objects
from nova.objects import base as obj_base
from nova.objects import block_device as block_device_obj
from nova.objects import external_event as external_event_obj
from nova.objects import fields as fields_obj
from nova.objects import keypair as keypair_obj
from nova.objects import quotas as quotas_obj
from nova.pci import request as pci_request
from nova.policies import servers as servers_policies
import nova.policy
from nova import profiler
from nova import rpc
from nova.scheduler.client import query
from nova.scheduler.client import report
from nova.scheduler import utils as scheduler_utils
from nova import servicegroup
from nova import utils
from nova.virt import hardware
from nova.volume import cinder

LOG = logging.getLogger(__name__)

get_notifier = functools.partial(rpc.get_notifier, service='compute')
# NOTE(gibi): legacy notification used compute as a service but these
# calls still run on the client side of the compute service which is
# nova-api. By setting the binary to nova-api below, we can make sure
# that the new versioned notifications have the right publisher_id but the
# legacy notifications do not change.
wrap_exception = functools.partial(exception_wrapper.wrap_exception,
                                   get_notifier=get_notifier,
                                   binary='nova-api')
CONF = nova.conf.CONF

AGGREGATE_ACTION_UPDATE = 'Update'
AGGREGATE_ACTION_UPDATE_META = 'UpdateMeta'
AGGREGATE_ACTION_DELETE = 'Delete'
AGGREGATE_ACTION_ADD = 'Add'

MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED = 38
MIN_COMPUTE_CROSS_CELL_RESIZE = 47

# FIXME(danms): Keep a global cache of the cells we find the
# first time we look. This needs to be refreshed on a timer or
# trigger.
CELLS = []


def check_instance_state(vm_state=None, task_state=(None,),
                         must_have_launched=True):
    """Decorator to check VM and/or task state before entry to API functions.

    If the instance is in the wrong state, or has not been successfully
    started at least once the wrapper will raise an exception.
    """
    if vm_state is not None and not isinstance(vm_state, set):
        vm_state = set(vm_state)
    if task_state is not None and not isinstance(task_state, set):
        task_state = set(task_state)

    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            if vm_state is not None and instance.vm_state not in vm_state:
                raise exception.InstanceInvalidState(
                    attr='vm_state',
                    instance_uuid=instance.uuid,
                    state=instance.vm_state,
                    method=f.__name__)
            if (task_state is not None and
                    instance.task_state not in task_state):
                raise exception.InstanceInvalidState(
                    attr='task_state',
                    instance_uuid=instance.uuid,
                    state=instance.task_state,
                    method=f.__name__)
            if must_have_launched and not instance.launched_at:
                raise exception.InstanceInvalidState(
                    attr='launched_at',
                    instance_uuid=instance.uuid,
                    state=instance.launched_at,
                    method=f.__name__)
            return f(self, context, instance, *args, **kw)
        return inner
    return outer
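
# Example usage (illustrative only; the decorated method below is not part
# of this module): guard an API method so it only runs when the server is
# ACTIVE or STOPPED, has no task in flight, and has been launched at least
# once; anything else raises InstanceInvalidState.
#
#     @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED],
#                           task_state=[None])
#     def example_action(self, context, instance):
#         ...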


def _set_or_none(q):
    return q if q is None or isinstance(q, set) else set(q)


def reject_instance_state(vm_state=None, task_state=None):
    """Decorator. Raise InstanceInvalidState if instance is in any of the
    given states.
    """
    vm_state = _set_or_none(vm_state)
    task_state = _set_or_none(task_state)

    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            _InstanceInvalidState = functools.partial(
                exception.InstanceInvalidState,
                instance_uuid=instance.uuid,
                method=f.__name__)

            if vm_state is not None and instance.vm_state in vm_state:
                raise _InstanceInvalidState(
                    attr='vm_state', state=instance.vm_state)
            if task_state is not None and instance.task_state in task_state:
                raise _InstanceInvalidState(
                    attr='task_state', state=instance.task_state)

            return f(self, context, instance, *args, **kw)
        return inner
    return outer


def check_instance_host(check_is_up=False):
    """Validate the instance.host before performing the operation.

    At a minimum this method will check that the instance.host is set.

    :param check_is_up: If True, check that the instance.host status is UP
        or MAINTENANCE (disabled but not down).
    :raises: InstanceNotReady if the instance.host is not set
    :raises: ServiceUnavailable if check_is_up=True and the instance.host
        compute service status is not UP or MAINTENANCE
    """
    def outer(function):
        @six.wraps(function)
        def wrapped(self, context, instance, *args, **kwargs):
            if not instance.host:
                raise exception.InstanceNotReady(instance_id=instance.uuid)
            if check_is_up:
                # Make sure the source compute service is not down otherwise we
                # cannot proceed.
                host_status = self.get_instance_host_status(instance)
                if host_status not in (fields_obj.HostStatus.UP,
                                       fields_obj.HostStatus.MAINTENANCE):
                    # ComputeServiceUnavailable would make more sense here but
                    # we do not want to leak hostnames to end users.
                    raise exception.ServiceUnavailable()
            return function(self, context, instance, *args, **kwargs)
        return wrapped
    return outer
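
# Example usage (illustrative; the decorated method is not from this module):
# reject the call early when the instance has no host assigned yet or, with
# check_is_up=True, when the source compute service is neither UP nor in
# MAINTENANCE.
#
#     @check_instance_host(check_is_up=True)
#     def example_host_operation(self, context, instance):
#         ...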


def check_instance_lock(function):
    @six.wraps(function)
    def inner(self, context, instance, *args, **kwargs):
        if instance.locked and not context.is_admin:
            raise exception.InstanceIsLocked(instance_uuid=instance.uuid)
        return function(self, context, instance, *args, **kwargs)
    return inner


def reject_sev_instances(operation):
    """Decorator. Raise OperationNotSupportedForSEV if instance has SEV
    enabled.
    """
    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            if hardware.get_mem_encryption_constraint(instance.flavor,
                                                      instance.image_meta):
                raise exception.OperationNotSupportedForSEV(
                    instance_uuid=instance.uuid,
                    operation=operation)
            return f(self, context, instance, *args, **kw)
        return inner
    return outer


def _diff_dict(orig, new):
    """Return a dict describing how to change orig to new. The keys
    correspond to values that have changed; the value will be a list
    of one or two elements. The first element of the list will be
    either '+' or '-', indicating whether the key was updated or
    deleted; if the key was updated, the list will contain a second
    element, giving the updated value.
    """
    # Figure out what keys went away
    result = {k: ['-'] for k in set(orig.keys()) - set(new.keys())}
    # Compute the updates
    for key, value in new.items():
        if key not in orig or value != orig[key]:
            result[key] = ['+', value]
    return result
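
# Example (illustrative): given the implementation above,
#
#     _diff_dict({'a': 1, 'b': 2}, {'b': 3, 'c': 4})
#
# returns {'a': ['-'], 'b': ['+', 3], 'c': ['+', 4]}: 'a' was deleted, while
# 'b' and 'c' carry their updated values.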


def load_cells():
    global CELLS
    if not CELLS:
        CELLS = objects.CellMappingList.get_all(
            nova_context.get_admin_context())
        LOG.debug('Found %(count)i cells: %(cells)s',
                  dict(count=len(CELLS),
                       cells=','.join([c.identity for c in CELLS])))

    if not CELLS:
        LOG.error('No cells are configured, unable to continue')


def _get_image_meta_obj(image_meta_dict):
    try:
        image_meta = objects.ImageMeta.from_dict(image_meta_dict)
    except ValueError as e:
        # there must be invalid values in the image meta properties so
        # consider this an invalid request
        msg = _('Invalid image metadata. Error: %s') % six.text_type(e)
        raise exception.InvalidRequest(msg)
    return image_meta


@profiler.trace_cls("compute_api")
class API(base.Base):
    """API for interacting with the compute manager."""

    def __init__(self, image_api=None, network_api=None, volume_api=None,
                 **kwargs):
        self.image_api = image_api or image.API()
        self.network_api = network_api or neutron.API()
        self.volume_api = volume_api or cinder.API()
        self._placementclient = None  # Lazy-load on first access.
        self.compute_rpcapi = compute_rpcapi.ComputeAPI()
        self.compute_task_api = conductor.ComputeTaskAPI()
        self.servicegroup_api = servicegroup.API()
        self.host_api = HostAPI(self.compute_rpcapi, self.servicegroup_api)
        self.notifier = rpc.get_notifier('compute', CONF.host)
        if CONF.ephemeral_storage_encryption.enabled:
            self.key_manager = key_manager.API()
        # Help us to record host in EventReporter
        self.host = CONF.host

        super(API, self).__init__(**kwargs)

    def _record_action_start(self, context, instance, action):
        objects.InstanceAction.action_start(context, instance.uuid,
                                            action, want_result=False)

    def _check_injected_file_quota(self, context, injected_files):
        """Enforce quota limits on injected files.

        Raises a QuotaError if any limit is exceeded.
        """
        if not injected_files:
            return

        # Check number of files first
        try:
            objects.Quotas.limit_check(context,
                                       injected_files=len(injected_files))
        except exception.OverQuota:
            raise exception.OnsetFileLimitExceeded()

        # OK, now count path and content lengths; we're looking for
        # the max...
        max_path = 0
        max_content = 0
        for path, content in injected_files:
            max_path = max(max_path, len(path))
            max_content = max(max_content, len(content))

        try:
            objects.Quotas.limit_check(context,
                                       injected_file_path_bytes=max_path,
                                       injected_file_content_bytes=max_content)
        except exception.OverQuota as exc:
            # Favor path limit over content limit for reporting
            # purposes
            if 'injected_file_path_bytes' in exc.kwargs['overs']:
                raise exception.OnsetFilePathLimitExceeded(
                    allowed=exc.kwargs['quotas']['injected_file_path_bytes'])
            else:
                raise exception.OnsetFileContentLimitExceeded(
                    allowed=exc.kwargs['quotas']['injected_file_content_bytes'])

    def _check_metadata_properties_quota(self, context, metadata=None):
        """Enforce quota limits on metadata properties."""
        if not metadata:
            return
        if not isinstance(metadata, dict):
            msg = (_("Metadata type should be dict."))
            raise exception.InvalidMetadata(reason=msg)
        num_metadata = len(metadata)
        try:
            objects.Quotas.limit_check(context, metadata_items=num_metadata)
        except exception.OverQuota as exc:
            quota_metadata = exc.kwargs['quotas']['metadata_items']
            raise exception.MetadataLimitExceeded(allowed=quota_metadata)

        # Because metadata is stored in the DB, we hard-code the size limits
        # In future, we may support more variable length strings, so we act
        # as if this is quota-controlled for forwards compatibility.
        # Those are only used in V2 API, from V2.1 API, those checks are
        # validated at API layer schema validation.
        for k, v in metadata.items():
            try:
                utils.check_string_length(v)
                utils.check_string_length(k, min_length=1)
            except exception.InvalidInput as e:
                raise exception.InvalidMetadata(reason=e.format_message())

            if len(k) > 255:
                msg = _("Metadata property key greater than 255 characters")
                raise exception.InvalidMetadataSize(reason=msg)
            if len(v) > 255:
                msg = _("Metadata property value greater than 255 characters")
                raise exception.InvalidMetadataSize(reason=msg)

    def _check_requested_secgroups(self, context, secgroups):
        """Check if the security group requested exists and belongs to
        the project.

        :param context: The nova request context.
        :type context: nova.context.RequestContext
        :param secgroups: list of requested security group names, or uuids in
            the case of Neutron.
        :type secgroups: list
        :returns: list of requested security group names unmodified if using
            nova-network. If using Neutron, the list returned is all uuids.
            Note that 'default' is a special case and will be unmodified if
            it's requested.
        """
        security_groups = []
        for secgroup in secgroups:
            # NOTE(sdague): default is handled special
            if secgroup == "default":
                security_groups.append(secgroup)
                continue
            secgroup_dict = security_group_api.get(context, secgroup)
            if not secgroup_dict:
                raise exception.SecurityGroupNotFoundForProject(
                    project_id=context.project_id, security_group_id=secgroup)
            security_groups.append(secgroup_dict['id'])

        return security_groups

    def _check_requested_networks(self, context, requested_networks,
                                  max_count):
        """Check that the requested networks belong to the project
        and that the fixed IP address for each network provided is within
        the same network block.
        """
        if requested_networks is not None:
            if requested_networks.no_allocate:
                # If the network request was specifically 'none' meaning don't
                # allocate any networks, we just return the number of requested
                # instances since quotas don't change at all.
                return max_count

            # NOTE(danms): Temporary transition
            requested_networks = requested_networks.as_tuples()

        return self.network_api.validate_networks(context, requested_networks,
                                                   max_count)

    def _handle_kernel_and_ramdisk(self, context, kernel_id, ramdisk_id,
                                   image):
        """Choose kernel and ramdisk appropriate for the instance.

        The kernel and ramdisk can be chosen in one of two ways:

            1. Passed in with create-instance request.
            2. Inherited from image metadata.

        If inherited from image metadata, and if that image metadata value is
        set to 'nokernel', both kernel and ramdisk will default to None.
        """
        # Inherit from image if not specified
        image_properties = image.get('properties', {})
        if kernel_id is None:
            kernel_id = image_properties.get('kernel_id')
        if ramdisk_id is None:
            ramdisk_id = image_properties.get('ramdisk_id')

        # Force to None if kernel_id indicates that a kernel is not to be used
        if kernel_id == 'nokernel':
            kernel_id = None
            ramdisk_id = None

        # Verify kernel and ramdisk exist (fail-fast)
        if kernel_id is not None:
            kernel_image = self.image_api.get(context, kernel_id)
            # kernel_id could have been a URI, not a UUID, so to keep behaviour
            # from before, which leaked that implementation detail out to the
            # caller, we return the image UUID of the kernel image and ramdisk
            # image (below) and not any image URIs that might have been
            # supplied.
            # TODO(jaypipes): Get rid of this silliness once we move to a real
            # Image object and hide all of that stuff within nova.image.api.
            kernel_id = kernel_image['id']

        if ramdisk_id is not None:
            ramdisk_image = self.image_api.get(context, ramdisk_id)
            ramdisk_id = ramdisk_image['id']

        return kernel_id, ramdisk_id
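
    # Example (illustrative): with kernel_id=None and ramdisk_id=None, an
    # image whose properties carry kernel_id='nokernel' makes the method
    # above return (None, None); an image carrying real kernel_id/ramdisk_id
    # references makes it return the corresponding image UUIDs after
    # verifying that both images exist.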

    @staticmethod
    def parse_availability_zone(context, availability_zone):
        # NOTE(vish): We have a legacy hack to allow admins to specify hosts
        #             via az using az:host:node. It might be nice to expose an
        #             api to specify specific hosts to force onto, but for
        #             now it just supports this legacy hack.
        # NOTE(deva): It is also possible to specify az::node, in which case
        #             the host manager will determine the correct host.
        forced_host = None
        forced_node = None
        if availability_zone and ':' in availability_zone:
            c = availability_zone.count(':')
            if c == 1:
                availability_zone, forced_host = availability_zone.split(':')
            elif c == 2:
                if '::' in availability_zone:
                    availability_zone, forced_node = \
                        availability_zone.split('::')
                else:
                    availability_zone, forced_host, forced_node = \
                        availability_zone.split(':')
            else:
                raise exception.InvalidInput(
                    reason="Unable to parse availability_zone")
        if not availability_zone:
            availability_zone = CONF.default_schedule_zone
        return availability_zone, forced_host, forced_node
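
    # Example (illustrative): 'az1:host1:node1' parses to
    # ('az1', 'host1', 'node1'), 'az1::node1' to ('az1', None, 'node1'),
    # 'az1:host1' to ('az1', 'host1', None), and a plain 'az1' to
    # ('az1', None, None); more than two ':' separators raise InvalidInput.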

    def _ensure_auto_disk_config_is_valid(self, auto_disk_config_img,
                                          auto_disk_config, image):
        auto_disk_config_disabled = \
            utils.is_auto_disk_config_disabled(auto_disk_config_img)
        if auto_disk_config_disabled and auto_disk_config:
            raise exception.AutoDiskConfigDisabledByImage(image=image)

    def _inherit_properties_from_image(self, image, auto_disk_config):
        image_properties = image.get('properties', {})
        auto_disk_config_img = \
            utils.get_auto_disk_config_from_image_props(image_properties)
        self._ensure_auto_disk_config_is_valid(auto_disk_config_img,
                                               auto_disk_config,
                                               image.get("id"))
        if auto_disk_config is None:
            auto_disk_config = strutils.bool_from_string(auto_disk_config_img)

        return {
            'os_type': image_properties.get('os_type'),
            'architecture': image_properties.get('architecture'),
            'vm_mode': image_properties.get('vm_mode'),
            'auto_disk_config': auto_disk_config
        }

    def _check_config_drive(self, config_drive):
        if config_drive:
            try:
                bool_val = strutils.bool_from_string(config_drive,
                                                     strict=True)
            except ValueError:
                raise exception.ConfigDriveInvalidValue(option=config_drive)
        else:
            bool_val = False
        # FIXME(comstud): Bug ID 1193438 filed for this. This looks silly,
        # but this is because the config drive column is a String. False
        # is represented by using an empty string. And for whatever
        # reason, we rely on the DB to cast True to a String.
        return True if bool_val else ''
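
    # Example (illustrative): _check_config_drive('true') returns True,
    # _check_config_drive(None) returns '' (the DB representation of False),
    # and _check_config_drive('bogus') raises ConfigDriveInvalidValue because
    # strict boolean parsing fails.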

    def _validate_flavor_image(self, context, image_id, image,
                               instance_type, root_bdm, validate_numa=True):
        """Validate the flavor and image.

        This is called from the API service to ensure that the flavor
        extra-specs and image properties are self-consistent and compatible
        with each other.

        :param context: A context.RequestContext
        :param image_id: UUID of the image
        :param image: a dict representation of the image including properties,
            enforces the image status is active.
        :param instance_type: Flavor object
        :param root_bdm: BlockDeviceMapping for root disk. Will be None for
            the resize case.
        :param validate_numa: Flag to indicate whether or not to validate
            the NUMA-related metadata.
        :raises: Many different possible exceptions. See
            api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
            for the full list.
        """
        if image and image['status'] != 'active':
            raise exception.ImageNotActive(image_id=image_id)
        self._validate_flavor_image_nostatus(context, image, instance_type,
                                             root_bdm, validate_numa)

    @staticmethod
    def _validate_flavor_image_nostatus(context, image, instance_type,
                                        root_bdm, validate_numa=True,
                                        validate_pci=False):
        """Validate the flavor and image.

        This is called from the API service to ensure that the flavor
        extra-specs and image properties are self-consistent and compatible
        with each other.

        :param context: A context.RequestContext
        :param image: a dict representation of the image including properties
        :param instance_type: Flavor object
        :param root_bdm: BlockDeviceMapping for root disk. Will be None for
            the resize case.
        :param validate_numa: Flag to indicate whether or not to validate
            the NUMA-related metadata.
        :param validate_pci: Flag to indicate whether or not to validate
            the PCI-related metadata.
        :raises: Many different possible exceptions. See
            api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
            for the full list.
        """
        if not image:
            return

        image_properties = image.get('properties', {})
        config_drive_option = image_properties.get(
            'img_config_drive', 'optional')
        if config_drive_option not in ['optional', 'mandatory']:
            raise exception.InvalidImageConfigDrive(
                config_drive=config_drive_option)

        if instance_type['memory_mb'] < int(image.get('min_ram') or 0):
            raise exception.FlavorMemoryTooSmall()

        # Image min_disk is in gb, size is in bytes. For sanity, have them both
        # in bytes.
        image_min_disk = int(image.get('min_disk') or 0) * units.Gi
        image_size = int(image.get('size') or 0)

        # Target disk is a volume. Don't check flavor disk size because it
        # doesn't make sense, and check min_disk against the volume size.
        if root_bdm is not None and root_bdm.is_volume:
            # There are 2 possibilities here:
            #
            # 1. The target volume already exists but bdm.volume_size is not
            #    yet set because this method is called before
            #    _bdm_validate_set_size_and_instance during server create.
            # 2. The target volume doesn't exist, in which case the bdm will
            #    contain the intended volume size
            #
            # Note that rebuild also calls this method with potentially a new
            # image but you can't rebuild a volume-backed server with a new
            # image (yet).
            #
            # Cinder does its own check against min_disk, so if the target
            # volume already exists this has already been done and we don't
            # need to check it again here. In this case, volume_size may not be
            # set on the bdm.
            #
            # If we're going to create the volume, the bdm will contain
            # volume_size. Therefore we should check it if it exists. This will
            # still be checked again by cinder when the volume is created, but
            # that will not happen until the request reaches a host. By
            # checking it here, the user gets an immediate and useful failure
            # indication.
            #
            # The third possibility is that we have failed to consider
            # something, and there are actually more than 2 possibilities. In
            # this case cinder will still do the check at volume creation time.
            # The behaviour will still be correct, but the user will not get an
            # immediate failure from the api, and will instead have to
            # determine why the instance is in an error state with a task of
            # block_device_mapping.
            #
            # We could reasonably refactor this check into _validate_bdm at
            # some future date, as the various size logic is already split out
            # in there.
            dest_size = root_bdm.volume_size
            if dest_size is not None:
                dest_size *= units.Gi

                if image_min_disk > dest_size:
                    raise exception.VolumeSmallerThanMinDisk(
                        volume_size=dest_size, image_min_disk=image_min_disk)

        # Target disk is a local disk whose size is taken from the flavor
        else:
            dest_size = instance_type['root_gb'] * units.Gi

            # NOTE(johannes): root_gb is allowed to be 0 for legacy reasons
            # since libvirt interpreted the value differently than other
            # drivers. A value of 0 means don't check size.
            if dest_size != 0:
                if image_size > dest_size:
                    raise exception.FlavorDiskSmallerThanImage(
                        flavor_size=dest_size, image_size=image_size)

                if image_min_disk > dest_size:
                    raise exception.FlavorDiskSmallerThanMinDisk(
                        flavor_size=dest_size, image_min_disk=image_min_disk)
            else:
                # The user is attempting to create a server with a 0-disk
                # image-backed flavor, which can lead to issues with a large
                # image consuming an unexpectedly large amount of local disk
                # on the compute host. Check to see if the deployment will
                # allow that.
                if not context.can(
                        servers_policies.ZERO_DISK_FLAVOR, fatal=False):
                    raise exception.BootFromVolumeRequiredForZeroDiskFlavor()

        API._validate_flavor_image_numa_pci(
            image, instance_type, validate_numa=validate_numa,
            validate_pci=validate_pci)

    @staticmethod
    def _validate_flavor_image_numa_pci(image, instance_type,
                                        validate_numa=True,
                                        validate_pci=False):
        """Validate the flavor and image NUMA/PCI values.

        This is called from the API service to ensure that the flavor
        extra-specs and image properties are self-consistent and compatible
        with each other.

        :param image: a dict representation of the image including properties
        :param instance_type: Flavor object
        :param validate_numa: Flag to indicate whether or not to validate
            the NUMA-related metadata.
        :param validate_pci: Flag to indicate whether or not to validate
            the PCI-related metadata.
        :raises: Many different possible exceptions. See
            api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
            for the full list.
        """
        image_meta = _get_image_meta_obj(image)

        API._validate_flavor_image_mem_encryption(instance_type, image_meta)

        # validate PMU extra spec and image metadata
        flavor_pmu = instance_type.extra_specs.get('hw:pmu')
        image_pmu = image_meta.properties.get('hw_pmu')
        if (flavor_pmu is not None and image_pmu is not None and
                image_pmu != strutils.bool_from_string(flavor_pmu)):
            raise exception.ImagePMUConflict()

        # Only validate values of flavor/image so the return results of
        # following 'get' functions are not used.
        hardware.get_number_of_serial_ports(instance_type, image_meta)
        if hardware.is_realtime_enabled(instance_type):
            hardware.vcpus_realtime_topology(instance_type, image_meta)
        hardware.get_cpu_topology_constraints(instance_type, image_meta)
        if validate_numa:
            hardware.numa_get_constraints(instance_type, image_meta)
        if validate_pci:
            pci_request.get_pci_requests_from_flavor(instance_type)

    @staticmethod
    def _validate_flavor_image_mem_encryption(instance_type, image):
        """Validate that the flavor and image don't make contradictory
        requests regarding memory encryption.

        :param instance_type: Flavor object
        :param image: an ImageMeta object
        :raises: nova.exception.FlavorImageConflict
        """
        # This library function will raise the exception for us if
        # necessary; if not, we can ignore the result returned.
        hardware.get_mem_encryption_constraint(instance_type, image)

    def _get_image_defined_bdms(self, instance_type, image_meta,
                                root_device_name):
        image_properties = image_meta.get('properties', {})

        # Get the block device mappings defined by the image.
        image_defined_bdms = image_properties.get('block_device_mapping', [])
        legacy_image_defined = not image_properties.get('bdm_v2', False)

        image_mapping = image_properties.get('mappings', [])

        if legacy_image_defined:
            image_defined_bdms = block_device.from_legacy_mapping(
                image_defined_bdms, None, root_device_name)
        else:
            image_defined_bdms = list(map(block_device.BlockDeviceDict,
                                          image_defined_bdms))

        if image_mapping:
            image_mapping = self._prepare_image_mapping(instance_type,
                                                        image_mapping)
            image_defined_bdms = self._merge_bdms_lists(
                image_mapping, image_defined_bdms)

        return image_defined_bdms

    def _get_flavor_defined_bdms(self, instance_type, block_device_mapping):
        flavor_defined_bdms = []

        have_ephemeral_bdms = any(filter(
            block_device.new_format_is_ephemeral, block_device_mapping))
        have_swap_bdms = any(filter(
            block_device.new_format_is_swap, block_device_mapping))

        if instance_type.get('ephemeral_gb') and not have_ephemeral_bdms:
            flavor_defined_bdms.append(
                block_device.create_blank_bdm(instance_type['ephemeral_gb']))
        if instance_type.get('swap') and not have_swap_bdms:
            flavor_defined_bdms.append(
                block_device.create_blank_bdm(instance_type['swap'], 'swap'))

        return flavor_defined_bdms

    def _merge_bdms_lists(self, overridable_mappings, overrider_mappings):
        """Override any block devices from the first list by device name

        :param overridable_mappings: list which items are overridden
        :param overrider_mappings: list which items override

        :returns: A merged list of bdms
        """
        device_names = set(bdm['device_name'] for bdm in overrider_mappings
                           if bdm['device_name'])
        return (overrider_mappings +
                [bdm for bdm in overridable_mappings
                 if bdm['device_name'] not in device_names])
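
    # Example (illustrative): if the overridable list maps 'vda' and 'vdb'
    # and the overrider list also maps 'vdb', the merged result keeps the
    # overrider's 'vdb' entry plus the overridable 'vda' entry, so explicit
    # mappings win over image-defined ones for the same device name.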

    def _check_and_transform_bdm(self, context, base_options, instance_type,
                                 image_meta, min_count, max_count,
                                 block_device_mapping, legacy_bdm):
        # NOTE (ndipanov): Assume root dev name is 'vda' if not supplied.
        #                  It's needed for legacy conversion to work.
        root_device_name = (base_options.get('root_device_name') or 'vda')
        image_ref = base_options.get('image_ref', '')
        # If the instance is booted by image and has a volume attached,
        # the volume cannot have the same device name as root_device_name
        if image_ref:
            for bdm in block_device_mapping:
                if (bdm.get('destination_type') == 'volume' and
                        block_device.strip_dev(bdm.get(
                            'device_name')) == root_device_name):
                    msg = _('The volume cannot be assigned the same device'
                            ' name as the root device %s') % root_device_name
                    raise exception.InvalidRequest(msg)

        image_defined_bdms = self._get_image_defined_bdms(
            instance_type, image_meta, root_device_name)
        root_in_image_bdms = (
            block_device.get_root_bdm(image_defined_bdms) is not None)

        if legacy_bdm:
            block_device_mapping = block_device.from_legacy_mapping(
                block_device_mapping, image_ref, root_device_name,
                no_root=root_in_image_bdms)
        elif root_in_image_bdms:
            # NOTE (ndipanov): client will insert an image mapping into the v2
            # block_device_mapping, but if there is a bootable device in image
            # mappings - we need to get rid of the inserted image
            # NOTE (gibi): another case is when a server is booted with an
            # image to bdm mapping where the image only contains a bdm to a
            # snapshot. In this case the other image to bdm mapping
            # contains an unnecessary device with boot_index == 0.
            # Also in this case the image_ref is None as we are booting from
            # an image to volume bdm.
            def not_image_and_root_bdm(bdm):
                return not (bdm.get('boot_index') == 0 and
                            bdm.get('source_type') == 'image')

            block_device_mapping = list(
                filter(not_image_and_root_bdm, block_device_mapping))

        block_device_mapping = self._merge_bdms_lists(
            image_defined_bdms, block_device_mapping)

        if min_count > 1 or max_count > 1:
            if any(map(lambda bdm: bdm['source_type'] == 'volume',
                       block_device_mapping)):
                msg = _('Cannot attach one or more volumes to multiple'
                        ' instances')
                raise exception.InvalidRequest(msg)

        block_device_mapping += self._get_flavor_defined_bdms(
            instance_type, block_device_mapping)

        return block_device_obj.block_device_make_list_from_dicts(
            context, block_device_mapping)

    def _get_image(self, context, image_href):
        if not image_href:
            return None, {}

        image = self.image_api.get(context, image_href)
        return image['id'], image

    def _checks_for_create_and_rebuild(self, context, image_id, image,
                                       instance_type, metadata,
                                       files_to_inject, root_bdm,
                                       validate_numa=True):
        self._check_metadata_properties_quota(context, metadata)
        self._check_injected_file_quota(context, files_to_inject)
        self._validate_flavor_image(context, image_id, image,
                                    instance_type, root_bdm,
                                    validate_numa=validate_numa)

    def _validate_and_build_base_options(self, context, instance_type,
                                         boot_meta, image_href, image_id,
                                         kernel_id, ramdisk_id, display_name,
                                         display_description, key_name,
                                         key_data, security_groups,
                                         availability_zone, user_data,
                                         metadata, access_ip_v4, access_ip_v6,
                                         requested_networks, config_drive,
                                         auto_disk_config, reservation_id,
                                         max_count,
                                         supports_port_resource_request):
        """Verify all the input parameters regardless of the provisioning
        strategy being performed.
        """
        if instance_type['disabled']:
            raise exception.FlavorNotFound(flavor_id=instance_type['id'])

        if user_data:
            try:
                base64utils.decode_as_bytes(user_data)
            except TypeError:
                raise exception.InstanceUserDataMalformed()

        # When using Neutron, _check_requested_secgroups will translate and
        # return any requested security group names to uuids.
        security_groups = (
            self._check_requested_secgroups(context, security_groups))

        # Note: max_count is the number of instances requested by the user,
        # max_network_count is the maximum number of instances taking into
        # account any network quotas
        max_network_count = self._check_requested_networks(context,
            requested_networks, max_count)

        kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
            context, kernel_id, ramdisk_id, boot_meta)

        config_drive = self._check_config_drive(config_drive)

        if key_data is None and key_name is not None:
            key_pair = objects.KeyPair.get_by_name(context,
                                                   context.user_id,
                                                   key_name)
            key_data = key_pair.public_key
        else:
            key_pair = None

        root_device_name = block_device.prepend_dev(
            block_device.properties_root_device_name(
                boot_meta.get('properties', {})))

        image_meta = _get_image_meta_obj(boot_meta)
        numa_topology = hardware.numa_get_constraints(
            instance_type, image_meta)

        system_metadata = {}

        pci_numa_affinity_policy = hardware.get_pci_numa_policy_constraint(
            instance_type, image_meta)

        # PCI requests come from two sources: instance flavor and
        # requested_networks. The first call below returns an
        # InstancePCIRequests object which is a list of InstancePCIRequest
        # objects. The second call below creates an InstancePCIRequest
        # object for each SR-IOV port, and appends it to the list in the
        # InstancePCIRequests object.
        pci_request_info = pci_request.get_pci_requests_from_flavor(
            instance_type, affinity_policy=pci_numa_affinity_policy)
        result = self.network_api.create_resource_requests(
            context, requested_networks, pci_request_info,
            affinity_policy=pci_numa_affinity_policy)
        network_metadata, port_resource_requests = result

        # Creating servers with ports that have resource requests, like QoS
        # minimum bandwidth rules, is only supported in a requested minimum
        # microversion.
        if port_resource_requests and not supports_port_resource_request:
            raise exception.CreateWithPortResourceRequestOldVersion()

        base_options = {
            'reservation_id': reservation_id,
            'image_ref': image_href,
            'kernel_id': kernel_id or '',
            'ramdisk_id': ramdisk_id or '',
            'power_state': power_state.NOSTATE,
            'vm_state': vm_states.BUILDING,
            'config_drive': config_drive,
            'user_id': context.user_id,
            'project_id': context.project_id,
            'instance_type_id': instance_type['id'],
            'memory_mb': instance_type['memory_mb'],
            'vcpus': instance_type['vcpus'],
            'root_gb': instance_type['root_gb'],
            'ephemeral_gb': instance_type['ephemeral_gb'],
            'display_name': display_name,
            'display_description': display_description,
            'user_data': user_data,
            'key_name': key_name,
            'key_data': key_data,
            'locked': False,
            'metadata': metadata or {},
            'access_ip_v4': access_ip_v4,
            'access_ip_v6': access_ip_v6,
            'availability_zone': availability_zone,
            'root_device_name': root_device_name,
            'progress': 0,
            'pci_requests': pci_request_info,
            'numa_topology': numa_topology,
            'system_metadata': system_metadata,
            'port_resource_requests': port_resource_requests}

        options_from_image = self._inherit_properties_from_image(
            boot_meta, auto_disk_config)

        base_options.update(options_from_image)

        # return the validated options and maximum number of instances allowed
        # by the network quotas
        return (base_options, max_network_count, key_pair, security_groups,
                network_metadata)

    @staticmethod
    @db_api.api_context_manager.writer
    def _create_reqspec_buildreq_instmapping(context, rs, br, im):
        """Create the request spec, build request, and instance mapping in a
        single database transaction.

        The RequestContext must be passed in to this method so that the
        database transaction context manager decorator will nest properly and
        include each create() into the same transaction context.
        """
        rs.create()
        br.create()
        im.create()

    def _validate_host_or_node(self, context, host, hypervisor_hostname):
        """Check whether compute nodes exist by validating the host
        and/or the hypervisor_hostname. There are three cases:

        1. If only host is supplied, we can lookup the HostMapping in
           the API DB.
        2. If only node is supplied, we can query a resource provider
           with that name in placement.
        3. If both host and node are supplied, we can get the cell from
           HostMapping and from that lookup the ComputeNode with the
           given cell.

        :param context: The API request context.
        :param host: Target host.
        :param hypervisor_hostname: Target node.
        :raises: ComputeHostNotFound if we find no compute nodes with host
            and/or hypervisor_hostname.
        """
        if host:
            # When host is specified.
            try:
                host_mapping = objects.HostMapping.get_by_host(context, host)
            except exception.HostMappingNotFound:
                LOG.warning('No host-to-cell mapping found for host '
                            '%(host)s.', {'host': host})
                raise exception.ComputeHostNotFound(host=host)
            # When both host and node are specified.
            if hypervisor_hostname:
                cell = host_mapping.cell_mapping
                with nova_context.target_cell(context, cell) as cctxt:
                    # Here we only do an existence check, so we don't
                    # need to store the return value into a variable.
                    objects.ComputeNode.get_by_host_and_nodename(
                        cctxt, host, hypervisor_hostname)
        elif hypervisor_hostname:
            # When only node is specified.
            try:
                self.placementclient.get_provider_by_name(
                    context, hypervisor_hostname)
            except exception.ResourceProviderNotFound:
                raise exception.ComputeHostNotFound(host=hypervisor_hostname)

    def _get_volumes_for_bdms(self, context, bdms):
        """Get the pre-existing volumes from cinder for the list of BDMs.

        :param context: nova auth RequestContext
        :param bdms: BlockDeviceMappingList which has zero or more BDMs with
            a pre-existing volume_id specified.
        :return: dict, keyed by volume id, of volume dicts
        :raises: VolumeNotFound - if a given volume does not exist
        :raises: CinderConnectionFailed - if there are problems communicating
            with the cinder API
        :raises: Forbidden - if the user token does not have authority to see
            a volume
        """
        volumes = {}
        for bdm in bdms:
            if bdm.volume_id:
                volumes[bdm.volume_id] = self.volume_api.get(
                    context, bdm.volume_id)
        return volumes

    @staticmethod
    def _validate_vol_az_for_create(instance_az, volumes):
        """Performs cross_az_attach validation for the instance and volumes.

        If [cinder]/cross_az_attach=True (default) this method is a no-op.

        If [cinder]/cross_az_attach=False, this method will validate that:

        1. All volumes are in the same availability zone.
        2. The volume AZ matches the instance AZ. If the instance is being
           created without a specific AZ (either via the user request or the
           [DEFAULT]/default_schedule_zone option), and the volume AZ matches
           [DEFAULT]/default_availability_zone for compute services, then the
           method returns the volume AZ so it can be set in the RequestSpec as
           if the user requested the zone explicitly.

        :param instance_az: Availability zone for the instance. In this case
            the host is not yet selected so the instance AZ value should come
            from one of the following cases:

            * The user requested availability zone.
            * [DEFAULT]/default_schedule_zone (defaults to None) if the request
              does not specify an AZ (see parse_availability_zone).
        :param volumes: iterable of dicts of cinder volumes to be attached to
            the server being created
        :returns: None or volume AZ to set in the RequestSpec for the instance
        :raises: MismatchVolumeAZException if the instance and volume AZ do
            not match
        """
        if CONF.cinder.cross_az_attach:
            return
        if not volumes:
            return
        # First make sure that all of the volumes are in the same zone.
        vol_zones = [vol['availability_zone'] for vol in volumes]
        if len(set(vol_zones)) > 1:
            msg = (_("Volumes are in different availability zones: %s")
                   % ','.join(vol_zones))
            raise exception.MismatchVolumeAZException(reason=msg)

        volume_az = vol_zones[0]
        # In this case the instance.host should not be set so the instance AZ
        # value should come from instance.availability_zone which will be one
        # of the following cases:
        # * The user requested availability zone.
        # * [DEFAULT]/default_schedule_zone (defaults to None) if the request
        #   does not specify an AZ (see parse_availability_zone).

        # If the instance is not being created with a specific AZ (the AZ is
        # input via the API create request *or* [DEFAULT]/default_schedule_zone
        # is not None), then check to see if we should use the default AZ
        # (which by default matches the default AZ in Cinder, i.e. 'nova').
        if instance_az is None:
            # Check if the volume AZ is the same as our default AZ for compute
            # hosts (nova) and if so, assume we are OK because the user did not
            # request an AZ and will get the same default. If the volume AZ is
            # not the same as our default, return the volume AZ so the caller
            # can put it into the request spec so the instance is scheduled
            # to the same zone as the volume. Note that we are paranoid about
            # the default here since both nova and cinder's default backend AZ
            # is "nova" and we do not want to pin the server to that AZ since
            # it's special, i.e. just like we tell users in the docs to not
            # specify availability_zone='nova' when creating a server since we
            # might not be able to migrate it later.
            if volume_az != CONF.default_availability_zone:
                return volume_az  # indication to set in request spec

            # The volume AZ is the same as the default nova AZ so we will be OK
            return

        if instance_az != volume_az:
            msg = _("Server and volumes are not in the same availability "
                    "zone. Server is in: %(instance_az)s. Volumes are in: "
                    "%(volume_az)s") % {
                'instance_az': instance_az, 'volume_az': volume_az}
            raise exception.MismatchVolumeAZException(reason=msg)
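
    # Example (illustrative, assuming [cinder]/cross_az_attach=False and the
    # default [DEFAULT]/default_availability_zone of 'nova'): with no
    # requested instance AZ and all volumes in 'az2', the method returns
    # 'az2' so the caller can pin the RequestSpec to that zone; volumes
    # spread across different zones, or a requested instance AZ that differs
    # from the volume AZ, raise MismatchVolumeAZException.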

    def _provision_instances(self, context, instance_type, min_count,
            max_count, base_options, boot_meta, security_groups,
            block_device_mapping, shutdown_terminate,
            instance_group, check_server_group_quota, filter_properties,
            key_pair, tags, trusted_certs, supports_multiattach,
            network_metadata=None, requested_host=None,
            requested_hypervisor_hostname=None):
        # NOTE(boxiang): Check whether compute nodes exist by validating
        # the host and/or the hypervisor_hostname. Pass the destination
        # to the scheduler with host and/or hypervisor_hostname(node).
        destination = None
        if requested_host or requested_hypervisor_hostname:
            self._validate_host_or_node(context, requested_host,
                                        requested_hypervisor_hostname)
            destination = objects.Destination()
            if requested_host:
                destination.host = requested_host
            destination.node = requested_hypervisor_hostname
        # Check quotas
        num_instances = compute_utils.check_num_instances_quota(
            context, instance_type, min_count, max_count)
        security_groups = security_group_api.populate_security_groups(
            security_groups)
        port_resource_requests = base_options.pop('port_resource_requests')
        instances_to_build = []
        # We could be iterating over several instances with several BDMs per
        # instance and those BDMs could be using a lot of the same images so
        # we want to cache the image API GET results for performance.
        image_cache = {}  # dict of image dicts keyed by image id
        # Before processing the list of instances get all of the requested
        # pre-existing volumes so we can do some validation here rather than
        # down in the bowels of _validate_bdm.
        volumes = self._get_volumes_for_bdms(context, block_device_mapping)
        volume_az = self._validate_vol_az_for_create(
            base_options['availability_zone'], volumes.values())
        if volume_az:
            # This means the instance is not being created in a specific zone
            # but needs to match the zone that the volumes are in so update
            # base_options to match the volume zone.
            base_options['availability_zone'] = volume_az
        LOG.debug("Going to run %s instances...", num_instances)
        try:
            for i in range(num_instances):
                # Create a uuid for the instance so we can store the
                # RequestSpec before the instance is created.
                instance_uuid = uuidutils.generate_uuid()
                # Store the RequestSpec that will be used for scheduling.
                req_spec = objects.RequestSpec.from_components(context,
                        instance_uuid, boot_meta, instance_type,
                        base_options['numa_topology'],
                        base_options['pci_requests'], filter_properties,
                        instance_group, base_options['availability_zone'],
                        security_groups=security_groups,
                        port_resource_requests=port_resource_requests)

                if block_device_mapping:
                    # Record whether or not we are a BFV instance
                    root = block_device_mapping.root_bdm()
                    req_spec.is_bfv = bool(root and root.is_volume)
                else:
                    # If we have no BDMs, we're clearly not BFV
                    req_spec.is_bfv = False

                # NOTE(danms): We need to record num_instances on the request
                # spec as this is how the conductor knows how many were in this
                # batch.
                req_spec.num_instances = num_instances

                # NOTE(stephenfin): The network_metadata field is not persisted
                # inside RequestSpec object.
                if network_metadata:
                    req_spec.network_metadata = network_metadata

                if destination:
                    req_spec.requested_destination = destination

                # Create an instance object, but do not store in db yet.
                instance = objects.Instance(context=context)
                instance.uuid = instance_uuid
                instance.update(base_options)
                instance.keypairs = objects.KeyPairList(objects=[])
                if key_pair:
                    instance.keypairs.objects.append(key_pair)

                instance.trusted_certs = self._retrieve_trusted_certs_object(
                    context, trusted_certs)

                instance = self.create_db_entry_for_new_instance(context,
                        instance_type, boot_meta, instance, security_groups,
                        block_device_mapping, num_instances, i,
                        shutdown_terminate, create_instance=False)
                block_device_mapping = (
                    self._bdm_validate_set_size_and_instance(context,
                        instance, instance_type, block_device_mapping,
                        image_cache, volumes, supports_multiattach))
                instance_tags = self._transform_tags(tags, instance.uuid)

                build_request = objects.BuildRequest(context,
                        instance=instance, instance_uuid=instance.uuid,
                        project_id=instance.project_id,
                        block_device_mappings=block_device_mapping,
                        tags=instance_tags)

                # Create an instance_mapping. The null cell_mapping indicates
                # that the instance doesn't yet exist in a cell, and lookups
                # for it need to instead look for the RequestSpec.
                # cell_mapping will be populated after scheduling, with a
                # scheduling failure using the cell_mapping for the special
                # cell0.
                inst_mapping = objects.InstanceMapping(context=context)
                inst_mapping.instance_uuid = instance_uuid
                inst_mapping.project_id = context.project_id
                inst_mapping.user_id = context.user_id
                inst_mapping.cell_mapping = None

                # Create the request spec, build request, and instance mapping
                # records in a single transaction so that if a DBError is
                # raised from any of them, all INSERTs will be rolled back and
                # no orphaned records will be left behind.
                self._create_reqspec_buildreq_instmapping(context, req_spec,
                                                          build_request,
                                                          inst_mapping)

                instances_to_build.append(
                    (req_spec, build_request, inst_mapping))

                if instance_group:
                    if check_server_group_quota:
                        try:
                            objects.Quotas.check_deltas(
                                context, {'server_group_members': 1},
                                instance_group, context.user_id)
                        except exception.OverQuota:
                            msg = _("Quota exceeded, too many servers in "
                                    "group")
                            raise exception.QuotaError(msg)

                    members = objects.InstanceGroup.add_members(
                        context, instance_group.uuid, [instance.uuid])

                    # NOTE(melwitt): We recheck the quota after creating the
                    # object to prevent users from allocating more resources
                    # than their allowed quota in the event of a race. This is
                    # configurable because it can be expensive if strict quota
                    # limits are not required in a deployment.
                    if CONF.quota.recheck_quota and check_server_group_quota:
                        try:
                            objects.Quotas.check_deltas(
                                context, {'server_group_members': 0},
                                instance_group, context.user_id)
                        except exception.OverQuota:
                            objects.InstanceGroup._remove_members_in_db(
                                context, instance_group.id, [instance.uuid])
                            msg = _("Quota exceeded, too many servers in "
                                    "group")
                            raise exception.QuotaError(msg)
                    # list of members added to servers group in this iteration
                    # is needed to check quota of server group during add next
                    # instance
                    instance_group.members.extend(members)

        # In the case of any exceptions, attempt DB cleanup
        except Exception:
            with excutils.save_and_reraise_exception():
                self._cleanup_build_artifacts(None, instances_to_build)

        return instances_to_build

    @staticmethod
    def _retrieve_trusted_certs_object(context, trusted_certs, rebuild=False):
        """Convert user-requested trusted cert IDs to TrustedCerts object

        Also validates that the deployment is new enough to support trusted
        image certification validation.

        :param context: The user request auth context
        :param trusted_certs: list of user-specified trusted cert string IDs,
            may be None
        :param rebuild: True if rebuilding the server, False if creating a
            new server
        :returns: nova.objects.TrustedCerts object or None if no user-specified
            trusted cert IDs were given and nova is not configured with
            default trusted cert IDs
        """
        # Retrieve trusted_certs parameter, or use CONF value if certificate
        # validation is enabled
        if trusted_certs:
            certs_to_return = objects.TrustedCerts(ids=trusted_certs)
        elif (CONF.glance.verify_glance_signatures and
                CONF.glance.enable_certificate_validation and
                CONF.glance.default_trusted_certificate_ids):
            certs_to_return = objects.TrustedCerts(
                ids=CONF.glance.default_trusted_certificate_ids)
        else:
            return None

        return certs_to_return

    def _get_bdm_image_metadata(self, context, block_device_mapping,
                                legacy_bdm=True):
        """If we are booting from a volume, we need to get the
        volume details from Cinder and make sure we pass the
        metadata back accordingly.
        """
        if not block_device_mapping:
            return {}

        for bdm in block_device_mapping:
            if (legacy_bdm and
                    block_device.get_device_letter(
                        bdm.get('device_name', '')) != 'a'):
                continue
            elif not legacy_bdm and bdm.get('boot_index') != 0:
                continue

            volume_id = bdm.get('volume_id')
            snapshot_id = bdm.get('snapshot_id')
            if snapshot_id:
                # NOTE(alaski): A volume snapshot inherits metadata from the
                # originating volume, but the API does not expose metadata
                # on the snapshot itself. So we query the volume for it below.
                snapshot = self.volume_api.get_snapshot(context, snapshot_id)
                volume_id = snapshot['volume_id']

            if bdm.get('image_id'):
                try:
                    image_id = bdm['image_id']
                    image_meta = self.image_api.get(context, image_id)
                    return image_meta
                except Exception:
                    raise exception.InvalidBDMImage(id=image_id)
            elif volume_id:
                try:
                    volume = self.volume_api.get(context, volume_id)
                except exception.CinderConnectionFailed:
                    raise
                except Exception:
                    raise exception.InvalidBDMVolume(id=volume_id)

                if not volume.get('bootable', True):
                    raise exception.InvalidBDMVolumeNotBootable(id=volume_id)

                return utils.get_image_metadata_from_volume(volume)
        return {}
  1205. @staticmethod
  1206. def _get_requested_instance_group(context, filter_properties):
  1207. if (not filter_properties or
  1208. not filter_properties.get('scheduler_hints')):
  1209. return
  1210. group_hint = filter_properties.get('scheduler_hints').get('group')
  1211. if not group_hint:
  1212. return
  1213. return objects.InstanceGroup.get_by_uuid(context, group_hint)
  1214. def _create_instance(self, context, instance_type,
  1215. image_href, kernel_id, ramdisk_id,
  1216. min_count, max_count,
  1217. display_name, display_description,
  1218. key_name, key_data, security_groups,
  1219. availability_zone, user_data, metadata, injected_files,
  1220. admin_password, access_ip_v4, access_ip_v6,
  1221. requested_networks, config_drive,
  1222. block_device_mapping, auto_disk_config, filter_properties,
  1223. reservation_id=None, legacy_bdm=True, shutdown_terminate=False,
  1224. check_server_group_quota=False, tags=None,
  1225. supports_multiattach=False, trusted_certs=None,
  1226. supports_port_resource_request=False,
  1227. requested_host=None, requested_hypervisor_hostname=None):
  1228. """Verify all the input parameters regardless of the provisioning
  1229. strategy being performed and schedule the instance(s) for
  1230. creation.
  1231. """
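# Overview (editorial summary of the steps below): normalize the input
# parameters, resolve the boot image or volume-backed image metadata,
# validate and build the base instance options, transform and validate
# the block device mappings, create the per-instance records via
# _provision_instances(), and finally hand everything to
# schedule_and_build_instances() on the conductor.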
  1232. # Normalize and setup some parameters
  1233. if reservation_id is None:
  1234. reservation_id = utils.generate_uid('r')
  1235. security_groups = security_groups or ['default']
  1236. min_count = min_count or 1
  1237. max_count = max_count or min_count
  1238. block_device_mapping = block_device_mapping or []
  1239. tags = tags or []
  1240. if image_href:
  1241. image_id, boot_meta = self._get_image(context, image_href)
  1242. else:
  1243. # This is similar to the logic in _retrieve_trusted_certs_object.
  1244. if (trusted_certs or
  1245. (CONF.glance.verify_glance_signatures and
  1246. CONF.glance.enable_certificate_validation and
  1247. CONF.glance.default_trusted_certificate_ids)):
  1248. msg = _("Image certificate validation is not supported "
  1249. "when booting from volume")
  1250. raise exception.CertificateValidationFailed(message=msg)
  1251. image_id = None
  1252. boot_meta = self._get_bdm_image_metadata(
  1253. context, block_device_mapping, legacy_bdm)
  1254. self._check_auto_disk_config(image=boot_meta,
  1255. auto_disk_config=auto_disk_config)
  1256. base_options, max_net_count, key_pair, security_groups, \
  1257. network_metadata = self._validate_and_build_base_options(
  1258. context, instance_type, boot_meta, image_href, image_id,
  1259. kernel_id, ramdisk_id, display_name, display_description,
  1260. key_name, key_data, security_groups, availability_zone,
  1261. user_data, metadata, access_ip_v4, access_ip_v6,
  1262. requested_networks, config_drive, auto_disk_config,
  1263. reservation_id, max_count, supports_port_resource_request)
  1264. # max_net_count is the maximum number of instances requested by the
  1265. # user adjusted for any network quota constraints, including
  1266. # consideration of connections to each requested network
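# For example (illustrative values): with min_count=2, max_count=5 and
# max_net_count=3 the request proceeds with max_count reduced to 3,
# while max_net_count=1 would raise PortLimitExceeded below.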
  1267. if max_net_count < min_count:
  1268. raise exception.PortLimitExceeded()
  1269. elif max_net_count < max_count:
  1270. LOG.info("max count reduced from %(max_count)d to "
  1271. "%(max_net_count)d due to network port quota",
  1272. {'max_count': max_count,
  1273. 'max_net_count': max_net_count})
  1274. max_count = max_net_count
  1275. block_device_mapping = self._check_and_transform_bdm(context,
  1276. base_options, instance_type, boot_meta, min_count, max_count,
  1277. block_device_mapping, legacy_bdm)
  1278. # We can't do this check earlier because we need bdms from all sources
  1279. # to have been merged in order to get the root bdm.
  1280. # Set validate_numa=False since numa validation is already done by
  1281. # _validate_and_build_base_options().
  1282. self._checks_for_create_and_rebuild(context, image_id, boot_meta,
  1283. instance_type, metadata, injected_files,
  1284. block_device_mapping.root_bdm(), validate_numa=False)
  1285. instance_group = self._get_requested_instance_group(context,
  1286. filter_properties)
  1287. tags = self._create_tag_list_obj(context, tags)
  1288. instances_to_build = self._provision_instances(
  1289. context, instance_type, min_count, max_count, base_options,
  1290. boot_meta, security_groups, block_device_mapping,
  1291. shutdown_terminate, instance_group, check_server_group_quota,
  1292. filter_properties, key_pair, tags, trusted_certs,
  1293. supports_multiattach, network_metadata,
  1294. requested_host, requested_hypervisor_hostname)
  1295. instances = []
  1296. request_specs = []
  1297. build_requests = []
  1298. for rs, build_request, im in instances_to_build:
  1299. build_requests.append(build_request)
  1300. instance = build_request.get_new_instance(context)
  1301. instances.append(instance)
  1302. request_specs.append(rs)
  1303. self.compute_task_api.schedule_and_build_instances(
  1304. context,
  1305. build_requests=build_requests,
  1306. request_spec=request_specs,
  1307. image=boot_meta,
  1308. admin_password=admin_password,
  1309. injected_files=injected_files,
  1310. requested_networks=requested_networks,
  1311. block_device_mapping=block_device_mapping,
  1312. tags=tags)
  1313. return instances, reservation_id
  1314. @staticmethod
  1315. def _cleanup_build_artifacts(instances, instances_to_build):
  1316. # instances_to_build is a list of tuples:
  1317. # (RequestSpec, BuildRequest, InstanceMapping)
  1318. # Be paranoid about artifacts being deleted underneath us.
  1319. for instance in instances or []:
  1320. try:
  1321. instance.destroy()
  1322. except exception.InstanceNotFound:
  1323. pass
  1324. for rs, build_request, im in instances_to_build or []:
  1325. try:
  1326. rs.destroy()
  1327. except exception.RequestSpecNotFound:
  1328. pass
  1329. try:
  1330. build_request.destroy()
  1331. except exception.BuildRequestNotFound:
  1332. pass
  1333. try:
  1334. im.destroy()
  1335. except exception.InstanceMappingNotFound:
  1336. pass
  1337. @staticmethod
  1338. def _volume_size(instance_type, bdm):
  1339. size = bdm.get('volume_size')
  1340. # NOTE (ndipanov): inherit flavor size only for swap and ephemeral
  1341. if (size is None and bdm.get('source_type') == 'blank' and
  1342. bdm.get('destination_type') == 'local'):
  1343. if bdm.get('guest_format') == 'swap':
  1344. size = instance_type.get('swap', 0)
  1345. else:
  1346. size = instance_type.get('ephemeral_gb', 0)
  1347. return size
  1348. def _prepare_image_mapping(self, instance_type, mappings):
  1349. """Extract and format blank devices from image mappings."""
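# Illustrative example (editorial): an image mapping such as
#   {'virtual': 'ephemeral0', 'device': 'sdb'}
# is turned into a blank/local BlockDeviceDict with device_name
# '/dev/sdb', guest_format defaulting to CONF.default_ephemeral_format
# and volume_size inherited from the flavor's ephemeral_gb, while 'ami',
# 'root' and non swap/ephemeral entries are skipped.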
  1350. prepared_mappings = []
  1351. for bdm in block_device.mappings_prepend_dev(mappings):
  1352. LOG.debug("Image bdm %s", bdm)
  1353. virtual_name = bdm['virtual']
  1354. if virtual_name == 'ami' or virtual_name == 'root':
  1355. continue
  1356. if not block_device.is_swap_or_ephemeral(virtual_name):
  1357. continue
  1358. guest_format = bdm.get('guest_format')
  1359. if virtual_name == 'swap':
  1360. guest_format = 'swap'
  1361. if not guest_format:
  1362. guest_format = CONF.default_ephemeral_format
  1363. values = block_device.BlockDeviceDict({
  1364. 'device_name': bdm['device'],
  1365. 'source_type': 'blank',
  1366. 'destination_type': 'local',
  1367. 'device_type': 'disk',
  1368. 'guest_format': guest_format,
  1369. 'delete_on_termination': True,
  1370. 'boot_index': -1})
  1371. values['volume_size'] = self._volume_size(
  1372. instance_type, values)
  1373. if values['volume_size'] == 0:
  1374. continue
  1375. prepared_mappings.append(values)
  1376. return prepared_mappings
  1377. def _bdm_validate_set_size_and_instance(self, context, instance,
  1378. instance_type,
  1379. block_device_mapping,
  1380. image_cache, volumes,
  1381. supports_multiattach=False):
  1382. """Ensure the bdms are valid, then set size and associate with instance
  1383. Because this method can be called multiple times when more than one
  1384. instance is booted in a single request it makes a copy of the bdm list.
  1385. :param context: nova auth RequestContext
  1386. :param instance: Instance object
  1387. :param instance_type: Flavor object - used for swap and ephemeral BDMs
  1388. :param block_device_mapping: BlockDeviceMappingList object
  1389. :param image_cache: dict of image dicts keyed by id which is used as a
  1390. cache in case there are multiple BDMs in the same request using
  1391. the same image to avoid redundant GET calls to the image service
  1392. :param volumes: dict, keyed by volume id, of volume dicts from cinder
  1393. :param supports_multiattach: True if the request supports multiattach
  1394. volumes, False otherwise
  1395. """
  1396. LOG.debug("block_device_mapping %s", list(block_device_mapping),
  1397. instance_uuid=instance.uuid)
  1398. self._validate_bdm(
  1399. context, instance, instance_type, block_device_mapping,
  1400. image_cache, volumes, supports_multiattach)
  1401. instance_block_device_mapping = block_device_mapping.obj_clone()
  1402. for bdm in instance_block_device_mapping:
  1403. bdm.volume_size = self._volume_size(instance_type, bdm)
  1404. bdm.instance_uuid = instance.uuid
  1405. return instance_block_device_mapping
  1406. @staticmethod
  1407. def _check_requested_volume_type(bdm, volume_type_id_or_name,
  1408. volume_types):
  1409. """If we are specifying a volume type, we need to get the
  1410. volume type details from Cinder and make sure the ``volume_type``
  1411. is available.
  1412. """
  1413. # NOTE(brinzhang): Verify that the specified volume type exists.
  1414. # And save the volume type name internally for consistency in the
  1415. # BlockDeviceMapping object.
  1416. for vol_type in volume_types:
  1417. if (volume_type_id_or_name == vol_type['id'] or
  1418. volume_type_id_or_name == vol_type['name']):
  1419. bdm.volume_type = vol_type['name']
  1420. break
  1421. else:
  1422. raise exception.VolumeTypeNotFound(
  1423. id_or_name=volume_type_id_or_name)
  1424. def _validate_bdm(self, context, instance, instance_type,
  1425. block_device_mappings, image_cache, volumes,
  1426. supports_multiattach=False):
  1427. """Validate requested block device mappings.
  1428. :param context: nova auth RequestContext
  1429. :param instance: Instance object
  1430. :param instance_type: Flavor object - used for swap and ephemeral BDMs
  1431. :param block_device_mappings: BlockDeviceMappingList object
  1432. :param image_cache: dict of image dicts keyed by id which is used as a
  1433. cache in case there are multiple BDMs in the same request using
  1434. the same image to avoid redundant GET calls to the image service
  1435. :param volumes: dict, keyed by volume id, of volume dicts from cinder
  1436. :param supports_multiattach: True if the request supports multiattach
  1437. volumes, False otherwise
  1438. """
  1439. # Make sure that the boot indexes make sense.
  1440. # Setting a negative value or None indicates that the device should not
  1441. # be used for booting.
  1442. boot_indexes = sorted([bdm.boot_index
  1443. for bdm in block_device_mappings
  1444. if bdm.boot_index is not None and
  1445. bdm.boot_index >= 0])
  1446. # Each device which is capable of being used as boot device should
  1447. # be given a unique boot index, starting from 0 in ascending order, and
  1448. # there needs to be at least one boot device.
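# For example, boot indexes [0], [0, 1] or [0, 1, 2] are valid, while
# [], [1, 2] or [0, 2] (a gap) fail the check below.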
  1449. if not boot_indexes or any(i != v for i, v in enumerate(boot_indexes)):
  1450. # Convert the BlockDeviceMappingList to a list for repr details.
  1451. LOG.debug('Invalid block device mapping boot sequence for '
  1452. 'instance: %s', list(block_device_mappings),
  1453. instance=instance)
  1454. raise exception.InvalidBDMBootSequence()
  1455. volume_types = None
  1456. for bdm in block_device_mappings:
  1457. volume_type = bdm.volume_type
  1458. if volume_type:
  1459. if not volume_types:
1460. # To reduce the number of calls to the Cinder API,
1461. # initialize our cache of volume types.
  1462. volume_types = self.volume_api.get_all_volume_types(
  1463. context)
  1464. # NOTE(brinzhang): Ensure the validity of volume_type.
  1465. self._check_requested_volume_type(bdm, volume_type,
  1466. volume_types)
  1467. # NOTE(vish): For now, just make sure the volumes are accessible.
  1468. # Additionally, check that the volume can be attached to this
  1469. # instance.
  1470. snapshot_id = bdm.snapshot_id
  1471. volume_id = bdm.volume_id
  1472. image_id = bdm.image_id
  1473. if image_id is not None:
  1474. if (image_id != instance.get('image_ref') and
  1475. image_id not in image_cache):
  1476. try:
  1477. # Cache the results of the image GET so we do not make
  1478. # the same request for the same image if processing
  1479. # multiple BDMs or multiple servers with the same image
  1480. image_cache[image_id] = self._get_image(
  1481. context, image_id)
  1482. except Exception:
  1483. raise exception.InvalidBDMImage(id=image_id)
  1484. if (bdm.source_type == 'image' and
  1485. bdm.destination_type == 'volume' and
  1486. not bdm.volume_size):
  1487. raise exception.InvalidBDM(message=_("Images with "
  1488. "destination_type 'volume' need to have a non-zero "
  1489. "size specified"))
  1490. elif volume_id is not None:
  1491. try:
  1492. volume = volumes[volume_id]
  1493. # We do not validate the instance and volume AZ here
  1494. # because that is done earlier by _provision_instances.
  1495. self._check_attach_and_reserve_volume(
  1496. context, volume, instance, bdm, supports_multiattach,
  1497. validate_az=False)
  1498. bdm.volume_size = volume.get('size')
  1499. except (exception.CinderConnectionFailed,
  1500. exception.InvalidVolume,
  1501. exception.MultiattachNotSupportedOldMicroversion):
  1502. raise
  1503. except exception.InvalidInput as exc:
  1504. raise exception.InvalidVolume(reason=exc.format_message())
  1505. except Exception as e:
  1506. LOG.info('Failed validating volume %s. Error: %s',
  1507. volume_id, e)
  1508. raise exception.InvalidBDMVolume(id=volume_id)
  1509. elif snapshot_id is not None:
  1510. try:
  1511. snap = self.volume_api.get_snapshot(context, snapshot_id)
  1512. bdm.volume_size = bdm.volume_size or snap.get('size')
  1513. except exception.CinderConnectionFailed:
  1514. raise
  1515. except Exception:
  1516. raise exception.InvalidBDMSnapshot(id=snapshot_id)
  1517. elif (bdm.source_type == 'blank' and
  1518. bdm.destination_type == 'volume' and
  1519. not bdm.volume_size):
  1520. raise exception.InvalidBDM(message=_("Blank volumes "
  1521. "(source: 'blank', dest: 'volume') need to have non-zero "
  1522. "size"))
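# The checks below enforce the flavor and config limits: the summed
# ephemeral BDM sizes must fit in the flavor's ephemeral_gb, at most one
# swap device is allowed and it must fit in the flavor's swap size, and
# the number of 'local' destination BDMs is capped by
# CONF.max_local_block_devices (when that option is >= 0).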
  1523. ephemeral_size = sum(bdm.volume_size or instance_type['ephemeral_gb']
  1524. for bdm in block_device_mappings
  1525. if block_device.new_format_is_ephemeral(bdm))
  1526. if ephemeral_size > instance_type['ephemeral_gb']:
  1527. raise exception.InvalidBDMEphemeralSize()
  1528. # There should be only one swap
  1529. swap_list = block_device.get_bdm_swap_list(block_device_mappings)
  1530. if len(swap_list) > 1:
  1531. msg = _("More than one swap drive requested.")
  1532. raise exception.InvalidBDMFormat(details=msg)
  1533. if swap_list:
  1534. swap_size = swap_list[0].volume_size or 0
  1535. if swap_size > instance_type['swap']:
  1536. raise exception.InvalidBDMSwapSize()
  1537. max_local = CONF.max_local_block_devices
  1538. if max_local >= 0:
  1539. num_local = len([bdm for bdm in block_device_mappings
  1540. if bdm.destination_type == 'local'])
  1541. if num_local > max_local:
  1542. raise exception.InvalidBDMLocalsLimit()
  1543. def _populate_instance_names(self, instance, num_instances, index):
  1544. """Populate instance display_name and hostname.
  1545. :param instance: The instance to set the display_name, hostname for
  1546. :type instance: nova.objects.Instance
  1547. :param num_instances: Total number of instances being created in this
  1548. request
  1549. :param index: The 0-based index of this particular instance
  1550. """
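# Illustrative example (editorial): booting three servers named 'web'
# yields display names 'web-1', 'web-2', 'web-3' with matching sanitized
# hostnames; a single server keeps its display name and gets a hostname
# derived from it (falling back to 'Server-<uuid>').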
  1551. # NOTE(mriedem): This is only here for test simplicity since a server
  1552. # name is required in the REST API.
  1553. if 'display_name' not in instance or instance.display_name is None:
  1554. instance.display_name = 'Server %s' % instance.uuid
  1555. # if we're booting multiple instances, we need to add an indexing
  1556. # suffix to both instance.hostname and instance.display_name. This is
  1557. # not necessary for a single instance.
  1558. if num_instances == 1:
  1559. default_hostname = 'Server-%s' % instance.uuid
  1560. instance.hostname = utils.sanitize_hostname(
  1561. instance.display_name, default_hostname)
  1562. elif num_instances > 1:
  1563. old_display_name = instance.display_name
  1564. new_display_name = '%s-%d' % (old_display_name, index + 1)
  1565. if utils.sanitize_hostname(old_display_name) == "":
  1566. instance.hostname = 'Server-%s' % instance.uuid
  1567. else:
  1568. instance.hostname = utils.sanitize_hostname(
  1569. new_display_name)
  1570. instance.display_name = new_display_name
  1571. def _populate_instance_for_create(self, context, instance, image,
  1572. index, security_groups, instance_type,
  1573. num_instances, shutdown_terminate):
  1574. """Build the beginning of a new instance."""
  1575. instance.launch_index = index
  1576. instance.vm_state = vm_states.BUILDING
  1577. instance.task_state = task_states.SCHEDULING
  1578. info_cache = objects.InstanceInfoCache()
  1579. info_cache.instance_uuid = instance.uuid
  1580. info_cache.network_info = network_model.NetworkInfo()
  1581. instance.info_cache = info_cache
  1582. instance.flavor = instance_type
  1583. instance.old_flavor = None
  1584. instance.new_flavor = None
  1585. if CONF.ephemeral_storage_encryption.enabled:
  1586. # NOTE(kfarr): dm-crypt expects the cipher in a
  1587. # hyphenated format: cipher-chainmode-ivmode
  1588. # (ex: aes-xts-plain64). The algorithm needs
  1589. # to be parsed out to pass to the key manager (ex: aes).
  1590. cipher = CONF.ephemeral_storage_encryption.cipher
  1591. algorithm = cipher.split('-')[0] if cipher else None
  1592. instance.ephemeral_key_uuid = self.key_manager.create_key(
  1593. context,
  1594. algorithm=algorithm,
  1595. length=CONF.ephemeral_storage_encryption.key_size)
  1596. else:
  1597. instance.ephemeral_key_uuid = None
  1598. # Store image properties so we can use them later
  1599. # (for notifications, etc). Only store what we can.
  1600. if not instance.obj_attr_is_set('system_metadata'):
  1601. instance.system_metadata = {}
  1602. # Make sure we have the dict form that we need for instance_update.
  1603. instance.system_metadata = utils.instance_sys_meta(instance)
  1604. system_meta = utils.get_system_metadata_from_image(
  1605. image, instance_type)
  1606. # In case we couldn't find any suitable base_image
  1607. system_meta.setdefault('image_base_image_ref', instance.image_ref)
  1608. system_meta['owner_user_name'] = context.user_name
  1609. system_meta['owner_project_name'] = context.project_name
  1610. instance.system_metadata.update(system_meta)
  1611. # Since the removal of nova-network, we don't actually store anything
  1612. # in the database. Instead, we proxy the security groups on the
  1613. # instance from the ports attached to the instance.
  1614. instance.security_groups = objects.SecurityGroupList()
  1615. self._populate_instance_names(instance, num_instances, index)
  1616. instance.shutdown_terminate = shutdown_terminate
  1617. return instance
  1618. def _create_tag_list_obj(self, context, tags):
  1619. """Create TagList objects from simple string tags.
  1620. :param context: security context.
  1621. :param tags: simple string tags from API request.
  1622. :returns: TagList object.
  1623. """
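# For example, ['foo', 'bar'] becomes a TagList containing
# Tag(tag='foo') and Tag(tag='bar'), both bound to this context.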
  1624. tag_list = [objects.Tag(context=context, tag=t) for t in tags]
  1625. tag_list_obj = objects.TagList(objects=tag_list)
  1626. return tag_list_obj
  1627. def _transform_tags(self, tags, resource_id):
  1628. """Change the resource_id of the tags according to the input param.
  1629. Because this method can be called multiple times when more than one
  1630. instance is booted in a single request it makes a copy of the tags
  1631. list.
  1632. :param tags: TagList object.
  1633. :param resource_id: string.
  1634. :returns: TagList object.
  1635. """
  1636. instance_tags = tags.obj_clone()
  1637. for tag in instance_tags:
  1638. tag.resource_id = resource_id
  1639. return instance_tags
  1640. # This method remains because cellsv1 uses it in the scheduler
  1641. def create_db_entry_for_new_instance(self, context, instance_type, image,
  1642. instance, security_group, block_device_mapping, num_instances,
  1643. index, shutdown_terminate=False, create_instance=True):
  1644. """Create an entry in the DB for this new instance,
  1645. including any related table updates (such as security group,
  1646. etc).
  1647. This is called by the scheduler after a location for the
  1648. instance has been determined.
  1649. :param create_instance: Determines if the instance is created here or
  1650. just populated for later creation. This is done so that this code
  1651. can be shared with cellsv1 which needs the instance creation to
  1652. happen here. It should be removed and this method cleaned up when
  1653. cellsv1 is a distant memory.
  1654. """
  1655. self._populate_instance_for_create(context, instance, image, index,
  1656. security_group, instance_type,
  1657. num_instances, shutdown_terminate)
  1658. if create_instance:
  1659. instance.create()
  1660. return instance
  1661. def _check_multiple_instances_with_neutron_ports(self,
  1662. requested_networks):
  1663. """Check whether multiple instances are created from port id(s)."""
  1664. for requested_net in requested_networks:
  1665. if requested_net.port_id:
  1666. msg = _("Unable to launch multiple instances with"
  1667. " a single configured port ID. Please launch your"
  1668. " instance one by one with different ports.")
  1669. raise exception.MultiplePortsNotApplicable(reason=msg)
  1670. def _check_multiple_instances_with_specified_ip(self, requested_networks):
  1671. """Check whether multiple instances are created with specified ip."""
  1672. for requested_net in requested_networks:
  1673. if requested_net.network_id and requested_net.address:
1674. msg = _("max_count cannot be greater than 1 if a fixed_ip "
  1675. "is specified.")
  1676. raise exception.InvalidFixedIpAndMaxCountRequest(reason=msg)
  1677. @hooks.add_hook("create_instance")
  1678. def create(self, context, instance_type,
  1679. image_href, kernel_id=None, ramdisk_id=None,
  1680. min_count=None, max_count=None,
  1681. display_name=None, display_description=None,
  1682. key_name=None, key_data=None, security_groups=None,
  1683. availability_zone=None, forced_host=None, forced_node=None,
  1684. user_data=None, metadata=None, injected_files=None,
  1685. admin_password=None, block_device_mapping=None,
  1686. access_ip_v4=None, access_ip_v6=None, requested_networks=None,
  1687. config_drive=None, auto_disk_config=None, scheduler_hints=None,
  1688. legacy_bdm=True, shutdown_terminate=False,
  1689. check_server_group_quota=False, tags=None,
  1690. supports_multiattach=False, trusted_certs=None,
  1691. supports_port_resource_request=False,
  1692. requested_host=None, requested_hypervisor_hostname=None):
  1693. """Provision instances, sending instance information to the
  1694. scheduler. The scheduler will determine where the instance(s)
  1695. go and will handle creating the DB entries.
  1696. Returns a tuple of (instances, reservation_id)
  1697. """
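# Editorial summary of the checks below: multi-create requests are
# rejected when a specific port or fixed IP is requested, the requested
# availability zone is validated (unless a host is being forced), and
# the scheduler hints / forced host / forced node are folded into
# filter_properties before delegating to _create_instance().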
  1698. if requested_networks and max_count is not None and max_count > 1:
  1699. self._check_multiple_instances_with_specified_ip(
  1700. requested_networks)
  1701. self._check_multiple_instances_with_neutron_ports(
  1702. requested_networks)
  1703. if availability_zone:
  1704. available_zones = availability_zones.\
  1705. get_availability_zones(context.elevated(), self.host_api,
  1706. get_only_available=True)
  1707. if forced_host is None and availability_zone not in \
  1708. available_zones:
  1709. msg = _('The requested availability zone is not available')
  1710. raise exception.InvalidRequest(msg)
  1711. filter_properties = scheduler_utils.build_filter_properties(
  1712. scheduler_hints, forced_host, forced_node, instance_type)
  1713. return self._create_instance(
  1714. context, instance_type,
  1715. image_href, kernel_id, ramdisk_id,
  1716. min_count, max_count,
  1717. display_name, display_description,
  1718. key_name, key_data, security_groups,
  1719. availability_zone, user_data, metadata,
  1720. injected_files, admin_password,
  1721. access_ip_v4, access_ip_v6,
  1722. requested_networks, config_drive,
  1723. block_device_mapping, auto_disk_config,
  1724. filter_properties=filter_properties,
  1725. legacy_bdm=legacy_bdm,
  1726. shutdown_terminate=shutdown_terminate,
  1727. check_server_group_quota=check_server_group_quota,
  1728. tags=tags, supports_multiattach=supports_multiattach,
  1729. trusted_certs=trusted_certs,
  1730. supports_port_resource_request=supports_port_resource_request,
  1731. requested_host=requested_host,
  1732. requested_hypervisor_hostname=requested_hypervisor_hostname)
  1733. def _check_auto_disk_config(self, instance=None, image=None,
  1734. auto_disk_config=None):
  1735. if auto_disk_config is None:
  1736. return
  1737. if not image and not instance:
  1738. return
  1739. if image:
  1740. image_props = image.get("properties", {})
  1741. auto_disk_config_img = \
  1742. utils.get_auto_disk_config_from_image_props(image_props)
  1743. image_ref = image.get("id")
  1744. else:
  1745. sys_meta = utils.instance_sys_meta(instance)
  1746. image_ref = sys_meta.get('image_base_image_ref')
  1747. auto_disk_config_img = \
  1748. utils.get_auto_disk_config_from_instance(sys_meta=sys_meta)
  1749. self._ensure_auto_disk_config_is_valid(auto_disk_config_img,
  1750. auto_disk_config,
  1751. image_ref)
  1752. def _lookup_instance(self, context, uuid):
  1753. '''Helper method for pulling an instance object from a database.
  1754. During the transition to cellsv2 there is some complexity around
  1755. retrieving an instance from the database which this method hides. If
  1756. there is an instance mapping then query the cell for the instance, if
  1757. no mapping exists then query the configured nova database.
  1758. Once we are past the point that all deployments can be assumed to be
  1759. migrated to cellsv2 this method can go away.
  1760. '''
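# Return values (editorial summary): (None, None) if the instance cannot
# be found (or was already deleted), (None, instance) if it has not been
# mapped to a cell yet, and (cell_mapping, instance) once it lives in a
# cell.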
  1761. inst_map = None
  1762. try:
  1763. inst_map = objects.InstanceMapping.get_by_instance_uuid(
  1764. context, uuid)
  1765. except exception.InstanceMappingNotFound:
  1766. # TODO(alaski): This exception block can be removed once we're
  1767. # guaranteed everyone is using cellsv2.
  1768. pass
  1769. if inst_map is None or inst_map.cell_mapping is None:
  1770. # If inst_map is None then the deployment has not migrated to
  1771. # cellsv2 yet.
  1772. # If inst_map.cell_mapping is None then the instance is not in a
  1773. # cell yet. Until instance creation moves to the conductor the
  1774. # instance can be found in the configured database, so attempt
  1775. # to look it up.
  1776. cell = None
  1777. try:
  1778. instance = objects.Instance.get_by_uuid(context, uuid)
  1779. except exception.InstanceNotFound:
  1780. # If we get here then the conductor is in charge of writing the
  1781. # instance to the database and hasn't done that yet. It's up to
  1782. # the caller of this method to determine what to do with that
  1783. # information.
  1784. return None, None
  1785. else:
  1786. cell = inst_map.cell_mapping
  1787. with nova_context.target_cell(context, cell) as cctxt:
  1788. try:
  1789. instance = objects.Instance.get_by_uuid(cctxt, uuid)
  1790. except exception.InstanceNotFound:
  1791. # Since the cell_mapping exists we know the instance is in
  1792. # the cell, however InstanceNotFound means it's already
  1793. # deleted.
  1794. return None, None
  1795. return cell, instance
  1796. def _delete_while_booting(self, context, instance):
  1797. """Handle deletion if the instance has not reached a cell yet
  1798. Deletion before an instance reaches a cell needs to be handled
  1799. differently. What we're attempting to do is delete the BuildRequest
  1800. before the api level conductor does. If we succeed here then the boot
  1801. request stops before reaching a cell. If not then the instance will
  1802. need to be looked up in a cell db and the normal delete path taken.
  1803. """
  1804. deleted = self._attempt_delete_of_buildrequest(context, instance)
  1805. if deleted:
  1806. # If we've reached this block the successful deletion of the
  1807. # buildrequest indicates that the build process should be halted by
  1808. # the conductor.
  1809. # NOTE(alaski): Though the conductor halts the build process it
  1810. # does not currently delete the instance record. This is
  1811. # because in the near future the instance record will not be
  1812. # created if the buildrequest has been deleted here. For now we
  1813. # ensure the instance has been set to deleted at this point.
  1814. # Yes this directly contradicts the comment earlier in this
  1815. # method, but this is a temporary measure.
  1816. # Look up the instance because the current instance object was
  1817. # stashed on the buildrequest and therefore not complete enough
  1818. # to run .destroy().
  1819. try:
  1820. instance_uuid = instance.uuid
  1821. cell, instance = self._lookup_instance(context, instance_uuid)
  1822. if instance is not None:
  1823. # If instance is None it has already been deleted.
  1824. if cell:
  1825. with nova_context.target_cell(context, cell) as cctxt:
  1826. # FIXME: When the instance context is targeted,
  1827. # we can remove this
  1828. with compute_utils.notify_about_instance_delete(
  1829. self.notifier, cctxt, instance):
  1830. instance.destroy()
  1831. else:
  1832. instance.destroy()
  1833. except exception.InstanceNotFound:
  1834. pass
  1835. return True
  1836. return False
  1837. def _attempt_delete_of_buildrequest(self, context, instance):
  1838. # If there is a BuildRequest then the instance may not have been
  1839. # written to a cell db yet. Delete the BuildRequest here, which
  1840. # will indicate that the Instance build should not proceed.
  1841. try:
  1842. build_req = objects.BuildRequest.get_by_instance_uuid(
  1843. context, instance.uuid)
  1844. build_req.destroy()
  1845. except exception.BuildRequestNotFound:
  1846. # This means that conductor has deleted the BuildRequest so the
  1847. # instance is now in a cell and the delete needs to proceed
  1848. # normally.
  1849. return False
  1850. # We need to detach from any volumes so they aren't orphaned.
  1851. self._local_cleanup_bdm_volumes(
  1852. build_req.block_device_mappings, instance, context)
  1853. return True
  1854. def _delete(self, context, instance, delete_type, cb, **instance_attrs):
  1855. if instance.disable_terminate:
  1856. LOG.info('instance termination disabled', instance=instance)
  1857. return
  1858. cell = None
  1859. # If there is an instance.host (or the instance is shelved-offloaded or
  1860. # in error state), the instance has been scheduled and sent to a
  1861. # cell/compute which means it was pulled from the cell db.
  1862. # Normal delete should be attempted.
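# High-level flow (editorial summary): if the instance never reached a
# cell, try to delete its BuildRequest (and any stashed instance record)
# and stop there; otherwise update the instance, and either RPC the
# delete to the compute service when it is up, or fall back to a
# "local delete" targeted at the instance's cell when the host is down
# or the instance is shelved offloaded.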
  1863. may_have_ports_or_volumes = compute_utils.may_have_ports_or_volumes(
  1864. instance)
  1865. if not instance.host and not may_have_ports_or_volumes:
  1866. try:
  1867. if self._delete_while_booting(context, instance):
  1868. return
  1869. # If instance.host was not set it's possible that the Instance
  1870. # object here was pulled from a BuildRequest object and is not
  1871. # fully populated. Notably it will be missing an 'id' field
  1872. # which will prevent instance.destroy from functioning
  1873. # properly. A lookup is attempted which will either return a
  1874. # full Instance or None if not found. If not found then it's
  1875. # acceptable to skip the rest of the delete processing.
  1876. cell, instance = self._lookup_instance(context, instance.uuid)
  1877. if cell and instance:
  1878. try:
  1879. # Now destroy the instance from the cell it lives in.
  1880. with compute_utils.notify_about_instance_delete(
  1881. self.notifier, context, instance):
  1882. instance.destroy()
  1883. except exception.InstanceNotFound:
  1884. pass
  1885. # The instance was deleted or is already gone.
  1886. return
  1887. if not instance:
  1888. # Instance is already deleted.
  1889. return
  1890. except exception.ObjectActionError:
  1891. # NOTE(melwitt): This means the instance.host changed
  1892. # under us indicating the instance became scheduled
  1893. # during the destroy(). Refresh the instance from the DB and
  1894. # continue on with the delete logic for a scheduled instance.
  1895. # NOTE(danms): If instance.host is set, we should be able to
  1896. # do the following lookup. If not, there's not much we can
  1897. # do to recover.
  1898. cell, instance = self._lookup_instance(context, instance.uuid)
  1899. if not instance:
  1900. # Instance is already deleted
  1901. return
  1902. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  1903. context, instance.uuid)
1904. # In these states an instance has an associated snapshot.
  1905. if instance.vm_state in (vm_states.SHELVED,
  1906. vm_states.SHELVED_OFFLOADED):
  1907. snapshot_id = instance.system_metadata.get('shelved_image_id')
  1908. LOG.info("Working on deleting snapshot %s "
  1909. "from shelved instance...",
  1910. snapshot_id, instance=instance)
  1911. try:
  1912. self.image_api.delete(context, snapshot_id)
  1913. except (exception.ImageNotFound,
  1914. exception.ImageNotAuthorized) as exc:
  1915. LOG.warning("Failed to delete snapshot "
  1916. "from shelved instance (%s).",
  1917. exc.format_message(), instance=instance)
  1918. except Exception:
1919. LOG.exception("Something went wrong when trying to "
1920. "delete the snapshot from the shelved instance.",
  1921. instance=instance)
  1922. original_task_state = instance.task_state
  1923. try:
  1924. # NOTE(maoy): no expected_task_state needs to be set
  1925. instance.update(instance_attrs)
  1926. instance.progress = 0
  1927. instance.save()
  1928. if not instance.host and not may_have_ports_or_volumes:
  1929. try:
  1930. with compute_utils.notify_about_instance_delete(
  1931. self.notifier, context, instance,
  1932. delete_type
  1933. if delete_type != 'soft_delete'
  1934. else 'delete'):
  1935. instance.destroy()
  1936. LOG.info('Instance deleted and does not have host '
  1937. 'field, its vm_state is %(state)s.',
  1938. {'state': instance.vm_state},
  1939. instance=instance)
  1940. return
  1941. except exception.ObjectActionError as ex:
  1942. # The instance's host likely changed under us as
  1943. # this instance could be building and has since been
  1944. # scheduled. Continue with attempts to delete it.
  1945. LOG.debug('Refreshing instance because: %s', ex,
  1946. instance=instance)
  1947. instance.refresh()
  1948. if instance.vm_state == vm_states.RESIZED:
  1949. self._confirm_resize_on_deleting(context, instance)
1950. # NOTE(neha_alhat): After confirming the resize, vm_state becomes
1951. # 'active' and task_state is set to None. But for soft
1952. # deleting a vm, the _do_soft_delete callback requires the
1953. # task_state to be 'SOFT_DELETING', so we need to set the
1954. # task_state to 'SOFT_DELETING' again for the soft_delete case.
1955. # In the short window between confirming the resize and saving the
1956. # task_state as 'SOFT_DELETING', a user can submit another soft
1957. # delete request and the system will accept and process it
1958. # without errors.
  1959. if delete_type == 'soft_delete':
  1960. instance.task_state = instance_attrs['task_state']
  1961. instance.save()
  1962. is_local_delete = True
  1963. try:
  1964. # instance.host must be set in order to look up the service.
  1965. if instance.host is not None:
  1966. service = objects.Service.get_by_compute_host(
  1967. context.elevated(), instance.host)
  1968. is_local_delete = not self.servicegroup_api.service_is_up(
  1969. service)
  1970. if not is_local_delete:
  1971. if original_task_state in (task_states.DELETING,
  1972. task_states.SOFT_DELETING):
  1973. LOG.info('Instance is already in deleting state, '
  1974. 'ignoring this request',
  1975. instance=instance)
  1976. return
  1977. self._record_action_start(context, instance,
  1978. instance_actions.DELETE)
  1979. cb(context, instance, bdms)
  1980. except exception.ComputeHostNotFound:
  1981. LOG.debug('Compute host %s not found during service up check, '
  1982. 'going to local delete instance', instance.host,
  1983. instance=instance)
  1984. if is_local_delete:
  1985. # If instance is in shelved_offloaded state or compute node
  1986. # isn't up, delete instance from db and clean bdms info and
  1987. # network info
  1988. if cell is None:
  1989. # NOTE(danms): If we didn't get our cell from one of the
  1990. # paths above, look it up now.
  1991. try:
  1992. im = objects.InstanceMapping.get_by_instance_uuid(
  1993. context, instance.uuid)
  1994. cell = im.cell_mapping
  1995. except exception.InstanceMappingNotFound:
  1996. LOG.warning('During local delete, failed to find '
  1997. 'instance mapping', instance=instance)
  1998. return
  1999. LOG.debug('Doing local delete in cell %s', cell.identity,
  2000. instance=instance)
  2001. with nova_context.target_cell(context, cell) as cctxt:
  2002. self._local_delete(cctxt, instance, bdms, delete_type, cb)
  2003. except exception.InstanceNotFound:
  2004. # NOTE(comstud): Race condition. Instance already gone.
  2005. pass
  2006. def _confirm_resize_on_deleting(self, context, instance):
  2007. # If in the middle of a resize, use confirm_resize to
  2008. # ensure the original instance is cleaned up too along
  2009. # with its allocations (and migration-based allocations)
  2010. # in placement.
  2011. migration = None
  2012. for status in ('finished', 'confirming'):
  2013. try:
  2014. migration = objects.Migration.get_by_instance_and_status(
  2015. context.elevated(), instance.uuid, status)
  2016. LOG.info('Found an unconfirmed migration during delete, '
  2017. 'id: %(id)s, status: %(status)s',
  2018. {'id': migration.id,
  2019. 'status': migration.status},
  2020. instance=instance)
  2021. break
  2022. except exception.MigrationNotFoundByStatus:
  2023. pass
  2024. if not migration:
  2025. LOG.info('Instance may have been confirmed during delete',
  2026. instance=instance)
  2027. return
  2028. self._record_action_start(context, instance,
  2029. instance_actions.CONFIRM_RESIZE)
  2030. # If migration.cross_cell_move, we need to also cleanup the instance
  2031. # data from the source cell database.
  2032. if migration.cross_cell_move:
  2033. self.compute_task_api.confirm_snapshot_based_resize(
  2034. context, instance, migration, do_cast=False)
  2035. else:
  2036. self.compute_rpcapi.confirm_resize(context,
  2037. instance, migration, migration.source_compute, cast=False)
  2038. def _local_cleanup_bdm_volumes(self, bdms, instance, context):
2039. """Delete the BDM records and, if a BDM is a volume, terminate the
2040. connection and detach the volume via the Volume API.
  2041. """
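# Editorial note: volumes attached with the new-style Cinder attachment
# API carry an attachment_id and are cleaned up via attachment_delete();
# legacy attachments fall back to terminate_connection() (when a stashed
# connector exists) followed by detach(), and delete_on_termination
# volumes are deleted afterwards.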
  2042. elevated = context.elevated()
  2043. for bdm in bdms:
  2044. if bdm.is_volume:
  2045. try:
  2046. if bdm.attachment_id:
  2047. self.volume_api.attachment_delete(context,
  2048. bdm.attachment_id)
  2049. else:
  2050. connector = compute_utils.get_stashed_volume_connector(
  2051. bdm, instance)
  2052. if connector:
  2053. self.volume_api.terminate_connection(context,
  2054. bdm.volume_id,
  2055. connector)
  2056. else:
  2057. LOG.debug('Unable to find connector for volume %s,'
  2058. ' not attempting terminate_connection.',
  2059. bdm.volume_id, instance=instance)
  2060. # Attempt to detach the volume. If there was no
  2061. # connection made in the first place this is just
  2062. # cleaning up the volume state in the Cinder DB.
  2063. self.volume_api.detach(elevated, bdm.volume_id,
  2064. instance.uuid)
  2065. if bdm.delete_on_termination:
  2066. self.volume_api.delete(context, bdm.volume_id)
  2067. except Exception as exc:
  2068. LOG.warning("Ignoring volume cleanup failure due to %s",
  2069. exc, instance=instance)
  2070. # If we're cleaning up volumes from an instance that wasn't yet
  2071. # created in a cell, i.e. the user deleted the server while
  2072. # the BuildRequest still existed, then the BDM doesn't actually
  2073. # exist in the DB to destroy it.
  2074. if 'id' in bdm:
  2075. bdm.destroy()
  2076. @property
  2077. def placementclient(self):
  2078. if self._placementclient is None:
  2079. self._placementclient = report.SchedulerReportClient()
  2080. return self._placementclient
  2081. def _local_delete(self, context, instance, bdms, delete_type, cb):
  2082. if instance.vm_state == vm_states.SHELVED_OFFLOADED:
2083. LOG.info("instance is in SHELVED_OFFLOADED state, cleaning up"
2084. " the instance's info from the database.",
  2085. instance=instance)
  2086. else:
  2087. LOG.warning("instance's host %s is down, deleting from "
  2088. "database", instance.host, instance=instance)
  2089. with compute_utils.notify_about_instance_delete(
  2090. self.notifier, context, instance,
  2091. delete_type if delete_type != 'soft_delete' else 'delete'):
  2092. elevated = context.elevated()
  2093. orig_host = instance.host
  2094. try:
  2095. if instance.vm_state == vm_states.SHELVED_OFFLOADED:
  2096. sysmeta = getattr(instance,
  2097. obj_base.get_attrname(
  2098. 'system_metadata'))
  2099. instance.host = sysmeta.get('shelved_host')
  2100. self.network_api.deallocate_for_instance(elevated,
  2101. instance)
  2102. finally:
  2103. instance.host = orig_host
  2104. # cleanup volumes
  2105. self._local_cleanup_bdm_volumes(bdms, instance, context)
  2106. # Cleanup allocations in Placement since we can't do it from the
  2107. # compute service.
  2108. self.placementclient.delete_allocation_for_instance(
  2109. context, instance.uuid)
  2110. cb(context, instance, bdms, local=True)
  2111. instance.destroy()
  2112. @staticmethod
  2113. def _update_queued_for_deletion(context, instance, qfd):
  2114. # NOTE(tssurya): We query the instance_mapping record of this instance
  2115. # and update the queued_for_delete flag to True (or False according to
  2116. # the state of the instance). This just means that the instance is
  2117. # queued for deletion (or is no longer queued for deletion). It does
  2118. # not guarantee its successful deletion (or restoration). Hence the
2119. # value could be stale, which is fine, considering it is only used
2120. # in a down-cell (desperate) situation.
  2121. im = objects.InstanceMapping.get_by_instance_uuid(context,
  2122. instance.uuid)
  2123. im.queued_for_delete = qfd
  2124. im.save()
  2125. def _do_delete(self, context, instance, bdms, local=False):
  2126. if local:
  2127. instance.vm_state = vm_states.DELETED
  2128. instance.task_state = None
  2129. instance.terminated_at = timeutils.utcnow()
  2130. instance.save()
  2131. else:
  2132. self.compute_rpcapi.terminate_instance(context, instance, bdms)
  2133. self._update_queued_for_deletion(context, instance, True)
  2134. def _do_soft_delete(self, context, instance, bdms, local=False):
  2135. if local:
  2136. instance.vm_state = vm_states.SOFT_DELETED
  2137. instance.task_state = None
  2138. instance.terminated_at = timeutils.utcnow()
  2139. instance.save()
  2140. else:
  2141. self.compute_rpcapi.soft_delete_instance(context, instance)
  2142. self._update_queued_for_deletion(context, instance, True)
  2143. # NOTE(maoy): we allow delete to be called no matter what vm_state says.
  2144. @check_instance_lock
  2145. @check_instance_state(vm_state=None, task_state=None,
  2146. must_have_launched=True)
  2147. def soft_delete(self, context, instance):
  2148. """Terminate an instance."""
  2149. LOG.debug('Going to try to soft delete instance',
  2150. instance=instance)
  2151. self._delete(context, instance, 'soft_delete', self._do_soft_delete,
  2152. task_state=task_states.SOFT_DELETING,
  2153. deleted_at=timeutils.utcnow())
  2154. def _delete_instance(self, context, instance):
  2155. self._delete(context, instance, 'delete', self._do_delete,
  2156. task_state=task_states.DELETING)
  2157. @check_instance_lock
  2158. @check_instance_state(vm_state=None, task_state=None,
  2159. must_have_launched=False)
  2160. def delete(self, context, instance):
  2161. """Terminate an instance."""
  2162. LOG.debug("Going to try to terminate instance", instance=instance)
  2163. self._delete_instance(context, instance)
  2164. @check_instance_lock
  2165. @check_instance_state(vm_state=[vm_states.SOFT_DELETED])
  2166. def restore(self, context, instance):
  2167. """Restore a previously deleted (but not reclaimed) instance."""
  2168. # Check quotas
  2169. flavor = instance.get_flavor()
  2170. project_id, user_id = quotas_obj.ids_from_instance(context, instance)
  2171. compute_utils.check_num_instances_quota(context, flavor, 1, 1,
  2172. project_id=project_id, user_id=user_id)
  2173. self._record_action_start(context, instance, instance_actions.RESTORE)
  2174. if instance.host:
  2175. instance.task_state = task_states.RESTORING
  2176. instance.deleted_at = None
  2177. instance.save(expected_task_state=[None])
  2178. # TODO(melwitt): We're not rechecking for strict quota here to
  2179. # guard against going over quota during a race at this time because
  2180. # the resource consumption for this operation is written to the
  2181. # database by compute.
  2182. self.compute_rpcapi.restore_instance(context, instance)
  2183. else:
  2184. instance.vm_state = vm_states.ACTIVE
  2185. instance.task_state = None
  2186. instance.deleted_at = None
  2187. instance.save(expected_task_state=[None])
  2188. self._update_queued_for_deletion(context, instance, False)
  2189. @check_instance_lock
  2190. @check_instance_state(task_state=None,
  2191. must_have_launched=False)
  2192. def force_delete(self, context, instance):
  2193. """Force delete an instance in any vm_state/task_state."""
  2194. self._delete(context, instance, 'force_delete', self._do_delete,
  2195. task_state=task_states.DELETING)
  2196. def force_stop(self, context, instance, do_cast=True, clean_shutdown=True):
  2197. LOG.debug("Going to try to stop instance", instance=instance)
  2198. instance.task_state = task_states.POWERING_OFF
  2199. instance.progress = 0
  2200. instance.save(expected_task_state=[None])
  2201. self._record_action_start(context, instance, instance_actions.STOP)
  2202. self.compute_rpcapi.stop_instance(context, instance, do_cast=do_cast,
  2203. clean_shutdown=clean_shutdown)
  2204. @check_instance_lock
  2205. @check_instance_host()
  2206. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.ERROR])
  2207. def stop(self, context, instance, do_cast=True, clean_shutdown=True):
  2208. """Stop an instance."""
  2209. self.force_stop(context, instance, do_cast, clean_shutdown)
  2210. @check_instance_lock
  2211. @check_instance_host()
  2212. @check_instance_state(vm_state=[vm_states.STOPPED])
  2213. def start(self, context, instance):
  2214. """Start an instance."""
  2215. LOG.debug("Going to try to start instance", instance=instance)
  2216. instance.task_state = task_states.POWERING_ON
  2217. instance.save(expected_task_state=[None])
  2218. self._record_action_start(context, instance, instance_actions.START)
  2219. self.compute_rpcapi.start_instance(context, instance)
  2220. @check_instance_lock
  2221. @check_instance_host()
  2222. @check_instance_state(vm_state=vm_states.ALLOW_TRIGGER_CRASH_DUMP)
  2223. def trigger_crash_dump(self, context, instance):
  2224. """Trigger crash dump in an instance."""
  2225. LOG.debug("Try to trigger crash dump", instance=instance)
  2226. self._record_action_start(context, instance,
  2227. instance_actions.TRIGGER_CRASH_DUMP)
  2228. self.compute_rpcapi.trigger_crash_dump(context, instance)
  2229. def _generate_minimal_construct_for_down_cells(self, context,
  2230. down_cell_uuids,
  2231. project, limit):
  2232. """Generate a list of minimal instance constructs for a given list of
  2233. cells that did not respond to a list operation. This will list
  2234. every instance mapping in the affected cells and return a minimal
  2235. objects.Instance for each (non-queued-for-delete) mapping.
  2236. :param context: RequestContext
  2237. :param down_cell_uuids: A list of cell UUIDs that did not respond
  2238. :param project: A project ID to filter mappings, or None
  2239. :param limit: A numeric limit on the number of results, or None
  2240. :returns: An InstanceList() of partial Instance() objects
  2241. """
  2242. unavailable_servers = objects.InstanceList()
  2243. for cell_uuid in down_cell_uuids:
  2244. LOG.warning("Cell %s is not responding and hence only "
  2245. "partial results are available from this "
  2246. "cell if any.", cell_uuid)
  2247. instance_mappings = (objects.InstanceMappingList.
  2248. get_not_deleted_by_cell_and_project(context, cell_uuid,
  2249. project, limit=limit))
  2250. for im in instance_mappings:
  2251. unavailable_servers.objects.append(
  2252. objects.Instance(
  2253. context=context,
  2254. uuid=im.instance_uuid,
  2255. project_id=im.project_id,
  2256. created_at=im.created_at
  2257. )
  2258. )
  2259. if limit is not None:
  2260. limit -= len(instance_mappings)
  2261. if limit <= 0:
  2262. break
  2263. return unavailable_servers
  2264. def _get_instance_map_or_none(self, context, instance_uuid):
  2265. try:
  2266. inst_map = objects.InstanceMapping.get_by_instance_uuid(
  2267. context, instance_uuid)
  2268. except exception.InstanceMappingNotFound:
  2269. # InstanceMapping should always be found generally. This exception
  2270. # may be raised if a deployment has partially migrated the nova-api
  2271. # services.
  2272. inst_map = None
  2273. return inst_map
  2274. @staticmethod
  2275. def _save_user_id_in_instance_mapping(mapping, instance):
  2276. # TODO(melwitt): We take the opportunity to migrate user_id on the
  2277. # instance mapping if it's not yet been migrated. This can be removed
  2278. # in a future release, when all migrations are complete.
  2279. # If the instance came from a RequestSpec because of a down cell, its
  2280. # user_id could be None and the InstanceMapping.user_id field is
  2281. # non-nullable. Avoid trying to set/save the user_id in that case.
  2282. if 'user_id' not in mapping and instance.user_id is not None:
  2283. mapping.user_id = instance.user_id
  2284. mapping.save()
  2285. def _get_instance_from_cell(self, context, im, expected_attrs,
  2286. cell_down_support):
  2287. # NOTE(danms): Even though we're going to scatter/gather to the
  2288. # right cell, other code depends on this being force targeted when
  2289. # the get call returns.
  2290. nova_context.set_target_cell(context, im.cell_mapping)
  2291. uuid = im.instance_uuid
  2292. result = nova_context.scatter_gather_single_cell(context,
  2293. im.cell_mapping, objects.Instance.get_by_uuid, uuid,
  2294. expected_attrs=expected_attrs)
  2295. cell_uuid = im.cell_mapping.uuid
  2296. if not nova_context.is_cell_failure_sentinel(result[cell_uuid]):
  2297. inst = result[cell_uuid]
  2298. self._save_user_id_in_instance_mapping(im, inst)
  2299. return inst
  2300. elif isinstance(result[cell_uuid], exception.InstanceNotFound):
  2301. raise exception.InstanceNotFound(instance_id=uuid)
  2302. elif cell_down_support:
  2303. if im.queued_for_delete:
  2304. # should be treated like deleted instance.
  2305. raise exception.InstanceNotFound(instance_id=uuid)
  2306. # instance in down cell, return a minimal construct
  2307. LOG.warning("Cell %s is not responding and hence only "
  2308. "partial results are available from this "
  2309. "cell.", cell_uuid)
  2310. try:
  2311. rs = objects.RequestSpec.get_by_instance_uuid(context,
  2312. uuid)
2313. # For the boot-from-volume (BFV) case, we could have rs.image but
2314. # rs.image.id might still not be set. So we check the existence
2315. # of both the image and its id.
  2316. image_ref = (rs.image.id if rs.image and
  2317. 'id' in rs.image else None)
  2318. inst = objects.Instance(context=context, power_state=0,
  2319. uuid=uuid,
  2320. project_id=im.project_id,
  2321. created_at=im.created_at,
  2322. user_id=rs.user_id,
  2323. flavor=rs.flavor,
  2324. image_ref=image_ref,
  2325. availability_zone=rs.availability_zone)
  2326. self._save_user_id_in_instance_mapping(im, inst)
  2327. return inst
  2328. except exception.RequestSpecNotFound:
  2329. # could be that a deleted instance whose request
  2330. # spec has been archived is being queried.
  2331. raise exception.InstanceNotFound(instance_id=uuid)
  2332. else:
  2333. raise exception.NovaException(
  2334. _("Cell %s is not responding and hence instance "
  2335. "info is not available.") % cell_uuid)
  2336. def _get_instance(self, context, instance_uuid, expected_attrs,
  2337. cell_down_support=False):
  2338. inst_map = self._get_instance_map_or_none(context, instance_uuid)
  2339. if inst_map and (inst_map.cell_mapping is not None):
  2340. instance = self._get_instance_from_cell(context, inst_map,
  2341. expected_attrs, cell_down_support)
  2342. elif inst_map and (inst_map.cell_mapping is None):
  2343. # This means the instance has not been scheduled and put in
  2344. # a cell yet. For now it also may mean that the deployer
  2345. # has not created their cell(s) yet.
  2346. try:
  2347. build_req = objects.BuildRequest.get_by_instance_uuid(
  2348. context, instance_uuid)
  2349. instance = build_req.instance
  2350. except exception.BuildRequestNotFound:
  2351. # Instance was mapped and the BuildRequest was deleted
  2352. # while fetching. Try again.
  2353. inst_map = self._get_instance_map_or_none(context,
  2354. instance_uuid)
  2355. if inst_map and (inst_map.cell_mapping is not None):
  2356. instance = self._get_instance_from_cell(context, inst_map,
  2357. expected_attrs, cell_down_support)
  2358. else:
  2359. raise exception.InstanceNotFound(instance_id=instance_uuid)
  2360. else:
  2361. # If we got here, we don't have an instance mapping, but we aren't
  2362. # sure why. The instance mapping might be missing because the
  2363. # upgrade is incomplete (map_instances wasn't run). Or because the
  2364. # instance was deleted and the DB was archived at which point the
  2365. # mapping is deleted. The former case is bad, but because of the
  2366. # latter case we can't really log any kind of warning/error here
  2367. # since it might be normal.
  2368. raise exception.InstanceNotFound(instance_id=instance_uuid)
  2369. return instance
  2370. def get(self, context, instance_id, expected_attrs=None,
  2371. cell_down_support=False):
  2372. """Get a single instance with the given instance_id.
  2373. :param cell_down_support: True if the API (and caller) support
  2374. returning a minimal instance
  2375. construct if the relevant cell is
  2376. down. If False, an error is raised
  2377. since the instance cannot be retrieved
  2378. due to the cell being down.
  2379. """
  2380. if not expected_attrs:
  2381. expected_attrs = []
  2382. expected_attrs.extend(['metadata', 'system_metadata',
  2383. 'security_groups', 'info_cache'])
  2384. # NOTE(ameade): we still need to support integer ids for ec2
  2385. try:
  2386. if uuidutils.is_uuid_like(instance_id):
  2387. LOG.debug("Fetching instance by UUID",
  2388. instance_uuid=instance_id)
  2389. instance = self._get_instance(context, instance_id,
  2390. expected_attrs, cell_down_support=cell_down_support)
  2391. else:
  2392. LOG.debug("Failed to fetch instance by id %s", instance_id)
  2393. raise exception.InstanceNotFound(instance_id=instance_id)
  2394. except exception.InvalidID:
  2395. LOG.debug("Invalid instance id %s", instance_id)
  2396. raise exception.InstanceNotFound(instance_id=instance_id)
  2397. return instance
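# Usage sketch (illustrative only, not part of the original module): the
# names ``compute_api``, ``ctxt`` and ``instance_uuid`` below are assumed to
# be an initialized API object, a nova auth RequestContext and a server UUID.
# With cell_down_support=True the caller gets a minimal Instance construct
# (power_state=0, only a few fields set) instead of an error when the
# instance's cell is unreachable.
#
#     inst = compute_api.get(ctxt, instance_uuid,
#                            expected_attrs=['flavor'],
#                            cell_down_support=True)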
  2398. def get_all(self, context, search_opts=None, limit=None, marker=None,
  2399. expected_attrs=None, sort_keys=None, sort_dirs=None,
  2400. cell_down_support=False, all_tenants=False):
  2401. """Get all instances filtered by one of the given parameters.
  2402. If there is no filter and the context is an admin, it will retrieve
  2403. all instances in the system.
  2404. Deleted instances will be returned by default, unless there is a
  2405. search option that says otherwise.
  2406. The results will be sorted based on the list of sort keys in the
  2407. 'sort_keys' parameter (first value is primary sort key, second value is
2408. secondary sort key, etc.). For each sort key, the associated sort
  2409. direction is based on the list of sort directions in the 'sort_dirs'
  2410. parameter.
  2411. :param cell_down_support: True if the API (and caller) support
  2412. returning a minimal instance
  2413. construct if the relevant cell is
  2414. down. If False, instances from
  2415. unreachable cells will be omitted.
  2416. :param all_tenants: True if the "all_tenants" filter was passed.
  2417. """
  2418. if search_opts is None:
  2419. search_opts = {}
  2420. LOG.debug("Searching by: %s", str(search_opts))
  2421. # Fixups for the DB call
  2422. filters = {}
  2423. def _remap_flavor_filter(flavor_id):
  2424. flavor = objects.Flavor.get_by_flavor_id(context, flavor_id)
  2425. filters['instance_type_id'] = flavor.id
  2426. def _remap_fixed_ip_filter(fixed_ip):
  2427. # Turn fixed_ip into a regexp match. Since '.' matches
  2428. # any character, we need to use regexp escaping for it.
  2429. filters['ip'] = '^%s$' % fixed_ip.replace('.', '\\.')
  2430. # search_option to filter_name mapping.
  2431. filter_mapping = {
  2432. 'image': 'image_ref',
  2433. 'name': 'display_name',
  2434. 'tenant_id': 'project_id',
  2435. 'flavor': _remap_flavor_filter,
  2436. 'fixed_ip': _remap_fixed_ip_filter}
  2437. # copy from search_opts, doing various remappings as necessary
  2438. for opt, value in search_opts.items():
  2439. # Do remappings.
  2440. # Values not in the filter_mapping table are copied as-is.
  2441. # If remapping is None, option is not copied
  2442. # If the remapping is a string, it is the filter_name to use
  2443. try:
  2444. remap_object = filter_mapping[opt]
  2445. except KeyError:
  2446. filters[opt] = value
  2447. else:
  2448. # Remaps are strings to translate to, or functions to call
  2449. # to do the translating as defined by the table above.
  2450. if isinstance(remap_object, six.string_types):
  2451. filters[remap_object] = value
  2452. else:
  2453. try:
  2454. remap_object(value)
  2455. # We already know we can't match the filter, so
  2456. # return an empty list
  2457. except ValueError:
  2458. return objects.InstanceList()
  2459. # IP address filtering cannot be applied at the DB layer, remove any DB
  2460. # limit so that it can be applied after the IP filter.
  2461. filter_ip = 'ip6' in filters or 'ip' in filters
  2462. skip_build_request = False
  2463. orig_limit = limit
  2464. if filter_ip:
2465. # We cannot skip build requests if there is a marker since
2466. # the marker could be a build request.
  2467. skip_build_request = marker is None
  2468. if self.network_api.has_substr_port_filtering_extension(context):
  2469. # We're going to filter by IP using Neutron so set filter_ip
  2470. # to False so we don't attempt post-DB query filtering in
  2471. # memory below.
  2472. filter_ip = False
  2473. instance_uuids = self._ip_filter_using_neutron(context,
  2474. filters)
  2475. if instance_uuids:
  2476. # Note that 'uuid' is not in the 2.1 GET /servers query
  2477. # parameter schema, however, we allow additionalProperties
  2478. # so someone could filter instances by uuid, which doesn't
  2479. # make a lot of sense but we have to account for it.
  2480. if 'uuid' in filters and filters['uuid']:
  2481. filter_uuids = filters['uuid']
  2482. if isinstance(filter_uuids, list):
  2483. instance_uuids.extend(filter_uuids)
  2484. else:
  2485. # Assume a string. If it's a dict or tuple or
  2486. # something, well...that's too bad. This is why
  2487. # we have query parameter schema definitions.
  2488. if filter_uuids not in instance_uuids:
  2489. instance_uuids.append(filter_uuids)
  2490. filters['uuid'] = instance_uuids
  2491. else:
  2492. # No matches on the ip filter(s), return an empty list.
  2493. return objects.InstanceList()
  2494. elif limit:
  2495. LOG.debug('Removing limit for DB query due to IP filter')
  2496. limit = None
2497. # Skip getting BuildRequests if filtering by IP address, as building
  2498. # instances will not have IP addresses.
  2499. if skip_build_request:
  2500. build_requests = objects.BuildRequestList()
  2501. else:
  2502. # The ordering of instances will be
  2503. # [sorted instances with no host] + [sorted instances with host].
  2504. # This means BuildRequest and cell0 instances first, then cell
  2505. # instances
  2506. try:
  2507. build_requests = objects.BuildRequestList.get_by_filters(
  2508. context, filters, limit=limit, marker=marker,
  2509. sort_keys=sort_keys, sort_dirs=sort_dirs)
2510. # If we found the marker in the build requests then we need to
2511. # set it to None so we don't expect to find it in the cells below.
  2512. marker = None
  2513. except exception.MarkerNotFound:
  2514. # If we didn't find the marker in the build requests then keep
  2515. # looking for it in the cells.
  2516. build_requests = objects.BuildRequestList()
  2517. build_req_instances = objects.InstanceList(
  2518. objects=[build_req.instance for build_req in build_requests])
  2519. # Only subtract from limit if it is not None
  2520. limit = (limit - len(build_req_instances)) if limit else limit
  2521. # We could arguably avoid joining on security_groups if we're using
  2522. # neutron (which is the default) but if you're using neutron then the
  2523. # security_group_instance_association table should be empty anyway
  2524. # and the DB should optimize out that join, making it insignificant.
  2525. fields = ['metadata', 'info_cache', 'security_groups']
  2526. if expected_attrs:
  2527. fields.extend(expected_attrs)
  2528. insts, down_cell_uuids = instance_list.get_instance_objects_sorted(
  2529. context, filters, limit, marker, fields, sort_keys, sort_dirs,
  2530. cell_down_support=cell_down_support)
  2531. def _get_unique_filter_method():
  2532. seen_uuids = set()
  2533. def _filter(instance):
  2534. # During a cross-cell move operation we could have the instance
  2535. # in more than one cell database so we not only have to filter
  2536. # duplicates but we want to make sure we only return the
  2537. # "current" one which should also be the one that the instance
  2538. # mapping points to, but we don't want to do that expensive
  2539. # lookup here. The DB API will filter out hidden instances by
  2540. # default but there is a small window where two copies of an
  2541. # instance could be hidden=False in separate cell DBs.
  2542. # NOTE(mriedem): We could make this better in the case that we
  2543. # have duplicate instances that are both hidden=False by
  2544. # showing the one with the newer updated_at value, but that
  2545. # could be tricky if the user is filtering on
  2546. # changes-since/before or updated_at, or sorting on updated_at,
  2547. # but technically that was already potentially broken with this
  2548. # _filter method if we return an older BuildRequest.instance,
  2549. # and given the window should be very small where we have
  2550. # duplicates, it's probably not worth the complexity.
  2551. if instance.uuid in seen_uuids:
  2552. return False
  2553. seen_uuids.add(instance.uuid)
  2554. return True
  2555. return _filter
  2556. filter_method = _get_unique_filter_method()
  2557. # Only subtract from limit if it is not None
  2558. limit = (limit - len(insts)) if limit else limit
  2559. # TODO(alaski): Clean up the objects concatenation when List objects
  2560. # support it natively.
  2561. instances = objects.InstanceList(
  2562. objects=list(filter(filter_method,
  2563. build_req_instances.objects +
  2564. insts.objects)))
  2565. if filter_ip:
  2566. instances = self._ip_filter(instances, filters, orig_limit)
  2567. if cell_down_support:
  2568. # API and client want minimal construct instances for any cells
2569. # that didn't return, so generate and prepend those to the actual
  2570. # results.
  2571. project = search_opts.get('project_id', context.project_id)
  2572. if all_tenants:
  2573. # NOTE(tssurya): The only scenario where project has to be None
  2574. # is when using "all_tenants" in which case we do not want
  2575. # the query to be restricted based on the project_id.
  2576. project = None
  2577. limit = (orig_limit - len(instances)) if limit else limit
  2578. return (self._generate_minimal_construct_for_down_cells(context,
  2579. down_cell_uuids, project, limit) + instances)
  2580. return instances
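# Usage sketch (illustrative only; ``compute_api`` and ``ctxt`` are assumed
# names, the filter values are made up): the 'flavor' and 'fixed_ip' search
# options are remapped before the DB query, e.g. fixed_ip '10.0.0.5' becomes
# the anchored regexp '^10\.0\.0\.5$' stored under the 'ip' filter key.
#
#     servers = compute_api.get_all(
#         ctxt,
#         search_opts={'name': 'web', 'fixed_ip': '10.0.0.5'},
#         limit=50, sort_keys=['created_at'], sort_dirs=['desc'])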
  2581. @staticmethod
  2582. def _ip_filter(inst_models, filters, limit):
  2583. ipv4_f = re.compile(str(filters.get('ip')))
  2584. ipv6_f = re.compile(str(filters.get('ip6')))
  2585. def _match_instance(instance):
  2586. nw_info = instance.get_network_info()
  2587. for vif in nw_info:
  2588. for fixed_ip in vif.fixed_ips():
  2589. address = fixed_ip.get('address')
  2590. if not address:
  2591. continue
  2592. version = fixed_ip.get('version')
  2593. if ((version == 4 and ipv4_f.match(address)) or
  2594. (version == 6 and ipv6_f.match(address))):
  2595. return True
  2596. return False
  2597. result_objs = []
  2598. for instance in inst_models:
  2599. if _match_instance(instance):
  2600. result_objs.append(instance)
  2601. if limit and len(result_objs) == limit:
  2602. break
  2603. return objects.InstanceList(objects=result_objs)
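# Minimal standalone sketch of the in-memory IP matching used above
# (illustrative only, not Nova code): the 'ip'/'ip6' filters are compiled to
# regexps and matched against each fixed IP address of the instance.
#
#     import re
#     ipv4_f = re.compile(r'^10\.0\.0\.5$')
#     fixed_ips = [{'address': '10.0.0.5', 'version': 4}]
#     matched = any(f['version'] == 4 and ipv4_f.match(f['address'])
#                   for f in fixed_ips)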
  2604. def _ip_filter_using_neutron(self, context, filters):
  2605. ip4_address = filters.get('ip')
  2606. ip6_address = filters.get('ip6')
  2607. addresses = [ip4_address, ip6_address]
  2608. uuids = []
  2609. for address in addresses:
  2610. if address:
  2611. try:
  2612. ports = self.network_api.list_ports(
  2613. context, fixed_ips='ip_address_substr=' + address,
  2614. fields=['device_id'])['ports']
  2615. for port in ports:
  2616. uuids.append(port['device_id'])
  2617. except Exception as e:
  2618. LOG.error('An error occurred while listing ports '
  2619. 'with an ip_address filter value of "%s". '
  2620. 'Error: %s',
  2621. address, six.text_type(e))
  2622. return uuids
  2623. def update_instance(self, context, instance, updates):
  2624. """Updates a single Instance object with some updates dict.
  2625. Returns the updated instance.
  2626. """
  2627. # NOTE(sbauza): Given we only persist the Instance object after we
  2628. # create the BuildRequest, we are sure that if the Instance object
  2629. # has an ID field set, then it was persisted in the right Cell DB.
  2630. if instance.obj_attr_is_set('id'):
  2631. instance.update(updates)
  2632. instance.save()
  2633. else:
  2634. # Instance is not yet mapped to a cell, so we need to update
  2635. # BuildRequest instead
  2636. # TODO(sbauza): Fix the possible race conditions where BuildRequest
  2637. # could be deleted because of either a concurrent instance delete
  2638. # or because the scheduler just returned a destination right
  2639. # after we called the instance in the API.
  2640. try:
  2641. build_req = objects.BuildRequest.get_by_instance_uuid(
  2642. context, instance.uuid)
  2643. instance = build_req.instance
  2644. instance.update(updates)
  2645. # FIXME(sbauza): Here we are updating the current
  2646. # thread-related BuildRequest object. Given that another worker
2647. # could be looking up that BuildRequest in the API, it could
2648. # pass it down to the conductor without picking up these
2649. # updates, so we could have a race condition where the
2650. # conductor would be missing the updated fields, but
  2651. # that's something we could discuss once the instance record
  2652. # is persisted by the conductor.
  2653. build_req.save()
  2654. except exception.BuildRequestNotFound:
  2655. # Instance was mapped and the BuildRequest was deleted
  2656. # while fetching (and possibly the instance could have been
2657. # deleted as well). We need to look up the Instance object
2658. # again in order to correctly update it.
  2659. # TODO(sbauza): Figure out a good way to know the expected
  2660. # attributes by checking which fields are set or not.
  2661. expected_attrs = ['flavor', 'pci_devices', 'numa_topology',
  2662. 'tags', 'metadata', 'system_metadata',
  2663. 'security_groups', 'info_cache']
  2664. inst_map = self._get_instance_map_or_none(context,
  2665. instance.uuid)
  2666. if inst_map and (inst_map.cell_mapping is not None):
  2667. with nova_context.target_cell(
  2668. context,
  2669. inst_map.cell_mapping) as cctxt:
  2670. instance = objects.Instance.get_by_uuid(
  2671. cctxt, instance.uuid,
  2672. expected_attrs=expected_attrs)
  2673. instance.update(updates)
  2674. instance.save()
  2675. else:
  2676. # Conductor doesn't delete the BuildRequest until after the
  2677. # InstanceMapping record is created, so if we didn't get
  2678. # that and the BuildRequest doesn't exist, then the
  2679. # instance is already gone and we need to just error out.
  2680. raise exception.InstanceNotFound(instance_id=instance.uuid)
  2681. return instance
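# Usage sketch (illustrative only; ``compute_api`` and ``ctxt`` are assumed
# names): the updates dict is applied either to the persisted Instance or,
# if the instance has not yet been scheduled to a cell, to its BuildRequest.
#
#     instance = compute_api.update_instance(
#         ctxt, instance, {'display_name': 'renamed-server'})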
  2682. # NOTE(melwitt): We don't check instance lock for backup because lock is
  2683. # intended to prevent accidental change/delete of instances
  2684. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2685. vm_states.PAUSED, vm_states.SUSPENDED])
  2686. def backup(self, context, instance, name, backup_type, rotation,
  2687. extra_properties=None):
  2688. """Backup the given instance
  2689. :param instance: nova.objects.instance.Instance object
  2690. :param name: name of the backup
  2691. :param backup_type: 'daily' or 'weekly'
  2692. :param rotation: int representing how many backups to keep around;
  2693. None if rotation shouldn't be used (as in the case of snapshots)
  2694. :param extra_properties: dict of extra image properties to include
  2695. when creating the image.
  2696. :returns: A dict containing image metadata
  2697. """
  2698. props_copy = dict(extra_properties, backup_type=backup_type)
  2699. if compute_utils.is_volume_backed_instance(context, instance):
2700. LOG.info("Backup is not supported for volume-backed "
2701. "instances.", instance=instance)
  2702. raise exception.InvalidRequest(
  2703. _('Backup is not supported for volume-backed instances.'))
  2704. else:
  2705. image_meta = compute_utils.create_image(
  2706. context, instance, name, 'backup', self.image_api,
  2707. extra_properties=props_copy)
  2708. instance.task_state = task_states.IMAGE_BACKUP
  2709. instance.save(expected_task_state=[None])
  2710. self._record_action_start(context, instance,
  2711. instance_actions.BACKUP)
  2712. self.compute_rpcapi.backup_instance(context, instance,
  2713. image_meta['id'],
  2714. backup_type,
  2715. rotation)
  2716. return image_meta
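# Usage sketch (illustrative only; assumed names as above, property values
# made up): a 'daily' backup keeping the last 7 images. Volume-backed
# servers raise InvalidRequest instead of creating an image.
#
#     image_meta = compute_api.backup(
#         ctxt, instance, name='nightly-%s' % instance.uuid,
#         backup_type='daily', rotation=7,
#         extra_properties={'owner_team': 'ops'})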
  2717. # NOTE(melwitt): We don't check instance lock for snapshot because lock is
  2718. # intended to prevent accidental change/delete of instances
  2719. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2720. vm_states.PAUSED, vm_states.SUSPENDED])
  2721. def snapshot(self, context, instance, name, extra_properties=None):
  2722. """Snapshot the given instance.
  2723. :param instance: nova.objects.instance.Instance object
  2724. :param name: name of the snapshot
  2725. :param extra_properties: dict of extra image properties to include
  2726. when creating the image.
  2727. :returns: A dict containing image metadata
  2728. """
  2729. image_meta = compute_utils.create_image(
  2730. context, instance, name, 'snapshot', self.image_api,
  2731. extra_properties=extra_properties)
  2732. instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING
  2733. try:
  2734. instance.save(expected_task_state=[None])
  2735. except (exception.InstanceNotFound,
  2736. exception.UnexpectedDeletingTaskStateError) as ex:
2737. # Determine the attr/state values to use when raising the
2738. # InstanceInvalidState exception below
  2739. LOG.debug('Instance disappeared during snapshot.',
  2740. instance=instance)
  2741. try:
  2742. image_id = image_meta['id']
  2743. self.image_api.delete(context, image_id)
  2744. LOG.info('Image %s deleted because instance '
  2745. 'deleted before snapshot started.',
  2746. image_id, instance=instance)
  2747. except exception.ImageNotFound:
  2748. pass
  2749. except Exception as exc:
  2750. LOG.warning("Error while trying to clean up image %(img_id)s: "
  2751. "%(error_msg)s",
  2752. {"img_id": image_meta['id'],
  2753. "error_msg": six.text_type(exc)})
  2754. attr = 'task_state'
  2755. state = task_states.DELETING
  2756. if type(ex) == exception.InstanceNotFound:
  2757. attr = 'vm_state'
  2758. state = vm_states.DELETED
  2759. raise exception.InstanceInvalidState(attr=attr,
  2760. instance_uuid=instance.uuid,
  2761. state=state,
  2762. method='snapshot')
  2763. self._record_action_start(context, instance,
  2764. instance_actions.CREATE_IMAGE)
  2765. self.compute_rpcapi.snapshot_instance(context, instance,
  2766. image_meta['id'])
  2767. return image_meta
  2768. # NOTE(melwitt): We don't check instance lock for snapshot because lock is
  2769. # intended to prevent accidental change/delete of instances
  2770. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2771. vm_states.SUSPENDED])
  2772. def snapshot_volume_backed(self, context, instance, name,
  2773. extra_properties=None):
  2774. """Snapshot the given volume-backed instance.
  2775. :param instance: nova.objects.instance.Instance object
  2776. :param name: name of the backup or snapshot
  2777. :param extra_properties: dict of extra image properties to include
  2778. :returns: the new image metadata
  2779. """
  2780. image_meta = compute_utils.initialize_instance_snapshot_metadata(
  2781. context, instance, name, extra_properties)
  2782. # the new image is simply a bucket of properties (particularly the
  2783. # block device mapping, kernel and ramdisk IDs) with no image data,
  2784. # hence the zero size
  2785. image_meta['size'] = 0
  2786. for attr in ('container_format', 'disk_format'):
  2787. image_meta.pop(attr, None)
  2788. properties = image_meta['properties']
  2789. # clean properties before filling
  2790. for key in ('block_device_mapping', 'bdm_v2', 'root_device_name'):
  2791. properties.pop(key, None)
  2792. if instance.root_device_name:
  2793. properties['root_device_name'] = instance.root_device_name
  2794. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  2795. context, instance.uuid)
  2796. mapping = [] # list of BDM dicts that can go into the image properties
  2797. # Do some up-front filtering of the list of BDMs from
  2798. # which we are going to create snapshots.
  2799. volume_bdms = []
  2800. for bdm in bdms:
  2801. if bdm.no_device:
  2802. continue
  2803. if bdm.is_volume:
  2804. # These will be handled below.
  2805. volume_bdms.append(bdm)
  2806. else:
  2807. mapping.append(bdm.get_image_mapping())
  2808. # Check limits in Cinder before creating snapshots to avoid going over
  2809. # quota in the middle of a list of volumes. This is a best-effort check
  2810. # but concurrently running snapshot requests from the same project
  2811. # could still fail to create volume snapshots if they go over limit.
  2812. if volume_bdms:
  2813. limits = self.volume_api.get_absolute_limits(context)
  2814. total_snapshots_used = limits['totalSnapshotsUsed']
  2815. max_snapshots = limits['maxTotalSnapshots']
  2816. # -1 means there is unlimited quota for snapshots
  2817. if (max_snapshots > -1 and
  2818. len(volume_bdms) + total_snapshots_used > max_snapshots):
  2819. LOG.debug('Unable to create volume snapshots for instance. '
  2820. 'Currently has %s snapshots, requesting %s new '
  2821. 'snapshots, with a limit of %s.',
  2822. total_snapshots_used, len(volume_bdms),
  2823. max_snapshots, instance=instance)
  2824. raise exception.OverQuota(overs='snapshots')
  2825. quiesced = False
  2826. if instance.vm_state == vm_states.ACTIVE:
  2827. try:
  2828. LOG.info("Attempting to quiesce instance before volume "
  2829. "snapshot.", instance=instance)
  2830. self.compute_rpcapi.quiesce_instance(context, instance)
  2831. quiesced = True
  2832. except (exception.InstanceQuiesceNotSupported,
  2833. exception.QemuGuestAgentNotEnabled,
  2834. exception.NovaException, NotImplementedError) as err:
  2835. if strutils.bool_from_string(instance.system_metadata.get(
  2836. 'image_os_require_quiesce')):
  2837. raise
  2838. if isinstance(err, exception.NovaException):
  2839. LOG.info('Skipping quiescing instance: %(reason)s.',
  2840. {'reason': err.format_message()},
  2841. instance=instance)
  2842. else:
  2843. LOG.info('Skipping quiescing instance because the '
  2844. 'operation is not supported by the underlying '
  2845. 'compute driver.', instance=instance)
  2846. # NOTE(tasker): discovered that an uncaught exception could occur
2847. # after the instance has been frozen. Catch and thaw.
  2848. except Exception as ex:
  2849. with excutils.save_and_reraise_exception():
  2850. LOG.error("An error occurred during quiesce of instance. "
  2851. "Unquiescing to ensure instance is thawed. "
  2852. "Error: %s", six.text_type(ex),
  2853. instance=instance)
  2854. self.compute_rpcapi.unquiesce_instance(context, instance,
  2855. mapping=None)
  2856. @wrap_instance_event(prefix='api')
  2857. def snapshot_instance(self, context, instance, bdms):
  2858. try:
  2859. for bdm in volume_bdms:
  2860. # create snapshot based on volume_id
  2861. volume = self.volume_api.get(context, bdm.volume_id)
  2862. # NOTE(yamahata): Should we wait for snapshot creation?
2863. # Linux LVM snapshot creation completes in a
2864. # short time, so it doesn't matter for now.
  2865. name = _('snapshot for %s') % image_meta['name']
  2866. LOG.debug('Creating snapshot from volume %s.',
  2867. volume['id'], instance=instance)
  2868. snapshot = self.volume_api.create_snapshot_force(
  2869. context, volume['id'],
  2870. name, volume['display_description'])
  2871. mapping_dict = block_device.snapshot_from_bdm(
  2872. snapshot['id'], bdm)
  2873. mapping_dict = mapping_dict.get_image_mapping()
  2874. mapping.append(mapping_dict)
  2875. return mapping
  2876. # NOTE(tasker): No error handling is done in the above for loop.
  2877. # This means that if the snapshot fails and throws an exception
  2878. # the traceback will skip right over the unquiesce needed below.
  2879. # Here, catch any exception, unquiesce the instance, and raise the
  2880. # error so that the calling function can do what it needs to in
2881. # order to properly handle a failed snapshot.
  2882. except Exception:
  2883. with excutils.save_and_reraise_exception():
  2884. if quiesced:
  2885. LOG.info("Unquiescing instance after volume snapshot "
  2886. "failure.", instance=instance)
  2887. self.compute_rpcapi.unquiesce_instance(
  2888. context, instance, mapping)
  2889. self._record_action_start(context, instance,
  2890. instance_actions.CREATE_IMAGE)
  2891. mapping = snapshot_instance(self, context, instance, bdms)
  2892. if quiesced:
  2893. self.compute_rpcapi.unquiesce_instance(context, instance, mapping)
  2894. if mapping:
  2895. properties['block_device_mapping'] = mapping
  2896. properties['bdm_v2'] = True
  2897. return self.image_api.create(context, image_meta)
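# Sketch of the image record produced above (illustrative only; names and
# values are made up): the snapshot image carries no data (size 0) and the
# per-volume snapshots are referenced from its properties, roughly:
#
#     {'name': 'my-snap', 'size': 0,
#      'properties': {'root_device_name': '/dev/vda',
#                     'bdm_v2': True,
#                     'block_device_mapping': [...]}}  # one entry per BDM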
  2898. @check_instance_lock
  2899. def reboot(self, context, instance, reboot_type):
  2900. """Reboot the given instance."""
  2901. if reboot_type == 'SOFT':
  2902. self._soft_reboot(context, instance)
  2903. else:
  2904. self._hard_reboot(context, instance)
  2905. @check_instance_state(vm_state=set(vm_states.ALLOW_SOFT_REBOOT),
  2906. task_state=[None])
  2907. def _soft_reboot(self, context, instance):
  2908. expected_task_state = [None]
  2909. instance.task_state = task_states.REBOOTING
  2910. instance.save(expected_task_state=expected_task_state)
  2911. self._record_action_start(context, instance, instance_actions.REBOOT)
  2912. self.compute_rpcapi.reboot_instance(context, instance=instance,
  2913. block_device_info=None,
  2914. reboot_type='SOFT')
  2915. @check_instance_state(vm_state=set(vm_states.ALLOW_HARD_REBOOT),
  2916. task_state=task_states.ALLOW_REBOOT)
  2917. def _hard_reboot(self, context, instance):
  2918. instance.task_state = task_states.REBOOTING_HARD
  2919. instance.save(expected_task_state=task_states.ALLOW_REBOOT)
  2920. self._record_action_start(context, instance, instance_actions.REBOOT)
  2921. self.compute_rpcapi.reboot_instance(context, instance=instance,
  2922. block_device_info=None,
  2923. reboot_type='HARD')
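# Usage sketch (illustrative only; assumed names as above): reboot_type is
# either 'SOFT' or 'HARD'; anything other than 'SOFT' is treated as a hard
# reboot by reboot() above.
#
#     compute_api.reboot(ctxt, instance, reboot_type='HARD')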
  2924. # TODO(stephenfin): We should expand kwargs out to named args
  2925. @check_instance_lock
  2926. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2927. vm_states.ERROR])
  2928. def rebuild(self, context, instance, image_href, admin_password,
  2929. files_to_inject=None, **kwargs):
  2930. """Rebuild the given instance with the provided attributes."""
  2931. files_to_inject = files_to_inject or []
  2932. metadata = kwargs.get('metadata', {})
  2933. preserve_ephemeral = kwargs.get('preserve_ephemeral', False)
  2934. auto_disk_config = kwargs.get('auto_disk_config')
  2935. if 'key_name' in kwargs:
  2936. key_name = kwargs.pop('key_name')
  2937. if key_name:
  2938. # NOTE(liuyulong): we are intentionally using the user_id from
  2939. # the request context rather than the instance.user_id because
  2940. # users own keys but instances are owned by projects, and
  2941. # another user in the same project can rebuild an instance
  2942. # even if they didn't create it.
  2943. key_pair = objects.KeyPair.get_by_name(context,
  2944. context.user_id,
  2945. key_name)
  2946. instance.key_name = key_pair.name
  2947. instance.key_data = key_pair.public_key
  2948. instance.keypairs = objects.KeyPairList(objects=[key_pair])
  2949. else:
  2950. instance.key_name = None
  2951. instance.key_data = None
  2952. instance.keypairs = objects.KeyPairList(objects=[])
  2953. # Use trusted_certs value from kwargs to create TrustedCerts object
  2954. trusted_certs = None
  2955. if 'trusted_certs' in kwargs:
  2956. # Note that the user can set, change, or unset / reset trusted
  2957. # certs. If they are explicitly specifying
  2958. # trusted_image_certificates=None, that means we'll either unset
  2959. # them on the instance *or* reset to use the defaults (if defaults
  2960. # are configured).
  2961. trusted_certs = kwargs.pop('trusted_certs')
  2962. instance.trusted_certs = self._retrieve_trusted_certs_object(
  2963. context, trusted_certs, rebuild=True)
  2964. image_id, image = self._get_image(context, image_href)
  2965. self._check_auto_disk_config(image=image,
  2966. auto_disk_config=auto_disk_config)
  2967. flavor = instance.get_flavor()
  2968. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  2969. context, instance.uuid)
  2970. root_bdm = compute_utils.get_root_bdm(context, instance, bdms)
  2971. # Check to see if the image is changing and we have a volume-backed
  2972. # server. The compute doesn't support changing the image in the
  2973. # root disk of a volume-backed server, so we need to just fail fast.
  2974. is_volume_backed = compute_utils.is_volume_backed_instance(
  2975. context, instance, bdms)
  2976. if is_volume_backed:
  2977. if trusted_certs:
  2978. # The only way we can get here is if the user tried to set
  2979. # trusted certs or specified trusted_image_certificates=None
  2980. # and default_trusted_certificate_ids is configured.
  2981. msg = _("Image certificate validation is not supported "
  2982. "for volume-backed servers.")
  2983. raise exception.CertificateValidationFailed(message=msg)
  2984. # For boot from volume, instance.image_ref is empty, so we need to
  2985. # query the image from the volume.
  2986. if root_bdm is None:
  2987. # This shouldn't happen and is an error, we need to fail. This
2988. # is not the user's fault, it's an internal error. Without a
  2989. # root BDM we have no way of knowing the backing volume (or
  2990. # image in that volume) for this instance.
  2991. raise exception.NovaException(
  2992. _('Unable to find root block device mapping for '
  2993. 'volume-backed instance.'))
  2994. volume = self.volume_api.get(context, root_bdm.volume_id)
  2995. volume_image_metadata = volume.get('volume_image_metadata', {})
  2996. orig_image_ref = volume_image_metadata.get('image_id')
  2997. if orig_image_ref != image_href:
  2998. # Leave a breadcrumb.
  2999. LOG.debug('Requested to rebuild instance with a new image %s '
  3000. 'for a volume-backed server with image %s in its '
  3001. 'root volume which is not supported.', image_href,
  3002. orig_image_ref, instance=instance)
  3003. msg = _('Unable to rebuild with a different image for a '
  3004. 'volume-backed server.')
  3005. raise exception.ImageUnacceptable(
  3006. image_id=image_href, reason=msg)
  3007. else:
  3008. orig_image_ref = instance.image_ref
  3009. request_spec = objects.RequestSpec.get_by_instance_uuid(
  3010. context, instance.uuid)
  3011. self._checks_for_create_and_rebuild(context, image_id, image,
  3012. flavor, metadata, files_to_inject, root_bdm)
  3013. # NOTE(sean-k-mooney): When we rebuild with a new image we need to
  3014. # validate that the NUMA topology does not change as we do a NOOP claim
  3015. # in resource tracker. As such we cannot allow the resource usage or
  3016. # assignment to change as a result of a new image altering the
  3017. # numa constraints.
  3018. if orig_image_ref != image_href:
  3019. self._validate_numa_rebuild(instance, image, flavor)
  3020. kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
  3021. context, None, None, image)
  3022. def _reset_image_metadata():
  3023. """Remove old image properties that we're storing as instance
  3024. system metadata. These properties start with 'image_'.
  3025. Then add the properties for the new image.
  3026. """
  3027. # FIXME(comstud): There's a race condition here in that if
  3028. # the system_metadata for this instance is updated after
  3029. # we do the previous save() and before we update.. those
  3030. # other updates will be lost. Since this problem exists in
  3031. # a lot of other places, I think it should be addressed in
  3032. # a DB layer overhaul.
  3033. orig_sys_metadata = dict(instance.system_metadata)
  3034. # Remove the old keys
  3035. for key in list(instance.system_metadata.keys()):
  3036. if key.startswith(utils.SM_IMAGE_PROP_PREFIX):
  3037. del instance.system_metadata[key]
  3038. # Add the new ones
  3039. new_sys_metadata = utils.get_system_metadata_from_image(
  3040. image, flavor)
  3041. instance.system_metadata.update(new_sys_metadata)
  3042. instance.save()
  3043. return orig_sys_metadata
  3044. # Since image might have changed, we may have new values for
  3045. # os_type, vm_mode, etc
  3046. options_from_image = self._inherit_properties_from_image(
  3047. image, auto_disk_config)
  3048. instance.update(options_from_image)
  3049. instance.task_state = task_states.REBUILDING
  3050. # An empty instance.image_ref is currently used as an indication
  3051. # of BFV. Preserve that over a rebuild to not break users.
  3052. if not is_volume_backed:
  3053. instance.image_ref = image_href
  3054. instance.kernel_id = kernel_id or ""
  3055. instance.ramdisk_id = ramdisk_id or ""
  3056. instance.progress = 0
  3057. instance.update(kwargs)
  3058. instance.save(expected_task_state=[None])
  3059. # On a rebuild, since we're potentially changing images, we need to
  3060. # wipe out the old image properties that we're storing as instance
  3061. # system metadata... and copy in the properties for the new image.
  3062. orig_sys_metadata = _reset_image_metadata()
  3063. self._record_action_start(context, instance, instance_actions.REBUILD)
  3064. # NOTE(sbauza): The migration script we provided in Newton should make
  3065. # sure that all our instances are currently migrated to have an
  3066. # attached RequestSpec object but let's consider that the operator only
  3067. # half migrated all their instances in the meantime.
  3068. host = instance.host
  3069. # If a new image is provided on rebuild, we will need to run
  3070. # through the scheduler again, but we want the instance to be
  3071. # rebuilt on the same host it's already on.
  3072. if orig_image_ref != image_href:
  3073. # We have to modify the request spec that goes to the scheduler
  3074. # to contain the new image. We persist this since we've already
  3075. # changed the instance.image_ref above so we're being
  3076. # consistent.
  3077. request_spec.image = objects.ImageMeta.from_dict(image)
  3078. request_spec.save()
  3079. if 'scheduler_hints' not in request_spec:
  3080. request_spec.scheduler_hints = {}
  3081. # Nuke the id on this so we can't accidentally save
  3082. # this hint hack later
  3083. del request_spec.id
  3084. # NOTE(danms): Passing host=None tells conductor to
  3085. # call the scheduler. The _nova_check_type hint
  3086. # requires that the scheduler returns only the same
  3087. # host that we are currently on and only checks
  3088. # rebuild-related filters.
  3089. request_spec.scheduler_hints['_nova_check_type'] = ['rebuild']
  3090. request_spec.force_hosts = [instance.host]
  3091. request_spec.force_nodes = [instance.node]
  3092. host = None
  3093. self.compute_task_api.rebuild_instance(context, instance=instance,
  3094. new_pass=admin_password, injected_files=files_to_inject,
  3095. image_ref=image_href, orig_image_ref=orig_image_ref,
  3096. orig_sys_metadata=orig_sys_metadata, bdms=bdms,
  3097. preserve_ephemeral=preserve_ephemeral, host=host,
  3098. request_spec=request_spec)
  3099. @staticmethod
  3100. def _validate_numa_rebuild(instance, image, flavor):
  3101. """validates that the NUMA constraints do not change on rebuild.
  3102. :param instance: nova.objects.instance.Instance object
  3103. :param image: the new image the instance will be rebuilt with.
  3104. :param flavor: the flavor of the current instance.
  3105. :raises: nova.exception.ImageNUMATopologyRebuildConflict
  3106. """
  3107. # NOTE(sean-k-mooney): currently it is not possible to express
  3108. # a PCI NUMA affinity policy via flavor or image but that will
3109. # change in the future. We pull the image metadata out into a
3110. # separate variable to make future testing of this easier.
  3111. old_image_meta = instance.image_meta
  3112. new_image_meta = objects.ImageMeta.from_dict(image)
  3113. old_constraints = hardware.numa_get_constraints(flavor, old_image_meta)
  3114. new_constraints = hardware.numa_get_constraints(flavor, new_image_meta)
  3115. # early out for non NUMA instances
  3116. if old_constraints is None and new_constraints is None:
  3117. return
3118. # if only one of the constraints is non-None (or 'set') then the
  3119. # constraints changed so raise an exception.
  3120. if old_constraints is None or new_constraints is None:
  3121. action = "removing" if old_constraints else "introducing"
  3122. LOG.debug("NUMA rebuild validation failed. The requested image "
  3123. "would alter the NUMA constraints by %s a NUMA "
  3124. "topology.", action, instance=instance)
  3125. raise exception.ImageNUMATopologyRebuildConflict()
3126. # otherwise, since both the old and new constraints are non-None,
3127. # compare them as dictionaries.
  3128. old = old_constraints.obj_to_primitive()
  3129. new = new_constraints.obj_to_primitive()
  3130. if old != new:
  3131. LOG.debug("NUMA rebuild validation failed. The requested image "
  3132. "conflicts with the existing NUMA constraints.",
  3133. instance=instance)
  3134. raise exception.ImageNUMATopologyRebuildConflict()
  3135. # TODO(sean-k-mooney): add PCI NUMA affinity policy check.
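# Minimal standalone sketch of the comparison performed above (illustrative
# only): two NUMA constraint objects are considered unchanged when their
# primitive (dict) forms are equal, and a change from None to non-None (or
# vice versa) also counts as a conflict.
#
#     old = old_constraints.obj_to_primitive() if old_constraints else None
#     new = new_constraints.obj_to_primitive() if new_constraints else None
#     changed = (old is None) != (new is None) or old != new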
  3136. @staticmethod
  3137. def _check_quota_for_upsize(context, instance, current_flavor, new_flavor):
  3138. project_id, user_id = quotas_obj.ids_from_instance(context,
  3139. instance)
  3140. # Deltas will be empty if the resize is not an upsize.
  3141. deltas = compute_utils.upsize_quota_delta(new_flavor,
  3142. current_flavor)
  3143. if deltas:
  3144. try:
  3145. res_deltas = {'cores': deltas.get('cores', 0),
  3146. 'ram': deltas.get('ram', 0)}
  3147. objects.Quotas.check_deltas(context, res_deltas,
  3148. project_id, user_id=user_id,
  3149. check_project_id=project_id,
  3150. check_user_id=user_id)
  3151. except exception.OverQuota as exc:
  3152. quotas = exc.kwargs['quotas']
  3153. overs = exc.kwargs['overs']
  3154. usages = exc.kwargs['usages']
  3155. headroom = compute_utils.get_headroom(quotas, usages,
  3156. deltas)
  3157. (overs, reqs, total_alloweds,
  3158. useds) = compute_utils.get_over_quota_detail(headroom,
  3159. overs,
  3160. quotas,
  3161. deltas)
  3162. LOG.info("%(overs)s quota exceeded for %(pid)s,"
  3163. " tried to resize instance.",
  3164. {'overs': overs, 'pid': context.project_id})
  3165. raise exception.TooManyInstances(overs=overs,
  3166. req=reqs,
  3167. used=useds,
  3168. allowed=total_alloweds)
  3169. @check_instance_lock
  3170. @check_instance_state(vm_state=[vm_states.RESIZED])
  3171. def revert_resize(self, context, instance):
  3172. """Reverts a resize or cold migration, deleting the 'new' instance in
  3173. the process.
  3174. """
  3175. elevated = context.elevated()
  3176. migration = objects.Migration.get_by_instance_and_status(
  3177. elevated, instance.uuid, 'finished')
  3178. # If this is a resize down, a revert might go over quota.
  3179. self._check_quota_for_upsize(context, instance, instance.flavor,
  3180. instance.old_flavor)
  3181. # The AZ for the server may have changed when it was migrated so while
  3182. # we are in the API and have access to the API DB, update the
  3183. # instance.availability_zone before casting off to the compute service.
  3184. # Note that we do this in the API to avoid an "up-call" from the
  3185. # compute service to the API DB. This is not great in case something
  3186. # fails during revert before the instance.host is updated to the
  3187. # original source host, but it is good enough for now. Long-term we
  3188. # could consider passing the AZ down to compute so it can set it when
  3189. # the instance.host value is set in finish_revert_resize.
  3190. instance.availability_zone = (
  3191. availability_zones.get_host_availability_zone(
  3192. context, migration.source_compute))
  3193. # Conductor updated the RequestSpec.flavor during the initial resize
  3194. # operation to point at the new flavor, so we need to update the
  3195. # RequestSpec to point back at the original flavor, otherwise
  3196. # subsequent move operations through the scheduler will be using the
  3197. # wrong flavor.
  3198. reqspec = objects.RequestSpec.get_by_instance_uuid(
  3199. context, instance.uuid)
  3200. reqspec.flavor = instance.old_flavor
  3201. reqspec.save()
  3202. # NOTE(gibi): This is a performance optimization. If the network info
  3203. # cache does not have ports with allocations in the binding profile
  3204. # then we can skip reading port resource request from neutron below.
  3205. # If a port has resource request then that would have already caused
  3206. # that the finish_resize call put allocation in the binding profile
  3207. # during the resize.
  3208. if instance.get_network_info().has_port_with_allocation():
  3209. # TODO(gibi): do not directly overwrite the
3210. # RequestSpec.requested_resources as others like cyborg might have
3211. # already added things there
  3212. # NOTE(gibi): We need to collect the requested resource again as it
  3213. # is intentionally not persisted in nova. Note that this needs to
  3214. # be done here as the nova API code directly calls revert on the
  3215. # dest compute service skipping the conductor.
  3216. port_res_req = (
  3217. self.network_api.get_requested_resource_for_instance(
  3218. context, instance.uuid))
  3219. reqspec.requested_resources = port_res_req
  3220. instance.task_state = task_states.RESIZE_REVERTING
  3221. instance.save(expected_task_state=[None])
  3222. migration.status = 'reverting'
  3223. migration.save()
  3224. self._record_action_start(context, instance,
  3225. instance_actions.REVERT_RESIZE)
  3226. if migration.cross_cell_move:
  3227. # RPC cast to conductor to orchestrate the revert of the cross-cell
  3228. # resize.
  3229. self.compute_task_api.revert_snapshot_based_resize(
  3230. context, instance, migration)
  3231. else:
  3232. # TODO(melwitt): We're not rechecking for strict quota here to
  3233. # guard against going over quota during a race at this time because
  3234. # the resource consumption for this operation is written to the
  3235. # database by compute.
  3236. self.compute_rpcapi.revert_resize(context, instance,
  3237. migration,
  3238. migration.dest_compute,
  3239. reqspec)
  3240. @check_instance_lock
  3241. @check_instance_state(vm_state=[vm_states.RESIZED])
  3242. def confirm_resize(self, context, instance, migration=None):
  3243. """Confirms a migration/resize and deletes the 'old' instance."""
  3244. elevated = context.elevated()
  3245. # NOTE(melwitt): We're not checking quota here because there isn't a
  3246. # change in resource usage when confirming a resize. Resource
  3247. # consumption for resizes are written to the database by compute, so
  3248. # a confirm resize is just a clean up of the migration objects and a
  3249. # state change in compute.
  3250. if migration is None:
  3251. migration = objects.Migration.get_by_instance_and_status(
  3252. elevated, instance.uuid, 'finished')
  3253. migration.status = 'confirming'
  3254. migration.save()
  3255. self._record_action_start(context, instance,
  3256. instance_actions.CONFIRM_RESIZE)
  3257. # Check to see if this was a cross-cell resize, in which case the
  3258. # resized instance is in the target cell (the migration and instance
  3259. # came from the target cell DB in this case), and we need to cleanup
  3260. # the source host and source cell database records.
  3261. if migration.cross_cell_move:
  3262. self.compute_task_api.confirm_snapshot_based_resize(
  3263. context, instance, migration)
  3264. else:
  3265. # It's a traditional resize within a single cell, so RPC cast to
  3266. # the source compute host to cleanup the host since the instance
  3267. # is already on the target host.
  3268. self.compute_rpcapi.confirm_resize(context,
  3269. instance,
  3270. migration,
  3271. migration.source_compute)
  3272. @staticmethod
  3273. def _allow_cross_cell_resize(context, instance):
  3274. """Determine if the request can perform a cross-cell resize on this
  3275. instance.
  3276. :param context: nova auth request context for the resize operation
  3277. :param instance: Instance object being resized
  3278. :returns: True if cross-cell resize is allowed, False otherwise
  3279. """
  3280. # First check to see if the requesting project/user is allowed by
  3281. # policy to perform cross-cell resize.
  3282. allowed = context.can(
  3283. servers_policies.CROSS_CELL_RESIZE,
  3284. target={'user_id': instance.user_id,
  3285. 'project_id': instance.project_id},
  3286. fatal=False)
  3287. # If the user is allowed by policy, check to make sure the deployment
  3288. # is upgraded to the point of supporting cross-cell resize on all
  3289. # compute services.
  3290. if allowed:
  3291. # TODO(mriedem): We can remove this minimum compute version check
  3292. # in the 22.0.0 "V" release.
  3293. min_compute_version = (
  3294. objects.service.get_minimum_version_all_cells(
  3295. context, ['nova-compute']))
  3296. if min_compute_version < MIN_COMPUTE_CROSS_CELL_RESIZE:
  3297. LOG.debug('Request is allowed by policy to perform cross-cell '
  3298. 'resize but the minimum nova-compute service '
  3299. 'version in the deployment %s is less than %s so '
  3300. 'cross-cell resize is not allowed at this time.',
  3301. min_compute_version, MIN_COMPUTE_CROSS_CELL_RESIZE)
  3302. allowed = False
  3303. return allowed
  3304. @staticmethod
  3305. def _validate_host_for_cold_migrate(
  3306. context, instance, host_name, allow_cross_cell_resize):
  3307. """Validates a host specified for cold migration.
  3308. :param context: nova auth request context for the cold migration
  3309. :param instance: Instance object being cold migrated
  3310. :param host_name: User-specified compute service hostname for the
  3311. desired destination of the instance during the cold migration
  3312. :param allow_cross_cell_resize: If True, cross-cell resize is allowed
  3313. for this operation and the host could be in a different cell from
3314. the one that the instance is currently in. If False, the specified
  3315. host must be in the same cell as the instance.
  3316. :returns: ComputeNode object of the requested host
  3317. :raises: CannotMigrateToSameHost if the host is the same as the
  3318. current instance.host
  3319. :raises: ComputeHostNotFound if the specified host cannot be found
  3320. """
  3321. # Cannot migrate to the host where the instance exists
  3322. # because it is useless.
  3323. if host_name == instance.host:
  3324. raise exception.CannotMigrateToSameHost()
  3325. # Check whether host exists or not. If a cross-cell resize is
  3326. # allowed, the host could be in another cell from the one the
  3327. # instance is currently in, so we need to lookup the HostMapping
  3328. # to get the cell and lookup the ComputeNode in that cell.
  3329. if allow_cross_cell_resize:
  3330. try:
  3331. hm = objects.HostMapping.get_by_host(context, host_name)
  3332. except exception.HostMappingNotFound:
  3333. LOG.info('HostMapping not found for host: %s', host_name)
  3334. raise exception.ComputeHostNotFound(host=host_name)
  3335. with nova_context.target_cell(context, hm.cell_mapping) as cctxt:
  3336. node = objects.ComputeNode.\
  3337. get_first_node_by_host_for_old_compat(
  3338. cctxt, host_name, use_slave=True)
  3339. else:
  3340. node = objects.ComputeNode.get_first_node_by_host_for_old_compat(
  3341. context, host_name, use_slave=True)
  3342. return node
  3343. @check_instance_lock
  3344. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED])
  3345. @check_instance_host(check_is_up=True)
  3346. def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
  3347. host_name=None, auto_disk_config=None):
  3348. """Resize (ie, migrate) a running instance.
  3349. If flavor_id is None, the process is considered a migration, keeping
  3350. the original flavor_id. If flavor_id is not None, the instance should
  3351. be migrated to a new host and resized to the new flavor_id.
  3352. host_name is always None in the resize case.
  3353. host_name can be set in the cold migration case only.
  3354. """
  3355. allow_cross_cell_resize = self._allow_cross_cell_resize(
  3356. context, instance)
  3357. if host_name is not None:
  3358. node = self._validate_host_for_cold_migrate(
  3359. context, instance, host_name, allow_cross_cell_resize)
  3360. self._check_auto_disk_config(
  3361. instance, auto_disk_config=auto_disk_config)
  3362. current_instance_type = instance.get_flavor()
  3363. # If flavor_id is not provided, only migrate the instance.
  3364. volume_backed = None
  3365. if not flavor_id:
  3366. LOG.debug("flavor_id is None. Assuming migration.",
  3367. instance=instance)
  3368. new_instance_type = current_instance_type
  3369. else:
  3370. new_instance_type = flavors.get_flavor_by_flavor_id(
  3371. flavor_id, read_deleted="no")
  3372. # Check to see if we're resizing to a zero-disk flavor which is
  3373. # only supported with volume-backed servers.
  3374. if (new_instance_type.get('root_gb') == 0 and
  3375. current_instance_type.get('root_gb') != 0):
  3376. volume_backed = compute_utils.is_volume_backed_instance(
  3377. context, instance)
  3378. if not volume_backed:
  3379. reason = _('Resize to zero disk flavor is not allowed.')
  3380. raise exception.CannotResizeDisk(reason=reason)
  3381. current_instance_type_name = current_instance_type['name']
  3382. new_instance_type_name = new_instance_type['name']
  3383. LOG.debug("Old instance type %(current_instance_type_name)s, "
  3384. "new instance type %(new_instance_type_name)s",
  3385. {'current_instance_type_name': current_instance_type_name,
  3386. 'new_instance_type_name': new_instance_type_name},
  3387. instance=instance)
  3388. same_instance_type = (current_instance_type['id'] ==
  3389. new_instance_type['id'])
  3390. # NOTE(sirp): We don't want to force a customer to change their flavor
  3391. # when Ops is migrating off of a failed host.
  3392. if not same_instance_type and new_instance_type.get('disabled'):
  3393. raise exception.FlavorNotFound(flavor_id=flavor_id)
  3394. if same_instance_type and flavor_id:
  3395. raise exception.CannotResizeToSameFlavor()
  3396. # ensure there is sufficient headroom for upsizes
  3397. if flavor_id:
  3398. self._check_quota_for_upsize(context, instance,
  3399. current_instance_type,
  3400. new_instance_type)
  3401. if not same_instance_type:
  3402. image = utils.get_image_from_system_metadata(
  3403. instance.system_metadata)
  3404. # Figure out if the instance is volume-backed but only if we didn't
  3405. # already figure that out above (avoid the extra db hit).
  3406. if volume_backed is None:
  3407. volume_backed = compute_utils.is_volume_backed_instance(
  3408. context, instance)
  3409. # If the server is volume-backed, we still want to validate numa
  3410. # and pci information in the new flavor, but we don't call
  3411. # _validate_flavor_image_nostatus because how it handles checking
  3412. # disk size validation was not intended for a volume-backed
  3413. # resize case.
  3414. if volume_backed:
  3415. self._validate_flavor_image_numa_pci(
  3416. image, new_instance_type, validate_pci=True)
  3417. else:
  3418. self._validate_flavor_image_nostatus(
  3419. context, image, new_instance_type, root_bdm=None,
  3420. validate_pci=True)
  3421. filter_properties = {'ignore_hosts': []}
  3422. if not CONF.allow_resize_to_same_host:
  3423. filter_properties['ignore_hosts'].append(instance.host)
  3424. request_spec = objects.RequestSpec.get_by_instance_uuid(
  3425. context, instance.uuid)
  3426. request_spec.ignore_hosts = filter_properties['ignore_hosts']
  3427. instance.task_state = task_states.RESIZE_PREP
  3428. instance.progress = 0
  3429. instance.auto_disk_config = auto_disk_config or False
  3430. instance.save(expected_task_state=[None])
  3431. if not flavor_id:
  3432. self._record_action_start(context, instance,
  3433. instance_actions.MIGRATE)
  3434. else:
  3435. self._record_action_start(context, instance,
  3436. instance_actions.RESIZE)
  3437. # TODO(melwitt): We're not rechecking for strict quota here to guard
  3438. # against going over quota during a race at this time because the
  3439. # resource consumption for this operation is written to the database
  3440. # by compute.
  3441. scheduler_hint = {'filter_properties': filter_properties}
  3442. if host_name is None:
  3443. # If 'host_name' is not specified,
  3444. # clear the 'requested_destination' field of the RequestSpec
3445. # except for the allow_cross_cell_move flag, which conductor uses
3446. # prior to scheduling.
  3447. request_spec.requested_destination = objects.Destination(
  3448. allow_cross_cell_move=allow_cross_cell_resize)
  3449. else:
  3450. # Set the host and the node so that the scheduler will
  3451. # validate them.
  3452. request_spec.requested_destination = objects.Destination(
  3453. host=node.host, node=node.hypervisor_hostname,
  3454. allow_cross_cell_move=allow_cross_cell_resize)
  3455. # Asynchronously RPC cast to conductor so the response is not blocked
  3456. # during scheduling. If something fails the user can find out via
  3457. # instance actions.
  3458. self.compute_task_api.resize_instance(context, instance,
  3459. scheduler_hint=scheduler_hint,
  3460. flavor=new_instance_type,
  3461. clean_shutdown=clean_shutdown,
  3462. request_spec=request_spec,
  3463. do_cast=True)
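# Usage sketch (illustrative only; assumed names as above, the flavor id
# and host name values are made up): passing flavor_id resizes the server,
# while omitting it performs a cold migration with the flavor unchanged;
# host_name may only be set in the cold migration case.
#
#     compute_api.resize(ctxt, instance, flavor_id='m1.large-id')
#     compute_api.resize(ctxt, instance, host_name='compute-02')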
  3464. @check_instance_lock
  3465. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  3466. vm_states.PAUSED, vm_states.SUSPENDED])
  3467. def shelve(self, context, instance, clean_shutdown=True):
  3468. """Shelve an instance.
  3469. Shuts down an instance and frees it up to be removed from the
  3470. hypervisor.
  3471. """
  3472. instance.task_state = task_states.SHELVING
  3473. instance.save(expected_task_state=[None])
  3474. self._record_action_start(context, instance, instance_actions.SHELVE)
  3475. if not compute_utils.is_volume_backed_instance(context, instance):
  3476. name = '%s-shelved' % instance.display_name
  3477. image_meta = compute_utils.create_image(
  3478. context, instance, name, 'snapshot', self.image_api)
  3479. image_id = image_meta['id']
  3480. self.compute_rpcapi.shelve_instance(context, instance=instance,
  3481. image_id=image_id, clean_shutdown=clean_shutdown)
  3482. else:
  3483. self.compute_rpcapi.shelve_offload_instance(context,
  3484. instance=instance, clean_shutdown=clean_shutdown)
  3485. @check_instance_lock
  3486. @check_instance_state(vm_state=[vm_states.SHELVED])
  3487. def shelve_offload(self, context, instance, clean_shutdown=True):
  3488. """Remove a shelved instance from the hypervisor."""
  3489. instance.task_state = task_states.SHELVING_OFFLOADING
  3490. instance.save(expected_task_state=[None])
  3491. self._record_action_start(context, instance,
  3492. instance_actions.SHELVE_OFFLOAD)
  3493. self.compute_rpcapi.shelve_offload_instance(context, instance=instance,
  3494. clean_shutdown=clean_shutdown)
  3495. def _validate_unshelve_az(self, context, instance, availability_zone):
  3496. """Verify the specified availability_zone during unshelve.
  3497. Verifies that the server is shelved offloaded, the AZ exists and
  3498. if [cinder]/cross_az_attach=False, that any attached volumes are in
  3499. the same AZ.
  3500. :param context: nova auth RequestContext for the unshelve action
  3501. :param instance: Instance object for the server being unshelved
  3502. :param availability_zone: The user-requested availability zone in
  3503. which to unshelve the server.
  3504. :raises: UnshelveInstanceInvalidState if the server is not shelved
  3505. offloaded
  3506. :raises: InvalidRequest if the requested AZ does not exist
  3507. :raises: MismatchVolumeAZException if [cinder]/cross_az_attach=False
  3508. and any attached volumes are not in the requested AZ
  3509. """
  3510. if instance.vm_state != vm_states.SHELVED_OFFLOADED:
  3511. # NOTE(brinzhang): If the server status is 'SHELVED', it still
3512. # belongs to a host, so the availability_zone has not changed.
  3513. # Unshelving a shelved offloaded server will go through the
  3514. # scheduler to find a new host.
  3515. raise exception.UnshelveInstanceInvalidState(
  3516. state=instance.vm_state, instance_uuid=instance.uuid)
  3517. available_zones = availability_zones.get_availability_zones(
  3518. context, self.host_api, get_only_available=True)
  3519. if availability_zone not in available_zones:
  3520. msg = _('The requested availability zone is not available')
  3521. raise exception.InvalidRequest(msg)
3522. # NOTE(brinzhang): When specifying an availability zone to unshelve
3523. # a shelved offloaded server, and conf cross_az_attach=False, we need
3524. # to check whether the attached volume AZ matches the user-specified AZ.
  3525. if not CONF.cinder.cross_az_attach:
  3526. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  3527. context, instance.uuid)
  3528. for bdm in bdms:
  3529. if bdm.is_volume and bdm.volume_id:
  3530. volume = self.volume_api.get(context, bdm.volume_id)
  3531. if availability_zone != volume['availability_zone']:
  3532. msg = _("The specified availability zone does not "
  3533. "match the volume %(vol_id)s attached to the "
  3534. "server. Specified availability zone is "
  3535. "%(az)s. Volume is in %(vol_zone)s.") % {
  3536. "vol_id": volume['id'],
  3537. "az": availability_zone,
  3538. "vol_zone": volume['availability_zone']}
  3539. raise exception.MismatchVolumeAZException(reason=msg)
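# A minimal sketch of the rule enforced above, with illustrative values:
#
#     # server.vm_state == SHELVED_OFFLOADED, requested AZ = 'az-east'
#     # [cinder]/cross_az_attach = False
#     #   attached volume in 'az-west' -> MismatchVolumeAZException
#     #   attached volume in 'az-east' -> validation passes
#     # [cinder]/cross_az_attach = True
#     #   volume AZs are not checked at all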
  3540. @check_instance_lock
  3541. @check_instance_state(vm_state=[vm_states.SHELVED,
  3542. vm_states.SHELVED_OFFLOADED])
  3543. def unshelve(self, context, instance, new_az=None):
  3544. """Restore a shelved instance."""
  3545. request_spec = objects.RequestSpec.get_by_instance_uuid(
  3546. context, instance.uuid)
  3547. if new_az:
  3548. self._validate_unshelve_az(context, instance, new_az)
  3549. LOG.debug("Replace the old AZ %(old_az)s in RequestSpec "
  3550. "with a new AZ %(new_az)s of the instance.",
  3551. {"old_az": request_spec.availability_zone,
  3552. "new_az": new_az}, instance=instance)
  3553. # Unshelving a shelved offloaded server will go through the
  3554. # scheduler to pick a new host, so we update the
  3555. # RequestSpec.availability_zone here. Note that if scheduling
  3556. # fails the RequestSpec will remain updated, which is not great,
  3557. # but if we want to change that we need to defer updating the
  3558. # RequestSpec until conductor which probably means RPC changes to
  3559. # pass the new_az variable to conductor. This is likely low
  3560. # priority since the RequestSpec.availability_zone on a shelved
  3561. # offloaded server does not mean much anyway and clearly the user
  3562. # is trying to put the server in the target AZ.
  3563. request_spec.availability_zone = new_az
  3564. request_spec.save()
  3565. instance.task_state = task_states.UNSHELVING
  3566. instance.save(expected_task_state=[None])
  3567. self._record_action_start(context, instance, instance_actions.UNSHELVE)
  3568. self.compute_task_api.unshelve_instance(context, instance,
  3569. request_spec)
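# A minimal usage sketch with the same illustrative names as above; passing
# new_az is only valid for a SHELVED_OFFLOADED server (otherwise
# UnshelveInstanceInvalidState is raised) and rewrites
# RequestSpec.availability_zone before conductor reschedules the server:
#
#     compute_api.unshelve(ctxt, server, new_az='az-east')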
  3570. @check_instance_lock
  3571. def add_fixed_ip(self, context, instance, network_id):
  3572. """Add fixed_ip from specified network to given instance."""
  3573. self.compute_rpcapi.add_fixed_ip_to_instance(context,
  3574. instance=instance, network_id=network_id)
  3575. @check_instance_lock
  3576. def remove_fixed_ip(self, context, instance, address):
3577. """Remove the given fixed_ip address from the instance."""
  3578. self.compute_rpcapi.remove_fixed_ip_from_instance(context,
  3579. instance=instance, address=address)
  3580. @check_instance_lock
  3581. @check_instance_state(vm_state=[vm_states.ACTIVE])
  3582. def pause(self, context, instance):
  3583. """Pause the given instance."""
  3584. instance.task_state = task_states.PAUSING
  3585. instance.save(expected_task_state=[None])
  3586. self._record_action_start(context, instance, instance_actions.PAUSE)
  3587. self.compute_rpcapi.pause_instance(context, instance)
  3588. @check_instance_lock
  3589. @check_instance_state(vm_state=[vm_states.PAUSED])
  3590. def unpause(self, context, instance):
  3591. """Unpause the given instance."""
  3592. instance.task_state = task_states.UNPAUSING
  3593. instance.save(expected_task_state=[None])
  3594. self._record_action_start(context, instance, instance_actions.UNPAUSE)
  3595. self.compute_rpcapi.unpause_instance(context, instance)
  3596. @check_instance_host()
  3597. def get_diagnostics(self, context, instance):
  3598. """Retrieve diagnostics for the given instance."""
  3599. return self.compute_rpcapi.get_diagnostics(context, instance=instance)
  3600. @check_instance_host()
  3601. def get_instance_diagnostics(self, context, instance):
  3602. """Retrieve diagnostics for the given instance."""
  3603. return self.compute_rpcapi.get_instance_diagnostics(context,
  3604. instance=instance)
  3605. @reject_sev_instances(instance_actions.SUSPEND)
  3606. @check_instance_lock
  3607. @check_instance_state(vm_state=[vm_states.ACTIVE])
  3608. def suspend(self, context, instance):
  3609. """Suspend the given instance."""
  3610. instance.task_state = task_states.SUSPENDING
  3611. instance.save(expected_task_state=[None])
  3612. self._record_action_start(context, instance, instance_actions.SUSPEND)
  3613. self.compute_rpcapi.suspend_instance(context, instance)
  3614. @check_instance_lock
  3615. @check_instance_state(vm_state=[vm_states.SUSPENDED])
  3616. def resume(self, context, instance):
  3617. """Resume the given instance."""
  3618. instance.task_state = task_states.RESUMING
  3619. instance.save(expected_task_state=[None])
  3620. self._record_action_start(context, instance, instance_actions.RESUME)
  3621. self.compute_rpcapi.resume_instance(context, instance)
  3622. @check_instance_lock
  3623. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  3624. vm_states.ERROR])
  3625. def rescue(self, context, instance, rescue_password=None,
  3626. rescue_image_ref=None, clean_shutdown=True):
  3627. """Rescue the given instance."""
  3628. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  3629. context, instance.uuid)
  3630. for bdm in bdms:
  3631. if bdm.volume_id:
  3632. vol = self.volume_api.get(context, bdm.volume_id)
  3633. self.volume_api.check_attached(context, vol)
  3634. if compute_utils.is_volume_backed_instance(context, instance, bdms):
  3635. reason = _("Cannot rescue a volume-backed instance")
  3636. raise exception.InstanceNotRescuable(instance_id=instance.uuid,
  3637. reason=reason)
  3638. instance.task_state = task_states.RESCUING
  3639. instance.save(expected_task_state=[None])
  3640. self._record_action_start(context, instance, instance_actions.RESCUE)
  3641. self.compute_rpcapi.rescue_instance(context, instance=instance,
  3642. rescue_password=rescue_password, rescue_image_ref=rescue_image_ref,
  3643. clean_shutdown=clean_shutdown)
  3644. @check_instance_lock
  3645. @check_instance_state(vm_state=[vm_states.RESCUED])
  3646. def unrescue(self, context, instance):
  3647. """Unrescue the given instance."""
  3648. instance.task_state = task_states.UNRESCUING
  3649. instance.save(expected_task_state=[None])
  3650. self._record_action_start(context, instance, instance_actions.UNRESCUE)
  3651. self.compute_rpcapi.unrescue_instance(context, instance=instance)
  3652. @check_instance_lock
  3653. @check_instance_state(vm_state=[vm_states.ACTIVE])
  3654. def set_admin_password(self, context, instance, password):
  3655. """Set the root/admin password for the given instance.
  3656. @param context: Nova auth context.
  3657. @param instance: Nova instance object.
  3658. @param password: The admin password for the instance.
  3659. """
  3660. instance.task_state = task_states.UPDATING_PASSWORD
  3661. instance.save(expected_task_state=[None])
  3662. self._record_action_start(context, instance,
  3663. instance_actions.CHANGE_PASSWORD)
  3664. self.compute_rpcapi.set_admin_password(context,
  3665. instance=instance,
  3666. new_pass=password)
  3667. @check_instance_host()
  3668. @reject_instance_state(
  3669. task_state=[task_states.DELETING, task_states.MIGRATING])
  3670. def get_vnc_console(self, context, instance, console_type):
  3671. """Get a url to an instance Console."""
  3672. connect_info = self.compute_rpcapi.get_vnc_console(context,
  3673. instance=instance, console_type=console_type)
  3674. return {'url': connect_info['access_url']}
  3675. @check_instance_host()
  3676. @reject_instance_state(
  3677. task_state=[task_states.DELETING, task_states.MIGRATING])
  3678. def get_spice_console(self, context, instance, console_type):
  3679. """Get a url to an instance Console."""
  3680. connect_info = self.compute_rpcapi.get_spice_console(context,
  3681. instance=instance, console_type=console_type)
  3682. return {'url': connect_info['access_url']}
  3683. @check_instance_host()
  3684. @reject_instance_state(
  3685. task_state=[task_states.DELETING, task_states.MIGRATING])
  3686. def get_rdp_console(self, context, instance, console_type):
  3687. """Get a url to an instance Console."""
  3688. connect_info = self.compute_rpcapi.get_rdp_console(context,
  3689. instance=instance, console_type=console_type)
  3690. return {'url': connect_info['access_url']}
  3691. @check_instance_host()
  3692. @reject_instance_state(
  3693. task_state=[task_states.DELETING, task_states.MIGRATING])
  3694. def get_serial_console(self, context, instance, console_type):
  3695. """Get a url to a serial console."""
  3696. connect_info = self.compute_rpcapi.get_serial_console(context,
  3697. instance=instance, console_type=console_type)
  3698. return {'url': connect_info['access_url']}
  3699. @check_instance_host()
  3700. @reject_instance_state(
  3701. task_state=[task_states.DELETING, task_states.MIGRATING])
  3702. def get_mks_console(self, context, instance, console_type):
  3703. """Get a url to a MKS console."""
  3704. connect_info = self.compute_rpcapi.get_mks_console(context,
  3705. instance=instance, console_type=console_type)
  3706. return {'url': connect_info['access_url']}
  3707. @check_instance_host()
  3708. def get_console_output(self, context, instance, tail_length=None):
  3709. """Get console output for an instance."""
  3710. return self.compute_rpcapi.get_console_output(context,
  3711. instance=instance, tail_length=tail_length)
  3712. def lock(self, context, instance, reason=None):
  3713. """Lock the given instance."""
  3714. # Only update the lock if we are an admin (non-owner)
  3715. is_owner = instance.project_id == context.project_id
  3716. if instance.locked and is_owner:
  3717. return
  3718. context = context.elevated()
  3719. self._record_action_start(context, instance,
  3720. instance_actions.LOCK)
  3721. @wrap_instance_event(prefix='api')
  3722. def lock(self, context, instance, reason=None):
  3723. LOG.debug('Locking', instance=instance)
  3724. instance.locked = True
  3725. instance.locked_by = 'owner' if is_owner else 'admin'
  3726. if reason:
  3727. instance.system_metadata['locked_reason'] = reason
  3728. instance.save()
  3729. lock(self, context, instance, reason=reason)
  3730. compute_utils.notify_about_instance_action(
  3731. context, instance, CONF.host,
  3732. action=fields_obj.NotificationAction.LOCK,
  3733. source=fields_obj.NotificationSource.API)
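# A minimal sketch of the lock semantics above, with illustrative names: an
# owner locking their own server records locked_by='owner', an admin locking
# another project's server records locked_by='admin', and an owner locking an
# already-locked server is a no-op:
#
#     compute_api.lock(ctxt, server, reason='maintenance window')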
  3734. def is_expected_locked_by(self, context, instance):
  3735. is_owner = instance.project_id == context.project_id
  3736. expect_locked_by = 'owner' if is_owner else 'admin'
  3737. locked_by = instance.locked_by
  3738. if locked_by and locked_by != expect_locked_by:
  3739. return False
  3740. return True
  3741. def unlock(self, context, instance):
  3742. """Unlock the given instance."""
  3743. context = context.elevated()
  3744. self._record_action_start(context, instance,
  3745. instance_actions.UNLOCK)
  3746. @wrap_instance_event(prefix='api')
  3747. def unlock(self, context, instance):
  3748. LOG.debug('Unlocking', instance=instance)
  3749. instance.locked = False
  3750. instance.locked_by = None
  3751. instance.system_metadata.pop('locked_reason', None)
  3752. instance.save()
  3753. unlock(self, context, instance)
  3754. compute_utils.notify_about_instance_action(
  3755. context, instance, CONF.host,
  3756. action=fields_obj.NotificationAction.UNLOCK,
  3757. source=fields_obj.NotificationSource.API)
  3758. @check_instance_lock
  3759. def reset_network(self, context, instance):
  3760. """Reset networking on the instance."""
  3761. self.compute_rpcapi.reset_network(context, instance=instance)
  3762. @check_instance_lock
  3763. def inject_network_info(self, context, instance):
  3764. """Inject network info for the instance."""
  3765. self.compute_rpcapi.inject_network_info(context, instance=instance)
  3766. def _create_volume_bdm(self, context, instance, device, volume,
  3767. disk_bus, device_type, is_local_creation=False,
  3768. tag=None, delete_on_termination=False):
  3769. volume_id = volume['id']
  3770. if is_local_creation:
  3771. # when the creation is done locally we can't specify the device
  3772. # name as we do not have a way to check that the name specified is
  3773. # a valid one.
3774. # We defer setting that value until the actual attach
3775. # happens on the compute manager.
  3776. # NOTE(artom) Local attach (to a shelved-offload instance) cannot
  3777. # support device tagging because we have no way to call the compute
  3778. # manager to check that it supports device tagging. In fact, we
3779. # don't even know which compute manager the instance will
  3780. # eventually end up on when it's unshelved.
  3781. volume_bdm = objects.BlockDeviceMapping(
  3782. context=context,
  3783. source_type='volume', destination_type='volume',
  3784. instance_uuid=instance.uuid, boot_index=None,
  3785. volume_id=volume_id,
  3786. device_name=None, guest_format=None,
  3787. disk_bus=disk_bus, device_type=device_type,
  3788. delete_on_termination=delete_on_termination)
  3789. volume_bdm.create()
  3790. else:
  3791. # NOTE(vish): This is done on the compute host because we want
  3792. # to avoid a race where two devices are requested at
  3793. # the same time. When db access is removed from
  3794. # compute, the bdm will be created here and we will
  3795. # have to make sure that they are assigned atomically.
  3796. volume_bdm = self.compute_rpcapi.reserve_block_device_name(
  3797. context, instance, device, volume_id, disk_bus=disk_bus,
  3798. device_type=device_type, tag=tag,
  3799. multiattach=volume['multiattach'])
  3800. volume_bdm.delete_on_termination = delete_on_termination
  3801. volume_bdm.save()
  3802. return volume_bdm
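# A minimal sketch of the two paths above, with illustrative names:
#
#     # shelved-offloaded path: the BDM is created locally and device_name
#     # is left unset until the real attach happens on unshelve
#     bdm = compute_api._create_volume_bdm(
#         ctxt, server, None, volume, disk_bus=None, device_type=None,
#         is_local_creation=True)
#
#     # normal path: the device name is reserved on the compute host via RPC
#     bdm = compute_api._create_volume_bdm(
#         ctxt, server, '/dev/vdb', volume, disk_bus=None, device_type=None)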
  3803. def _check_volume_already_attached_to_instance(self, context, instance,
  3804. volume_id):
  3805. """Avoid attaching the same volume to the same instance twice.
3806. The new Cinder flow (microversion 3.44) handles the checks
3807. differently and allows attaching the same volume to the same
3808. instance twice (to enable live migration), so for the new flow we
3809. check whether a BDM already exists for this combination and fail
3810. if it does.
  3811. """
  3812. try:
  3813. objects.BlockDeviceMapping.get_by_volume_and_instance(
  3814. context, volume_id, instance.uuid)
  3815. msg = _("volume %s already attached") % volume_id
  3816. raise exception.InvalidVolume(reason=msg)
  3817. except exception.VolumeBDMNotFound:
  3818. pass
  3819. def _check_attach_and_reserve_volume(self, context, volume, instance,
  3820. bdm, supports_multiattach=False,
  3821. validate_az=True):
  3822. """Perform checks against the instance and volume before attaching.
  3823. If validation succeeds, the bdm is updated with an attachment_id which
  3824. effectively reserves it during the attach process in cinder.
  3825. :param context: nova auth RequestContext
  3826. :param volume: volume dict from cinder
  3827. :param instance: Instance object
  3828. :param bdm: BlockDeviceMapping object
  3829. :param supports_multiattach: True if the request supports multiattach
  3830. volumes, i.e. microversion >= 2.60, False otherwise
  3831. :param validate_az: True if the instance and volume availability zones
  3832. should be validated for cross_az_attach, False to not validate AZ
  3833. """
  3834. volume_id = volume['id']
  3835. if validate_az:
  3836. self.volume_api.check_availability_zone(context, volume,
  3837. instance=instance)
  3838. # If volume.multiattach=True and the microversion to
  3839. # support multiattach is not used, fail the request.
  3840. if volume['multiattach'] and not supports_multiattach:
  3841. raise exception.MultiattachNotSupportedOldMicroversion()
  3842. attachment_id = self.volume_api.attachment_create(
  3843. context, volume_id, instance.uuid)['id']
  3844. bdm.attachment_id = attachment_id
  3845. # NOTE(ildikov): In case of boot from volume the BDM at this
  3846. # point is not yet created in a cell database, so we can't
  3847. # call save(). When attaching a volume to an existing
  3848. # instance, the instance is already in a cell and the BDM has
  3849. # been created in that same cell so updating here in that case
  3850. # is "ok".
  3851. if bdm.obj_attr_is_set('id'):
  3852. bdm.save()
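# A minimal sketch, with illustrative names: a successful check leaves the
# BDM holding a Cinder attachment id, which reserves the volume until the
# attach completes (or until the BDM is destroyed on failure):
#
#     compute_api._check_attach_and_reserve_volume(
#         ctxt, volume, server, bdm, supports_multiattach=False)
#     # bdm.attachment_id is now set; bdm.save() was only called if the BDM
#     # already exists in a cell database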
  3853. # TODO(stephenfin): Fold this back in now that cells v1 no longer needs to
  3854. # override it.
  3855. def _attach_volume(self, context, instance, volume, device,
  3856. disk_bus, device_type, tag=None,
  3857. supports_multiattach=False,
  3858. delete_on_termination=False):
  3859. """Attach an existing volume to an existing instance.
  3860. This method is separated to make it possible for cells version
  3861. to override it.
  3862. """
  3863. volume_bdm = self._create_volume_bdm(
  3864. context, instance, device, volume, disk_bus=disk_bus,
  3865. device_type=device_type, tag=tag,
  3866. delete_on_termination=delete_on_termination)
  3867. try:
  3868. self._check_attach_and_reserve_volume(context, volume, instance,
  3869. volume_bdm,
  3870. supports_multiattach)
  3871. self._record_action_start(
  3872. context, instance, instance_actions.ATTACH_VOLUME)
  3873. self.compute_rpcapi.attach_volume(context, instance, volume_bdm)
  3874. except Exception:
  3875. with excutils.save_and_reraise_exception():
  3876. volume_bdm.destroy()
  3877. return volume_bdm.device_name
  3878. def _attach_volume_shelved_offloaded(self, context, instance, volume,
  3879. device, disk_bus, device_type,
  3880. delete_on_termination):
  3881. """Attach an existing volume to an instance in shelved offloaded state.
3882. Attaching a volume to an instance in shelved offloaded state requires
3883. the regular check to see if we can attach and reserve the volume,
3884. after which we need to call the attach method on the volume API
3885. to mark the volume as 'in-use'.
3886. The instance at this stage is not managed by a compute manager,
3887. therefore the actual attachment will be performed once the
3888. instance is unshelved.
  3889. """
  3890. volume_id = volume['id']
  3891. @wrap_instance_event(prefix='api')
  3892. def attach_volume(self, context, v_id, instance, dev, attachment_id):
  3893. if attachment_id:
  3894. # Normally we wouldn't complete an attachment without a host
  3895. # connector, but we do this to make the volume status change
  3896. # to "in-use" to maintain the API semantics with the old flow.
  3897. # When unshelving the instance, the compute service will deal
  3898. # with this disconnected attachment.
  3899. self.volume_api.attachment_complete(context, attachment_id)
  3900. else:
  3901. self.volume_api.attach(context,
  3902. v_id,
  3903. instance.uuid,
  3904. dev)
  3905. volume_bdm = self._create_volume_bdm(
  3906. context, instance, device, volume, disk_bus=disk_bus,
  3907. device_type=device_type, is_local_creation=True,
  3908. delete_on_termination=delete_on_termination)
  3909. try:
  3910. self._check_attach_and_reserve_volume(context, volume, instance,
  3911. volume_bdm)
  3912. self._record_action_start(
  3913. context, instance,
  3914. instance_actions.ATTACH_VOLUME)
  3915. attach_volume(self, context, volume_id, instance, device,
  3916. volume_bdm.attachment_id)
  3917. except Exception:
  3918. with excutils.save_and_reraise_exception():
  3919. volume_bdm.destroy()
  3920. return volume_bdm.device_name
  3921. @check_instance_lock
  3922. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  3923. vm_states.STOPPED, vm_states.RESIZED,
  3924. vm_states.SOFT_DELETED, vm_states.SHELVED,
  3925. vm_states.SHELVED_OFFLOADED])
  3926. def attach_volume(self, context, instance, volume_id, device=None,
  3927. disk_bus=None, device_type=None, tag=None,
  3928. supports_multiattach=False,
  3929. delete_on_termination=False):
  3930. """Attach an existing volume to an existing instance."""
  3931. # NOTE(vish): Fail fast if the device is not going to pass. This
  3932. # will need to be removed along with the test if we
  3933. # change the logic in the manager for what constitutes
  3934. # a valid device.
  3935. if device and not block_device.match_device(device):
  3936. raise exception.InvalidDevicePath(path=device)
  3937. # Make sure the volume isn't already attached to this instance
  3938. # because we'll use the v3.44 attachment flow in
  3939. # _check_attach_and_reserve_volume and Cinder will allow multiple
  3940. # attachments between the same volume and instance but the old flow
  3941. # API semantics don't allow that so we enforce it here.
  3942. self._check_volume_already_attached_to_instance(context,
  3943. instance,
  3944. volume_id)
  3945. volume = self.volume_api.get(context, volume_id)
  3946. is_shelved_offloaded = instance.vm_state == vm_states.SHELVED_OFFLOADED
  3947. if is_shelved_offloaded:
  3948. if tag:
  3949. # NOTE(artom) Local attach (to a shelved-offload instance)
  3950. # cannot support device tagging because we have no way to call
  3951. # the compute manager to check that it supports device tagging.
3952. # In fact, we don't even know which compute manager the
  3953. # instance will eventually end up on when it's unshelved.
  3954. raise exception.VolumeTaggedAttachToShelvedNotSupported()
  3955. if volume['multiattach']:
  3956. # NOTE(mriedem): Similar to tagged attach, we don't support
  3957. # attaching a multiattach volume to shelved offloaded instances
  3958. # because we can't tell if the compute host (since there isn't
  3959. # one) supports it. This could possibly be supported in the
  3960. # future if the scheduler was made aware of which computes
  3961. # support multiattach volumes.
  3962. raise exception.MultiattachToShelvedNotSupported()
  3963. return self._attach_volume_shelved_offloaded(context,
  3964. instance,
  3965. volume,
  3966. device,
  3967. disk_bus,
  3968. device_type,
  3969. delete_on_termination)
  3970. return self._attach_volume(context, instance, volume, device,
  3971. disk_bus, device_type, tag=tag,
  3972. supports_multiattach=supports_multiattach,
  3973. delete_on_termination=delete_on_termination)
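# A minimal usage sketch with illustrative names; supports_multiattach=True
# corresponds to API microversion >= 2.60, and tagged or multiattach volumes
# are rejected for SHELVED_OFFLOADED servers as enforced above:
#
#     device_name = compute_api.attach_volume(
#         ctxt, server, volume_id, device=None, tag='db-disk',
#         supports_multiattach=False, delete_on_termination=True)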
  3974. # TODO(stephenfin): Fold this back in now that cells v1 no longer needs to
  3975. # override it.
  3976. def _detach_volume(self, context, instance, volume):
  3977. """Detach volume from instance.
  3978. This method is separated to make it easier for cells version
  3979. to override.
  3980. """
  3981. try:
  3982. self.volume_api.begin_detaching(context, volume['id'])
  3983. except exception.InvalidInput as exc:
  3984. raise exception.InvalidVolume(reason=exc.format_message())
  3985. attachments = volume.get('attachments', {})
  3986. attachment_id = None
  3987. if attachments and instance.uuid in attachments:
  3988. attachment_id = attachments[instance.uuid]['attachment_id']
  3989. self._record_action_start(
  3990. context, instance, instance_actions.DETACH_VOLUME)
  3991. self.compute_rpcapi.detach_volume(context, instance=instance,
  3992. volume_id=volume['id'], attachment_id=attachment_id)
  3993. def _detach_volume_shelved_offloaded(self, context, instance, volume):
  3994. """Detach a volume from an instance in shelved offloaded state.
3995. If the instance is shelved offloaded we just need to clean up the
3996. volume by calling the volume api detach, the volume api
3997. terminate_connection, and deleting the bdm record.
3998. If the volume has the delete_on_termination option set then we call
3999. the volume api delete as well.
  4000. """
  4001. @wrap_instance_event(prefix='api')
  4002. def detach_volume(self, context, instance, bdms):
  4003. self._local_cleanup_bdm_volumes(bdms, instance, context)
  4004. bdms = [objects.BlockDeviceMapping.get_by_volume_id(
  4005. context, volume['id'], instance.uuid)]
  4006. # The begin_detaching() call only works with in-use volumes,
  4007. # which will not be the case for volumes attached to a shelved
  4008. # offloaded server via the attachments API since those volumes
  4009. # will have `reserved` status.
  4010. if not bdms[0].attachment_id:
  4011. try:
  4012. self.volume_api.begin_detaching(context, volume['id'])
  4013. except exception.InvalidInput as exc:
  4014. raise exception.InvalidVolume(reason=exc.format_message())
  4015. self._record_action_start(
  4016. context, instance,
  4017. instance_actions.DETACH_VOLUME)
  4018. detach_volume(self, context, instance, bdms)
  4019. @check_instance_lock
  4020. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4021. vm_states.STOPPED, vm_states.RESIZED,
  4022. vm_states.SOFT_DELETED, vm_states.SHELVED,
  4023. vm_states.SHELVED_OFFLOADED])
  4024. def detach_volume(self, context, instance, volume):
  4025. """Detach a volume from an instance."""
  4026. if instance.vm_state == vm_states.SHELVED_OFFLOADED:
  4027. self._detach_volume_shelved_offloaded(context, instance, volume)
  4028. else:
  4029. self._detach_volume(context, instance, volume)
  4030. def _count_attachments_for_swap(self, ctxt, volume):
  4031. """Counts the number of attachments for a swap-related volume.
  4032. Attempts to only count read/write attachments if the volume attachment
  4033. records exist, otherwise simply just counts the number of attachments
  4034. regardless of attach mode.
  4035. :param ctxt: nova.context.RequestContext - user request context
  4036. :param volume: nova-translated volume dict from nova.volume.cinder.
  4037. :returns: count of attachments for the volume
  4038. """
  4039. # This is a dict, keyed by server ID, to a dict of attachment_id and
  4040. # mountpoint.
  4041. attachments = volume.get('attachments', {})
  4042. # Multiattach volumes can have more than one attachment, so if there
  4043. # is more than one attachment, attempt to count the read/write
  4044. # attachments.
  4045. if len(attachments) > 1:
  4046. count = 0
  4047. for attachment in attachments.values():
  4048. attachment_id = attachment['attachment_id']
  4049. # Get the attachment record for this attachment so we can
  4050. # get the attach_mode.
  4051. # TODO(mriedem): This could be optimized if we had
  4052. # GET /attachments/detail?volume_id=volume['id'] in Cinder.
  4053. try:
  4054. attachment_record = self.volume_api.attachment_get(
  4055. ctxt, attachment_id)
  4056. # Note that the attachment record from Cinder has
  4057. # attach_mode in the top-level of the resource but the
  4058. # nova.volume.cinder code translates it and puts the
  4059. # attach_mode in the connection_info for some legacy
  4060. # reason...
  4061. if attachment_record['attach_mode'] == 'rw':
  4062. count += 1
  4063. except exception.VolumeAttachmentNotFound:
  4064. # attachments are read/write by default so count it
  4065. count += 1
  4066. else:
  4067. count = len(attachments)
  4068. return count
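# A minimal sketch of the counting rule, with an illustrative attachments
# dict shaped like the one carried in the translated volume (keyed by
# server uuid):
#
#     volume = {'attachments': {
#         'server-1': {'attachment_id': 'a1', 'mountpoint': '/dev/vdb'},
#         'server-2': {'attachment_id': 'a2', 'mountpoint': '/dev/vdb'},
#     }}
#     # With more than one attachment, only attachments whose record reports
#     # attach_mode == 'rw' (or whose record cannot be found) are counted;
#     # with a single attachment the count is simply len(attachments).
#     count = compute_api._count_attachments_for_swap(ctxt, volume)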
  4069. @check_instance_lock
  4070. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4071. vm_states.RESIZED])
  4072. def swap_volume(self, context, instance, old_volume, new_volume):
  4073. """Swap volume attached to an instance."""
  4074. # The caller likely got the instance from volume['attachments']
  4075. # in the first place, but let's sanity check.
  4076. if not old_volume.get('attachments', {}).get(instance.uuid):
  4077. msg = _("Old volume is attached to a different instance.")
  4078. raise exception.InvalidVolume(reason=msg)
  4079. if new_volume['attach_status'] == 'attached':
  4080. msg = _("New volume must be detached in order to swap.")
  4081. raise exception.InvalidVolume(reason=msg)
  4082. if int(new_volume['size']) < int(old_volume['size']):
  4083. msg = _("New volume must be the same size or larger.")
  4084. raise exception.InvalidVolume(reason=msg)
  4085. self.volume_api.check_availability_zone(context, new_volume,
  4086. instance=instance)
  4087. try:
  4088. self.volume_api.begin_detaching(context, old_volume['id'])
  4089. except exception.InvalidInput as exc:
  4090. raise exception.InvalidVolume(reason=exc.format_message())
  4091. # Disallow swapping from multiattach volumes that have more than one
  4092. # read/write attachment. We know the old_volume has at least one
  4093. # attachment since it's attached to this server. The new_volume
  4094. # can't have any attachments because of the attach_status check above.
  4095. # We do this count after calling "begin_detaching" to lock against
  4096. # concurrent attachments being made while we're counting.
  4097. try:
  4098. if self._count_attachments_for_swap(context, old_volume) > 1:
  4099. raise exception.MultiattachSwapVolumeNotSupported()
  4100. except Exception: # This is generic to handle failures while counting
  4101. # We need to reset the detaching status before raising.
  4102. with excutils.save_and_reraise_exception():
  4103. self.volume_api.roll_detaching(context, old_volume['id'])
  4104. # Get the BDM for the attached (old) volume so we can tell if it was
  4105. # attached with the new-style Cinder 3.44 API.
  4106. bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
  4107. context, old_volume['id'], instance.uuid)
  4108. new_attachment_id = None
  4109. if bdm.attachment_id is None:
  4110. # This is an old-style attachment so reserve the new volume before
  4111. # we cast to the compute host.
  4112. self.volume_api.reserve_volume(context, new_volume['id'])
  4113. else:
  4114. try:
  4115. self._check_volume_already_attached_to_instance(
  4116. context, instance, new_volume['id'])
  4117. except exception.InvalidVolume:
  4118. with excutils.save_and_reraise_exception():
  4119. self.volume_api.roll_detaching(context, old_volume['id'])
  4120. # This is a new-style attachment so for the volume that we are
  4121. # going to swap to, create a new volume attachment.
  4122. new_attachment_id = self.volume_api.attachment_create(
  4123. context, new_volume['id'], instance.uuid)['id']
  4124. self._record_action_start(
  4125. context, instance, instance_actions.SWAP_VOLUME)
  4126. try:
  4127. self.compute_rpcapi.swap_volume(
  4128. context, instance=instance,
  4129. old_volume_id=old_volume['id'],
  4130. new_volume_id=new_volume['id'],
  4131. new_attachment_id=new_attachment_id)
  4132. except Exception:
  4133. with excutils.save_and_reraise_exception():
  4134. self.volume_api.roll_detaching(context, old_volume['id'])
  4135. if new_attachment_id is None:
  4136. self.volume_api.unreserve_volume(context, new_volume['id'])
  4137. else:
  4138. self.volume_api.attachment_delete(
  4139. context, new_attachment_id)
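# A minimal usage sketch with illustrative names; both arguments are
# nova-translated volume dicts, the old volume must be attached to `server`,
# and the new volume must be detached and at least as large as the old one:
#
#     old_vol = compute_api.volume_api.get(ctxt, old_volume_id)
#     new_vol = compute_api.volume_api.get(ctxt, new_volume_id)
#     compute_api.swap_volume(ctxt, server, old_vol, new_vol)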
  4140. @check_instance_lock
  4141. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4142. vm_states.STOPPED],
  4143. task_state=[None])
  4144. def attach_interface(self, context, instance, network_id, port_id,
  4145. requested_ip, tag=None):
4146. """Use hotplug to add a network adapter to an instance."""
  4147. self._record_action_start(
  4148. context, instance, instance_actions.ATTACH_INTERFACE)
4149. # NOTE(gibi): Check if the requested port has a resource request, as
4150. # such ports are currently not supported: they would at least need
4151. # resource allocation manipulation in placement, but might also need
4152. # a new scheduling run if the resources on this host are not available.
  4153. if port_id:
  4154. port = self.network_api.show_port(context, port_id)
  4155. if port['port'].get(constants.RESOURCE_REQUEST):
  4156. raise exception.AttachInterfaceWithQoSPolicyNotSupported(
  4157. instance_uuid=instance.uuid)
  4158. return self.compute_rpcapi.attach_interface(context,
  4159. instance=instance, network_id=network_id, port_id=port_id,
  4160. requested_ip=requested_ip, tag=tag)
  4161. @check_instance_lock
  4162. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4163. vm_states.STOPPED],
  4164. task_state=[None])
  4165. def detach_interface(self, context, instance, port_id):
4166. """Detach a network adapter from an instance."""
  4167. self._record_action_start(
  4168. context, instance, instance_actions.DETACH_INTERFACE)
  4169. self.compute_rpcapi.detach_interface(context, instance=instance,
  4170. port_id=port_id)
  4171. def get_instance_metadata(self, context, instance):
  4172. """Get all metadata associated with an instance."""
  4173. return self.db.instance_metadata_get(context, instance.uuid)
  4174. @check_instance_lock
  4175. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4176. vm_states.SUSPENDED, vm_states.STOPPED],
  4177. task_state=None)
  4178. def delete_instance_metadata(self, context, instance, key):
  4179. """Delete the given metadata item from an instance."""
  4180. instance.delete_metadata_key(key)
  4181. self.compute_rpcapi.change_instance_metadata(context,
  4182. instance=instance,
  4183. diff={key: ['-']})
  4184. @check_instance_lock
  4185. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4186. vm_states.SUSPENDED, vm_states.STOPPED],
  4187. task_state=None)
  4188. def update_instance_metadata(self, context, instance,
  4189. metadata, delete=False):
  4190. """Updates or creates instance metadata.
  4191. If delete is True, metadata items that are not specified in the
  4192. `metadata` argument will be deleted.
  4193. """
  4194. orig = dict(instance.metadata)
  4195. if delete:
  4196. _metadata = metadata
  4197. else:
  4198. _metadata = dict(instance.metadata)
  4199. _metadata.update(metadata)
  4200. self._check_metadata_properties_quota(context, _metadata)
  4201. instance.metadata = _metadata
  4202. instance.save()
  4203. diff = _diff_dict(orig, instance.metadata)
  4204. self.compute_rpcapi.change_instance_metadata(context,
  4205. instance=instance,
  4206. diff=diff)
  4207. return _metadata
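# A minimal sketch with illustrative names; the diff sent to the compute
# host is assumed to follow the same format used for the single-key delete
# above, i.e. {key: ['-']} for removals and (assumed) {key: ['+', value]}
# for additions and updates:
#
#     compute_api.update_instance_metadata(
#         ctxt, server, {'role': 'web'}, delete=False)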
  4208. @reject_sev_instances(instance_actions.LIVE_MIGRATION)
  4209. @check_instance_lock
  4210. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED])
  4211. def live_migrate(self, context, instance, block_migration,
  4212. disk_over_commit, host_name, force=None, async_=False):
4213. """Live migrate a server to a new host."""
  4214. LOG.debug("Going to try to live migrate instance to %s",
  4215. host_name or "another host", instance=instance)
  4216. if host_name:
  4217. # Validate the specified host before changing the instance task
  4218. # state.
  4219. nodes = objects.ComputeNodeList.get_all_by_host(context, host_name)
  4220. request_spec = objects.RequestSpec.get_by_instance_uuid(
  4221. context, instance.uuid)
  4222. instance.task_state = task_states.MIGRATING
  4223. instance.save(expected_task_state=[None])
  4224. self._record_action_start(context, instance,
  4225. instance_actions.LIVE_MIGRATION)
  4226. # NOTE(sbauza): Force is a boolean by the new related API version
  4227. if force is False and host_name:
  4228. # Unset the host to make sure we call the scheduler
  4229. # from the conductor LiveMigrationTask. Yes this is tightly-coupled
  4230. # to behavior in conductor and not great.
  4231. host_name = None
  4232. # FIXME(sbauza): Since only Ironic driver uses more than one
  4233. # compute per service but doesn't support live migrations,
  4234. # let's provide the first one.
  4235. target = nodes[0]
  4236. destination = objects.Destination(
  4237. host=target.host,
  4238. node=target.hypervisor_hostname
  4239. )
  4240. # This is essentially a hint to the scheduler to only consider
  4241. # the specified host but still run it through the filters.
  4242. request_spec.requested_destination = destination
  4243. try:
  4244. self.compute_task_api.live_migrate_instance(context, instance,
  4245. host_name, block_migration=block_migration,
  4246. disk_over_commit=disk_over_commit,
  4247. request_spec=request_spec, async_=async_)
  4248. except oslo_exceptions.MessagingTimeout as messaging_timeout:
  4249. with excutils.save_and_reraise_exception():
  4250. # NOTE(pkoniszewski): It is possible that MessagingTimeout
  4251. # occurs, but LM will still be in progress, so write
  4252. # instance fault to database
  4253. compute_utils.add_instance_fault_from_exc(context,
  4254. instance,
  4255. messaging_timeout)
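# A minimal usage sketch with illustrative names; with the newer API
# microversions force is a boolean, and force=False combined with a host
# name turns that host into a scheduler hint (requested_destination) rather
# than a forced target:
#
#     compute_api.live_migrate(ctxt, server, block_migration=False,
#                              disk_over_commit=False,
#                              host_name='compute-2', force=False)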
  4256. @check_instance_lock
  4257. @check_instance_state(vm_state=[vm_states.ACTIVE],
  4258. task_state=[task_states.MIGRATING])
  4259. def live_migrate_force_complete(self, context, instance, migration_id):
  4260. """Force live migration to complete.
  4261. :param context: Security context
  4262. :param instance: The instance that is being migrated
  4263. :param migration_id: ID of ongoing migration
  4264. """
  4265. LOG.debug("Going to try to force live migration to complete",
  4266. instance=instance)
4267. # NOTE(pkoniszewski): Get the migration object to check if there is an
4268. # ongoing live migration for this instance. Also pass the migration id
4269. # to compute to double check and avoid a possible race condition.
  4270. migration = objects.Migration.get_by_id_and_instance(
  4271. context, migration_id, instance.uuid)
  4272. if migration.status != 'running':
  4273. raise exception.InvalidMigrationState(migration_id=migration_id,
  4274. instance_uuid=instance.uuid,
  4275. state=migration.status,
  4276. method='force complete')
  4277. self._record_action_start(
  4278. context, instance, instance_actions.LIVE_MIGRATION_FORCE_COMPLETE)
  4279. self.compute_rpcapi.live_migration_force_complete(
  4280. context, instance, migration)
  4281. @check_instance_lock
  4282. @check_instance_state(task_state=[task_states.MIGRATING])
  4283. def live_migrate_abort(self, context, instance, migration_id,
  4284. support_abort_in_queue=False):
  4285. """Abort an in-progress live migration.
  4286. :param context: Security context
  4287. :param instance: The instance that is being migrated
  4288. :param migration_id: ID of in-progress live migration
  4289. :param support_abort_in_queue: Flag indicating whether we can support
  4290. abort migrations in "queued" or "preparing" status.
  4291. """
  4292. migration = objects.Migration.get_by_id_and_instance(context,
  4293. migration_id, instance.uuid)
  4294. LOG.debug("Going to cancel live migration %s",
  4295. migration.id, instance=instance)
  4296. # If the microversion does not support abort migration in queue,
4297. # we are only able to abort migrations with `running` status;
  4298. # if it is supported, we are able to also abort migrations in
  4299. # `queued` and `preparing` status.
  4300. allowed_states = ['running']
  4301. queued_states = ['queued', 'preparing']
  4302. if support_abort_in_queue:
  4303. # The user requested a microversion that supports aborting a queued
  4304. # or preparing live migration. But we need to check that the
  4305. # compute service hosting the instance is new enough to support
  4306. # aborting a queued/preparing live migration, so we check the
  4307. # service version here.
  4308. allowed_states.extend(queued_states)
  4309. if migration.status not in allowed_states:
  4310. raise exception.InvalidMigrationState(migration_id=migration_id,
  4311. instance_uuid=instance.uuid,
  4312. state=migration.status,
  4313. method='abort live migration')
  4314. self._record_action_start(context, instance,
  4315. instance_actions.LIVE_MIGRATION_CANCEL)
  4316. self.compute_rpcapi.live_migration_abort(context,
  4317. instance, migration.id)
  4318. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  4319. vm_states.ERROR])
  4320. def evacuate(self, context, instance, host, on_shared_storage,
  4321. admin_password=None, force=None):
4322. """Run evacuate to a target host.
4323. Checks the vm compute host state; if the host is not in the expected
4324. state, an exception is raised.
  4325. :param instance: The instance to evacuate
  4326. :param host: Target host. if not set, the scheduler will pick up one
  4327. :param on_shared_storage: True if instance files on shared storage
  4328. :param admin_password: password to set on rebuilt instance
  4329. :param force: Force the evacuation to the specific host target
  4330. """
  4331. LOG.debug('vm evacuation scheduled', instance=instance)
  4332. inst_host = instance.host
  4333. service = objects.Service.get_by_compute_host(context, inst_host)
  4334. if self.servicegroup_api.service_is_up(service):
  4335. LOG.error('Instance compute service state on %s '
  4336. 'expected to be down, but it was up.', inst_host)
  4337. raise exception.ComputeServiceInUse(host=inst_host)
  4338. request_spec = objects.RequestSpec.get_by_instance_uuid(
  4339. context, instance.uuid)
  4340. instance.task_state = task_states.REBUILDING
  4341. instance.save(expected_task_state=[None])
  4342. self._record_action_start(context, instance, instance_actions.EVACUATE)
  4343. # NOTE(danms): Create this as a tombstone for the source compute
  4344. # to find and cleanup. No need to pass it anywhere else.
  4345. migration = objects.Migration(context,
  4346. source_compute=instance.host,
  4347. source_node=instance.node,
  4348. instance_uuid=instance.uuid,
  4349. status='accepted',
  4350. migration_type='evacuation')
  4351. if host:
  4352. migration.dest_compute = host
  4353. migration.create()
  4354. compute_utils.notify_about_instance_usage(
  4355. self.notifier, context, instance, "evacuate")
  4356. compute_utils.notify_about_instance_action(
  4357. context, instance, CONF.host,
  4358. action=fields_obj.NotificationAction.EVACUATE,
  4359. source=fields_obj.NotificationSource.API)
  4360. # NOTE(sbauza): Force is a boolean by the new related API version
  4361. # TODO(stephenfin): Any reason we can't use 'not force' here to handle
  4362. # the pre-v2.29 API microversion, which wouldn't set force
  4363. if force is False and host:
  4364. nodes = objects.ComputeNodeList.get_all_by_host(context, host)
  4365. # NOTE(sbauza): Unset the host to make sure we call the scheduler
  4366. host = None
  4367. # FIXME(sbauza): Since only Ironic driver uses more than one
  4368. # compute per service but doesn't support evacuations,
  4369. # let's provide the first one.
  4370. target = nodes[0]
  4371. destination = objects.Destination(
  4372. host=target.host,
  4373. node=target.hypervisor_hostname
  4374. )
  4375. request_spec.requested_destination = destination
  4376. return self.compute_task_api.rebuild_instance(context,
  4377. instance=instance,
  4378. new_pass=admin_password,
  4379. injected_files=None,
  4380. image_ref=None,
  4381. orig_image_ref=None,
  4382. orig_sys_metadata=None,
  4383. bdms=None,
  4384. recreate=True,
  4385. on_shared_storage=on_shared_storage,
  4386. host=host,
  4387. request_spec=request_spec,
  4388. )
  4389. def get_migrations(self, context, filters):
  4390. """Get all migrations for the given filters."""
  4391. load_cells()
  4392. migrations = []
  4393. for cell in CELLS:
  4394. if cell.uuid == objects.CellMapping.CELL0_UUID:
  4395. continue
  4396. with nova_context.target_cell(context, cell) as cctxt:
  4397. migrations.extend(objects.MigrationList.get_by_filters(
  4398. cctxt, filters).objects)
  4399. return objects.MigrationList(objects=migrations)
  4400. def get_migrations_sorted(self, context, filters, sort_dirs=None,
  4401. sort_keys=None, limit=None, marker=None):
  4402. """Get all migrations for the given parameters."""
  4403. mig_objs = migration_list.get_migration_objects_sorted(
  4404. context, filters, limit, marker, sort_keys, sort_dirs)
  4405. # Due to cross-cell resize, we could have duplicate migration records
  4406. # while the instance is in VERIFY_RESIZE state in the destination cell
  4407. # but the original migration record still exists in the source cell.
  4408. # Filter out duplicate migration records here based on which record
  4409. # is newer (last updated).
  4410. def _get_newer_obj(obj1, obj2):
  4411. # created_at will always be set.
  4412. created_at1 = obj1.created_at
  4413. created_at2 = obj2.created_at
  4414. # updated_at might be None
  4415. updated_at1 = obj1.updated_at
  4416. updated_at2 = obj2.updated_at
  4417. # If both have updated_at, compare using that field.
  4418. if updated_at1 and updated_at2:
  4419. if updated_at1 > updated_at2:
  4420. return obj1
  4421. return obj2
  4422. # Compare created_at versus updated_at.
  4423. if updated_at1:
  4424. if updated_at1 > created_at2:
  4425. return obj1
  4426. return obj2
  4427. if updated_at2:
  4428. if updated_at2 > created_at1:
  4429. return obj2
  4430. return obj1
  4431. # Compare created_at only.
  4432. if created_at1 > created_at2:
  4433. return obj1
  4434. return obj2
  4435. # TODO(mriedem): This could be easier if we leveraged the "hidden"
  4436. # field on the Migration record and then just did like
  4437. # _get_unique_filter_method in the get_all() method for instances.
  4438. migrations_by_uuid = collections.OrderedDict() # maintain sort order
  4439. for migration in mig_objs:
  4440. if migration.uuid not in migrations_by_uuid:
  4441. migrations_by_uuid[migration.uuid] = migration
  4442. else:
  4443. # We have a collision, keep the newer record.
  4444. # Note that using updated_at could be wrong if changes-since or
  4445. # changes-before filters are being used but we have the same
  4446. # issue in _get_unique_filter_method for instances.
  4447. doppelganger = migrations_by_uuid[migration.uuid]
  4448. newer = _get_newer_obj(doppelganger, migration)
  4449. migrations_by_uuid[migration.uuid] = newer
  4450. return objects.MigrationList(objects=list(migrations_by_uuid.values()))
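# A minimal sketch of the de-duplication rule above, with illustrative
# timestamps for two migration records sharing a uuid:
#
#     # source-cell record: created_at=10:00, updated_at=10:05
#     # dest-cell record:   created_at=10:01, updated_at=None
#     # _get_newer_obj keeps the source-cell record because 10:05 > 10:01
#
# Insertion order from the sorted query is preserved by the OrderedDict, so
# de-duplication does not disturb the requested sort.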
  4451. def get_migrations_in_progress_by_instance(self, context, instance_uuid,
  4452. migration_type=None):
  4453. """Get all migrations of an instance in progress."""
  4454. return objects.MigrationList.get_in_progress_by_instance(
  4455. context, instance_uuid, migration_type)
  4456. def get_migration_by_id_and_instance(self, context,
  4457. migration_id, instance_uuid):
  4458. """Get the migration of an instance by id."""
  4459. return objects.Migration.get_by_id_and_instance(
  4460. context, migration_id, instance_uuid)
  4461. def _get_bdm_by_volume_id(self, context, volume_id, expected_attrs=None):
  4462. """Retrieve a BDM without knowing its cell.
  4463. .. note:: The context will be targeted to the cell in which the
  4464. BDM is found, if any.
  4465. :param context: The API request context.
  4466. :param volume_id: The ID of the volume.
  4467. :param expected_attrs: list of any additional attributes that should
  4468. be joined when the BDM is loaded from the database.
  4469. :raises: nova.exception.VolumeBDMNotFound if not found in any cell
  4470. """
  4471. load_cells()
  4472. for cell in CELLS:
  4473. nova_context.set_target_cell(context, cell)
  4474. try:
  4475. return objects.BlockDeviceMapping.get_by_volume(
  4476. context, volume_id, expected_attrs=expected_attrs)
  4477. except exception.NotFound:
  4478. continue
  4479. raise exception.VolumeBDMNotFound(volume_id=volume_id)
  4480. def volume_snapshot_create(self, context, volume_id, create_info):
  4481. bdm = self._get_bdm_by_volume_id(
  4482. context, volume_id, expected_attrs=['instance'])
  4483. # We allow creating the snapshot in any vm_state as long as there is
  4484. # no task being performed on the instance and it has a host.
  4485. @check_instance_host()
  4486. @check_instance_state(vm_state=None)
  4487. def do_volume_snapshot_create(self, context, instance):
  4488. self.compute_rpcapi.volume_snapshot_create(context, instance,
  4489. volume_id, create_info)
  4490. snapshot = {
  4491. 'snapshot': {
  4492. 'id': create_info.get('id'),
  4493. 'volumeId': volume_id
  4494. }
  4495. }
  4496. return snapshot
  4497. return do_volume_snapshot_create(self, context, bdm.instance)
  4498. def volume_snapshot_delete(self, context, volume_id, snapshot_id,
  4499. delete_info):
  4500. bdm = self._get_bdm_by_volume_id(
  4501. context, volume_id, expected_attrs=['instance'])
  4502. # We allow deleting the snapshot in any vm_state as long as there is
  4503. # no task being performed on the instance and it has a host.
  4504. @check_instance_host()
  4505. @check_instance_state(vm_state=None)
  4506. def do_volume_snapshot_delete(self, context, instance):
  4507. self.compute_rpcapi.volume_snapshot_delete(context, instance,
  4508. volume_id, snapshot_id, delete_info)
  4509. do_volume_snapshot_delete(self, context, bdm.instance)
  4510. def external_instance_event(self, api_context, instances, events):
  4511. # NOTE(danms): The external API consumer just provides events,
  4512. # but doesn't know where they go. We need to collate lists
  4513. # by the host the affected instance is on and dispatch them
  4514. # according to host
  4515. instances_by_host = collections.defaultdict(list)
  4516. events_by_host = collections.defaultdict(list)
  4517. hosts_by_instance = collections.defaultdict(list)
  4518. cell_contexts_by_host = {}
  4519. for instance in instances:
  4520. # instance._context is used here since it's already targeted to
  4521. # the cell that the instance lives in, and we need to use that
  4522. # cell context to lookup any migrations associated to the instance.
  4523. hosts, cross_cell_move = self._get_relevant_hosts(
  4524. instance._context, instance)
  4525. for host in hosts:
  4526. # NOTE(danms): All instances on a host must have the same
  4527. # mapping, so just use that
  4528. if host not in cell_contexts_by_host:
  4529. # NOTE(mriedem): If the instance is being migrated across
  4530. # cells then we have to get the host mapping to determine
  4531. # which cell a given host is in.
  4532. if cross_cell_move:
  4533. hm = objects.HostMapping.get_by_host(api_context, host)
  4534. ctxt = nova_context.get_admin_context()
  4535. nova_context.set_target_cell(ctxt, hm.cell_mapping)
  4536. cell_contexts_by_host[host] = ctxt
  4537. else:
  4538. # The instance is not migrating across cells so just
  4539. # use the cell-targeted context already in the
  4540. # instance since the host has to be in that same cell.
  4541. cell_contexts_by_host[host] = instance._context
  4542. instances_by_host[host].append(instance)
  4543. hosts_by_instance[instance.uuid].append(host)
  4544. for event in events:
  4545. if event.name == 'volume-extended':
  4546. # Volume extend is a user-initiated operation starting in the
  4547. # Block Storage service API. We record an instance action so
  4548. # the user can monitor the operation to completion.
  4549. host = hosts_by_instance[event.instance_uuid][0]
  4550. cell_context = cell_contexts_by_host[host]
  4551. objects.InstanceAction.action_start(
  4552. cell_context, event.instance_uuid,
  4553. instance_actions.EXTEND_VOLUME, want_result=False)
  4554. elif event.name == 'power-update':
  4555. host = hosts_by_instance[event.instance_uuid][0]
  4556. cell_context = cell_contexts_by_host[host]
  4557. if event.tag == external_event_obj.POWER_ON:
  4558. inst_action = instance_actions.START
  4559. elif event.tag == external_event_obj.POWER_OFF:
  4560. inst_action = instance_actions.STOP
  4561. else:
  4562. LOG.warning("Invalid power state %s. Cannot process "
  4563. "the event %s. Skipping it.", event.tag,
  4564. event)
  4565. continue
  4566. objects.InstanceAction.action_start(
  4567. cell_context, event.instance_uuid, inst_action,
  4568. want_result=False)
  4569. for host in hosts_by_instance[event.instance_uuid]:
  4570. events_by_host[host].append(event)
  4571. for host in instances_by_host:
  4572. cell_context = cell_contexts_by_host[host]
  4573. # TODO(salv-orlando): Handle exceptions raised by the rpc api layer
  4574. # in order to ensure that a failure in processing events on a host
  4575. # will not prevent processing events on other hosts
  4576. self.compute_rpcapi.external_instance_event(
  4577. cell_context, instances_by_host[host], events_by_host[host],
  4578. host=host)
  4579. def _get_relevant_hosts(self, context, instance):
  4580. """Get the relevant hosts for an external server event on an instance.
  4581. :param context: nova auth request context targeted at the same cell
  4582. that the instance lives in
  4583. :param instance: Instance object which is the target of an external
  4584. server event
  4585. :returns: 2-item tuple of:
  4586. - set of at least one host (the host where the instance lives); if
  4587. the instance is being migrated the source and dest compute
  4588. hostnames are in the returned set
  4589. - boolean indicating if the instance is being migrated across cells
  4590. """
  4591. hosts = set()
  4592. hosts.add(instance.host)
  4593. cross_cell_move = False
  4594. if instance.migration_context is not None:
  4595. migration_id = instance.migration_context.migration_id
  4596. migration = objects.Migration.get_by_id(context, migration_id)
  4597. cross_cell_move = migration.cross_cell_move
  4598. hosts.add(migration.dest_compute)
  4599. hosts.add(migration.source_compute)
  4600. cells_msg = (
  4601. 'across cells' if cross_cell_move else 'within the same cell')
  4602. LOG.debug('Instance %(instance)s is migrating %(cells_msg)s, '
  4603. 'copying events to all relevant hosts: '
  4604. '%(hosts)s', {'cells_msg': cells_msg,
  4605. 'instance': instance.uuid,
  4606. 'hosts': hosts})
  4607. return hosts, cross_cell_move
  4608. def get_instance_host_status(self, instance):
  4609. if instance.host:
  4610. try:
  4611. service = [service for service in instance.services if
  4612. service.binary == 'nova-compute'][0]
  4613. if service.forced_down:
  4614. host_status = fields_obj.HostStatus.DOWN
  4615. elif service.disabled:
  4616. host_status = fields_obj.HostStatus.MAINTENANCE
  4617. else:
  4618. alive = self.servicegroup_api.service_is_up(service)
  4619. host_status = ((alive and fields_obj.HostStatus.UP) or
  4620. fields_obj.HostStatus.UNKNOWN)
  4621. except IndexError:
  4622. host_status = fields_obj.HostStatus.NONE
  4623. else:
  4624. host_status = fields_obj.HostStatus.NONE
  4625. return host_status
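# A minimal sketch of the mapping implemented above:
#
#     # service.forced_down                     -> HostStatus.DOWN
#     # service.disabled (not forced down)      -> HostStatus.MAINTENANCE
#     # enabled and service_is_up() is True     -> HostStatus.UP
#     # enabled and service_is_up() is False    -> HostStatus.UNKNOWN
#     # no host or no nova-compute service      -> HostStatus.NONE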
  4626. def get_instances_host_statuses(self, instance_list):
  4627. host_status_dict = dict()
  4628. host_statuses = dict()
  4629. for instance in instance_list:
  4630. if instance.host:
  4631. if instance.host not in host_status_dict:
  4632. host_status = self.get_instance_host_status(instance)
  4633. host_status_dict[instance.host] = host_status
  4634. else:
  4635. host_status = host_status_dict[instance.host]
  4636. else:
  4637. host_status = fields_obj.HostStatus.NONE
  4638. host_statuses[instance.uuid] = host_status
  4639. return host_statuses
  4640. def target_host_cell(fn):
  4641. """Target a host-based function to a cell.
  4642. Expects to wrap a function of signature:
  4643. func(self, context, host, ...)
  4644. """
  4645. @functools.wraps(fn)
  4646. def targeted(self, context, host, *args, **kwargs):
  4647. mapping = objects.HostMapping.get_by_host(context, host)
  4648. nova_context.set_target_cell(context, mapping.cell_mapping)
  4649. return fn(self, context, host, *args, **kwargs)
  4650. return targeted
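# A minimal usage sketch; `some_host_action` is an illustrative name for any
# method matching the documented signature:
#
#     @target_host_cell
#     def some_host_action(self, context, host, *args, **kwargs):
#         # `context` is now targeted at the cell that owns `host`
#         ...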
  4651. def _get_service_in_cell_by_host(context, host_name):
  4652. # validates the host; ComputeHostNotFound is raised if invalid
  4653. try:
  4654. mapping = objects.HostMapping.get_by_host(context, host_name)
  4655. nova_context.set_target_cell(context, mapping.cell_mapping)
  4656. service = objects.Service.get_by_compute_host(context, host_name)
  4657. except exception.HostMappingNotFound:
  4658. try:
  4659. # NOTE(danms): This targets our cell
  4660. service = _find_service_in_cell(context, service_host=host_name)
  4661. except exception.NotFound:
  4662. raise exception.ComputeHostNotFound(host=host_name)
  4663. return service


def _find_service_in_cell(context, service_id=None, service_host=None):
    """Find a service by id or hostname by searching all cells.

    If one matching service is found, return it. If none or multiple
    are found, raise an exception.

    :param context: A context.RequestContext
    :param service_id: If not None, the DB ID of the service to find
    :param service_host: If not None, the hostname of the service to find
    :returns: An objects.Service
    :raises: ServiceNotUnique if multiple matching IDs are found
    :raises: NotFound if no matches are found
    :raises: NovaException if called with neither search option
    """
    load_cells()
    service = None
    found_in_cell = None

    is_uuid = False
    if service_id is not None:
        is_uuid = uuidutils.is_uuid_like(service_id)
        if is_uuid:
            lookup_fn = lambda c: objects.Service.get_by_uuid(c, service_id)
        else:
            lookup_fn = lambda c: objects.Service.get_by_id(c, service_id)
    elif service_host is not None:
        lookup_fn = lambda c: (
            objects.Service.get_by_compute_host(c, service_host))
    else:
        LOG.exception('_find_service_in_cell called with no search '
                      'parameters')
        # This is intentionally cryptic so we don't leak implementation
        # details out of the API.
        raise exception.NovaException()

    for cell in CELLS:
        # NOTE(danms): Services can be in cell0, so don't skip it here
        try:
            with nova_context.target_cell(context, cell) as cctxt:
                cell_service = lookup_fn(cctxt)
        except exception.NotFound:
            # NOTE(danms): Keep looking in other cells
            continue
        if service and cell_service:
            raise exception.ServiceNotUnique()
        service = cell_service
        found_in_cell = cell
        if service and is_uuid:
            break

    if service:
        # NOTE(danms): Set the cell on the context so it remains
        # when we return to our caller
        nova_context.set_target_cell(context, found_in_cell)
        return service
    else:
        raise exception.NotFound()
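
# Illustrative usage (not part of the original module): look up a service by
# DB id/UUID or by hostname; exactly one of the two search options must be
# given. 'ctxt' is an assumed admin RequestContext.
#
#   service = _find_service_in_cell(ctxt, service_id=service_id_or_uuid)
#   service = _find_service_in_cell(ctxt, service_host='compute1')
#
# On success the context is left targeted at the cell that contains the
# service, so follow-up per-cell queries hit the right database.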


class HostAPI(base.Base):
    """Sub-set of the Compute Manager API for managing host operations."""

    def __init__(self, rpcapi=None, servicegroup_api=None):
        self.rpcapi = rpcapi or compute_rpcapi.ComputeAPI()
        self.servicegroup_api = servicegroup_api or servicegroup.API()
        super(HostAPI, self).__init__()

    def _assert_host_exists(self, context, host_name, must_be_up=False):
        """Raise HostNotFound if compute host doesn't exist."""
        service = objects.Service.get_by_compute_host(context, host_name)
        if not service:
            raise exception.HostNotFound(host=host_name)
        if must_be_up and not self.servicegroup_api.service_is_up(service):
            raise exception.ComputeServiceUnavailable(host=host_name)
        return service['host']

    @wrap_exception()
    @target_host_cell
    def set_host_enabled(self, context, host_name, enabled):
        """Sets the specified host's ability to accept new instances."""
        host_name = self._assert_host_exists(context, host_name)
        payload = {'host_name': host_name, 'enabled': enabled}
        compute_utils.notify_about_host_update(context,
                                               'set_enabled.start',
                                               payload)
        result = self.rpcapi.set_host_enabled(context, enabled=enabled,
                                              host=host_name)
        compute_utils.notify_about_host_update(context,
                                               'set_enabled.end',
                                               payload)
        return result

    @target_host_cell
    def get_host_uptime(self, context, host_name):
        """Returns the result of calling "uptime" on the target host."""
        host_name = self._assert_host_exists(context, host_name,
                                             must_be_up=True)
        return self.rpcapi.get_host_uptime(context, host=host_name)

    @wrap_exception()
    @target_host_cell
    def host_power_action(self, context, host_name, action):
        """Reboots, shuts down or powers up the host."""
        host_name = self._assert_host_exists(context, host_name)
        payload = {'host_name': host_name, 'action': action}
        compute_utils.notify_about_host_update(context,
                                               'power_action.start',
                                               payload)
        result = self.rpcapi.host_power_action(context, action=action,
                                               host=host_name)
        compute_utils.notify_about_host_update(context,
                                               'power_action.end',
                                               payload)
        return result

    @wrap_exception()
    @target_host_cell
    def set_host_maintenance(self, context, host_name, mode):
        """Start/Stop host maintenance window. On start, it triggers the
        evacuation of guest VMs.
        """
        host_name = self._assert_host_exists(context, host_name)
        payload = {'host_name': host_name, 'mode': mode}
        compute_utils.notify_about_host_update(context,
                                               'set_maintenance.start',
                                               payload)
        result = self.rpcapi.host_maintenance_mode(context,
            host_param=host_name, mode=mode, host=host_name)
        compute_utils.notify_about_host_update(context,
                                               'set_maintenance.end',
                                               payload)
        return result

    def service_get_all(self, context, filters=None, set_zones=False,
                        all_cells=False, cell_down_support=False):
        """Returns a list of services, optionally filtering the results.

        If specified, 'filters' should be a dictionary containing service
        attributes and matching values.  For example, to get a list of
        services for the 'compute' topic, use filters={'topic': 'compute'}.

        If all_cells=True, then scan all cells and merge the results.

        If cell_down_support=True then return minimal service records
        for cells that do not respond based on what we have in the
        host mappings. These will have only 'binary' and 'host' set.
        """
        if filters is None:
            filters = {}
        disabled = filters.pop('disabled', None)
        if 'availability_zone' in filters:
            set_zones = True

        # NOTE(danms): Eventually this all_cells nonsense should go away
        # and we should always iterate over the cells. However, certain
        # callers need the legacy behavior for now.
        if all_cells:
            services = []
            service_dict = nova_context.scatter_gather_all_cells(context,
                objects.ServiceList.get_all, disabled, set_zones=set_zones)
            for cell_uuid, service in service_dict.items():
                if not nova_context.is_cell_failure_sentinel(service):
                    services.extend(service)
                elif cell_down_support:
                    unavailable_services = objects.ServiceList()
                    cid = [cm.id for cm in nova_context.CELLS
                           if cm.uuid == cell_uuid]
                    # We know cid[0] is in the list because we are using the
                    # same list that scatter_gather_all_cells used
                    hms = objects.HostMappingList.get_by_cell_id(context,
                                                                 cid[0])
                    for hm in hms:
                        unavailable_services.objects.append(objects.Service(
                            binary='nova-compute', host=hm.host))
                    LOG.warning("Cell %s is not responding and hence only "
                                "partial results are available from this "
                                "cell.", cell_uuid)
                    services.extend(unavailable_services)
                else:
                    LOG.warning("Cell %s is not responding and hence is "
                                "skipped from the results.", cell_uuid)
        else:
            services = objects.ServiceList.get_all(context, disabled,
                                                   set_zones=set_zones)

        ret_services = []
        for service in services:
            for key, val in filters.items():
                if service[key] != val:
                    break
            else:
                # All filters matched.
                ret_services.append(service)
        return ret_services
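
    # Illustrative usage (not part of the original module): list all enabled
    # nova-compute services across every cell, tolerating unreachable cells.
    # 'host_api' and 'ctxt' are assumed names for a HostAPI instance and an
    # admin RequestContext.
    #
    #   computes = host_api.service_get_all(
    #       ctxt, filters={'binary': 'nova-compute', 'disabled': False},
    #       all_cells=True, cell_down_support=True)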

    def service_get_by_id(self, context, service_id):
        """Get service entry for the given service id or uuid."""
        try:
            return _find_service_in_cell(context, service_id=service_id)
        except exception.NotFound:
            raise exception.ServiceNotFound(service_id=service_id)

    @target_host_cell
    def service_get_by_compute_host(self, context, host_name):
        """Get service entry for the given compute hostname."""
        return objects.Service.get_by_compute_host(context, host_name)

    def _update_compute_provider_status(self, context, service):
        """Calls the compute service to sync the COMPUTE_STATUS_DISABLED trait.

        There are two cases where the API will not call the compute service:

        * The compute service is down. In this case the trait is synchronized
          when the compute service is restarted.
        * The compute service is old. In this case the trait is synchronized
          when the compute service is upgraded and restarted.

        :param context: nova auth RequestContext
        :param service: nova.objects.Service object which has been enabled
            or disabled (see ``service_update``).
        """
        # Make sure the service is up so we can make the RPC call.
        if not self.servicegroup_api.service_is_up(service):
            LOG.info('Compute service on host %s is down. The '
                     'COMPUTE_STATUS_DISABLED trait will be synchronized '
                     'when the service is restarted.', service.host)
            return

        # Make sure the compute service is new enough for the trait sync
        # behavior.
        # TODO(mriedem): Remove this compat check in the U release.
        if service.version < MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED:
            LOG.info('Compute service on host %s is too old to sync the '
                     'COMPUTE_STATUS_DISABLED trait in Placement. The '
                     'trait will be synchronized when the service is '
                     'upgraded and restarted.', service.host)
            return

        enabled = not service.disabled
        # Avoid leaking errors out of the API.
        try:
            LOG.debug('Calling the compute service on host %s to sync the '
                      'COMPUTE_STATUS_DISABLED trait.', service.host)
            self.rpcapi.set_host_enabled(context, service.host, enabled)
        except Exception:
            LOG.exception('An error occurred while updating the '
                          'COMPUTE_STATUS_DISABLED trait on compute node '
                          'resource providers managed by host %s. The trait '
                          'will be synchronized automatically by the compute '
                          'service when the update_available_resource '
                          'periodic task runs.', service.host)

    def service_update(self, context, service):
        """Performs the actual service update operation.

        If the "disabled" field is changed, potentially calls the compute
        service to sync the COMPUTE_STATUS_DISABLED trait on the compute node
        resource providers managed by this compute service.

        :param context: nova auth RequestContext
        :param service: nova.objects.Service object with changes already
            set on the object
        """
        # Before persisting changes and resetting the changed fields on the
        # Service object, determine if the disabled field changed.
        update_placement = 'disabled' in service.obj_what_changed()
        # Persist the Service object changes to the database.
        service.save()
        # If the disabled field changed, potentially call the compute service
        # to sync the COMPUTE_STATUS_DISABLED trait.
        if update_placement:
            self._update_compute_provider_status(context, service)
        return service

    @target_host_cell
    def service_update_by_host_and_binary(self, context, host_name, binary,
                                          params_to_update):
        """Enable / Disable a service.

        Determines the cell that the service is in using the HostMapping.

        For compute services, this stops new builds and migrations going to
        the host.

        See also ``service_update``.

        :param context: nova auth RequestContext
        :param host_name: hostname of the service
        :param binary: service binary (really only supports "nova-compute")
        :param params_to_update: dict of changes to make to the Service object
        :raises: HostMappingNotFound if the host is not mapped to a cell
        :raises: HostBinaryNotFound if a services table record is not found
            with the given host_name and binary
        """
        # TODO(mriedem): Service.get_by_args is deprecated; we should use
        # get_by_compute_host here (remember to update the "raises" docstring).
        service = objects.Service.get_by_args(context, host_name, binary)
        service.update(params_to_update)

        return self.service_update(context, service)
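
    # Illustrative usage (not part of the original module): disable a compute
    # service so the scheduler stops selecting the host; when the service is
    # new enough this also triggers the COMPUTE_STATUS_DISABLED trait sync.
    # 'host_api' and 'ctxt' are assumed names.
    #
    #   host_api.service_update_by_host_and_binary(
    #       ctxt, 'compute1', 'nova-compute',
    #       {'disabled': True, 'disabled_reason': 'maintenance'})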

    @target_host_cell
    def instance_get_all_by_host(self, context, host_name):
        """Return all instances on the given host."""
        return objects.InstanceList.get_by_host(context, host_name)

    def task_log_get_all(self, context, task_name, period_beginning,
                         period_ending, host=None, state=None):
        """Return the task logs within a given range, optionally
        filtering by host and/or state.
        """
        return self.db.task_log_get_all(context, task_name,
                                        period_beginning,
                                        period_ending,
                                        host=host,
                                        state=state)

    def compute_node_get(self, context, compute_id):
        """Return compute node entry for particular integer ID or UUID."""
        load_cells()

        # NOTE(danms): Unfortunately this API exposes database identifiers
        # which means we really can't do something efficient here
        is_uuid = uuidutils.is_uuid_like(compute_id)
        for cell in CELLS:
            if cell.uuid == objects.CellMapping.CELL0_UUID:
                continue
            with nova_context.target_cell(context, cell) as cctxt:
                try:
                    if is_uuid:
                        return objects.ComputeNode.get_by_uuid(cctxt,
                                                               compute_id)
                    return objects.ComputeNode.get_by_id(cctxt,
                                                         int(compute_id))
                except exception.ComputeHostNotFound:
                    # NOTE(danms): Keep looking in other cells
                    continue

        raise exception.ComputeHostNotFound(host=compute_id)

    def compute_node_get_all(self, context, limit=None, marker=None):
        load_cells()
        computes = []
        uuid_marker = marker and uuidutils.is_uuid_like(marker)
        for cell in CELLS:
            if cell.uuid == objects.CellMapping.CELL0_UUID:
                continue
            with nova_context.target_cell(context, cell) as cctxt:

                # If we have a marker and it's a uuid, see if the compute node
                # is in this cell.
                if marker and uuid_marker:
                    try:
                        compute_marker = objects.ComputeNode.get_by_uuid(
                            cctxt, marker)
                        # we found the marker compute node, so use its id
                        # for the actual marker for paging in this cell's db
                        marker = compute_marker.id
                    except exception.ComputeHostNotFound:
                        # The marker node isn't in this cell so keep looking.
                        continue

                try:
                    cell_computes = objects.ComputeNodeList.get_by_pagination(
                        cctxt, limit=limit, marker=marker)
                except exception.MarkerNotFound:
                    # NOTE(danms): Keep looking through cells
                    continue

                computes.extend(cell_computes)
                # NOTE(danms): We must have found the marker, so continue on
                # without one
                marker = None
                if limit:
                    limit -= len(cell_computes)
                    if limit <= 0:
                        break

        if marker is not None and len(computes) == 0:
            # NOTE(danms): If we did not find the marker in any cell,
            # mimic the db_api behavior here.
            raise exception.MarkerNotFound(marker=marker)

        return objects.ComputeNodeList(objects=computes)
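
    # Illustrative usage (not part of the original module): page through the
    # compute nodes of all cells, 100 at a time, using the last node's UUID
    # as the marker for the next request. 'host_api' and 'ctxt' are assumed
    # names.
    #
    #   nodes = host_api.compute_node_get_all(ctxt, limit=100)
    #   if nodes:
    #       next_page = host_api.compute_node_get_all(
    #           ctxt, limit=100, marker=nodes[-1].uuid)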

    def compute_node_search_by_hypervisor(self, context, hypervisor_match):
        load_cells()
        computes = []
        for cell in CELLS:
            if cell.uuid == objects.CellMapping.CELL0_UUID:
                continue
            with nova_context.target_cell(context, cell) as cctxt:
                cell_computes = objects.ComputeNodeList.get_by_hypervisor(
                    cctxt, hypervisor_match)
                computes.extend(cell_computes)
        return objects.ComputeNodeList(objects=computes)

    def compute_node_statistics(self, context):
        load_cells()
        cell_stats = []
        for cell in CELLS:
            if cell.uuid == objects.CellMapping.CELL0_UUID:
                continue
            with nova_context.target_cell(context, cell) as cctxt:
                cell_stats.append(self.db.compute_node_statistics(cctxt))

        if cell_stats:
            keys = cell_stats[0].keys()
            return {k: sum(stats[k] for stats in cell_stats)
                    for k in keys}
        else:
            return {}
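
    # Illustrative usage (not part of the original module): the merged,
    # cross-cell hypervisor statistics can be dumped like this; the exact
    # keys come from the per-cell compute_node_statistics DB API. 'host_api'
    # and 'ctxt' are assumed names.
    #
    #   stats = host_api.compute_node_statistics(ctxt)
    #   for key, value in sorted(stats.items()):
    #       LOG.debug('hypervisor statistic %s = %s', key, value)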


class InstanceActionAPI(base.Base):
    """Sub-set of the Compute Manager API for managing instance actions."""

    def actions_get(self, context, instance, limit=None, marker=None,
                    filters=None):
        return objects.InstanceActionList.get_by_instance_uuid(
            context, instance.uuid, limit, marker, filters)

    def action_get_by_request_id(self, context, instance, request_id):
        return objects.InstanceAction.get_by_request_id(
            context, instance.uuid, request_id)

    def action_events_get(self, context, instance, action_id):
        return objects.InstanceActionEventList.get_by_action(
            context, action_id)


class AggregateAPI(base.Base):
    """Sub-set of the Compute Manager API for managing host aggregates."""

    def __init__(self, **kwargs):
        self.compute_rpcapi = compute_rpcapi.ComputeAPI()
        self.query_client = query.SchedulerQueryClient()
        self._placement_client = None  # Lazy-load on first access.
        super(AggregateAPI, self).__init__(**kwargs)

    @property
    def placement_client(self):
        if self._placement_client is None:
            self._placement_client = report.SchedulerReportClient()
        return self._placement_client

    @wrap_exception()
    def create_aggregate(self, context, aggregate_name, availability_zone):
        """Creates the model for the aggregate."""
        aggregate = objects.Aggregate(context=context)
        aggregate.name = aggregate_name
        if availability_zone:
            aggregate.metadata = {'availability_zone': availability_zone}
        aggregate.create()
        self.query_client.update_aggregates(context, [aggregate])
        return aggregate

    def get_aggregate(self, context, aggregate_id):
        """Get an aggregate by id."""
        return objects.Aggregate.get_by_id(context, aggregate_id)

    def get_aggregate_list(self, context):
        """Get all the aggregates."""
        return objects.AggregateList.get_all(context)

    def get_aggregates_by_host(self, context, compute_host):
        """Get all the aggregates where the given host is present."""
        return objects.AggregateList.get_by_host(context, compute_host)

    @wrap_exception()
    def update_aggregate(self, context, aggregate_id, values):
        """Update the properties of an aggregate."""
        aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
        if 'name' in values:
            aggregate.name = values.pop('name')
            aggregate.save()
        self.is_safe_to_update_az(context, values, aggregate=aggregate,
                                  action_name=AGGREGATE_ACTION_UPDATE,
                                  check_no_instances_in_az=True)
        if values:
            aggregate.update_metadata(values)
            aggregate.updated_at = timeutils.utcnow()
        self.query_client.update_aggregates(context, [aggregate])
        # If the updated values include availability_zone, then the cache
        # that maps hosts to availability zones needs to be reset.
        if values.get('availability_zone'):
            availability_zones.reset_cache()
        return aggregate

    @wrap_exception()
    def update_aggregate_metadata(self, context, aggregate_id, metadata):
        """Updates the aggregate metadata."""
        aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
        self.is_safe_to_update_az(context, metadata, aggregate=aggregate,
                                  action_name=AGGREGATE_ACTION_UPDATE_META,
                                  check_no_instances_in_az=True)
        aggregate.update_metadata(metadata)
        self.query_client.update_aggregates(context, [aggregate])
        # If the updated metadata includes availability_zone, then the cache
        # that maps hosts to availability zones needs to be reset.
        if metadata and metadata.get('availability_zone'):
            availability_zones.reset_cache()
        aggregate.updated_at = timeutils.utcnow()
        return aggregate

    @wrap_exception()
    def delete_aggregate(self, context, aggregate_id):
        """Deletes the aggregate."""
        aggregate_payload = {'aggregate_id': aggregate_id}
        compute_utils.notify_about_aggregate_update(context,
                                                    "delete.start",
                                                    aggregate_payload)
        aggregate = objects.Aggregate.get_by_id(context, aggregate_id)

        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.DELETE,
            phase=fields_obj.NotificationPhase.START)

        if len(aggregate.hosts) > 0:
            msg = _("Host aggregate is not empty")
            raise exception.InvalidAggregateActionDelete(
                aggregate_id=aggregate_id, reason=msg)
        aggregate.destroy()
        self.query_client.delete_aggregate(context, aggregate)
        compute_utils.notify_about_aggregate_update(context,
                                                    "delete.end",
                                                    aggregate_payload)
        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.DELETE,
            phase=fields_obj.NotificationPhase.END)

    def is_safe_to_update_az(self, context, metadata, aggregate,
                             hosts=None,
                             action_name=AGGREGATE_ACTION_ADD,
                             check_no_instances_in_az=False):
        """Determine if updates alter an aggregate's availability zone.

        :param context: local context
        :param metadata: Target metadata for updating aggregate
        :param aggregate: Aggregate to update
        :param hosts: Hosts to check. If None, aggregate.hosts is used
        :type hosts: list
        :param action_name: Calling method for logging purposes
        :param check_no_instances_in_az: if True, check that there are no
            instances on any hosts of the aggregate
        """
        if 'availability_zone' in metadata:
            if not metadata['availability_zone']:
                msg = _("Aggregate %s does not support empty named "
                        "availability zone") % aggregate.name
                self._raise_invalid_aggregate_exc(action_name, aggregate.id,
                                                  msg)
            _hosts = hosts or aggregate.hosts
            host_aggregates = objects.AggregateList.get_by_metadata_key(
                context, 'availability_zone', hosts=_hosts)
            conflicting_azs = [
                agg.availability_zone for agg in host_aggregates
                if agg.availability_zone != metadata['availability_zone'] and
                agg.id != aggregate.id]
            if conflicting_azs:
                msg = _("One or more hosts already in availability zone(s) "
                        "%s") % conflicting_azs
                self._raise_invalid_aggregate_exc(action_name, aggregate.id,
                                                  msg)
            same_az_name = (aggregate.availability_zone ==
                            metadata['availability_zone'])
            if check_no_instances_in_az and not same_az_name:
                instance_count_by_cell = (
                    nova_context.scatter_gather_skip_cell0(
                        context,
                        objects.InstanceList.get_count_by_hosts,
                        _hosts))
                if any(cnt for cnt in instance_count_by_cell.values()):
                    msg = _("One or more hosts contain instances in this zone")
                    self._raise_invalid_aggregate_exc(
                        action_name, aggregate.id, msg)

    def _raise_invalid_aggregate_exc(self, action_name, aggregate_id, reason):
        if action_name == AGGREGATE_ACTION_ADD:
            raise exception.InvalidAggregateActionAdd(
                aggregate_id=aggregate_id, reason=reason)
        elif action_name == AGGREGATE_ACTION_UPDATE:
            raise exception.InvalidAggregateActionUpdate(
                aggregate_id=aggregate_id, reason=reason)
        elif action_name == AGGREGATE_ACTION_UPDATE_META:
            raise exception.InvalidAggregateActionUpdateMeta(
                aggregate_id=aggregate_id, reason=reason)
        elif action_name == AGGREGATE_ACTION_DELETE:
            raise exception.InvalidAggregateActionDelete(
                aggregate_id=aggregate_id, reason=reason)

        raise exception.NovaException(
            _("Unexpected aggregate action %s") % action_name)

    def _update_az_cache_for_host(self, context, host_name, aggregate_meta):
        # Update the availability_zone cache so that a stale availability
        # zone is not returned during the cache retention time after a host
        # is added to or removed from an aggregate.
        if aggregate_meta and aggregate_meta.get('availability_zone'):
            availability_zones.update_host_availability_zone_cache(context,
                                                                   host_name)

    @wrap_exception()
    def add_host_to_aggregate(self, context, aggregate_id, host_name):
        """Adds the host to an aggregate."""
        aggregate_payload = {'aggregate_id': aggregate_id,
                             'host_name': host_name}
        compute_utils.notify_about_aggregate_update(context,
                                                    "addhost.start",
                                                    aggregate_payload)

        service = _get_service_in_cell_by_host(context, host_name)
        if service.host != host_name:
            # NOTE(danms): If we found a service but it is not an
            # exact match, we may have a case-insensitive backend
            # database (like mysql) which will end up with us
            # adding the host-aggregate mapping with a
            # non-matching hostname.
            raise exception.ComputeHostNotFound(host=host_name)

        aggregate = objects.Aggregate.get_by_id(context, aggregate_id)

        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.ADD_HOST,
            phase=fields_obj.NotificationPhase.START)

        self.is_safe_to_update_az(context, aggregate.metadata,
                                  hosts=[host_name], aggregate=aggregate)

        aggregate.add_host(host_name)
        self.query_client.update_aggregates(context, [aggregate])
        try:
            self.placement_client.aggregate_add_host(
                context, aggregate.uuid, host_name=host_name)
        except (exception.ResourceProviderNotFound,
                exception.ResourceProviderAggregateRetrievalFailed,
                exception.ResourceProviderUpdateFailed,
                exception.ResourceProviderUpdateConflict) as err:
            # NOTE(jaypipes): We don't want a failure to perform the mirroring
            # action in the placement service to be returned to the user (they
            # probably don't know anything about the placement service and
            # would just be confused). So we just log a warning here, noting
            # that on the next run of nova-manage placement sync_aggregates
            # things will go back to normal.
            LOG.warning("Failed to associate %s with a placement "
                        "aggregate: %s. This may be corrected after running "
                        "nova-manage placement sync_aggregates.",
                        host_name, err)
        self._update_az_cache_for_host(context, host_name, aggregate.metadata)
        # NOTE(jogo): Send message to host to support resource pools
        self.compute_rpcapi.add_aggregate_host(context,
            aggregate=aggregate, host_param=host_name, host=host_name)
        aggregate_payload.update({'name': aggregate.name})
        compute_utils.notify_about_aggregate_update(context,
                                                    "addhost.end",
                                                    aggregate_payload)
        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.ADD_HOST,
            phase=fields_obj.NotificationPhase.END)

        return aggregate

    @wrap_exception()
    def remove_host_from_aggregate(self, context, aggregate_id, host_name):
        """Removes host from the aggregate."""
        aggregate_payload = {'aggregate_id': aggregate_id,
                             'host_name': host_name}
        compute_utils.notify_about_aggregate_update(context,
                                                    "removehost.start",
                                                    aggregate_payload)
        _get_service_in_cell_by_host(context, host_name)
        aggregate = objects.Aggregate.get_by_id(context, aggregate_id)

        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.REMOVE_HOST,
            phase=fields_obj.NotificationPhase.START)

        # Remove the resource provider from the provider aggregate first
        # before we change anything on the nova side because if we did the
        # nova stuff first we can't re-attempt this from the compute API if
        # cleaning up placement fails.
        try:
            # Anything else this raises is handled in the route handler as
            # either a 409 (ResourceProviderUpdateConflict) or 500.
            self.placement_client.aggregate_remove_host(
                context, aggregate.uuid, host_name)
        except exception.ResourceProviderNotFound as err:
            # If the resource provider is not found then it's likely not part
            # of the aggregate anymore anyway since provider aggregates are
            # not resources themselves with metadata like nova aggregates,
            # they are just a grouping concept around resource providers.
            # Log and continue.
            LOG.warning("Failed to remove association of %s with a placement "
                        "aggregate: %s.", host_name, err)

        aggregate.delete_host(host_name)
        self.query_client.update_aggregates(context, [aggregate])
        self._update_az_cache_for_host(context, host_name, aggregate.metadata)
        self.compute_rpcapi.remove_aggregate_host(context,
            aggregate=aggregate, host_param=host_name, host=host_name)
        compute_utils.notify_about_aggregate_update(context,
                                                    "removehost.end",
                                                    aggregate_payload)
        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.REMOVE_HOST,
            phase=fields_obj.NotificationPhase.END)

        return aggregate
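
    # Illustrative usage (not part of the original module): a typical
    # create/add/remove flow against an aggregate. 'aggregate_api' and 'ctxt'
    # are assumed names for an AggregateAPI instance and an admin
    # RequestContext; 'rack1', 'az1' and 'compute1' are placeholder values.
    #
    #   agg = aggregate_api.create_aggregate(ctxt, 'rack1', 'az1')
    #   aggregate_api.add_host_to_aggregate(ctxt, agg.id, 'compute1')
    #   ...
    #   aggregate_api.remove_host_from_aggregate(ctxt, agg.id, 'compute1')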


class KeypairAPI(base.Base):
    """Subset of the Compute Manager API for managing key pairs."""

    get_notifier = functools.partial(rpc.get_notifier, service='api')
    wrap_exception = functools.partial(exception_wrapper.wrap_exception,
                                       get_notifier=get_notifier,
                                       binary='nova-api')

    def _notify(self, context, event_suffix, keypair_name):
        payload = {
            'tenant_id': context.project_id,
            'user_id': context.user_id,
            'key_name': keypair_name,
        }
        notify = self.get_notifier()
        notify.info(context, 'keypair.%s' % event_suffix, payload)

    def _validate_new_key_pair(self, context, user_id, key_name, key_type):
        safe_chars = "_- " + string.digits + string.ascii_letters
        clean_value = "".join(x for x in key_name if x in safe_chars)
        if clean_value != key_name:
            raise exception.InvalidKeypair(
                reason=_("Keypair name contains unsafe characters"))

        try:
            utils.check_string_length(key_name, min_length=1, max_length=255)
        except exception.InvalidInput:
            raise exception.InvalidKeypair(
                reason=_('Keypair name must be string and between '
                         '1 and 255 characters long'))

        try:
            objects.Quotas.check_deltas(context, {'key_pairs': 1}, user_id)
        except exception.OverQuota:
            raise exception.KeypairLimitExceeded()

    @wrap_exception()
    def import_key_pair(self, context, user_id, key_name, public_key,
                        key_type=keypair_obj.KEYPAIR_TYPE_SSH):
        """Import a key pair using an existing public key."""
        self._validate_new_key_pair(context, user_id, key_name, key_type)

        self._notify(context, 'import.start', key_name)

        keypair = objects.KeyPair(context)
        keypair.user_id = user_id
        keypair.name = key_name
        keypair.type = key_type
        keypair.fingerprint = None
        keypair.public_key = public_key

        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.IMPORT,
            phase=fields_obj.NotificationPhase.START)

        fingerprint = self._generate_fingerprint(public_key, key_type)

        keypair.fingerprint = fingerprint
        keypair.create()

        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.IMPORT,
            phase=fields_obj.NotificationPhase.END)
        self._notify(context, 'import.end', key_name)

        return keypair

    @wrap_exception()
    def create_key_pair(self, context, user_id, key_name,
                        key_type=keypair_obj.KEYPAIR_TYPE_SSH):
        """Create a new key pair."""
        self._validate_new_key_pair(context, user_id, key_name, key_type)

        keypair = objects.KeyPair(context)
        keypair.user_id = user_id
        keypair.name = key_name
        keypair.type = key_type
        keypair.fingerprint = None
        keypair.public_key = None

        self._notify(context, 'create.start', key_name)
        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.CREATE,
            phase=fields_obj.NotificationPhase.START)

        private_key, public_key, fingerprint = self._generate_key_pair(
            user_id, key_type)

        keypair.fingerprint = fingerprint
        keypair.public_key = public_key
        keypair.create()

        # NOTE(melwitt): We recheck the quota after creating the object to
        # prevent users from allocating more resources than their allowed
        # quota in the event of a race. This is configurable because it can
        # be expensive if strict quota limits are not required in a
        # deployment.
        if CONF.quota.recheck_quota:
            try:
                objects.Quotas.check_deltas(context, {'key_pairs': 0},
                                            user_id)
            except exception.OverQuota:
                keypair.destroy()
                raise exception.KeypairLimitExceeded()

        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.CREATE,
            phase=fields_obj.NotificationPhase.END)

        self._notify(context, 'create.end', key_name)

        return keypair, private_key
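
    # Illustrative usage (not part of the original module): create_key_pair
    # returns both the KeyPair object and the generated private key, which is
    # never stored server side. 'keypair_api' and 'ctxt' are assumed names.
    #
    #   keypair, private_key = keypair_api.create_key_pair(
    #       ctxt, ctxt.user_id, 'mykey')
    #   # hand 'private_key' back to the caller; only the public key and
    #   # fingerprint are persisted on 'keypair'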

    def _generate_fingerprint(self, public_key, key_type):
        if key_type == keypair_obj.KEYPAIR_TYPE_SSH:
            return crypto.generate_fingerprint(public_key)
        elif key_type == keypair_obj.KEYPAIR_TYPE_X509:
            return crypto.generate_x509_fingerprint(public_key)

    def _generate_key_pair(self, user_id, key_type):
        if key_type == keypair_obj.KEYPAIR_TYPE_SSH:
            return crypto.generate_key_pair()
        elif key_type == keypair_obj.KEYPAIR_TYPE_X509:
            return crypto.generate_winrm_x509_cert(user_id)

    @wrap_exception()
    def delete_key_pair(self, context, user_id, key_name):
        """Delete a keypair by name."""
        self._notify(context, 'delete.start', key_name)
        keypair = self.get_key_pair(context, user_id, key_name)
        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.DELETE,
            phase=fields_obj.NotificationPhase.START)
        objects.KeyPair.destroy_by_name(context, user_id, key_name)
        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.DELETE,
            phase=fields_obj.NotificationPhase.END)
        self._notify(context, 'delete.end', key_name)

    def get_key_pairs(self, context, user_id, limit=None, marker=None):
        """List key pairs."""
        return objects.KeyPairList.get_by_user(
            context, user_id, limit=limit, marker=marker)

    def get_key_pair(self, context, user_id, key_name):
        """Get a keypair by name."""
        return objects.KeyPair.get_by_name(context, user_id, key_name)