OpenStack Compute (Nova)

# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# Copyright 2011 Piston Cloud Computing, Inc.
# Copyright 2012-2013 Red Hat, Inc.
# All Rights Reserved.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

"""Handles all requests relating to compute resources (e.g. guest VMs,
networking and storage of VMs, and compute hosts on which they run)."""

import collections
import functools
import re
import string

from castellan import key_manager
import os_traits
from oslo_log import log as logging
from oslo_messaging import exceptions as oslo_exceptions
from oslo_serialization import base64 as base64utils
from oslo_utils import excutils
from oslo_utils import strutils
from oslo_utils import timeutils
from oslo_utils import units
from oslo_utils import uuidutils
import six
from six.moves import range

from nova.accelerator import cyborg
from nova import availability_zones
from nova import block_device
from nova.compute import flavors
from nova.compute import instance_actions
from nova.compute import instance_list
from nova.compute import migration_list
from nova.compute import power_state
from nova.compute import rpcapi as compute_rpcapi
from nova.compute import task_states
from nova.compute import utils as compute_utils
from nova.compute.utils import wrap_instance_event
from nova.compute import vm_states
from nova import conductor
import nova.conf
from nova import context as nova_context
from nova import crypto
from nova.db import base
from nova.db.sqlalchemy import api as db_api
from nova import exception
from nova import exception_wrapper
from nova.i18n import _
from nova.image import glance
from nova.network import constants
from nova.network import model as network_model
from nova.network import neutron
from nova.network import security_group_api
from nova import objects
from nova.objects import block_device as block_device_obj
from nova.objects import external_event as external_event_obj
from nova.objects import fields as fields_obj
from nova.objects import image_meta as image_meta_obj
from nova.objects import keypair as keypair_obj
from nova.objects import quotas as quotas_obj
from nova.pci import request as pci_request
from nova.policies import servers as servers_policies
import nova.policy
from nova import profiler
from nova import rpc
from nova.scheduler.client import query
from nova.scheduler.client import report
from nova.scheduler import utils as scheduler_utils
from nova import servicegroup
from nova import utils
from nova.virt import hardware
from nova.volume import cinder

LOG = logging.getLogger(__name__)

get_notifier = functools.partial(rpc.get_notifier, service='compute')
# NOTE(gibi): legacy notification used compute as a service but these
# calls still run on the client side of the compute service which is
# nova-api. By setting the binary to nova-api below, we can make sure
# that the new versioned notifications have the right publisher_id while
# the legacy notifications do not change.
wrap_exception = functools.partial(exception_wrapper.wrap_exception,
                                   get_notifier=get_notifier,
                                   binary='nova-api')
CONF = nova.conf.CONF

AGGREGATE_ACTION_UPDATE = 'Update'
AGGREGATE_ACTION_UPDATE_META = 'UpdateMeta'
AGGREGATE_ACTION_DELETE = 'Delete'
AGGREGATE_ACTION_ADD = 'Add'

MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED = 38
MIN_COMPUTE_CROSS_CELL_RESIZE = 47
MIN_COMPUTE_SAME_HOST_COLD_MIGRATE = 48

# TODO(huaqiang): Remove in Wallaby
MIN_VER_NOVA_COMPUTE_MIXED_POLICY = 52

# FIXME(danms): Keep a global cache of the cells we find the
# first time we look. This needs to be refreshed on a timer or
# trigger.
CELLS = []

def check_instance_state(vm_state=None, task_state=(None,),
                         must_have_launched=True):
    """Decorator to check VM and/or task state before entry to API functions.

    If the instance is in the wrong state, or has not been successfully
    started at least once the wrapper will raise an exception.
    """
    if vm_state is not None and not isinstance(vm_state, set):
        vm_state = set(vm_state)
    if task_state is not None and not isinstance(task_state, set):
        task_state = set(task_state)

    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            if vm_state is not None and instance.vm_state not in vm_state:
                raise exception.InstanceInvalidState(
                    attr='vm_state',
                    instance_uuid=instance.uuid,
                    state=instance.vm_state,
                    method=f.__name__)
            if (task_state is not None and
                    instance.task_state not in task_state):
                raise exception.InstanceInvalidState(
                    attr='task_state',
                    instance_uuid=instance.uuid,
                    state=instance.task_state,
                    method=f.__name__)
            if must_have_launched and not instance.launched_at:
                raise exception.InstanceInvalidState(
                    attr='launched_at',
                    instance_uuid=instance.uuid,
                    state=instance.launched_at,
                    method=f.__name__)

            return f(self, context, instance, *args, **kw)
        return inner
    return outer
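
# Illustrative sketch (not from the original source): API methods later in
# this module apply the decorator roughly as below, so a call is rejected
# with InstanceInvalidState unless the instance is in an allowed vm_state.
# The method name and the allowed states here are assumptions for
# illustration only; each decorated method defines its own allowed states.
#
#     @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED])
#     def example_action(self, context, instance):
#         ...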

def _set_or_none(q):
    return q if q is None or isinstance(q, set) else set(q)


def reject_instance_state(vm_state=None, task_state=None):
    """Decorator. Raise InstanceInvalidState if instance is in any of the
    given states.
    """
    vm_state = _set_or_none(vm_state)
    task_state = _set_or_none(task_state)

    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            _InstanceInvalidState = functools.partial(
                exception.InstanceInvalidState,
                instance_uuid=instance.uuid,
                method=f.__name__)

            if vm_state is not None and instance.vm_state in vm_state:
                raise _InstanceInvalidState(
                    attr='vm_state', state=instance.vm_state)

            if task_state is not None and instance.task_state in task_state:
                raise _InstanceInvalidState(
                    attr='task_state', state=instance.task_state)

            return f(self, context, instance, *args, **kw)
        return inner
    return outer


def check_instance_host(check_is_up=False):
    """Validate the instance.host before performing the operation.

    At a minimum this method will check that the instance.host is set.

    :param check_is_up: If True, check that the instance.host status is UP
        or MAINTENANCE (disabled but not down).
    :raises: InstanceNotReady if the instance.host is not set
    :raises: ServiceUnavailable if check_is_up=True and the instance.host
        compute service status is not UP or MAINTENANCE
    """
    def outer(function):
        @six.wraps(function)
        def wrapped(self, context, instance, *args, **kwargs):
            if not instance.host:
                raise exception.InstanceNotReady(instance_id=instance.uuid)
            if check_is_up:
                # Make sure the source compute service is not down otherwise
                # we cannot proceed.
                host_status = self.get_instance_host_status(instance)
                if host_status not in (fields_obj.HostStatus.UP,
                                       fields_obj.HostStatus.MAINTENANCE):
                    # ComputeServiceUnavailable would make more sense here but
                    # we do not want to leak hostnames to end users.
                    raise exception.ServiceUnavailable()
            return function(self, context, instance, *args, **kwargs)
        return wrapped
    return outer


def check_instance_lock(function):
    @six.wraps(function)
    def inner(self, context, instance, *args, **kwargs):
        if instance.locked and not context.is_admin:
            raise exception.InstanceIsLocked(instance_uuid=instance.uuid)
        return function(self, context, instance, *args, **kwargs)
    return inner


def reject_sev_instances(operation):
    """Decorator. Raise OperationNotSupportedForSEV if instance has SEV
    enabled.
    """
    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            if hardware.get_mem_encryption_constraint(instance.flavor,
                                                      instance.image_meta):
                raise exception.OperationNotSupportedForSEV(
                    instance_uuid=instance.uuid,
                    operation=operation)
            return f(self, context, instance, *args, **kw)
        return inner
    return outer

def _diff_dict(orig, new):
    """Return a dict describing how to change orig to new. The keys
    correspond to values that have changed; the value will be a list
    of one or two elements. The first element of the list will be
    either '+' or '-', indicating whether the key was updated or
    deleted; if the key was updated, the list will contain a second
    element, giving the updated value.
    """
    # Figure out what keys went away
    result = {k: ['-'] for k in set(orig.keys()) - set(new.keys())}
    # Compute the updates
    for key, value in new.items():
        if key not in orig or value != orig[key]:
            result[key] = ['+', value]
    return result
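
# Illustrative example (not from the original source) of _diff_dict():
#     _diff_dict({'a': 1, 'b': 2}, {'b': 3, 'c': 4})
# returns {'a': ['-'], 'b': ['+', 3], 'c': ['+', 4]}, i.e. 'a' was removed
# while 'b' and 'c' carry their new values after the '+' marker.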

def load_cells():
    global CELLS
    if not CELLS:
        CELLS = objects.CellMappingList.get_all(
            nova_context.get_admin_context())
        LOG.debug('Found %(count)i cells: %(cells)s',
                  dict(count=len(CELLS),
                       cells=','.join([c.identity for c in CELLS])))

    if not CELLS:
        LOG.error('No cells are configured, unable to continue')


def _get_image_meta_obj(image_meta_dict):
    try:
        image_meta = objects.ImageMeta.from_dict(image_meta_dict)
    except ValueError as e:
        # there must be invalid values in the image meta properties so
        # consider this an invalid request
        msg = _('Invalid image metadata. Error: %s') % six.text_type(e)
        raise exception.InvalidRequest(msg)
    return image_meta


def block_accelerators(func):
    @functools.wraps(func)
    def wrapper(self, context, instance, *args, **kwargs):
        dp_name = instance.flavor.extra_specs.get('accel:device_profile')
        if dp_name:
            raise exception.ForbiddenWithAccelerators()
        return func(self, context, instance, *args, **kwargs)
    return wrapper

@profiler.trace_cls("compute_api")
class API(base.Base):
    """API for interacting with the compute manager."""

    def __init__(self, image_api=None, network_api=None, volume_api=None,
                 **kwargs):
        self.image_api = image_api or glance.API()
        self.network_api = network_api or neutron.API()
        self.volume_api = volume_api or cinder.API()
        self._placementclient = None  # Lazy-load on first access.
        self.compute_rpcapi = compute_rpcapi.ComputeAPI()
        self.compute_task_api = conductor.ComputeTaskAPI()
        self.servicegroup_api = servicegroup.API()
        self.host_api = HostAPI(self.compute_rpcapi, self.servicegroup_api)
        self.notifier = rpc.get_notifier('compute', CONF.host)
        if CONF.ephemeral_storage_encryption.enabled:
            self.key_manager = key_manager.API()
        # Help us to record host in EventReporter
        self.host = CONF.host

        super(API, self).__init__(**kwargs)

    def _record_action_start(self, context, instance, action):
        objects.InstanceAction.action_start(context, instance.uuid,
                                            action, want_result=False)

    def _check_injected_file_quota(self, context, injected_files):
        """Enforce quota limits on injected files.

        Raises a QuotaError if any limit is exceeded.
        """
        if not injected_files:
            return

        # Check number of files first
        try:
            objects.Quotas.limit_check(context,
                                       injected_files=len(injected_files))
        except exception.OverQuota:
            raise exception.OnsetFileLimitExceeded()

        # OK, now count path and content lengths; we're looking for
        # the max...
        max_path = 0
        max_content = 0
        for path, content in injected_files:
            max_path = max(max_path, len(path))
            max_content = max(max_content, len(content))

        try:
            objects.Quotas.limit_check(
                context,
                injected_file_path_bytes=max_path,
                injected_file_content_bytes=max_content)
        except exception.OverQuota as exc:
            # Favor path limit over content limit for reporting
            # purposes
            if 'injected_file_path_bytes' in exc.kwargs['overs']:
                raise exception.OnsetFilePathLimitExceeded(
                    allowed=exc.kwargs['quotas']['injected_file_path_bytes'])
            else:
                raise exception.OnsetFileContentLimitExceeded(
                    allowed=exc.kwargs['quotas'][
                        'injected_file_content_bytes'])

    def _check_metadata_properties_quota(self, context, metadata=None):
        """Enforce quota limits on metadata properties."""
        if not metadata:
            return
        if not isinstance(metadata, dict):
            msg = (_("Metadata type should be dict."))
            raise exception.InvalidMetadata(reason=msg)
        num_metadata = len(metadata)
        try:
            objects.Quotas.limit_check(context, metadata_items=num_metadata)
        except exception.OverQuota as exc:
            quota_metadata = exc.kwargs['quotas']['metadata_items']
            raise exception.MetadataLimitExceeded(allowed=quota_metadata)

        # Because metadata is stored in the DB, we hard-code the size limits.
        # In future, we may support more variable length strings, so we act
        # as if this is quota-controlled for forwards compatibility.
        # These checks are only used by the V2 API; from the V2.1 API on they
        # are handled by API-layer schema validation.
        for k, v in metadata.items():
            try:
                utils.check_string_length(v)
                utils.check_string_length(k, min_length=1)
            except exception.InvalidInput as e:
                raise exception.InvalidMetadata(reason=e.format_message())

            if len(k) > 255:
                msg = _("Metadata property key greater than 255 characters")
                raise exception.InvalidMetadataSize(reason=msg)
            if len(v) > 255:
                msg = _("Metadata property value greater than 255 characters")
                raise exception.InvalidMetadataSize(reason=msg)

    def _check_requested_secgroups(self, context, secgroups):
        """Check if the security group requested exists and belongs to
        the project.

        :param context: The nova request context.
        :type context: nova.context.RequestContext
        :param secgroups: list of requested security group names
        :type secgroups: list
        :returns: list of requested security group UUIDs; note that 'default'
            is a special case and will be unmodified if it's requested.
        """
        security_groups = []
        for secgroup in secgroups:
            # NOTE(sdague): default is handled special
            if secgroup == "default":
                security_groups.append(secgroup)
                continue
            secgroup_uuid = security_group_api.validate_name(context,
                                                             secgroup)
            security_groups.append(secgroup_uuid)

        return security_groups

    def _check_requested_networks(self, context, requested_networks,
                                  max_count):
        """Check if the requested networks belong to the project and that
        the fixed IP address for each network provided is within the same
        network block.
        """
        if requested_networks is not None:
            if requested_networks.no_allocate:
                # If the network request was specifically 'none' meaning don't
                # allocate any networks, we just return the number of
                # requested instances since quotas don't change at all.
                return max_count

            # NOTE(danms): Temporary transition
            requested_networks = requested_networks.as_tuples()

        return self.network_api.validate_networks(context,
                                                  requested_networks,
                                                  max_count)

    def _handle_kernel_and_ramdisk(self, context, kernel_id, ramdisk_id,
                                   image):
        """Choose kernel and ramdisk appropriate for the instance.

        The kernel and ramdisk can be chosen in one of two ways:

            1. Passed in with create-instance request.

            2. Inherited from image metadata.

        If inherited from image metadata, and if that image metadata value is
        set to 'nokernel', both kernel and ramdisk will default to None.
        """
        # Inherit from image if not specified
        image_properties = image.get('properties', {})

        if kernel_id is None:
            kernel_id = image_properties.get('kernel_id')

        if ramdisk_id is None:
            ramdisk_id = image_properties.get('ramdisk_id')

        # Force to None if kernel_id indicates that a kernel is not to be used
        if kernel_id == 'nokernel':
            kernel_id = None
            ramdisk_id = None

        # Verify kernel and ramdisk exist (fail-fast)
        if kernel_id is not None:
            kernel_image = self.image_api.get(context, kernel_id)
            # kernel_id could have been a URI, not a UUID, so to keep behaviour
            # from before, which leaked that implementation detail out to the
            # caller, we return the image UUID of the kernel image and ramdisk
            # image (below) and not any image URIs that might have been
            # supplied.
            # TODO(jaypipes): Get rid of this silliness once we move to a real
            # Image object and hide all of that stuff within nova.image.glance
            kernel_id = kernel_image['id']

        if ramdisk_id is not None:
            ramdisk_image = self.image_api.get(context, ramdisk_id)
            ramdisk_id = ramdisk_image['id']

        return kernel_id, ramdisk_id

    @staticmethod
    def parse_availability_zone(context, availability_zone):
        # NOTE(vish): We have a legacy hack to allow admins to specify hosts
        #             via az using az:host:node. It might be nice to expose an
        #             api to specify specific hosts to force onto, but for
        #             now it just supports this legacy hack.
        # NOTE(deva): It is also possible to specify az::node, in which case
        #             the host manager will determine the correct host.
        forced_host = None
        forced_node = None
        if availability_zone and ':' in availability_zone:
            c = availability_zone.count(':')
            if c == 1:
                availability_zone, forced_host = availability_zone.split(':')
            elif c == 2:
                if '::' in availability_zone:
                    availability_zone, forced_node = \
                        availability_zone.split('::')
                else:
                    availability_zone, forced_host, forced_node = \
                        availability_zone.split(':')
            else:
                raise exception.InvalidInput(
                    reason="Unable to parse availability_zone")
        if not availability_zone:
            availability_zone = CONF.default_schedule_zone

        return availability_zone, forced_host, forced_node
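
    # Illustrative examples (not from the original source), assuming
    # CONF.default_schedule_zone is left at its default of None:
    #     'az1'             -> ('az1', None, None)
    #     'az1:host1'       -> ('az1', 'host1', None)
    #     'az1::node1'      -> ('az1', None, 'node1')
    #     'az1:host1:node1' -> ('az1', 'host1', 'node1')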

    def _ensure_auto_disk_config_is_valid(self, auto_disk_config_img,
                                          auto_disk_config, image):
        auto_disk_config_disabled = \
            utils.is_auto_disk_config_disabled(auto_disk_config_img)
        if auto_disk_config_disabled and auto_disk_config:
            raise exception.AutoDiskConfigDisabledByImage(image=image)

    def _inherit_properties_from_image(self, image, auto_disk_config):
        image_properties = image.get('properties', {})
        auto_disk_config_img = \
            utils.get_auto_disk_config_from_image_props(image_properties)
        self._ensure_auto_disk_config_is_valid(auto_disk_config_img,
                                               auto_disk_config,
                                               image.get("id"))
        if auto_disk_config is None:
            auto_disk_config = strutils.bool_from_string(auto_disk_config_img)

        return {
            'os_type': image_properties.get('os_type'),
            'architecture': image_properties.get('architecture'),
            'vm_mode': image_properties.get('vm_mode'),
            'auto_disk_config': auto_disk_config
        }

    def _check_config_drive(self, config_drive):
        if config_drive:
            try:
                bool_val = strutils.bool_from_string(config_drive,
                                                     strict=True)
            except ValueError:
                raise exception.ConfigDriveInvalidValue(option=config_drive)
        else:
            bool_val = False
        # FIXME(comstud): Bug ID 1193438 filed for this. This looks silly,
        # but this is because the config drive column is a String. False
        # is represented by using an empty string. And for whatever
        # reason, we rely on the DB to cast True to a String.
        return True if bool_val else ''
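
    # Illustrative examples (not from the original source):
    #     _check_config_drive('true')  -> True
    #     _check_config_drive(None)    -> ''   (False stored as empty string)
    #     _check_config_drive('junk')  -> raises ConfigDriveInvalidValue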

    def _validate_flavor_image(self, context, image_id, image,
                               instance_type, root_bdm, validate_numa=True):
        """Validate the flavor and image.

        This is called from the API service to ensure that the flavor
        extra-specs and image properties are self-consistent and compatible
        with each other.

        :param context: A context.RequestContext
        :param image_id: UUID of the image
        :param image: a dict representation of the image including properties,
                      enforces the image status is active.
        :param instance_type: Flavor object
        :param root_bdm: BlockDeviceMapping for root disk. Will be None for
               the resize case.
        :param validate_numa: Flag to indicate whether or not to validate
               the NUMA-related metadata.
        :raises: Many different possible exceptions. See
                 api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
                 for the full list.
        """
        if image and image['status'] != 'active':
            raise exception.ImageNotActive(image_id=image_id)
        self._validate_flavor_image_nostatus(context, image, instance_type,
                                             root_bdm, validate_numa)

    @staticmethod
    def _detect_nonbootable_image_from_properties(image_id, image):
        """Check image for a property indicating it's nonbootable.

        This is called from the API service to ensure that there are
        no known image properties indicating that this image is of a
        type that we do not support booting from.

        Currently the only such property is 'cinder_encryption_key_id'.

        :param image_id: UUID of the image
        :param image: a dict representation of the image including properties
        :raises: ImageUnacceptable if the image properties indicate
                 that booting this image is not supported
        """
        if not image:
            return

        image_properties = image.get('properties', {})
        if image_properties.get('cinder_encryption_key_id'):
            reason = _('Direct booting of an image uploaded from an '
                       'encrypted volume is unsupported.')
            raise exception.ImageUnacceptable(image_id=image_id,
                                              reason=reason)

    @staticmethod
    def _validate_flavor_image_nostatus(context, image, instance_type,
                                        root_bdm, validate_numa=True,
                                        validate_pci=False):
        """Validate the flavor and image.

        This is called from the API service to ensure that the flavor
        extra-specs and image properties are self-consistent and compatible
        with each other.

        :param context: A context.RequestContext
        :param image: a dict representation of the image including properties
        :param instance_type: Flavor object
        :param root_bdm: BlockDeviceMapping for root disk. Will be None for
               the resize case.
        :param validate_numa: Flag to indicate whether or not to validate
               the NUMA-related metadata.
        :param validate_pci: Flag to indicate whether or not to validate
               the PCI-related metadata.
        :raises: Many different possible exceptions. See
                 api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
                 for the full list.
        """
        if not image:
            return

        image_properties = image.get('properties', {})
        config_drive_option = image_properties.get(
            'img_config_drive', 'optional')
        if config_drive_option not in ['optional', 'mandatory']:
            raise exception.InvalidImageConfigDrive(
                config_drive=config_drive_option)

        if instance_type['memory_mb'] < int(image.get('min_ram') or 0):
            raise exception.FlavorMemoryTooSmall()

        # Image min_disk is in gb, size is in bytes. For sanity, have them
        # both in bytes.
        image_min_disk = int(image.get('min_disk') or 0) * units.Gi
        image_size = int(image.get('size') or 0)

        # Target disk is a volume. Don't check flavor disk size because it
        # doesn't make sense, and check min_disk against the volume size.
        if root_bdm is not None and root_bdm.is_volume:
            # There are 2 possibilities here:
            #
            # 1. The target volume already exists but bdm.volume_size is not
            #    yet set because this method is called before
            #    _bdm_validate_set_size_and_instance during server create.
            # 2. The target volume doesn't exist, in which case the bdm will
            #    contain the intended volume size
            #
            # Note that rebuild also calls this method with potentially a new
            # image but you can't rebuild a volume-backed server with a new
            # image (yet).
            #
            # Cinder does its own check against min_disk, so if the target
            # volume already exists this has already been done and we don't
            # need to check it again here. In this case, volume_size may not
            # be set on the bdm.
            #
            # If we're going to create the volume, the bdm will contain
            # volume_size. Therefore we should check it if it exists. This
            # will still be checked again by cinder when the volume is
            # created, but that will not happen until the request reaches a
            # host. By checking it here, the user gets an immediate and useful
            # failure indication.
            #
            # The third possibility is that we have failed to consider
            # something, and there are actually more than 2 possibilities. In
            # this case cinder will still do the check at volume creation
            # time. The behaviour will still be correct, but the user will not
            # get an immediate failure from the api, and will instead have to
            # determine why the instance is in an error state with a task of
            # block_device_mapping.
            #
            # We could reasonably refactor this check into _validate_bdm at
            # some future date, as the various size logic is already split out
            # in there.
            dest_size = root_bdm.volume_size
            if dest_size is not None:
                dest_size *= units.Gi

                if image_min_disk > dest_size:
                    raise exception.VolumeSmallerThanMinDisk(
                        volume_size=dest_size, image_min_disk=image_min_disk)

        # Target disk is a local disk whose size is taken from the flavor
        else:
            dest_size = instance_type['root_gb'] * units.Gi

            # NOTE(johannes): root_gb is allowed to be 0 for legacy reasons
            # since libvirt interpreted the value differently than other
            # drivers. A value of 0 means don't check size.
            if dest_size != 0:
                if image_size > dest_size:
                    raise exception.FlavorDiskSmallerThanImage(
                        flavor_size=dest_size, image_size=image_size)

                if image_min_disk > dest_size:
                    raise exception.FlavorDiskSmallerThanMinDisk(
                        flavor_size=dest_size, image_min_disk=image_min_disk)
            else:
                # The user is attempting to create a server with a 0-disk
                # image-backed flavor, which can lead to issues with a large
                # image consuming an unexpectedly large amount of local disk
                # on the compute host. Check to see if the deployment will
                # allow that.
                if not context.can(
                        servers_policies.ZERO_DISK_FLAVOR, fatal=False):
                    raise exception.BootFromVolumeRequiredForZeroDiskFlavor()

        API._validate_flavor_image_numa_pci(
            image, instance_type, validate_numa=validate_numa,
            validate_pci=validate_pci)

    # TODO(huaqiang): Remove in Wallaby when there is no nova-compute node
    # having a version prior to Victoria.
    @staticmethod
    def _check_compute_service_for_mixed_instance(numa_topology):
        """Check if the nova-compute service is ready to support mixed
        instances when the CPU allocation policy is 'mixed'.
        """
        # No need to check an instance that has no NUMA topology associated
        # with it.
        if numa_topology is None:
            return

        # No need to check if the instance CPU policy is not 'mixed'
        if numa_topology.cpu_policy != fields_obj.CPUAllocationPolicy.MIXED:
            return

        # Catch a request creating a mixed instance and make sure all
        # nova-compute services have been upgraded to support the mixed
        # policy.
        minimal_version = objects.service.get_minimum_version_all_cells(
            nova_context.get_admin_context(), ['nova-compute'])

        if minimal_version < MIN_VER_NOVA_COMPUTE_MIXED_POLICY:
            raise exception.MixedInstanceNotSupportByComputeService()

    @staticmethod
    def _validate_flavor_image_numa_pci(image, instance_type,
                                        validate_numa=True,
                                        validate_pci=False):
        """Validate the flavor and image NUMA/PCI values.

        This is called from the API service to ensure that the flavor
        extra-specs and image properties are self-consistent and compatible
        with each other.

        :param image: a dict representation of the image including properties
        :param instance_type: Flavor object
        :param validate_numa: Flag to indicate whether or not to validate
               the NUMA-related metadata.
        :param validate_pci: Flag to indicate whether or not to validate
               the PCI-related metadata.
        :raises: Many different possible exceptions. See
                 api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
                 for the full list.
        """
        image_meta = _get_image_meta_obj(image)

        API._validate_flavor_image_mem_encryption(instance_type, image_meta)

        # validate PMU extra spec and image metadata
        flavor_pmu = instance_type.extra_specs.get('hw:pmu')
        image_pmu = image_meta.properties.get('hw_pmu')
        if (flavor_pmu is not None and image_pmu is not None and
                image_pmu != strutils.bool_from_string(flavor_pmu)):
            raise exception.ImagePMUConflict()

        # Only validate values of flavor/image so the return results of
        # following 'get' functions are not used.
        hardware.get_number_of_serial_ports(instance_type, image_meta)
        hardware.get_realtime_cpu_constraint(instance_type, image_meta)
        hardware.get_cpu_topology_constraints(instance_type, image_meta)
        if validate_numa:
            hardware.numa_get_constraints(instance_type, image_meta)
        if validate_pci:
            pci_request.get_pci_requests_from_flavor(instance_type)

    @staticmethod
    def _validate_flavor_image_mem_encryption(instance_type, image):
        """Validate that the flavor and image don't make contradictory
        requests regarding memory encryption.

        :param instance_type: Flavor object
        :param image: an ImageMeta object
        :raises: nova.exception.FlavorImageConflict
        """
        # This library function will raise the exception for us if
        # necessary; if not, we can ignore the result returned.
        hardware.get_mem_encryption_constraint(instance_type, image)

    def _get_image_defined_bdms(self, instance_type, image_meta,
                                root_device_name):
        image_properties = image_meta.get('properties', {})

        # Get the block device mappings defined by the image.
        image_defined_bdms = image_properties.get('block_device_mapping', [])
        legacy_image_defined = not image_properties.get('bdm_v2', False)

        image_mapping = image_properties.get('mappings', [])

        if legacy_image_defined:
            image_defined_bdms = block_device.from_legacy_mapping(
                image_defined_bdms, None, root_device_name)
        else:
            image_defined_bdms = list(map(block_device.BlockDeviceDict,
                                          image_defined_bdms))

        if image_mapping:
            image_mapping = self._prepare_image_mapping(instance_type,
                                                        image_mapping)
            image_defined_bdms = self._merge_bdms_lists(
                image_mapping, image_defined_bdms)

        return image_defined_bdms

    def _get_flavor_defined_bdms(self, instance_type, block_device_mapping):
        flavor_defined_bdms = []

        have_ephemeral_bdms = any(filter(
            block_device.new_format_is_ephemeral, block_device_mapping))
        have_swap_bdms = any(filter(
            block_device.new_format_is_swap, block_device_mapping))

        if instance_type.get('ephemeral_gb') and not have_ephemeral_bdms:
            flavor_defined_bdms.append(
                block_device.create_blank_bdm(instance_type['ephemeral_gb']))
        if instance_type.get('swap') and not have_swap_bdms:
            flavor_defined_bdms.append(
                block_device.create_blank_bdm(instance_type['swap'], 'swap'))

        return flavor_defined_bdms

    def _merge_bdms_lists(self, overridable_mappings, overrider_mappings):
        """Override any block devices from the first list by device name.

        :param overridable_mappings: list which items are overridden
        :param overrider_mappings: list which items override

        :returns: A merged list of bdms
        """
        device_names = set(bdm['device_name'] for bdm in overrider_mappings
                           if bdm['device_name'])
        return (overrider_mappings +
                [bdm for bdm in overridable_mappings
                 if bdm['device_name'] not in device_names])
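
    # Illustrative example (not from the original source): given
    #     overridable = [{'device_name': '/dev/vda', 'source_type': 'image'}]
    #     overrider   = [{'device_name': '/dev/vda', 'source_type': 'volume'}]
    # _merge_bdms_lists(overridable, overrider) keeps only the overrider
    # entry for '/dev/vda'; entries from the second list win whenever the
    # device names collide.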

    def _check_and_transform_bdm(self, context, base_options, instance_type,
                                 image_meta, min_count, max_count,
                                 block_device_mapping, legacy_bdm):
        # NOTE (ndipanov): Assume root dev name is 'vda' if not supplied.
        #                  It's needed for legacy conversion to work.
        root_device_name = (base_options.get('root_device_name') or 'vda')
        image_ref = base_options.get('image_ref', '')
        # If the instance is booted by image and has a volume attached,
        # the volume cannot have the same device name as root_device_name
        if image_ref:
            for bdm in block_device_mapping:
                if (bdm.get('destination_type') == 'volume' and
                        block_device.strip_dev(bdm.get(
                            'device_name')) == root_device_name):
                    msg = _('The volume cannot be assigned the same device'
                            ' name as the root device %s') % root_device_name
                    raise exception.InvalidRequest(msg)

        image_defined_bdms = self._get_image_defined_bdms(
            instance_type, image_meta, root_device_name)
        root_in_image_bdms = (
            block_device.get_root_bdm(image_defined_bdms) is not None)

        if legacy_bdm:
            block_device_mapping = block_device.from_legacy_mapping(
                block_device_mapping, image_ref, root_device_name,
                no_root=root_in_image_bdms)
        elif root_in_image_bdms:
            # NOTE (ndipanov): client will insert an image mapping into the v2
            # block_device_mapping, but if there is a bootable device in image
            # mappings - we need to get rid of the inserted image
            # NOTE (gibi): another case is when a server is booted with an
            # image to bdm mapping where the image only contains a bdm to a
            # snapshot. In this case the other image to bdm mapping
            # contains an unnecessary device with boot_index == 0.
            # Also in this case the image_ref is None as we are booting from
            # an image to volume bdm.
            def not_image_and_root_bdm(bdm):
                return not (bdm.get('boot_index') == 0 and
                            bdm.get('source_type') == 'image')

            block_device_mapping = list(
                filter(not_image_and_root_bdm, block_device_mapping))

        block_device_mapping = self._merge_bdms_lists(
            image_defined_bdms, block_device_mapping)

        if min_count > 1 or max_count > 1:
            if any(map(lambda bdm: bdm['source_type'] == 'volume',
                       block_device_mapping)):
                msg = _('Cannot attach one or more volumes to multiple'
                        ' instances')
                raise exception.InvalidRequest(msg)

        block_device_mapping += self._get_flavor_defined_bdms(
            instance_type, block_device_mapping)

        return block_device_obj.block_device_make_list_from_dicts(
            context, block_device_mapping)

    def _get_image(self, context, image_href):
        if not image_href:
            return None, {}

        image = self.image_api.get(context, image_href)
        return image['id'], image

    def _checks_for_create_and_rebuild(self, context, image_id, image,
                                       instance_type, metadata,
                                       files_to_inject, root_bdm,
                                       validate_numa=True):
        self._check_metadata_properties_quota(context, metadata)
        self._check_injected_file_quota(context, files_to_inject)
        self._detect_nonbootable_image_from_properties(image_id, image)
        self._validate_flavor_image(context, image_id, image,
                                    instance_type, root_bdm,
                                    validate_numa=validate_numa)

    def _validate_and_build_base_options(self, context, instance_type,
                                         boot_meta, image_href, image_id,
                                         kernel_id, ramdisk_id, display_name,
                                         display_description, key_name,
                                         key_data, security_groups,
                                         availability_zone, user_data,
                                         metadata, access_ip_v4, access_ip_v6,
                                         requested_networks, config_drive,
                                         auto_disk_config, reservation_id,
                                         max_count,
                                         supports_port_resource_request):
        """Verify all the input parameters regardless of the provisioning
        strategy being performed.
        """
        if instance_type['disabled']:
            raise exception.FlavorNotFound(flavor_id=instance_type['id'])

        if user_data:
            try:
                base64utils.decode_as_bytes(user_data)
            except TypeError:
                raise exception.InstanceUserDataMalformed()

        # When using Neutron, _check_requested_secgroups will translate and
        # return any requested security group names to uuids.
        security_groups = self._check_requested_secgroups(
            context, security_groups)

        # Note: max_count is the number of instances requested by the user,
        # max_network_count is the maximum number of instances taking into
        # account any network quotas
        max_network_count = self._check_requested_networks(
            context, requested_networks, max_count)

        kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
            context, kernel_id, ramdisk_id, boot_meta)

        config_drive = self._check_config_drive(config_drive)

        if key_data is None and key_name is not None:
            key_pair = objects.KeyPair.get_by_name(context,
                                                   context.user_id,
                                                   key_name)
            key_data = key_pair.public_key
        else:
            key_pair = None

        root_device_name = block_device.prepend_dev(
            block_device.properties_root_device_name(
                boot_meta.get('properties', {})))

        image_meta = _get_image_meta_obj(boot_meta)
        numa_topology = hardware.numa_get_constraints(
            instance_type, image_meta)

        system_metadata = {}

        pci_numa_affinity_policy = hardware.get_pci_numa_policy_constraint(
            instance_type, image_meta)

        # PCI requests come from two sources: instance flavor and
        # requested_networks. The first call below returns an
        # InstancePCIRequests object, which is a list of InstancePCIRequest
        # objects. The second call below creates an InstancePCIRequest
        # object for each SR-IOV port and appends it to the list in the
        # InstancePCIRequests object.
        pci_request_info = pci_request.get_pci_requests_from_flavor(
            instance_type, affinity_policy=pci_numa_affinity_policy)
        result = self.network_api.create_resource_requests(
            context, requested_networks, pci_request_info,
            affinity_policy=pci_numa_affinity_policy)
        network_metadata, port_resource_requests = result

        # Creating servers with ports that have resource requests, like QoS
        # minimum bandwidth rules, is only supported in a requested minimum
        # microversion.
        if port_resource_requests and not supports_port_resource_request:
            raise exception.CreateWithPortResourceRequestOldVersion()

        base_options = {
            'reservation_id': reservation_id,
            'image_ref': image_href,
            'kernel_id': kernel_id or '',
            'ramdisk_id': ramdisk_id or '',
            'power_state': power_state.NOSTATE,
            'vm_state': vm_states.BUILDING,
            'config_drive': config_drive,
            'user_id': context.user_id,
            'project_id': context.project_id,
            'instance_type_id': instance_type['id'],
            'memory_mb': instance_type['memory_mb'],
            'vcpus': instance_type['vcpus'],
            'root_gb': instance_type['root_gb'],
            'ephemeral_gb': instance_type['ephemeral_gb'],
            'display_name': display_name,
            'display_description': display_description,
            'user_data': user_data,
            'key_name': key_name,
            'key_data': key_data,
            'locked': False,
            'metadata': metadata or {},
            'access_ip_v4': access_ip_v4,
            'access_ip_v6': access_ip_v6,
            'availability_zone': availability_zone,
            'root_device_name': root_device_name,
            'progress': 0,
            'pci_requests': pci_request_info,
            'numa_topology': numa_topology,
            'system_metadata': system_metadata,
            'port_resource_requests': port_resource_requests}

        options_from_image = self._inherit_properties_from_image(
            boot_meta, auto_disk_config)

        base_options.update(options_from_image)

        # return the validated options and maximum number of instances allowed
        # by the network quotas
        return (base_options, max_network_count, key_pair, security_groups,
                network_metadata)

    @staticmethod
    @db_api.api_context_manager.writer
    def _create_reqspec_buildreq_instmapping(context, rs, br, im):
        """Create the request spec, build request, and instance mapping in a
        single database transaction.

        The RequestContext must be passed in to this method so that the
        database transaction context manager decorator will nest properly and
        include each create() into the same transaction context.
        """
        rs.create()
        br.create()
        im.create()

    def _validate_host_or_node(self, context, host, hypervisor_hostname):
        """Check whether compute nodes exist by validating the host
        and/or the hypervisor_hostname. There are three cases:

        1. If only host is supplied, we can lookup the HostMapping in
           the API DB.
        2. If only node is supplied, we can query a resource provider
           with that name in placement.
        3. If both host and node are supplied, we can get the cell from
           HostMapping and from that lookup the ComputeNode with the
           given cell.

        :param context: The API request context.
        :param host: Target host.
        :param hypervisor_hostname: Target node.
        :raises: ComputeHostNotFound if we find no compute nodes with host
                 and/or hypervisor_hostname.
        """
        if host:
            # When host is specified.
            try:
                host_mapping = objects.HostMapping.get_by_host(context, host)
            except exception.HostMappingNotFound:
                LOG.warning('No host-to-cell mapping found for host '
                            '%(host)s.', {'host': host})
                raise exception.ComputeHostNotFound(host=host)
            # When both host and node are specified.
            if hypervisor_hostname:
                cell = host_mapping.cell_mapping
                with nova_context.target_cell(context, cell) as cctxt:
                    # Here we only do an existence check, so we don't
                    # need to store the return value into a variable.
                    objects.ComputeNode.get_by_host_and_nodename(
                        cctxt, host, hypervisor_hostname)
        elif hypervisor_hostname:
            # When only node is specified.
            try:
                self.placementclient.get_provider_by_name(
                    context, hypervisor_hostname)
            except exception.ResourceProviderNotFound:
                raise exception.ComputeHostNotFound(host=hypervisor_hostname)

    def _get_volumes_for_bdms(self, context, bdms):
        """Get the pre-existing volumes from cinder for the list of BDMs.

        :param context: nova auth RequestContext
        :param bdms: BlockDeviceMappingList which has zero or more BDMs with
            a pre-existing volume_id specified.
        :return: dict, keyed by volume id, of volume dicts
        :raises: VolumeNotFound - if a given volume does not exist
        :raises: CinderConnectionFailed - if there are problems communicating
            with the cinder API
        :raises: Forbidden - if the user token does not have authority to see
            a volume
        """
        volumes = {}
        for bdm in bdms:
            if bdm.volume_id:
                volumes[bdm.volume_id] = self.volume_api.get(
                    context, bdm.volume_id)
        return volumes

    @staticmethod
    def _validate_vol_az_for_create(instance_az, volumes):
        """Performs cross_az_attach validation for the instance and volumes.

        If [cinder]/cross_az_attach=True (default) this method is a no-op.

        If [cinder]/cross_az_attach=False, this method will validate that:

        1. All volumes are in the same availability zone.
        2. The volume AZ matches the instance AZ. If the instance is being
           created without a specific AZ (either via the user request or the
           [DEFAULT]/default_schedule_zone option), and the volume AZ matches
           [DEFAULT]/default_availability_zone for compute services, then the
           method returns the volume AZ so it can be set in the RequestSpec
           as if the user requested the zone explicitly.

        :param instance_az: Availability zone for the instance. In this case
            the host is not yet selected so the instance AZ value should come
            from one of the following cases:

            * The user requested availability zone.
            * [DEFAULT]/default_schedule_zone (defaults to None) if the
              request does not specify an AZ (see parse_availability_zone).
        :param volumes: iterable of dicts of cinder volumes to be attached to
            the server being created
        :returns: None or volume AZ to set in the RequestSpec for the instance
        :raises: MismatchVolumeAZException if the instance and volume AZ do
            not match
        """
        if CONF.cinder.cross_az_attach:
            return

        if not volumes:
            return

        # First make sure that all of the volumes are in the same zone.
        vol_zones = [vol['availability_zone'] for vol in volumes]
        if len(set(vol_zones)) > 1:
            msg = (_("Volumes are in different availability zones: %s")
                   % ','.join(vol_zones))
            raise exception.MismatchVolumeAZException(reason=msg)

        volume_az = vol_zones[0]
        # In this case the instance.host should not be set so the instance AZ
        # value should come from instance.availability_zone which will be one
        # of the following cases:
        # * The user requested availability zone.
        # * [DEFAULT]/default_schedule_zone (defaults to None) if the request
        #   does not specify an AZ (see parse_availability_zone).

        # If the instance is not being created with a specific AZ (the AZ is
        # input via the API create request *or*
        # [DEFAULT]/default_schedule_zone is not None), then check to see if
        # we should use the default AZ (which by default matches the default
        # AZ in Cinder, i.e. 'nova').
        if instance_az is None:
            # Check if the volume AZ is the same as our default AZ for compute
            # hosts (nova) and if so, assume we are OK because the user did
            # not request an AZ and will get the same default. If the volume
            # AZ is not the same as our default, return the volume AZ so the
            # caller can put it into the request spec so the instance is
            # scheduled to the same zone as the volume. Note that we are
            # paranoid about the default here since both nova and cinder's
            # default backend AZ is "nova" and we do not want to pin the
            # server to that AZ since it's special, i.e. just like we tell
            # users in the docs to not specify availability_zone='nova' when
            # creating a server since we might not be able to migrate it
            # later.
            if volume_az != CONF.default_availability_zone:
                return volume_az  # indication to set in request spec
            # The volume AZ is the same as the default nova AZ so we will be
            # OK
            return

        if instance_az != volume_az:
            msg = _("Server and volumes are not in the same availability "
                    "zone. Server is in: %(instance_az)s. Volumes are in: "
                    "%(volume_az)s") % {
                'instance_az': instance_az, 'volume_az': volume_az}
            raise exception.MismatchVolumeAZException(reason=msg)
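
    # Illustrative example (not from the original source): with
    # [cinder]/cross_az_attach=False and [DEFAULT]/default_availability_zone
    # left at its default of 'nova', a request with instance_az=None and a
    # single volume in 'az1' returns 'az1' (pinning the RequestSpec to the
    # volume's zone), while instance_az='az2' with the same volume raises
    # MismatchVolumeAZException.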
  1032. def _provision_instances(self, context, instance_type, min_count,
  1033. max_count, base_options, boot_meta, security_groups,
  1034. block_device_mapping, shutdown_terminate,
  1035. instance_group, check_server_group_quota, filter_properties,
  1036. key_pair, tags, trusted_certs, supports_multiattach,
  1037. network_metadata=None, requested_host=None,
  1038. requested_hypervisor_hostname=None):
  1039. # NOTE(boxiang): Check whether compute nodes exist by validating
  1040. # the host and/or the hypervisor_hostname. Pass the destination
  1041. # to the scheduler with host and/or hypervisor_hostname(node).
  1042. destination = None
  1043. if requested_host or requested_hypervisor_hostname:
  1044. self._validate_host_or_node(context, requested_host,
  1045. requested_hypervisor_hostname)
  1046. destination = objects.Destination()
  1047. if requested_host:
  1048. destination.host = requested_host
  1049. destination.node = requested_hypervisor_hostname
  1050. # Check quotas
  1051. num_instances = compute_utils.check_num_instances_quota(
  1052. context, instance_type, min_count, max_count)
  1053. security_groups = security_group_api.populate_security_groups(
  1054. security_groups)
  1055. port_resource_requests = base_options.pop('port_resource_requests')
  1056. instances_to_build = []
  1057. # We could be iterating over several instances with several BDMs per
  1058. # instance and those BDMs could be using a lot of the same images so
  1059. # we want to cache the image API GET results for performance.
  1060. image_cache = {} # dict of image dicts keyed by image id
  1061. # Before processing the list of instances get all of the requested
  1062. # pre-existing volumes so we can do some validation here rather than
  1063. # down in the bowels of _validate_bdm.
  1064. volumes = self._get_volumes_for_bdms(context, block_device_mapping)
  1065. volume_az = self._validate_vol_az_for_create(
  1066. base_options['availability_zone'], volumes.values())
  1067. if volume_az:
  1068. # This means the instance is not being created in a specific zone
  1069. # but needs to match the zone that the volumes are in so update
  1070. # base_options to match the volume zone.
  1071. base_options['availability_zone'] = volume_az
  1072. LOG.debug("Going to run %s instances...", num_instances)
  1073. extra_specs = instance_type.extra_specs
  1074. dp_name = extra_specs.get('accel:device_profile')
  1075. dp_request_groups = []
  1076. if dp_name:
  1077. dp_request_groups = cyborg.get_device_profile_request_groups(
  1078. context, dp_name)
  1079. try:
  1080. for i in range(num_instances):
  1081. # Create a uuid for the instance so we can store the
  1082. # RequestSpec before the instance is created.
  1083. instance_uuid = uuidutils.generate_uuid()
  1084. # Store the RequestSpec that will be used for scheduling.
  1085. req_spec = objects.RequestSpec.from_components(context,
  1086. instance_uuid, boot_meta, instance_type,
  1087. base_options['numa_topology'],
  1088. base_options['pci_requests'], filter_properties,
  1089. instance_group, base_options['availability_zone'],
  1090. security_groups=security_groups,
  1091. port_resource_requests=port_resource_requests)
  1092. if block_device_mapping:
  1093. # Record whether or not we are a BFV instance
  1094. root = block_device_mapping.root_bdm()
  1095. req_spec.is_bfv = bool(root and root.is_volume)
  1096. else:
  1097. # If we have no BDMs, we're clearly not BFV
  1098. req_spec.is_bfv = False
  1099. # NOTE(danms): We need to record num_instances on the request
  1100. # spec as this is how the conductor knows how many were in this
  1101. # batch.
  1102. req_spec.num_instances = num_instances
  1103. # NOTE(stephenfin): The network_metadata field is not persisted
  1104. # inside RequestSpec object.
  1105. if network_metadata:
  1106. req_spec.network_metadata = network_metadata
  1107. if destination:
  1108. req_spec.requested_destination = destination
  1109. if dp_request_groups:
  1110. req_spec.requested_resources.extend(dp_request_groups)
  1111. # Create an instance object, but do not store in db yet.
  1112. instance = objects.Instance(context=context)
  1113. instance.uuid = instance_uuid
  1114. instance.update(base_options)
  1115. instance.keypairs = objects.KeyPairList(objects=[])
  1116. if key_pair:
  1117. instance.keypairs.objects.append(key_pair)
  1118. instance.trusted_certs = self._retrieve_trusted_certs_object(
  1119. context, trusted_certs)
  1120. instance = self.create_db_entry_for_new_instance(context,
  1121. instance_type, boot_meta, instance, security_groups,
  1122. block_device_mapping, num_instances, i,
  1123. shutdown_terminate, create_instance=False)
  1124. block_device_mapping = (
  1125. self._bdm_validate_set_size_and_instance(context,
  1126. instance, instance_type, block_device_mapping,
  1127. image_cache, volumes, supports_multiattach))
  1128. instance_tags = self._transform_tags(tags, instance.uuid)
  1129. build_request = objects.BuildRequest(context,
  1130. instance=instance, instance_uuid=instance.uuid,
  1131. project_id=instance.project_id,
  1132. block_device_mappings=block_device_mapping,
  1133. tags=instance_tags)
  1134. # Create an instance_mapping. The null cell_mapping indicates
  1135. # that the instance doesn't yet exist in a cell, and lookups
  1136. # for it need to instead look for the RequestSpec.
  1137. # cell_mapping will be populated after scheduling, with a
  1138. # scheduling failure using the cell_mapping for the special
  1139. # cell0.
  1140. inst_mapping = objects.InstanceMapping(context=context)
  1141. inst_mapping.instance_uuid = instance_uuid
  1142. inst_mapping.project_id = context.project_id
  1143. inst_mapping.user_id = context.user_id
  1144. inst_mapping.cell_mapping = None
  1145. # Create the request spec, build request, and instance mapping
  1146. # records in a single transaction so that if a DBError is
  1147. # raised from any of them, all INSERTs will be rolled back and
  1148. # no orphaned records will be left behind.
  1149. self._create_reqspec_buildreq_instmapping(context, req_spec,
  1150. build_request,
  1151. inst_mapping)
  1152. instances_to_build.append(
  1153. (req_spec, build_request, inst_mapping))
  1154. if instance_group:
  1155. if check_server_group_quota:
  1156. try:
  1157. objects.Quotas.check_deltas(
  1158. context, {'server_group_members': 1},
  1159. instance_group, context.user_id)
  1160. except exception.OverQuota:
  1161. msg = _("Quota exceeded, too many servers in "
  1162. "group")
  1163. raise exception.QuotaError(msg)
  1164. members = objects.InstanceGroup.add_members(
  1165. context, instance_group.uuid, [instance.uuid])
  1166. # NOTE(melwitt): We recheck the quota after creating the
  1167. # object to prevent users from allocating more resources
  1168. # than their allowed quota in the event of a race. This is
  1169. # configurable because it can be expensive if strict quota
  1170. # limits are not required in a deployment.
  1171. if CONF.quota.recheck_quota and check_server_group_quota:
  1172. try:
  1173. objects.Quotas.check_deltas(
  1174. context, {'server_group_members': 0},
  1175. instance_group, context.user_id)
  1176. except exception.OverQuota:
  1177. objects.InstanceGroup._remove_members_in_db(
  1178. context, instance_group.id, [instance.uuid])
  1179. msg = _("Quota exceeded, too many servers in "
  1180. "group")
  1181. raise exception.QuotaError(msg)
1182. # The list of members added to the server group in this iteration
1183. # is needed to check the server group quota when adding the next
1184. # instance.
  1185. instance_group.members.extend(members)
  1186. # In the case of any exceptions, attempt DB cleanup
  1187. except Exception:
  1188. with excutils.save_and_reraise_exception():
  1189. self._cleanup_build_artifacts(None, instances_to_build)
  1190. return instances_to_build
  1191. @staticmethod
  1192. def _retrieve_trusted_certs_object(context, trusted_certs, rebuild=False):
  1193. """Convert user-requested trusted cert IDs to TrustedCerts object
  1194. Also validates that the deployment is new enough to support trusted
  1195. image certification validation.
  1196. :param context: The user request auth context
  1197. :param trusted_certs: list of user-specified trusted cert string IDs,
  1198. may be None
  1199. :param rebuild: True if rebuilding the server, False if creating a
  1200. new server
  1201. :returns: nova.objects.TrustedCerts object or None if no user-specified
  1202. trusted cert IDs were given and nova is not configured with
  1203. default trusted cert IDs
  1204. """
  1205. # Retrieve trusted_certs parameter, or use CONF value if certificate
  1206. # validation is enabled
  1207. if trusted_certs:
  1208. certs_to_return = objects.TrustedCerts(ids=trusted_certs)
  1209. elif (CONF.glance.verify_glance_signatures and
  1210. CONF.glance.enable_certificate_validation and
  1211. CONF.glance.default_trusted_certificate_ids):
  1212. certs_to_return = objects.TrustedCerts(
  1213. ids=CONF.glance.default_trusted_certificate_ids)
  1214. else:
  1215. return None
  1216. return certs_to_return
  1217. @staticmethod
  1218. def _get_requested_instance_group(context, filter_properties):
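# The server group, if any, is identified via the 'group' scheduler
# hint, e.g. filter_properties={'scheduler_hints': {'group': <group
# uuid>}}; without that hint no group is looked up.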
  1219. if (not filter_properties or
  1220. not filter_properties.get('scheduler_hints')):
  1221. return
  1222. group_hint = filter_properties.get('scheduler_hints').get('group')
  1223. if not group_hint:
  1224. return
  1225. return objects.InstanceGroup.get_by_uuid(context, group_hint)
  1226. def _create_instance(self, context, instance_type,
  1227. image_href, kernel_id, ramdisk_id,
  1228. min_count, max_count,
  1229. display_name, display_description,
  1230. key_name, key_data, security_groups,
  1231. availability_zone, user_data, metadata, injected_files,
  1232. admin_password, access_ip_v4, access_ip_v6,
  1233. requested_networks, config_drive,
  1234. block_device_mapping, auto_disk_config, filter_properties,
  1235. reservation_id=None, legacy_bdm=True, shutdown_terminate=False,
  1236. check_server_group_quota=False, tags=None,
  1237. supports_multiattach=False, trusted_certs=None,
  1238. supports_port_resource_request=False,
  1239. requested_host=None, requested_hypervisor_hostname=None):
  1240. """Verify all the input parameters regardless of the provisioning
  1241. strategy being performed and schedule the instance(s) for
  1242. creation.
  1243. """
  1244. # Normalize and setup some parameters
  1245. if reservation_id is None:
  1246. reservation_id = utils.generate_uid('r')
  1247. security_groups = security_groups or ['default']
  1248. min_count = min_count or 1
  1249. max_count = max_count or min_count
  1250. block_device_mapping = block_device_mapping or []
  1251. tags = tags or []
  1252. if image_href:
  1253. image_id, boot_meta = self._get_image(context, image_href)
  1254. else:
  1255. # This is similar to the logic in _retrieve_trusted_certs_object.
  1256. if (trusted_certs or
  1257. (CONF.glance.verify_glance_signatures and
  1258. CONF.glance.enable_certificate_validation and
  1259. CONF.glance.default_trusted_certificate_ids)):
  1260. msg = _("Image certificate validation is not supported "
  1261. "when booting from volume")
  1262. raise exception.CertificateValidationFailed(message=msg)
  1263. image_id = None
  1264. boot_meta = block_device.get_bdm_image_metadata(
  1265. context, self.image_api, self.volume_api, block_device_mapping,
  1266. legacy_bdm)
  1267. self._check_auto_disk_config(image=boot_meta,
  1268. auto_disk_config=auto_disk_config)
  1269. base_options, max_net_count, key_pair, security_groups, \
  1270. network_metadata = self._validate_and_build_base_options(
  1271. context, instance_type, boot_meta, image_href, image_id,
  1272. kernel_id, ramdisk_id, display_name, display_description,
  1273. key_name, key_data, security_groups, availability_zone,
  1274. user_data, metadata, access_ip_v4, access_ip_v6,
  1275. requested_networks, config_drive, auto_disk_config,
  1276. reservation_id, max_count, supports_port_resource_request)
  1277. # TODO(huaqiang): Remove in Wallaby
  1278. # check nova-compute nodes have been updated to Victoria to support the
  1279. # mixed CPU policy for creating a new instance.
  1280. numa_topology = base_options.get('numa_topology')
  1281. self._check_compute_service_for_mixed_instance(numa_topology)
  1282. # max_net_count is the maximum number of instances requested by the
  1283. # user adjusted for any network quota constraints, including
  1284. # consideration of connections to each requested network
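# For example, if the user asked for max_count=10 but the network port
# quota only allows 4 more instances, max_count is reduced to 4 below;
# if even min_count cannot be satisfied the request fails.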
  1285. if max_net_count < min_count:
  1286. raise exception.PortLimitExceeded()
  1287. elif max_net_count < max_count:
  1288. LOG.info("max count reduced from %(max_count)d to "
  1289. "%(max_net_count)d due to network port quota",
  1290. {'max_count': max_count,
  1291. 'max_net_count': max_net_count})
  1292. max_count = max_net_count
  1293. block_device_mapping = self._check_and_transform_bdm(context,
  1294. base_options, instance_type, boot_meta, min_count, max_count,
  1295. block_device_mapping, legacy_bdm)
  1296. # We can't do this check earlier because we need bdms from all sources
  1297. # to have been merged in order to get the root bdm.
  1298. # Set validate_numa=False since numa validation is already done by
  1299. # _validate_and_build_base_options().
  1300. self._checks_for_create_and_rebuild(context, image_id, boot_meta,
  1301. instance_type, metadata, injected_files,
  1302. block_device_mapping.root_bdm(), validate_numa=False)
  1303. instance_group = self._get_requested_instance_group(context,
  1304. filter_properties)
  1305. tags = self._create_tag_list_obj(context, tags)
  1306. instances_to_build = self._provision_instances(
  1307. context, instance_type, min_count, max_count, base_options,
  1308. boot_meta, security_groups, block_device_mapping,
  1309. shutdown_terminate, instance_group, check_server_group_quota,
  1310. filter_properties, key_pair, tags, trusted_certs,
  1311. supports_multiattach, network_metadata,
  1312. requested_host, requested_hypervisor_hostname)
  1313. instances = []
  1314. request_specs = []
  1315. build_requests = []
  1316. for rs, build_request, im in instances_to_build:
  1317. build_requests.append(build_request)
  1318. instance = build_request.get_new_instance(context)
  1319. instances.append(instance)
  1320. request_specs.append(rs)
  1321. self.compute_task_api.schedule_and_build_instances(
  1322. context,
  1323. build_requests=build_requests,
  1324. request_spec=request_specs,
  1325. image=boot_meta,
  1326. admin_password=admin_password,
  1327. injected_files=injected_files,
  1328. requested_networks=requested_networks,
  1329. block_device_mapping=block_device_mapping,
  1330. tags=tags)
  1331. return instances, reservation_id
  1332. @staticmethod
  1333. def _cleanup_build_artifacts(instances, instances_to_build):
  1334. # instances_to_build is a list of tuples:
  1335. # (RequestSpec, BuildRequest, InstanceMapping)
  1336. # Be paranoid about artifacts being deleted underneath us.
  1337. for instance in instances or []:
  1338. try:
  1339. instance.destroy()
  1340. except exception.InstanceNotFound:
  1341. pass
  1342. for rs, build_request, im in instances_to_build or []:
  1343. try:
  1344. rs.destroy()
  1345. except exception.RequestSpecNotFound:
  1346. pass
  1347. try:
  1348. build_request.destroy()
  1349. except exception.BuildRequestNotFound:
  1350. pass
  1351. try:
  1352. im.destroy()
  1353. except exception.InstanceMappingNotFound:
  1354. pass
  1355. @staticmethod
  1356. def _volume_size(instance_type, bdm):
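# For example, a BDM of {'source_type': 'blank',
# 'destination_type': 'local', 'guest_format': 'swap'} with no
# volume_size picks up the flavor's swap size below; other blank/local
# BDMs inherit the flavor's ephemeral_gb.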
  1357. size = bdm.get('volume_size')
  1358. # NOTE (ndipanov): inherit flavor size only for swap and ephemeral
  1359. if (size is None and bdm.get('source_type') == 'blank' and
  1360. bdm.get('destination_type') == 'local'):
  1361. if bdm.get('guest_format') == 'swap':
  1362. size = instance_type.get('swap', 0)
  1363. else:
  1364. size = instance_type.get('ephemeral_gb', 0)
  1365. return size
  1366. def _prepare_image_mapping(self, instance_type, mappings):
  1367. """Extract and format blank devices from image mappings."""
  1368. prepared_mappings = []
  1369. for bdm in block_device.mappings_prepend_dev(mappings):
  1370. LOG.debug("Image bdm %s", bdm)
  1371. virtual_name = bdm['virtual']
  1372. if virtual_name == 'ami' or virtual_name == 'root':
  1373. continue
  1374. if not block_device.is_swap_or_ephemeral(virtual_name):
  1375. continue
  1376. guest_format = bdm.get('guest_format')
  1377. if virtual_name == 'swap':
  1378. guest_format = 'swap'
  1379. if not guest_format:
  1380. guest_format = CONF.default_ephemeral_format
  1381. values = block_device.BlockDeviceDict({
  1382. 'device_name': bdm['device'],
  1383. 'source_type': 'blank',
  1384. 'destination_type': 'local',
  1385. 'device_type': 'disk',
  1386. 'guest_format': guest_format,
  1387. 'delete_on_termination': True,
  1388. 'boot_index': -1})
  1389. values['volume_size'] = self._volume_size(
  1390. instance_type, values)
  1391. if values['volume_size'] == 0:
  1392. continue
  1393. prepared_mappings.append(values)
  1394. return prepared_mappings
  1395. def _bdm_validate_set_size_and_instance(self, context, instance,
  1396. instance_type,
  1397. block_device_mapping,
  1398. image_cache, volumes,
  1399. supports_multiattach=False):
  1400. """Ensure the bdms are valid, then set size and associate with instance
  1401. Because this method can be called multiple times when more than one
1402. instance is booted in a single request, it makes a copy of the BDM list.
  1403. :param context: nova auth RequestContext
  1404. :param instance: Instance object
  1405. :param instance_type: Flavor object - used for swap and ephemeral BDMs
  1406. :param block_device_mapping: BlockDeviceMappingList object
  1407. :param image_cache: dict of image dicts keyed by id which is used as a
  1408. cache in case there are multiple BDMs in the same request using
  1409. the same image to avoid redundant GET calls to the image service
  1410. :param volumes: dict, keyed by volume id, of volume dicts from cinder
  1411. :param supports_multiattach: True if the request supports multiattach
  1412. volumes, False otherwise
  1413. """
  1414. LOG.debug("block_device_mapping %s", list(block_device_mapping),
  1415. instance_uuid=instance.uuid)
  1416. self._validate_bdm(
  1417. context, instance, instance_type, block_device_mapping,
  1418. image_cache, volumes, supports_multiattach)
  1419. instance_block_device_mapping = block_device_mapping.obj_clone()
  1420. for bdm in instance_block_device_mapping:
  1421. bdm.volume_size = self._volume_size(instance_type, bdm)
  1422. bdm.instance_uuid = instance.uuid
  1423. return instance_block_device_mapping
  1424. @staticmethod
  1425. def _check_requested_volume_type(bdm, volume_type_id_or_name,
  1426. volume_types):
  1427. """If we are specifying a volume type, we need to get the
  1428. volume type details from Cinder and make sure the ``volume_type``
  1429. is available.
  1430. """
  1431. # NOTE(brinzhang): Verify that the specified volume type exists.
  1432. # And save the volume type name internally for consistency in the
  1433. # BlockDeviceMapping object.
  1434. for vol_type in volume_types:
  1435. if (volume_type_id_or_name == vol_type['id'] or
  1436. volume_type_id_or_name == vol_type['name']):
  1437. bdm.volume_type = vol_type['name']
  1438. break
  1439. else:
  1440. raise exception.VolumeTypeNotFound(
  1441. id_or_name=volume_type_id_or_name)
  1442. def _validate_bdm(self, context, instance, instance_type,
  1443. block_device_mappings, image_cache, volumes,
  1444. supports_multiattach=False):
  1445. """Validate requested block device mappings.
  1446. :param context: nova auth RequestContext
  1447. :param instance: Instance object
  1448. :param instance_type: Flavor object - used for swap and ephemeral BDMs
  1449. :param block_device_mappings: BlockDeviceMappingList object
  1450. :param image_cache: dict of image dicts keyed by id which is used as a
  1451. cache in case there are multiple BDMs in the same request using
  1452. the same image to avoid redundant GET calls to the image service
  1453. :param volumes: dict, keyed by volume id, of volume dicts from cinder
  1454. :param supports_multiattach: True if the request supports multiattach
  1455. volumes, False otherwise
  1456. """
  1457. # Make sure that the boot indexes make sense.
  1458. # Setting a negative value or None indicates that the device should not
  1459. # be used for booting.
  1460. boot_indexes = sorted([bdm.boot_index
  1461. for bdm in block_device_mappings
  1462. if bdm.boot_index is not None and
  1463. bdm.boot_index >= 0])
1464. # Each device which is capable of being used as a boot device should
  1465. # be given a unique boot index, starting from 0 in ascending order, and
  1466. # there needs to be at least one boot device.
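# For example, boot indexes [0] or [0, 1] are valid, while [] or
# [0, 2] fail the check below.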
  1467. if not boot_indexes or any(i != v for i, v in enumerate(boot_indexes)):
  1468. # Convert the BlockDeviceMappingList to a list for repr details.
  1469. LOG.debug('Invalid block device mapping boot sequence for '
  1470. 'instance: %s', list(block_device_mappings),
  1471. instance=instance)
  1472. raise exception.InvalidBDMBootSequence()
  1473. volume_types = None
  1474. for bdm in block_device_mappings:
  1475. volume_type = bdm.volume_type
  1476. if volume_type:
  1477. if not volume_types:
1478. # To reduce the number of Cinder API calls, initialize
1479. # our cache of volume types.
  1480. volume_types = self.volume_api.get_all_volume_types(
  1481. context)
  1482. # NOTE(brinzhang): Ensure the validity of volume_type.
  1483. self._check_requested_volume_type(bdm, volume_type,
  1484. volume_types)
  1485. # NOTE(vish): For now, just make sure the volumes are accessible.
  1486. # Additionally, check that the volume can be attached to this
  1487. # instance.
  1488. snapshot_id = bdm.snapshot_id
  1489. volume_id = bdm.volume_id
  1490. image_id = bdm.image_id
  1491. if image_id is not None:
  1492. if (image_id != instance.get('image_ref') and
  1493. image_id not in image_cache):
  1494. try:
  1495. # Cache the results of the image GET so we do not make
  1496. # the same request for the same image if processing
  1497. # multiple BDMs or multiple servers with the same image
  1498. image_cache[image_id] = self._get_image(
  1499. context, image_id)
  1500. except Exception:
  1501. raise exception.InvalidBDMImage(id=image_id)
  1502. if (bdm.source_type == 'image' and
  1503. bdm.destination_type == 'volume' and
  1504. not bdm.volume_size):
  1505. raise exception.InvalidBDM(message=_("Images with "
  1506. "destination_type 'volume' need to have a non-zero "
  1507. "size specified"))
  1508. elif volume_id is not None:
  1509. try:
  1510. volume = volumes[volume_id]
  1511. # We do not validate the instance and volume AZ here
  1512. # because that is done earlier by _provision_instances.
  1513. self._check_attach_and_reserve_volume(
  1514. context, volume, instance, bdm, supports_multiattach,
  1515. validate_az=False)
  1516. bdm.volume_size = volume.get('size')
  1517. except (exception.CinderConnectionFailed,
  1518. exception.InvalidVolume,
  1519. exception.MultiattachNotSupportedOldMicroversion):
  1520. raise
  1521. except exception.InvalidInput as exc:
  1522. raise exception.InvalidVolume(reason=exc.format_message())
  1523. except Exception as e:
  1524. LOG.info('Failed validating volume %s. Error: %s',
  1525. volume_id, e)
  1526. raise exception.InvalidBDMVolume(id=volume_id)
  1527. elif snapshot_id is not None:
  1528. try:
  1529. snap = self.volume_api.get_snapshot(context, snapshot_id)
  1530. bdm.volume_size = bdm.volume_size or snap.get('size')
  1531. except exception.CinderConnectionFailed:
  1532. raise
  1533. except Exception:
  1534. raise exception.InvalidBDMSnapshot(id=snapshot_id)
  1535. elif (bdm.source_type == 'blank' and
  1536. bdm.destination_type == 'volume' and
  1537. not bdm.volume_size):
  1538. raise exception.InvalidBDM(message=_("Blank volumes "
  1539. "(source: 'blank', dest: 'volume') need to have non-zero "
  1540. "size"))
  1541. # NOTE(lyarwood): Ensure the disk_bus is at least known to Nova.
  1542. # The virt driver may reject this later but for now just ensure
  1543. # it's listed as an acceptable value of the DiskBus field class.
  1544. disk_bus = bdm.disk_bus if 'disk_bus' in bdm else None
  1545. if disk_bus and disk_bus not in fields_obj.DiskBus.ALL:
  1546. raise exception.InvalidBDMDiskBus(disk_bus=disk_bus)
  1547. ephemeral_size = sum(bdm.volume_size or instance_type['ephemeral_gb']
  1548. for bdm in block_device_mappings
  1549. if block_device.new_format_is_ephemeral(bdm))
  1550. if ephemeral_size > instance_type['ephemeral_gb']:
  1551. raise exception.InvalidBDMEphemeralSize()
  1552. # There should be only one swap
  1553. swap_list = block_device.get_bdm_swap_list(block_device_mappings)
  1554. if len(swap_list) > 1:
  1555. msg = _("More than one swap drive requested.")
  1556. raise exception.InvalidBDMFormat(details=msg)
  1557. if swap_list:
  1558. swap_size = swap_list[0].volume_size or 0
  1559. if swap_size > instance_type['swap']:
  1560. raise exception.InvalidBDMSwapSize()
  1561. max_local = CONF.max_local_block_devices
  1562. if max_local >= 0:
  1563. num_local = len([bdm for bdm in block_device_mappings
  1564. if bdm.destination_type == 'local'])
  1565. if num_local > max_local:
  1566. raise exception.InvalidBDMLocalsLimit()
  1567. def _populate_instance_names(self, instance, num_instances, index):
  1568. """Populate instance display_name and hostname.
  1569. :param instance: The instance to set the display_name, hostname for
  1570. :type instance: nova.objects.Instance
  1571. :param num_instances: Total number of instances being created in this
  1572. request
  1573. :param index: The 0-based index of this particular instance
  1574. """
  1575. # NOTE(mriedem): This is only here for test simplicity since a server
  1576. # name is required in the REST API.
  1577. if 'display_name' not in instance or instance.display_name is None:
  1578. instance.display_name = 'Server %s' % instance.uuid
  1579. # if we're booting multiple instances, we need to add an indexing
  1580. # suffix to both instance.hostname and instance.display_name. This is
  1581. # not necessary for a single instance.
  1582. if num_instances == 1:
  1583. default_hostname = 'Server-%s' % instance.uuid
  1584. instance.hostname = utils.sanitize_hostname(
  1585. instance.display_name, default_hostname)
  1586. elif num_instances > 1:
  1587. old_display_name = instance.display_name
  1588. new_display_name = '%s-%d' % (old_display_name, index + 1)
  1589. if utils.sanitize_hostname(old_display_name) == "":
  1590. instance.hostname = 'Server-%s' % instance.uuid
  1591. else:
  1592. instance.hostname = utils.sanitize_hostname(
  1593. new_display_name)
  1594. instance.display_name = new_display_name
  1595. def _populate_instance_for_create(self, context, instance, image,
  1596. index, security_groups, instance_type,
  1597. num_instances, shutdown_terminate):
  1598. """Build the beginning of a new instance."""
  1599. instance.launch_index = index
  1600. instance.vm_state = vm_states.BUILDING
  1601. instance.task_state = task_states.SCHEDULING
  1602. info_cache = objects.InstanceInfoCache()
  1603. info_cache.instance_uuid = instance.uuid
  1604. info_cache.network_info = network_model.NetworkInfo()
  1605. instance.info_cache = info_cache
  1606. instance.flavor = instance_type
  1607. instance.old_flavor = None
  1608. instance.new_flavor = None
  1609. if CONF.ephemeral_storage_encryption.enabled:
  1610. # NOTE(kfarr): dm-crypt expects the cipher in a
  1611. # hyphenated format: cipher-chainmode-ivmode
  1612. # (ex: aes-xts-plain64). The algorithm needs
  1613. # to be parsed out to pass to the key manager (ex: aes).
  1614. cipher = CONF.ephemeral_storage_encryption.cipher
  1615. algorithm = cipher.split('-')[0] if cipher else None
  1616. instance.ephemeral_key_uuid = self.key_manager.create_key(
  1617. context,
  1618. algorithm=algorithm,
  1619. length=CONF.ephemeral_storage_encryption.key_size)
  1620. else:
  1621. instance.ephemeral_key_uuid = None
  1622. # Store image properties so we can use them later
  1623. # (for notifications, etc). Only store what we can.
  1624. if not instance.obj_attr_is_set('system_metadata'):
  1625. instance.system_metadata = {}
  1626. # Make sure we have the dict form that we need for instance_update.
  1627. instance.system_metadata = utils.instance_sys_meta(instance)
  1628. system_meta = utils.get_system_metadata_from_image(
  1629. image, instance_type)
  1630. # In case we couldn't find any suitable base_image
  1631. system_meta.setdefault('image_base_image_ref', instance.image_ref)
  1632. system_meta['owner_user_name'] = context.user_name
  1633. system_meta['owner_project_name'] = context.project_name
  1634. instance.system_metadata.update(system_meta)
  1635. # Since the removal of nova-network, we don't actually store anything
  1636. # in the database. Instead, we proxy the security groups on the
  1637. # instance from the ports attached to the instance.
  1638. instance.security_groups = objects.SecurityGroupList()
  1639. self._populate_instance_names(instance, num_instances, index)
  1640. instance.shutdown_terminate = shutdown_terminate
  1641. return instance
  1642. def _create_tag_list_obj(self, context, tags):
  1643. """Create TagList objects from simple string tags.
  1644. :param context: security context.
  1645. :param tags: simple string tags from API request.
  1646. :returns: TagList object.
  1647. """
  1648. tag_list = [objects.Tag(context=context, tag=t) for t in tags]
  1649. tag_list_obj = objects.TagList(objects=tag_list)
  1650. return tag_list_obj
  1651. def _transform_tags(self, tags, resource_id):
  1652. """Change the resource_id of the tags according to the input param.
  1653. Because this method can be called multiple times when more than one
1654. instance is booted in a single request, it makes a copy of the tags
  1655. list.
  1656. :param tags: TagList object.
  1657. :param resource_id: string.
  1658. :returns: TagList object.
  1659. """
  1660. instance_tags = tags.obj_clone()
  1661. for tag in instance_tags:
  1662. tag.resource_id = resource_id
  1663. return instance_tags
  1664. # This method remains because cellsv1 uses it in the scheduler
  1665. def create_db_entry_for_new_instance(self, context, instance_type, image,
  1666. instance, security_group, block_device_mapping, num_instances,
  1667. index, shutdown_terminate=False, create_instance=True):
  1668. """Create an entry in the DB for this new instance,
  1669. including any related table updates (such as security group,
  1670. etc).
  1671. This is called by the scheduler after a location for the
  1672. instance has been determined.
  1673. :param create_instance: Determines if the instance is created here or
  1674. just populated for later creation. This is done so that this code
  1675. can be shared with cellsv1 which needs the instance creation to
  1676. happen here. It should be removed and this method cleaned up when
  1677. cellsv1 is a distant memory.
  1678. """
  1679. self._populate_instance_for_create(context, instance, image, index,
  1680. security_group, instance_type,
  1681. num_instances, shutdown_terminate)
  1682. if create_instance:
  1683. instance.create()
  1684. return instance
  1685. def _check_multiple_instances_with_neutron_ports(self,
  1686. requested_networks):
  1687. """Check whether multiple instances are created from port id(s)."""
  1688. for requested_net in requested_networks:
  1689. if requested_net.port_id:
  1690. msg = _("Unable to launch multiple instances with"
  1691. " a single configured port ID. Please launch your"
  1692. " instance one by one with different ports.")
  1693. raise exception.MultiplePortsNotApplicable(reason=msg)
  1694. def _check_multiple_instances_with_specified_ip(self, requested_networks):
  1695. """Check whether multiple instances are created with specified ip."""
  1696. for requested_net in requested_networks:
  1697. if requested_net.network_id and requested_net.address:
  1698. msg = _("max_count cannot be greater than 1 if an fixed_ip "
  1699. "is specified.")
  1700. raise exception.InvalidFixedIpAndMaxCountRequest(reason=msg)
  1701. def create(self, context, instance_type,
  1702. image_href, kernel_id=None, ramdisk_id=None,
  1703. min_count=None, max_count=None,
  1704. display_name=None, display_description=None,
  1705. key_name=None, key_data=None, security_groups=None,
  1706. availability_zone=None, forced_host=None, forced_node=None,
  1707. user_data=None, metadata=None, injected_files=None,
  1708. admin_password=None, block_device_mapping=None,
  1709. access_ip_v4=None, access_ip_v6=None, requested_networks=None,
  1710. config_drive=None, auto_disk_config=None, scheduler_hints=None,
  1711. legacy_bdm=True, shutdown_terminate=False,
  1712. check_server_group_quota=False, tags=None,
  1713. supports_multiattach=False, trusted_certs=None,
  1714. supports_port_resource_request=False,
  1715. requested_host=None, requested_hypervisor_hostname=None):
  1716. """Provision instances, sending instance information to the
  1717. scheduler. The scheduler will determine where the instance(s)
  1718. go and will handle creating the DB entries.
  1719. Returns a tuple of (instances, reservation_id)
  1720. """
  1721. if requested_networks and max_count is not None and max_count > 1:
  1722. self._check_multiple_instances_with_specified_ip(
  1723. requested_networks)
  1724. self._check_multiple_instances_with_neutron_ports(
  1725. requested_networks)
  1726. if availability_zone:
  1727. available_zones = availability_zones.\
  1728. get_availability_zones(context.elevated(), self.host_api,
  1729. get_only_available=True)
  1730. if forced_host is None and availability_zone not in \
  1731. available_zones:
  1732. msg = _('The requested availability zone is not available')
  1733. raise exception.InvalidRequest(msg)
  1734. filter_properties = scheduler_utils.build_filter_properties(
  1735. scheduler_hints, forced_host, forced_node, instance_type)
  1736. return self._create_instance(
  1737. context, instance_type,
  1738. image_href, kernel_id, ramdisk_id,
  1739. min_count, max_count,
  1740. display_name, display_description,
  1741. key_name, key_data, security_groups,
  1742. availability_zone, user_data, metadata,
  1743. injected_files, admin_password,
  1744. access_ip_v4, access_ip_v6,
  1745. requested_networks, config_drive,
  1746. block_device_mapping, auto_disk_config,
  1747. filter_properties=filter_properties,
  1748. legacy_bdm=legacy_bdm,
  1749. shutdown_terminate=shutdown_terminate,
  1750. check_server_group_quota=check_server_group_quota,
  1751. tags=tags, supports_multiattach=supports_multiattach,
  1752. trusted_certs=trusted_certs,
  1753. supports_port_resource_request=supports_port_resource_request,
  1754. requested_host=requested_host,
  1755. requested_hypervisor_hostname=requested_hypervisor_hostname)
  1756. def _check_auto_disk_config(self, instance=None, image=None,
  1757. auto_disk_config=None):
  1758. if auto_disk_config is None:
  1759. return
  1760. if not image and not instance:
  1761. return
  1762. if image:
  1763. image_props = image.get("properties", {})
  1764. auto_disk_config_img = \
  1765. utils.get_auto_disk_config_from_image_props(image_props)
  1766. image_ref = image.get("id")
  1767. else:
  1768. sys_meta = utils.instance_sys_meta(instance)
  1769. image_ref = sys_meta.get('image_base_image_ref')
  1770. auto_disk_config_img = \
  1771. utils.get_auto_disk_config_from_instance(sys_meta=sys_meta)
  1772. self._ensure_auto_disk_config_is_valid(auto_disk_config_img,
  1773. auto_disk_config,
  1774. image_ref)
  1775. def _lookup_instance(self, context, uuid):
  1776. '''Helper method for pulling an instance object from a database.
  1777. During the transition to cellsv2 there is some complexity around
  1778. retrieving an instance from the database which this method hides. If
  1779. there is an instance mapping then query the cell for the instance, if
  1780. no mapping exists then query the configured nova database.
  1781. Once we are past the point that all deployments can be assumed to be
  1782. migrated to cellsv2 this method can go away.
  1783. '''
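# Returns a (cell, instance) tuple: cell is None when the instance is
# not mapped to a cell yet, and both are None when the instance cannot
# be found or has already been deleted.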
  1784. inst_map = None
  1785. try:
  1786. inst_map = objects.InstanceMapping.get_by_instance_uuid(
  1787. context, uuid)
  1788. except exception.InstanceMappingNotFound:
  1789. # TODO(alaski): This exception block can be removed once we're
  1790. # guaranteed everyone is using cellsv2.
  1791. pass
  1792. if inst_map is None or inst_map.cell_mapping is None:
  1793. # If inst_map is None then the deployment has not migrated to
  1794. # cellsv2 yet.
  1795. # If inst_map.cell_mapping is None then the instance is not in a
  1796. # cell yet. Until instance creation moves to the conductor the
  1797. # instance can be found in the configured database, so attempt
  1798. # to look it up.
  1799. cell = None
  1800. try:
  1801. instance = objects.Instance.get_by_uuid(context, uuid)
  1802. except exception.InstanceNotFound:
  1803. # If we get here then the conductor is in charge of writing the
  1804. # instance to the database and hasn't done that yet. It's up to
  1805. # the caller of this method to determine what to do with that
  1806. # information.
  1807. return None, None
  1808. else:
  1809. cell = inst_map.cell_mapping
  1810. with nova_context.target_cell(context, cell) as cctxt:
  1811. try:
  1812. instance = objects.Instance.get_by_uuid(cctxt, uuid)
  1813. except exception.InstanceNotFound:
  1814. # Since the cell_mapping exists we know the instance is in
  1815. # the cell, however InstanceNotFound means it's already
  1816. # deleted.
  1817. return None, None
  1818. return cell, instance
  1819. def _delete_while_booting(self, context, instance):
  1820. """Handle deletion if the instance has not reached a cell yet
  1821. Deletion before an instance reaches a cell needs to be handled
  1822. differently. What we're attempting to do is delete the BuildRequest
  1823. before the api level conductor does. If we succeed here then the boot
  1824. request stops before reaching a cell. If not then the instance will
  1825. need to be looked up in a cell db and the normal delete path taken.
  1826. """
  1827. deleted = self._attempt_delete_of_buildrequest(context, instance)
  1828. if deleted:
  1829. # If we've reached this block the successful deletion of the
  1830. # buildrequest indicates that the build process should be halted by
  1831. # the conductor.
  1832. # NOTE(alaski): Though the conductor halts the build process it
  1833. # does not currently delete the instance record. This is
  1834. # because in the near future the instance record will not be
  1835. # created if the buildrequest has been deleted here. For now we
  1836. # ensure the instance has been set to deleted at this point.
  1837. # Yes this directly contradicts the comment earlier in this
  1838. # method, but this is a temporary measure.
  1839. # Look up the instance because the current instance object was
  1840. # stashed on the buildrequest and therefore not complete enough
  1841. # to run .destroy().
  1842. try:
  1843. instance_uuid = instance.uuid
  1844. cell, instance = self._lookup_instance(context, instance_uuid)
  1845. if instance is not None:
  1846. # If instance is None it has already been deleted.
  1847. if cell:
  1848. with nova_context.target_cell(context, cell) as cctxt:
  1849. # FIXME: When the instance context is targeted,
  1850. # we can remove this
  1851. with compute_utils.notify_about_instance_delete(
  1852. self.notifier, cctxt, instance):
  1853. instance.destroy()
  1854. else:
  1855. instance.destroy()
  1856. except exception.InstanceNotFound:
  1857. pass
  1858. return True
  1859. return False
  1860. def _local_delete_cleanup(self, context, instance):
1861. # NOTE(aarents): Ensure the instance allocation is cleared and the
1862. # instance mapping is queued as deleted before _delete() returns.
  1863. try:
  1864. self.placementclient.delete_allocation_for_instance(
  1865. context, instance.uuid)
  1866. except exception.AllocationDeleteFailed:
  1867. LOG.info("Allocation delete failed during local delete cleanup.",
  1868. instance=instance)
  1869. try:
  1870. self._update_queued_for_deletion(context, instance, True)
  1871. except exception.InstanceMappingNotFound:
  1872. LOG.info("Instance Mapping does not exist while attempting "
  1873. "local delete cleanup.",
  1874. instance=instance)
  1875. def _attempt_delete_of_buildrequest(self, context, instance):
  1876. # If there is a BuildRequest then the instance may not have been
  1877. # written to a cell db yet. Delete the BuildRequest here, which
  1878. # will indicate that the Instance build should not proceed.
  1879. try:
  1880. build_req = objects.BuildRequest.get_by_instance_uuid(
  1881. context, instance.uuid)
  1882. build_req.destroy()
  1883. except exception.BuildRequestNotFound:
  1884. # This means that conductor has deleted the BuildRequest so the
  1885. # instance is now in a cell and the delete needs to proceed
  1886. # normally.
  1887. return False
  1888. # We need to detach from any volumes so they aren't orphaned.
  1889. self._local_cleanup_bdm_volumes(
  1890. build_req.block_device_mappings, instance, context)
  1891. return True
  1892. def _delete(self, context, instance, delete_type, cb, **instance_attrs):
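# cb is the delete callback (e.g. _do_delete or _do_soft_delete); it
# either sends the delete to the compute service over RPC or, when the
# compute host is down or the instance never reached a cell, runs
# locally with local=True.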
  1893. if instance.disable_terminate:
  1894. LOG.info('instance termination disabled', instance=instance)
  1895. return
  1896. cell = None
  1897. # If there is an instance.host (or the instance is shelved-offloaded or
  1898. # in error state), the instance has been scheduled and sent to a
  1899. # cell/compute which means it was pulled from the cell db.
  1900. # Normal delete should be attempted.
  1901. may_have_ports_or_volumes = compute_utils.may_have_ports_or_volumes(
  1902. instance)
  1903. if not instance.host and not may_have_ports_or_volumes:
  1904. try:
  1905. if self._delete_while_booting(context, instance):
  1906. self._local_delete_cleanup(context, instance)
  1907. return
  1908. # If instance.host was not set it's possible that the Instance
  1909. # object here was pulled from a BuildRequest object and is not
  1910. # fully populated. Notably it will be missing an 'id' field
  1911. # which will prevent instance.destroy from functioning
  1912. # properly. A lookup is attempted which will either return a
  1913. # full Instance or None if not found. If not found then it's
  1914. # acceptable to skip the rest of the delete processing.
  1915. cell, instance = self._lookup_instance(context, instance.uuid)
  1916. if cell and instance:
  1917. try:
  1918. # Now destroy the instance from the cell it lives in.
  1919. with compute_utils.notify_about_instance_delete(
  1920. self.notifier, context, instance):
  1921. instance.destroy()
  1922. except exception.InstanceNotFound:
  1923. pass
  1924. # The instance was deleted or is already gone.
  1925. self._local_delete_cleanup(context, instance)
  1926. return
  1927. if not instance:
  1928. # Instance is already deleted.
  1929. self._local_delete_cleanup(context, instance)
  1930. return
  1931. except exception.ObjectActionError:
  1932. # NOTE(melwitt): This means the instance.host changed
  1933. # under us indicating the instance became scheduled
  1934. # during the destroy(). Refresh the instance from the DB and
  1935. # continue on with the delete logic for a scheduled instance.
  1936. # NOTE(danms): If instance.host is set, we should be able to
  1937. # do the following lookup. If not, there's not much we can
  1938. # do to recover.
  1939. cell, instance = self._lookup_instance(context, instance.uuid)
  1940. if not instance:
  1941. # Instance is already deleted
  1942. self._local_delete_cleanup(context, instance)
  1943. return
  1944. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  1945. context, instance.uuid)
1946. # In these states an instance has an associated snapshot.
  1947. if instance.vm_state in (vm_states.SHELVED,
  1948. vm_states.SHELVED_OFFLOADED):
  1949. snapshot_id = instance.system_metadata.get('shelved_image_id')
  1950. LOG.info("Working on deleting snapshot %s "
  1951. "from shelved instance...",
  1952. snapshot_id, instance=instance)
  1953. try:
  1954. self.image_api.delete(context, snapshot_id)
  1955. except (exception.ImageNotFound,
  1956. exception.ImageNotAuthorized) as exc:
  1957. LOG.warning("Failed to delete snapshot "
  1958. "from shelved instance (%s).",
  1959. exc.format_message(), instance=instance)
  1960. except Exception:
  1961. LOG.exception("Something wrong happened when trying to "
  1962. "delete snapshot from shelved instance.",
  1963. instance=instance)
  1964. original_task_state = instance.task_state
  1965. try:
  1966. # NOTE(maoy): no expected_task_state needs to be set
  1967. instance.update(instance_attrs)
  1968. instance.progress = 0
  1969. instance.save()
  1970. if not instance.host and not may_have_ports_or_volumes:
  1971. try:
  1972. with compute_utils.notify_about_instance_delete(
  1973. self.notifier, context, instance,
  1974. delete_type
  1975. if delete_type != 'soft_delete'
  1976. else 'delete'):
  1977. instance.destroy()
  1978. LOG.info('Instance deleted and does not have host '
  1979. 'field, its vm_state is %(state)s.',
  1980. {'state': instance.vm_state},
  1981. instance=instance)
  1982. self._local_delete_cleanup(context, instance)
  1983. return
  1984. except exception.ObjectActionError as ex:
  1985. # The instance's host likely changed under us as
  1986. # this instance could be building and has since been
  1987. # scheduled. Continue with attempts to delete it.
  1988. LOG.debug('Refreshing instance because: %s', ex,
  1989. instance=instance)
  1990. instance.refresh()
  1991. if instance.vm_state == vm_states.RESIZED:
  1992. self._confirm_resize_on_deleting(context, instance)
1993. # NOTE(neha_alhat): After confirm resize, vm_state becomes
1994. # 'active' and task_state is set to None. But for soft
1995. # deleting a VM, the _do_soft_delete callback requires
1996. # task_state to be 'SOFT_DELETING', so we need to set it
1997. # again for the soft_delete case.
1998. # In the short window after confirm resize and before the
1999. # task_state is saved as 'SOFT_DELETING', the user can submit
2000. # another soft delete request and the system will accept and
2001. # process it without any errors.
  2002. if delete_type == 'soft_delete':
  2003. instance.task_state = instance_attrs['task_state']
  2004. instance.save()
  2005. is_local_delete = True
  2006. try:
  2007. # instance.host must be set in order to look up the service.
  2008. if instance.host is not None:
  2009. service = objects.Service.get_by_compute_host(
  2010. context.elevated(), instance.host)
  2011. is_local_delete = not self.servicegroup_api.service_is_up(
  2012. service)
  2013. if not is_local_delete:
  2014. if original_task_state in (task_states.DELETING,
  2015. task_states.SOFT_DELETING):
  2016. LOG.info('Instance is already in deleting state, '
  2017. 'ignoring this request',
  2018. instance=instance)
  2019. return
  2020. self._record_action_start(context, instance,
  2021. instance_actions.DELETE)
  2022. cb(context, instance, bdms)
  2023. except exception.ComputeHostNotFound:
  2024. LOG.debug('Compute host %s not found during service up check, '
  2025. 'going to local delete instance', instance.host,
  2026. instance=instance)
  2027. if is_local_delete:
  2028. # If instance is in shelved_offloaded state or compute node
  2029. # isn't up, delete instance from db and clean bdms info and
  2030. # network info
  2031. if cell is None:
  2032. # NOTE(danms): If we didn't get our cell from one of the
  2033. # paths above, look it up now.
  2034. try:
  2035. im = objects.InstanceMapping.get_by_instance_uuid(
  2036. context, instance.uuid)
  2037. cell = im.cell_mapping
  2038. except exception.InstanceMappingNotFound:
  2039. LOG.warning('During local delete, failed to find '
  2040. 'instance mapping', instance=instance)
  2041. return
  2042. LOG.debug('Doing local delete in cell %s', cell.identity,
  2043. instance=instance)
  2044. with nova_context.target_cell(context, cell) as cctxt:
  2045. self._local_delete(cctxt, instance, bdms, delete_type, cb)
  2046. except exception.InstanceNotFound:
  2047. # NOTE(comstud): Race condition. Instance already gone.
  2048. pass
  2049. def _confirm_resize_on_deleting(self, context, instance):
  2050. # If in the middle of a resize, use confirm_resize to
  2051. # ensure the original instance is cleaned up too along
  2052. # with its allocations (and migration-based allocations)
  2053. # in placement.
  2054. migration = None
  2055. for status in ('finished', 'confirming'):
  2056. try:
  2057. migration = objects.Migration.get_by_instance_and_status(
  2058. context.elevated(), instance.uuid, status)
  2059. LOG.info('Found an unconfirmed migration during delete, '
  2060. 'id: %(id)s, status: %(status)s',
  2061. {'id': migration.id,
  2062. 'status': migration.status},
  2063. instance=instance)
  2064. break
  2065. except exception.MigrationNotFoundByStatus:
  2066. pass
  2067. if not migration:
  2068. LOG.info('Instance may have been confirmed during delete',
  2069. instance=instance)
  2070. return
  2071. self._record_action_start(context, instance,
  2072. instance_actions.CONFIRM_RESIZE)
  2073. # If migration.cross_cell_move, we need to also cleanup the instance
  2074. # data from the source cell database.
  2075. if migration.cross_cell_move:
  2076. self.compute_task_api.confirm_snapshot_based_resize(
  2077. context, instance, migration, do_cast=False)
  2078. else:
  2079. self.compute_rpcapi.confirm_resize(context,
  2080. instance, migration, migration.source_compute, cast=False)
  2081. def _local_cleanup_bdm_volumes(self, bdms, instance, context):
  2082. """The method deletes the bdm records and, if a bdm is a volume, call
  2083. the terminate connection and the detach volume via the Volume API.
  2084. """
  2085. elevated = context.elevated()
  2086. for bdm in bdms:
  2087. if bdm.is_volume:
  2088. try:
  2089. if bdm.attachment_id:
  2090. self.volume_api.attachment_delete(context,
  2091. bdm.attachment_id)
  2092. else:
  2093. connector = compute_utils.get_stashed_volume_connector(
  2094. bdm, instance)
  2095. if connector:
  2096. self.volume_api.terminate_connection(context,
  2097. bdm.volume_id,
  2098. connector)
  2099. else:
  2100. LOG.debug('Unable to find connector for volume %s,'
  2101. ' not attempting terminate_connection.',
  2102. bdm.volume_id, instance=instance)
  2103. # Attempt to detach the volume. If there was no
  2104. # connection made in the first place this is just
  2105. # cleaning up the volume state in the Cinder DB.
  2106. self.volume_api.detach(elevated, bdm.volume_id,
  2107. instance.uuid)
  2108. if bdm.delete_on_termination:
  2109. self.volume_api.delete(context, bdm.volume_id)
  2110. except Exception as exc:
  2111. LOG.warning("Ignoring volume cleanup failure due to %s",
  2112. exc, instance=instance)
  2113. # If we're cleaning up volumes from an instance that wasn't yet
  2114. # created in a cell, i.e. the user deleted the server while
  2115. # the BuildRequest still existed, then the BDM doesn't actually
  2116. # exist in the DB to destroy it.
  2117. if 'id' in bdm:
  2118. bdm.destroy()
  2119. @property
  2120. def placementclient(self):
  2121. if self._placementclient is None:
  2122. self._placementclient = report.SchedulerReportClient()
  2123. return self._placementclient
  2124. def _local_delete(self, context, instance, bdms, delete_type, cb):
  2125. if instance.vm_state == vm_states.SHELVED_OFFLOADED:
  2126. LOG.info("instance is in SHELVED_OFFLOADED state, cleanup"
  2127. " the instance's info from database.",
  2128. instance=instance)
  2129. else:
  2130. LOG.warning("instance's host %s is down, deleting from "
  2131. "database", instance.host, instance=instance)
  2132. with compute_utils.notify_about_instance_delete(
  2133. self.notifier, context, instance,
  2134. delete_type if delete_type != 'soft_delete' else 'delete'):
  2135. elevated = context.elevated()
  2136. self.network_api.deallocate_for_instance(elevated, instance)
  2137. # cleanup volumes
  2138. self._local_cleanup_bdm_volumes(bdms, instance, context)
  2139. # cleanup accelerator requests (ARQs)
  2140. compute_utils.delete_arqs_if_needed(context, instance)
  2141. # Cleanup allocations in Placement since we can't do it from the
  2142. # compute service.
  2143. self.placementclient.delete_allocation_for_instance(
  2144. context, instance.uuid)
  2145. cb(context, instance, bdms, local=True)
  2146. instance.destroy()
  2147. @staticmethod
  2148. def _update_queued_for_deletion(context, instance, qfd):
  2149. # NOTE(tssurya): We query the instance_mapping record of this instance
  2150. # and update the queued_for_delete flag to True (or False according to
  2151. # the state of the instance). This just means that the instance is
  2152. # queued for deletion (or is no longer queued for deletion). It does
  2153. # not guarantee its successful deletion (or restoration). Hence the
2154. # value could be stale, which is fine, considering it is only used
2155. # during a down-cell (desperate) situation.
  2156. im = objects.InstanceMapping.get_by_instance_uuid(context,
  2157. instance.uuid)
  2158. im.queued_for_delete = qfd
  2159. im.save()
  2160. def _do_delete(self, context, instance, bdms, local=False):
  2161. if local:
  2162. instance.vm_state = vm_states.DELETED
  2163. instance.task_state = None
  2164. instance.terminated_at = timeutils.utcnow()
  2165. instance.save()
  2166. else:
  2167. self.compute_rpcapi.terminate_instance(context, instance, bdms)
  2168. self._update_queued_for_deletion(context, instance, True)
  2169. def _do_soft_delete(self, context, instance, bdms, local=False):
  2170. if local:
  2171. instance.vm_state = vm_states.SOFT_DELETED
  2172. instance.task_state = None
  2173. instance.terminated_at = timeutils.utcnow()
  2174. instance.save()
  2175. else:
  2176. self.compute_rpcapi.soft_delete_instance(context, instance)
  2177. self._update_queued_for_deletion(context, instance, True)
  2178. # NOTE(maoy): we allow delete to be called no matter what vm_state says.
  2179. @check_instance_lock
  2180. @check_instance_state(vm_state=None, task_state=None,
  2181. must_have_launched=True)
  2182. def soft_delete(self, context, instance):
  2183. """Terminate an instance."""
  2184. LOG.debug('Going to try to soft delete instance',
  2185. instance=instance)
  2186. self._delete(context, instance, 'soft_delete', self._do_soft_delete,
  2187. task_state=task_states.SOFT_DELETING,
  2188. deleted_at=timeutils.utcnow())
  2189. def _delete_instance(self, context, instance):
  2190. self._delete(context, instance, 'delete', self._do_delete,
  2191. task_state=task_states.DELETING)
  2192. @check_instance_lock
  2193. @check_instance_state(vm_state=None, task_state=None,
  2194. must_have_launched=False)
  2195. def delete(self, context, instance):
  2196. """Terminate an instance."""
  2197. LOG.debug("Going to try to terminate instance", instance=instance)
  2198. self._delete_instance(context, instance)
  2199. @check_instance_lock
  2200. @check_instance_state(vm_state=[vm_states.SOFT_DELETED])
  2201. def restore(self, context, instance):
  2202. """Restore a previously deleted (but not reclaimed) instance."""
  2203. # Check quotas
  2204. flavor = instance.get_flavor()
  2205. project_id, user_id = quotas_obj.ids_from_instance(context, instance)
  2206. compute_utils.check_num_instances_quota(context, flavor, 1, 1,
  2207. project_id=project_id, user_id=user_id)
  2208. self._record_action_start(context, instance, instance_actions.RESTORE)
  2209. if instance.host:
  2210. instance.task_state = task_states.RESTORING
  2211. instance.deleted_at = None
  2212. instance.save(expected_task_state=[None])
  2213. # TODO(melwitt): We're not rechecking for strict quota here to
  2214. # guard against going over quota during a race at this time because
  2215. # the resource consumption for this operation is written to the
  2216. # database by compute.
  2217. self.compute_rpcapi.restore_instance(context, instance)
  2218. else:
  2219. instance.vm_state = vm_states.ACTIVE
  2220. instance.task_state = None
  2221. instance.deleted_at = None
  2222. instance.save(expected_task_state=[None])
  2223. self._update_queued_for_deletion(context, instance, False)
  2224. @check_instance_lock
  2225. @check_instance_state(task_state=None,
  2226. must_have_launched=False)
  2227. def force_delete(self, context, instance):
  2228. """Force delete an instance in any vm_state/task_state."""
  2229. self._delete(context, instance, 'force_delete', self._do_delete,
  2230. task_state=task_states.DELETING)
  2231. def force_stop(self, context, instance, do_cast=True, clean_shutdown=True):
  2232. LOG.debug("Going to try to stop instance", instance=instance)
  2233. instance.task_state = task_states.POWERING_OFF
  2234. instance.progress = 0
  2235. instance.save(expected_task_state=[None])
  2236. self._record_action_start(context, instance, instance_actions.STOP)
  2237. self.compute_rpcapi.stop_instance(context, instance, do_cast=do_cast,
  2238. clean_shutdown=clean_shutdown)
  2239. @check_instance_lock
  2240. @check_instance_host()
  2241. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.ERROR])
  2242. def stop(self, context, instance, do_cast=True, clean_shutdown=True):
  2243. """Stop an instance."""
  2244. self.force_stop(context, instance, do_cast, clean_shutdown)
  2245. @check_instance_lock
  2246. @check_instance_host()
  2247. @check_instance_state(vm_state=[vm_states.STOPPED])
  2248. def start(self, context, instance):
  2249. """Start an instance."""
  2250. LOG.debug("Going to try to start instance", instance=instance)
  2251. instance.task_state = task_states.POWERING_ON
  2252. instance.save(expected_task_state=[None])
  2253. self._record_action_start(context, instance, instance_actions.START)
  2254. self.compute_rpcapi.start_instance(context, instance)
  2255. @check_instance_lock
  2256. @check_instance_host()
  2257. @check_instance_state(vm_state=vm_states.ALLOW_TRIGGER_CRASH_DUMP)
  2258. def trigger_crash_dump(self, context, instance):
  2259. """Trigger crash dump in an instance."""
  2260. LOG.debug("Try to trigger crash dump", instance=instance)
  2261. self._record_action_start(context, instance,
  2262. instance_actions.TRIGGER_CRASH_DUMP)
  2263. self.compute_rpcapi.trigger_crash_dump(context, instance)
  2264. def _generate_minimal_construct_for_down_cells(self, context,
  2265. down_cell_uuids,
  2266. project, limit):
  2267. """Generate a list of minimal instance constructs for a given list of
  2268. cells that did not respond to a list operation. This will list
  2269. every instance mapping in the affected cells and return a minimal
  2270. objects.Instance for each (non-queued-for-delete) mapping.
  2271. :param context: RequestContext
  2272. :param down_cell_uuids: A list of cell UUIDs that did not respond
  2273. :param project: A project ID to filter mappings, or None
  2274. :param limit: A numeric limit on the number of results, or None
  2275. :returns: An InstanceList() of partial Instance() objects
  2276. """
  2277. unavailable_servers = objects.InstanceList()
  2278. for cell_uuid in down_cell_uuids:
  2279. LOG.warning("Cell %s is not responding and hence only "
  2280. "partial results are available from this "
  2281. "cell if any.", cell_uuid)
  2282. instance_mappings = (objects.InstanceMappingList.
  2283. get_not_deleted_by_cell_and_project(context, cell_uuid,
  2284. project, limit=limit))
  2285. for im in instance_mappings:
  2286. unavailable_servers.objects.append(
  2287. objects.Instance(
  2288. context=context,
  2289. uuid=im.instance_uuid,
  2290. project_id=im.project_id,
  2291. created_at=im.created_at
  2292. )
  2293. )
  2294. if limit is not None:
  2295. limit -= len(instance_mappings)
  2296. if limit <= 0:
  2297. break
  2298. return unavailable_servers
  2299. def _get_instance_map_or_none(self, context, instance_uuid):
  2300. try:
  2301. inst_map = objects.InstanceMapping.get_by_instance_uuid(
  2302. context, instance_uuid)
  2303. except exception.InstanceMappingNotFound:
  2304. # InstanceMapping should always be found generally. This exception
  2305. # may be raised if a deployment has partially migrated the nova-api
  2306. # services.
  2307. inst_map = None
  2308. return inst_map
  2309. @staticmethod
  2310. def _save_user_id_in_instance_mapping(mapping, instance):
  2311. # TODO(melwitt): We take the opportunity to migrate user_id on the
  2312. # instance mapping if it's not yet been migrated. This can be removed
  2313. # in a future release, when all migrations are complete.
  2314. # If the instance came from a RequestSpec because of a down cell, its
  2315. # user_id could be None and the InstanceMapping.user_id field is
  2316. # non-nullable. Avoid trying to set/save the user_id in that case.
  2317. if 'user_id' not in mapping and instance.user_id is not None:
  2318. mapping.user_id = instance.user_id
  2319. mapping.save()
  2320. def _get_instance_from_cell(self, context, im, expected_attrs,
  2321. cell_down_support):
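# Happy path: return the full Instance from the target cell. If the
# cell is down and cell_down_support is enabled, fall back to a
# minimal Instance built from the instance mapping and its
# RequestSpec; otherwise the failure is surfaced to the caller.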
  2322. # NOTE(danms): Even though we're going to scatter/gather to the
  2323. # right cell, other code depends on this being force targeted when
  2324. # the get call returns.
  2325. nova_context.set_target_cell(context, im.cell_mapping)
  2326. uuid = im.instance_uuid
  2327. result = nova_context.scatter_gather_single_cell(context,
  2328. im.cell_mapping, objects.Instance.get_by_uuid, uuid,
  2329. expected_attrs=expected_attrs)
  2330. cell_uuid = im.cell_mapping.uuid
  2331. if not nova_context.is_cell_failure_sentinel(result[cell_uuid]):
  2332. inst = result[cell_uuid]
  2333. self._save_user_id_in_instance_mapping(im, inst)
  2334. return inst
  2335. elif isinstance(result[cell_uuid], exception.InstanceNotFound):
  2336. raise exception.InstanceNotFound(instance_id=uuid)
  2337. elif cell_down_support:
  2338. if im.queued_for_delete:
  2339. # should be treated like deleted instance.
  2340. raise exception.InstanceNotFound(instance_id=uuid)
  2341. # instance in down cell, return a minimal construct
  2342. LOG.warning("Cell %s is not responding and hence only "
  2343. "partial results are available from this "
  2344. "cell.", cell_uuid)
  2345. try:
  2346. rs = objects.RequestSpec.get_by_instance_uuid(context,
  2347. uuid)
  2348. # For BFV case, we could have rs.image but rs.image.id might
  2349. # still not be set. So we check the existence of both image
  2350. # and its id.
  2351. image_ref = (rs.image.id if rs.image and
  2352. 'id' in rs.image else None)
  2353. inst = objects.Instance(context=context, power_state=0,
  2354. uuid=uuid,
  2355. project_id=im.project_id,
  2356. created_at=im.created_at,
  2357. user_id=rs.user_id,
  2358. flavor=rs.flavor,
  2359. image_ref=image_ref,
  2360. availability_zone=rs.availability_zone)
  2361. self._save_user_id_in_instance_mapping(im, inst)
  2362. return inst
  2363. except exception.RequestSpecNotFound:
  2364. # could be that a deleted instance whose request
  2365. # spec has been archived is being queried.
  2366. raise exception.InstanceNotFound(instance_id=uuid)
  2367. else:
  2368. raise exception.NovaException(
  2369. _("Cell %s is not responding and hence instance "
  2370. "info is not available.") % cell_uuid)
  2371. def _get_instance(self, context, instance_uuid, expected_attrs,
  2372. cell_down_support=False):
  2373. inst_map = self._get_instance_map_or_none(context, instance_uuid)
  2374. if inst_map and (inst_map.cell_mapping is not None):
  2375. instance = self._get_instance_from_cell(context, inst_map,
  2376. expected_attrs, cell_down_support)
  2377. elif inst_map and (inst_map.cell_mapping is None):
  2378. # This means the instance has not been scheduled and put in
  2379. # a cell yet. For now it also may mean that the deployer
  2380. # has not created their cell(s) yet.
  2381. try:
  2382. build_req = objects.BuildRequest.get_by_instance_uuid(
  2383. context, instance_uuid)
  2384. instance = build_req.instance
  2385. except exception.BuildRequestNotFound:
  2386. # Instance was mapped and the BuildRequest was deleted
  2387. # while fetching. Try again.
  2388. inst_map = self._get_instance_map_or_none(context,
  2389. instance_uuid)
  2390. if inst_map and (inst_map.cell_mapping is not None):
  2391. instance = self._get_instance_from_cell(context, inst_map,
  2392. expected_attrs, cell_down_support)
  2393. else:
  2394. raise exception.InstanceNotFound(instance_id=instance_uuid)
  2395. else:
  2396. # If we got here, we don't have an instance mapping, but we aren't
  2397. # sure why. The instance mapping might be missing because the
  2398. # upgrade is incomplete (map_instances wasn't run). Or because the
  2399. # instance was deleted and the DB was archived at which point the
  2400. # mapping is deleted. The former case is bad, but because of the
  2401. # latter case we can't really log any kind of warning/error here
  2402. # since it might be normal.
  2403. raise exception.InstanceNotFound(instance_id=instance_uuid)
  2404. return instance
  2405. def get(self, context, instance_id, expected_attrs=None,
  2406. cell_down_support=False):
  2407. """Get a single instance with the given instance_id.
  2408. :param cell_down_support: True if the API (and caller) support
  2409. returning a minimal instance
  2410. construct if the relevant cell is
  2411. down. If False, an error is raised
  2412. since the instance cannot be retrieved
  2413. due to the cell being down.
  2414. """
  2415. if not expected_attrs:
  2416. expected_attrs = []
  2417. expected_attrs.extend(['metadata', 'system_metadata',
  2418. 'security_groups', 'info_cache'])
  2419. # NOTE(ameade): we still need to support integer ids for ec2
  2420. try:
  2421. if uuidutils.is_uuid_like(instance_id):
  2422. LOG.debug("Fetching instance by UUID",
  2423. instance_uuid=instance_id)
  2424. instance = self._get_instance(context, instance_id,
  2425. expected_attrs, cell_down_support=cell_down_support)
  2426. else:
  2427. LOG.debug("Failed to fetch instance by id %s", instance_id)
  2428. raise exception.InstanceNotFound(instance_id=instance_id)
  2429. except exception.InvalidID:
  2430. LOG.debug("Invalid instance id %s", instance_id)
  2431. raise exception.InstanceNotFound(instance_id=instance_id)
  2432. return instance
  2433. def get_all(self, context, search_opts=None, limit=None, marker=None,
  2434. expected_attrs=None, sort_keys=None, sort_dirs=None,
  2435. cell_down_support=False, all_tenants=False):
  2436. """Get all instances filtered by one of the given parameters.
  2437. If there is no filter and the context is an admin, it will retrieve
  2438. all instances in the system.
  2439. Deleted instances will be returned by default, unless there is a
  2440. search option that says otherwise.
  2441. The results will be sorted based on the list of sort keys in the
  2442. 'sort_keys' parameter (first value is primary sort key, second value is
2443. secondary sort key, etc.). For each sort key, the associated sort
  2444. direction is based on the list of sort directions in the 'sort_dirs'
  2445. parameter.
  2446. :param cell_down_support: True if the API (and caller) support
  2447. returning a minimal instance
  2448. construct if the relevant cell is
  2449. down. If False, instances from
  2450. unreachable cells will be omitted.
  2451. :param all_tenants: True if the "all_tenants" filter was passed.
  2452. """
  2453. if search_opts is None:
  2454. search_opts = {}
  2455. LOG.debug("Searching by: %s", str(search_opts))
  2456. # Fixups for the DB call
  2457. filters = {}
  2458. def _remap_flavor_filter(flavor_id):
  2459. flavor = objects.Flavor.get_by_flavor_id(context, flavor_id)
  2460. filters['instance_type_id'] = flavor.id
  2461. def _remap_fixed_ip_filter(fixed_ip):
  2462. # Turn fixed_ip into a regexp match. Since '.' matches
  2463. # any character, we need to use regexp escaping for it.
  2464. filters['ip'] = '^%s$' % fixed_ip.replace('.', '\\.')
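# For example, a fixed_ip of '10.0.0.1' becomes the anchored
# pattern '^10\.0\.0\.1$', so '110.0.0.1' and '10.0.0.10' do
# not match it.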
  2465. # search_option to filter_name mapping.
  2466. filter_mapping = {
  2467. 'image': 'image_ref',
  2468. 'name': 'display_name',
  2469. 'tenant_id': 'project_id',
  2470. 'flavor': _remap_flavor_filter,
  2471. 'fixed_ip': _remap_fixed_ip_filter}
  2472. # copy from search_opts, doing various remappings as necessary
  2473. for opt, value in search_opts.items():
  2474. # Do remappings.
  2475. # Values not in the filter_mapping table are copied as-is.
  2476. # If remapping is None, option is not copied
  2477. # If the remapping is a string, it is the filter_name to use
  2478. try:
  2479. remap_object = filter_mapping[opt]
  2480. except KeyError:
  2481. filters[opt] = value
  2482. else:
  2483. # Remaps are strings to translate to, or functions to call
  2484. # to do the translating as defined by the table above.
  2485. if isinstance(remap_object, six.string_types):
  2486. filters[remap_object] = value
  2487. else:
  2488. try:
  2489. remap_object(value)
  2490. # We already know we can't match the filter, so
  2491. # return an empty list
  2492. except ValueError:
  2493. return objects.InstanceList()
  2494. # IP address filtering cannot be applied at the DB layer, remove any DB
  2495. # limit so that it can be applied after the IP filter.
  2496. filter_ip = 'ip6' in filters or 'ip' in filters
  2497. skip_build_request = False
  2498. orig_limit = limit
  2499. if filter_ip:
  2500. # We cannot skip build requests if there is a marker since the
2501. # marker could be a build request.
  2502. skip_build_request = marker is None
  2503. if self.network_api.has_substr_port_filtering_extension(context):
  2504. # We're going to filter by IP using Neutron so set filter_ip
  2505. # to False so we don't attempt post-DB query filtering in
  2506. # memory below.
  2507. filter_ip = False
  2508. instance_uuids = self._ip_filter_using_neutron(context,
  2509. filters)
  2510. if instance_uuids:
  2511. # Note that 'uuid' is not in the 2.1 GET /servers query
  2512. # parameter schema, however, we allow additionalProperties
  2513. # so someone could filter instances by uuid, which doesn't
  2514. # make a lot of sense but we have to account for it.
  2515. if 'uuid' in filters and filters['uuid']:
  2516. filter_uuids = filters['uuid']
  2517. if isinstance(filter_uuids, list):
  2518. instance_uuids.extend(filter_uuids)
  2519. else:
  2520. # Assume a string. If it's a dict or tuple or
  2521. # something, well...that's too bad. This is why
  2522. # we have query parameter schema definitions.
  2523. if filter_uuids not in instance_uuids:
  2524. instance_uuids.append(filter_uuids)
  2525. filters['uuid'] = instance_uuids
  2526. else:
  2527. # No matches on the ip filter(s), return an empty list.
  2528. return objects.InstanceList()
  2529. elif limit:
  2530. LOG.debug('Removing limit for DB query due to IP filter')
  2531. limit = None
  2532. # Skip get BuildRequest if filtering by IP address, as building
  2533. # instances will not have IP addresses.
  2534. if skip_build_request:
  2535. build_requests = objects.BuildRequestList()
  2536. else:
  2537. # The ordering of instances will be
  2538. # [sorted instances with no host] + [sorted instances with host].
  2539. # This means BuildRequest and cell0 instances first, then cell
  2540. # instances
  2541. try:
  2542. build_requests = objects.BuildRequestList.get_by_filters(
  2543. context, filters, limit=limit, marker=marker,
  2544. sort_keys=sort_keys, sort_dirs=sort_dirs)
2545. # If we found the marker in the build requests then we need to
2546. # set it to None so we don't expect to find it in the cells below.
  2547. marker = None
  2548. except exception.MarkerNotFound:
  2549. # If we didn't find the marker in the build requests then keep
  2550. # looking for it in the cells.
  2551. build_requests = objects.BuildRequestList()
  2552. build_req_instances = objects.InstanceList(
  2553. objects=[build_req.instance for build_req in build_requests])
  2554. # Only subtract from limit if it is not None
  2555. limit = (limit - len(build_req_instances)) if limit else limit
  2556. # We could arguably avoid joining on security_groups if we're using
  2557. # neutron (which is the default) but if you're using neutron then the
  2558. # security_group_instance_association table should be empty anyway
  2559. # and the DB should optimize out that join, making it insignificant.
  2560. fields = ['metadata', 'info_cache', 'security_groups']
  2561. if expected_attrs:
  2562. fields.extend(expected_attrs)
  2563. insts, down_cell_uuids = instance_list.get_instance_objects_sorted(
  2564. context, filters, limit, marker, fields, sort_keys, sort_dirs,
  2565. cell_down_support=cell_down_support)
  2566. def _get_unique_filter_method():
  2567. seen_uuids = set()
  2568. def _filter(instance):
  2569. # During a cross-cell move operation we could have the instance
  2570. # in more than one cell database so we not only have to filter
  2571. # duplicates but we want to make sure we only return the
  2572. # "current" one which should also be the one that the instance
  2573. # mapping points to, but we don't want to do that expensive
  2574. # lookup here. The DB API will filter out hidden instances by
  2575. # default but there is a small window where two copies of an
  2576. # instance could be hidden=False in separate cell DBs.
  2577. # NOTE(mriedem): We could make this better in the case that we
  2578. # have duplicate instances that are both hidden=False by
  2579. # showing the one with the newer updated_at value, but that
  2580. # could be tricky if the user is filtering on
  2581. # changes-since/before or updated_at, or sorting on updated_at,
  2582. # but technically that was already potentially broken with this
  2583. # _filter method if we return an older BuildRequest.instance,
  2584. # and given the window should be very small where we have
  2585. # duplicates, it's probably not worth the complexity.
  2586. if instance.uuid in seen_uuids:
  2587. return False
  2588. seen_uuids.add(instance.uuid)
  2589. return True
  2590. return _filter
  2591. filter_method = _get_unique_filter_method()
  2592. # Only subtract from limit if it is not None
  2593. limit = (limit - len(insts)) if limit else limit
  2594. # TODO(alaski): Clean up the objects concatenation when List objects
  2595. # support it natively.
  2596. instances = objects.InstanceList(
  2597. objects=list(filter(filter_method,
  2598. build_req_instances.objects +
  2599. insts.objects)))
  2600. if filter_ip:
  2601. instances = self._ip_filter(instances, filters, orig_limit)
  2602. if cell_down_support:
  2603. # API and client want minimal construct instances for any cells
2604. that didn't return, so generate and prepend those to the actual
  2605. # results.
  2606. project = search_opts.get('project_id', context.project_id)
  2607. if all_tenants:
  2608. # NOTE(tssurya): The only scenario where project has to be None
  2609. # is when using "all_tenants" in which case we do not want
  2610. # the query to be restricted based on the project_id.
  2611. project = None
  2612. limit = (orig_limit - len(instances)) if limit else limit
  2613. return (self._generate_minimal_construct_for_down_cells(context,
  2614. down_cell_uuids, project, limit) + instances)
  2615. return instances
  2616. @staticmethod
  2617. def _ip_filter(inst_models, filters, limit):
  2618. ipv4_f = re.compile(str(filters.get('ip')))
  2619. ipv6_f = re.compile(str(filters.get('ip6')))
  2620. def _match_instance(instance):
  2621. nw_info = instance.get_network_info()
  2622. for vif in nw_info:
  2623. for fixed_ip in vif.fixed_ips():
  2624. address = fixed_ip.get('address')
  2625. if not address:
  2626. continue
  2627. version = fixed_ip.get('version')
  2628. if ((version == 4 and ipv4_f.match(address)) or
  2629. (version == 6 and ipv6_f.match(address))):
  2630. return True
  2631. return False
  2632. result_objs = []
  2633. for instance in inst_models:
  2634. if _match_instance(instance):
  2635. result_objs.append(instance)
  2636. if limit and len(result_objs) == limit:
  2637. break
  2638. return objects.InstanceList(objects=result_objs)
  2639. def _ip_filter_using_neutron(self, context, filters):
  2640. ip4_address = filters.get('ip')
  2641. ip6_address = filters.get('ip6')
  2642. addresses = [ip4_address, ip6_address]
  2643. uuids = []
  2644. for address in addresses:
  2645. if address:
  2646. try:
  2647. ports = self.network_api.list_ports(
  2648. context, fixed_ips='ip_address_substr=' + address,
  2649. fields=['device_id'])['ports']
  2650. for port in ports:
  2651. uuids.append(port['device_id'])
  2652. except Exception as e:
  2653. LOG.error('An error occurred while listing ports '
  2654. 'with an ip_address filter value of "%s". '
  2655. 'Error: %s',
  2656. address, six.text_type(e))
  2657. return uuids
  2658. def update_instance(self, context, instance, updates):
  2659. """Updates a single Instance object with some updates dict.
  2660. Returns the updated instance.
  2661. """
  2662. # NOTE(sbauza): Given we only persist the Instance object after we
  2663. # create the BuildRequest, we are sure that if the Instance object
  2664. # has an ID field set, then it was persisted in the right Cell DB.
  2665. if instance.obj_attr_is_set('id'):
  2666. instance.update(updates)
  2667. instance.save()
  2668. else:
  2669. # Instance is not yet mapped to a cell, so we need to update
  2670. # BuildRequest instead
  2671. # TODO(sbauza): Fix the possible race conditions where BuildRequest
  2672. # could be deleted because of either a concurrent instance delete
  2673. # or because the scheduler just returned a destination right
  2674. # after we called the instance in the API.
  2675. try:
  2676. build_req = objects.BuildRequest.get_by_instance_uuid(
  2677. context, instance.uuid)
  2678. instance = build_req.instance
  2679. instance.update(updates)
2680. # FIXME(sbauza): Here we are updating the current
2681. # thread-related BuildRequest object. Given that another worker
2682. # could be looking up that BuildRequest in the API and passing
2683. # it down to the conductor without realizing it has since been
2684. # updated, we could have a race condition where the conductor
2685. # would be missing the updated fields, but that's something we
2686. # can discuss once the instance record is persisted by the
2687. # conductor.
  2688. build_req.save()
  2689. except exception.BuildRequestNotFound:
  2690. # Instance was mapped and the BuildRequest was deleted
  2691. # while fetching (and possibly the instance could have been
  2692. # deleted as well). We need to lookup again the Instance object
  2693. # in order to correctly update it.
  2694. # TODO(sbauza): Figure out a good way to know the expected
  2695. # attributes by checking which fields are set or not.
  2696. expected_attrs = ['flavor', 'pci_devices', 'numa_topology',
  2697. 'tags', 'metadata', 'system_metadata',
  2698. 'security_groups', 'info_cache']
  2699. inst_map = self._get_instance_map_or_none(context,
  2700. instance.uuid)
  2701. if inst_map and (inst_map.cell_mapping is not None):
  2702. with nova_context.target_cell(
  2703. context,
  2704. inst_map.cell_mapping) as cctxt:
  2705. instance = objects.Instance.get_by_uuid(
  2706. cctxt, instance.uuid,
  2707. expected_attrs=expected_attrs)
  2708. instance.update(updates)
  2709. instance.save()
  2710. else:
  2711. # Conductor doesn't delete the BuildRequest until after the
  2712. # InstanceMapping record is created, so if we didn't get
  2713. # that and the BuildRequest doesn't exist, then the
  2714. # instance is already gone and we need to just error out.
  2715. raise exception.InstanceNotFound(instance_id=instance.uuid)
  2716. return instance
  2717. # NOTE(melwitt): We don't check instance lock for backup because lock is
  2718. # intended to prevent accidental change/delete of instances
  2719. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2720. vm_states.PAUSED, vm_states.SUSPENDED])
  2721. def backup(self, context, instance, name, backup_type, rotation,
  2722. extra_properties=None):
  2723. """Backup the given instance
  2724. :param instance: nova.objects.instance.Instance object
  2725. :param name: name of the backup
  2726. :param backup_type: 'daily' or 'weekly'
  2727. :param rotation: int representing how many backups to keep around;
  2728. None if rotation shouldn't be used (as in the case of snapshots)
  2729. :param extra_properties: dict of extra image properties to include
  2730. when creating the image.
  2731. :returns: A dict containing image metadata
  2732. """
  2733. props_copy = dict(extra_properties, backup_type=backup_type)
  2734. if compute_utils.is_volume_backed_instance(context, instance):
2735. LOG.info("Backing up a volume-backed instance is not "
2736. "supported.", instance=instance)
  2737. raise exception.InvalidRequest(
  2738. _('Backup is not supported for volume-backed instances.'))
  2739. else:
  2740. image_meta = compute_utils.create_image(
  2741. context, instance, name, 'backup', self.image_api,
  2742. extra_properties=props_copy)
  2743. instance.task_state = task_states.IMAGE_BACKUP
  2744. instance.save(expected_task_state=[None])
  2745. self._record_action_start(context, instance,
  2746. instance_actions.BACKUP)
  2747. self.compute_rpcapi.backup_instance(context, instance,
  2748. image_meta['id'],
  2749. backup_type,
  2750. rotation)
  2751. return image_meta
  2752. # NOTE(melwitt): We don't check instance lock for snapshot because lock is
  2753. # intended to prevent accidental change/delete of instances
  2754. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2755. vm_states.PAUSED, vm_states.SUSPENDED])
  2756. def snapshot(self, context, instance, name, extra_properties=None):
  2757. """Snapshot the given instance.
  2758. :param instance: nova.objects.instance.Instance object
  2759. :param name: name of the snapshot
  2760. :param extra_properties: dict of extra image properties to include
  2761. when creating the image.
  2762. :returns: A dict containing image metadata
  2763. """
  2764. image_meta = compute_utils.create_image(
  2765. context, instance, name, 'snapshot', self.image_api,
  2766. extra_properties=extra_properties)
  2767. instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING
  2768. try:
  2769. instance.save(expected_task_state=[None])
  2770. except (exception.InstanceNotFound,
  2771. exception.UnexpectedDeletingTaskStateError) as ex:
2772. # Determine the instance attribute and state to use when
2773. # raising the InstanceInvalidState exception below.
  2774. LOG.debug('Instance disappeared during snapshot.',
  2775. instance=instance)
  2776. try:
  2777. image_id = image_meta['id']
  2778. self.image_api.delete(context, image_id)
  2779. LOG.info('Image %s deleted because instance '
  2780. 'deleted before snapshot started.',
  2781. image_id, instance=instance)
  2782. except exception.ImageNotFound:
  2783. pass
  2784. except Exception as exc:
  2785. LOG.warning("Error while trying to clean up image %(img_id)s: "
  2786. "%(error_msg)s",
  2787. {"img_id": image_meta['id'],
  2788. "error_msg": six.text_type(exc)})
  2789. attr = 'task_state'
  2790. state = task_states.DELETING
  2791. if type(ex) == exception.InstanceNotFound:
  2792. attr = 'vm_state'
  2793. state = vm_states.DELETED
  2794. raise exception.InstanceInvalidState(attr=attr,
  2795. instance_uuid=instance.uuid,
  2796. state=state,
  2797. method='snapshot')
  2798. self._record_action_start(context, instance,
  2799. instance_actions.CREATE_IMAGE)
  2800. self.compute_rpcapi.snapshot_instance(context, instance,
  2801. image_meta['id'])
  2802. return image_meta
  2803. # NOTE(melwitt): We don't check instance lock for snapshot because lock is
  2804. # intended to prevent accidental change/delete of instances
  2805. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2806. vm_states.PAUSED, vm_states.SUSPENDED])
  2807. def snapshot_volume_backed(self, context, instance, name,
  2808. extra_properties=None):
  2809. """Snapshot the given volume-backed instance.
  2810. :param instance: nova.objects.instance.Instance object
  2811. :param name: name of the backup or snapshot
  2812. :param extra_properties: dict of extra image properties to include
  2813. :returns: the new image metadata
  2814. """
  2815. image_meta = compute_utils.initialize_instance_snapshot_metadata(
  2816. context, instance, name, extra_properties)
  2817. # the new image is simply a bucket of properties (particularly the
  2818. # block device mapping, kernel and ramdisk IDs) with no image data,
  2819. # hence the zero size
  2820. image_meta['size'] = 0
  2821. for attr in ('container_format', 'disk_format'):
  2822. image_meta.pop(attr, None)
  2823. properties = image_meta['properties']
  2824. # clean properties before filling
  2825. for key in ('block_device_mapping', 'bdm_v2', 'root_device_name'):
  2826. properties.pop(key, None)
  2827. if instance.root_device_name:
  2828. properties['root_device_name'] = instance.root_device_name
  2829. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  2830. context, instance.uuid)
  2831. mapping = [] # list of BDM dicts that can go into the image properties
  2832. # Do some up-front filtering of the list of BDMs from
  2833. # which we are going to create snapshots.
  2834. volume_bdms = []
  2835. for bdm in bdms:
  2836. if bdm.no_device:
  2837. continue
  2838. if bdm.is_volume:
  2839. # These will be handled below.
  2840. volume_bdms.append(bdm)
  2841. else:
  2842. mapping.append(bdm.get_image_mapping())
  2843. # Check limits in Cinder before creating snapshots to avoid going over
  2844. # quota in the middle of a list of volumes. This is a best-effort check
  2845. # but concurrently running snapshot requests from the same project
  2846. # could still fail to create volume snapshots if they go over limit.
  2847. if volume_bdms:
  2848. limits = self.volume_api.get_absolute_limits(context)
  2849. total_snapshots_used = limits['totalSnapshotsUsed']
  2850. max_snapshots = limits['maxTotalSnapshots']
  2851. # -1 means there is unlimited quota for snapshots
  2852. if (max_snapshots > -1 and
  2853. len(volume_bdms) + total_snapshots_used > max_snapshots):
  2854. LOG.debug('Unable to create volume snapshots for instance. '
  2855. 'Currently has %s snapshots, requesting %s new '
  2856. 'snapshots, with a limit of %s.',
  2857. total_snapshots_used, len(volume_bdms),
  2858. max_snapshots, instance=instance)
  2859. raise exception.OverQuota(overs='snapshots')
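# Worked example of the check above: with totalSnapshotsUsed=8,
# three volume BDMs to snapshot and maxTotalSnapshots=10,
# 3 + 8 > 10, so the request is rejected before any snapshot is
# created.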
  2860. quiesced = False
  2861. if instance.vm_state == vm_states.ACTIVE:
  2862. try:
  2863. LOG.info("Attempting to quiesce instance before volume "
  2864. "snapshot.", instance=instance)
  2865. self.compute_rpcapi.quiesce_instance(context, instance)
  2866. quiesced = True
  2867. except (exception.InstanceQuiesceNotSupported,
  2868. exception.QemuGuestAgentNotEnabled,
  2869. exception.NovaException, NotImplementedError) as err:
  2870. if strutils.bool_from_string(instance.system_metadata.get(
  2871. 'image_os_require_quiesce')):
  2872. raise
  2873. if isinstance(err, exception.NovaException):
  2874. LOG.info('Skipping quiescing instance: %(reason)s.',
  2875. {'reason': err.format_message()},
  2876. instance=instance)
  2877. else:
  2878. LOG.info('Skipping quiescing instance because the '
  2879. 'operation is not supported by the underlying '
  2880. 'compute driver.', instance=instance)
  2881. # NOTE(tasker): discovered that an uncaught exception could occur
2882. # after the instance has been frozen. Catch and thaw.
  2883. except Exception as ex:
  2884. with excutils.save_and_reraise_exception():
  2885. LOG.error("An error occurred during quiesce of instance. "
  2886. "Unquiescing to ensure instance is thawed. "
  2887. "Error: %s", six.text_type(ex),
  2888. instance=instance)
  2889. self.compute_rpcapi.unquiesce_instance(context, instance,
  2890. mapping=None)
  2891. @wrap_instance_event(prefix='api')
  2892. def snapshot_instance(self, context, instance, bdms):
  2893. try:
  2894. for bdm in volume_bdms:
  2895. # create snapshot based on volume_id
  2896. volume = self.volume_api.get(context, bdm.volume_id)
  2897. # NOTE(yamahata): Should we wait for snapshot creation?
2898. # Linux LVM snapshot creation completes in a
2899. # short time, so it doesn't matter for now.
  2900. name = _('snapshot for %s') % image_meta['name']
  2901. LOG.debug('Creating snapshot from volume %s.',
  2902. volume['id'], instance=instance)
  2903. snapshot = self.volume_api.create_snapshot_force(
  2904. context, volume['id'],
  2905. name, volume['display_description'])
  2906. mapping_dict = block_device.snapshot_from_bdm(
  2907. snapshot['id'], bdm)
  2908. mapping_dict = mapping_dict.get_image_mapping()
  2909. mapping.append(mapping_dict)
  2910. return mapping
  2911. # NOTE(tasker): No error handling is done in the above for loop.
  2912. # This means that if the snapshot fails and throws an exception
  2913. # the traceback will skip right over the unquiesce needed below.
  2914. # Here, catch any exception, unquiesce the instance, and raise the
2915. # error so that the calling function can do what it needs to in
2916. # order to properly handle a failed snapshot.
  2917. except Exception:
  2918. with excutils.save_and_reraise_exception():
  2919. if quiesced:
  2920. LOG.info("Unquiescing instance after volume snapshot "
  2921. "failure.", instance=instance)
  2922. self.compute_rpcapi.unquiesce_instance(
  2923. context, instance, mapping)
  2924. self._record_action_start(context, instance,
  2925. instance_actions.CREATE_IMAGE)
  2926. mapping = snapshot_instance(self, context, instance, bdms)
  2927. if quiesced:
  2928. self.compute_rpcapi.unquiesce_instance(context, instance, mapping)
  2929. if mapping:
  2930. properties['block_device_mapping'] = mapping
  2931. properties['bdm_v2'] = True
  2932. return self.image_api.create(context, image_meta)
  2933. @check_instance_lock
  2934. def reboot(self, context, instance, reboot_type):
  2935. """Reboot the given instance."""
  2936. if reboot_type == 'SOFT':
  2937. self._soft_reboot(context, instance)
  2938. else:
  2939. self._hard_reboot(context, instance)
  2940. @check_instance_state(vm_state=set(vm_states.ALLOW_SOFT_REBOOT),
  2941. task_state=[None])
  2942. def _soft_reboot(self, context, instance):
  2943. expected_task_state = [None]
  2944. instance.task_state = task_states.REBOOTING
  2945. instance.save(expected_task_state=expected_task_state)
  2946. self._record_action_start(context, instance, instance_actions.REBOOT)
  2947. self.compute_rpcapi.reboot_instance(context, instance=instance,
  2948. block_device_info=None,
  2949. reboot_type='SOFT')
  2950. @check_instance_state(vm_state=set(vm_states.ALLOW_HARD_REBOOT),
  2951. task_state=task_states.ALLOW_REBOOT)
  2952. def _hard_reboot(self, context, instance):
  2953. instance.task_state = task_states.REBOOTING_HARD
  2954. instance.save(expected_task_state=task_states.ALLOW_REBOOT)
  2955. self._record_action_start(context, instance, instance_actions.REBOOT)
  2956. self.compute_rpcapi.reboot_instance(context, instance=instance,
  2957. block_device_info=None,
  2958. reboot_type='HARD')
  2959. def _check_image_arch(self, image=None):
  2960. if image:
  2961. img_arch = image.get("properties", {}).get('hw_architecture')
  2962. if img_arch:
  2963. fields_obj.Architecture.canonicalize(img_arch)
  2964. @block_accelerators
  2965. # TODO(stephenfin): We should expand kwargs out to named args
  2966. @check_instance_lock
  2967. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2968. vm_states.ERROR])
  2969. def rebuild(self, context, instance, image_href, admin_password,
  2970. files_to_inject=None, **kwargs):
  2971. """Rebuild the given instance with the provided attributes."""
  2972. files_to_inject = files_to_inject or []
  2973. metadata = kwargs.get('metadata', {})
  2974. preserve_ephemeral = kwargs.get('preserve_ephemeral', False)
  2975. auto_disk_config = kwargs.get('auto_disk_config')
  2976. if 'key_name' in kwargs:
  2977. key_name = kwargs.pop('key_name')
  2978. if key_name:
  2979. # NOTE(liuyulong): we are intentionally using the user_id from
  2980. # the request context rather than the instance.user_id because
  2981. # users own keys but instances are owned by projects, and
  2982. # another user in the same project can rebuild an instance
  2983. # even if they didn't create it.
  2984. key_pair = objects.KeyPair.get_by_name(context,
  2985. context.user_id,
  2986. key_name)
  2987. instance.key_name = key_pair.name
  2988. instance.key_data = key_pair.public_key
  2989. instance.keypairs = objects.KeyPairList(objects=[key_pair])
  2990. else:
  2991. instance.key_name = None
  2992. instance.key_data = None
  2993. instance.keypairs = objects.KeyPairList(objects=[])
  2994. # Use trusted_certs value from kwargs to create TrustedCerts object
  2995. trusted_certs = None
  2996. if 'trusted_certs' in kwargs:
  2997. # Note that the user can set, change, or unset / reset trusted
  2998. # certs. If they are explicitly specifying
  2999. # trusted_image_certificates=None, that means we'll either unset
  3000. # them on the instance *or* reset to use the defaults (if defaults
  3001. # are configured).
  3002. trusted_certs = kwargs.pop('trusted_certs')
  3003. instance.trusted_certs = self._retrieve_trusted_certs_object(
  3004. context, trusted_certs, rebuild=True)
  3005. image_id, image = self._get_image(context, image_href)
  3006. self._check_auto_disk_config(image=image,
  3007. auto_disk_config=auto_disk_config)
  3008. self._check_image_arch(image=image)
  3009. flavor = instance.get_flavor()
  3010. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  3011. context, instance.uuid)
  3012. root_bdm = compute_utils.get_root_bdm(context, instance, bdms)
  3013. # Check to see if the image is changing and we have a volume-backed
  3014. # server. The compute doesn't support changing the image in the
  3015. # root disk of a volume-backed server, so we need to just fail fast.
  3016. is_volume_backed = compute_utils.is_volume_backed_instance(
  3017. context, instance, bdms)
  3018. if is_volume_backed:
  3019. if trusted_certs:
  3020. # The only way we can get here is if the user tried to set
  3021. # trusted certs or specified trusted_image_certificates=None
  3022. # and default_trusted_certificate_ids is configured.
  3023. msg = _("Image certificate validation is not supported "
  3024. "for volume-backed servers.")
  3025. raise exception.CertificateValidationFailed(message=msg)
  3026. # For boot from volume, instance.image_ref is empty, so we need to
  3027. # query the image from the volume.
  3028. if root_bdm is None:
  3029. # This shouldn't happen and is an error, we need to fail. This
3030. # is not the user's fault, it's an internal error. Without a
  3031. # root BDM we have no way of knowing the backing volume (or
  3032. # image in that volume) for this instance.
  3033. raise exception.NovaException(
  3034. _('Unable to find root block device mapping for '
  3035. 'volume-backed instance.'))
  3036. volume = self.volume_api.get(context, root_bdm.volume_id)
  3037. volume_image_metadata = volume.get('volume_image_metadata', {})
  3038. orig_image_ref = volume_image_metadata.get('image_id')
  3039. if orig_image_ref != image_href:
  3040. # Leave a breadcrumb.
  3041. LOG.debug('Requested to rebuild instance with a new image %s '
  3042. 'for a volume-backed server with image %s in its '
  3043. 'root volume which is not supported.', image_href,
  3044. orig_image_ref, instance=instance)
  3045. msg = _('Unable to rebuild with a different image for a '
  3046. 'volume-backed server.')
  3047. raise exception.ImageUnacceptable(
  3048. image_id=image_href, reason=msg)
  3049. else:
  3050. orig_image_ref = instance.image_ref
  3051. request_spec = objects.RequestSpec.get_by_instance_uuid(
  3052. context, instance.uuid)
  3053. self._checks_for_create_and_rebuild(context, image_id, image,
  3054. flavor, metadata, files_to_inject, root_bdm)
  3055. # NOTE(sean-k-mooney): When we rebuild with a new image we need to
  3056. # validate that the NUMA topology does not change as we do a NOOP claim
  3057. # in resource tracker. As such we cannot allow the resource usage or
  3058. # assignment to change as a result of a new image altering the
  3059. # numa constraints.
  3060. if orig_image_ref != image_href:
  3061. self._validate_numa_rebuild(instance, image, flavor)
  3062. kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
  3063. context, None, None, image)
  3064. def _reset_image_metadata():
  3065. """Remove old image properties that we're storing as instance
  3066. system metadata. These properties start with 'image_'.
  3067. Then add the properties for the new image.
  3068. """
  3069. # FIXME(comstud): There's a race condition here in that if
  3070. # the system_metadata for this instance is updated after
  3071. # we do the previous save() and before we update.. those
  3072. # other updates will be lost. Since this problem exists in
  3073. # a lot of other places, I think it should be addressed in
  3074. # a DB layer overhaul.
  3075. orig_sys_metadata = dict(instance.system_metadata)
  3076. # Remove the old keys
  3077. for key in list(instance.system_metadata.keys()):
  3078. if key.startswith(utils.SM_IMAGE_PROP_PREFIX):
  3079. del instance.system_metadata[key]
  3080. # Add the new ones
  3081. new_sys_metadata = utils.get_system_metadata_from_image(
  3082. image, flavor)
  3083. instance.system_metadata.update(new_sys_metadata)
  3084. instance.save()
  3085. return orig_sys_metadata
  3086. # Since image might have changed, we may have new values for
  3087. # os_type, vm_mode, etc
  3088. options_from_image = self._inherit_properties_from_image(
  3089. image, auto_disk_config)
  3090. instance.update(options_from_image)
  3091. instance.task_state = task_states.REBUILDING
  3092. # An empty instance.image_ref is currently used as an indication
  3093. # of BFV. Preserve that over a rebuild to not break users.
  3094. if not is_volume_backed:
  3095. instance.image_ref = image_href
  3096. instance.kernel_id = kernel_id or ""
  3097. instance.ramdisk_id = ramdisk_id or ""
  3098. instance.progress = 0
  3099. instance.update(kwargs)
  3100. instance.save(expected_task_state=[None])
  3101. # On a rebuild, since we're potentially changing images, we need to
  3102. # wipe out the old image properties that we're storing as instance
  3103. # system metadata... and copy in the properties for the new image.
  3104. orig_sys_metadata = _reset_image_metadata()
  3105. self._record_action_start(context, instance, instance_actions.REBUILD)
  3106. # NOTE(sbauza): The migration script we provided in Newton should make
  3107. # sure that all our instances are currently migrated to have an
  3108. # attached RequestSpec object but let's consider that the operator only
  3109. # half migrated all their instances in the meantime.
  3110. host = instance.host
  3111. # If a new image is provided on rebuild, we will need to run
  3112. # through the scheduler again, but we want the instance to be
  3113. # rebuilt on the same host it's already on.
  3114. if orig_image_ref != image_href:
  3115. # We have to modify the request spec that goes to the scheduler
  3116. # to contain the new image. We persist this since we've already
  3117. # changed the instance.image_ref above so we're being
  3118. # consistent.
  3119. request_spec.image = objects.ImageMeta.from_dict(image)
  3120. request_spec.save()
  3121. if 'scheduler_hints' not in request_spec:
  3122. request_spec.scheduler_hints = {}
  3123. # Nuke the id on this so we can't accidentally save
  3124. # this hint hack later
  3125. del request_spec.id
  3126. # NOTE(danms): Passing host=None tells conductor to
  3127. # call the scheduler. The _nova_check_type hint
  3128. # requires that the scheduler returns only the same
  3129. # host that we are currently on and only checks
  3130. # rebuild-related filters.
  3131. request_spec.scheduler_hints['_nova_check_type'] = ['rebuild']
  3132. request_spec.force_hosts = [instance.host]
  3133. request_spec.force_nodes = [instance.node]
  3134. host = None
  3135. self.compute_task_api.rebuild_instance(context, instance=instance,
  3136. new_pass=admin_password, injected_files=files_to_inject,
  3137. image_ref=image_href, orig_image_ref=orig_image_ref,
  3138. orig_sys_metadata=orig_sys_metadata, bdms=bdms,
  3139. preserve_ephemeral=preserve_ephemeral, host=host,
  3140. request_spec=request_spec)
  3141. @staticmethod
  3142. def _validate_numa_rebuild(instance, image, flavor):
  3143. """validates that the NUMA constraints do not change on rebuild.
  3144. :param instance: nova.objects.instance.Instance object
  3145. :param image: the new image the instance will be rebuilt with.
  3146. :param flavor: the flavor of the current instance.
  3147. :raises: nova.exception.ImageNUMATopologyRebuildConflict
  3148. """
  3149. # NOTE(sean-k-mooney): currently it is not possible to express
  3150. # a PCI NUMA affinity policy via flavor or image but that will
3151. # change in the future. We pull the image metadata out into a
3152. # separate variable to make future testing of this easier.
  3153. old_image_meta = instance.image_meta
  3154. new_image_meta = objects.ImageMeta.from_dict(image)
  3155. old_constraints = hardware.numa_get_constraints(flavor, old_image_meta)
  3156. new_constraints = hardware.numa_get_constraints(flavor, new_image_meta)
  3157. # early out for non NUMA instances
  3158. if old_constraints is None and new_constraints is None:
  3159. return
3160. # If only one of the constraints is non-None (or 'set') then the
3161. # constraints changed, so raise an exception.
  3162. if old_constraints is None or new_constraints is None:
  3163. action = "removing" if old_constraints else "introducing"
  3164. LOG.debug("NUMA rebuild validation failed. The requested image "
  3165. "would alter the NUMA constraints by %s a NUMA "
  3166. "topology.", action, instance=instance)
  3167. raise exception.ImageNUMATopologyRebuildConflict()
3168. # Otherwise, since both the old and new constraints are non-None,
3169. # compare them as dictionaries.
  3170. old = old_constraints.obj_to_primitive()
  3171. new = new_constraints.obj_to_primitive()
  3172. if old != new:
  3173. LOG.debug("NUMA rebuild validation failed. The requested image "
  3174. "conflicts with the existing NUMA constraints.",
  3175. instance=instance)
  3176. raise exception.ImageNUMATopologyRebuildConflict()
  3177. # TODO(sean-k-mooney): add PCI NUMA affinity policy check.
  3178. @staticmethod
  3179. def _check_quota_for_upsize(context, instance, current_flavor, new_flavor):
  3180. project_id, user_id = quotas_obj.ids_from_instance(context,
  3181. instance)
  3182. # Deltas will be empty if the resize is not an upsize.
  3183. deltas = compute_utils.upsize_quota_delta(new_flavor,
  3184. current_flavor)
  3185. if deltas:
  3186. try:
  3187. res_deltas = {'cores': deltas.get('cores', 0),
  3188. 'ram': deltas.get('ram', 0)}
  3189. objects.Quotas.check_deltas(context, res_deltas,
  3190. project_id, user_id=user_id,
  3191. check_project_id=project_id,
  3192. check_user_id=user_id)
  3193. except exception.OverQuota as exc:
  3194. quotas = exc.kwargs['quotas']
  3195. overs = exc.kwargs['overs']
  3196. usages = exc.kwargs['usages']
  3197. headroom = compute_utils.get_headroom(quotas, usages,
  3198. deltas)
  3199. (overs, reqs, total_alloweds,
  3200. useds) = compute_utils.get_over_quota_detail(headroom,
  3201. overs,
  3202. quotas,
  3203. deltas)
  3204. LOG.info("%(overs)s quota exceeded for %(pid)s,"
  3205. " tried to resize instance.",
  3206. {'overs': overs, 'pid': context.project_id})
  3207. raise exception.TooManyInstances(overs=overs,
  3208. req=reqs,
  3209. used=useds,
  3210. allowed=total_alloweds)
  3211. @check_instance_lock
  3212. @check_instance_state(vm_state=[vm_states.RESIZED])
  3213. def revert_resize(self, context, instance):
  3214. """Reverts a resize or cold migration, deleting the 'new' instance in
  3215. the process.
  3216. """
  3217. elevated = context.elevated()
  3218. migration = objects.Migration.get_by_instance_and_status(
  3219. elevated, instance.uuid, 'finished')
  3220. # If this is a resize down, a revert might go over quota.
  3221. self._check_quota_for_upsize(context, instance, instance.flavor,
  3222. instance.old_flavor)
  3223. # The AZ for the server may have changed when it was migrated so while
  3224. # we are in the API and have access to the API DB, update the
  3225. # instance.availability_zone before casting off to the compute service.
  3226. # Note that we do this in the API to avoid an "up-call" from the
  3227. # compute service to the API DB. This is not great in case something
  3228. # fails during revert before the instance.host is updated to the
  3229. # original source host, but it is good enough for now. Long-term we
  3230. # could consider passing the AZ down to compute so it can set it when
  3231. # the instance.host value is set in finish_revert_resize.
  3232. instance.availability_zone = (
  3233. availability_zones.get_host_availability_zone(
  3234. context, migration.source_compute))
  3235. # If this was a resize, the conductor may have updated the
  3236. # RequestSpec.flavor field (to point at the new flavor) and the
  3237. # RequestSpec.numa_topology field (to reflect the new flavor's extra
  3238. # specs) during the initial resize operation, so we need to update the
  3239. # RequestSpec to point back at the original flavor and reflect the NUMA
  3240. # settings of this flavor, otherwise subsequent move operations through
  3241. # the scheduler will be using the wrong values. There's no need to do
  3242. # this if the flavor hasn't changed though and we're migrating rather
  3243. # than resizing.
  3244. reqspec = objects.RequestSpec.get_by_instance_uuid(
  3245. context, instance.uuid)
  3246. if reqspec.flavor['id'] != instance.old_flavor['id']:
  3247. reqspec.flavor = instance.old_flavor
  3248. reqspec.numa_topology = hardware.numa_get_constraints(
  3249. instance.old_flavor, instance.image_meta)
  3250. reqspec.save()
  3251. # NOTE(gibi): This is a performance optimization. If the network info
  3252. # cache does not have ports with allocations in the binding profile
  3253. # then we can skip reading port resource request from neutron below.
3254. # If a port has a resource request then the finish_resize call
3255. # would have already put an allocation in the binding profile
3256. # during the resize.
  3257. if instance.get_network_info().has_port_with_allocation():
  3258. # TODO(gibi): do not directly overwrite the
3259. # RequestSpec.requested_resources as others like cyborg might have
3260. # already added things there
  3261. # NOTE(gibi): We need to collect the requested resource again as it
  3262. # is intentionally not persisted in nova. Note that this needs to
  3263. # be done here as the nova API code directly calls revert on the
  3264. # dest compute service skipping the conductor.
  3265. port_res_req = (
  3266. self.network_api.get_requested_resource_for_instance(
  3267. context, instance.uuid))
  3268. reqspec.requested_resources = port_res_req
  3269. instance.task_state = task_states.RESIZE_REVERTING
  3270. instance.save(expected_task_state=[None])
  3271. migration.status = 'reverting'
  3272. migration.save()
  3273. self._record_action_start(context, instance,
  3274. instance_actions.REVERT_RESIZE)
  3275. if migration.cross_cell_move:
  3276. # RPC cast to conductor to orchestrate the revert of the cross-cell
  3277. # resize.
  3278. self.compute_task_api.revert_snapshot_based_resize(
  3279. context, instance, migration)
  3280. else:
  3281. # TODO(melwitt): We're not rechecking for strict quota here to
  3282. # guard against going over quota during a race at this time because
  3283. # the resource consumption for this operation is written to the
  3284. # database by compute.
  3285. self.compute_rpcapi.revert_resize(context, instance,
  3286. migration,
  3287. migration.dest_compute,
  3288. reqspec)
  3289. @check_instance_lock
  3290. @check_instance_state(vm_state=[vm_states.RESIZED])
  3291. def confirm_resize(self, context, instance, migration=None):
  3292. """Confirms a migration/resize and deletes the 'old' instance."""
  3293. elevated = context.elevated()
  3294. # NOTE(melwitt): We're not checking quota here because there isn't a
  3295. # change in resource usage when confirming a resize. Resource
3296. # consumption for resizes is written to the database by compute, so
  3297. # a confirm resize is just a clean up of the migration objects and a
  3298. # state change in compute.
  3299. if migration is None:
  3300. migration = objects.Migration.get_by_instance_and_status(
  3301. elevated, instance.uuid, 'finished')
  3302. migration.status = 'confirming'
  3303. migration.save()
  3304. self._record_action_start(context, instance,
  3305. instance_actions.CONFIRM_RESIZE)
  3306. # Check to see if this was a cross-cell resize, in which case the
  3307. # resized instance is in the target cell (the migration and instance
  3308. # came from the target cell DB in this case), and we need to cleanup
  3309. # the source host and source cell database records.
  3310. if migration.cross_cell_move:
  3311. self.compute_task_api.confirm_snapshot_based_resize(
  3312. context, instance, migration)
  3313. else:
  3314. # It's a traditional resize within a single cell, so RPC cast to
  3315. # the source compute host to cleanup the host since the instance
  3316. # is already on the target host.
  3317. self.compute_rpcapi.confirm_resize(context,
  3318. instance,
  3319. migration,
  3320. migration.source_compute)
  3321. @staticmethod
  3322. def _allow_cross_cell_resize(context, instance):
  3323. """Determine if the request can perform a cross-cell resize on this
  3324. instance.
  3325. :param context: nova auth request context for the resize operation
  3326. :param instance: Instance object being resized
  3327. :returns: True if cross-cell resize is allowed, False otherwise
  3328. """
  3329. # First check to see if the requesting project/user is allowed by
  3330. # policy to perform cross-cell resize.
  3331. allowed = context.can(
  3332. servers_policies.CROSS_CELL_RESIZE,
  3333. target={'user_id': instance.user_id,
  3334. 'project_id': instance.project_id},
  3335. fatal=False)
  3336. # If the user is allowed by policy, check to make sure the deployment
  3337. # is upgraded to the point of supporting cross-cell resize on all
  3338. # compute services.
  3339. if allowed:
  3340. # TODO(mriedem): We can remove this minimum compute version check
  3341. # in the 22.0.0 "V" release.
  3342. min_compute_version = (
  3343. objects.service.get_minimum_version_all_cells(
  3344. context, ['nova-compute']))
  3345. if min_compute_version < MIN_COMPUTE_CROSS_CELL_RESIZE:
  3346. LOG.debug('Request is allowed by policy to perform cross-cell '
  3347. 'resize but the minimum nova-compute service '
  3348. 'version in the deployment %s is less than %s so '
  3349. 'cross-cell resize is not allowed at this time.',
  3350. min_compute_version, MIN_COMPUTE_CROSS_CELL_RESIZE)
  3351. allowed = False
  3352. return allowed
  3353. @staticmethod
  3354. def _validate_host_for_cold_migrate(
  3355. context, instance, host_name, allow_cross_cell_resize):
  3356. """Validates a host specified for cold migration.
  3357. :param context: nova auth request context for the cold migration
  3358. :param instance: Instance object being cold migrated
  3359. :param host_name: User-specified compute service hostname for the
  3360. desired destination of the instance during the cold migration
  3361. :param allow_cross_cell_resize: If True, cross-cell resize is allowed
  3362. for this operation and the host could be in a different cell from
3363. the one that the instance is currently in. If False, the specified
  3364. host must be in the same cell as the instance.
  3365. :returns: ComputeNode object of the requested host
  3366. :raises: CannotMigrateToSameHost if the host is the same as the
  3367. current instance.host
  3368. :raises: ComputeHostNotFound if the specified host cannot be found
  3369. """
  3370. # Cannot migrate to the host where the instance exists
  3371. # because it is useless.
  3372. if host_name == instance.host:
  3373. raise exception.CannotMigrateToSameHost()
  3374. # Check whether host exists or not. If a cross-cell resize is
  3375. # allowed, the host could be in another cell from the one the
  3376. # instance is currently in, so we need to lookup the HostMapping
  3377. # to get the cell and lookup the ComputeNode in that cell.
  3378. if allow_cross_cell_resize:
  3379. try:
  3380. hm = objects.HostMapping.get_by_host(context, host_name)
  3381. except exception.HostMappingNotFound:
  3382. LOG.info('HostMapping not found for host: %s', host_name)
  3383. raise exception.ComputeHostNotFound(host=host_name)
  3384. with nova_context.target_cell(context, hm.cell_mapping) as cctxt:
  3385. node = objects.ComputeNode.\
  3386. get_first_node_by_host_for_old_compat(
  3387. cctxt, host_name, use_slave=True)
  3388. else:
  3389. node = objects.ComputeNode.get_first_node_by_host_for_old_compat(
  3390. context, host_name, use_slave=True)
  3391. return node
  3392. @block_accelerators
  3393. @check_instance_lock
  3394. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED])
  3395. @check_instance_host(check_is_up=True)
  3396. def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
  3397. host_name=None, auto_disk_config=None):
  3398. """Resize (ie, migrate) a running instance.
  3399. If flavor_id is None, the process is considered a migration, keeping
  3400. the original flavor_id. If flavor_id is not None, the instance should
  3401. be migrated to a new host and resized to the new flavor_id.
  3402. host_name is always None in the resize case.
  3403. host_name can be set in the cold migration case only.
  3404. """
  3405. allow_cross_cell_resize = self._allow_cross_cell_resize(
  3406. context, instance)
  3407. if host_name is not None:
  3408. node = self._validate_host_for_cold_migrate(
  3409. context, instance, host_name, allow_cross_cell_resize)
  3410. self._check_auto_disk_config(
  3411. instance, auto_disk_config=auto_disk_config)
  3412. current_instance_type = instance.get_flavor()
  3413. # If flavor_id is not provided, only migrate the instance.
  3414. volume_backed = None
  3415. if not flavor_id:
  3416. LOG.debug("flavor_id is None. Assuming migration.",
  3417. instance=instance)
  3418. new_instance_type = current_instance_type
  3419. else:
  3420. new_instance_type = flavors.get_flavor_by_flavor_id(
  3421. flavor_id, read_deleted="no")
  3422. # Check to see if we're resizing to a zero-disk flavor which is
  3423. # only supported with volume-backed servers.
  3424. if (new_instance_type.get('root_gb') == 0 and
  3425. current_instance_type.get('root_gb') != 0):
  3426. volume_backed = compute_utils.is_volume_backed_instance(
  3427. context, instance)
  3428. if not volume_backed:
  3429. reason = _('Resize to zero disk flavor is not allowed.')
  3430. raise exception.CannotResizeDisk(reason=reason)
  3431. current_instance_type_name = current_instance_type['name']
  3432. new_instance_type_name = new_instance_type['name']
  3433. LOG.debug("Old instance type %(current_instance_type_name)s, "
  3434. "new instance type %(new_instance_type_name)s",
  3435. {'current_instance_type_name': current_instance_type_name,
  3436. 'new_instance_type_name': new_instance_type_name},
  3437. instance=instance)
  3438. same_instance_type = (current_instance_type['id'] ==
  3439. new_instance_type['id'])
  3440. # NOTE(sirp): We don't want to force a customer to change their flavor
  3441. # when Ops is migrating off of a failed host.
  3442. if not same_instance_type and new_instance_type.get('disabled'):
  3443. raise exception.FlavorNotFound(flavor_id=flavor_id)
  3444. if same_instance_type and flavor_id:
  3445. raise exception.CannotResizeToSameFlavor()
  3446. # ensure there is sufficient headroom for upsizes
  3447. if flavor_id:
  3448. self._check_quota_for_upsize(context, instance,
  3449. current_instance_type,
  3450. new_instance_type)
  3451. if not same_instance_type:
  3452. image = utils.get_image_from_system_metadata(
  3453. instance.system_metadata)
  3454. # Figure out if the instance is volume-backed but only if we didn't
  3455. # already figure that out above (avoid the extra db hit).
  3456. if volume_backed is None:
  3457. volume_backed = compute_utils.is_volume_backed_instance(
  3458. context, instance)
  3459. # If the server is volume-backed, we still want to validate numa
  3460. # and pci information in the new flavor, but we don't call
  3461. # _validate_flavor_image_nostatus because how it handles checking
  3462. # disk size validation was not intended for a volume-backed
  3463. # resize case.
  3464. if volume_backed:
  3465. self._validate_flavor_image_numa_pci(
  3466. image, new_instance_type, validate_pci=True)
  3467. else:
  3468. self._validate_flavor_image_nostatus(
  3469. context, image, new_instance_type, root_bdm=None,
  3470. validate_pci=True)
  3471. filter_properties = {'ignore_hosts': []}
  3472. if not self._allow_resize_to_same_host(same_instance_type, instance):
  3473. filter_properties['ignore_hosts'].append(instance.host)
  3474. request_spec = objects.RequestSpec.get_by_instance_uuid(
  3475. context, instance.uuid)
  3476. request_spec.ignore_hosts = filter_properties['ignore_hosts']
  3477. # don't recalculate the NUMA topology unless the flavor has changed
  3478. if not same_instance_type:
  3479. request_spec.numa_topology = hardware.numa_get_constraints(
  3480. new_instance_type, instance.image_meta)
  3481. # TODO(huaqiang): Remove in Wallaby
3482. # Check that nova-compute nodes have been updated to Victoria before
3483. # resizing an instance to a new mixed instance from a dedicated or
3484. # shared instance.
  3485. self._check_compute_service_for_mixed_instance(
  3486. request_spec.numa_topology)
  3487. instance.task_state = task_states.RESIZE_PREP
  3488. instance.progress = 0
  3489. instance.auto_disk_config = auto_disk_config or False
  3490. instance.save(expected_task_state=[None])
  3491. if not flavor_id:
  3492. self._record_action_start(context, instance,
  3493. instance_actions.MIGRATE)
  3494. else:
  3495. self._record_action_start(context, instance,
  3496. instance_actions.RESIZE)
  3497. # TODO(melwitt): We're not rechecking for strict quota here to guard
  3498. # against going over quota during a race at this time because the
  3499. # resource consumption for this operation is written to the database
  3500. # by compute.
  3501. scheduler_hint = {'filter_properties': filter_properties}
  3502. if host_name is None:
  3503. # If 'host_name' is not specified,
  3504. # clear the 'requested_destination' field of the RequestSpec
3505. # but still set the allow_cross_cell_move flag since conductor
3506. # uses it prior to scheduling.
  3507. request_spec.requested_destination = objects.Destination(
  3508. allow_cross_cell_move=allow_cross_cell_resize)
  3509. else:
  3510. # Set the host and the node so that the scheduler will
  3511. # validate them.
  3512. request_spec.requested_destination = objects.Destination(
  3513. host=node.host, node=node.hypervisor_hostname,
  3514. allow_cross_cell_move=allow_cross_cell_resize)
  3515. # Asynchronously RPC cast to conductor so the response is not blocked
  3516. # during scheduling. If something fails the user can find out via
  3517. # instance actions.
  3518. self.compute_task_api.resize_instance(context, instance,
  3519. scheduler_hint=scheduler_hint,
  3520. flavor=new_instance_type,
  3521. clean_shutdown=clean_shutdown,
  3522. request_spec=request_spec,
  3523. do_cast=True)
  3524. def _allow_resize_to_same_host(self, cold_migrate, instance):
  3525. """Contains logic for excluding the instance.host on resize/migrate.
3526. If performing a cold migration and the compute node resource provider
3527. reports the COMPUTE_SAME_HOST_COLD_MIGRATE trait, then same-host cold
3528. migration is allowed; otherwise it is not, and the current instance.host
3529. should be excluded as a scheduling candidate.
  3530. :param cold_migrate: true if performing a cold migration, false
  3531. for resize
  3532. :param instance: Instance object being resized or cold migrated
  3533. :returns: True if same-host resize/cold migrate is allowed, False
  3534. otherwise
  3535. """
  3536. if cold_migrate:
  3537. # Check to see if the compute node resource provider on which the
  3538. # instance is running has the COMPUTE_SAME_HOST_COLD_MIGRATE
  3539. # trait.
  3540. # Note that we check this here in the API since we cannot
  3541. # pre-filter allocation candidates in the scheduler using this
  3542. # trait as it would not work. For example, libvirt nodes will not
  3543. # report the trait but using it as a forbidden trait filter when
  3544. # getting allocation candidates would still return libvirt nodes
  3545. # which means we could attempt to cold migrate to the same libvirt
  3546. # node, which would fail.
  3547. ctxt = instance._context
  3548. cn = objects.ComputeNode.get_by_host_and_nodename(
  3549. ctxt, instance.host, instance.node)
  3550. traits = self.placementclient.get_provider_traits(
  3551. ctxt, cn.uuid).traits
3552. # If the provider has the trait, it (1) is new enough to report
3553. # that trait and (2) supports cold migration on the same host.
  3554. if os_traits.COMPUTE_SAME_HOST_COLD_MIGRATE in traits:
  3555. allow_same_host = True
  3556. else:
  3557. # TODO(mriedem): Remove this compatibility code after one
  3558. # release. If the compute is old we will not know if it
  3559. # supports same-host cold migration so we fallback to config.
  3560. service = objects.Service.get_by_compute_host(ctxt, cn.host)
  3561. if service.version >= MIN_COMPUTE_SAME_HOST_COLD_MIGRATE:
3562. # The compute is new enough to report the trait but does
3563. # not report it, so same-host cold migration is not allowed.
  3564. allow_same_host = False
  3565. else:
  3566. # The compute is not new enough to report the trait so we
  3567. # fallback to config.
  3568. allow_same_host = CONF.allow_resize_to_same_host
  3569. else:
  3570. allow_same_host = CONF.allow_resize_to_same_host
  3571. return allow_same_host
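# Illustrative sketch (condensed from the method above, not authoritative)
# of how its result feeds the scheduler in resize()/migrate():
#
#     if not self._allow_resize_to_same_host(same_instance_type, instance):
#         filter_properties['ignore_hosts'].append(instance.host)
#
#     # cold migrate + provider reports COMPUTE_SAME_HOST_COLD_MIGRATE -> host kept
#     # cold migrate + new compute that does not report the trait      -> host ignored
#     # old compute, or a flavor-changing resize -> CONF.allow_resize_to_same_host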
  3572. @block_accelerators
  3573. @check_instance_lock
  3574. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  3575. vm_states.PAUSED, vm_states.SUSPENDED])
  3576. def shelve(self, context, instance, clean_shutdown=True):
  3577. """Shelve an instance.
  3578. Shuts down an instance and frees it up to be removed from the
  3579. hypervisor.
  3580. """
  3581. instance.task_state = task_states.SHELVING
  3582. instance.save(expected_task_state=[None])
  3583. self._record_action_start(context, instance, instance_actions.SHELVE)
  3584. if not compute_utils.is_volume_backed_instance(context, instance):
  3585. name = '%s-shelved' % instance.display_name
  3586. image_meta = compute_utils.create_image(
  3587. context, instance, name, 'snapshot', self.image_api)
  3588. image_id = image_meta['id']
  3589. self.compute_rpcapi.shelve_instance(context, instance=instance,
  3590. image_id=image_id, clean_shutdown=clean_shutdown)
  3591. else:
  3592. self.compute_rpcapi.shelve_offload_instance(context,
  3593. instance=instance, clean_shutdown=clean_shutdown)
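# Illustrative usage sketch; ``compute_api`` and ``ctxt`` are assumed to be
# an instance of this API class and a request context:
#
#     compute_api.shelve(ctxt, instance, clean_shutdown=True)
#
# An image-backed server is snapshotted to '<display_name>-shelved' and then
# shelved on the compute; a volume-backed server has no root disk image to
# snapshot, so it is shelve-offloaded directly.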
  3594. @check_instance_lock
  3595. @check_instance_state(vm_state=[vm_states.SHELVED])
  3596. def shelve_offload(self, context, instance, clean_shutdown=True):
  3597. """Remove a shelved instance from the hypervisor."""
  3598. instance.task_state = task_states.SHELVING_OFFLOADING
  3599. instance.save(expected_task_state=[None])
  3600. self._record_action_start(context, instance,
  3601. instance_actions.SHELVE_OFFLOAD)
  3602. self.compute_rpcapi.shelve_offload_instance(context, instance=instance,
  3603. clean_shutdown=clean_shutdown)
  3604. def _validate_unshelve_az(self, context, instance, availability_zone):
  3605. """Verify the specified availability_zone during unshelve.
  3606. Verifies that the server is shelved offloaded, the AZ exists and
  3607. if [cinder]/cross_az_attach=False, that any attached volumes are in
  3608. the same AZ.
  3609. :param context: nova auth RequestContext for the unshelve action
  3610. :param instance: Instance object for the server being unshelved
  3611. :param availability_zone: The user-requested availability zone in
  3612. which to unshelve the server.
  3613. :raises: UnshelveInstanceInvalidState if the server is not shelved
  3614. offloaded
  3615. :raises: InvalidRequest if the requested AZ does not exist
  3616. :raises: MismatchVolumeAZException if [cinder]/cross_az_attach=False
  3617. and any attached volumes are not in the requested AZ
  3618. """
  3619. if instance.vm_state != vm_states.SHELVED_OFFLOADED:
  3620. # NOTE(brinzhang): If the server status is 'SHELVED', it still
  3621. # belongs to a host, the availability_zone has not changed.
  3622. # Unshelving a shelved offloaded server will go through the
  3623. # scheduler to find a new host.
  3624. raise exception.UnshelveInstanceInvalidState(
  3625. state=instance.vm_state, instance_uuid=instance.uuid)
  3626. available_zones = availability_zones.get_availability_zones(
  3627. context, self.host_api, get_only_available=True)
  3628. if availability_zone not in available_zones:
  3629. msg = _('The requested availability zone is not available')
  3630. raise exception.InvalidRequest(msg)
3631. # NOTE(brinzhang): When specifying an availability zone to unshelve
3632. # a shelved offloaded server, and conf cross_az_attach=False, we need
3633. # to determine if the attached volume AZ matches the user-specified AZ.
  3634. if not CONF.cinder.cross_az_attach:
  3635. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  3636. context, instance.uuid)
  3637. for bdm in bdms:
  3638. if bdm.is_volume and bdm.volume_id:
  3639. volume = self.volume_api.get(context, bdm.volume_id)
  3640. if availability_zone != volume['availability_zone']:
  3641. msg = _("The specified availability zone does not "
  3642. "match the volume %(vol_id)s attached to the "
  3643. "server. Specified availability zone is "
  3644. "%(az)s. Volume is in %(vol_zone)s.") % {
  3645. "vol_id": volume['id'],
  3646. "az": availability_zone,
  3647. "vol_zone": volume['availability_zone']}
  3648. raise exception.MismatchVolumeAZException(reason=msg)
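# Worked example (illustrative; the values are assumed): with
# [cinder]/cross_az_attach = False and an attached volume reporting
# 'availability_zone': 'az1', requesting a different AZ fails:
#
#     compute_api.unshelve(ctxt, instance, new_az='az2')
#     # -> MismatchVolumeAZException, because 'az2' != 'az1'
#
# A server that is merely SHELVED (not offloaded) fails earlier with
# UnshelveInstanceInvalidState since it still belongs to a host.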
  3649. @check_instance_lock
  3650. @check_instance_state(vm_state=[vm_states.SHELVED,
  3651. vm_states.SHELVED_OFFLOADED])
  3652. def unshelve(self, context, instance, new_az=None):
  3653. """Restore a shelved instance."""
  3654. request_spec = objects.RequestSpec.get_by_instance_uuid(
  3655. context, instance.uuid)
  3656. if new_az:
  3657. self._validate_unshelve_az(context, instance, new_az)
  3658. LOG.debug("Replace the old AZ %(old_az)s in RequestSpec "
  3659. "with a new AZ %(new_az)s of the instance.",
  3660. {"old_az": request_spec.availability_zone,
  3661. "new_az": new_az}, instance=instance)
  3662. # Unshelving a shelved offloaded server will go through the
  3663. # scheduler to pick a new host, so we update the
  3664. # RequestSpec.availability_zone here. Note that if scheduling
  3665. # fails the RequestSpec will remain updated, which is not great,
  3666. # but if we want to change that we need to defer updating the
  3667. # RequestSpec until conductor which probably means RPC changes to
  3668. # pass the new_az variable to conductor. This is likely low
  3669. # priority since the RequestSpec.availability_zone on a shelved
  3670. # offloaded server does not mean much anyway and clearly the user
  3671. # is trying to put the server in the target AZ.
  3672. request_spec.availability_zone = new_az
  3673. request_spec.save()
  3674. instance.task_state = task_states.UNSHELVING
  3675. instance.save(expected_task_state=[None])
  3676. self._record_action_start(context, instance, instance_actions.UNSHELVE)
  3677. self.compute_task_api.unshelve_instance(context, instance,
  3678. request_spec)
  3679. @check_instance_lock
  3680. def add_fixed_ip(self, context, instance, network_id):
3681. """Add a fixed IP from the specified network to the given instance."""
  3682. self.compute_rpcapi.add_fixed_ip_to_instance(context,
  3683. instance=instance, network_id=network_id)
  3684. @check_instance_lock
  3685. def remove_fixed_ip(self, context, instance, address):
3686. """Remove the fixed IP with the given address from the instance."""
  3687. self.compute_rpcapi.remove_fixed_ip_from_instance(context,
  3688. instance=instance, address=address)
  3689. @check_instance_lock
  3690. @check_instance_state(vm_state=[vm_states.ACTIVE])
  3691. def pause(self, context, instance):
  3692. """Pause the given instance."""
  3693. instance.task_state = task_states.PAUSING
  3694. instance.save(expected_task_state=[None])
  3695. self._record_action_start(context, instance, instance_actions.PAUSE)
  3696. self.compute_rpcapi.pause_instance(context, instance)
  3697. @check_instance_lock
  3698. @check_instance_state(vm_state=[vm_states.PAUSED])
  3699. def unpause(self, context, instance):
  3700. """Unpause the given instance."""
  3701. instance.task_state = task_states.UNPAUSING
  3702. instance.save(expected_task_state=[None])
  3703. self._record_action_start(context, instance, instance_actions.UNPAUSE)
  3704. self.compute_rpcapi.unpause_instance(context, instance)
  3705. @check_instance_host()
  3706. def get_diagnostics(self, context, instance):
  3707. """Retrieve diagnostics for the given instance."""
  3708. return self.compute_rpcapi.get_diagnostics(context, instance=instance)
  3709. @check_instance_host()
  3710. def get_instance_diagnostics(self, context, instance):
  3711. """Retrieve diagnostics for the given instance."""
  3712. return self.compute_rpcapi.get_instance_diagnostics(context,
  3713. instance=instance)
  3714. @block_accelerators
  3715. @reject_sev_instances(instance_actions.SUSPEND)
  3716. @check_instance_lock
  3717. @check_instance_state(vm_state=[vm_states.ACTIVE])
  3718. def suspend(self, context, instance):
  3719. """Suspend the given instance."""
  3720. instance.task_state = task_states.SUSPENDING
  3721. instance.save(expected_task_state=[None])
  3722. self._record_action_start(context, instance, instance_actions.SUSPEND)
  3723. self.compute_rpcapi.suspend_instance(context, instance)
  3724. @check_instance_lock
  3725. @check_instance_state(vm_state=[vm_states.SUSPENDED])
  3726. def resume(self, context, instance):
  3727. """Resume the given instance."""
  3728. instance.task_state = task_states.RESUMING
  3729. instance.save(expected_task_state=[None])
  3730. self._record_action_start(context, instance, instance_actions.RESUME)
  3731. self.compute_rpcapi.resume_instance(context, instance)
  3732. @check_instance_lock
  3733. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  3734. vm_states.ERROR])
  3735. def rescue(self, context, instance, rescue_password=None,
  3736. rescue_image_ref=None, clean_shutdown=True,
  3737. allow_bfv_rescue=False):
  3738. """Rescue the given instance."""
  3739. if rescue_image_ref:
  3740. try:
  3741. image_meta = image_meta_obj.ImageMeta.from_image_ref(
  3742. context, self.image_api, rescue_image_ref)
  3743. except (exception.ImageNotFound, exception.ImageBadRequest):
  3744. LOG.warning("Failed to fetch rescue image metadata using "
  3745. "image_ref %(image_ref)s",
  3746. {'image_ref': rescue_image_ref})
  3747. raise exception.UnsupportedRescueImage(
  3748. image=rescue_image_ref)
  3749. # FIXME(lyarwood): There is currently no support for rescuing
  3750. # instances using a volume snapshot so fail here before we cast to
  3751. # the compute.
  3752. if image_meta.properties.get('img_block_device_mapping'):
  3753. LOG.warning("Unable to rescue an instance using a volume "
  3754. "snapshot image with img_block_device_mapping "
  3755. "image properties set")
  3756. raise exception.UnsupportedRescueImage(
  3757. image=rescue_image_ref)
  3758. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  3759. context, instance.uuid)
  3760. for bdm in bdms:
  3761. if bdm.volume_id:
  3762. vol = self.volume_api.get(context, bdm.volume_id)
  3763. self.volume_api.check_attached(context, vol)
  3764. volume_backed = compute_utils.is_volume_backed_instance(
  3765. context, instance, bdms)
  3766. if volume_backed and allow_bfv_rescue:
  3767. cn = objects.ComputeNode.get_by_host_and_nodename(
  3768. context, instance.host, instance.node)
  3769. traits = self.placementclient.get_provider_traits(
  3770. context, cn.uuid).traits
  3771. if os_traits.COMPUTE_RESCUE_BFV not in traits:
  3772. reason = _("Host unable to rescue a volume-backed instance")
  3773. raise exception.InstanceNotRescuable(instance_id=instance.uuid,
  3774. reason=reason)
  3775. elif volume_backed:
  3776. reason = _("Cannot rescue a volume-backed instance")
  3777. raise exception.InstanceNotRescuable(instance_id=instance.uuid,
  3778. reason=reason)
  3779. instance.task_state = task_states.RESCUING
  3780. instance.save(expected_task_state=[None])
  3781. self._record_action_start(context, instance, instance_actions.RESCUE)
  3782. self.compute_rpcapi.rescue_instance(context, instance=instance,
  3783. rescue_password=rescue_password, rescue_image_ref=rescue_image_ref,
  3784. clean_shutdown=clean_shutdown)
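# Illustrative sketch (identifiers assumed): rescuing a volume-backed server
# is only possible when the caller opts in and the host reports the
# COMPUTE_RESCUE_BFV trait:
#
#     compute_api.rescue(ctxt, instance, rescue_image_ref=image_ref,
#                        allow_bfv_rescue=True)
#     # -> InstanceNotRescuable unless the instance's compute node resource
#     #    provider has os_traits.COMPUTE_RESCUE_BFV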
  3785. @check_instance_lock
  3786. @check_instance_state(vm_state=[vm_states.RESCUED])
  3787. def unrescue(self, context, instance):
  3788. """Unrescue the given instance."""
  3789. instance.task_state = task_states.UNRESCUING
  3790. instance.save(expected_task_state=[None])
  3791. self._record_action_start(context, instance, instance_actions.UNRESCUE)
  3792. self.compute_rpcapi.unrescue_instance(context, instance=instance)
  3793. @check_instance_lock
  3794. @check_instance_state(vm_state=[vm_states.ACTIVE])
  3795. def set_admin_password(self, context, instance, password):
  3796. """Set the root/admin password for the given instance.
  3797. @param context: Nova auth context.
  3798. @param instance: Nova instance object.
  3799. @param password: The admin password for the instance.
  3800. """
  3801. instance.task_state = task_states.UPDATING_PASSWORD
  3802. instance.save(expected_task_state=[None])
  3803. self._record_action_start(context, instance,
  3804. instance_actions.CHANGE_PASSWORD)
  3805. self.compute_rpcapi.set_admin_password(context,
  3806. instance=instance,
  3807. new_pass=password)
  3808. @check_instance_host()
  3809. @reject_instance_state(
  3810. task_state=[task_states.DELETING, task_states.MIGRATING])
  3811. def get_vnc_console(self, context, instance, console_type):
  3812. """Get a url to an instance Console."""
  3813. connect_info = self.compute_rpcapi.get_vnc_console(context,
  3814. instance=instance, console_type=console_type)
  3815. return {'url': connect_info['access_url']}
  3816. @check_instance_host()
  3817. @reject_instance_state(
  3818. task_state=[task_states.DELETING, task_states.MIGRATING])
  3819. def get_spice_console(self, context, instance, console_type):
  3820. """Get a url to an instance Console."""
  3821. connect_info = self.compute_rpcapi.get_spice_console(context,
  3822. instance=instance, console_type=console_type)
  3823. return {'url': connect_info['access_url']}
  3824. @check_instance_host()
  3825. @reject_instance_state(
  3826. task_state=[task_states.DELETING, task_states.MIGRATING])
  3827. def get_rdp_console(self, context, instance, console_type):
  3828. """Get a url to an instance Console."""
  3829. connect_info = self.compute_rpcapi.get_rdp_console(context,
  3830. instance=instance, console_type=console_type)
  3831. return {'url': connect_info['access_url']}
  3832. @check_instance_host()
  3833. @reject_instance_state(
  3834. task_state=[task_states.DELETING, task_states.MIGRATING])
  3835. def get_serial_console(self, context, instance, console_type):
  3836. """Get a url to a serial console."""
  3837. connect_info = self.compute_rpcapi.get_serial_console(context,
  3838. instance=instance, console_type=console_type)
  3839. return {'url': connect_info['access_url']}
  3840. @check_instance_host()
  3841. @reject_instance_state(
  3842. task_state=[task_states.DELETING, task_states.MIGRATING])
  3843. def get_mks_console(self, context, instance, console_type):
  3844. """Get a url to a MKS console."""
  3845. connect_info = self.compute_rpcapi.get_mks_console(context,
  3846. instance=instance, console_type=console_type)
  3847. return {'url': connect_info['access_url']}
  3848. @check_instance_host()
  3849. def get_console_output(self, context, instance, tail_length=None):
  3850. """Get console output for an instance."""
  3851. return self.compute_rpcapi.get_console_output(context,
  3852. instance=instance, tail_length=tail_length)
  3853. def lock(self, context, instance, reason=None):
  3854. """Lock the given instance."""
  3855. # Only update the lock if we are an admin (non-owner)
  3856. is_owner = instance.project_id == context.project_id
  3857. if instance.locked and is_owner:
  3858. return
  3859. context = context.elevated()
  3860. self._record_action_start(context, instance,
  3861. instance_actions.LOCK)
  3862. @wrap_instance_event(prefix='api')
  3863. def lock(self, context, instance, reason=None):
  3864. LOG.debug('Locking', instance=instance)
  3865. instance.locked = True
  3866. instance.locked_by = 'owner' if is_owner else 'admin'
  3867. if reason:
  3868. instance.system_metadata['locked_reason'] = reason
  3869. instance.save()
  3870. lock(self, context, instance, reason=reason)
  3871. compute_utils.notify_about_instance_action(
  3872. context, instance, CONF.host,
  3873. action=fields_obj.NotificationAction.LOCK,
  3874. source=fields_obj.NotificationSource.API)
  3875. def is_expected_locked_by(self, context, instance):
  3876. is_owner = instance.project_id == context.project_id
  3877. expect_locked_by = 'owner' if is_owner else 'admin'
  3878. locked_by = instance.locked_by
  3879. if locked_by and locked_by != expect_locked_by:
  3880. return False
  3881. return True
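# Illustrative example (contexts assumed): a server locked by an admin
# reports locked_by='admin', which the API layer checks via
# is_expected_locked_by() before allowing owner-initiated actions:
#
#     instance.locked_by = 'admin'
#     compute_api.is_expected_locked_by(owner_ctxt, instance)  # False
#     # admin_ctxt here is from a different project than the instance
#     compute_api.is_expected_locked_by(admin_ctxt, instance)  # True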
  3882. def unlock(self, context, instance):
  3883. """Unlock the given instance."""
  3884. context = context.elevated()
  3885. self._record_action_start(context, instance,
  3886. instance_actions.UNLOCK)
  3887. @wrap_instance_event(prefix='api')
  3888. def unlock(self, context, instance):
  3889. LOG.debug('Unlocking', instance=instance)
  3890. instance.locked = False
  3891. instance.locked_by = None
  3892. instance.system_metadata.pop('locked_reason', None)
  3893. instance.save()
  3894. unlock(self, context, instance)
  3895. compute_utils.notify_about_instance_action(
  3896. context, instance, CONF.host,
  3897. action=fields_obj.NotificationAction.UNLOCK,
  3898. source=fields_obj.NotificationSource.API)
  3899. @check_instance_lock
  3900. def reset_network(self, context, instance):
  3901. """Reset networking on the instance."""
  3902. self.compute_rpcapi.reset_network(context, instance=instance)
  3903. @check_instance_lock
  3904. def inject_network_info(self, context, instance):
  3905. """Inject network info for the instance."""
  3906. self.compute_rpcapi.inject_network_info(context, instance=instance)
  3907. def _create_volume_bdm(self, context, instance, device, volume,
  3908. disk_bus, device_type, is_local_creation=False,
  3909. tag=None, delete_on_termination=False):
  3910. volume_id = volume['id']
  3911. if is_local_creation:
  3912. # when the creation is done locally we can't specify the device
  3913. # name as we do not have a way to check that the name specified is
  3914. # a valid one.
3915. # We leave the setting of that value to when the actual attach
3916. # happens on the compute manager.
  3917. # NOTE(artom) Local attach (to a shelved-offload instance) cannot
  3918. # support device tagging because we have no way to call the compute
  3919. # manager to check that it supports device tagging. In fact, we
3920. # don't even know which compute manager the instance will
  3921. # eventually end up on when it's unshelved.
  3922. volume_bdm = objects.BlockDeviceMapping(
  3923. context=context,
  3924. source_type='volume', destination_type='volume',
  3925. instance_uuid=instance.uuid, boot_index=None,
  3926. volume_id=volume_id,
  3927. device_name=None, guest_format=None,
  3928. disk_bus=disk_bus, device_type=device_type,
  3929. delete_on_termination=delete_on_termination)
  3930. volume_bdm.create()
  3931. else:
  3932. # NOTE(vish): This is done on the compute host because we want
  3933. # to avoid a race where two devices are requested at
  3934. # the same time. When db access is removed from
  3935. # compute, the bdm will be created here and we will
  3936. # have to make sure that they are assigned atomically.
  3937. volume_bdm = self.compute_rpcapi.reserve_block_device_name(
  3938. context, instance, device, volume_id, disk_bus=disk_bus,
  3939. device_type=device_type, tag=tag,
  3940. multiattach=volume['multiattach'])
  3941. volume_bdm.delete_on_termination = delete_on_termination
  3942. volume_bdm.save()
  3943. return volume_bdm
  3944. def _check_volume_already_attached_to_instance(self, context, instance,
  3945. volume_id):
  3946. """Avoid attaching the same volume to the same instance twice.
3947. The new Cinder flow (microversion 3.44) handles the checks
3948. differently and allows attaching the same volume to the same
3949. instance twice (to enable live migration), so for the new flow we
3950. check whether a BDM already exists for this combination and fail
3951. if it does.
  3952. """
  3953. try:
  3954. objects.BlockDeviceMapping.get_by_volume_and_instance(
  3955. context, volume_id, instance.uuid)
  3956. msg = _("volume %s already attached") % volume_id
  3957. raise exception.InvalidVolume(reason=msg)
  3958. except exception.VolumeBDMNotFound:
  3959. pass
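# Illustrative sketch (identifiers assumed): the helper above turns a
# duplicate attach request into an InvalidVolume error:
#
#     compute_api.attach_volume(ctxt, instance, volume_id)
#     compute_api.attach_volume(ctxt, instance, volume_id)
#     # -> InvalidVolume("volume <volume_id> already attached")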
  3960. def _check_attach_and_reserve_volume(self, context, volume, instance,
  3961. bdm, supports_multiattach=False,
  3962. validate_az=True):
  3963. """Perform checks against the instance and volume before attaching.
  3964. If validation succeeds, the bdm is updated with an attachment_id which
  3965. effectively reserves it during the attach process in cinder.
  3966. :param context: nova auth RequestContext
  3967. :param volume: volume dict from cinder
  3968. :param instance: Instance object
  3969. :param bdm: BlockDeviceMapping object
  3970. :param supports_multiattach: True if the request supports multiattach
  3971. volumes, i.e. microversion >= 2.60, False otherwise
  3972. :param validate_az: True if the instance and volume availability zones
  3973. should be validated for cross_az_attach, False to not validate AZ
  3974. """
  3975. volume_id = volume['id']
  3976. if validate_az:
  3977. self.volume_api.check_availability_zone(context, volume,
  3978. instance=instance)
  3979. # If volume.multiattach=True and the microversion to
  3980. # support multiattach is not used, fail the request.
  3981. if volume['multiattach'] and not supports_multiattach:
  3982. raise exception.MultiattachNotSupportedOldMicroversion()
  3983. attachment_id = self.volume_api.attachment_create(
  3984. context, volume_id, instance.uuid)['id']
  3985. bdm.attachment_id = attachment_id
  3986. # NOTE(ildikov): In case of boot from volume the BDM at this
  3987. # point is not yet created in a cell database, so we can't
  3988. # call save(). When attaching a volume to an existing
  3989. # instance, the instance is already in a cell and the BDM has
  3990. # been created in that same cell so updating here in that case
  3991. # is "ok".
  3992. if bdm.obj_attr_is_set('id'):
  3993. bdm.save()
  3994. # TODO(stephenfin): Fold this back in now that cells v1 no longer needs to
  3995. # override it.
  3996. def _attach_volume(self, context, instance, volume, device,
  3997. disk_bus, device_type, tag=None,
  3998. supports_multiattach=False,
  3999. delete_on_termination=False):
  4000. """Attach an existing volume to an existing instance.
  4001. This method is separated to make it possible for cells version
  4002. to override it.
  4003. """
  4004. volume_bdm = self._create_volume_bdm(
  4005. context, instance, device, volume, disk_bus=disk_bus,
  4006. device_type=device_type, tag=tag,
  4007. delete_on_termination=delete_on_termination)
  4008. try:
  4009. self._check_attach_and_reserve_volume(context, volume, instance,
  4010. volume_bdm,
  4011. supports_multiattach)
  4012. self._record_action_start(
  4013. context, instance, instance_actions.ATTACH_VOLUME)
  4014. self.compute_rpcapi.attach_volume(context, instance, volume_bdm)
  4015. except Exception:
  4016. with excutils.save_and_reraise_exception():
  4017. volume_bdm.destroy()
  4018. return volume_bdm.device_name
  4019. def _attach_volume_shelved_offloaded(self, context, instance, volume,
  4020. device, disk_bus, device_type,
  4021. delete_on_termination):
  4022. """Attach an existing volume to an instance in shelved offloaded state.
4023. Attaching a volume to an instance in shelved offloaded state requires
4024. performing the regular checks to see if we can attach and reserve the
4025. volume, and then calling the attach method on the volume API to mark
4026. the volume as 'in-use'.
4027. The instance at this stage is not managed by a compute manager,
4028. therefore the actual attachment will be performed once the
4029. instance is unshelved.
  4030. """
  4031. volume_id = volume['id']
  4032. @wrap_instance_event(prefix='api')
  4033. def attach_volume(self, context, v_id, instance, dev, attachment_id):
  4034. if attachment_id:
  4035. # Normally we wouldn't complete an attachment without a host
  4036. # connector, but we do this to make the volume status change
  4037. # to "in-use" to maintain the API semantics with the old flow.
  4038. # When unshelving the instance, the compute service will deal
  4039. # with this disconnected attachment.
  4040. self.volume_api.attachment_complete(context, attachment_id)
  4041. else:
  4042. self.volume_api.attach(context,
  4043. v_id,
  4044. instance.uuid,
  4045. dev)
  4046. volume_bdm = self._create_volume_bdm(
  4047. context, instance, device, volume, disk_bus=disk_bus,
  4048. device_type=device_type, is_local_creation=True,
  4049. delete_on_termination=delete_on_termination)
  4050. try:
  4051. self._check_attach_and_reserve_volume(context, volume, instance,
  4052. volume_bdm)
  4053. self._record_action_start(
  4054. context, instance,
  4055. instance_actions.ATTACH_VOLUME)
  4056. attach_volume(self, context, volume_id, instance, device,
  4057. volume_bdm.attachment_id)
  4058. except Exception:
  4059. with excutils.save_and_reraise_exception():
  4060. volume_bdm.destroy()
  4061. return volume_bdm.device_name
  4062. @check_instance_lock
  4063. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4064. vm_states.STOPPED, vm_states.RESIZED,
  4065. vm_states.SOFT_DELETED, vm_states.SHELVED,
  4066. vm_states.SHELVED_OFFLOADED])
  4067. def attach_volume(self, context, instance, volume_id, device=None,
  4068. disk_bus=None, device_type=None, tag=None,
  4069. supports_multiattach=False,
  4070. delete_on_termination=False):
  4071. """Attach an existing volume to an existing instance."""
  4072. # NOTE(vish): Fail fast if the device is not going to pass. This
  4073. # will need to be removed along with the test if we
  4074. # change the logic in the manager for what constitutes
  4075. # a valid device.
  4076. if device and not block_device.match_device(device):
  4077. raise exception.InvalidDevicePath(path=device)
  4078. # Make sure the volume isn't already attached to this instance
  4079. # because we'll use the v3.44 attachment flow in
  4080. # _check_attach_and_reserve_volume and Cinder will allow multiple
  4081. # attachments between the same volume and instance but the old flow
  4082. # API semantics don't allow that so we enforce it here.
  4083. self._check_volume_already_attached_to_instance(context,
  4084. instance,
  4085. volume_id)
  4086. volume = self.volume_api.get(context, volume_id)
  4087. is_shelved_offloaded = instance.vm_state == vm_states.SHELVED_OFFLOADED
  4088. if is_shelved_offloaded:
  4089. if tag:
  4090. # NOTE(artom) Local attach (to a shelved-offload instance)
  4091. # cannot support device tagging because we have no way to call
  4092. # the compute manager to check that it supports device tagging.
4093. # In fact, we don't even know which compute manager the
  4094. # instance will eventually end up on when it's unshelved.
  4095. raise exception.VolumeTaggedAttachToShelvedNotSupported()
  4096. if volume['multiattach']:
  4097. # NOTE(mriedem): Similar to tagged attach, we don't support
  4098. # attaching a multiattach volume to shelved offloaded instances
  4099. # because we can't tell if the compute host (since there isn't
  4100. # one) supports it. This could possibly be supported in the
  4101. # future if the scheduler was made aware of which computes
  4102. # support multiattach volumes.
  4103. raise exception.MultiattachToShelvedNotSupported()
  4104. return self._attach_volume_shelved_offloaded(context,
  4105. instance,
  4106. volume,
  4107. device,
  4108. disk_bus,
  4109. device_type,
  4110. delete_on_termination)
  4111. return self._attach_volume(context, instance, volume, device,
  4112. disk_bus, device_type, tag=tag,
  4113. supports_multiattach=supports_multiattach,
  4114. delete_on_termination=delete_on_termination)
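# Illustrative sketch (identifiers assumed): for a shelved offloaded server
# the attachment is created locally and carries extra restrictions:
#
#     compute_api.attach_volume(ctxt, offloaded_instance, volume_id,
#                               tag='db-disk')
#     # -> VolumeTaggedAttachToShelvedNotSupported
#
#     compute_api.attach_volume(ctxt, offloaded_instance, multiattach_vol_id)
#     # -> MultiattachToShelvedNotSupported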
  4115. def _detach_volume_shelved_offloaded(self, context, instance, volume):
  4116. """Detach a volume from an instance in shelved offloaded state.
4117. If the instance is shelved offloaded we just need to clean up the
4118. volume by calling the volume API detach and terminate_connection
4119. methods and deleting the BDM record.
4120. If the volume has the delete_on_termination option set then we call
4121. the volume API delete as well.
  4122. """
  4123. @wrap_instance_event(prefix='api')
  4124. def detach_volume(self, context, instance, bdms):
  4125. self._local_cleanup_bdm_volumes(bdms, instance, context)
  4126. bdms = [objects.BlockDeviceMapping.get_by_volume_id(
  4127. context, volume['id'], instance.uuid)]
  4128. # The begin_detaching() call only works with in-use volumes,
  4129. # which will not be the case for volumes attached to a shelved
  4130. # offloaded server via the attachments API since those volumes
  4131. # will have `reserved` status.
  4132. if not bdms[0].attachment_id:
  4133. try:
  4134. self.volume_api.begin_detaching(context, volume['id'])
  4135. except exception.InvalidInput as exc:
  4136. raise exception.InvalidVolume(reason=exc.format_message())
  4137. self._record_action_start(
  4138. context, instance,
  4139. instance_actions.DETACH_VOLUME)
  4140. detach_volume(self, context, instance, bdms)
  4141. @check_instance_lock
  4142. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4143. vm_states.STOPPED, vm_states.RESIZED,
  4144. vm_states.SOFT_DELETED, vm_states.SHELVED,
  4145. vm_states.SHELVED_OFFLOADED])
  4146. def detach_volume(self, context, instance, volume):
  4147. """Detach a volume from an instance."""
  4148. if instance.vm_state == vm_states.SHELVED_OFFLOADED:
  4149. self._detach_volume_shelved_offloaded(context, instance, volume)
  4150. else:
  4151. try:
  4152. self.volume_api.begin_detaching(context, volume['id'])
  4153. except exception.InvalidInput as exc:
  4154. raise exception.InvalidVolume(reason=exc.format_message())
  4155. attachments = volume.get('attachments', {})
  4156. attachment_id = None
  4157. if attachments and instance.uuid in attachments:
  4158. attachment_id = attachments[instance.uuid]['attachment_id']
  4159. self._record_action_start(
  4160. context, instance, instance_actions.DETACH_VOLUME)
  4161. self.compute_rpcapi.detach_volume(context, instance=instance,
  4162. volume_id=volume['id'], attachment_id=attachment_id)
  4163. def _count_attachments_for_swap(self, ctxt, volume):
  4164. """Counts the number of attachments for a swap-related volume.
  4165. Attempts to only count read/write attachments if the volume attachment
  4166. records exist, otherwise simply just counts the number of attachments
  4167. regardless of attach mode.
  4168. :param ctxt: nova.context.RequestContext - user request context
  4169. :param volume: nova-translated volume dict from nova.volume.cinder.
  4170. :returns: count of attachments for the volume
  4171. """
  4172. # This is a dict, keyed by server ID, to a dict of attachment_id and
  4173. # mountpoint.
  4174. attachments = volume.get('attachments', {})
  4175. # Multiattach volumes can have more than one attachment, so if there
  4176. # is more than one attachment, attempt to count the read/write
  4177. # attachments.
  4178. if len(attachments) > 1:
  4179. count = 0
  4180. for attachment in attachments.values():
  4181. attachment_id = attachment['attachment_id']
  4182. # Get the attachment record for this attachment so we can
  4183. # get the attach_mode.
  4184. # TODO(mriedem): This could be optimized if we had
  4185. # GET /attachments/detail?volume_id=volume['id'] in Cinder.
  4186. try:
  4187. attachment_record = self.volume_api.attachment_get(
  4188. ctxt, attachment_id)
  4189. # Note that the attachment record from Cinder has
  4190. # attach_mode in the top-level of the resource but the
  4191. # nova.volume.cinder code translates it and puts the
  4192. # attach_mode in the connection_info for some legacy
  4193. # reason...
  4194. if attachment_record['attach_mode'] == 'rw':
  4195. count += 1
  4196. except exception.VolumeAttachmentNotFound:
  4197. # attachments are read/write by default so count it
  4198. count += 1
  4199. else:
  4200. count = len(attachments)
  4201. return count
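# Worked example (illustrative; the attachment records are assumed): for a
# multiattach volume with one read/write and one read-only attachment, only
# the read/write attachment is counted:
#
#     attachments = {'server-1': {'attachment_id': 'a1'},  # attach_mode 'rw'
#                    'server-2': {'attachment_id': 'a2'}}  # attach_mode 'ro'
#     self._count_attachments_for_swap(ctxt, {'attachments': attachments})
#     # -> 1, so swap_volume() below will still accept this volume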
  4202. @check_instance_lock
  4203. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4204. vm_states.RESIZED])
  4205. def swap_volume(self, context, instance, old_volume, new_volume):
  4206. """Swap volume attached to an instance."""
  4207. # The caller likely got the instance from volume['attachments']
  4208. # in the first place, but let's sanity check.
  4209. if not old_volume.get('attachments', {}).get(instance.uuid):
  4210. msg = _("Old volume is attached to a different instance.")
  4211. raise exception.InvalidVolume(reason=msg)
  4212. if new_volume['attach_status'] == 'attached':
  4213. msg = _("New volume must be detached in order to swap.")
  4214. raise exception.InvalidVolume(reason=msg)
  4215. if int(new_volume['size']) < int(old_volume['size']):
  4216. msg = _("New volume must be the same size or larger.")
  4217. raise exception.InvalidVolume(reason=msg)
  4218. self.volume_api.check_availability_zone(context, new_volume,
  4219. instance=instance)
  4220. try:
  4221. self.volume_api.begin_detaching(context, old_volume['id'])
  4222. except exception.InvalidInput as exc:
  4223. raise exception.InvalidVolume(reason=exc.format_message())
  4224. # Disallow swapping from multiattach volumes that have more than one
  4225. # read/write attachment. We know the old_volume has at least one
  4226. # attachment since it's attached to this server. The new_volume
  4227. # can't have any attachments because of the attach_status check above.
  4228. # We do this count after calling "begin_detaching" to lock against
  4229. # concurrent attachments being made while we're counting.
  4230. try:
  4231. if self._count_attachments_for_swap(context, old_volume) > 1:
  4232. raise exception.MultiattachSwapVolumeNotSupported()
  4233. except Exception: # This is generic to handle failures while counting
  4234. # We need to reset the detaching status before raising.
  4235. with excutils.save_and_reraise_exception():
  4236. self.volume_api.roll_detaching(context, old_volume['id'])
  4237. # Get the BDM for the attached (old) volume so we can tell if it was
  4238. # attached with the new-style Cinder 3.44 API.
  4239. bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
  4240. context, old_volume['id'], instance.uuid)
  4241. new_attachment_id = None
  4242. if bdm.attachment_id is None:
  4243. # This is an old-style attachment so reserve the new volume before
  4244. # we cast to the compute host.
  4245. self.volume_api.reserve_volume(context, new_volume['id'])
  4246. else:
  4247. try:
  4248. self._check_volume_already_attached_to_instance(
  4249. context, instance, new_volume['id'])
  4250. except exception.InvalidVolume:
  4251. with excutils.save_and_reraise_exception():
  4252. self.volume_api.roll_detaching(context, old_volume['id'])
  4253. # This is a new-style attachment so for the volume that we are
  4254. # going to swap to, create a new volume attachment.
  4255. new_attachment_id = self.volume_api.attachment_create(
  4256. context, new_volume['id'], instance.uuid)['id']
  4257. self._record_action_start(
  4258. context, instance, instance_actions.SWAP_VOLUME)
  4259. try:
  4260. self.compute_rpcapi.swap_volume(
  4261. context, instance=instance,
  4262. old_volume_id=old_volume['id'],
  4263. new_volume_id=new_volume['id'],
  4264. new_attachment_id=new_attachment_id)
  4265. except Exception:
  4266. with excutils.save_and_reraise_exception():
  4267. self.volume_api.roll_detaching(context, old_volume['id'])
  4268. if new_attachment_id is None:
  4269. self.volume_api.unreserve_volume(context, new_volume['id'])
  4270. else:
  4271. self.volume_api.attachment_delete(
  4272. context, new_attachment_id)
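# Illustrative summary (not exhaustive) of the validation performed above
# before the swap is cast to the compute host:
#
#     assert old_volume['attachments'].get(instance.uuid)        # attached here
#     assert new_volume['attach_status'] != 'attached'           # target free
#     assert int(new_volume['size']) >= int(old_volume['size'])  # no shrink
#     # and the old volume must not have more than one read/write attachment
#     # (see _count_attachments_for_swap above)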
  4273. @check_instance_lock
  4274. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4275. vm_states.STOPPED],
  4276. task_state=[None])
  4277. def attach_interface(self, context, instance, network_id, port_id,
  4278. requested_ip, tag=None):
4279. """Use hotplug to add a network adapter to an instance."""
  4280. self._record_action_start(
  4281. context, instance, instance_actions.ATTACH_INTERFACE)
  4282. # NOTE(gibi): Checking if the requested port has resource request as
  4283. # such ports are currently not supported as they would at least
  4284. # need resource allocation manipulation in placement but might also
  4285. # need a new scheduling if resource on this host is not available.
  4286. if port_id:
  4287. port = self.network_api.show_port(context, port_id)
  4288. if port['port'].get(constants.RESOURCE_REQUEST):
  4289. raise exception.AttachInterfaceWithQoSPolicyNotSupported(
  4290. instance_uuid=instance.uuid)
  4291. return self.compute_rpcapi.attach_interface(context,
  4292. instance=instance, network_id=network_id, port_id=port_id,
  4293. requested_ip=requested_ip, tag=tag)
  4294. @check_instance_lock
  4295. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4296. vm_states.STOPPED],
  4297. task_state=[None])
  4298. def detach_interface(self, context, instance, port_id):
4299. """Detach a network adapter from an instance."""
  4300. self._record_action_start(
  4301. context, instance, instance_actions.DETACH_INTERFACE)
  4302. self.compute_rpcapi.detach_interface(context, instance=instance,
  4303. port_id=port_id)
  4304. def get_instance_metadata(self, context, instance):
  4305. """Get all metadata associated with an instance."""
  4306. return self.db.instance_metadata_get(context, instance.uuid)
  4307. @check_instance_lock
  4308. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4309. vm_states.SUSPENDED, vm_states.STOPPED],
  4310. task_state=None)
  4311. def delete_instance_metadata(self, context, instance, key):
  4312. """Delete the given metadata item from an instance."""
  4313. instance.delete_metadata_key(key)
  4314. self.compute_rpcapi.change_instance_metadata(context,
  4315. instance=instance,
  4316. diff={key: ['-']})
  4317. @check_instance_lock
  4318. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4319. vm_states.SUSPENDED, vm_states.STOPPED],
  4320. task_state=None)
  4321. def update_instance_metadata(self, context, instance,
  4322. metadata, delete=False):
  4323. """Updates or creates instance metadata.
  4324. If delete is True, metadata items that are not specified in the
  4325. `metadata` argument will be deleted.
  4326. """
  4327. orig = dict(instance.metadata)
  4328. if delete:
  4329. _metadata = metadata
  4330. else:
  4331. _metadata = dict(instance.metadata)
  4332. _metadata.update(metadata)
  4333. self._check_metadata_properties_quota(context, _metadata)
  4334. instance.metadata = _metadata
  4335. instance.save()
  4336. diff = _diff_dict(orig, instance.metadata)
  4337. self.compute_rpcapi.change_instance_metadata(context,
  4338. instance=instance,
  4339. diff=diff)
  4340. return _metadata
  4341. @block_accelerators
  4342. @reject_sev_instances(instance_actions.LIVE_MIGRATION)
  4343. @check_instance_lock
  4344. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED])
  4345. def live_migrate(self, context, instance, block_migration,
  4346. disk_over_commit, host_name, force=None, async_=False):
  4347. """Migrate a server lively to a new host."""
  4348. LOG.debug("Going to try to live migrate instance to %s",
  4349. host_name or "another host", instance=instance)
  4350. if host_name:
  4351. # Validate the specified host before changing the instance task
  4352. # state.
  4353. nodes = objects.ComputeNodeList.get_all_by_host(context, host_name)
  4354. request_spec = objects.RequestSpec.get_by_instance_uuid(
  4355. context, instance.uuid)
  4356. instance.task_state = task_states.MIGRATING
  4357. instance.save(expected_task_state=[None])
  4358. self._record_action_start(context, instance,
  4359. instance_actions.LIVE_MIGRATION)
  4360. # NOTE(sbauza): Force is a boolean by the new related API version
  4361. if force is False and host_name:
  4362. # Unset the host to make sure we call the scheduler
  4363. # from the conductor LiveMigrationTask. Yes this is tightly-coupled
  4364. # to behavior in conductor and not great.
  4365. host_name = None
  4366. # FIXME(sbauza): Since only Ironic driver uses more than one
  4367. # compute per service but doesn't support live migrations,
  4368. # let's provide the first one.
  4369. target = nodes[0]
  4370. destination = objects.Destination(
  4371. host=target.host,
  4372. node=target.hypervisor_hostname
  4373. )
  4374. # This is essentially a hint to the scheduler to only consider
  4375. # the specified host but still run it through the filters.
  4376. request_spec.requested_destination = destination
  4377. try:
  4378. self.compute_task_api.live_migrate_instance(context, instance,
  4379. host_name, block_migration=block_migration,
  4380. disk_over_commit=disk_over_commit,
  4381. request_spec=request_spec, async_=async_)
  4382. except oslo_exceptions.MessagingTimeout as messaging_timeout:
  4383. with excutils.save_and_reraise_exception():
  4384. # NOTE(pkoniszewski): It is possible that MessagingTimeout
  4385. # occurs, but LM will still be in progress, so write
  4386. # instance fault to database
  4387. compute_utils.add_instance_fault_from_exc(context,
  4388. instance,
  4389. messaging_timeout)
  4390. @check_instance_lock
  4391. @check_instance_state(vm_state=[vm_states.ACTIVE],
  4392. task_state=[task_states.MIGRATING])
  4393. def live_migrate_force_complete(self, context, instance, migration_id):
  4394. """Force live migration to complete.
  4395. :param context: Security context
  4396. :param instance: The instance that is being migrated
  4397. :param migration_id: ID of ongoing migration
  4398. """
  4399. LOG.debug("Going to try to force live migration to complete",
  4400. instance=instance)
  4401. # NOTE(pkoniszewski): Get migration object to check if there is ongoing
  4402. # live migration for particular instance. Also pass migration id to
  4403. # compute to double check and avoid possible race condition.
  4404. migration = objects.Migration.get_by_id_and_instance(
  4405. context, migration_id, instance.uuid)
  4406. if migration.status != 'running':
  4407. raise exception.InvalidMigrationState(migration_id=migration_id,
  4408. instance_uuid=instance.uuid,
  4409. state=migration.status,
  4410. method='force complete')
  4411. self._record_action_start(
  4412. context, instance, instance_actions.LIVE_MIGRATION_FORCE_COMPLETE)
  4413. self.compute_rpcapi.live_migration_force_complete(
  4414. context, instance, migration)
  4415. @check_instance_lock
  4416. @check_instance_state(task_state=[task_states.MIGRATING])
  4417. def live_migrate_abort(self, context, instance, migration_id,
  4418. support_abort_in_queue=False):
  4419. """Abort an in-progress live migration.
  4420. :param context: Security context
  4421. :param instance: The instance that is being migrated
  4422. :param migration_id: ID of in-progress live migration
  4423. :param support_abort_in_queue: Flag indicating whether we can support
  4424. abort migrations in "queued" or "preparing" status.
  4425. """
  4426. migration = objects.Migration.get_by_id_and_instance(context,
  4427. migration_id, instance.uuid)
  4428. LOG.debug("Going to cancel live migration %s",
  4429. migration.id, instance=instance)
  4430. # If the microversion does not support abort migration in queue,
4431. # we are only able to abort migrations with `running` status;
  4432. # if it is supported, we are able to also abort migrations in
  4433. # `queued` and `preparing` status.
  4434. allowed_states = ['running']
  4435. queued_states = ['queued', 'preparing']
  4436. if support_abort_in_queue:
  4437. # The user requested a microversion that supports aborting a queued
  4438. # or preparing live migration. But we need to check that the
  4439. # compute service hosting the instance is new enough to support
  4440. # aborting a queued/preparing live migration, so we check the
  4441. # service version here.
  4442. allowed_states.extend(queued_states)
  4443. if migration.status not in allowed_states:
  4444. raise exception.InvalidMigrationState(migration_id=migration_id,
  4445. instance_uuid=instance.uuid,
  4446. state=migration.status,
  4447. method='abort live migration')
  4448. self._record_action_start(context, instance,
  4449. instance_actions.LIVE_MIGRATION_CANCEL)
  4450. self.compute_rpcapi.live_migration_abort(context,
  4451. instance, migration.id)
  4452. @block_accelerators
  4453. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  4454. vm_states.ERROR])
  4455. def evacuate(self, context, instance, host, on_shared_storage,
  4456. admin_password=None, force=None):
4457. """Run an evacuation of the instance to a target host.
4458. Check the instance's compute host state; if the host is not in the
4459. expected (down) state, raise an exception.
  4460. :param instance: The instance to evacuate
  4461. :param host: Target host. if not set, the scheduler will pick up one
  4462. :param on_shared_storage: True if instance files on shared storage
  4463. :param admin_password: password to set on rebuilt instance
  4464. :param force: Force the evacuation to the specific host target
  4465. """
  4466. LOG.debug('vm evacuation scheduled', instance=instance)
  4467. inst_host = instance.host
  4468. service = objects.Service.get_by_compute_host(context, inst_host)
  4469. if self.servicegroup_api.service_is_up(service):
  4470. LOG.error('Instance compute service state on %s '
  4471. 'expected to be down, but it was up.', inst_host)
  4472. raise exception.ComputeServiceInUse(host=inst_host)
  4473. request_spec = objects.RequestSpec.get_by_instance_uuid(
  4474. context, instance.uuid)
  4475. instance.task_state = task_states.REBUILDING
  4476. instance.save(expected_task_state=[None])
  4477. self._record_action_start(context, instance, instance_actions.EVACUATE)
  4478. # NOTE(danms): Create this as a tombstone for the source compute
  4479. # to find and cleanup. No need to pass it anywhere else.
  4480. migration = objects.Migration(
  4481. context, source_compute=instance.host, source_node=instance.node,
  4482. instance_uuid=instance.uuid, status='accepted',
  4483. migration_type=fields_obj.MigrationType.EVACUATION)
  4484. if host:
  4485. migration.dest_compute = host
  4486. migration.create()
  4487. compute_utils.notify_about_instance_usage(
  4488. self.notifier, context, instance, "evacuate")
  4489. compute_utils.notify_about_instance_action(
  4490. context, instance, CONF.host,
  4491. action=fields_obj.NotificationAction.EVACUATE,
  4492. source=fields_obj.NotificationSource.API)
  4493. # NOTE(sbauza): Force is a boolean by the new related API version
  4494. # TODO(stephenfin): Any reason we can't use 'not force' here to handle
  4495. # the pre-v2.29 API microversion, which wouldn't set force
  4496. if force is False and host:
  4497. nodes = objects.ComputeNodeList.get_all_by_host(context, host)
  4498. # NOTE(sbauza): Unset the host to make sure we call the scheduler
  4499. host = None
  4500. # FIXME(sbauza): Since only Ironic driver uses more than one
  4501. # compute per service but doesn't support evacuations,
  4502. # let's provide the first one.
  4503. target = nodes[0]
  4504. destination = objects.Destination(
  4505. host=target.host,
  4506. node=target.hypervisor_hostname
  4507. )
  4508. request_spec.requested_destination = destination
  4509. return self.compute_task_api.rebuild_instance(context,
  4510. instance=instance,
  4511. new_pass=admin_password,
  4512. injected_files=None,
  4513. image_ref=None,
  4514. orig_image_ref=None,
  4515. orig_sys_metadata=None,
  4516. bdms=None,
  4517. recreate=True,
  4518. on_shared_storage=on_shared_storage,
  4519. host=host,
  4520. request_spec=request_spec,
  4521. )
  4522. def get_migrations(self, context, filters):
  4523. """Get all migrations for the given filters."""
  4524. load_cells()
  4525. migrations = []
  4526. for cell in CELLS:
  4527. if cell.uuid == objects.CellMapping.CELL0_UUID:
  4528. continue
  4529. with nova_context.target_cell(context, cell) as cctxt:
  4530. migrations.extend(objects.MigrationList.get_by_filters(
  4531. cctxt, filters).objects)
  4532. return objects.MigrationList(objects=migrations)
  4533. def get_migrations_sorted(self, context, filters, sort_dirs=None,
  4534. sort_keys=None, limit=None, marker=None):
  4535. """Get all migrations for the given parameters."""
  4536. mig_objs = migration_list.get_migration_objects_sorted(
  4537. context, filters, limit, marker, sort_keys, sort_dirs)
  4538. # Due to cross-cell resize, we could have duplicate migration records
  4539. # while the instance is in VERIFY_RESIZE state in the destination cell
  4540. # but the original migration record still exists in the source cell.
  4541. # Filter out duplicate migration records here based on which record
  4542. # is newer (last updated).
  4543. def _get_newer_obj(obj1, obj2):
  4544. # created_at will always be set.
  4545. created_at1 = obj1.created_at
  4546. created_at2 = obj2.created_at
  4547. # updated_at might be None
  4548. updated_at1 = obj1.updated_at
  4549. updated_at2 = obj2.updated_at
  4550. # If both have updated_at, compare using that field.
  4551. if updated_at1 and updated_at2:
  4552. if updated_at1 > updated_at2:
  4553. return obj1
  4554. return obj2
  4555. # Compare created_at versus updated_at.
  4556. if updated_at1:
  4557. if updated_at1 > created_at2:
  4558. return obj1
  4559. return obj2
  4560. if updated_at2:
  4561. if updated_at2 > created_at1:
  4562. return obj2
  4563. return obj1
  4564. # Compare created_at only.
  4565. if created_at1 > created_at2:
  4566. return obj1
  4567. return obj2
  4568. # TODO(mriedem): This could be easier if we leveraged the "hidden"
  4569. # field on the Migration record and then just did like
  4570. # _get_unique_filter_method in the get_all() method for instances.
  4571. migrations_by_uuid = collections.OrderedDict() # maintain sort order
  4572. for migration in mig_objs:
  4573. if migration.uuid not in migrations_by_uuid:
  4574. migrations_by_uuid[migration.uuid] = migration
  4575. else:
  4576. # We have a collision, keep the newer record.
  4577. # Note that using updated_at could be wrong if changes-since or
  4578. # changes-before filters are being used but we have the same
  4579. # issue in _get_unique_filter_method for instances.
  4580. doppelganger = migrations_by_uuid[migration.uuid]
  4581. newer = _get_newer_obj(doppelganger, migration)
  4582. migrations_by_uuid[migration.uuid] = newer
  4583. return objects.MigrationList(objects=list(migrations_by_uuid.values()))
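# Worked example (illustrative; timestamps assumed): a cross-cell resize can
# leave two records with the same migration UUID, in which case the
# deduplication above keeps the more recently updated one:
#
#     source_rec.created_at = t0; source_rec.updated_at = None
#     dest_rec.created_at = t0;   dest_rec.updated_at = t1   # t1 > t0
#     _get_newer_obj(source_rec, dest_rec)  # -> dest_rec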
  4584. def get_migrations_in_progress_by_instance(self, context, instance_uuid,
  4585. migration_type=None):
  4586. """Get all migrations of an instance in progress."""
  4587. return objects.MigrationList.get_in_progress_by_instance(
  4588. context, instance_uuid, migration_type)
  4589. def get_migration_by_id_and_instance(self, context,
  4590. migration_id, instance_uuid):
  4591. """Get the migration of an instance by id."""
  4592. return objects.Migration.get_by_id_and_instance(
  4593. context, migration_id, instance_uuid)
  4594. def _get_bdm_by_volume_id(self, context, volume_id, expected_attrs=None):
  4595. """Retrieve a BDM without knowing its cell.
  4596. .. note:: The context will be targeted to the cell in which the
  4597. BDM is found, if any.
  4598. :param context: The API request context.
  4599. :param volume_id: The ID of the volume.
  4600. :param expected_attrs: list of any additional attributes that should
  4601. be joined when the BDM is loaded from the database.
  4602. :raises: nova.exception.VolumeBDMNotFound if not found in any cell
  4603. """
  4604. load_cells()
  4605. for cell in CELLS:
  4606. nova_context.set_target_cell(context, cell)
  4607. try:
  4608. return objects.BlockDeviceMapping.get_by_volume(
  4609. context, volume_id, expected_attrs=expected_attrs)
  4610. except exception.NotFound:
  4611. continue
  4612. raise exception.VolumeBDMNotFound(volume_id=volume_id)
  4613. def volume_snapshot_create(self, context, volume_id, create_info):
  4614. bdm = self._get_bdm_by_volume_id(
  4615. context, volume_id, expected_attrs=['instance'])
  4616. # We allow creating the snapshot in any vm_state as long as there is
  4617. # no task being performed on the instance and it has a host.
  4618. @check_instance_host()
  4619. @check_instance_state(vm_state=None)
  4620. def do_volume_snapshot_create(self, context, instance):
  4621. self.compute_rpcapi.volume_snapshot_create(context, instance,
  4622. volume_id, create_info)
  4623. snapshot = {
  4624. 'snapshot': {
  4625. 'id': create_info.get('id'),
  4626. 'volumeId': volume_id
  4627. }
  4628. }
  4629. return snapshot
  4630. return do_volume_snapshot_create(self, context, bdm.instance)
  4631. def volume_snapshot_delete(self, context, volume_id, snapshot_id,
  4632. delete_info):
  4633. bdm = self._get_bdm_by_volume_id(
  4634. context, volume_id, expected_attrs=['instance'])
  4635. # We allow deleting the snapshot in any vm_state as long as there is
  4636. # no task being performed on the instance and it has a host.
  4637. @check_instance_host()
  4638. @check_instance_state(vm_state=None)
  4639. def do_volume_snapshot_delete(self, context, instance):
  4640. self.compute_rpcapi.volume_snapshot_delete(context, instance,
  4641. volume_id, snapshot_id, delete_info)
  4642. do_volume_snapshot_delete(self, context, bdm.instance)
  4643. def external_instance_event(self, api_context, instances, events):
  4644. # NOTE(danms): The external API consumer just provides events,
  4645. # but doesn't know where they go. We need to collate lists
  4646. # by the host the affected instance is on and dispatch them
  4647. # according to host
  4648. instances_by_host = collections.defaultdict(list)
  4649. events_by_host = collections.defaultdict(list)
  4650. hosts_by_instance = collections.defaultdict(list)
  4651. cell_contexts_by_host = {}
  4652. for instance in instances:
  4653. # instance._context is used here since it's already targeted to
  4654. # the cell that the instance lives in, and we need to use that
  4655. # cell context to look up any migrations associated with the instance.
  4656. hosts, cross_cell_move = self._get_relevant_hosts(
  4657. instance._context, instance)
  4658. for host in hosts:
  4659. # NOTE(danms): All instances on a host must have the same
  4660. # mapping, so just use that
  4661. if host not in cell_contexts_by_host:
  4662. # NOTE(mriedem): If the instance is being migrated across
  4663. # cells then we have to get the host mapping to determine
  4664. # which cell a given host is in.
  4665. if cross_cell_move:
  4666. hm = objects.HostMapping.get_by_host(api_context, host)
  4667. ctxt = nova_context.get_admin_context()
  4668. nova_context.set_target_cell(ctxt, hm.cell_mapping)
  4669. cell_contexts_by_host[host] = ctxt
  4670. else:
  4671. # The instance is not migrating across cells so just
  4672. # use the cell-targeted context already in the
  4673. # instance since the host has to be in that same cell.
  4674. cell_contexts_by_host[host] = instance._context
  4675. instances_by_host[host].append(instance)
  4676. hosts_by_instance[instance.uuid].append(host)
  4677. for event in events:
  4678. if event.name == 'volume-extended':
  4679. # Volume extend is a user-initiated operation starting in the
  4680. # Block Storage service API. We record an instance action so
  4681. # the user can monitor the operation to completion.
  4682. host = hosts_by_instance[event.instance_uuid][0]
  4683. cell_context = cell_contexts_by_host[host]
  4684. objects.InstanceAction.action_start(
  4685. cell_context, event.instance_uuid,
  4686. instance_actions.EXTEND_VOLUME, want_result=False)
  4687. elif event.name == 'power-update':
  4688. host = hosts_by_instance[event.instance_uuid][0]
  4689. cell_context = cell_contexts_by_host[host]
  4690. if event.tag == external_event_obj.POWER_ON:
  4691. inst_action = instance_actions.START
  4692. elif event.tag == external_event_obj.POWER_OFF:
  4693. inst_action = instance_actions.STOP
  4694. else:
  4695. LOG.warning("Invalid power state %s. Cannot process "
  4696. "the event %s. Skipping it.", event.tag,
  4697. event)
  4698. continue
  4699. objects.InstanceAction.action_start(
  4700. cell_context, event.instance_uuid, inst_action,
  4701. want_result=False)
  4702. for host in hosts_by_instance[event.instance_uuid]:
  4703. events_by_host[host].append(event)
  4704. for host in instances_by_host:
  4705. cell_context = cell_contexts_by_host[host]
  4706. # TODO(salv-orlando): Handle exceptions raised by the rpc api layer
  4707. # in order to ensure that a failure in processing events on a host
  4708. # will not prevent processing events on other hosts
  4709. self.compute_rpcapi.external_instance_event(
  4710. cell_context, instances_by_host[host], events_by_host[host],
  4711. host=host)
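# A standalone sketch, not part of this module, isolating the
# collate-then-dispatch idea used above: group events by the hosts they
# concern, then make a single call per host. Events are plain dicts here and
# `send_to_host` is a hypothetical stand-in for the per-host RPC call.
import collections


def dispatch_events_by_host(events, hosts_by_instance, send_to_host):
    events_by_host = collections.defaultdict(list)
    for event in events:
        for host in hosts_by_instance[event['instance_uuid']]:
            events_by_host[host].append(event)
    for host, host_events in events_by_host.items():
        # One call per host; ideally a failure for one host should not stop
        # delivery to the others (see the TODO above).
        send_to_host(host, host_events)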
  4712. def _get_relevant_hosts(self, context, instance):
  4713. """Get the relevant hosts for an external server event on an instance.
  4714. :param context: nova auth request context targeted at the same cell
  4715. that the instance lives in
  4716. :param instance: Instance object which is the target of an external
  4717. server event
  4718. :returns: 2-item tuple of:
  4719. - set of at least one host (the host where the instance lives); if
  4720. the instance is being migrated the source and dest compute
  4721. hostnames are in the returned set
  4722. - boolean indicating if the instance is being migrated across cells
  4723. """
  4724. hosts = set()
  4725. hosts.add(instance.host)
  4726. cross_cell_move = False
  4727. if instance.migration_context is not None:
  4728. migration_id = instance.migration_context.migration_id
  4729. migration = objects.Migration.get_by_id(context, migration_id)
  4730. cross_cell_move = migration.cross_cell_move
  4731. hosts.add(migration.dest_compute)
  4732. hosts.add(migration.source_compute)
  4733. cells_msg = (
  4734. 'across cells' if cross_cell_move else 'within the same cell')
  4735. LOG.debug('Instance %(instance)s is migrating %(cells_msg)s, '
  4736. 'copying events to all relevant hosts: '
  4737. '%(hosts)s', {'cells_msg': cells_msg,
  4738. 'instance': instance.uuid,
  4739. 'hosts': hosts})
  4740. return hosts, cross_cell_move
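# A toy sketch, not part of this module, restating the return contract
# documented above so the three interesting cases are visible at a glance.
# FakeMigration is a hypothetical stand-in for objects.Migration.
import collections

FakeMigration = collections.namedtuple(
    'FakeMigration', ['source_compute', 'dest_compute', 'cross_cell_move'])


def relevant_hosts(instance_host, migration=None):
    hosts = {instance_host}
    cross_cell = False
    if migration is not None:
        hosts.update({migration.source_compute, migration.dest_compute})
        cross_cell = migration.cross_cell_move
    return hosts, cross_cell

# relevant_hosts('c1')                                    -> ({'c1'}, False)
# relevant_hosts('c1', FakeMigration('c1', 'c2', False))  -> ({'c1', 'c2'}, False)
# relevant_hosts('c1', FakeMigration('c1', 'c9', True))   -> ({'c1', 'c9'}, True)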
  4741. def get_instance_host_status(self, instance):
  4742. if instance.host:
  4743. try:
  4744. service = [service for service in instance.services if
  4745. service.binary == 'nova-compute'][0]
  4746. if service.forced_down:
  4747. host_status = fields_obj.HostStatus.DOWN
  4748. elif service.disabled:
  4749. host_status = fields_obj.HostStatus.MAINTENANCE
  4750. else:
  4751. alive = self.servicegroup_api.service_is_up(service)
  4752. host_status = ((alive and fields_obj.HostStatus.UP) or
  4753. fields_obj.HostStatus.UNKNOWN)
  4754. except IndexError:
  4755. host_status = fields_obj.HostStatus.NONE
  4756. else:
  4757. host_status = fields_obj.HostStatus.NONE
  4758. return host_status
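# A standalone restatement, not part of this module, of the decision order
# implemented above, written against plain booleans so the precedence
# (forced_down, then disabled, then liveness) is explicit. The string labels
# are illustrative stand-ins for the fields_obj.HostStatus values.
def host_status_for(has_compute_service, forced_down, disabled, is_up):
    if not has_compute_service:
        return 'NONE'          # no host, or no nova-compute service record
    if forced_down:
        return 'DOWN'
    if disabled:
        return 'MAINTENANCE'
    return 'UP' if is_up else 'UNKNOWN'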
  4759. def get_instances_host_statuses(self, instance_list):
  4760. host_status_dict = dict()
  4761. host_statuses = dict()
  4762. for instance in instance_list:
  4763. if instance.host:
  4764. if instance.host not in host_status_dict:
  4765. host_status = self.get_instance_host_status(instance)
  4766. host_status_dict[instance.host] = host_status
  4767. else:
  4768. host_status = host_status_dict[instance.host]
  4769. else:
  4770. host_status = fields_obj.HostStatus.NONE
  4771. host_statuses[instance.uuid] = host_status
  4772. return host_statuses
  4773. def target_host_cell(fn):
  4774. """Target a host-based function to a cell.
  4775. Expects to wrap a function of signature:
  4776. func(self, context, host, ...)
  4777. """
  4778. @functools.wraps(fn)
  4779. def targeted(self, context, host, *args, **kwargs):
  4780. mapping = objects.HostMapping.get_by_host(context, host)
  4781. nova_context.set_target_cell(context, mapping.cell_mapping)
  4782. return fn(self, context, host, *args, **kwargs)
  4783. return targeted
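# A hedged usage sketch, not part of this module: any method whose signature
# starts with (self, context, host, ...) can be wrapped with target_host_cell
# so the context is retargeted to the host's cell before the body runs. The
# HypotheticalHostAPI class and its method are made up for illustration.
class HypotheticalHostAPI(object):

    @target_host_cell
    def describe_host(self, context, host):
        # `context` is already targeted at the cell the host maps to, so this
        # cell-local lookup hits the right database.
        return objects.Service.get_by_compute_host(context, host)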
  4784. def _get_service_in_cell_by_host(context, host_name):
  4785. # validates the host; ComputeHostNotFound is raised if invalid
  4786. try:
  4787. mapping = objects.HostMapping.get_by_host(context, host_name)
  4788. nova_context.set_target_cell(context, mapping.cell_mapping)
  4789. service = objects.Service.get_by_compute_host(context, host_name)
  4790. except exception.HostMappingNotFound:
  4791. try:
  4792. # NOTE(danms): This targets our cell
  4793. service = _find_service_in_cell(context, service_host=host_name)
  4794. except exception.NotFound:
  4795. raise exception.ComputeHostNotFound(host=host_name)
  4796. return service
  4797. def _find_service_in_cell(context, service_id=None, service_host=None):
  4798. """Find a service by id or hostname by searching all cells.
  4799. If one matching service is found, return it. If none or multiple
  4800. are found, raise an exception.
  4801. :param context: A context.RequestContext
  4802. :param service_id: If not None, the DB ID of the service to find
  4803. :param service_host: If not None, the hostname of the service to find
  4804. :returns: An objects.Service
  4805. :raises: ServiceNotUnique if multiple matching IDs are found
  4806. :raises: NotFound if no matches are found
  4807. :raises: NovaException if called with neither search option
  4808. """
  4809. load_cells()
  4810. service = None
  4811. found_in_cell = None
  4812. is_uuid = False
  4813. if service_id is not None:
  4814. is_uuid = uuidutils.is_uuid_like(service_id)
  4815. if is_uuid:
  4816. lookup_fn = lambda c: objects.Service.get_by_uuid(c, service_id)
  4817. else:
  4818. lookup_fn = lambda c: objects.Service.get_by_id(c, service_id)
  4819. elif service_host is not None:
  4820. lookup_fn = lambda c: (
  4821. objects.Service.get_by_compute_host(c, service_host))
  4822. else:
  4823. LOG.exception('_find_service_in_cell called with no search parameters')
  4824. # This is intentionally cryptic so we don't leak implementation details
  4825. # out of the API.
  4826. raise exception.NovaException()
  4827. for cell in CELLS:
  4828. # NOTE(danms): Services can be in cell0, so don't skip it here
  4829. try:
  4830. with nova_context.target_cell(context, cell) as cctxt:
  4831. cell_service = lookup_fn(cctxt)
  4832. except exception.NotFound:
  4833. # NOTE(danms): Keep looking in other cells
  4834. continue
  4835. if service and cell_service:
  4836. raise exception.ServiceNotUnique()
  4837. service = cell_service
  4838. found_in_cell = cell
  4839. if service and is_uuid:
  4840. break
  4841. if service:
  4842. # NOTE(danms): Set the cell on the context so it remains
  4843. # when we return to our caller
  4844. nova_context.set_target_cell(context, found_in_cell)
  4845. return service
  4846. else:
  4847. raise exception.NotFound()
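# A hedged usage sketch, not part of this module: looking a service up by
# hostname across every cell and translating the NotFound it raises into a
# simple boolean answer. The function name is hypothetical.
def service_host_exists(context, hostname):
    try:
        _find_service_in_cell(context, service_host=hostname)
        return True
    except exception.ServiceNotUnique:
        # The same hostname is registered in more than one cell; let the
        # caller decide how to report the ambiguity.
        raise
    except exception.NotFound:
        return False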
  4848. class HostAPI(base.Base):
  4849. """Sub-set of the Compute Manager API for managing host operations."""
  4850. def __init__(self, rpcapi=None, servicegroup_api=None):
  4851. self.rpcapi = rpcapi or compute_rpcapi.ComputeAPI()
  4852. self.servicegroup_api = servicegroup_api or servicegroup.API()
  4853. super(HostAPI, self).__init__()
  4854. def _assert_host_exists(self, context, host_name, must_be_up=False):
  4855. """Raise HostNotFound if compute host doesn't exist."""
  4856. service = objects.Service.get_by_compute_host(context, host_name)
  4857. if not service:
  4858. raise exception.HostNotFound(host=host_name)
  4859. if must_be_up and not self.servicegroup_api.service_is_up(service):
  4860. raise exception.ComputeServiceUnavailable(host=host_name)
  4861. return service['host']
  4862. @wrap_exception()
  4863. @target_host_cell
  4864. def set_host_enabled(self, context, host_name, enabled):
  4865. """Sets the specified host's ability to accept new instances."""
  4866. host_name = self._assert_host_exists(context, host_name)
  4867. payload = {'host_name': host_name, 'enabled': enabled}
  4868. compute_utils.notify_about_host_update(context,
  4869. 'set_enabled.start',
  4870. payload)
  4871. result = self.rpcapi.set_host_enabled(context, enabled=enabled,
  4872. host=host_name)
  4873. compute_utils.notify_about_host_update(context,
  4874. 'set_enabled.end',
  4875. payload)
  4876. return result
  4877. @target_host_cell
  4878. def get_host_uptime(self, context, host_name):
  4879. """Returns the result of calling "uptime" on the target host."""
  4880. host_name = self._assert_host_exists(context, host_name,
  4881. must_be_up=True)
  4882. return self.rpcapi.get_host_uptime(context, host=host_name)
  4883. @wrap_exception()
  4884. @target_host_cell
  4885. def host_power_action(self, context, host_name, action):
  4886. """Reboots, shuts down or powers up the host."""
  4887. host_name = self._assert_host_exists(context, host_name)
  4888. payload = {'host_name': host_name, 'action': action}
  4889. compute_utils.notify_about_host_update(context,
  4890. 'power_action.start',
  4891. payload)
  4892. result = self.rpcapi.host_power_action(context, action=action,
  4893. host=host_name)
  4894. compute_utils.notify_about_host_update(context,
  4895. 'power_action.end',
  4896. payload)
  4897. return result
  4898. @wrap_exception()
  4899. @target_host_cell
  4900. def set_host_maintenance(self, context, host_name, mode):
  4901. """Start/Stop host maintenance window. On start, it triggers
  4902. evacuation of the guest VMs.
  4903. """
  4904. host_name = self._assert_host_exists(context, host_name)
  4905. payload = {'host_name': host_name, 'mode': mode}
  4906. compute_utils.notify_about_host_update(context,
  4907. 'set_maintenance.start',
  4908. payload)
  4909. result = self.rpcapi.host_maintenance_mode(context,
  4910. host_param=host_name, mode=mode, host=host_name)
  4911. compute_utils.notify_about_host_update(context,
  4912. 'set_maintenance.end',
  4913. payload)
  4914. return result
  4915. def service_get_all(self, context, filters=None, set_zones=False,
  4916. all_cells=False, cell_down_support=False):
  4917. """Returns a list of services, optionally filtering the results.
  4918. If specified, 'filters' should be a dictionary containing services
  4919. attributes and matching values. For example, to get a list of services for
  4920. the 'compute' topic, use filters={'topic': 'compute'}.
  4921. If all_cells=True, then scan all cells and merge the results.
  4922. If cell_down_support=True then return minimal service records
  4923. for cells that do not respond based on what we have in the
  4924. host mappings. These will have only 'binary' and 'host' set.
  4925. """
  4926. if filters is None:
  4927. filters = {}
  4928. disabled = filters.pop('disabled', None)
  4929. if 'availability_zone' in filters:
  4930. set_zones = True
  4931. # NOTE(danms): Eventually this all_cells nonsense should go away
  4932. # and we should always iterate over the cells. However, certain
  4933. # callers need the legacy behavior for now.
  4934. if all_cells:
  4935. services = []
  4936. service_dict = nova_context.scatter_gather_all_cells(context,
  4937. objects.ServiceList.get_all, disabled, set_zones=set_zones)
  4938. for cell_uuid, service in service_dict.items():
  4939. if not nova_context.is_cell_failure_sentinel(service):
  4940. services.extend(service)
  4941. elif cell_down_support:
  4942. unavailable_services = objects.ServiceList()
  4943. cid = [cm.id for cm in nova_context.CELLS
  4944. if cm.uuid == cell_uuid]
  4945. # We know cid[0] is in the list because we are using the
  4946. # same list that scatter_gather_all_cells used
  4947. hms = objects.HostMappingList.get_by_cell_id(context,
  4948. cid[0])
  4949. for hm in hms:
  4950. unavailable_services.objects.append(objects.Service(
  4951. binary='nova-compute', host=hm.host))
  4952. LOG.warning("Cell %s is not responding and hence only "
  4953. "partial results are available from this "
  4954. "cell.", cell_uuid)
  4955. services.extend(unavailable_services)
  4956. else:
  4957. LOG.warning("Cell %s is not responding and hence skipped "
  4958. "from the results.", cell_uuid)
  4959. else:
  4960. services = objects.ServiceList.get_all(context, disabled,
  4961. set_zones=set_zones)
  4962. ret_services = []
  4963. for service in services:
  4964. for key, val in filters.items():
  4965. if service[key] != val:
  4966. break
  4967. else:
  4968. # All filters matched.
  4969. ret_services.append(service)
  4970. return ret_services
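# A standalone sketch, not part of this module, isolating the for/else idiom
# used in the post-filtering above: the else branch runs only when no filter
# key mismatched (i.e. the loop never hit break). Shown here on plain dicts
# with hypothetical data.
def matches(record, filters):
    for key, val in filters.items():
        if record[key] != val:
            break
    else:
        # All filters matched.
        return True
    return False

# matches({'topic': 'compute', 'disabled': False}, {'topic': 'compute'})  -> True
# matches({'topic': 'conductor'}, {'topic': 'compute'})                   -> False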
  4971. def service_get_by_id(self, context, service_id):
  4972. """Get service entry for the given service id or uuid."""
  4973. try:
  4974. return _find_service_in_cell(context, service_id=service_id)
  4975. except exception.NotFound:
  4976. raise exception.ServiceNotFound(service_id=service_id)
  4977. @target_host_cell
  4978. def service_get_by_compute_host(self, context, host_name):
  4979. """Get service entry for the given compute hostname."""
  4980. return objects.Service.get_by_compute_host(context, host_name)
  4981. def _update_compute_provider_status(self, context, service):
  4982. """Calls the compute service to sync the COMPUTE_STATUS_DISABLED trait.
  4983. There are two cases where the API will not call the compute service:
  4984. * The compute service is down. In this case the trait is synchronized
  4985. when the compute service is restarted.
  4986. * The compute service is old. In this case the trait is synchronized
  4987. when the compute service is upgraded and restarted.
  4988. :param context: nova auth RequestContext
  4989. :param service: nova.objects.Service object which has been enabled
  4990. or disabled (see ``service_update``).
  4991. """
  4992. # Make sure the service is up so we can make the RPC call.
  4993. if not self.servicegroup_api.service_is_up(service):
  4994. LOG.info('Compute service on host %s is down. The '
  4995. 'COMPUTE_STATUS_DISABLED trait will be synchronized '
  4996. 'when the service is restarted.', service.host)
  4997. return
  4998. # Make sure the compute service is new enough for the trait sync
  4999. # behavior.
  5000. # TODO(mriedem): Remove this compat check in the U release.
  5001. if service.version < MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED:
  5002. LOG.info('Compute service on host %s is too old to sync the '
  5003. 'COMPUTE_STATUS_DISABLED trait in Placement. The '
  5004. 'trait will be synchronized when the service is '
  5005. 'upgraded and restarted.', service.host)
  5006. return
  5007. enabled = not service.disabled
  5008. # Avoid leaking errors out of the API.
  5009. try:
  5010. LOG.debug('Calling the compute service on host %s to sync the '
  5011. 'COMPUTE_STATUS_DISABLED trait.', service.host)
  5012. self.rpcapi.set_host_enabled(context, service.host, enabled)
  5013. except Exception:
  5014. LOG.exception('An error occurred while updating the '
  5015. 'COMPUTE_STATUS_DISABLED trait on compute node '
  5016. 'resource providers managed by host %s. The trait '
  5017. 'will be synchronized automatically by the compute '
  5018. 'service when the update_available_resource '
  5019. 'periodic task runs.', service.host)
  5020. def service_update(self, context, service):
  5021. """Performs the actual service update operation.
  5022. If the "disabled" field is changed, potentially calls the compute
  5023. service to sync the COMPUTE_STATUS_DISABLED trait on the compute node
  5024. resource providers managed by this compute service.
  5025. :param context: nova auth RequestContext
  5026. :param service: nova.objects.Service object with changes already
  5027. set on the object
  5028. """
  5029. # Before persisting changes and resetting the changed fields on the
  5030. # Service object, determine if the disabled field changed.
  5031. update_placement = 'disabled' in service.obj_what_changed()
  5032. # Persist the Service object changes to the database.
  5033. service.save()
  5034. # If the disabled field changed, potentially call the compute service
  5035. # to sync the COMPUTE_STATUS_DISABLED trait.
  5036. if update_placement:
  5037. self._update_compute_provider_status(context, service)
  5038. return service
  5039. @target_host_cell
  5040. def service_update_by_host_and_binary(self, context, host_name, binary,
  5041. params_to_update):
  5042. """Enable / Disable a service.
  5043. Determines the cell that the service is in using the HostMapping.
  5044. For compute services, this stops new builds and migrations going to
  5045. the host.
  5046. See also ``service_update``.
  5047. :param context: nova auth RequestContext
  5048. :param host_name: hostname of the service
  5049. :param binary: service binary (really only supports "nova-compute")
  5050. :param params_to_update: dict of changes to make to the Service object
  5051. :raises: HostMappingNotFound if the host is not mapped to a cell
  5052. :raises: HostBinaryNotFound if a services table record is not found
  5053. with the given host_name and binary
  5054. """
  5055. # TODO(mriedem): Service.get_by_args is deprecated; we should use
  5056. # get_by_compute_host here (remember to update the "raises" docstring).
  5057. service = objects.Service.get_by_args(context, host_name, binary)
  5058. service.update(params_to_update)
  5059. return self.service_update(context, service)
  5060. @target_host_cell
  5061. def instance_get_all_by_host(self, context, host_name):
  5062. """Return all instances on the given host."""
  5063. return objects.InstanceList.get_by_host(context, host_name)
  5064. def task_log_get_all(self, context, task_name, period_beginning,
  5065. period_ending, host=None, state=None):
  5066. """Return the task logs within a given range, optionally
  5067. filtering by host and/or state.
  5068. """
  5069. return self.db.task_log_get_all(context, task_name,
  5070. period_beginning,
  5071. period_ending,
  5072. host=host,
  5073. state=state)
  5074. def compute_node_get(self, context, compute_id):
  5075. """Return compute node entry for particular integer ID or UUID."""
  5076. load_cells()
  5077. # NOTE(danms): Unfortunately this API exposes database identifiers
  5078. # which means we really can't do something efficient here
  5079. is_uuid = uuidutils.is_uuid_like(compute_id)
  5080. for cell in CELLS:
  5081. if cell.uuid == objects.CellMapping.CELL0_UUID:
  5082. continue
  5083. with nova_context.target_cell(context, cell) as cctxt:
  5084. try:
  5085. if is_uuid:
  5086. return objects.ComputeNode.get_by_uuid(cctxt,
  5087. compute_id)
  5088. return objects.ComputeNode.get_by_id(cctxt,
  5089. int(compute_id))
  5090. except exception.ComputeHostNotFound:
  5091. # NOTE(danms): Keep looking in other cells
  5092. continue
  5093. raise exception.ComputeHostNotFound(host=compute_id)
  5094. def compute_node_get_all(self, context, limit=None, marker=None):
  5095. load_cells()
  5096. computes = []
  5097. uuid_marker = marker and uuidutils.is_uuid_like(marker)
  5098. for cell in CELLS:
  5099. if cell.uuid == objects.CellMapping.CELL0_UUID:
  5100. continue
  5101. with nova_context.target_cell(context, cell) as cctxt:
  5102. # If we have a marker and it's a uuid, see if the compute node
  5103. # is in this cell.
  5104. if marker and uuid_marker:
  5105. try:
  5106. compute_marker = objects.ComputeNode.get_by_uuid(
  5107. cctxt, marker)
  5108. # we found the marker compute node, so use its id
  5109. # for the actual marker for paging in this cell's db
  5110. marker = compute_marker.id
  5111. except exception.ComputeHostNotFound:
  5112. # The marker node isn't in this cell so keep looking.
  5113. continue
  5114. try:
  5115. cell_computes = objects.ComputeNodeList.get_by_pagination(
  5116. cctxt, limit=limit, marker=marker)
  5117. except exception.MarkerNotFound:
  5118. # NOTE(danms): Keep looking through cells
  5119. continue
  5120. computes.extend(cell_computes)
  5121. # NOTE(danms): We must have found the marker, so continue on
  5122. # without one
  5123. marker = None
  5124. if limit:
  5125. limit -= len(cell_computes)
  5126. if limit <= 0:
  5127. break
  5128. if marker is not None and len(computes) == 0:
  5129. # NOTE(danms): If we did not find the marker in any cell,
  5130. # mimic the db_api behavior here.
  5131. raise exception.MarkerNotFound(marker=marker)
  5132. return objects.ComputeNodeList(objects=computes)
  5133. def compute_node_search_by_hypervisor(self, context, hypervisor_match):
  5134. load_cells()
  5135. computes = []
  5136. for cell in CELLS:
  5137. if cell.uuid == objects.CellMapping.CELL0_UUID:
  5138. continue
  5139. with nova_context.target_cell(context, cell) as cctxt:
  5140. cell_computes = objects.ComputeNodeList.get_by_hypervisor(
  5141. cctxt, hypervisor_match)
  5142. computes.extend(cell_computes)
  5143. return objects.ComputeNodeList(objects=computes)
  5144. def compute_node_statistics(self, context):
  5145. load_cells()
  5146. cell_stats = []
  5147. for cell in CELLS:
  5148. if cell.uuid == objects.CellMapping.CELL0_UUID:
  5149. continue
  5150. with nova_context.target_cell(context, cell) as cctxt:
  5151. cell_stats.append(self.db.compute_node_statistics(cctxt))
  5152. if cell_stats:
  5153. keys = cell_stats[0].keys()
  5154. return {k: sum(stats[k] for stats in cell_stats)
  5155. for k in keys}
  5156. else:
  5157. return {}
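# A worked example, not part of this module, of the key-wise summing above;
# it assumes every cell returns the same statistic keys, which is what the
# dict comprehension relies on. Names and numbers are made up.
def sum_cell_stats(cell_stats):
    if not cell_stats:
        return {}
    keys = cell_stats[0].keys()
    return {k: sum(stats[k] for stats in cell_stats) for k in keys}

# sum_cell_stats([{'count': 2, 'vcpus_used': 16},
#                 {'count': 1, 'vcpus_used': 8}])
# -> {'count': 3, 'vcpus_used': 24}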
  5158. class InstanceActionAPI(base.Base):
  5159. """Sub-set of the Compute Manager API for managing instance actions."""
  5160. def actions_get(self, context, instance, limit=None, marker=None,
  5161. filters=None):
  5162. return objects.InstanceActionList.get_by_instance_uuid(
  5163. context, instance.uuid, limit, marker, filters)
  5164. def action_get_by_request_id(self, context, instance, request_id):
  5165. return objects.InstanceAction.get_by_request_id(
  5166. context, instance.uuid, request_id)
  5167. def action_events_get(self, context, instance, action_id):
  5168. return objects.InstanceActionEventList.get_by_action(
  5169. context, action_id)
  5170. class AggregateAPI(base.Base):
  5171. """Sub-set of the Compute Manager API for managing host aggregates."""
  5172. def __init__(self, **kwargs):
  5173. self.compute_rpcapi = compute_rpcapi.ComputeAPI()
  5174. self.query_client = query.SchedulerQueryClient()
  5175. self._placement_client = None # Lazy-load on first access.
  5176. super(AggregateAPI, self).__init__(**kwargs)
  5177. @property
  5178. def placement_client(self):
  5179. if self._placement_client is None:
  5180. self._placement_client = report.SchedulerReportClient()
  5181. return self._placement_client
  5182. @wrap_exception()
  5183. def create_aggregate(self, context, aggregate_name, availability_zone):
  5184. """Creates the model for the aggregate."""
  5185. aggregate = objects.Aggregate(context=context)
  5186. aggregate.name = aggregate_name
  5187. if availability_zone:
  5188. aggregate.metadata = {'availability_zone': availability_zone}
  5189. aggregate.create()
  5190. self.query_client.update_aggregates(context, [aggregate])
  5191. return aggregate
  5192. def get_aggregate(self, context, aggregate_id):
  5193. """Get an aggregate by id."""
  5194. return objects.Aggregate.get_by_id(context, aggregate_id)
  5195. def get_aggregate_list(self, context):
  5196. """Get all the aggregates."""
  5197. return objects.AggregateList.get_all(context)
  5198. def get_aggregates_by_host(self, context, compute_host):
  5199. """Get all the aggregates where the given host is presented."""
  5200. return objects.AggregateList.get_by_host(context, compute_host)
  5201. @wrap_exception()
  5202. def update_aggregate(self, context, aggregate_id, values):
  5203. """Update the properties of an aggregate."""
  5204. aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
  5205. if 'name' in values:
  5206. aggregate.name = values.pop('name')
  5207. aggregate.save()
  5208. self.is_safe_to_update_az(context, values, aggregate=aggregate,
  5209. action_name=AGGREGATE_ACTION_UPDATE,
  5210. check_no_instances_in_az=True)
  5211. if values:
  5212. aggregate.update_metadata(values)
  5213. aggregate.updated_at = timeutils.utcnow()
  5214. self.query_client.update_aggregates(context, [aggregate])
  5215. # If the updated values include availability_zone, then the cache
  5216. # that stores availability zones and hosts needs to be reset
  5217. if values.get('availability_zone'):
  5218. availability_zones.reset_cache()
  5219. return aggregate
  5220. @wrap_exception()
  5221. def update_aggregate_metadata(self, context, aggregate_id, metadata):
  5222. """Updates the aggregate metadata."""
  5223. aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
  5224. self.is_safe_to_update_az(context, metadata, aggregate=aggregate,
  5225. action_name=AGGREGATE_ACTION_UPDATE_META,
  5226. check_no_instances_in_az=True)
  5227. aggregate.update_metadata(metadata)
  5228. self.query_client.update_aggregates(context, [aggregate])
  5229. # If the updated metadata includes availability_zone, then the cache
  5230. # that stores availability zones and hosts needs to be reset
  5231. if metadata and metadata.get('availability_zone'):
  5232. availability_zones.reset_cache()
  5233. aggregate.updated_at = timeutils.utcnow()
  5234. return aggregate
  5235. @wrap_exception()
  5236. def delete_aggregate(self, context, aggregate_id):
  5237. """Deletes the aggregate."""
  5238. aggregate_payload = {'aggregate_id': aggregate_id}
  5239. compute_utils.notify_about_aggregate_update(context,
  5240. "delete.start",
  5241. aggregate_payload)
  5242. aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
  5243. compute_utils.notify_about_aggregate_action(
  5244. context=context,
  5245. aggregate=aggregate,
  5246. action=fields_obj.NotificationAction.DELETE,
  5247. phase=fields_obj.NotificationPhase.START)
  5248. if len(aggregate.hosts) > 0:
  5249. msg = _("Host aggregate is not empty")
  5250. raise exception.InvalidAggregateActionDelete(
  5251. aggregate_id=aggregate_id, reason=msg)
  5252. aggregate.destroy()
  5253. self.query_client.delete_aggregate(context, aggregate)
  5254. compute_utils.notify_about_aggregate_update(context,
  5255. "delete.end",
  5256. aggregate_payload)
  5257. compute_utils.notify_about_aggregate_action(
  5258. context=context,
  5259. aggregate=aggregate,
  5260. action=fields_obj.NotificationAction.DELETE,
  5261. phase=fields_obj.NotificationPhase.END)
  5262. def is_safe_to_update_az(self, context, metadata, aggregate,
  5263. hosts=None,
  5264. action_name=AGGREGATE_ACTION_ADD,
  5265. check_no_instances_in_az=False):
  5266. """Determine if updates alter an aggregate's availability zone.
  5267. :param context: local context
  5268. :param metadata: Target metadata for updating aggregate
  5269. :param aggregate: Aggregate to update
  5270. :param hosts: Hosts to check. If None, aggregate.hosts is used
  5271. :type hosts: list
  5272. :param action_name: Calling method for logging purposes
  5273. :param check_no_instances_in_az: if True, it checks
  5274. there are no instances on any hosts of the aggregate
  5275. """
  5276. if 'availability_zone' in metadata:
  5277. if not metadata['availability_zone']:
  5278. msg = _("Aggregate %s does not support empty named "
  5279. "availability zone") % aggregate.name
  5280. self._raise_invalid_aggregate_exc(action_name, aggregate.id,
  5281. msg)
  5282. _hosts = hosts or aggregate.hosts
  5283. host_aggregates = objects.AggregateList.get_by_metadata_key(
  5284. context, 'availability_zone', hosts=_hosts)
  5285. conflicting_azs = [
  5286. agg.availability_zone for agg in host_aggregates
  5287. if agg.availability_zone != metadata['availability_zone'] and
  5288. agg.id != aggregate.id]
  5289. if conflicting_azs:
  5290. msg = _("One or more hosts already in availability zone(s) "
  5291. "%s") % conflicting_azs
  5292. self._raise_invalid_aggregate_exc(action_name, aggregate.id,
  5293. msg)
  5294. same_az_name = (aggregate.availability_zone ==
  5295. metadata['availability_zone'])
  5296. if check_no_instances_in_az and not same_az_name:
  5297. instance_count_by_cell = (
  5298. nova_context.scatter_gather_skip_cell0(
  5299. context,
  5300. objects.InstanceList.get_count_by_hosts,
  5301. _hosts))
  5302. if any(cnt for cnt in instance_count_by_cell.values()):
  5303. msg = _("One or more hosts contain instances in this zone")
  5304. self._raise_invalid_aggregate_exc(
  5305. action_name, aggregate.id, msg)
  5306. def _raise_invalid_aggregate_exc(self, action_name, aggregate_id, reason):
  5307. if action_name == AGGREGATE_ACTION_ADD:
  5308. raise exception.InvalidAggregateActionAdd(
  5309. aggregate_id=aggregate_id, reason=reason)
  5310. elif action_name == AGGREGATE_ACTION_UPDATE:
  5311. raise exception.InvalidAggregateActionUpdate(
  5312. aggregate_id=aggregate_id, reason=reason)
  5313. elif action_name == AGGREGATE_ACTION_UPDATE_META:
  5314. raise exception.InvalidAggregateActionUpdateMeta(
  5315. aggregate_id=aggregate_id, reason=reason)
  5316. elif action_name == AGGREGATE_ACTION_DELETE:
  5317. raise exception.InvalidAggregateActionDelete(
  5318. aggregate_id=aggregate_id, reason=reason)
  5319. raise exception.NovaException(
  5320. _("Unexpected aggregate action %s") % action_name)
  5321. def _update_az_cache_for_host(self, context, host_name, aggregate_meta):
  5322. # Update the availability_zone cache to avoid getting wrong
  5323. # availability_zone in cache retention time when add/remove
  5324. # host to/from aggregate.
  5325. if aggregate_meta and aggregate_meta.get('availability_zone'):
  5326. availability_zones.update_host_availability_zone_cache(context,
  5327. host_name)
  5328. @wrap_exception()
  5329. def add_host_to_aggregate(self, context, aggregate_id, host_name):
  5330. """Adds the host to an aggregate."""
  5331. aggregate_payload = {'aggregate_id': aggregate_id,
  5332. 'host_name': host_name}
  5333. compute_utils.notify_about_aggregate_update(context,
  5334. "addhost.start",
  5335. aggregate_payload)
  5336. service = _get_service_in_cell_by_host(context, host_name)
  5337. if service.host != host_name:
  5338. # NOTE(danms): If we found a service but it is not an
  5339. # exact match, we may have a case-insensitive backend
  5340. # database (like mysql) which will end up with us
  5341. # adding the host-aggregate mapping with a
  5342. # non-matching hostname.
  5343. raise exception.ComputeHostNotFound(host=host_name)
  5344. aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
  5345. compute_utils.notify_about_aggregate_action(
  5346. context=context,
  5347. aggregate=aggregate,
  5348. action=fields_obj.NotificationAction.ADD_HOST,
  5349. phase=fields_obj.NotificationPhase.START)
  5350. self.is_safe_to_update_az(context, aggregate.metadata,
  5351. hosts=[host_name], aggregate=aggregate)
  5352. aggregate.add_host(host_name)
  5353. self.query_client.update_aggregates(context, [aggregate])
  5354. nodes = objects.ComputeNodeList.get_all_by_host(context, host_name)
  5355. node_name = nodes[0].hypervisor_hostname
  5356. try:
  5357. self.placement_client.aggregate_add_host(
  5358. context, aggregate.uuid, host_name=node_name)
  5359. except (exception.ResourceProviderNotFound,
  5360. exception.ResourceProviderAggregateRetrievalFailed,
  5361. exception.ResourceProviderUpdateFailed,
  5362. exception.ResourceProviderUpdateConflict) as err:
  5363. # NOTE(jaypipes): We don't want a failure performing the mirroring
  5364. # action in the placement service to be returned to the user (they
  5365. # probably don't know anything about the placement service and
  5366. # would just be confused). So, we just log a warning here, noting
  5367. # that on the next run of nova-manage placement sync_aggregates
  5368. # things will go back to normal
  5369. LOG.warning("Failed to associate %s with a placement "
  5370. "aggregate: %s. This may be corrected after running "
  5371. "nova-manage placement sync_aggregates.",
  5372. node_name, err)
  5373. self._update_az_cache_for_host(context, host_name, aggregate.metadata)
  5374. # NOTE(jogo): Send message to host to support resource pools
  5375. self.compute_rpcapi.add_aggregate_host(context,
  5376. aggregate=aggregate, host_param=host_name, host=host_name)
  5377. aggregate_payload.update({'name': aggregate.name})
  5378. compute_utils.notify_about_aggregate_update(context,
  5379. "addhost.end",
  5380. aggregate_payload)
  5381. compute_utils.notify_about_aggregate_action(
  5382. context=context,
  5383. aggregate=aggregate,
  5384. action=fields_obj.NotificationAction.ADD_HOST,
  5385. phase=fields_obj.NotificationPhase.END)
  5386. return aggregate
  5387. @wrap_exception()
  5388. def remove_host_from_aggregate(self, context, aggregate_id, host_name):
  5389. """Removes host from the aggregate."""
  5390. aggregate_payload = {'aggregate_id': aggregate_id,
  5391. 'host_name': host_name}
  5392. compute_utils.notify_about_aggregate_update(context,
  5393. "removehost.start",
  5394. aggregate_payload)
  5395. _get_service_in_cell_by_host(context, host_name)
  5396. aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
  5397. compute_utils.notify_about_aggregate_action(
  5398. context=context,
  5399. aggregate=aggregate,
  5400. action=fields_obj.NotificationAction.REMOVE_HOST,
  5401. phase=fields_obj.NotificationPhase.START)
  5402. # Remove the resource provider from the provider aggregate first before
  5403. # we change anything on the nova side because if we did the nova stuff
  5404. # first we can't re-attempt this from the compute API if cleaning up
  5405. # placement fails.
  5406. nodes = objects.ComputeNodeList.get_all_by_host(context, host_name)
  5407. node_name = nodes[0].hypervisor_hostname
  5408. try:
  5409. # Anything else this raises is handled in the route handler as
  5410. # either a 409 (ResourceProviderUpdateConflict) or 500.
  5411. self.placement_client.aggregate_remove_host(
  5412. context, aggregate.uuid, node_name)
  5413. except exception.ResourceProviderNotFound as err:
  5414. # If the resource provider is not found then it's likely not part
  5415. # of the aggregate anymore anyway since provider aggregates are
  5416. # not resources themselves with metadata like nova aggregates; they
  5417. # are just a grouping concept around resource providers. Log and
  5418. # continue.
  5419. LOG.warning("Failed to remove association of %s with a placement "
  5420. "aggregate: %s.", node_name, err)
  5421. aggregate.delete_host(host_name)
  5422. self.query_client.update_aggregates(context, [aggregate])
  5423. self._update_az_cache_for_host(context, host_name, aggregate.metadata)
  5424. self.compute_rpcapi.remove_aggregate_host(context,
  5425. aggregate=aggregate, host_param=host_name, host=host_name)
  5426. compute_utils.notify_about_aggregate_update(context,
  5427. "removehost.end",
  5428. aggregate_payload)
  5429. compute_utils.notify_about_aggregate_action(
  5430. context=context,
  5431. aggregate=aggregate,
  5432. action=fields_obj.NotificationAction.REMOVE_HOST,
  5433. phase=fields_obj.NotificationPhase.END)
  5434. return aggregate
  5435. class KeypairAPI(base.Base):
  5436. """Subset of the Compute Manager API for managing key pairs."""
  5437. get_notifier = functools.partial(rpc.get_notifier, service='api')
  5438. wrap_exception = functools.partial(exception_wrapper.wrap_exception,
  5439. get_notifier=get_notifier,
  5440. binary='nova-api')
  5441. def _notify(self, context, event_suffix, keypair_name):
  5442. payload = {
  5443. 'tenant_id': context.project_id,
  5444. 'user_id': context.user_id,
  5445. 'key_name': keypair_name,
  5446. }
  5447. notify = self.get_notifier()
  5448. notify.info(context, 'keypair.%s' % event_suffix, payload)
  5449. def _validate_new_key_pair(self, context, user_id, key_name, key_type):
  5450. safe_chars = "_- " + string.digits + string.ascii_letters
  5451. clean_value = "".join(x for x in key_name if x in safe_chars)
  5452. if clean_value != key_name:
  5453. raise exception.InvalidKeypair(
  5454. reason=_("Keypair name contains unsafe characters"))
  5455. try:
  5456. utils.check_string_length(key_name, min_length=1, max_length=255)
  5457. except exception.InvalidInput:
  5458. raise exception.InvalidKeypair(
  5459. reason=_('Keypair name must be a string and between '
  5460. '1 and 255 characters long'))
  5461. try:
  5462. objects.Quotas.check_deltas(context, {'key_pairs': 1}, user_id)
  5463. except exception.OverQuota:
  5464. raise exception.KeypairLimitExceeded()
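# A standalone sketch, not part of this module: the "unsafe characters"
# check above strips everything outside the whitelist and compares the
# result with the original name, so any removed character marks the name as
# unsafe. The example names are hypothetical.
import string


def keypair_name_is_safe(name):
    safe_chars = "_- " + string.digits + string.ascii_letters
    return "".join(c for c in name if c in safe_chars) == name

# keypair_name_is_safe('my-key_01')  -> True
# keypair_name_is_safe('my$key')     -> False ('$' gets stripped, so mismatch)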
  5465. @wrap_exception()
  5466. def import_key_pair(self, context, user_id, key_name, public_key,
  5467. key_type=keypair_obj.KEYPAIR_TYPE_SSH):
  5468. """Import a key pair using an existing public key."""
  5469. self._validate_new_key_pair(context, user_id, key_name, key_type)
  5470. self._notify(context, 'import.start', key_name)
  5471. keypair = objects.KeyPair(context)
  5472. keypair.user_id = user_id
  5473. keypair.name = key_name
  5474. keypair.type = key_type
  5475. keypair.fingerprint = None
  5476. keypair.public_key = public_key
  5477. compute_utils.notify_about_keypair_action(
  5478. context=context,
  5479. keypair=keypair,
  5480. action=fields_obj.NotificationAction.IMPORT,
  5481. phase=fields_obj.NotificationPhase.START)
  5482. fingerprint = self._generate_fingerprint(public_key, key_type)
  5483. keypair.fingerprint = fingerprint
  5484. keypair.create()
  5485. compute_utils.notify_about_keypair_action(
  5486. context=context,
  5487. keypair=keypair,
  5488. action=fields_obj.NotificationAction.IMPORT,
  5489. phase=fields_obj.NotificationPhase.END)
  5490. self._notify(context, 'import.end', key_name)
  5491. return keypair
  5492. @wrap_exception()
  5493. def create_key_pair(self, context, user_id, key_name,
  5494. key_type=keypair_obj.KEYPAIR_TYPE_SSH):
  5495. """Create a new key pair."""
  5496. self._validate_new_key_pair(context, user_id, key_name, key_type)
  5497. keypair = objects.KeyPair(context)
  5498. keypair.user_id = user_id
  5499. keypair.name = key_name
  5500. keypair.type = key_type
  5501. keypair.fingerprint = None
  5502. keypair.public_key = None
  5503. self._notify(context, 'create.start', key_name)
  5504. compute_utils.notify_about_keypair_action(
  5505. context=context,
  5506. keypair=keypair,
  5507. action=fields_obj.NotificationAction.CREATE,
  5508. phase=fields_obj.NotificationPhase.START)
  5509. private_key, public_key, fingerprint = self._generate_key_pair(
  5510. user_id, key_type)
  5511. keypair.fingerprint = fingerprint
  5512. keypair.public_key = public_key
  5513. keypair.create()
  5514. # NOTE(melwitt): We recheck the quota after creating the object to
  5515. # prevent users from allocating more resources than their allowed quota
  5516. # in the event of a race. This is configurable because it can be
  5517. # expensive if strict quota limits are not required in a deployment.
  5518. if CONF.quota.recheck_quota:
  5519. try:
  5520. objects.Quotas.check_deltas(context, {'key_pairs': 0}, user_id)
  5521. except exception.OverQuota:
  5522. keypair.destroy()
  5523. raise exception.KeypairLimitExceeded()
  5524. compute_utils.notify_about_keypair_action(
  5525. context=context,
  5526. keypair=keypair,
  5527. action=fields_obj.NotificationAction.CREATE,
  5528. phase=fields_obj.NotificationPhase.END)
  5529. self._notify(context, 'create.end', key_name)
  5530. return keypair, private_key
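# A standalone sketch, not part of this module, isolating the
# create-then-recheck race mitigation used above: create the object, then
# re-verify the quota with a zero delta and roll back if a concurrent create
# pushed usage over the limit. All names below are hypothetical stand-ins
# for Quotas.check_deltas and keypair.destroy.
class HypotheticalOverQuota(Exception):
    pass


def create_with_recheck(create, recheck_quota, destroy, recheck_enabled=True):
    obj = create()
    if recheck_enabled:
        try:
            # Zero delta: only confirm that post-create usage is still within
            # the limit.
            recheck_quota()
        except HypotheticalOverQuota:
            destroy(obj)
            raise
    return obj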
  5531. def _generate_fingerprint(self, public_key, key_type):
  5532. if key_type == keypair_obj.KEYPAIR_TYPE_SSH:
  5533. return crypto.generate_fingerprint(public_key)
  5534. elif key_type == keypair_obj.KEYPAIR_TYPE_X509:
  5535. return crypto.generate_x509_fingerprint(public_key)
  5536. def _generate_key_pair(self, user_id, key_type):
  5537. if key_type == keypair_obj.KEYPAIR_TYPE_SSH:
  5538. return crypto.generate_key_pair()
  5539. elif key_type == keypair_obj.KEYPAIR_TYPE_X509:
  5540. return crypto.generate_winrm_x509_cert(user_id)
  5541. @wrap_exception()
  5542. def delete_key_pair(self, context, user_id, key_name):
  5543. """Delete a keypair by name."""
  5544. self._notify(context, 'delete.start', key_name)
  5545. keypair = self.get_key_pair(context, user_id, key_name)
  5546. compute_utils.notify_about_keypair_action(
  5547. context=context,
  5548. keypair=keypair,
  5549. action=fields_obj.NotificationAction.DELETE,
  5550. phase=fields_obj.NotificationPhase.START)
  5551. objects.KeyPair.destroy_by_name(context, user_id, key_name)
  5552. compute_utils.notify_about_keypair_action(
  5553. context=context,
  5554. keypair=keypair,
  5555. action=fields_obj.NotificationAction.DELETE,
  5556. phase=fields_obj.NotificationPhase.END)
  5557. self._notify(context, 'delete.end', key_name)
  5558. def get_key_pairs(self, context, user_id, limit=None, marker=None):
  5559. """List key pairs."""
  5560. return objects.KeyPairList.get_by_user(
  5561. context, user_id, limit=limit, marker=marker)
  5562. def get_key_pair(self, context, user_id, key_name):
  5563. """Get a keypair by name."""
  5564. return objects.KeyPair.get_by_name(context, user_id, key_name)