OpenStack Compute (Nova)

# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# Copyright 2011 Piston Cloud Computing, Inc.
# Copyright 2012-2013 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Handles all requests relating to compute resources (e.g. guest VMs,
networking and storage of VMs, and compute hosts on which they run)."""
import collections
import functools
import re
import string

from castellan import key_manager
from oslo_log import log as logging
from oslo_messaging import exceptions as oslo_exceptions
from oslo_serialization import base64 as base64utils
from oslo_utils import excutils
from oslo_utils import strutils
from oslo_utils import timeutils
from oslo_utils import units
from oslo_utils import uuidutils
import six
from six.moves import range

from nova import availability_zones
from nova import block_device
from nova.compute import flavors
from nova.compute import instance_actions
from nova.compute import instance_list
from nova.compute import migration_list
from nova.compute import power_state
from nova.compute import rpcapi as compute_rpcapi
from nova.compute import task_states
from nova.compute import utils as compute_utils
from nova.compute.utils import wrap_instance_event
from nova.compute import vm_states
from nova import conductor
import nova.conf
from nova import context as nova_context
from nova import crypto
from nova.db import base
from nova.db.sqlalchemy import api as db_api
from nova import exception
from nova import exception_wrapper
from nova import hooks
from nova.i18n import _
from nova import image
from nova import network
from nova.network import model as network_model
from nova.network.neutronv2 import constants
from nova.network.security_group import openstack_driver
from nova.network.security_group import security_group_base
from nova import objects
from nova.objects import base as obj_base
from nova.objects import block_device as block_device_obj
from nova.objects import external_event as external_event_obj
from nova.objects import fields as fields_obj
from nova.objects import keypair as keypair_obj
from nova.objects import quotas as quotas_obj
from nova.pci import request as pci_request
from nova.policies import servers as servers_policies
import nova.policy
from nova import profiler
from nova import rpc
from nova.scheduler.client import query
from nova.scheduler.client import report
from nova.scheduler import utils as scheduler_utils
from nova import servicegroup
from nova import utils
from nova.virt import hardware
from nova.volume import cinder
LOG = logging.getLogger(__name__)

get_notifier = functools.partial(rpc.get_notifier, service='compute')
# NOTE(gibi): legacy notification used compute as a service but these
# calls still run on the client side of the compute service which is
# nova-api. By setting the binary to nova-api below, we can make sure
# that the new versioned notifications has the right publisher_id but the
# legacy notifications does not change.
wrap_exception = functools.partial(exception_wrapper.wrap_exception,
                                   get_notifier=get_notifier,
                                   binary='nova-api')

CONF = nova.conf.CONF

RO_SECURITY_GROUPS = ['default']

AGGREGATE_ACTION_UPDATE = 'Update'
AGGREGATE_ACTION_UPDATE_META = 'UpdateMeta'
AGGREGATE_ACTION_DELETE = 'Delete'
AGGREGATE_ACTION_ADD = 'Add'

MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED = 38

# FIXME(danms): Keep a global cache of the cells we find the
# first time we look. This needs to be refreshed on a timer or
# trigger.
CELLS = []
def check_instance_state(vm_state=None, task_state=(None,),
                         must_have_launched=True):
    """Decorator to check VM and/or task state before entry to API functions.

    If the instance is in the wrong state, or has not been successfully
    started at least once the wrapper will raise an exception.
    """
    if vm_state is not None and not isinstance(vm_state, set):
        vm_state = set(vm_state)
    if task_state is not None and not isinstance(task_state, set):
        task_state = set(task_state)

    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            if vm_state is not None and instance.vm_state not in vm_state:
                raise exception.InstanceInvalidState(
                    attr='vm_state',
                    instance_uuid=instance.uuid,
                    state=instance.vm_state,
                    method=f.__name__)
            if (task_state is not None and
                    instance.task_state not in task_state):
                raise exception.InstanceInvalidState(
                    attr='task_state',
                    instance_uuid=instance.uuid,
                    state=instance.task_state,
                    method=f.__name__)
            if must_have_launched and not instance.launched_at:
                raise exception.InstanceInvalidState(
                    attr='launched_at',
                    instance_uuid=instance.uuid,
                    state=instance.launched_at,
                    method=f.__name__)

            return f(self, context, instance, *args, **kw)
        return inner
    return outer
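
# Illustrative usage sketch (editor's note, not part of the upstream module):
# API methods later in this file wrap themselves with this decorator so that
# a call is rejected unless the instance is in an allowed state, e.g.:
#
#     @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED],
#                           task_state=[None])
#     def example_api_method(self, context, instance):
#         ...
#
# The vm_states/task_state values above are only an assumed example; each
# real method in this class chooses its own allowed states.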

def _set_or_none(q):
    return q if q is None or isinstance(q, set) else set(q)


def reject_instance_state(vm_state=None, task_state=None):
    """Decorator. Raise InstanceInvalidState if instance is in any of the
    given states.
    """
    vm_state = _set_or_none(vm_state)
    task_state = _set_or_none(task_state)

    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            _InstanceInvalidState = functools.partial(
                exception.InstanceInvalidState,
                instance_uuid=instance.uuid,
                method=f.__name__)

            if vm_state is not None and instance.vm_state in vm_state:
                raise _InstanceInvalidState(
                    attr='vm_state', state=instance.vm_state)
            if task_state is not None and instance.task_state in task_state:
                raise _InstanceInvalidState(
                    attr='task_state', state=instance.task_state)

            return f(self, context, instance, *args, **kw)
        return inner
    return outer


def check_instance_host(function):
    @six.wraps(function)
    def wrapped(self, context, instance, *args, **kwargs):
        if not instance.host:
            raise exception.InstanceNotReady(instance_id=instance.uuid)
        return function(self, context, instance, *args, **kwargs)
    return wrapped


def check_instance_lock(function):
    @six.wraps(function)
    def inner(self, context, instance, *args, **kwargs):
        if instance.locked and not context.is_admin:
            raise exception.InstanceIsLocked(instance_uuid=instance.uuid)
        return function(self, context, instance, *args, **kwargs)
    return inner


def reject_sev_instances(operation):
    """Decorator. Raise OperationNotSupportedForSEV if instance has SEV
    enabled.
    """
    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            if hardware.get_mem_encryption_constraint(instance.flavor,
                                                      instance.image_meta):
                raise exception.OperationNotSupportedForSEV(
                    instance_uuid=instance.uuid,
                    operation=operation)
            return f(self, context, instance, *args, **kw)
        return inner
    return outer

def _diff_dict(orig, new):
    """Return a dict describing how to change orig to new. The keys
    correspond to values that have changed; the value will be a list
    of one or two elements. The first element of the list will be
    either '+' or '-', indicating whether the key was updated or
    deleted; if the key was updated, the list will contain a second
    element, giving the updated value.
    """
    # Figure out what keys went away
    result = {k: ['-'] for k in set(orig.keys()) - set(new.keys())}
    # Compute the updates
    for key, value in new.items():
        if key not in orig or value != orig[key]:
            result[key] = ['+', value]

    return result
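
# Illustrative example (editor's note, not part of the upstream module):
#   _diff_dict({'a': 1, 'b': 2}, {'b': 3, 'c': 4})
# returns {'a': ['-'], 'b': ['+', 3], 'c': ['+', 4]}, i.e. 'a' was removed
# while 'b' was updated and 'c' was added.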

def load_cells():
    global CELLS
    if not CELLS:
        CELLS = objects.CellMappingList.get_all(
            nova_context.get_admin_context())
        LOG.debug('Found %(count)i cells: %(cells)s',
                  dict(count=len(CELLS),
                       cells=','.join([c.identity for c in CELLS])))

    if not CELLS:
        LOG.error('No cells are configured, unable to continue')


def _get_image_meta_obj(image_meta_dict):
    try:
        image_meta = objects.ImageMeta.from_dict(image_meta_dict)
    except ValueError as e:
        # there must be invalid values in the image meta properties so
        # consider this an invalid request
        msg = _('Invalid image metadata. Error: %s') % six.text_type(e)
        raise exception.InvalidRequest(msg)
    return image_meta

@profiler.trace_cls("compute_api")
class API(base.Base):
    """API for interacting with the compute manager."""

    def __init__(self, image_api=None, network_api=None, volume_api=None,
                 security_group_api=None, **kwargs):
        self.image_api = image_api or image.API()
        self.network_api = network_api or network.API()
        self.volume_api = volume_api or cinder.API()
        self._placementclient = None  # Lazy-load on first access.
        self.security_group_api = (security_group_api or
            openstack_driver.get_openstack_security_group_driver())
        self.compute_rpcapi = compute_rpcapi.ComputeAPI()
        self.compute_task_api = conductor.ComputeTaskAPI()
        self.servicegroup_api = servicegroup.API()
        self.host_api = HostAPI(self.compute_rpcapi, self.servicegroup_api)
        self.notifier = rpc.get_notifier('compute', CONF.host)
        if CONF.ephemeral_storage_encryption.enabled:
            self.key_manager = key_manager.API()
        # Help us to record host in EventReporter
        self.host = CONF.host

        super(API, self).__init__(**kwargs)

    def _record_action_start(self, context, instance, action):
        objects.InstanceAction.action_start(context, instance.uuid,
                                            action, want_result=False)
    def _check_injected_file_quota(self, context, injected_files):
        """Enforce quota limits on injected files.

        Raises a QuotaError if any limit is exceeded.
        """
        if injected_files is None:
            return

        # Check number of files first
        try:
            objects.Quotas.limit_check(context,
                                       injected_files=len(injected_files))
        except exception.OverQuota:
            raise exception.OnsetFileLimitExceeded()

        # OK, now count path and content lengths; we're looking for
        # the max...
        max_path = 0
        max_content = 0
        for path, content in injected_files:
            max_path = max(max_path, len(path))
            max_content = max(max_content, len(content))

        try:
            objects.Quotas.limit_check(context,
                                       injected_file_path_bytes=max_path,
                                       injected_file_content_bytes=max_content)
        except exception.OverQuota as exc:
            # Favor path limit over content limit for reporting
            # purposes
            if 'injected_file_path_bytes' in exc.kwargs['overs']:
                raise exception.OnsetFilePathLimitExceeded(
                    allowed=exc.kwargs['quotas']['injected_file_path_bytes'])
            else:
                raise exception.OnsetFileContentLimitExceeded(
                    allowed=exc.kwargs['quotas']['injected_file_content_bytes'])

    def _check_metadata_properties_quota(self, context, metadata=None):
        """Enforce quota limits on metadata properties."""
        if not metadata:
            metadata = {}
        if not isinstance(metadata, dict):
            msg = (_("Metadata type should be dict."))
            raise exception.InvalidMetadata(reason=msg)
        num_metadata = len(metadata)
        try:
            objects.Quotas.limit_check(context, metadata_items=num_metadata)
        except exception.OverQuota as exc:
            quota_metadata = exc.kwargs['quotas']['metadata_items']
            raise exception.MetadataLimitExceeded(allowed=quota_metadata)

        # Because metadata is stored in the DB, we hard-code the size limits
        # In future, we may support more variable length strings, so we act
        # as if this is quota-controlled for forwards compatibility.
        # Those are only used in V2 API, from V2.1 API, those checks are
        # validated at API layer schema validation.
        for k, v in metadata.items():
            try:
                utils.check_string_length(v)
                utils.check_string_length(k, min_length=1)
            except exception.InvalidInput as e:
                raise exception.InvalidMetadata(reason=e.format_message())

            if len(k) > 255:
                msg = _("Metadata property key greater than 255 characters")
                raise exception.InvalidMetadataSize(reason=msg)
            if len(v) > 255:
                msg = _("Metadata property value greater than 255 characters")
                raise exception.InvalidMetadataSize(reason=msg)
    def _check_requested_secgroups(self, context, secgroups):
        """Check if the security group requested exists and belongs to
        the project.

        :param context: The nova request context.
        :type context: nova.context.RequestContext
        :param secgroups: list of requested security group names, or uuids in
            the case of Neutron.
        :type secgroups: list
        :returns: list of requested security group names unmodified if using
            nova-network. If using Neutron, the list returned is all uuids.
            Note that 'default' is a special case and will be unmodified if
            it's requested.
        """
        security_groups = []
        for secgroup in secgroups:
            # NOTE(sdague): default is handled special
            if secgroup == "default":
                security_groups.append(secgroup)
                continue
            secgroup_dict = self.security_group_api.get(context, secgroup)
            if not secgroup_dict:
                raise exception.SecurityGroupNotFoundForProject(
                    project_id=context.project_id, security_group_id=secgroup)

            # Check to see if it's a nova-network or neutron type.
            if isinstance(secgroup_dict['id'], int):
                # This is nova-network so just return the requested name.
                security_groups.append(secgroup)
            else:
                # The id for neutron is a uuid, so we return the id (uuid).
                security_groups.append(secgroup_dict['id'])

        return security_groups
    def _check_requested_networks(self, context, requested_networks,
                                  max_count):
        """Check that the requested networks belong to the project and that
        the fixed IP address provided for each network is within the same
        network block.
        """
        if requested_networks is not None:
            if requested_networks.no_allocate:
                # If the network request was specifically 'none' meaning don't
                # allocate any networks, we just return the number of
                # requested instances since quotas don't change at all.
                return max_count

            # NOTE(danms): Temporary transition
            requested_networks = requested_networks.as_tuples()

        return self.network_api.validate_networks(context, requested_networks,
                                                   max_count)
    def _handle_kernel_and_ramdisk(self, context, kernel_id, ramdisk_id,
                                   image):
        """Choose kernel and ramdisk appropriate for the instance.

        The kernel and ramdisk can be chosen in one of two ways:

            1. Passed in with create-instance request.

            2. Inherited from image metadata.

        If inherited from image metadata, and if that image metadata value is
        set to 'nokernel', both kernel and ramdisk will default to None.
        """
        # Inherit from image if not specified
        image_properties = image.get('properties', {})

        if kernel_id is None:
            kernel_id = image_properties.get('kernel_id')

        if ramdisk_id is None:
            ramdisk_id = image_properties.get('ramdisk_id')

        # Force to None if kernel_id indicates that a kernel is not to be used
        if kernel_id == 'nokernel':
            kernel_id = None
            ramdisk_id = None

        # Verify kernel and ramdisk exist (fail-fast)
        if kernel_id is not None:
            kernel_image = self.image_api.get(context, kernel_id)
            # kernel_id could have been a URI, not a UUID, so to keep behaviour
            # from before, which leaked that implementation detail out to the
            # caller, we return the image UUID of the kernel image and ramdisk
            # image (below) and not any image URIs that might have been
            # supplied.
            # TODO(jaypipes): Get rid of this silliness once we move to a real
            # Image object and hide all of that stuff within nova.image.api.
            kernel_id = kernel_image['id']

        if ramdisk_id is not None:
            ramdisk_image = self.image_api.get(context, ramdisk_id)
            ramdisk_id = ramdisk_image['id']

        return kernel_id, ramdisk_id

    @staticmethod
    def parse_availability_zone(context, availability_zone):
        # NOTE(vish): We have a legacy hack to allow admins to specify hosts
        #             via az using az:host:node. It might be nice to expose an
        #             api to specify specific hosts to force onto, but for
        #             now it just supports this legacy hack.
        # NOTE(deva): It is also possible to specify az::node, in which case
        #             the host manager will determine the correct host.
        forced_host = None
        forced_node = None
        if availability_zone and ':' in availability_zone:
            c = availability_zone.count(':')
            if c == 1:
                availability_zone, forced_host = availability_zone.split(':')
            elif c == 2:
                if '::' in availability_zone:
                    availability_zone, forced_node = \
                        availability_zone.split('::')
                else:
                    availability_zone, forced_host, forced_node = \
                        availability_zone.split(':')
            else:
                raise exception.InvalidInput(
                    reason="Unable to parse availability_zone")

        if not availability_zone:
            availability_zone = CONF.default_schedule_zone

        return availability_zone, forced_host, forced_node
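
    # Illustrative parse results for the formats accepted above (editor's
    # note, not part of the upstream module):
    #   'az1'             -> ('az1', None, None)
    #   'az1:host1'       -> ('az1', 'host1', None)
    #   'az1:host1:node1' -> ('az1', 'host1', 'node1')
    #   'az1::node1'      -> ('az1', None, 'node1')
    # An empty zone after parsing falls back to CONF.default_schedule_zone.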

    def _ensure_auto_disk_config_is_valid(self, auto_disk_config_img,
                                          auto_disk_config, image):
        auto_disk_config_disabled = \
            utils.is_auto_disk_config_disabled(auto_disk_config_img)
        if auto_disk_config_disabled and auto_disk_config:
            raise exception.AutoDiskConfigDisabledByImage(image=image)

    def _inherit_properties_from_image(self, image, auto_disk_config):
        image_properties = image.get('properties', {})
        auto_disk_config_img = \
            utils.get_auto_disk_config_from_image_props(image_properties)
        self._ensure_auto_disk_config_is_valid(auto_disk_config_img,
                                               auto_disk_config,
                                               image.get("id"))
        if auto_disk_config is None:
            auto_disk_config = strutils.bool_from_string(auto_disk_config_img)

        return {
            'os_type': image_properties.get('os_type'),
            'architecture': image_properties.get('architecture'),
            'vm_mode': image_properties.get('vm_mode'),
            'auto_disk_config': auto_disk_config
        }

    def _check_config_drive(self, config_drive):
        if config_drive:
            try:
                bool_val = strutils.bool_from_string(config_drive,
                                                     strict=True)
            except ValueError:
                raise exception.ConfigDriveInvalidValue(option=config_drive)
        else:
            bool_val = False
        # FIXME(comstud): Bug ID 1193438 filed for this. This looks silly,
        # but this is because the config drive column is a String. False
        # is represented by using an empty string. And for whatever
        # reason, we rely on the DB to cast True to a String.
        return True if bool_val else ''
    def _validate_flavor_image(self, context, image_id, image,
                               instance_type, root_bdm, validate_numa=True):
        """Validate the flavor and image.

        This is called from the API service to ensure that the flavor
        extra-specs and image properties are self-consistent and compatible
        with each other.

        :param context: A context.RequestContext
        :param image_id: UUID of the image
        :param image: a dict representation of the image including properties,
            enforces the image status is active.
        :param instance_type: Flavor object
        :param root_bdm: BlockDeviceMapping for root disk. Will be None for
            the resize case.
        :param validate_numa: Flag to indicate whether or not to validate
            the NUMA-related metadata.
        :raises: Many different possible exceptions. See
            api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
            for the full list.
        """
        if image and image['status'] != 'active':
            raise exception.ImageNotActive(image_id=image_id)
        self._validate_flavor_image_nostatus(context, image, instance_type,
                                             root_bdm, validate_numa)

    @staticmethod
    def _validate_flavor_image_nostatus(context, image, instance_type,
                                        root_bdm, validate_numa=True,
                                        validate_pci=False):
        """Validate the flavor and image.

        This is called from the API service to ensure that the flavor
        extra-specs and image properties are self-consistent and compatible
        with each other.

        :param context: A context.RequestContext
        :param image: a dict representation of the image including properties
        :param instance_type: Flavor object
        :param root_bdm: BlockDeviceMapping for root disk. Will be None for
            the resize case.
        :param validate_numa: Flag to indicate whether or not to validate
            the NUMA-related metadata.
        :param validate_pci: Flag to indicate whether or not to validate
            the PCI-related metadata.
        :raises: Many different possible exceptions. See
            api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
            for the full list.
        """
        if not image:
            return

        image_properties = image.get('properties', {})
        config_drive_option = image_properties.get(
            'img_config_drive', 'optional')
        if config_drive_option not in ['optional', 'mandatory']:
            raise exception.InvalidImageConfigDrive(
                config_drive=config_drive_option)

        if instance_type['memory_mb'] < int(image.get('min_ram') or 0):
            raise exception.FlavorMemoryTooSmall()

        # Image min_disk is in gb, size is in bytes. For sanity, have them
        # both in bytes.
        image_min_disk = int(image.get('min_disk') or 0) * units.Gi
        image_size = int(image.get('size') or 0)

        # Target disk is a volume. Don't check flavor disk size because it
        # doesn't make sense, and check min_disk against the volume size.
        if root_bdm is not None and root_bdm.is_volume:
            # There are 2 possibilities here:
            #
            # 1. The target volume already exists but bdm.volume_size is not
            #    yet set because this method is called before
            #    _bdm_validate_set_size_and_instance during server create.
            # 2. The target volume doesn't exist, in which case the bdm will
            #    contain the intended volume size
            #
            # Note that rebuild also calls this method with potentially a new
            # image but you can't rebuild a volume-backed server with a new
            # image (yet).
            #
            # Cinder does its own check against min_disk, so if the target
            # volume already exists this has already been done and we don't
            # need to check it again here. In this case, volume_size may not
            # be set on the bdm.
            #
            # If we're going to create the volume, the bdm will contain
            # volume_size. Therefore we should check it if it exists. This
            # will still be checked again by cinder when the volume is
            # created, but that will not happen until the request reaches a
            # host. By checking it here, the user gets an immediate and
            # useful failure indication.
            #
            # The third possibility is that we have failed to consider
            # something, and there are actually more than 2 possibilities. In
            # this case cinder will still do the check at volume creation
            # time. The behaviour will still be correct, but the user will
            # not get an immediate failure from the api, and will instead
            # have to determine why the instance is in an error state with a
            # task of block_device_mapping.
            #
            # We could reasonably refactor this check into _validate_bdm at
            # some future date, as the various size logic is already split
            # out in there.
            dest_size = root_bdm.volume_size
            if dest_size is not None:
                dest_size *= units.Gi

                if image_min_disk > dest_size:
                    raise exception.VolumeSmallerThanMinDisk(
                        volume_size=dest_size, image_min_disk=image_min_disk)

        # Target disk is a local disk whose size is taken from the flavor
        else:
            dest_size = instance_type['root_gb'] * units.Gi

            # NOTE(johannes): root_gb is allowed to be 0 for legacy reasons
            # since libvirt interpreted the value differently than other
            # drivers. A value of 0 means don't check size.
            if dest_size != 0:
                if image_size > dest_size:
                    raise exception.FlavorDiskSmallerThanImage(
                        flavor_size=dest_size, image_size=image_size)

                if image_min_disk > dest_size:
                    raise exception.FlavorDiskSmallerThanMinDisk(
                        flavor_size=dest_size, image_min_disk=image_min_disk)
            else:
                # The user is attempting to create a server with a 0-disk
                # image-backed flavor, which can lead to issues with a large
                # image consuming an unexpectedly large amount of local disk
                # on the compute host. Check to see if the deployment will
                # allow that.
                if not context.can(
                        servers_policies.ZERO_DISK_FLAVOR, fatal=False):
                    raise exception.BootFromVolumeRequiredForZeroDiskFlavor()

        API._validate_flavor_image_numa_pci(
            image, instance_type, validate_numa=validate_numa,
            validate_pci=validate_pci)
    @staticmethod
    def _validate_flavor_image_numa_pci(image, instance_type,
                                        validate_numa=True,
                                        validate_pci=False):
        """Validate the flavor and image NUMA/PCI values.

        This is called from the API service to ensure that the flavor
        extra-specs and image properties are self-consistent and compatible
        with each other.

        :param image: a dict representation of the image including properties
        :param instance_type: Flavor object
        :param validate_numa: Flag to indicate whether or not to validate
            the NUMA-related metadata.
        :param validate_pci: Flag to indicate whether or not to validate
            the PCI-related metadata.
        :raises: Many different possible exceptions. See
            api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
            for the full list.
        """
        image_meta = _get_image_meta_obj(image)

        API._validate_flavor_image_mem_encryption(instance_type, image_meta)

        # validate PMU extra spec and image metadata
        flavor_pmu = instance_type.extra_specs.get('hw:pmu')
        image_pmu = image_meta.properties.get('hw_pmu')
        if (flavor_pmu is not None and image_pmu is not None and
                image_pmu != strutils.bool_from_string(flavor_pmu)):
            raise exception.ImagePMUConflict()

        # Only validate values of flavor/image so the return results of
        # following 'get' functions are not used.
        hardware.get_number_of_serial_ports(instance_type, image_meta)
        if hardware.is_realtime_enabled(instance_type):
            hardware.vcpus_realtime_topology(instance_type, image_meta)
        hardware.get_cpu_topology_constraints(instance_type, image_meta)
        if validate_numa:
            hardware.numa_get_constraints(instance_type, image_meta)
        if validate_pci:
            pci_request.get_pci_requests_from_flavor(instance_type)

    @staticmethod
    def _validate_flavor_image_mem_encryption(instance_type, image):
        """Validate that the flavor and image don't make contradictory
        requests regarding memory encryption.

        :param instance_type: Flavor object
        :param image: an ImageMeta object
        :raises: nova.exception.FlavorImageConflict
        """
        # This library function will raise the exception for us if
        # necessary; if not, we can ignore the result returned.
        hardware.get_mem_encryption_constraint(instance_type, image)
    def _get_image_defined_bdms(self, instance_type, image_meta,
                                root_device_name):
        image_properties = image_meta.get('properties', {})

        # Get the block device mappings defined by the image.
        image_defined_bdms = image_properties.get('block_device_mapping', [])
        legacy_image_defined = not image_properties.get('bdm_v2', False)

        image_mapping = image_properties.get('mappings', [])

        if legacy_image_defined:
            image_defined_bdms = block_device.from_legacy_mapping(
                image_defined_bdms, None, root_device_name)
        else:
            image_defined_bdms = list(map(block_device.BlockDeviceDict,
                                          image_defined_bdms))

        if image_mapping:
            image_mapping = self._prepare_image_mapping(instance_type,
                                                        image_mapping)
            image_defined_bdms = self._merge_bdms_lists(
                image_mapping, image_defined_bdms)

        return image_defined_bdms

    def _get_flavor_defined_bdms(self, instance_type, block_device_mapping):
        flavor_defined_bdms = []

        have_ephemeral_bdms = any(filter(
            block_device.new_format_is_ephemeral, block_device_mapping))
        have_swap_bdms = any(filter(
            block_device.new_format_is_swap, block_device_mapping))

        if instance_type.get('ephemeral_gb') and not have_ephemeral_bdms:
            flavor_defined_bdms.append(
                block_device.create_blank_bdm(instance_type['ephemeral_gb']))
        if instance_type.get('swap') and not have_swap_bdms:
            flavor_defined_bdms.append(
                block_device.create_blank_bdm(instance_type['swap'], 'swap'))

        return flavor_defined_bdms

    def _merge_bdms_lists(self, overridable_mappings, overrider_mappings):
        """Override any block devices from the first list by device name.

        :param overridable_mappings: list whose items are overridden
        :param overrider_mappings: list whose items override

        :returns: A merged list of bdms
        """
        device_names = set(bdm['device_name'] for bdm in overrider_mappings
                           if bdm['device_name'])
        return (overrider_mappings +
                [bdm for bdm in overridable_mappings
                 if bdm['device_name'] not in device_names])
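
    # Illustrative sketch (editor's note, not part of the upstream module):
    # with
    #   overridable = [{'device_name': 'vda'}, {'device_name': 'vdb'}]
    #   overrider   = [{'device_name': 'vdb'}]
    # the merged result keeps the overrider's 'vdb' entry and only 'vda'
    # from the overridable list.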

    def _check_and_transform_bdm(self, context, base_options, instance_type,
                                 image_meta, min_count, max_count,
                                 block_device_mapping, legacy_bdm):
        # NOTE (ndipanov): Assume root dev name is 'vda' if not supplied.
        #                  It's needed for legacy conversion to work.
        root_device_name = (base_options.get('root_device_name') or 'vda')
        image_ref = base_options.get('image_ref', '')
        # If the instance is booted by image and has a volume attached,
        # the volume cannot have the same device name as root_device_name
        if image_ref:
            for bdm in block_device_mapping:
                if (bdm.get('destination_type') == 'volume' and
                        block_device.strip_dev(bdm.get(
                            'device_name')) == root_device_name):
                    msg = _('The volume cannot be assigned the same device'
                            ' name as the root device %s') % root_device_name
                    raise exception.InvalidRequest(msg)

        image_defined_bdms = self._get_image_defined_bdms(
            instance_type, image_meta, root_device_name)
        root_in_image_bdms = (
            block_device.get_root_bdm(image_defined_bdms) is not None)

        if legacy_bdm:
            block_device_mapping = block_device.from_legacy_mapping(
                block_device_mapping, image_ref, root_device_name,
                no_root=root_in_image_bdms)
        elif root_in_image_bdms:
            # NOTE (ndipanov): client will insert an image mapping into the v2
            # block_device_mapping, but if there is a bootable device in image
            # mappings - we need to get rid of the inserted image
            # NOTE (gibi): another case is when a server is booted with an
            # image to bdm mapping where the image only contains a bdm to a
            # snapshot. In this case the other image to bdm mapping
            # contains an unnecessary device with boot_index == 0.
            # Also in this case the image_ref is None as we are booting from
            # an image to volume bdm.
            def not_image_and_root_bdm(bdm):
                return not (bdm.get('boot_index') == 0 and
                            bdm.get('source_type') == 'image')

            block_device_mapping = list(
                filter(not_image_and_root_bdm, block_device_mapping))

        block_device_mapping = self._merge_bdms_lists(
            image_defined_bdms, block_device_mapping)

        if min_count > 1 or max_count > 1:
            if any(map(lambda bdm: bdm['source_type'] == 'volume',
                       block_device_mapping)):
                msg = _('Cannot attach one or more volumes to multiple'
                        ' instances')
                raise exception.InvalidRequest(msg)

        block_device_mapping += self._get_flavor_defined_bdms(
            instance_type, block_device_mapping)

        return block_device_obj.block_device_make_list_from_dicts(
            context, block_device_mapping)
    def _get_image(self, context, image_href):
        if not image_href:
            return None, {}

        image = self.image_api.get(context, image_href)
        return image['id'], image

    def _checks_for_create_and_rebuild(self, context, image_id, image,
                                       instance_type, metadata,
                                       files_to_inject, root_bdm,
                                       validate_numa=True):
        self._check_metadata_properties_quota(context, metadata)
        self._check_injected_file_quota(context, files_to_inject)
        self._validate_flavor_image(context, image_id, image,
                                    instance_type, root_bdm,
                                    validate_numa=validate_numa)
    def _validate_and_build_base_options(self, context, instance_type,
                                         boot_meta, image_href, image_id,
                                         kernel_id, ramdisk_id, display_name,
                                         display_description, key_name,
                                         key_data, security_groups,
                                         availability_zone, user_data,
                                         metadata, access_ip_v4, access_ip_v6,
                                         requested_networks, config_drive,
                                         auto_disk_config, reservation_id,
                                         max_count,
                                         supports_port_resource_request):
        """Verify all the input parameters regardless of the provisioning
        strategy being performed.
        """
        if instance_type['disabled']:
            raise exception.FlavorNotFound(flavor_id=instance_type['id'])

        if user_data:
            try:
                base64utils.decode_as_bytes(user_data)
            except TypeError:
                raise exception.InstanceUserDataMalformed()

        # When using Neutron, _check_requested_secgroups will translate and
        # return any requested security group names to uuids.
        security_groups = (
            self._check_requested_secgroups(context, security_groups))

        # Note: max_count is the number of instances requested by the user,
        # max_network_count is the maximum number of instances taking into
        # account any network quotas
        max_network_count = self._check_requested_networks(
            context, requested_networks, max_count)

        kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
            context, kernel_id, ramdisk_id, boot_meta)

        config_drive = self._check_config_drive(config_drive)

        if key_data is None and key_name is not None:
            key_pair = objects.KeyPair.get_by_name(context,
                                                   context.user_id,
                                                   key_name)
            key_data = key_pair.public_key
        else:
            key_pair = None

        root_device_name = block_device.prepend_dev(
            block_device.properties_root_device_name(
                boot_meta.get('properties', {})))

        image_meta = _get_image_meta_obj(boot_meta)
        numa_topology = hardware.numa_get_constraints(
            instance_type, image_meta)

        system_metadata = {}

        # PCI requests come from two sources: instance flavor and
        # requested_networks. The first call in below returns an
        # InstancePCIRequests object which is a list of InstancePCIRequest
        # objects. The second call in below creates an InstancePCIRequest
        # object for each SR-IOV port, and append it to the list in the
        # InstancePCIRequests object
        pci_request_info = pci_request.get_pci_requests_from_flavor(
            instance_type)
        result = self.network_api.create_resource_requests(
            context, requested_networks, pci_request_info)
        network_metadata, port_resource_requests = result

        # Creating servers with ports that have resource requests, like QoS
        # minimum bandwidth rules, is only supported in a requested minimum
        # microversion.
        if port_resource_requests and not supports_port_resource_request:
            raise exception.CreateWithPortResourceRequestOldVersion()

        base_options = {
            'reservation_id': reservation_id,
            'image_ref': image_href,
            'kernel_id': kernel_id or '',
            'ramdisk_id': ramdisk_id or '',
            'power_state': power_state.NOSTATE,
            'vm_state': vm_states.BUILDING,
            'config_drive': config_drive,
            'user_id': context.user_id,
            'project_id': context.project_id,
            'instance_type_id': instance_type['id'],
            'memory_mb': instance_type['memory_mb'],
            'vcpus': instance_type['vcpus'],
            'root_gb': instance_type['root_gb'],
            'ephemeral_gb': instance_type['ephemeral_gb'],
            'display_name': display_name,
            'display_description': display_description,
            'user_data': user_data,
            'key_name': key_name,
            'key_data': key_data,
            'locked': False,
            'metadata': metadata or {},
            'access_ip_v4': access_ip_v4,
            'access_ip_v6': access_ip_v6,
            'availability_zone': availability_zone,
            'root_device_name': root_device_name,
            'progress': 0,
            'pci_requests': pci_request_info,
            'numa_topology': numa_topology,
            'system_metadata': system_metadata,
            'port_resource_requests': port_resource_requests}

        options_from_image = self._inherit_properties_from_image(
            boot_meta, auto_disk_config)

        base_options.update(options_from_image)

        # return the validated options and maximum number of instances allowed
        # by the network quotas
        return (base_options, max_network_count, key_pair, security_groups,
                network_metadata)
    @staticmethod
    @db_api.api_context_manager.writer
    def _create_reqspec_buildreq_instmapping(context, rs, br, im):
        """Create the request spec, build request, and instance mapping in a
        single database transaction.

        The RequestContext must be passed in to this method so that the
        database transaction context manager decorator will nest properly and
        include each create() into the same transaction context.
        """
        rs.create()
        br.create()
        im.create()

    def _validate_host_or_node(self, context, host, hypervisor_hostname):
        """Check whether compute nodes exist by validating the host
        and/or the hypervisor_hostname. There are three cases:

        1. If only host is supplied, we can lookup the HostMapping in
           the API DB.
        2. If only node is supplied, we can query a resource provider
           with that name in placement.
        3. If both host and node are supplied, we can get the cell from
           HostMapping and from that lookup the ComputeNode with the
           given cell.

        :param context: The API request context.
        :param host: Target host.
        :param hypervisor_hostname: Target node.
        :raises: ComputeHostNotFound if we find no compute nodes with host
            and/or hypervisor_hostname.
        """
        if host:
            # When host is specified.
            try:
                host_mapping = objects.HostMapping.get_by_host(context, host)
            except exception.HostMappingNotFound:
                LOG.warning('No host-to-cell mapping found for host '
                            '%(host)s.', {'host': host})
                raise exception.ComputeHostNotFound(host=host)
            # When both host and node are specified.
            if hypervisor_hostname:
                cell = host_mapping.cell_mapping
                with nova_context.target_cell(context, cell) as cctxt:
                    # Here we only do an existence check, so we don't
                    # need to store the return value into a variable.
                    objects.ComputeNode.get_by_host_and_nodename(
                        cctxt, host, hypervisor_hostname)
        elif hypervisor_hostname:
            # When only node is specified.
            try:
                self.placementclient.get_provider_by_name(
                    context, hypervisor_hostname)
            except exception.ResourceProviderNotFound:
                raise exception.ComputeHostNotFound(host=hypervisor_hostname)
    def _get_volumes_for_bdms(self, context, bdms):
        """Get the pre-existing volumes from cinder for the list of BDMs.

        :param context: nova auth RequestContext
        :param bdms: BlockDeviceMappingList which has zero or more BDMs with
            a pre-existing volume_id specified.
        :return: dict, keyed by volume id, of volume dicts
        :raises: VolumeNotFound - if a given volume does not exist
        :raises: CinderConnectionFailed - if there are problems communicating
            with the cinder API
        :raises: Forbidden - if the user token does not have authority to see
            a volume
        """
        volumes = {}
        for bdm in bdms:
            if bdm.volume_id:
                volumes[bdm.volume_id] = self.volume_api.get(
                    context, bdm.volume_id)
        return volumes

    @staticmethod
    def _validate_vol_az_for_create(instance_az, volumes):
        """Performs cross_az_attach validation for the instance and volumes.

        If [cinder]/cross_az_attach=True (default) this method is a no-op.

        If [cinder]/cross_az_attach=False, this method will validate that:

        1. All volumes are in the same availability zone.
        2. The volume AZ matches the instance AZ. If the instance is being
           created without a specific AZ (either via the user request or the
           [DEFAULT]/default_schedule_zone option), and the volume AZ matches
           [DEFAULT]/default_availability_zone for compute services, then the
           method returns the volume AZ so it can be set in the RequestSpec as
           if the user requested the zone explicitly.

        :param instance_az: Availability zone for the instance. In this case
            the host is not yet selected so the instance AZ value should come
            from one of the following cases:

            * The user requested availability zone.
            * [DEFAULT]/default_schedule_zone (defaults to None) if the
              request does not specify an AZ (see parse_availability_zone).
        :param volumes: iterable of dicts of cinder volumes to be attached to
            the server being created
        :returns: None or volume AZ to set in the RequestSpec for the instance
        :raises: MismatchVolumeAZException if the instance and volume AZ do
            not match
        """
        if CONF.cinder.cross_az_attach:
            return

        if not volumes:
            return

        # First make sure that all of the volumes are in the same zone.
        vol_zones = [vol['availability_zone'] for vol in volumes]
        if len(set(vol_zones)) > 1:
            msg = (_("Volumes are in different availability zones: %s")
                   % ','.join(vol_zones))
            raise exception.MismatchVolumeAZException(reason=msg)

        volume_az = vol_zones[0]
        # In this case the instance.host should not be set so the instance AZ
        # value should come from instance.availability_zone which will be one
        # of the following cases:
        # * The user requested availability zone.
        # * [DEFAULT]/default_schedule_zone (defaults to None) if the request
        #   does not specify an AZ (see parse_availability_zone).
        # If the instance is not being created with a specific AZ (the AZ is
        # input via the API create request *or*
        # [DEFAULT]/default_schedule_zone is not None), then check to see if
        # we should use the default AZ (which by default matches the default
        # AZ in Cinder, i.e. 'nova').
        if instance_az is None:
            # Check if the volume AZ is the same as our default AZ for compute
            # hosts (nova) and if so, assume we are OK because the user did
            # not request an AZ and will get the same default. If the volume
            # AZ is not the same as our default, return the volume AZ so the
            # caller can put it into the request spec so the instance is
            # scheduled to the same zone as the volume. Note that we are
            # paranoid about the default here since both nova and cinder's
            # default backend AZ is "nova" and we do not want to pin the
            # server to that AZ since it's special, i.e. just like we tell
            # users in the docs to not specify availability_zone='nova' when
            # creating a server since we might not be able to migrate it
            # later.
            if volume_az != CONF.default_availability_zone:
                return volume_az  # indication to set in request spec
            # The volume AZ is the same as the default nova AZ so we will be
            # OK
            return

        if instance_az != volume_az:
            msg = _("Server and volumes are not in the same availability "
                    "zone. Server is in: %(instance_az)s. Volumes are in: "
                    "%(volume_az)s") % {
                'instance_az': instance_az, 'volume_az': volume_az}
            raise exception.MismatchVolumeAZException(reason=msg)
    def _provision_instances(self, context, instance_type, min_count,
            max_count, base_options, boot_meta, security_groups,
            block_device_mapping, shutdown_terminate,
            instance_group, check_server_group_quota, filter_properties,
            key_pair, tags, trusted_certs, supports_multiattach,
            network_metadata=None, requested_host=None,
            requested_hypervisor_hostname=None):
        # NOTE(boxiang): Check whether compute nodes exist by validating
        #                the host and/or the hypervisor_hostname. Pass the
        #                destination to the scheduler with host and/or
        #                hypervisor_hostname(node).
        destination = None
        if requested_host or requested_hypervisor_hostname:
            self._validate_host_or_node(context, requested_host,
                                        requested_hypervisor_hostname)
            destination = objects.Destination()
            if requested_host:
                destination.host = requested_host
            destination.node = requested_hypervisor_hostname
        # Check quotas
        num_instances = compute_utils.check_num_instances_quota(
            context, instance_type, min_count, max_count)
        security_groups = self.security_group_api.populate_security_groups(
            security_groups)
        self.security_group_api.ensure_default(context)
        port_resource_requests = base_options.pop('port_resource_requests')
        instances_to_build = []
        # We could be iterating over several instances with several BDMs per
        # instance and those BDMs could be using a lot of the same images so
        # we want to cache the image API GET results for performance.
        image_cache = {}  # dict of image dicts keyed by image id
        # Before processing the list of instances get all of the requested
        # pre-existing volumes so we can do some validation here rather than
        # down in the bowels of _validate_bdm.
        volumes = self._get_volumes_for_bdms(context, block_device_mapping)
        volume_az = self._validate_vol_az_for_create(
            base_options['availability_zone'], volumes.values())
        if volume_az:
            # This means the instance is not being created in a specific zone
            # but needs to match the zone that the volumes are in so update
            # base_options to match the volume zone.
            base_options['availability_zone'] = volume_az
        LOG.debug("Going to run %s instances...", num_instances)
        try:
            for i in range(num_instances):
                # Create a uuid for the instance so we can store the
                # RequestSpec before the instance is created.
                instance_uuid = uuidutils.generate_uuid()
                # Store the RequestSpec that will be used for scheduling.
                req_spec = objects.RequestSpec.from_components(context,
                        instance_uuid, boot_meta, instance_type,
                        base_options['numa_topology'],
                        base_options['pci_requests'], filter_properties,
                        instance_group, base_options['availability_zone'],
                        security_groups=security_groups,
                        port_resource_requests=port_resource_requests)

                if block_device_mapping:
                    # Record whether or not we are a BFV instance
                    root = block_device_mapping.root_bdm()
                    req_spec.is_bfv = bool(root and root.is_volume)
                else:
                    # If we have no BDMs, we're clearly not BFV
                    req_spec.is_bfv = False

                # NOTE(danms): We need to record num_instances on the request
                # spec as this is how the conductor knows how many were in
                # this batch.
                req_spec.num_instances = num_instances

                # NOTE(stephenfin): The network_metadata field is not
                # persisted inside RequestSpec object.
                if network_metadata:
                    req_spec.network_metadata = network_metadata

                if destination:
                    req_spec.requested_destination = destination

                # Create an instance object, but do not store in db yet.
                instance = objects.Instance(context=context)
                instance.uuid = instance_uuid
                instance.update(base_options)
                instance.keypairs = objects.KeyPairList(objects=[])
                if key_pair:
                    instance.keypairs.objects.append(key_pair)

                instance.trusted_certs = self._retrieve_trusted_certs_object(
                    context, trusted_certs)

                instance = self.create_db_entry_for_new_instance(context,
                        instance_type, boot_meta, instance, security_groups,
                        block_device_mapping, num_instances, i,
                        shutdown_terminate, create_instance=False)
                block_device_mapping = (
                    self._bdm_validate_set_size_and_instance(context,
                        instance, instance_type, block_device_mapping,
                        image_cache, volumes, supports_multiattach))
                instance_tags = self._transform_tags(tags, instance.uuid)

                build_request = objects.BuildRequest(context,
                        instance=instance, instance_uuid=instance.uuid,
                        project_id=instance.project_id,
                        block_device_mappings=block_device_mapping,
                        tags=instance_tags)

                # Create an instance_mapping. The null cell_mapping indicates
                # that the instance doesn't yet exist in a cell, and lookups
                # for it need to instead look for the RequestSpec.
                # cell_mapping will be populated after scheduling, with a
                # scheduling failure using the cell_mapping for the special
                # cell0.
                inst_mapping = objects.InstanceMapping(context=context)
                inst_mapping.instance_uuid = instance_uuid
                inst_mapping.project_id = context.project_id
                inst_mapping.user_id = context.user_id
                inst_mapping.cell_mapping = None

                # Create the request spec, build request, and instance mapping
                # records in a single transaction so that if a DBError is
                # raised from any of them, all INSERTs will be rolled back and
                # no orphaned records will be left behind.
                self._create_reqspec_buildreq_instmapping(context, req_spec,
                                                          build_request,
                                                          inst_mapping)

                instances_to_build.append(
                    (req_spec, build_request, inst_mapping))

                if instance_group:
                    if check_server_group_quota:
                        try:
                            objects.Quotas.check_deltas(
                                context, {'server_group_members': 1},
                                instance_group, context.user_id)
                        except exception.OverQuota:
                            msg = _("Quota exceeded, too many servers in "
                                    "group")
                            raise exception.QuotaError(msg)

                    members = objects.InstanceGroup.add_members(
                        context, instance_group.uuid, [instance.uuid])

                    # NOTE(melwitt): We recheck the quota after creating the
                    # object to prevent users from allocating more resources
                    # than their allowed quota in the event of a race. This is
                    # configurable because it can be expensive if strict quota
                    # limits are not required in a deployment.
                    if CONF.quota.recheck_quota and check_server_group_quota:
                        try:
                            objects.Quotas.check_deltas(
                                context, {'server_group_members': 0},
                                instance_group, context.user_id)
                        except exception.OverQuota:
                            objects.InstanceGroup._remove_members_in_db(
                                context, instance_group.id, [instance.uuid])
                            msg = _("Quota exceeded, too many servers in "
                                    "group")
                            raise exception.QuotaError(msg)
                    # list of members added to servers group in this iteration
                    # is needed to check quota of server group during add next
                    # instance
                    instance_group.members.extend(members)

        # In the case of any exceptions, attempt DB cleanup
        except Exception:
            with excutils.save_and_reraise_exception():
                self._cleanup_build_artifacts(None, instances_to_build)

        return instances_to_build
    @staticmethod
    def _retrieve_trusted_certs_object(context, trusted_certs, rebuild=False):
        """Convert user-requested trusted cert IDs to TrustedCerts object.

        Also validates that the deployment is new enough to support trusted
        image certification validation.

        :param context: The user request auth context
        :param trusted_certs: list of user-specified trusted cert string IDs,
            may be None
        :param rebuild: True if rebuilding the server, False if creating a
            new server
        :returns: nova.objects.TrustedCerts object or None if no
            user-specified trusted cert IDs were given and nova is not
            configured with default trusted cert IDs
        """
        # Retrieve trusted_certs parameter, or use CONF value if certificate
        # validation is enabled
        if trusted_certs:
            certs_to_return = objects.TrustedCerts(ids=trusted_certs)
        elif (CONF.glance.verify_glance_signatures and
              CONF.glance.enable_certificate_validation and
              CONF.glance.default_trusted_certificate_ids):
            certs_to_return = objects.TrustedCerts(
                ids=CONF.glance.default_trusted_certificate_ids)
        else:
            return None

        return certs_to_return

    def _get_bdm_image_metadata(self, context, block_device_mapping,
                                legacy_bdm=True):
        """If we are booting from a volume, we need to get the
        volume details from Cinder and make sure we pass the
        metadata back accordingly.
        """
        if not block_device_mapping:
            return {}

        for bdm in block_device_mapping:
            if (legacy_bdm and
                    block_device.get_device_letter(
                        bdm.get('device_name', '')) != 'a'):
                continue
            elif not legacy_bdm and bdm.get('boot_index') != 0:
                continue

            volume_id = bdm.get('volume_id')
            snapshot_id = bdm.get('snapshot_id')
            if snapshot_id:
                # NOTE(alaski): A volume snapshot inherits metadata from the
                # originating volume, but the API does not expose metadata
                # on the snapshot itself. So we query the volume for it below.
                snapshot = self.volume_api.get_snapshot(context, snapshot_id)
                volume_id = snapshot['volume_id']

            if bdm.get('image_id'):
                try:
                    image_id = bdm['image_id']
                    image_meta = self.image_api.get(context, image_id)
                    return image_meta
                except Exception:
                    raise exception.InvalidBDMImage(id=image_id)
            elif volume_id:
                try:
                    volume = self.volume_api.get(context, volume_id)
                except exception.CinderConnectionFailed:
                    raise
                except Exception:
                    raise exception.InvalidBDMVolume(id=volume_id)

                if not volume.get('bootable', True):
                    raise exception.InvalidBDMVolumeNotBootable(id=volume_id)

                return utils.get_image_metadata_from_volume(volume)
        return {}
  1193. @staticmethod
  1194. def _get_requested_instance_group(context, filter_properties):
  1195. if (not filter_properties or
  1196. not filter_properties.get('scheduler_hints')):
  1197. return
  1198. group_hint = filter_properties.get('scheduler_hints').get('group')
  1199. if not group_hint:
  1200. return
  1201. return objects.InstanceGroup.get_by_uuid(context, group_hint)
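# Illustrative example (UUID is a placeholder): the group hint read
# above comes from scheduler hints such as
# {'scheduler_hints': {'group': '<server-group-uuid>'}}; without a
# group hint the method returns None.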
  1202. def _create_instance(self, context, instance_type,
  1203. image_href, kernel_id, ramdisk_id,
  1204. min_count, max_count,
  1205. display_name, display_description,
  1206. key_name, key_data, security_groups,
  1207. availability_zone, user_data, metadata, injected_files,
  1208. admin_password, access_ip_v4, access_ip_v6,
  1209. requested_networks, config_drive,
  1210. block_device_mapping, auto_disk_config, filter_properties,
  1211. reservation_id=None, legacy_bdm=True, shutdown_terminate=False,
  1212. check_server_group_quota=False, tags=None,
  1213. supports_multiattach=False, trusted_certs=None,
  1214. supports_port_resource_request=False,
  1215. requested_host=None, requested_hypervisor_hostname=None):
  1216. """Verify all the input parameters regardless of the provisioning
  1217. strategy being performed and schedule the instance(s) for
  1218. creation.
  1219. """
  1220. # Normalize and setup some parameters
  1221. if reservation_id is None:
  1222. reservation_id = utils.generate_uid('r')
  1223. security_groups = security_groups or ['default']
  1224. min_count = min_count or 1
  1225. max_count = max_count or min_count
  1226. block_device_mapping = block_device_mapping or []
  1227. tags = tags or []
  1228. if image_href:
  1229. image_id, boot_meta = self._get_image(context, image_href)
  1230. else:
  1231. # This is similar to the logic in _retrieve_trusted_certs_object.
  1232. if (trusted_certs or
  1233. (CONF.glance.verify_glance_signatures and
  1234. CONF.glance.enable_certificate_validation and
  1235. CONF.glance.default_trusted_certificate_ids)):
  1236. msg = _("Image certificate validation is not supported "
  1237. "when booting from volume")
  1238. raise exception.CertificateValidationFailed(message=msg)
  1239. image_id = None
  1240. boot_meta = self._get_bdm_image_metadata(
  1241. context, block_device_mapping, legacy_bdm)
  1242. self._check_auto_disk_config(image=boot_meta,
  1243. auto_disk_config=auto_disk_config)
  1244. base_options, max_net_count, key_pair, security_groups, \
  1245. network_metadata = self._validate_and_build_base_options(
  1246. context, instance_type, boot_meta, image_href, image_id,
  1247. kernel_id, ramdisk_id, display_name, display_description,
  1248. key_name, key_data, security_groups, availability_zone,
  1249. user_data, metadata, access_ip_v4, access_ip_v6,
  1250. requested_networks, config_drive, auto_disk_config,
  1251. reservation_id, max_count, supports_port_resource_request)
  1252. # max_net_count is the maximum number of instances requested by the
  1253. # user adjusted for any network quota constraints, including
  1254. # consideration of connections to each requested network
  1255. if max_net_count < min_count:
  1256. raise exception.PortLimitExceeded()
  1257. elif max_net_count < max_count:
  1258. LOG.info("max count reduced from %(max_count)d to "
  1259. "%(max_net_count)d due to network port quota",
  1260. {'max_count': max_count,
  1261. 'max_net_count': max_net_count})
  1262. max_count = max_net_count
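# Illustrative example (numbers are hypothetical): a request with
# min_count=1 and max_count=10 against a network whose port quota only
# allows 4 more ports gives max_net_count=4, so max_count is reduced
# to 4 here; had max_net_count been below min_count, PortLimitExceeded
# would have been raised above instead.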
  1263. block_device_mapping = self._check_and_transform_bdm(context,
  1264. base_options, instance_type, boot_meta, min_count, max_count,
  1265. block_device_mapping, legacy_bdm)
  1266. # We can't do this check earlier because we need bdms from all sources
  1267. # to have been merged in order to get the root bdm.
  1268. # Set validate_numa=False since numa validation is already done by
  1269. # _validate_and_build_base_options().
  1270. self._checks_for_create_and_rebuild(context, image_id, boot_meta,
  1271. instance_type, metadata, injected_files,
  1272. block_device_mapping.root_bdm(), validate_numa=False)
  1273. instance_group = self._get_requested_instance_group(context,
  1274. filter_properties)
  1275. tags = self._create_tag_list_obj(context, tags)
  1276. instances_to_build = self._provision_instances(
  1277. context, instance_type, min_count, max_count, base_options,
  1278. boot_meta, security_groups, block_device_mapping,
  1279. shutdown_terminate, instance_group, check_server_group_quota,
  1280. filter_properties, key_pair, tags, trusted_certs,
  1281. supports_multiattach, network_metadata,
  1282. requested_host, requested_hypervisor_hostname)
  1283. instances = []
  1284. request_specs = []
  1285. build_requests = []
  1286. for rs, build_request, im in instances_to_build:
  1287. build_requests.append(build_request)
  1288. instance = build_request.get_new_instance(context)
  1289. instances.append(instance)
  1290. request_specs.append(rs)
  1291. self.compute_task_api.schedule_and_build_instances(
  1292. context,
  1293. build_requests=build_requests,
  1294. request_spec=request_specs,
  1295. image=boot_meta,
  1296. admin_password=admin_password,
  1297. injected_files=injected_files,
  1298. requested_networks=requested_networks,
  1299. block_device_mapping=block_device_mapping,
  1300. tags=tags)
  1301. return instances, reservation_id
  1302. @staticmethod
  1303. def _cleanup_build_artifacts(instances, instances_to_build):
  1304. # instances_to_build is a list of tuples:
  1305. # (RequestSpec, BuildRequest, InstanceMapping)
  1306. # Be paranoid about artifacts being deleted underneath us.
  1307. for instance in instances or []:
  1308. try:
  1309. instance.destroy()
  1310. except exception.InstanceNotFound:
  1311. pass
  1312. for rs, build_request, im in instances_to_build or []:
  1313. try:
  1314. rs.destroy()
  1315. except exception.RequestSpecNotFound:
  1316. pass
  1317. try:
  1318. build_request.destroy()
  1319. except exception.BuildRequestNotFound:
  1320. pass
  1321. try:
  1322. im.destroy()
  1323. except exception.InstanceMappingNotFound:
  1324. pass
  1325. @staticmethod
  1326. def _volume_size(instance_type, bdm):
  1327. size = bdm.get('volume_size')
  1328. # NOTE (ndipanov): inherit flavor size only for swap and ephemeral
  1329. if (size is None and bdm.get('source_type') == 'blank' and
  1330. bdm.get('destination_type') == 'local'):
  1331. if bdm.get('guest_format') == 'swap':
  1332. size = instance_type.get('swap', 0)
  1333. else:
  1334. size = instance_type.get('ephemeral_gb', 0)
  1335. return size
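# Illustrative example: a BDM like {'source_type': 'blank',
# 'destination_type': 'local', 'guest_format': 'swap',
# 'volume_size': None} inherits instance_type['swap'], while the same
# blank/local BDM without guest_format 'swap' inherits
# instance_type['ephemeral_gb']; an explicit volume_size is kept as-is.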
  1336. def _prepare_image_mapping(self, instance_type, mappings):
  1337. """Extract and format blank devices from image mappings."""
  1338. prepared_mappings = []
  1339. for bdm in block_device.mappings_prepend_dev(mappings):
  1340. LOG.debug("Image bdm %s", bdm)
  1341. virtual_name = bdm['virtual']
  1342. if virtual_name == 'ami' or virtual_name == 'root':
  1343. continue
  1344. if not block_device.is_swap_or_ephemeral(virtual_name):
  1345. continue
  1346. guest_format = bdm.get('guest_format')
  1347. if virtual_name == 'swap':
  1348. guest_format = 'swap'
  1349. if not guest_format:
  1350. guest_format = CONF.default_ephemeral_format
  1351. values = block_device.BlockDeviceDict({
  1352. 'device_name': bdm['device'],
  1353. 'source_type': 'blank',
  1354. 'destination_type': 'local',
  1355. 'device_type': 'disk',
  1356. 'guest_format': guest_format,
  1357. 'delete_on_termination': True,
  1358. 'boot_index': -1})
  1359. values['volume_size'] = self._volume_size(
  1360. instance_type, values)
  1361. if values['volume_size'] == 0:
  1362. continue
  1363. prepared_mappings.append(values)
  1364. return prepared_mappings
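# Illustrative example (device names are hypothetical): an image
# mapping such as {'virtual': 'ephemeral0', 'device': 'sdb'} yields,
# after mappings_prepend_dev, a blank/local BlockDeviceDict with
# device_name '/dev/sdb', guest_format defaulting to
# CONF.default_ephemeral_format and volume_size taken from the
# flavor's ephemeral_gb; entries whose resulting size is 0 are dropped.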
  1365. def _bdm_validate_set_size_and_instance(self, context, instance,
  1366. instance_type,
  1367. block_device_mapping,
  1368. image_cache, volumes,
  1369. supports_multiattach=False):
  1370. """Ensure the bdms are valid, then set size and associate with instance
  1371. Because this method can be called multiple times when more than one
  1372. instance is booted in a single request, it makes a copy of the bdm list.
  1373. :param context: nova auth RequestContext
  1374. :param instance: Instance object
  1375. :param instance_type: Flavor object - used for swap and ephemeral BDMs
  1376. :param block_device_mapping: BlockDeviceMappingList object
  1377. :param image_cache: dict of image dicts keyed by id which is used as a
  1378. cache in case there are multiple BDMs in the same request using
  1379. the same image to avoid redundant GET calls to the image service
  1380. :param volumes: dict, keyed by volume id, of volume dicts from cinder
  1381. :param supports_multiattach: True if the request supports multiattach
  1382. volumes, False otherwise
  1383. """
  1384. LOG.debug("block_device_mapping %s", list(block_device_mapping),
  1385. instance_uuid=instance.uuid)
  1386. self._validate_bdm(
  1387. context, instance, instance_type, block_device_mapping,
  1388. image_cache, volumes, supports_multiattach)
  1389. instance_block_device_mapping = block_device_mapping.obj_clone()
  1390. for bdm in instance_block_device_mapping:
  1391. bdm.volume_size = self._volume_size(instance_type, bdm)
  1392. bdm.instance_uuid = instance.uuid
  1393. return instance_block_device_mapping
  1394. @staticmethod
  1395. def _check_requested_volume_type(bdm, volume_type_id_or_name,
  1396. volume_types):
  1397. """If we are specifying a volume type, we need to get the
  1398. volume type details from Cinder and make sure the ``volume_type``
  1399. is available.
  1400. """
  1401. # NOTE(brinzhang): Verify that the specified volume type exists,
  1402. # and save the volume type name internally for consistency in the
  1403. # BlockDeviceMapping object.
  1404. for vol_type in volume_types:
  1405. if (volume_type_id_or_name == vol_type['id'] or
  1406. volume_type_id_or_name == vol_type['name']):
  1407. bdm.volume_type = vol_type['name']
  1408. break
  1409. else:
  1410. raise exception.VolumeTypeNotFound(
  1411. id_or_name=volume_type_id_or_name)
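# Illustrative example (values are hypothetical): given volume_types
# containing {'id': '<type-uuid>', 'name': 'fast'}, passing either the
# id or the name 'fast' stores bdm.volume_type = 'fast'; any other
# value falls through to VolumeTypeNotFound above.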
  1412. def _validate_bdm(self, context, instance, instance_type,
  1413. block_device_mappings, image_cache, volumes,
  1414. supports_multiattach=False):
  1415. """Validate requested block device mappings.
  1416. :param context: nova auth RequestContext
  1417. :param instance: Instance object
  1418. :param instance_type: Flavor object - used for swap and ephemeral BDMs
  1419. :param block_device_mappings: BlockDeviceMappingList object
  1420. :param image_cache: dict of image dicts keyed by id which is used as a
  1421. cache in case there are multiple BDMs in the same request using
  1422. the same image to avoid redundant GET calls to the image service
  1423. :param volumes: dict, keyed by volume id, of volume dicts from cinder
  1424. :param supports_multiattach: True if the request supports multiattach
  1425. volumes, False otherwise
  1426. """
  1427. # Make sure that the boot indexes make sense.
  1428. # Setting a negative value or None indicates that the device should not
  1429. # be used for booting.
  1430. boot_indexes = sorted([bdm.boot_index
  1431. for bdm in block_device_mappings
  1432. if bdm.boot_index is not None and
  1433. bdm.boot_index >= 0])
  1434. # Each device which is capable of being used as a boot device should
  1435. # be given a unique boot index, starting from 0 in ascending order, and
  1436. # there needs to be at least one boot device.
  1437. if not boot_indexes or any(i != v for i, v in enumerate(boot_indexes)):
  1438. # Convert the BlockDeviceMappingList to a list for repr details.
  1439. LOG.debug('Invalid block device mapping boot sequence for '
  1440. 'instance: %s', list(block_device_mappings),
  1441. instance=instance)
  1442. raise exception.InvalidBDMBootSequence()
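# Illustrative example: boot indexes [0] or [0, 1] pass the check
# above, while an empty list (no boot device), [1] or [0, 2] (a gap),
# and [0, 0] (a duplicate) all raise InvalidBDMBootSequence.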
  1443. volume_types = None
  1444. for bdm in block_device_mappings:
  1445. volume_type = bdm.volume_type
  1446. if volume_type:
  1447. if not volume_types:
  1448. # In order to reduce the number of calls to the Cinder API,
  1449. # initialize our cache of volume types.
  1450. volume_types = self.volume_api.get_all_volume_types(
  1451. context)
  1452. # NOTE(brinzhang): Ensure the validity of volume_type.
  1453. self._check_requested_volume_type(bdm, volume_type,
  1454. volume_types)
  1455. # NOTE(vish): For now, just make sure the volumes are accessible.
  1456. # Additionally, check that the volume can be attached to this
  1457. # instance.
  1458. snapshot_id = bdm.snapshot_id
  1459. volume_id = bdm.volume_id
  1460. image_id = bdm.image_id
  1461. if image_id is not None:
  1462. if (image_id != instance.get('image_ref') and
  1463. image_id not in image_cache):
  1464. try:
  1465. # Cache the results of the image GET so we do not make
  1466. # the same request for the same image if processing
  1467. # multiple BDMs or multiple servers with the same image
  1468. image_cache[image_id] = self._get_image(
  1469. context, image_id)
  1470. except Exception:
  1471. raise exception.InvalidBDMImage(id=image_id)
  1472. if (bdm.source_type == 'image' and
  1473. bdm.destination_type == 'volume' and
  1474. not bdm.volume_size):
  1475. raise exception.InvalidBDM(message=_("Images with "
  1476. "destination_type 'volume' need to have a non-zero "
  1477. "size specified"))
  1478. elif volume_id is not None:
  1479. try:
  1480. volume = volumes[volume_id]
  1481. # We do not validate the instance and volume AZ here
  1482. # because that is done earlier by _provision_instances.
  1483. self._check_attach_and_reserve_volume(
  1484. context, volume, instance, bdm, supports_multiattach,
  1485. validate_az=False)
  1486. bdm.volume_size = volume.get('size')
  1487. except (exception.CinderConnectionFailed,
  1488. exception.InvalidVolume,
  1489. exception.MultiattachNotSupportedOldMicroversion):
  1490. raise
  1491. except exception.InvalidInput as exc:
  1492. raise exception.InvalidVolume(reason=exc.format_message())
  1493. except Exception as e:
  1494. LOG.info('Failed validating volume %s. Error: %s',
  1495. volume_id, e)
  1496. raise exception.InvalidBDMVolume(id=volume_id)
  1497. elif snapshot_id is not None:
  1498. try:
  1499. snap = self.volume_api.get_snapshot(context, snapshot_id)
  1500. bdm.volume_size = bdm.volume_size or snap.get('size')
  1501. except exception.CinderConnectionFailed:
  1502. raise
  1503. except Exception:
  1504. raise exception.InvalidBDMSnapshot(id=snapshot_id)
  1505. elif (bdm.source_type == 'blank' and
  1506. bdm.destination_type == 'volume' and
  1507. not bdm.volume_size):
  1508. raise exception.InvalidBDM(message=_("Blank volumes "
  1509. "(source: 'blank', dest: 'volume') need to have non-zero "
  1510. "size"))
  1511. ephemeral_size = sum(bdm.volume_size or instance_type['ephemeral_gb']
  1512. for bdm in block_device_mappings
  1513. if block_device.new_format_is_ephemeral(bdm))
  1514. if ephemeral_size > instance_type['ephemeral_gb']:
  1515. raise exception.InvalidBDMEphemeralSize()
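# Illustrative example (sizes are hypothetical): with a flavor whose
# ephemeral_gb is 10, two ephemeral BDMs of 4 and 6 GB pass, while 4
# and 8 GB (12 total) trigger the exception above; an ephemeral BDM
# without an explicit volume_size counts as the full ephemeral_gb.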
  1516. # There should be only one swap
  1517. swap_list = block_device.get_bdm_swap_list(block_device_mappings)
  1518. if len(swap_list) > 1:
  1519. msg = _("More than one swap drive requested.")
  1520. raise exception.InvalidBDMFormat(details=msg)
  1521. if swap_list:
  1522. swap_size = swap_list[0].volume_size or 0
  1523. if swap_size > instance_type['swap']:
  1524. raise exception.InvalidBDMSwapSize()
  1525. max_local = CONF.max_local_block_devices
  1526. if max_local >= 0:
  1527. num_local = len([bdm for bdm in block_device_mappings
  1528. if bdm.destination_type == 'local'])
  1529. if num_local > max_local:
  1530. raise exception.InvalidBDMLocalsLimit()
  1531. def _populate_instance_names(self, instance, num_instances, index):
  1532. """Populate instance display_name and hostname.
  1533. :param instance: The instance to set the display_name, hostname for
  1534. :type instance: nova.objects.Instance
  1535. :param num_instances: Total number of instances being created in this
  1536. request
  1537. :param index: The 0-based index of this particular instance
  1538. """
  1539. # NOTE(mriedem): This is only here for test simplicity since a server
  1540. # name is required in the REST API.
  1541. if 'display_name' not in instance or instance.display_name is None:
  1542. instance.display_name = 'Server %s' % instance.uuid
  1543. # if we're booting multiple instances, we need to add an indexing
  1544. # suffix to both instance.hostname and instance.display_name. This is
  1545. # not necessary for a single instance.
  1546. if num_instances == 1:
  1547. default_hostname = 'Server-%s' % instance.uuid
  1548. instance.hostname = utils.sanitize_hostname(
  1549. instance.display_name, default_hostname)
  1550. elif num_instances > 1:
  1551. old_display_name = instance.display_name
  1552. new_display_name = '%s-%d' % (old_display_name, index + 1)
  1553. if utils.sanitize_hostname(old_display_name) == "":
  1554. instance.hostname = 'Server-%s' % instance.uuid
  1555. else:
  1556. instance.hostname = utils.sanitize_hostname(
  1557. new_display_name)
  1558. instance.display_name = new_display_name
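# Illustrative example (the name is hypothetical): booting three
# instances named 'web' produces display_name/hostname 'web-1',
# 'web-2' and 'web-3' above (the suffix is index + 1), while a single
# instance keeps its display_name and only gets a sanitized hostname.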
  1559. def _populate_instance_for_create(self, context, instance, image,
  1560. index, security_groups, instance_type,
  1561. num_instances, shutdown_terminate):
  1562. """Build the beginning of a new instance."""
  1563. instance.launch_index = index
  1564. instance.vm_state = vm_states.BUILDING
  1565. instance.task_state = task_states.SCHEDULING
  1566. info_cache = objects.InstanceInfoCache()
  1567. info_cache.instance_uuid = instance.uuid
  1568. info_cache.network_info = network_model.NetworkInfo()
  1569. instance.info_cache = info_cache
  1570. instance.flavor = instance_type
  1571. instance.old_flavor = None
  1572. instance.new_flavor = None
  1573. if CONF.ephemeral_storage_encryption.enabled:
  1574. # NOTE(kfarr): dm-crypt expects the cipher in a
  1575. # hyphenated format: cipher-chainmode-ivmode
  1576. # (ex: aes-xts-plain64). The algorithm needs
  1577. # to be parsed out to pass to the key manager (ex: aes).
  1578. cipher = CONF.ephemeral_storage_encryption.cipher
  1579. algorithm = cipher.split('-')[0] if cipher else None
  1580. instance.ephemeral_key_uuid = self.key_manager.create_key(
  1581. context,
  1582. algorithm=algorithm,
  1583. length=CONF.ephemeral_storage_encryption.key_size)
  1584. else:
  1585. instance.ephemeral_key_uuid = None
  1586. # Store image properties so we can use them later
  1587. # (for notifications, etc). Only store what we can.
  1588. if not instance.obj_attr_is_set('system_metadata'):
  1589. instance.system_metadata = {}
  1590. # Make sure we have the dict form that we need for instance_update.
  1591. instance.system_metadata = utils.instance_sys_meta(instance)
  1592. system_meta = utils.get_system_metadata_from_image(
  1593. image, instance_type)
  1594. # In case we couldn't find any suitable base_image
  1595. system_meta.setdefault('image_base_image_ref', instance.image_ref)
  1596. system_meta['owner_user_name'] = context.user_name
  1597. system_meta['owner_project_name'] = context.project_name
  1598. instance.system_metadata.update(system_meta)
  1599. if CONF.use_neutron:
  1600. # For Neutron we don't actually store anything in the database; we
  1601. # proxy the security groups on the instance from the ports
  1602. # attached to the instance.
  1603. instance.security_groups = objects.SecurityGroupList()
  1604. else:
  1605. instance.security_groups = security_groups
  1606. self._populate_instance_names(instance, num_instances, index)
  1607. instance.shutdown_terminate = shutdown_terminate
  1608. return instance
  1609. def _create_tag_list_obj(self, context, tags):
  1610. """Create TagList objects from simple string tags.
  1611. :param context: security context.
  1612. :param tags: simple string tags from API request.
  1613. :returns: TagList object.
  1614. """
  1615. tag_list = [objects.Tag(context=context, tag=t) for t in tags]
  1616. tag_list_obj = objects.TagList(objects=tag_list)
  1617. return tag_list_obj
  1618. def _transform_tags(self, tags, resource_id):
  1619. """Change the resource_id of the tags according to the input param.
  1620. Because this method can be called multiple times when more than one
  1621. instance is booted in a single request, it makes a copy of the tags
  1622. list.
  1623. :param tags: TagList object.
  1624. :param resource_id: string.
  1625. :returns: TagList object.
  1626. """
  1627. instance_tags = tags.obj_clone()
  1628. for tag in instance_tags:
  1629. tag.resource_id = resource_id
  1630. return instance_tags
  1631. # This method remains because cellsv1 uses it in the scheduler
  1632. def create_db_entry_for_new_instance(self, context, instance_type, image,
  1633. instance, security_group, block_device_mapping, num_instances,
  1634. index, shutdown_terminate=False, create_instance=True):
  1635. """Create an entry in the DB for this new instance,
  1636. including any related table updates (such as security group,
  1637. etc).
  1638. This is called by the scheduler after a location for the
  1639. instance has been determined.
  1640. :param create_instance: Determines if the instance is created here or
  1641. just populated for later creation. This is done so that this code
  1642. can be shared with cellsv1 which needs the instance creation to
  1643. happen here. It should be removed and this method cleaned up when
  1644. cellsv1 is a distant memory.
  1645. """
  1646. self._populate_instance_for_create(context, instance, image, index,
  1647. security_group, instance_type,
  1648. num_instances, shutdown_terminate)
  1649. if create_instance:
  1650. instance.create()
  1651. return instance
  1652. def _check_multiple_instances_with_neutron_ports(self,
  1653. requested_networks):
  1654. """Check whether multiple instances are created from port id(s)."""
  1655. for requested_net in requested_networks:
  1656. if requested_net.port_id:
  1657. msg = _("Unable to launch multiple instances with"
  1658. " a single configured port ID. Please launch your"
  1659. " instance one by one with different ports.")
  1660. raise exception.MultiplePortsNotApplicable(reason=msg)
  1661. def _check_multiple_instances_with_specified_ip(self, requested_networks):
  1662. """Check whether multiple instances are created with specified ip."""
  1663. for requested_net in requested_networks:
  1664. if requested_net.network_id and requested_net.address:
  1665. msg = _("max_count cannot be greater than 1 if an fixed_ip "
  1666. "is specified.")
  1667. raise exception.InvalidFixedIpAndMaxCountRequest(reason=msg)
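# Illustrative example (address is hypothetical): a request with
# max_count=2 whose requested network pins a fixed IP such as
# 10.0.0.5 is rejected above, since multiple instances cannot share a
# single fixed IP.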
  1668. @hooks.add_hook("create_instance")
  1669. def create(self, context, instance_type,
  1670. image_href, kernel_id=None, ramdisk_id=None,
  1671. min_count=None, max_count=None,
  1672. display_name=None, display_description=None,
  1673. key_name=None, key_data=None, security_groups=None,
  1674. availability_zone=None, forced_host=None, forced_node=None,
  1675. user_data=None, metadata=None, injected_files=None,
  1676. admin_password=None, block_device_mapping=None,
  1677. access_ip_v4=None, access_ip_v6=None, requested_networks=None,
  1678. config_drive=None, auto_disk_config=None, scheduler_hints=None,
  1679. legacy_bdm=True, shutdown_terminate=False,
  1680. check_server_group_quota=False, tags=None,
  1681. supports_multiattach=False, trusted_certs=None,
  1682. supports_port_resource_request=False,
  1683. requested_host=None, requested_hypervisor_hostname=None):
  1684. """Provision instances, sending instance information to the
  1685. scheduler. The scheduler will determine where the instance(s)
  1686. go and will handle creating the DB entries.
  1687. Returns a tuple of (instances, reservation_id)
  1688. """
  1689. if requested_networks and max_count is not None and max_count > 1:
  1690. self._check_multiple_instances_with_specified_ip(
  1691. requested_networks)
  1692. if utils.is_neutron():
  1693. self._check_multiple_instances_with_neutron_ports(
  1694. requested_networks)
  1695. if availability_zone:
  1696. available_zones = availability_zones.\
  1697. get_availability_zones(context.elevated(), self.host_api,
  1698. get_only_available=True)
  1699. if forced_host is None and availability_zone not in \
  1700. available_zones:
  1701. msg = _('The requested availability zone is not available')
  1702. raise exception.InvalidRequest(msg)
  1703. filter_properties = scheduler_utils.build_filter_properties(
  1704. scheduler_hints, forced_host, forced_node, instance_type)
  1705. return self._create_instance(
  1706. context, instance_type,
  1707. image_href, kernel_id, ramdisk_id,
  1708. min_count, max_count,
  1709. display_name, display_description,
  1710. key_name, key_data, security_groups,
  1711. availability_zone, user_data, metadata,
  1712. injected_files, admin_password,
  1713. access_ip_v4, access_ip_v6,
  1714. requested_networks, config_drive,
  1715. block_device_mapping, auto_disk_config,
  1716. filter_properties=filter_properties,
  1717. legacy_bdm=legacy_bdm,
  1718. shutdown_terminate=shutdown_terminate,
  1719. check_server_group_quota=check_server_group_quota,
  1720. tags=tags, supports_multiattach=supports_multiattach,
  1721. trusted_certs=trusted_certs,
  1722. supports_port_resource_request=supports_port_resource_request,
  1723. requested_host=requested_host,
  1724. requested_hypervisor_hostname=requested_hypervisor_hostname)
  1725. def _check_auto_disk_config(self, instance=None, image=None,
  1726. auto_disk_config=None):
  1727. if auto_disk_config is None:
  1728. return
  1729. if not image and not instance:
  1730. return
  1731. if image:
  1732. image_props = image.get("properties", {})
  1733. auto_disk_config_img = \
  1734. utils.get_auto_disk_config_from_image_props(image_props)
  1735. image_ref = image.get("id")
  1736. else:
  1737. sys_meta = utils.instance_sys_meta(instance)
  1738. image_ref = sys_meta.get('image_base_image_ref')
  1739. auto_disk_config_img = \
  1740. utils.get_auto_disk_config_from_instance(sys_meta=sys_meta)
  1741. self._ensure_auto_disk_config_is_valid(auto_disk_config_img,
  1742. auto_disk_config,
  1743. image_ref)
  1744. def _lookup_instance(self, context, uuid):
  1745. '''Helper method for pulling an instance object from a database.
  1746. During the transition to cellsv2 there is some complexity around
  1747. retrieving an instance from the database which this method hides. If
  1748. there is an instance mapping then query the cell for the instance; if
  1749. no mapping exists, query the configured nova database.
  1750. Once we are past the point that all deployments can be assumed to be
  1751. migrated to cellsv2 this method can go away.
  1752. '''
  1753. inst_map = None
  1754. try:
  1755. inst_map = objects.InstanceMapping.get_by_instance_uuid(
  1756. context, uuid)
  1757. except exception.InstanceMappingNotFound:
  1758. # TODO(alaski): This exception block can be removed once we're
  1759. # guaranteed everyone is using cellsv2.
  1760. pass
  1761. if inst_map is None or inst_map.cell_mapping is None:
  1762. # If inst_map is None then the deployment has not migrated to
  1763. # cellsv2 yet.
  1764. # If inst_map.cell_mapping is None then the instance is not in a
  1765. # cell yet. Until instance creation moves to the conductor the
  1766. # instance can be found in the configured database, so attempt
  1767. # to look it up.
  1768. cell = None
  1769. try:
  1770. instance = objects.Instance.get_by_uuid(context, uuid)
  1771. except exception.InstanceNotFound:
  1772. # If we get here then the conductor is in charge of writing the
  1773. # instance to the database and hasn't done that yet. It's up to
  1774. # the caller of this method to determine what to do with that
  1775. # information.
  1776. return None, None
  1777. else:
  1778. cell = inst_map.cell_mapping
  1779. with nova_context.target_cell(context, cell) as cctxt:
  1780. try:
  1781. instance = objects.Instance.get_by_uuid(cctxt, uuid)
  1782. except exception.InstanceNotFound:
  1783. # Since the cell_mapping exists we know the instance is in
  1784. # the cell, however InstanceNotFound means it's already
  1785. # deleted.
  1786. return None, None
  1787. return cell, instance
  1788. def _delete_while_booting(self, context, instance):
  1789. """Handle deletion if the instance has not reached a cell yet
  1790. Deletion before an instance reaches a cell needs to be handled
  1791. differently. What we're attempting to do is delete the BuildRequest
  1792. before the api level conductor does. If we succeed here then the boot
  1793. request stops before reaching a cell. If not then the instance will
  1794. need to be looked up in a cell db and the normal delete path taken.
  1795. """
  1796. deleted = self._attempt_delete_of_buildrequest(context, instance)
  1797. if deleted:
  1798. # If we've reached this block, the successful deletion of the
  1799. # buildrequest indicates that the build process should be halted by
  1800. # the conductor.
  1801. # NOTE(alaski): Though the conductor halts the build process it
  1802. # does not currently delete the instance record. This is
  1803. # because in the near future the instance record will not be
  1804. # created if the buildrequest has been deleted here. For now we
  1805. # ensure the instance has been set to deleted at this point.
  1806. # Yes this directly contradicts the comment earlier in this
  1807. # method, but this is a temporary measure.
  1808. # Look up the instance because the current instance object was
  1809. # stashed on the buildrequest and therefore not complete enough
  1810. # to run .destroy().
  1811. try:
  1812. instance_uuid = instance.uuid
  1813. cell, instance = self._lookup_instance(context, instance_uuid)
  1814. if instance is not None:
  1815. # If instance is None it has already been deleted.
  1816. if cell:
  1817. with nova_context.target_cell(context, cell) as cctxt:
  1818. # FIXME: When the instance context is targeted,
  1819. # we can remove this
  1820. with compute_utils.notify_about_instance_delete(
  1821. self.notifier, cctxt, instance):
  1822. instance.destroy()
  1823. else:
  1824. instance.destroy()
  1825. except exception.InstanceNotFound:
  1826. pass
  1827. return True
  1828. return False
  1829. def _attempt_delete_of_buildrequest(self, context, instance):
  1830. # If there is a BuildRequest then the instance may not have been
  1831. # written to a cell db yet. Delete the BuildRequest here, which
  1832. # will indicate that the Instance build should not proceed.
  1833. try:
  1834. build_req = objects.BuildRequest.get_by_instance_uuid(
  1835. context, instance.uuid)
  1836. build_req.destroy()
  1837. except exception.BuildRequestNotFound:
  1838. # This means that conductor has deleted the BuildRequest so the
  1839. # instance is now in a cell and the delete needs to proceed
  1840. # normally.
  1841. return False
  1842. # We need to detach from any volumes so they aren't orphaned.
  1843. self._local_cleanup_bdm_volumes(
  1844. build_req.block_device_mappings, instance, context)
  1845. return True
  1846. def _delete(self, context, instance, delete_type, cb, **instance_attrs):
  1847. if instance.disable_terminate:
  1848. LOG.info('instance termination disabled', instance=instance)
  1849. return
  1850. cell = None
  1851. # If there is an instance.host (or the instance is shelved-offloaded or
  1852. # in error state), the instance has been scheduled and sent to a
  1853. # cell/compute which means it was pulled from the cell db.
  1854. # Normal delete should be attempted.
  1855. may_have_ports_or_volumes = compute_utils.may_have_ports_or_volumes(
  1856. instance)
  1857. if not instance.host and not may_have_ports_or_volumes:
  1858. try:
  1859. if self._delete_while_booting(context, instance):
  1860. return
  1861. # If instance.host was not set it's possible that the Instance
  1862. # object here was pulled from a BuildRequest object and is not
  1863. # fully populated. Notably it will be missing an 'id' field
  1864. # which will prevent instance.destroy from functioning
  1865. # properly. A lookup is attempted which will either return a
  1866. # full Instance or None if not found. If not found then it's
  1867. # acceptable to skip the rest of the delete processing.
  1868. cell, instance = self._lookup_instance(context, instance.uuid)
  1869. if cell and instance:
  1870. try:
  1871. # Now destroy the instance from the cell it lives in.
  1872. with compute_utils.notify_about_instance_delete(
  1873. self.notifier, context, instance):
  1874. instance.destroy()
  1875. except exception.InstanceNotFound:
  1876. pass
  1877. # The instance was deleted or is already gone.
  1878. return
  1879. if not instance:
  1880. # Instance is already deleted.
  1881. return
  1882. except exception.ObjectActionError:
  1883. # NOTE(melwitt): This means the instance.host changed
  1884. # under us indicating the instance became scheduled
  1885. # during the destroy(). Refresh the instance from the DB and
  1886. # continue on with the delete logic for a scheduled instance.
  1887. # NOTE(danms): If instance.host is set, we should be able to
  1888. # do the following lookup. If not, there's not much we can
  1889. # do to recover.
  1890. cell, instance = self._lookup_instance(context, instance.uuid)
  1891. if not instance:
  1892. # Instance is already deleted
  1893. return
  1894. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  1895. context, instance.uuid)
  1896. # In these states an instance has an associated snapshot.
  1897. if instance.vm_state in (vm_states.SHELVED,
  1898. vm_states.SHELVED_OFFLOADED):
  1899. snapshot_id = instance.system_metadata.get('shelved_image_id')
  1900. LOG.info("Working on deleting snapshot %s "
  1901. "from shelved instance...",
  1902. snapshot_id, instance=instance)
  1903. try:
  1904. self.image_api.delete(context, snapshot_id)
  1905. except (exception.ImageNotFound,
  1906. exception.ImageNotAuthorized) as exc:
  1907. LOG.warning("Failed to delete snapshot "
  1908. "from shelved instance (%s).",
  1909. exc.format_message(), instance=instance)
  1910. except Exception:
  1911. LOG.exception("Something wrong happened when trying to "
  1912. "delete snapshot from shelved instance.",
  1913. instance=instance)
  1914. original_task_state = instance.task_state
  1915. try:
  1916. # NOTE(maoy): no expected_task_state needs to be set
  1917. instance.update(instance_attrs)
  1918. instance.progress = 0
  1919. instance.save()
  1920. if not instance.host and not may_have_ports_or_volumes:
  1921. try:
  1922. with compute_utils.notify_about_instance_delete(
  1923. self.notifier, context, instance,
  1924. delete_type
  1925. if delete_type != 'soft_delete'
  1926. else 'delete'):
  1927. instance.destroy()
  1928. LOG.info('Instance deleted and does not have host '
  1929. 'field, its vm_state is %(state)s.',
  1930. {'state': instance.vm_state},
  1931. instance=instance)
  1932. return
  1933. except exception.ObjectActionError as ex:
  1934. # The instance's host likely changed under us as
  1935. # this instance could be building and has since been
  1936. # scheduled. Continue with attempts to delete it.
  1937. LOG.debug('Refreshing instance because: %s', ex,
  1938. instance=instance)
  1939. instance.refresh()
  1940. if instance.vm_state == vm_states.RESIZED:
  1941. self._confirm_resize_on_deleting(context, instance)
  1942. # NOTE(neha_alhat): After confirm resize vm_state will become
  1943. # 'active' and task_state will be set to 'None'. But for soft
  1944. # deleting a vm, the _do_soft_delete callback requires
  1945. # task_state in 'SOFT_DELETING' status. So, we need to set
  1946. # task_state as 'SOFT_DELETING' again for soft_delete case.
  1947. # During the short window after confirm resize and before the
  1948. # task_state is saved as "SOFT_DELETING", a user can submit the
  1949. # soft delete request again and the system will accept and
  1950. # process it without any errors.
  1951. if delete_type == 'soft_delete':
  1952. instance.task_state = instance_attrs['task_state']
  1953. instance.save()
  1954. is_local_delete = True
  1955. try:
  1956. # instance.host must be set in order to look up the service.
  1957. if instance.host is not None:
  1958. service = objects.Service.get_by_compute_host(
  1959. context.elevated(), instance.host)
  1960. is_local_delete = not self.servicegroup_api.service_is_up(
  1961. service)
  1962. if not is_local_delete:
  1963. if original_task_state in (task_states.DELETING,
  1964. task_states.SOFT_DELETING):
  1965. LOG.info('Instance is already in deleting state, '
  1966. 'ignoring this request',
  1967. instance=instance)
  1968. return
  1969. self._record_action_start(context, instance,
  1970. instance_actions.DELETE)
  1971. cb(context, instance, bdms)
  1972. except exception.ComputeHostNotFound:
  1973. LOG.debug('Compute host %s not found during service up check, '
  1974. 'going to local delete instance', instance.host,
  1975. instance=instance)
  1976. if is_local_delete:
  1977. # If instance is in shelved_offloaded state or compute node
  1978. # isn't up, delete instance from db and clean bdms info and
  1979. # network info
  1980. if cell is None:
  1981. # NOTE(danms): If we didn't get our cell from one of the
  1982. # paths above, look it up now.
  1983. try:
  1984. im = objects.InstanceMapping.get_by_instance_uuid(
  1985. context, instance.uuid)
  1986. cell = im.cell_mapping
  1987. except exception.InstanceMappingNotFound:
  1988. LOG.warning('During local delete, failed to find '
  1989. 'instance mapping', instance=instance)
  1990. return
  1991. LOG.debug('Doing local delete in cell %s', cell.identity,
  1992. instance=instance)
  1993. with nova_context.target_cell(context, cell) as cctxt:
  1994. self._local_delete(cctxt, instance, bdms, delete_type, cb)
  1995. except exception.InstanceNotFound:
  1996. # NOTE(comstud): Race condition. Instance already gone.
  1997. pass
  1998. def _confirm_resize_on_deleting(self, context, instance):
  1999. # If in the middle of a resize, use confirm_resize to
  2000. # ensure the original instance is cleaned up too along
  2001. # with its allocations (and migration-based allocations)
  2002. # in placement.
  2003. migration = None
  2004. for status in ('finished', 'confirming'):
  2005. try:
  2006. migration = objects.Migration.get_by_instance_and_status(
  2007. context.elevated(), instance.uuid, status)
  2008. LOG.info('Found an unconfirmed migration during delete, '
  2009. 'id: %(id)s, status: %(status)s',
  2010. {'id': migration.id,
  2011. 'status': migration.status},
  2012. instance=instance)
  2013. break
  2014. except exception.MigrationNotFoundByStatus:
  2015. pass
  2016. if not migration:
  2017. LOG.info('Instance may have been confirmed during delete',
  2018. instance=instance)
  2019. return
  2020. src_host = migration.source_compute
  2021. # FIXME(mriedem): If migration.cross_cell_move, we need to also
  2022. # cleanup the instance data from the source cell database.
  2023. self._record_action_start(context, instance,
  2024. instance_actions.CONFIRM_RESIZE)
  2025. self.compute_rpcapi.confirm_resize(context,
  2026. instance, migration, src_host, cast=False)
  2027. def _local_cleanup_bdm_volumes(self, bdms, instance, context):
  2028. """The method deletes the bdm records and, if a bdm is a volume, call
  2029. the terminate connection and the detach volume via the Volume API.
  2030. """
  2031. elevated = context.elevated()
  2032. for bdm in bdms:
  2033. if bdm.is_volume:
  2034. try:
  2035. if bdm.attachment_id:
  2036. self.volume_api.attachment_delete(context,
  2037. bdm.attachment_id)
  2038. else:
  2039. connector = compute_utils.get_stashed_volume_connector(
  2040. bdm, instance)
  2041. if connector:
  2042. self.volume_api.terminate_connection(context,
  2043. bdm.volume_id,
  2044. connector)
  2045. else:
  2046. LOG.debug('Unable to find connector for volume %s,'
  2047. ' not attempting terminate_connection.',
  2048. bdm.volume_id, instance=instance)
  2049. # Attempt to detach the volume. If there was no
  2050. # connection made in the first place this is just
  2051. # cleaning up the volume state in the Cinder DB.
  2052. self.volume_api.detach(elevated, bdm.volume_id,
  2053. instance.uuid)
  2054. if bdm.delete_on_termination:
  2055. self.volume_api.delete(context, bdm.volume_id)
  2056. except Exception as exc:
  2057. LOG.warning("Ignoring volume cleanup failure due to %s",
  2058. exc, instance=instance)
  2059. # If we're cleaning up volumes from an instance that wasn't yet
  2060. # created in a cell, i.e. the user deleted the server while
  2061. # the BuildRequest still existed, then the BDM doesn't actually
  2062. # exist in the DB to destroy it.
  2063. if 'id' in bdm:
  2064. bdm.destroy()
  2065. @property
  2066. def placementclient(self):
  2067. if self._placementclient is None:
  2068. self._placementclient = report.SchedulerReportClient()
  2069. return self._placementclient
  2070. def _local_delete(self, context, instance, bdms, delete_type, cb):
  2071. if instance.vm_state == vm_states.SHELVED_OFFLOADED:
  2072. LOG.info("instance is in SHELVED_OFFLOADED state, cleanup"
  2073. " the instance's info from database.",
  2074. instance=instance)
  2075. else:
  2076. LOG.warning("instance's host %s is down, deleting from "
  2077. "database", instance.host, instance=instance)
  2078. with compute_utils.notify_about_instance_delete(
  2079. self.notifier, context, instance,
  2080. delete_type if delete_type != 'soft_delete' else 'delete'):
  2081. elevated = context.elevated()
  2082. # NOTE(liusheng): In the nova-network multi_host scenario, deleting
  2083. # the instance's network info may need instance['host'] as the
  2084. # destination host of the RPC call. If the instance is in the
  2085. # SHELVED_OFFLOADED state, instance['host'] is None, so here we use
  2086. # shelved_host as the host to deallocate network info and reset
  2087. # instance['host'] afterwards. We shouldn't use
  2088. # instance.save() here, because that would mislead a user into
  2089. # thinking the instance's host has changed, when actually
  2090. # instance.host is always None.
  2091. orig_host = instance.host
  2092. try:
  2093. if instance.vm_state == vm_states.SHELVED_OFFLOADED:
  2094. sysmeta = getattr(instance,
  2095. obj_base.get_attrname(
  2096. 'system_metadata'))
  2097. instance.host = sysmeta.get('shelved_host')
  2098. self.network_api.deallocate_for_instance(elevated,
  2099. instance)
  2100. finally:
  2101. instance.host = orig_host
  2102. # cleanup volumes
  2103. self._local_cleanup_bdm_volumes(bdms, instance, context)
  2104. # Cleanup allocations in Placement since we can't do it from the
  2105. # compute service.
  2106. self.placementclient.delete_allocation_for_instance(
  2107. context, instance.uuid)
  2108. cb(context, instance, bdms, local=True)
  2109. instance.destroy()
  2110. @staticmethod
  2111. def _update_queued_for_deletion(context, instance, qfd):
  2112. # NOTE(tssurya): We query the instance_mapping record of this instance
  2113. # and update the queued_for_delete flag to True (or False according to
  2114. # the state of the instance). This just means that the instance is
  2115. # queued for deletion (or is no longer queued for deletion). It does
  2116. # not guarantee its successful deletion (or restoration). Hence the
  2117. # value could be stale, which is fine considering it is only used
  2118. # during a down-cell (desperate) situation.
  2119. im = objects.InstanceMapping.get_by_instance_uuid(context,
  2120. instance.uuid)
  2121. im.queued_for_delete = qfd
  2122. im.save()
  2123. def _do_delete(self, context, instance, bdms, local=False):
  2124. if local:
  2125. instance.vm_state = vm_states.DELETED
  2126. instance.task_state = None
  2127. instance.terminated_at = timeutils.utcnow()
  2128. instance.save()
  2129. else:
  2130. self.compute_rpcapi.terminate_instance(context, instance, bdms)
  2131. self._update_queued_for_deletion(context, instance, True)
  2132. def _do_soft_delete(self, context, instance, bdms, local=False):
  2133. if local:
  2134. instance.vm_state = vm_states.SOFT_DELETED
  2135. instance.task_state = None
  2136. instance.terminated_at = timeutils.utcnow()
  2137. instance.save()
  2138. else:
  2139. self.compute_rpcapi.soft_delete_instance(context, instance)
  2140. self._update_queued_for_deletion(context, instance, True)
  2141. # NOTE(maoy): we allow delete to be called no matter what vm_state says.
  2142. @check_instance_lock
  2143. @check_instance_state(vm_state=None, task_state=None,
  2144. must_have_launched=True)
  2145. def soft_delete(self, context, instance):
  2146. """Terminate an instance."""
  2147. LOG.debug('Going to try to soft delete instance',
  2148. instance=instance)
  2149. self._delete(context, instance, 'soft_delete', self._do_soft_delete,
  2150. task_state=task_states.SOFT_DELETING,
  2151. deleted_at=timeutils.utcnow())
  2152. def _delete_instance(self, context, instance):
  2153. self._delete(context, instance, 'delete', self._do_delete,
  2154. task_state=task_states.DELETING)
  2155. @check_instance_lock
  2156. @check_instance_state(vm_state=None, task_state=None,
  2157. must_have_launched=False)
  2158. def delete(self, context, instance):
  2159. """Terminate an instance."""
  2160. LOG.debug("Going to try to terminate instance", instance=instance)
  2161. self._delete_instance(context, instance)
  2162. @check_instance_lock
  2163. @check_instance_state(vm_state=[vm_states.SOFT_DELETED])
  2164. def restore(self, context, instance):
  2165. """Restore a previously deleted (but not reclaimed) instance."""
  2166. # Check quotas
  2167. flavor = instance.get_flavor()
  2168. project_id, user_id = quotas_obj.ids_from_instance(context, instance)
  2169. compute_utils.check_num_instances_quota(context, flavor, 1, 1,
  2170. project_id=project_id, user_id=user_id)
  2171. self._record_action_start(context, instance, instance_actions.RESTORE)
  2172. if instance.host:
  2173. instance.task_state = task_states.RESTORING
  2174. instance.deleted_at = None
  2175. instance.save(expected_task_state=[None])
  2176. # TODO(melwitt): We're not rechecking for strict quota here to
  2177. # guard against going over quota during a race at this time because
  2178. # the resource consumption for this operation is written to the
  2179. # database by compute.
  2180. self.compute_rpcapi.restore_instance(context, instance)
  2181. else:
  2182. instance.vm_state = vm_states.ACTIVE
  2183. instance.task_state = None
  2184. instance.deleted_at = None
  2185. instance.save(expected_task_state=[None])
  2186. self._update_queued_for_deletion(context, instance, False)
  2187. @check_instance_lock
  2188. @check_instance_state(task_state=None,
  2189. must_have_launched=False)
  2190. def force_delete(self, context, instance):
  2191. """Force delete an instance in any vm_state/task_state."""
  2192. self._delete(context, instance, 'force_delete', self._do_delete,
  2193. task_state=task_states.DELETING)
  2194. def force_stop(self, context, instance, do_cast=True, clean_shutdown=True):
  2195. LOG.debug("Going to try to stop instance", instance=instance)
  2196. instance.task_state = task_states.POWERING_OFF
  2197. instance.progress = 0
  2198. instance.save(expected_task_state=[None])
  2199. self._record_action_start(context, instance, instance_actions.STOP)
  2200. self.compute_rpcapi.stop_instance(context, instance, do_cast=do_cast,
  2201. clean_shutdown=clean_shutdown)
  2202. @check_instance_lock
  2203. @check_instance_host
  2204. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.ERROR])
  2205. def stop(self, context, instance, do_cast=True, clean_shutdown=True):
  2206. """Stop an instance."""
  2207. self.force_stop(context, instance, do_cast, clean_shutdown)
  2208. @check_instance_lock
  2209. @check_instance_host
  2210. @check_instance_state(vm_state=[vm_states.STOPPED])
  2211. def start(self, context, instance):
  2212. """Start an instance."""
  2213. LOG.debug("Going to try to start instance", instance=instance)
  2214. instance.task_state = task_states.POWERING_ON
  2215. instance.save(expected_task_state=[None])
  2216. self._record_action_start(context, instance, instance_actions.START)
  2217. self.compute_rpcapi.start_instance(context, instance)
  2218. @check_instance_lock
  2219. @check_instance_host
  2220. @check_instance_state(vm_state=vm_states.ALLOW_TRIGGER_CRASH_DUMP)
  2221. def trigger_crash_dump(self, context, instance):
  2222. """Trigger crash dump in an instance."""
  2223. LOG.debug("Try to trigger crash dump", instance=instance)
  2224. self._record_action_start(context, instance,
  2225. instance_actions.TRIGGER_CRASH_DUMP)
  2226. self.compute_rpcapi.trigger_crash_dump(context, instance)
  2227. def _generate_minimal_construct_for_down_cells(self, context,
  2228. down_cell_uuids,
  2229. project, limit):
  2230. """Generate a list of minimal instance constructs for a given list of
  2231. cells that did not respond to a list operation. This will list
  2232. every instance mapping in the affected cells and return a minimal
  2233. objects.Instance for each (non-queued-for-delete) mapping.
  2234. :param context: RequestContext
  2235. :param down_cell_uuids: A list of cell UUIDs that did not respond
  2236. :param project: A project ID to filter mappings, or None
  2237. :param limit: A numeric limit on the number of results, or None
  2238. :returns: An InstanceList() of partial Instance() objects
  2239. """
  2240. unavailable_servers = objects.InstanceList()
  2241. for cell_uuid in down_cell_uuids:
  2242. LOG.warning("Cell %s is not responding and hence only "
  2243. "partial results are available from this "
  2244. "cell if any.", cell_uuid)
  2245. instance_mappings = (objects.InstanceMappingList.
  2246. get_not_deleted_by_cell_and_project(context, cell_uuid,
  2247. project, limit=limit))
  2248. for im in instance_mappings:
  2249. unavailable_servers.objects.append(
  2250. objects.Instance(
  2251. context=context,
  2252. uuid=im.instance_uuid,
  2253. project_id=im.project_id,
  2254. created_at=im.created_at
  2255. )
  2256. )
  2257. if limit is not None:
  2258. limit -= len(instance_mappings)
  2259. if limit <= 0:
  2260. break
  2261. return unavailable_servers
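# Illustrative example (numbers are hypothetical): with limit=1000 and
# 300 not-yet-deleted mappings returned for the first down cell, the
# next cell is queried with limit=700; once the remaining limit
# reaches 0 the loop above stops early.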
  2262. def _get_instance_map_or_none(self, context, instance_uuid):
  2263. try:
  2264. inst_map = objects.InstanceMapping.get_by_instance_uuid(
  2265. context, instance_uuid)
  2266. except exception.InstanceMappingNotFound:
  2267. # An InstanceMapping should generally always be found. This exception
  2268. # may be raised if a deployment has partially migrated the nova-api
  2269. # services.
  2270. inst_map = None
  2271. return inst_map
  2272. @staticmethod
  2273. def _save_user_id_in_instance_mapping(mapping, instance):
  2274. # TODO(melwitt): We take the opportunity to migrate user_id on the
  2275. # instance mapping if it's not yet been migrated. This can be removed
  2276. # in a future release, when all migrations are complete.
  2277. # If the instance came from a RequestSpec because of a down cell, its
  2278. # user_id could be None and the InstanceMapping.user_id field is
  2279. # non-nullable. Avoid trying to set/save the user_id in that case.
  2280. if 'user_id' not in mapping and instance.user_id is not None:
  2281. mapping.user_id = instance.user_id
  2282. mapping.save()
  2283. def _get_instance_from_cell(self, context, im, expected_attrs,
  2284. cell_down_support):
  2285. # NOTE(danms): Even though we're going to scatter/gather to the
  2286. # right cell, other code depends on this being force targeted when
  2287. # the get call returns.
  2288. nova_context.set_target_cell(context, im.cell_mapping)
  2289. uuid = im.instance_uuid
  2290. result = nova_context.scatter_gather_single_cell(context,
  2291. im.cell_mapping, objects.Instance.get_by_uuid, uuid,
  2292. expected_attrs=expected_attrs)
  2293. cell_uuid = im.cell_mapping.uuid
  2294. if not nova_context.is_cell_failure_sentinel(result[cell_uuid]):
  2295. inst = result[cell_uuid]
  2296. self._save_user_id_in_instance_mapping(im, inst)
  2297. return inst
  2298. elif isinstance(result[cell_uuid], exception.InstanceNotFound):
  2299. raise exception.InstanceNotFound(instance_id=uuid)
  2300. elif cell_down_support:
  2301. if im.queued_for_delete:
  2302. # should be treated like deleted instance.
  2303. raise exception.InstanceNotFound(instance_id=uuid)
  2304. # instance in down cell, return a minimal construct
  2305. LOG.warning("Cell %s is not responding and hence only "
  2306. "partial results are available from this "
  2307. "cell.", cell_uuid)
  2308. try:
  2309. rs = objects.RequestSpec.get_by_instance_uuid(context,
  2310. uuid)
  2311. # For BFV case, we could have rs.image but rs.image.id might
  2312. # still not be set. So we check the existence of both image
  2313. # and its id.
  2314. image_ref = (rs.image.id if rs.image and
  2315. 'id' in rs.image else None)
  2316. inst = objects.Instance(context=context, power_state=0,
  2317. uuid=uuid,
  2318. project_id=im.project_id,
  2319. created_at=im.created_at,
  2320. user_id=rs.user_id,
  2321. flavor=rs.flavor,
  2322. image_ref=image_ref,
  2323. availability_zone=rs.availability_zone)
  2324. self._save_user_id_in_instance_mapping(im, inst)
  2325. return inst
  2326. except exception.RequestSpecNotFound:
  2327. # could be that a deleted instance whose request
  2328. # spec has been archived is being queried.
  2329. raise exception.InstanceNotFound(instance_id=uuid)
  2330. else:
  2331. raise exception.NovaException(
  2332. _("Cell %s is not responding and hence instance "
  2333. "info is not available.") % cell_uuid)
  2334. def _get_instance(self, context, instance_uuid, expected_attrs,
  2335. cell_down_support=False):
  2336. inst_map = self._get_instance_map_or_none(context, instance_uuid)
  2337. if inst_map and (inst_map.cell_mapping is not None):
  2338. instance = self._get_instance_from_cell(context, inst_map,
  2339. expected_attrs, cell_down_support)
  2340. elif inst_map and (inst_map.cell_mapping is None):
  2341. # This means the instance has not been scheduled and put in
  2342. # a cell yet. For now it also may mean that the deployer
  2343. # has not created their cell(s) yet.
  2344. try:
  2345. build_req = objects.BuildRequest.get_by_instance_uuid(
  2346. context, instance_uuid)
  2347. instance = build_req.instance
  2348. except exception.BuildRequestNotFound:
  2349. # Instance was mapped and the BuildRequest was deleted
  2350. # while fetching. Try again.
  2351. inst_map = self._get_instance_map_or_none(context,
  2352. instance_uuid)
  2353. if inst_map and (inst_map.cell_mapping is not None):
  2354. instance = self._get_instance_from_cell(context, inst_map,
  2355. expected_attrs, cell_down_support)
  2356. else:
  2357. raise exception.InstanceNotFound(instance_id=instance_uuid)
  2358. else:
  2359. # If we got here, we don't have an instance mapping, but we aren't
  2360. # sure why. The instance mapping might be missing because the
  2361. # upgrade is incomplete (map_instances wasn't run). Or because the
  2362. # instance was deleted and the DB was archived at which point the
  2363. # mapping is deleted. The former case is bad, but because of the
  2364. # latter case we can't really log any kind of warning/error here
  2365. # since it might be normal.
  2366. raise exception.InstanceNotFound(instance_id=instance_uuid)
  2367. return instance
  2368. def get(self, context, instance_id, expected_attrs=None,
  2369. cell_down_support=False):
  2370. """Get a single instance with the given instance_id.
  2371. :param cell_down_support: True if the API (and caller) support
  2372. returning a minimal instance
  2373. construct if the relevant cell is
  2374. down. If False, an error is raised
  2375. since the instance cannot be retrieved
  2376. due to the cell being down.
  2377. """
  2378. if not expected_attrs:
  2379. expected_attrs = []
  2380. expected_attrs.extend(['metadata', 'system_metadata',
  2381. 'security_groups', 'info_cache'])
  2382. # NOTE(ameade): we still need to support integer ids for ec2
  2383. try:
  2384. if uuidutils.is_uuid_like(instance_id):
  2385. LOG.debug("Fetching instance by UUID",
  2386. instance_uuid=instance_id)
  2387. instance = self._get_instance(context, instance_id,
  2388. expected_attrs, cell_down_support=cell_down_support)
  2389. else:
  2390. LOG.debug("Failed to fetch instance by id %s", instance_id)
  2391. raise exception.InstanceNotFound(instance_id=instance_id)
  2392. except exception.InvalidID:
  2393. LOG.debug("Invalid instance id %s", instance_id)
  2394. raise exception.InstanceNotFound(instance_id=instance_id)
  2395. return instance
  2396. def get_all(self, context, search_opts=None, limit=None, marker=None,
  2397. expected_attrs=None, sort_keys=None, sort_dirs=None,
  2398. cell_down_support=False, all_tenants=False):
  2399. """Get all instances filtered by one of the given parameters.
  2400. If there is no filter and the context is an admin, it will retrieve
  2401. all instances in the system.
  2402. Deleted instances will be returned by default, unless there is a
  2403. search option that says otherwise.
  2404. The results will be sorted based on the list of sort keys in the
  2405. 'sort_keys' parameter (first value is primary sort key, second value is
2406. secondary sort key, etc.). For each sort key, the associated sort
  2407. direction is based on the list of sort directions in the 'sort_dirs'
  2408. parameter.
  2409. :param cell_down_support: True if the API (and caller) support
  2410. returning a minimal instance
  2411. construct if the relevant cell is
  2412. down. If False, instances from
  2413. unreachable cells will be omitted.
  2414. :param all_tenants: True if the "all_tenants" filter was passed.
  2415. """
  2416. if search_opts is None:
  2417. search_opts = {}
  2418. LOG.debug("Searching by: %s", str(search_opts))
  2419. # Fixups for the DB call
  2420. filters = {}
  2421. def _remap_flavor_filter(flavor_id):
  2422. flavor = objects.Flavor.get_by_flavor_id(context, flavor_id)
  2423. filters['instance_type_id'] = flavor.id
  2424. def _remap_fixed_ip_filter(fixed_ip):
  2425. # Turn fixed_ip into a regexp match. Since '.' matches
  2426. # any character, we need to use regexp escaping for it.
  2427. filters['ip'] = '^%s$' % fixed_ip.replace('.', '\\.')
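# e.g. a fixed_ip filter of '10.0.0.1' becomes the anchored regex
# '^10\.0\.0\.1$' so the DB regexp match cannot hit other addresses that
# merely contain that string.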
  2428. # search_option to filter_name mapping.
  2429. filter_mapping = {
  2430. 'image': 'image_ref',
  2431. 'name': 'display_name',
  2432. 'tenant_id': 'project_id',
  2433. 'flavor': _remap_flavor_filter,
  2434. 'fixed_ip': _remap_fixed_ip_filter}
  2435. # copy from search_opts, doing various remappings as necessary
  2436. for opt, value in search_opts.items():
  2437. # Do remappings.
  2438. # Values not in the filter_mapping table are copied as-is.
  2439. # If remapping is None, option is not copied
  2440. # If the remapping is a string, it is the filter_name to use
  2441. try:
  2442. remap_object = filter_mapping[opt]
  2443. except KeyError:
  2444. filters[opt] = value
  2445. else:
  2446. # Remaps are strings to translate to, or functions to call
  2447. # to do the translating as defined by the table above.
  2448. if isinstance(remap_object, six.string_types):
  2449. filters[remap_object] = value
  2450. else:
  2451. try:
  2452. remap_object(value)
  2453. # We already know we can't match the filter, so
  2454. # return an empty list
  2455. except ValueError:
  2456. return objects.InstanceList()
  2457. # IP address filtering cannot be applied at the DB layer, remove any DB
  2458. # limit so that it can be applied after the IP filter.
  2459. filter_ip = 'ip6' in filters or 'ip' in filters
  2460. skip_build_request = False
  2461. orig_limit = limit
  2462. if filter_ip:
  2463. # We cannot skip build requests if there is a marker since the
2464. # marker could be a build request.
  2465. skip_build_request = marker is None
  2466. if self.network_api.has_substr_port_filtering_extension(context):
  2467. # We're going to filter by IP using Neutron so set filter_ip
  2468. # to False so we don't attempt post-DB query filtering in
  2469. # memory below.
  2470. filter_ip = False
  2471. instance_uuids = self._ip_filter_using_neutron(context,
  2472. filters)
  2473. if instance_uuids:
  2474. # Note that 'uuid' is not in the 2.1 GET /servers query
  2475. # parameter schema, however, we allow additionalProperties
  2476. # so someone could filter instances by uuid, which doesn't
  2477. # make a lot of sense but we have to account for it.
  2478. if 'uuid' in filters and filters['uuid']:
  2479. filter_uuids = filters['uuid']
  2480. if isinstance(filter_uuids, list):
  2481. instance_uuids.extend(filter_uuids)
  2482. else:
  2483. # Assume a string. If it's a dict or tuple or
  2484. # something, well...that's too bad. This is why
  2485. # we have query parameter schema definitions.
  2486. if filter_uuids not in instance_uuids:
  2487. instance_uuids.append(filter_uuids)
  2488. filters['uuid'] = instance_uuids
  2489. else:
  2490. # No matches on the ip filter(s), return an empty list.
  2491. return objects.InstanceList()
  2492. elif limit:
  2493. LOG.debug('Removing limit for DB query due to IP filter')
  2494. limit = None
  2495. # Skip get BuildRequest if filtering by IP address, as building
  2496. # instances will not have IP addresses.
  2497. if skip_build_request:
  2498. build_requests = objects.BuildRequestList()
  2499. else:
  2500. # The ordering of instances will be
  2501. # [sorted instances with no host] + [sorted instances with host].
  2502. # This means BuildRequest and cell0 instances first, then cell
  2503. # instances
  2504. try:
  2505. build_requests = objects.BuildRequestList.get_by_filters(
  2506. context, filters, limit=limit, marker=marker,
  2507. sort_keys=sort_keys, sort_dirs=sort_dirs)
2508. # If we found the marker in the build requests then we need to set
2509. # it to None so we don't expect to find it in the cells below.
  2510. marker = None
  2511. except exception.MarkerNotFound:
  2512. # If we didn't find the marker in the build requests then keep
  2513. # looking for it in the cells.
  2514. build_requests = objects.BuildRequestList()
  2515. build_req_instances = objects.InstanceList(
  2516. objects=[build_req.instance for build_req in build_requests])
  2517. # Only subtract from limit if it is not None
  2518. limit = (limit - len(build_req_instances)) if limit else limit
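# e.g. with limit=10 and 3 matching build requests, at most 7 more
# instances are pulled from the cell databases below.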
  2519. # We could arguably avoid joining on security_groups if we're using
  2520. # neutron (which is the default) but if you're using neutron then the
  2521. # security_group_instance_association table should be empty anyway
  2522. # and the DB should optimize out that join, making it insignificant.
  2523. fields = ['metadata', 'info_cache', 'security_groups']
  2524. if expected_attrs:
  2525. fields.extend(expected_attrs)
  2526. insts, down_cell_uuids = instance_list.get_instance_objects_sorted(
  2527. context, filters, limit, marker, fields, sort_keys, sort_dirs,
  2528. cell_down_support=cell_down_support)
  2529. def _get_unique_filter_method():
  2530. seen_uuids = set()
  2531. def _filter(instance):
  2532. # During a cross-cell move operation we could have the instance
  2533. # in more than one cell database so we not only have to filter
  2534. # duplicates but we want to make sure we only return the
  2535. # "current" one which should also be the one that the instance
  2536. # mapping points to, but we don't want to do that expensive
  2537. # lookup here. The DB API will filter out hidden instances by
  2538. # default but there is a small window where two copies of an
  2539. # instance could be hidden=False in separate cell DBs.
  2540. # NOTE(mriedem): We could make this better in the case that we
  2541. # have duplicate instances that are both hidden=False by
  2542. # showing the one with the newer updated_at value, but that
  2543. # could be tricky if the user is filtering on
  2544. # changes-since/before or updated_at, or sorting on updated_at,
  2545. # but technically that was already potentially broken with this
  2546. # _filter method if we return an older BuildRequest.instance,
  2547. # and given the window should be very small where we have
  2548. # duplicates, it's probably not worth the complexity.
  2549. if instance.uuid in seen_uuids:
  2550. return False
  2551. seen_uuids.add(instance.uuid)
  2552. return True
  2553. return _filter
  2554. filter_method = _get_unique_filter_method()
  2555. # Only subtract from limit if it is not None
  2556. limit = (limit - len(insts)) if limit else limit
  2557. # TODO(alaski): Clean up the objects concatenation when List objects
  2558. # support it natively.
  2559. instances = objects.InstanceList(
  2560. objects=list(filter(filter_method,
  2561. build_req_instances.objects +
  2562. insts.objects)))
  2563. if filter_ip:
  2564. instances = self._ip_filter(instances, filters, orig_limit)
  2565. if cell_down_support:
  2566. # API and client want minimal construct instances for any cells
  2567. # that didn't return, so generate and prefix those to the actual
  2568. # results.
  2569. project = search_opts.get('project_id', context.project_id)
  2570. if all_tenants:
  2571. # NOTE(tssurya): The only scenario where project has to be None
  2572. # is when using "all_tenants" in which case we do not want
  2573. # the query to be restricted based on the project_id.
  2574. project = None
  2575. limit = (orig_limit - len(instances)) if limit else limit
  2576. return (self._generate_minimal_construct_for_down_cells(context,
  2577. down_cell_uuids, project, limit) + instances)
  2578. return instances
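# _ip_filter below is the in-memory fallback used when Neutron does not
# support substring port filtering: it matches each instance's cached
# network info against the ip/ip6 regexes and stops once 'limit' matches
# have been collected.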
  2579. @staticmethod
  2580. def _ip_filter(inst_models, filters, limit):
  2581. ipv4_f = re.compile(str(filters.get('ip')))
  2582. ipv6_f = re.compile(str(filters.get('ip6')))
  2583. def _match_instance(instance):
  2584. nw_info = instance.get_network_info()
  2585. for vif in nw_info:
  2586. for fixed_ip in vif.fixed_ips():
  2587. address = fixed_ip.get('address')
  2588. if not address:
  2589. continue
  2590. version = fixed_ip.get('version')
  2591. if ((version == 4 and ipv4_f.match(address)) or
  2592. (version == 6 and ipv6_f.match(address))):
  2593. return True
  2594. return False
  2595. result_objs = []
  2596. for instance in inst_models:
  2597. if _match_instance(instance):
  2598. result_objs.append(instance)
  2599. if limit and len(result_objs) == limit:
  2600. break
  2601. return objects.InstanceList(objects=result_objs)
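# When Neutron does support substring port filtering, the ip/ip6 filters
# are instead resolved up front by listing ports whose fixed IPs contain
# the requested substring and collecting their device_id values, i.e. the
# instance UUIDs.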
  2602. def _ip_filter_using_neutron(self, context, filters):
  2603. ip4_address = filters.get('ip')
  2604. ip6_address = filters.get('ip6')
  2605. addresses = [ip4_address, ip6_address]
  2606. uuids = []
  2607. for address in addresses:
  2608. if address:
  2609. try:
  2610. ports = self.network_api.list_ports(
  2611. context, fixed_ips='ip_address_substr=' + address,
  2612. fields=['device_id'])['ports']
  2613. for port in ports:
  2614. uuids.append(port['device_id'])
  2615. except Exception as e:
  2616. LOG.error('An error occurred while listing ports '
  2617. 'with an ip_address filter value of "%s". '
  2618. 'Error: %s',
  2619. address, six.text_type(e))
  2620. return uuids
  2621. def update_instance(self, context, instance, updates):
  2622. """Updates a single Instance object with some updates dict.
  2623. Returns the updated instance.
  2624. """
  2625. # NOTE(sbauza): Given we only persist the Instance object after we
  2626. # create the BuildRequest, we are sure that if the Instance object
  2627. # has an ID field set, then it was persisted in the right Cell DB.
  2628. if instance.obj_attr_is_set('id'):
  2629. instance.update(updates)
  2630. instance.save()
  2631. else:
  2632. # Instance is not yet mapped to a cell, so we need to update
  2633. # BuildRequest instead
  2634. # TODO(sbauza): Fix the possible race conditions where BuildRequest
  2635. # could be deleted because of either a concurrent instance delete
  2636. # or because the scheduler just returned a destination right
  2637. # after we called the instance in the API.
  2638. try:
  2639. build_req = objects.BuildRequest.get_by_instance_uuid(
  2640. context, instance.uuid)
  2641. instance = build_req.instance
  2642. instance.update(updates)
2643. # FIXME(sbauza): Here we are updating the current
2644. # thread-related BuildRequest object. Given that another worker
2645. # could have looked up that BuildRequest in the API, it could
2646. # pass it down to the conductor without knowing that it has been
2647. # updated here, so we could have a race condition where the
2648. # conductor misses the updated fields, but that's something we
2649. # could discuss once the instance record is persisted by the
2650. # conductor.
  2651. build_req.save()
  2652. except exception.BuildRequestNotFound:
  2653. # Instance was mapped and the BuildRequest was deleted
  2654. # while fetching (and possibly the instance could have been
  2655. # deleted as well). We need to lookup again the Instance object
  2656. # in order to correctly update it.
  2657. # TODO(sbauza): Figure out a good way to know the expected
  2658. # attributes by checking which fields are set or not.
  2659. expected_attrs = ['flavor', 'pci_devices', 'numa_topology',
  2660. 'tags', 'metadata', 'system_metadata',
  2661. 'security_groups', 'info_cache']
  2662. inst_map = self._get_instance_map_or_none(context,
  2663. instance.uuid)
  2664. if inst_map and (inst_map.cell_mapping is not None):
  2665. with nova_context.target_cell(
  2666. context,
  2667. inst_map.cell_mapping) as cctxt:
  2668. instance = objects.Instance.get_by_uuid(
  2669. cctxt, instance.uuid,
  2670. expected_attrs=expected_attrs)
  2671. instance.update(updates)
  2672. instance.save()
  2673. else:
  2674. # Conductor doesn't delete the BuildRequest until after the
  2675. # InstanceMapping record is created, so if we didn't get
  2676. # that and the BuildRequest doesn't exist, then the
  2677. # instance is already gone and we need to just error out.
  2678. raise exception.InstanceNotFound(instance_id=instance.uuid)
  2679. return instance
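# Both backup() and snapshot() below first create the image record in
# Glance from the API, set the task state, record the instance action and
# then cast to the compute service, which uploads the actual image data.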
  2680. # NOTE(melwitt): We don't check instance lock for backup because lock is
  2681. # intended to prevent accidental change/delete of instances
  2682. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2683. vm_states.PAUSED, vm_states.SUSPENDED])
  2684. def backup(self, context, instance, name, backup_type, rotation,
  2685. extra_properties=None):
  2686. """Backup the given instance
  2687. :param instance: nova.objects.instance.Instance object
  2688. :param name: name of the backup
  2689. :param backup_type: 'daily' or 'weekly'
  2690. :param rotation: int representing how many backups to keep around;
  2691. None if rotation shouldn't be used (as in the case of snapshots)
  2692. :param extra_properties: dict of extra image properties to include
  2693. when creating the image.
  2694. :returns: A dict containing image metadata
  2695. """
  2696. props_copy = dict(extra_properties, backup_type=backup_type)
  2697. if compute_utils.is_volume_backed_instance(context, instance):
  2698. LOG.info("It's not supported to backup volume backed "
  2699. "instance.", instance=instance)
  2700. raise exception.InvalidRequest(
  2701. _('Backup is not supported for volume-backed instances.'))
  2702. else:
  2703. image_meta = compute_utils.create_image(
  2704. context, instance, name, 'backup', self.image_api,
  2705. extra_properties=props_copy)
  2706. instance.task_state = task_states.IMAGE_BACKUP
  2707. instance.save(expected_task_state=[None])
  2708. self._record_action_start(context, instance,
  2709. instance_actions.BACKUP)
  2710. self.compute_rpcapi.backup_instance(context, instance,
  2711. image_meta['id'],
  2712. backup_type,
  2713. rotation)
  2714. return image_meta
  2715. # NOTE(melwitt): We don't check instance lock for snapshot because lock is
  2716. # intended to prevent accidental change/delete of instances
  2717. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2718. vm_states.PAUSED, vm_states.SUSPENDED])
  2719. def snapshot(self, context, instance, name, extra_properties=None):
  2720. """Snapshot the given instance.
  2721. :param instance: nova.objects.instance.Instance object
  2722. :param name: name of the snapshot
  2723. :param extra_properties: dict of extra image properties to include
  2724. when creating the image.
  2725. :returns: A dict containing image metadata
  2726. """
  2727. image_meta = compute_utils.create_image(
  2728. context, instance, name, 'snapshot', self.image_api,
  2729. extra_properties=extra_properties)
  2730. instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING
  2731. try:
  2732. instance.save(expected_task_state=[None])
  2733. except (exception.InstanceNotFound,
  2734. exception.UnexpectedDeletingTaskStateError) as ex:
2735. # Determine the attribute and state to use when raising the
2736. # InstanceInvalidState exception below.
  2737. LOG.debug('Instance disappeared during snapshot.',
  2738. instance=instance)
  2739. try:
  2740. image_id = image_meta['id']
  2741. self.image_api.delete(context, image_id)
  2742. LOG.info('Image %s deleted because instance '
  2743. 'deleted before snapshot started.',
  2744. image_id, instance=instance)
  2745. except exception.ImageNotFound:
  2746. pass
  2747. except Exception as exc:
  2748. LOG.warning("Error while trying to clean up image %(img_id)s: "
  2749. "%(error_msg)s",
  2750. {"img_id": image_meta['id'],
  2751. "error_msg": six.text_type(exc)})
  2752. attr = 'task_state'
  2753. state = task_states.DELETING
  2754. if type(ex) == exception.InstanceNotFound:
  2755. attr = 'vm_state'
  2756. state = vm_states.DELETED
  2757. raise exception.InstanceInvalidState(attr=attr,
  2758. instance_uuid=instance.uuid,
  2759. state=state,
  2760. method='snapshot')
  2761. self._record_action_start(context, instance,
  2762. instance_actions.CREATE_IMAGE)
  2763. self.compute_rpcapi.snapshot_instance(context, instance,
  2764. image_meta['id'])
  2765. return image_meta
  2766. # NOTE(melwitt): We don't check instance lock for snapshot because lock is
  2767. # intended to prevent accidental change/delete of instances
  2768. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2769. vm_states.SUSPENDED])
  2770. def snapshot_volume_backed(self, context, instance, name,
  2771. extra_properties=None):
  2772. """Snapshot the given volume-backed instance.
  2773. :param instance: nova.objects.instance.Instance object
  2774. :param name: name of the backup or snapshot
  2775. :param extra_properties: dict of extra image properties to include
  2776. :returns: the new image metadata
  2777. """
  2778. image_meta = compute_utils.initialize_instance_snapshot_metadata(
  2779. context, instance, name, extra_properties)
  2780. # the new image is simply a bucket of properties (particularly the
  2781. # block device mapping, kernel and ramdisk IDs) with no image data,
  2782. # hence the zero size
  2783. image_meta['size'] = 0
  2784. for attr in ('container_format', 'disk_format'):
  2785. image_meta.pop(attr, None)
  2786. properties = image_meta['properties']
  2787. # clean properties before filling
  2788. for key in ('block_device_mapping', 'bdm_v2', 'root_device_name'):
  2789. properties.pop(key, None)
  2790. if instance.root_device_name:
  2791. properties['root_device_name'] = instance.root_device_name
  2792. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  2793. context, instance.uuid)
  2794. mapping = [] # list of BDM dicts that can go into the image properties
  2795. # Do some up-front filtering of the list of BDMs from
  2796. # which we are going to create snapshots.
  2797. volume_bdms = []
  2798. for bdm in bdms:
  2799. if bdm.no_device:
  2800. continue
  2801. if bdm.is_volume:
  2802. # These will be handled below.
  2803. volume_bdms.append(bdm)
  2804. else:
  2805. mapping.append(bdm.get_image_mapping())
  2806. # Check limits in Cinder before creating snapshots to avoid going over
  2807. # quota in the middle of a list of volumes. This is a best-effort check
  2808. # but concurrently running snapshot requests from the same project
  2809. # could still fail to create volume snapshots if they go over limit.
  2810. if volume_bdms:
  2811. limits = self.volume_api.get_absolute_limits(context)
  2812. total_snapshots_used = limits['totalSnapshotsUsed']
  2813. max_snapshots = limits['maxTotalSnapshots']
  2814. # -1 means there is unlimited quota for snapshots
  2815. if (max_snapshots > -1 and
  2816. len(volume_bdms) + total_snapshots_used > max_snapshots):
  2817. LOG.debug('Unable to create volume snapshots for instance. '
  2818. 'Currently has %s snapshots, requesting %s new '
  2819. 'snapshots, with a limit of %s.',
  2820. total_snapshots_used, len(volume_bdms),
  2821. max_snapshots, instance=instance)
  2822. raise exception.OverQuota(overs='snapshots')
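# Example of the check above: with 8 snapshots already used, a limit of 10
# and 3 volume BDMs to snapshot, 8 + 3 > 10, so the request is rejected
# before any volume snapshot is created.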
  2823. quiesced = False
  2824. if instance.vm_state == vm_states.ACTIVE:
  2825. try:
  2826. LOG.info("Attempting to quiesce instance before volume "
  2827. "snapshot.", instance=instance)
  2828. self.compute_rpcapi.quiesce_instance(context, instance)
  2829. quiesced = True
  2830. except (exception.InstanceQuiesceNotSupported,
  2831. exception.QemuGuestAgentNotEnabled,
  2832. exception.NovaException, NotImplementedError) as err:
  2833. if strutils.bool_from_string(instance.system_metadata.get(
  2834. 'image_os_require_quiesce')):
  2835. raise
  2836. if isinstance(err, exception.NovaException):
  2837. LOG.info('Skipping quiescing instance: %(reason)s.',
  2838. {'reason': err.format_message()},
  2839. instance=instance)
  2840. else:
  2841. LOG.info('Skipping quiescing instance because the '
  2842. 'operation is not supported by the underlying '
  2843. 'compute driver.', instance=instance)
  2844. # NOTE(tasker): discovered that an uncaught exception could occur
  2845. # after the instance has been frozen. catch and thaw.
  2846. except Exception as ex:
  2847. with excutils.save_and_reraise_exception():
  2848. LOG.error("An error occurred during quiesce of instance. "
  2849. "Unquiescing to ensure instance is thawed. "
  2850. "Error: %s", six.text_type(ex),
  2851. instance=instance)
  2852. self.compute_rpcapi.unquiesce_instance(context, instance,
  2853. mapping=None)
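# The nested snapshot_instance helper below creates a snapshot for every
# volume BDM and is wrapped with wrap_instance_event so the work is
# recorded as an instance action event; on failure the guest is unquiesced
# (if it was quiesced) before the exception is re-raised.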
  2854. @wrap_instance_event(prefix='api')
  2855. def snapshot_instance(self, context, instance, bdms):
  2856. try:
  2857. for bdm in volume_bdms:
  2858. # create snapshot based on volume_id
  2859. volume = self.volume_api.get(context, bdm.volume_id)
  2860. # NOTE(yamahata): Should we wait for snapshot creation?
2861. # Linux LVM snapshot creation completes in a
2862. # short time, so it doesn't matter for now.
  2863. name = _('snapshot for %s') % image_meta['name']
  2864. LOG.debug('Creating snapshot from volume %s.',
  2865. volume['id'], instance=instance)
  2866. snapshot = self.volume_api.create_snapshot_force(
  2867. context, volume['id'],
  2868. name, volume['display_description'])
  2869. mapping_dict = block_device.snapshot_from_bdm(
  2870. snapshot['id'], bdm)
  2871. mapping_dict = mapping_dict.get_image_mapping()
  2872. mapping.append(mapping_dict)
  2873. return mapping
  2874. # NOTE(tasker): No error handling is done in the above for loop.
  2875. # This means that if the snapshot fails and throws an exception
  2876. # the traceback will skip right over the unquiesce needed below.
  2877. # Here, catch any exception, unquiesce the instance, and raise the
  2878. # error so that the calling function can do what it needs to in
  2879. # order to properly treat a failed snap.
  2880. except Exception:
  2881. with excutils.save_and_reraise_exception():
  2882. if quiesced:
  2883. LOG.info("Unquiescing instance after volume snapshot "
  2884. "failure.", instance=instance)
  2885. self.compute_rpcapi.unquiesce_instance(
  2886. context, instance, mapping)
  2887. self._record_action_start(context, instance,
  2888. instance_actions.CREATE_IMAGE)
  2889. mapping = snapshot_instance(self, context, instance, bdms)
  2890. if quiesced:
  2891. self.compute_rpcapi.unquiesce_instance(context, instance, mapping)
  2892. if mapping:
  2893. properties['block_device_mapping'] = mapping
  2894. properties['bdm_v2'] = True
  2895. return self.image_api.create(context, image_meta)
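# reboot() dispatches on reboot_type: _soft_reboot requires that no task
# is in progress (expected task_state None), while _hard_reboot accepts
# any task state in task_states.ALLOW_REBOOT.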
  2896. @check_instance_lock
  2897. def reboot(self, context, instance, reboot_type):
  2898. """Reboot the given instance."""
  2899. if reboot_type == 'SOFT':
  2900. self._soft_reboot(context, instance)
  2901. else:
  2902. self._hard_reboot(context, instance)
  2903. @check_instance_state(vm_state=set(vm_states.ALLOW_SOFT_REBOOT),
  2904. task_state=[None])
  2905. def _soft_reboot(self, context, instance):
  2906. expected_task_state = [None]
  2907. instance.task_state = task_states.REBOOTING
  2908. instance.save(expected_task_state=expected_task_state)
  2909. self._record_action_start(context, instance, instance_actions.REBOOT)
  2910. self.compute_rpcapi.reboot_instance(context, instance=instance,
  2911. block_device_info=None,
  2912. reboot_type='SOFT')
  2913. @check_instance_state(vm_state=set(vm_states.ALLOW_HARD_REBOOT),
  2914. task_state=task_states.ALLOW_REBOOT)
  2915. def _hard_reboot(self, context, instance):
  2916. instance.task_state = task_states.REBOOTING_HARD
  2917. instance.save(expected_task_state=task_states.ALLOW_REBOOT)
  2918. self._record_action_start(context, instance, instance_actions.REBOOT)
  2919. self.compute_rpcapi.reboot_instance(context, instance=instance,
  2920. block_device_info=None,
  2921. reboot_type='HARD')
  2922. # TODO(stephenfin): We should expand kwargs out to named args
  2923. @check_instance_lock
  2924. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2925. vm_states.ERROR])
  2926. def rebuild(self, context, instance, image_href, admin_password,
  2927. files_to_inject=None, **kwargs):
  2928. """Rebuild the given instance with the provided attributes."""
  2929. files_to_inject = files_to_inject or []
  2930. metadata = kwargs.get('metadata', {})
  2931. preserve_ephemeral = kwargs.get('preserve_ephemeral', False)
  2932. auto_disk_config = kwargs.get('auto_disk_config')
  2933. if 'key_name' in kwargs:
  2934. key_name = kwargs.pop('key_name')
  2935. if key_name:
  2936. # NOTE(liuyulong): we are intentionally using the user_id from
  2937. # the request context rather than the instance.user_id because
  2938. # users own keys but instances are owned by projects, and
  2939. # another user in the same project can rebuild an instance
  2940. # even if they didn't create it.
  2941. key_pair = objects.KeyPair.get_by_name(context,
  2942. context.user_id,
  2943. key_name)
  2944. instance.key_name = key_pair.name
  2945. instance.key_data = key_pair.public_key
  2946. instance.keypairs = objects.KeyPairList(objects=[key_pair])
  2947. else:
  2948. instance.key_name = None
  2949. instance.key_data = None
  2950. instance.keypairs = objects.KeyPairList(objects=[])
  2951. # Use trusted_certs value from kwargs to create TrustedCerts object
  2952. trusted_certs = None
  2953. if 'trusted_certs' in kwargs:
  2954. # Note that the user can set, change, or unset / reset trusted
  2955. # certs. If they are explicitly specifying
  2956. # trusted_image_certificates=None, that means we'll either unset
  2957. # them on the instance *or* reset to use the defaults (if defaults
  2958. # are configured).
  2959. trusted_certs = kwargs.pop('trusted_certs')
  2960. instance.trusted_certs = self._retrieve_trusted_certs_object(
  2961. context, trusted_certs, rebuild=True)
  2962. image_id, image = self._get_image(context, image_href)
  2963. self._check_auto_disk_config(image=image,
  2964. auto_disk_config=auto_disk_config)
  2965. flavor = instance.get_flavor()
  2966. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  2967. context, instance.uuid)
  2968. root_bdm = compute_utils.get_root_bdm(context, instance, bdms)
  2969. # Check to see if the image is changing and we have a volume-backed
  2970. # server. The compute doesn't support changing the image in the
  2971. # root disk of a volume-backed server, so we need to just fail fast.
  2972. is_volume_backed = compute_utils.is_volume_backed_instance(
  2973. context, instance, bdms)
  2974. if is_volume_backed:
  2975. if trusted_certs:
  2976. # The only way we can get here is if the user tried to set
  2977. # trusted certs or specified trusted_image_certificates=None
  2978. # and default_trusted_certificate_ids is configured.
  2979. msg = _("Image certificate validation is not supported "
  2980. "for volume-backed servers.")
  2981. raise exception.CertificateValidationFailed(message=msg)
  2982. # For boot from volume, instance.image_ref is empty, so we need to
  2983. # query the image from the volume.
  2984. if root_bdm is None:
  2985. # This shouldn't happen and is an error, we need to fail. This
2986. # is not the user's fault, it's an internal error. Without a
  2987. # root BDM we have no way of knowing the backing volume (or
  2988. # image in that volume) for this instance.
  2989. raise exception.NovaException(
  2990. _('Unable to find root block device mapping for '
  2991. 'volume-backed instance.'))
  2992. volume = self.volume_api.get(context, root_bdm.volume_id)
  2993. volume_image_metadata = volume.get('volume_image_metadata', {})
  2994. orig_image_ref = volume_image_metadata.get('image_id')
  2995. if orig_image_ref != image_href:
  2996. # Leave a breadcrumb.
  2997. LOG.debug('Requested to rebuild instance with a new image %s '
  2998. 'for a volume-backed server with image %s in its '
  2999. 'root volume which is not supported.', image_href,
  3000. orig_image_ref, instance=instance)
  3001. msg = _('Unable to rebuild with a different image for a '
  3002. 'volume-backed server.')
  3003. raise exception.ImageUnacceptable(
  3004. image_id=image_href, reason=msg)
  3005. else:
  3006. orig_image_ref = instance.image_ref
  3007. request_spec = objects.RequestSpec.get_by_instance_uuid(
  3008. context, instance.uuid)
  3009. self._checks_for_create_and_rebuild(context, image_id, image,
  3010. flavor, metadata, files_to_inject, root_bdm)
  3011. kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
  3012. context, None, None, image)
  3013. def _reset_image_metadata():
  3014. """Remove old image properties that we're storing as instance
  3015. system metadata. These properties start with 'image_'.
  3016. Then add the properties for the new image.
  3017. """
  3018. # FIXME(comstud): There's a race condition here in that if
  3019. # the system_metadata for this instance is updated after
  3020. # we do the previous save() and before we update.. those
  3021. # other updates will be lost. Since this problem exists in
  3022. # a lot of other places, I think it should be addressed in
  3023. # a DB layer overhaul.
  3024. orig_sys_metadata = dict(instance.system_metadata)
  3025. # Remove the old keys
  3026. for key in list(instance.system_metadata.keys()):
  3027. if key.startswith(utils.SM_IMAGE_PROP_PREFIX):
  3028. del instance.system_metadata[key]
  3029. # Add the new ones
  3030. new_sys_metadata = utils.get_system_metadata_from_image(
  3031. image, flavor)
  3032. instance.system_metadata.update(new_sys_metadata)
  3033. instance.save()
  3034. return orig_sys_metadata
  3035. # Since image might have changed, we may have new values for
  3036. # os_type, vm_mode, etc
  3037. options_from_image = self._inherit_properties_from_image(
  3038. image, auto_disk_config)
  3039. instance.update(options_from_image)
  3040. instance.task_state = task_states.REBUILDING
  3041. # An empty instance.image_ref is currently used as an indication
  3042. # of BFV. Preserve that over a rebuild to not break users.
  3043. if not is_volume_backed:
  3044. instance.image_ref = image_href
  3045. instance.kernel_id = kernel_id or ""
  3046. instance.ramdisk_id = ramdisk_id or ""
  3047. instance.progress = 0
  3048. instance.update(kwargs)
  3049. instance.save(expected_task_state=[None])
  3050. # On a rebuild, since we're potentially changing images, we need to
  3051. # wipe out the old image properties that we're storing as instance
  3052. # system metadata... and copy in the properties for the new image.
  3053. orig_sys_metadata = _reset_image_metadata()
  3054. self._record_action_start(context, instance, instance_actions.REBUILD)
  3055. # NOTE(sbauza): The migration script we provided in Newton should make
  3056. # sure that all our instances are currently migrated to have an
  3057. # attached RequestSpec object but let's consider that the operator only
  3058. # half migrated all their instances in the meantime.
  3059. host = instance.host
  3060. # If a new image is provided on rebuild, we will need to run
  3061. # through the scheduler again, but we want the instance to be
  3062. # rebuilt on the same host it's already on.
  3063. if orig_image_ref != image_href:
  3064. # We have to modify the request spec that goes to the scheduler
  3065. # to contain the new image. We persist this since we've already
  3066. # changed the instance.image_ref above so we're being
  3067. # consistent.
  3068. request_spec.image = objects.ImageMeta.from_dict(image)
  3069. request_spec.save()
  3070. if 'scheduler_hints' not in request_spec:
  3071. request_spec.scheduler_hints = {}
  3072. # Nuke the id on this so we can't accidentally save
  3073. # this hint hack later
  3074. del request_spec.id
  3075. # NOTE(danms): Passing host=None tells conductor to
  3076. # call the scheduler. The _nova_check_type hint
  3077. # requires that the scheduler returns only the same
  3078. # host that we are currently on and only checks
  3079. # rebuild-related filters.
  3080. request_spec.scheduler_hints['_nova_check_type'] = ['rebuild']
  3081. request_spec.force_hosts = [instance.host]
  3082. request_spec.force_nodes = [instance.node]
  3083. host = None
  3084. self.compute_task_api.rebuild_instance(context, instance=instance,
  3085. new_pass=admin_password, injected_files=files_to_inject,
  3086. image_ref=image_href, orig_image_ref=orig_image_ref,
  3087. orig_sys_metadata=orig_sys_metadata, bdms=bdms,
  3088. preserve_ephemeral=preserve_ephemeral, host=host,
  3089. request_spec=request_spec)
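# _check_quota_for_upsize below only has work to do for upsizes:
# upsize_quota_delta returns empty deltas otherwise, and any cores/ram
# deltas are re-checked against the project and user quotas, translating
# OverQuota into TooManyInstances with usage details.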
  3090. @staticmethod
  3091. def _check_quota_for_upsize(context, instance, current_flavor, new_flavor):
  3092. project_id, user_id = quotas_obj.ids_from_instance(context,
  3093. instance)
  3094. # Deltas will be empty if the resize is not an upsize.
  3095. deltas = compute_utils.upsize_quota_delta(new_flavor,
  3096. current_flavor)
  3097. if deltas:
  3098. try:
  3099. res_deltas = {'cores': deltas.get('cores', 0),
  3100. 'ram': deltas.get('ram', 0)}
  3101. objects.Quotas.check_deltas(context, res_deltas,
  3102. project_id, user_id=user_id,
  3103. check_project_id=project_id,
  3104. check_user_id=user_id)
  3105. except exception.OverQuota as exc:
  3106. quotas = exc.kwargs['quotas']
  3107. overs = exc.kwargs['overs']
  3108. usages = exc.kwargs['usages']
  3109. headroom = compute_utils.get_headroom(quotas, usages,
  3110. deltas)
  3111. (overs, reqs, total_alloweds,
  3112. useds) = compute_utils.get_over_quota_detail(headroom,
  3113. overs,
  3114. quotas,
  3115. deltas)
  3116. LOG.info("%(overs)s quota exceeded for %(pid)s,"
  3117. " tried to resize instance.",
  3118. {'overs': overs, 'pid': context.project_id})
  3119. raise exception.TooManyInstances(overs=overs,
  3120. req=reqs,
  3121. used=useds,
  3122. allowed=total_alloweds)
  3123. @check_instance_lock
  3124. @check_instance_state(vm_state=[vm_states.RESIZED])
  3125. def revert_resize(self, context, instance):
  3126. """Reverts a resize or cold migration, deleting the 'new' instance in
  3127. the process.
  3128. """
  3129. elevated = context.elevated()
  3130. migration = objects.Migration.get_by_instance_and_status(
  3131. elevated, instance.uuid, 'finished')
  3132. # If this is a resize down, a revert might go over quota.
  3133. self._check_quota_for_upsize(context, instance, instance.flavor,
  3134. instance.old_flavor)
  3135. # The AZ for the server may have changed when it was migrated so while
  3136. # we are in the API and have access to the API DB, update the
  3137. # instance.availability_zone before casting off to the compute service.
  3138. # Note that we do this in the API to avoid an "up-call" from the
  3139. # compute service to the API DB. This is not great in case something
  3140. # fails during revert before the instance.host is updated to the
  3141. # original source host, but it is good enough for now. Long-term we
  3142. # could consider passing the AZ down to compute so it can set it when
  3143. # the instance.host value is set in finish_revert_resize.
  3144. instance.availability_zone = (
  3145. availability_zones.get_host_availability_zone(
  3146. context, migration.source_compute))
  3147. # Conductor updated the RequestSpec.flavor during the initial resize
  3148. # operation to point at the new flavor, so we need to update the
  3149. # RequestSpec to point back at the original flavor, otherwise
  3150. # subsequent move operations through the scheduler will be using the
  3151. # wrong flavor.
  3152. reqspec = objects.RequestSpec.get_by_instance_uuid(
  3153. context, instance.uuid)
  3154. reqspec.flavor = instance.old_flavor
  3155. reqspec.save()
  3156. # NOTE(gibi): This is a performance optimization. If the network info
  3157. # cache does not have ports with allocations in the binding profile
  3158. # then we can skip reading port resource request from neutron below.
3159. # If a port has a resource request then the finish_resize call would
3160. # have already put an allocation in the binding profile during the
3161. # resize.
  3162. if instance.get_network_info().has_port_with_allocation():
  3163. # TODO(gibi): do not directly overwrite the
3164. # RequestSpec.requested_resources as others like cyborg might have
3165. # added things there already.
  3166. # NOTE(gibi): We need to collect the requested resource again as it
  3167. # is intentionally not persisted in nova. Note that this needs to
  3168. # be done here as the nova API code directly calls revert on the
  3169. # dest compute service skipping the conductor.
  3170. port_res_req = (
  3171. self.network_api.get_requested_resource_for_instance(
  3172. context, instance.uuid))
  3173. reqspec.requested_resources = port_res_req
  3174. instance.task_state = task_states.RESIZE_REVERTING
  3175. instance.save(expected_task_state=[None])
  3176. migration.status = 'reverting'
  3177. migration.save()
  3178. self._record_action_start(context, instance,
  3179. instance_actions.REVERT_RESIZE)
  3180. # TODO(melwitt): We're not rechecking for strict quota here to guard
  3181. # against going over quota during a race at this time because the
  3182. # resource consumption for this operation is written to the database
  3183. # by compute.
  3184. self.compute_rpcapi.revert_resize(context, instance,
  3185. migration,
  3186. migration.dest_compute,
  3187. reqspec)
  3188. @check_instance_lock
  3189. @check_instance_state(vm_state=[vm_states.RESIZED])
  3190. def confirm_resize(self, context, instance, migration=None):
  3191. """Confirms a migration/resize and deletes the 'old' instance."""
  3192. elevated = context.elevated()
  3193. # NOTE(melwitt): We're not checking quota here because there isn't a
  3194. # change in resource usage when confirming a resize. Resource
3195. # consumption for resizes is written to the database by compute, so
  3196. # a confirm resize is just a clean up of the migration objects and a
  3197. # state change in compute.
  3198. if migration is None:
  3199. migration = objects.Migration.get_by_instance_and_status(
  3200. elevated, instance.uuid, 'finished')
  3201. migration.status = 'confirming'
  3202. migration.save()
  3203. self._record_action_start(context, instance,
  3204. instance_actions.CONFIRM_RESIZE)
  3205. self.compute_rpcapi.confirm_resize(context,
  3206. instance,
  3207. migration,
  3208. migration.source_compute)
  3209. @staticmethod
  3210. def _allow_cross_cell_resize(context, instance):
  3211. """Determine if the request can perform a cross-cell resize on this
  3212. instance.
  3213. :param context: nova auth request context for the resize operation
  3214. :param instance: Instance object being resized
  3215. :returns: True if cross-cell resize is allowed, False otherwise
  3216. """
  3217. # TODO(mriedem): Uncomment this when confirm/revert are done. For now
  3218. # this method can just be used to internally enable the feature for
  3219. # functional testing.
  3220. # TODO(mriedem): If true, we should probably check if all of the
  3221. # computes are running a high enough service version and if not, do
  3222. # what? Raise an exception or just log something and return False?
  3223. # return context.can(
  3224. # server_policies.CROSS_CELL_RESIZE,
  3225. # target={'user_id': instance.user_id,
  3226. # 'project_id': instance.project_id},
  3227. # fatal=False)
  3228. return False
  3229. @staticmethod
  3230. def _validate_host_for_cold_migrate(
  3231. context, instance, host_name, allow_cross_cell_resize):
  3232. """Validates a host specified for cold migration.
  3233. :param context: nova auth request context for the cold migration
  3234. :param instance: Instance object being cold migrated
  3235. :param host_name: User-specified compute service hostname for the
  3236. desired destination of the instance during the cold migration
  3237. :param allow_cross_cell_resize: If True, cross-cell resize is allowed
  3238. for this operation and the host could be in a different cell from
3239. the one that the instance is currently in. If False, the specified
  3240. host must be in the same cell as the instance.
  3241. :returns: ComputeNode object of the requested host
  3242. :raises: CannotMigrateToSameHost if the host is the same as the
  3243. current instance.host
  3244. :raises: ComputeHostNotFound if the specified host cannot be found
  3245. """
  3246. # Cannot migrate to the host where the instance exists
  3247. # because it is useless.
  3248. if host_name == instance.host:
  3249. raise exception.CannotMigrateToSameHost()
  3250. # Check whether host exists or not. If a cross-cell resize is
  3251. # allowed, the host could be in another cell from the one the
  3252. # instance is currently in, so we need to lookup the HostMapping
  3253. # to get the cell and lookup the ComputeNode in that cell.
  3254. if allow_cross_cell_resize:
  3255. try:
  3256. hm = objects.HostMapping.get_by_host(context, host_name)
  3257. except exception.HostMappingNotFound:
  3258. LOG.info('HostMapping not found for host: %s', host_name)
  3259. raise exception.ComputeHostNotFound(host=host_name)
  3260. with nova_context.target_cell(context, hm.cell_mapping) as cctxt:
  3261. node = objects.ComputeNode.\
  3262. get_first_node_by_host_for_old_compat(
  3263. cctxt, host_name, use_slave=True)
  3264. else:
  3265. node = objects.ComputeNode.get_first_node_by_host_for_old_compat(
  3266. context, host_name, use_slave=True)
  3267. return node
  3268. @check_instance_lock
  3269. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED])
  3270. def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
  3271. host_name=None, auto_disk_config=None):
  3272. """Resize (ie, migrate) a running instance.
  3273. If flavor_id is None, the process is considered a migration, keeping
  3274. the original flavor_id. If flavor_id is not None, the instance should
  3275. be migrated to a new host and resized to the new flavor_id.
  3276. host_name is always None in the resize case.
  3277. host_name can be set in the cold migration case only.
  3278. """
  3279. allow_cross_cell_resize = self._allow_cross_cell_resize(
  3280. context, instance)
  3281. if host_name is not None:
  3282. node = self._validate_host_for_cold_migrate(
  3283. context, instance, host_name, allow_cross_cell_resize)
  3284. self._check_auto_disk_config(
  3285. instance, auto_disk_config=auto_disk_config)
  3286. current_instance_type = instance.get_flavor()
  3287. # If flavor_id is not provided, only migrate the instance.
  3288. volume_backed = None
  3289. if not flavor_id:
  3290. LOG.debug("flavor_id is None. Assuming migration.",
  3291. instance=instance)
  3292. new_instance_type = current_instance_type
  3293. else:
  3294. new_instance_type = flavors.get_flavor_by_flavor_id(
  3295. flavor_id, read_deleted="no")
  3296. # Check to see if we're resizing to a zero-disk flavor which is
  3297. # only supported with volume-backed servers.
  3298. if (new_instance_type.get('root_gb') == 0 and
  3299. current_instance_type.get('root_gb') != 0):
  3300. volume_backed = compute_utils.is_volume_backed_instance(
  3301. context, instance)
  3302. if not volume_backed:
  3303. reason = _('Resize to zero disk flavor is not allowed.')
  3304. raise exception.CannotResizeDisk(reason=reason)
  3305. current_instance_type_name = current_instance_type['name']
  3306. new_instance_type_name = new_instance_type['name']
  3307. LOG.debug("Old instance type %(current_instance_type_name)s, "
  3308. "new instance type %(new_instance_type_name)s",
  3309. {'current_instance_type_name': current_instance_type_name,
  3310. 'new_instance_type_name': new_instance_type_name},
  3311. instance=instance)
  3312. same_instance_type = (current_instance_type['id'] ==
  3313. new_instance_type['id'])
  3314. # NOTE(sirp): We don't want to force a customer to change their flavor
  3315. # when Ops is migrating off of a failed host.
  3316. if not same_instance_type and new_instance_type.get('disabled'):
  3317. raise exception.FlavorNotFound(flavor_id=flavor_id)
  3318. if same_instance_type and flavor_id:
  3319. raise exception.CannotResizeToSameFlavor()
  3320. # ensure there is sufficient headroom for upsizes
  3321. if flavor_id:
  3322. self._check_quota_for_upsize(context, instance,
  3323. current_instance_type,
  3324. new_instance_type)
  3325. if not same_instance_type:
  3326. image = utils.get_image_from_system_metadata(
  3327. instance.system_metadata)
  3328. # Figure out if the instance is volume-backed but only if we didn't
  3329. # already figure that out above (avoid the extra db hit).
  3330. if volume_backed is None:
  3331. volume_backed = compute_utils.is_volume_backed_instance(
  3332. context, instance)
  3333. # If the server is volume-backed, we still want to validate numa
  3334. # and pci information in the new flavor, but we don't call
  3335. # _validate_flavor_image_nostatus because how it handles checking
  3336. # disk size validation was not intended for a volume-backed
  3337. # resize case.
  3338. if volume_backed:
  3339. self._validate_flavor_image_numa_pci(
  3340. image, new_instance_type, validate_pci=True)
  3341. else:
  3342. self._validate_flavor_image_nostatus(
  3343. context, image, new_instance_type, root_bdm=None,
  3344. validate_pci=True)
  3345. filter_properties = {'ignore_hosts': []}
  3346. if not CONF.allow_resize_to_same_host:
  3347. filter_properties['ignore_hosts'].append(instance.host)
  3348. request_spec = objects.RequestSpec.get_by_instance_uuid(
  3349. context, instance.uuid)
  3350. request_spec.ignore_hosts = filter_properties['ignore_hosts']
  3351. instance.task_state = task_states.RESIZE_PREP
  3352. instance.progress = 0
  3353. instance.auto_disk_config = auto_disk_config or False
  3354. instance.save(expected_task_state=[None])
  3355. if not flavor_id:
  3356. self._record_action_start(context, instance,
  3357. instance_actions.MIGRATE)
  3358. else:
  3359. self._record_action_start(context, instance,
  3360. instance_actions.RESIZE)
  3361. # TODO(melwitt): We're not rechecking for strict quota here to guard
  3362. # against going over quota during a race at this time because the
  3363. # resource consumption for this operation is written to the database
  3364. # by compute.
  3365. scheduler_hint = {'filter_properties': filter_properties}
  3366. if host_name is None:
  3367. # If 'host_name' is not specified,
  3368. # clear the 'requested_destination' field of the RequestSpec
  3369. # except set the allow_cross_cell_move flag since conductor uses
  3370. # it prior to scheduling.
  3371. request_spec.requested_destination = objects.Destination(
  3372. allow_cross_cell_move=allow_cross_cell_resize)
  3373. else:
  3374. # Set the host and the node so that the scheduler will
  3375. # validate them.
  3376. request_spec.requested_destination = objects.Destination(
  3377. host=node.host, node=node.hypervisor_hostname,
  3378. allow_cross_cell_move=allow_cross_cell_resize)
  3379. # Asynchronously RPC cast to conductor so the response is not blocked
  3380. # during scheduling. If something fails the user can find out via
  3381. # instance actions.
  3382. self.compute_task_api.resize_instance(context, instance,
  3383. scheduler_hint=scheduler_hint,
  3384. flavor=new_instance_type,
  3385. clean_shutdown=clean_shutdown,
  3386. request_spec=request_spec,
  3387. do_cast=True)
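# shelve() below snapshots a non-volume-backed server so it can be
# respawned later when unshelved, then casts to the compute service;
# volume-backed servers skip the snapshot and are shelve-offloaded
# directly since their root disk already lives in Cinder.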
  3388. @check_instance_lock
  3389. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  3390. vm_states.PAUSED, vm_states.SUSPENDED])
  3391. def shelve(self, context, instance, clean_shutdown=True):
  3392. """Shelve an instance.
  3393. Shuts down an instance and frees it up to be removed from the
  3394. hypervisor.
  3395. """
  3396. instance.task_state = task_states.SHELVING
  3397. instance.save(expected_task_state=[None])
  3398. self._record_action_start(context, instance, instance_actions.SHELVE)
  3399. if not compute_utils.is_volume_backed_instance(context, instance):
  3400. name = '%s-shelved' % instance.display_name
  3401. image_meta = compute_utils.create_image(
  3402. context, instance, name, 'snapshot', self.image_api)
  3403. image_id = image_meta['id']
  3404. self.compute_rpcapi.shelve_instance(context, instance=instance,
  3405. image_id=image_id, clean_shutdown=clean_shutdown)
  3406. else:
  3407. self.compute_rpcapi.shelve_offload_instance(context,
  3408. instance=instance, clean_shutdown=clean_shutdown)
  3409. @check_instance_lock
  3410. @check_instance_state(vm_state=[vm_states.SHELVED])
  3411. def shelve_offload(self, context, instance, clean_shutdown=True):
  3412. """Remove a shelved instance from the hypervisor."""
  3413. instance.task_state = task_states.SHELVING_OFFLOADING
  3414. instance.save(expected_task_state=[None])
  3415. self._record_action_start(context, instance,
  3416. instance_actions.SHELVE_OFFLOAD)
  3417. self.compute_rpcapi.shelve_offload_instance(context, instance=instance,
  3418. clean_shutdown=clean_shutdown)
  3419. def _validate_unshelve_az(self, context, instance, availability_zone):
  3420. """Verify the specified availability_zone during unshelve.
  3421. Verifies that the server is shelved offloaded, the AZ exists and
  3422. if [cinder]/cross_az_attach=False, that any attached volumes are in
  3423. the same AZ.
  3424. :param context: nova auth RequestContext for the unshelve action
  3425. :param instance: Instance object for the server being unshelved
  3426. :param availability_zone: The user-requested availability zone in
  3427. which to unshelve the server.
  3428. :raises: UnshelveInstanceInvalidState if the server is not shelved
  3429. offloaded
  3430. :raises: InvalidRequest if the requested AZ does not exist
  3431. :raises: MismatchVolumeAZException if [cinder]/cross_az_attach=False
  3432. and any attached volumes are not in the requested AZ
  3433. """
  3434. if instance.vm_state != vm_states.SHELVED_OFFLOADED:
  3435. # NOTE(brinzhang): If the server status is 'SHELVED', it still
3436. # belongs to a host, so the availability_zone has not changed.
  3437. # Unshelving a shelved offloaded server will go through the
  3438. # scheduler to find a new host.
  3439. raise exception.UnshelveInstanceInvalidState(
  3440. state=instance.vm_state, instance_uuid=instance.uuid)
  3441. available_zones = availability_zones.get_availability_zones(
  3442. context, self.host_api, get_only_available=True)
  3443. if availability_zone not in available_zones:
  3444. msg = _('The requested availability zone is not available')
  3445. raise exception.InvalidRequest(msg)
3446. # NOTE(brinzhang): When specifying an availability zone to unshelve
3447. # a shelved offloaded server, and conf cross_az_attach=False, we need
3448. # to determine if the attached volume AZ matches the user-specified AZ.
  3449. if not CONF.cinder.cross_az_attach:
  3450. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  3451. context, instance.uuid)
  3452. for bdm in bdms:
  3453. if bdm.is_volume and bdm.volume_id:
  3454. volume = self.volume_api.get(context, bdm.volume_id)
  3455. if availability_zone != volume['availability_zone']:
  3456. msg = _("The specified availability zone does not "
  3457. "match the volume %(vol_id)s attached to the "
  3458. "server. Specified availability zone is "
  3459. "%(az)s. Volume is in %(vol_zone)s.") % {
  3460. "vol_id": volume['id'],
  3461. "az": availability_zone,
  3462. "vol_zone": volume['availability_zone']}
  3463. raise exception.MismatchVolumeAZException(reason=msg)
  3464. @check_instance_lock
  3465. @check_instance_state(vm_state=[vm_states.SHELVED,
  3466. vm_states.SHELVED_OFFLOADED])
  3467. def unshelve(self, context, instance, new_az=None):
  3468. """Restore a shelved instance."""
  3469. request_spec = objects.RequestSpec.get_by_instance_uuid(
  3470. context, instance.uuid)
  3471. if new_az:
  3472. self._validate_unshelve_az(context, instance, new_az)
  3473. LOG.debug("Replace the old AZ %(old_az)s in RequestSpec "
  3474. "with a new AZ %(new_az)s of the instance.",
  3475. {"old_az": request_spec.availability_zone,
  3476. "new_az": new_az}, instance=instance)
  3477. # Unshelving a shelved offloaded server will go through the
  3478. # scheduler to pick a new host, so we update the
  3479. # RequestSpec.availability_zone here. Note that if scheduling
  3480. # fails the RequestSpec will remain updated, which is not great,
  3481. # but if we want to change that we need to defer updating the
  3482. # RequestSpec until conductor which probably means RPC changes to
  3483. # pass the new_az variable to conductor. This is likely low
  3484. # priority since the RequestSpec.availability_zone on a shelved
  3485. # offloaded server does not mean much anyway and clearly the user
  3486. # is trying to put the server in the target AZ.
  3487. request_spec.availability_zone = new_az
  3488. request_spec.save()
  3489. instance.task_state = task_states.UNSHELVING
  3490. instance.save(expected_task_state=[None])
  3491. self._record_action_start(context, instance, instance_actions.UNSHELVE)
  3492. self.compute_task_api.unshelve_instance(context, instance,
  3493. request_spec)
  3494. @check_instance_lock
  3495. def add_fixed_ip(self, context, instance, network_id):
  3496. """Add fixed_ip from specified network to given instance."""
  3497. self.compute_rpcapi.add_fixed_ip_to_instance(context,
  3498. instance=instance, network_id=network_id)
  3499. @check_instance_lock
  3500. def remove_fixed_ip(self, context, instance, address):
  3501. """Remove fixed_ip from specified network to given instance."""
  3502. self.compute_rpcapi.remove_fixed_ip_from_instance(context,
  3503. instance=instance, address=address)
  3504. @check_instance_lock
  3505. @check_instance_state(vm_state=[vm_states.ACTIVE])
  3506. def pause(self, context, instance):
  3507. """Pause the given instance."""
  3508. instance.task_state = task_states.PAUSING
  3509. instance.save(expected_task_state=[None])
  3510. self._record_action_start(context, instance, instance_actions.PAUSE)
  3511. self.compute_rpcapi.pause_instance(context, instance)
  3512. @check_instance_lock
  3513. @check_instance_state(vm_state=[vm_states.PAUSED])
  3514. def unpause(self, context, instance):
  3515. """Unpause the given instance."""
  3516. instance.task_state = task_states.UNPAUSING
  3517. instance.save(expected_task_state=[None])
  3518. self._record_action_start(context, instance, instance_actions.UNPAUSE)
  3519. self.compute_rpcapi.unpause_instance(context, instance)
  3520. @check_instance_host
  3521. def get_diagnostics(self, context, instance):
  3522. """Retrieve diagnostics for the given instance."""
  3523. return self.compute_rpcapi.get_diagnostics(context, instance=instance)
  3524. @check_instance_host
  3525. def get_instance_diagnostics(self, context, instance):
  3526. """Retrieve diagnostics for the given instance."""
  3527. return self.compute_rpcapi.get_instance_diagnostics(context,
  3528. instance=instance)
  3529. @reject_sev_instances(instance_actions.SUSPEND)
  3530. @check_instance_lock
  3531. @check_instance_state(vm_state=[vm_states.ACTIVE])
  3532. def suspend(self, context, instance):
  3533. """Suspend the given instance."""
  3534. instance.task_state = task_states.SUSPENDING
  3535. instance.save(expected_task_state=[None])
  3536. self._record_action_start(context, instance, instance_actions.SUSPEND)
  3537. self.compute_rpcapi.suspend_instance(context, instance)
  3538. @check_instance_lock
  3539. @check_instance_state(vm_state=[vm_states.SUSPENDED])
  3540. def resume(self, context, instance):
  3541. """Resume the given instance."""
  3542. instance.task_state = task_states.RESUMING
  3543. instance.save(expected_task_state=[None])
  3544. self._record_action_start(context, instance, instance_actions.RESUME)
  3545. self.compute_rpcapi.resume_instance(context, instance)
  3546. @check_instance_lock
  3547. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  3548. vm_states.ERROR])
  3549. def rescue(self, context, instance, rescue_password=None,
  3550. rescue_image_ref=None, clean_shutdown=True):
  3551. """Rescue the given instance."""
  3552. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  3553. context, instance.uuid)
  3554. for bdm in bdms:
  3555. if bdm.volume_id:
  3556. vol = self.volume_api.get(context, bdm.volume_id)
  3557. self.volume_api.check_attached(context, vol)
  3558. if compute_utils.is_volume_backed_instance(context, instance, bdms):
  3559. reason = _("Cannot rescue a volume-backed instance")
  3560. raise exception.InstanceNotRescuable(instance_id=instance.uuid,
  3561. reason=reason)
  3562. instance.task_state = task_states.RESCUING
  3563. instance.save(expected_task_state=[None])
  3564. self._record_action_start(context, instance, instance_actions.RESCUE)
  3565. self.compute_rpcapi.rescue_instance(context, instance=instance,
  3566. rescue_password=rescue_password, rescue_image_ref=rescue_image_ref,
  3567. clean_shutdown=clean_shutdown)
  3568. @check_instance_lock
  3569. @check_instance_state(vm_state=[vm_states.RESCUED])
  3570. def unrescue(self, context, instance):
  3571. """Unrescue the given instance."""
  3572. instance.task_state = task_states.UNRESCUING
  3573. instance.save(expected_task_state=[None])
  3574. self._record_action_start(context, instance, instance_actions.UNRESCUE)
  3575. self.compute_rpcapi.unrescue_instance(context, instance=instance)
  3576. @check_instance_lock
  3577. @check_instance_state(vm_state=[vm_states.ACTIVE])
  3578. def set_admin_password(self, context, instance, password):
  3579. """Set the root/admin password for the given instance.
3580. :param context: Nova auth context.
3581. :param instance: Nova instance object.
3582. :param password: The admin password for the instance.
  3583. """
  3584. instance.task_state = task_states.UPDATING_PASSWORD
  3585. instance.save(expected_task_state=[None])
  3586. self._record_action_start(context, instance,
  3587. instance_actions.CHANGE_PASSWORD)
  3588. self.compute_rpcapi.set_admin_password(context,
  3589. instance=instance,
  3590. new_pass=password)
  3591. @check_instance_host
  3592. @reject_instance_state(
  3593. task_state=[task_states.DELETING, task_states.MIGRATING])
  3594. def get_vnc_console(self, context, instance, console_type):
  3595. """Get a url to an instance Console."""
  3596. connect_info = self.compute_rpcapi.get_vnc_console(context,
  3597. instance=instance, console_type=console_type)
  3598. return {'url': connect_info['access_url']}
  3599. @check_instance_host
  3600. @reject_instance_state(
  3601. task_state=[task_states.DELETING, task_states.MIGRATING])
  3602. def get_spice_console(self, context, instance, console_type):
  3603. """Get a url to an instance Console."""
  3604. connect_info = self.compute_rpcapi.get_spice_console(context,
  3605. instance=instance, console_type=console_type)
  3606. return {'url': connect_info['access_url']}
  3607. @check_instance_host
  3608. @reject_instance_state(
  3609. task_state=[task_states.DELETING, task_states.MIGRATING])
  3610. def get_rdp_console(self, context, instance, console_type):
  3611. """Get a url to an instance Console."""
  3612. connect_info = self.compute_rpcapi.get_rdp_console(context,
  3613. instance=instance, console_type=console_type)
  3614. return {'url': connect_info['access_url']}
  3615. @check_instance_host
  3616. @reject_instance_state(
  3617. task_state=[task_states.DELETING, task_states.MIGRATING])
  3618. def get_serial_console(self, context, instance, console_type):
  3619. """Get a url to a serial console."""
  3620. connect_info = self.compute_rpcapi.get_serial_console(context,
  3621. instance=instance, console_type=console_type)
  3622. return {'url': connect_info['access_url']}
  3623. @check_instance_host
  3624. @reject_instance_state(
  3625. task_state=[task_states.DELETING, task_states.MIGRATING])
  3626. def get_mks_console(self, context, instance, console_type):
  3627. """Get a url to a MKS console."""
  3628. connect_info = self.compute_rpcapi.get_mks_console(context,
  3629. instance=instance, console_type=console_type)
  3630. return {'url': connect_info['access_url']}
  3631. @check_instance_host
  3632. def get_console_output(self, context, instance, tail_length=None):
  3633. """Get console output for an instance."""
  3634. return self.compute_rpcapi.get_console_output(context,
  3635. instance=instance, tail_length=tail_length)
  3636. def lock(self, context, instance, reason=None):
  3637. """Lock the given instance."""
  3638. # Only update the lock if we are an admin (non-owner)
  3639. is_owner = instance.project_id == context.project_id
  3640. if instance.locked and is_owner:
  3641. return
  3642. context = context.elevated()
  3643. self._record_action_start(context, instance,
  3644. instance_actions.LOCK)
  3645. @wrap_instance_event(prefix='api')
  3646. def lock(self, context, instance, reason=None):
  3647. LOG.debug('Locking', instance=instance)
  3648. instance.locked = True
  3649. instance.locked_by = 'owner' if is_owner else 'admin'
  3650. if reason:
  3651. instance.system_metadata['locked_reason'] = reason
  3652. instance.save()
  3653. lock(self, context, instance, reason=reason)
  3654. compute_utils.notify_about_instance_action(
  3655. context, instance, CONF.host,
  3656. action=fields_obj.NotificationAction.LOCK,
  3657. source=fields_obj.NotificationSource.API)
  3658. def is_expected_locked_by(self, context, instance):
  3659. is_owner = instance.project_id == context.project_id
  3660. expect_locked_by = 'owner' if is_owner else 'admin'
  3661. locked_by = instance.locked_by
  3662. if locked_by and locked_by != expect_locked_by:
  3663. return False
  3664. return True
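# NOTE: Minimal standalone sketch (not part of this module) of the
# owner/admin lock semantics above: the lock records who applied it, and
# is_expected_locked_by() compares that against the calling project. The
# project IDs below are placeholders.
#
#     def expected_locked_by(caller_project_id, instance_project_id):
#         is_owner = instance_project_id == caller_project_id
#         return 'owner' if is_owner else 'admin'
#
#     assert expected_locked_by('tenant-a', 'tenant-a') == 'owner'
#     assert expected_locked_by('admin-project', 'tenant-a') == 'admin'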
  3665. def unlock(self, context, instance):
  3666. """Unlock the given instance."""
  3667. context = context.elevated()
  3668. self._record_action_start(context, instance,
  3669. instance_actions.UNLOCK)
  3670. @wrap_instance_event(prefix='api')
  3671. def unlock(self, context, instance):
  3672. LOG.debug('Unlocking', instance=instance)
  3673. instance.locked = False
  3674. instance.locked_by = None
  3675. instance.system_metadata.pop('locked_reason', None)
  3676. instance.save()
  3677. unlock(self, context, instance)
  3678. compute_utils.notify_about_instance_action(
  3679. context, instance, CONF.host,
  3680. action=fields_obj.NotificationAction.UNLOCK,
  3681. source=fields_obj.NotificationSource.API)
  3682. @check_instance_lock
  3683. def reset_network(self, context, instance):
  3684. """Reset networking on the instance."""
  3685. self.compute_rpcapi.reset_network(context, instance=instance)
  3686. @check_instance_lock
  3687. def inject_network_info(self, context, instance):
  3688. """Inject network info for the instance."""
  3689. self.compute_rpcapi.inject_network_info(context, instance=instance)
  3690. def _create_volume_bdm(self, context, instance, device, volume,
  3691. disk_bus, device_type, is_local_creation=False,
  3692. tag=None, delete_on_termination=False):
  3693. volume_id = volume['id']
  3694. if is_local_creation:
3695. # When the creation is done locally we can't specify the device
3696. # name because we have no way to check that the specified name is
3697. # a valid one.
3698. # We defer setting that value until the actual attach happens on
3699. # the compute manager.
3700. # NOTE(artom) Local attach (to a shelved-offloaded instance) cannot
3701. # support device tagging because we have no way to call the compute
3702. # manager to check that it supports device tagging. In fact, we
3703. # don't even know which compute manager the instance will
3704. # eventually end up on when it's unshelved.
  3705. volume_bdm = objects.BlockDeviceMapping(
  3706. context=context,
  3707. source_type='volume', destination_type='volume',
  3708. instance_uuid=instance.uuid, boot_index=None,
  3709. volume_id=volume_id,
  3710. device_name=None, guest_format=None,
  3711. disk_bus=disk_bus, device_type=device_type,
  3712. delete_on_termination=delete_on_termination)
  3713. volume_bdm.create()
  3714. else:
  3715. # NOTE(vish): This is done on the compute host because we want
  3716. # to avoid a race where two devices are requested at
  3717. # the same time. When db access is removed from
  3718. # compute, the bdm will be created here and we will
  3719. # have to make sure that they are assigned atomically.
  3720. volume_bdm = self.compute_rpcapi.reserve_block_device_name(
  3721. context, instance, device, volume_id, disk_bus=disk_bus,
  3722. device_type=device_type, tag=tag,
  3723. multiattach=volume['multiattach'])
  3724. volume_bdm.delete_on_termination = delete_on_termination
  3725. volume_bdm.save()
  3726. return volume_bdm
  3727. def _check_volume_already_attached_to_instance(self, context, instance,
  3728. volume_id):
  3729. """Avoid attaching the same volume to the same instance twice.
  3730. As the new Cinder flow (microversion 3.44) is handling the checks
  3731. differently and allows to attach the same volume to the same
  3732. instance twice to enable live_migrate we are checking whether the
  3733. BDM already exists for this combination for the new flow and fail
  3734. if it does.
  3735. """
  3736. try:
  3737. objects.BlockDeviceMapping.get_by_volume_and_instance(
  3738. context, volume_id, instance.uuid)
  3739. msg = _("volume %s already attached") % volume_id
  3740. raise exception.InvalidVolume(reason=msg)
  3741. except exception.VolumeBDMNotFound:
  3742. pass
  3743. def _check_attach_and_reserve_volume(self, context, volume, instance,
  3744. bdm, supports_multiattach=False,
  3745. validate_az=True):
  3746. """Perform checks against the instance and volume before attaching.
  3747. If validation succeeds, the bdm is updated with an attachment_id which
  3748. effectively reserves it during the attach process in cinder.
  3749. :param context: nova auth RequestContext
  3750. :param volume: volume dict from cinder
  3751. :param instance: Instance object
  3752. :param bdm: BlockDeviceMapping object
  3753. :param supports_multiattach: True if the request supports multiattach
  3754. volumes, i.e. microversion >= 2.60, False otherwise
  3755. :param validate_az: True if the instance and volume availability zones
  3756. should be validated for cross_az_attach, False to not validate AZ
  3757. """
  3758. volume_id = volume['id']
  3759. if validate_az:
  3760. self.volume_api.check_availability_zone(context, volume,
  3761. instance=instance)
  3762. # If volume.multiattach=True and the microversion to
  3763. # support multiattach is not used, fail the request.
  3764. if volume['multiattach'] and not supports_multiattach:
  3765. raise exception.MultiattachNotSupportedOldMicroversion()
  3766. attachment_id = self.volume_api.attachment_create(
  3767. context, volume_id, instance.uuid)['id']
  3768. bdm.attachment_id = attachment_id
  3769. # NOTE(ildikov): In case of boot from volume the BDM at this
  3770. # point is not yet created in a cell database, so we can't
  3771. # call save(). When attaching a volume to an existing
  3772. # instance, the instance is already in a cell and the BDM has
  3773. # been created in that same cell so updating here in that case
  3774. # is "ok".
  3775. if bdm.obj_attr_is_set('id'):
  3776. bdm.save()
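# NOTE: Illustrative sketch, not part of this module, of the new-style Cinder
# attachment flow used above: attachment_create() without a host connector
# reserves the volume, the returned id is stored on the BDM, and the compute
# host later completes the attachment. Names below are placeholders.
#
#     from nova.volume import cinder
#
#     volume_api = cinder.API()
#     attachment = volume_api.attachment_create(
#         ctxt, 'VOLUME_UUID', 'INSTANCE_UUID')
#     bdm.attachment_id = attachment['id']
#     # ... later, on the compute host, the attachment is updated with a real
#     # connector and completed via attachment_complete().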
  3777. # TODO(stephenfin): Fold this back in now that cells v1 no longer needs to
  3778. # override it.
  3779. def _attach_volume(self, context, instance, volume, device,
  3780. disk_bus, device_type, tag=None,
  3781. supports_multiattach=False,
  3782. delete_on_termination=False):
  3783. """Attach an existing volume to an existing instance.
  3784. This method is separated to make it possible for cells version
  3785. to override it.
  3786. """
  3787. volume_bdm = self._create_volume_bdm(
  3788. context, instance, device, volume, disk_bus=disk_bus,
  3789. device_type=device_type, tag=tag,
  3790. delete_on_termination=delete_on_termination)
  3791. try:
  3792. self._check_attach_and_reserve_volume(context, volume, instance,
  3793. volume_bdm,
  3794. supports_multiattach)
  3795. self._record_action_start(
  3796. context, instance, instance_actions.ATTACH_VOLUME)
  3797. self.compute_rpcapi.attach_volume(context, instance, volume_bdm)
  3798. except Exception:
  3799. with excutils.save_and_reraise_exception():
  3800. volume_bdm.destroy()
  3801. return volume_bdm.device_name
  3802. def _attach_volume_shelved_offloaded(self, context, instance, volume,
  3803. device, disk_bus, device_type,
  3804. delete_on_termination):
  3805. """Attach an existing volume to an instance in shelved offloaded state.
  3806. Attaching a volume for an instance in shelved offloaded state requires
  3807. to perform the regular check to see if we can attach and reserve the
  3808. volume then we need to call the attach method on the volume API
  3809. to mark the volume as 'in-use'.
  3810. The instance at this stage is not managed by a compute manager
  3811. therefore the actual attachment will be performed once the
  3812. instance will be unshelved.
  3813. """
  3814. volume_id = volume['id']
  3815. @wrap_instance_event(prefix='api')
  3816. def attach_volume(self, context, v_id, instance, dev, attachment_id):
  3817. if attachment_id:
  3818. # Normally we wouldn't complete an attachment without a host
  3819. # connector, but we do this to make the volume status change
  3820. # to "in-use" to maintain the API semantics with the old flow.
  3821. # When unshelving the instance, the compute service will deal
  3822. # with this disconnected attachment.
  3823. self.volume_api.attachment_complete(context, attachment_id)
  3824. else:
  3825. self.volume_api.attach(context,
  3826. v_id,
  3827. instance.uuid,
  3828. dev)
  3829. volume_bdm = self._create_volume_bdm(
  3830. context, instance, device, volume, disk_bus=disk_bus,
  3831. device_type=device_type, is_local_creation=True,
  3832. delete_on_termination=delete_on_termination)
  3833. try:
  3834. self._check_attach_and_reserve_volume(context, volume, instance,
  3835. volume_bdm)
  3836. self._record_action_start(
  3837. context, instance,
  3838. instance_actions.ATTACH_VOLUME)
  3839. attach_volume(self, context, volume_id, instance, device,
  3840. volume_bdm.attachment_id)
  3841. except Exception:
  3842. with excutils.save_and_reraise_exception():
  3843. volume_bdm.destroy()
  3844. return volume_bdm.device_name
  3845. @check_instance_lock
  3846. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  3847. vm_states.STOPPED, vm_states.RESIZED,
  3848. vm_states.SOFT_DELETED, vm_states.SHELVED,
  3849. vm_states.SHELVED_OFFLOADED])
  3850. def attach_volume(self, context, instance, volume_id, device=None,
  3851. disk_bus=None, device_type=None, tag=None,
  3852. supports_multiattach=False,
  3853. delete_on_termination=False):
  3854. """Attach an existing volume to an existing instance."""
  3855. # NOTE(vish): Fail fast if the device is not going to pass. This
  3856. # will need to be removed along with the test if we
  3857. # change the logic in the manager for what constitutes
  3858. # a valid device.
  3859. if device and not block_device.match_device(device):
  3860. raise exception.InvalidDevicePath(path=device)
  3861. # Make sure the volume isn't already attached to this instance
  3862. # because we'll use the v3.44 attachment flow in
  3863. # _check_attach_and_reserve_volume and Cinder will allow multiple
  3864. # attachments between the same volume and instance but the old flow
  3865. # API semantics don't allow that so we enforce it here.
  3866. self._check_volume_already_attached_to_instance(context,
  3867. instance,
  3868. volume_id)
  3869. volume = self.volume_api.get(context, volume_id)
  3870. is_shelved_offloaded = instance.vm_state == vm_states.SHELVED_OFFLOADED
  3871. if is_shelved_offloaded:
  3872. if tag:
3873. # NOTE(artom) Local attach (to a shelved-offloaded instance)
  3874. # cannot support device tagging because we have no way to call
  3875. # the compute manager to check that it supports device tagging.
3876. # In fact, we don't even know which compute manager the
  3877. # instance will eventually end up on when it's unshelved.
  3878. raise exception.VolumeTaggedAttachToShelvedNotSupported()
  3879. if volume['multiattach']:
  3880. # NOTE(mriedem): Similar to tagged attach, we don't support
  3881. # attaching a multiattach volume to shelved offloaded instances
  3882. # because we can't tell if the compute host (since there isn't
  3883. # one) supports it. This could possibly be supported in the
  3884. # future if the scheduler was made aware of which computes
  3885. # support multiattach volumes.
  3886. raise exception.MultiattachToShelvedNotSupported()
  3887. return self._attach_volume_shelved_offloaded(context,
  3888. instance,
  3889. volume,
  3890. device,
  3891. disk_bus,
  3892. device_type,
  3893. delete_on_termination)
  3894. return self._attach_volume(context, instance, volume, device,
  3895. disk_bus, device_type, tag=tag,
  3896. supports_multiattach=supports_multiattach,
  3897. delete_on_termination=delete_on_termination)
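# NOTE: Illustrative usage sketch only, not part of this module. It assumes
# the caller's API microversion permits tagged attach, multiattach volumes
# and delete_on_termination; `ctxt`, `instance` and the volume UUID are
# placeholders.
#
#     from nova.compute import api as compute_api
#
#     device = compute_api.API().attach_volume(
#         ctxt, instance, 'VOLUME_UUID',
#         tag='database-disk',
#         supports_multiattach=True,
#         delete_on_termination=True)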
  3898. # TODO(stephenfin): Fold this back in now that cells v1 no longer needs to
  3899. # override it.
  3900. def _detach_volume(self, context, instance, volume):
  3901. """Detach volume from instance.
  3902. This method is separated to make it easier for cells version
  3903. to override.
  3904. """
  3905. try:
  3906. self.volume_api.begin_detaching(context, volume['id'])
  3907. except exception.InvalidInput as exc:
  3908. raise exception.InvalidVolume(reason=exc.format_message())
  3909. attachments = volume.get('attachments', {})
  3910. attachment_id = None
  3911. if attachments and instance.uuid in attachments:
  3912. attachment_id = attachments[instance.uuid]['attachment_id']
  3913. self._record_action_start(
  3914. context, instance, instance_actions.DETACH_VOLUME)
  3915. self.compute_rpcapi.detach_volume(context, instance=instance,
  3916. volume_id=volume['id'], attachment_id=attachment_id)
  3917. def _detach_volume_shelved_offloaded(self, context, instance, volume):
  3918. """Detach a volume from an instance in shelved offloaded state.
  3919. If the instance is shelved offloaded we just need to cleanup volume
  3920. calling the volume api detach, the volume api terminate_connection
  3921. and delete the bdm record.
  3922. If the volume has delete_on_termination option set then we call the
  3923. volume api delete as well.
  3924. """
  3925. @wrap_instance_event(prefix='api')
  3926. def detach_volume(self, context, instance, bdms):
  3927. self._local_cleanup_bdm_volumes(bdms, instance, context)
  3928. bdms = [objects.BlockDeviceMapping.get_by_volume_id(
  3929. context, volume['id'], instance.uuid)]
  3930. # The begin_detaching() call only works with in-use volumes,
  3931. # which will not be the case for volumes attached to a shelved
  3932. # offloaded server via the attachments API since those volumes
  3933. # will have `reserved` status.
  3934. if not bdms[0].attachment_id:
  3935. try:
  3936. self.volume_api.begin_detaching(context, volume['id'])
  3937. except exception.InvalidInput as exc:
  3938. raise exception.InvalidVolume(reason=exc.format_message())
  3939. self._record_action_start(
  3940. context, instance,
  3941. instance_actions.DETACH_VOLUME)
  3942. detach_volume(self, context, instance, bdms)
  3943. @check_instance_lock
  3944. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  3945. vm_states.STOPPED, vm_states.RESIZED,
  3946. vm_states.SOFT_DELETED, vm_states.SHELVED,
  3947. vm_states.SHELVED_OFFLOADED])
  3948. def detach_volume(self, context, instance, volume):
  3949. """Detach a volume from an instance."""
  3950. if instance.vm_state == vm_states.SHELVED_OFFLOADED:
  3951. self._detach_volume_shelved_offloaded(context, instance, volume)
  3952. else:
  3953. self._detach_volume(context, instance, volume)
  3954. def _count_attachments_for_swap(self, ctxt, volume):
  3955. """Counts the number of attachments for a swap-related volume.
  3956. Attempts to only count read/write attachments if the volume attachment
3957. records exist, otherwise simply counts the number of attachments
  3958. regardless of attach mode.
  3959. :param ctxt: nova.context.RequestContext - user request context
  3960. :param volume: nova-translated volume dict from nova.volume.cinder.
  3961. :returns: count of attachments for the volume
  3962. """
  3963. # This is a dict, keyed by server ID, to a dict of attachment_id and
  3964. # mountpoint.
  3965. attachments = volume.get('attachments', {})
  3966. # Multiattach volumes can have more than one attachment, so if there
  3967. # is more than one attachment, attempt to count the read/write
  3968. # attachments.
  3969. if len(attachments) > 1:
  3970. count = 0
  3971. for attachment in attachments.values():
  3972. attachment_id = attachment['attachment_id']
  3973. # Get the attachment record for this attachment so we can
  3974. # get the attach_mode.
  3975. # TODO(mriedem): This could be optimized if we had
  3976. # GET /attachments/detail?volume_id=volume['id'] in Cinder.
  3977. try:
  3978. attachment_record = self.volume_api.attachment_get(
  3979. ctxt, attachment_id)
  3980. # Note that the attachment record from Cinder has
  3981. # attach_mode in the top-level of the resource but the
  3982. # nova.volume.cinder code translates it and puts the
  3983. # attach_mode in the connection_info for some legacy
  3984. # reason...
  3985. if attachment_record['attach_mode'] == 'rw':
  3986. count += 1
  3987. except exception.VolumeAttachmentNotFound:
  3988. # attachments are read/write by default so count it
  3989. count += 1
  3990. else:
  3991. count = len(attachments)
  3992. return count
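# NOTE: Minimal standalone sketch (assumed, simplified data shapes) of the
# read/write counting idea above, without the Cinder lookups: count only
# attachments whose attach_mode is 'rw', treating an unknown mode as
# read/write by default.
#
#     def count_rw(attachments, modes_by_attachment_id):
#         count = 0
#         for att in attachments.values():
#             mode = modes_by_attachment_id.get(att['attachment_id'], 'rw')
#             if mode == 'rw':
#                 count += 1
#         return count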
  3993. @check_instance_lock
  3994. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  3995. vm_states.RESIZED])
  3996. def swap_volume(self, context, instance, old_volume, new_volume):
  3997. """Swap volume attached to an instance."""
  3998. # The caller likely got the instance from volume['attachments']
  3999. # in the first place, but let's sanity check.
  4000. if not old_volume.get('attachments', {}).get(instance.uuid):
  4001. msg = _("Old volume is attached to a different instance.")
  4002. raise exception.InvalidVolume(reason=msg)
  4003. if new_volume['attach_status'] == 'attached':
  4004. msg = _("New volume must be detached in order to swap.")
  4005. raise exception.InvalidVolume(reason=msg)
  4006. if int(new_volume['size']) < int(old_volume['size']):
  4007. msg = _("New volume must be the same size or larger.")
  4008. raise exception.InvalidVolume(reason=msg)
  4009. self.volume_api.check_availability_zone(context, new_volume,
  4010. instance=instance)
  4011. try:
  4012. self.volume_api.begin_detaching(context, old_volume['id'])
  4013. except exception.InvalidInput as exc:
  4014. raise exception.InvalidVolume(reason=exc.format_message())
  4015. # Disallow swapping from multiattach volumes that have more than one
  4016. # read/write attachment. We know the old_volume has at least one
  4017. # attachment since it's attached to this server. The new_volume
  4018. # can't have any attachments because of the attach_status check above.
  4019. # We do this count after calling "begin_detaching" to lock against
  4020. # concurrent attachments being made while we're counting.
  4021. try:
  4022. if self._count_attachments_for_swap(context, old_volume) > 1:
  4023. raise exception.MultiattachSwapVolumeNotSupported()
  4024. except Exception: # This is generic to handle failures while counting
  4025. # We need to reset the detaching status before raising.
  4026. with excutils.save_and_reraise_exception():
  4027. self.volume_api.roll_detaching(context, old_volume['id'])
  4028. # Get the BDM for the attached (old) volume so we can tell if it was
  4029. # attached with the new-style Cinder 3.44 API.
  4030. bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
  4031. context, old_volume['id'], instance.uuid)
  4032. new_attachment_id = None
  4033. if bdm.attachment_id is None:
  4034. # This is an old-style attachment so reserve the new volume before
  4035. # we cast to the compute host.
  4036. self.volume_api.reserve_volume(context, new_volume['id'])
  4037. else:
  4038. try:
  4039. self._check_volume_already_attached_to_instance(
  4040. context, instance, new_volume['id'])
  4041. except exception.InvalidVolume:
  4042. with excutils.save_and_reraise_exception():
  4043. self.volume_api.roll_detaching(context, old_volume['id'])
  4044. # This is a new-style attachment so for the volume that we are
  4045. # going to swap to, create a new volume attachment.
  4046. new_attachment_id = self.volume_api.attachment_create(
  4047. context, new_volume['id'], instance.uuid)['id']
  4048. self._record_action_start(
  4049. context, instance, instance_actions.SWAP_VOLUME)
  4050. try:
  4051. self.compute_rpcapi.swap_volume(
  4052. context, instance=instance,
  4053. old_volume_id=old_volume['id'],
  4054. new_volume_id=new_volume['id'],
  4055. new_attachment_id=new_attachment_id)
  4056. except Exception:
  4057. with excutils.save_and_reraise_exception():
  4058. self.volume_api.roll_detaching(context, old_volume['id'])
  4059. if new_attachment_id is None:
  4060. self.volume_api.unreserve_volume(context, new_volume['id'])
  4061. else:
  4062. self.volume_api.attachment_delete(
  4063. context, new_attachment_id)
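# NOTE: Illustrative usage sketch only, not part of this module; `ctxt` and
# `instance` are placeholders and the volume dicts are the nova-translated
# ones returned by the volume API.
#
#     from nova.compute import api as compute_api
#     from nova.volume import cinder
#
#     volume_api = cinder.API()
#     old_volume = volume_api.get(ctxt, 'OLD_VOLUME_UUID')
#     new_volume = volume_api.get(ctxt, 'NEW_VOLUME_UUID')
#     compute_api.API().swap_volume(ctxt, instance, old_volume, new_volume)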
  4064. @check_instance_lock
  4065. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4066. vm_states.STOPPED],
  4067. task_state=[None])
  4068. def attach_interface(self, context, instance, network_id, port_id,
  4069. requested_ip, tag=None):
  4070. """Use hotplug to add an network adapter to an instance."""
  4071. self._record_action_start(
  4072. context, instance, instance_actions.ATTACH_INTERFACE)
  4073. # NOTE(gibi): Checking if the requested port has resource request as
  4074. # such ports are currently not supported as they would at least
  4075. # need resource allocation manipulation in placement but might also
  4076. # need a new scheduling if resource on this host is not available.
  4077. if port_id:
  4078. port = self.network_api.show_port(context, port_id)
  4079. if port['port'].get(constants.RESOURCE_REQUEST):
  4080. raise exception.AttachInterfaceWithQoSPolicyNotSupported(
  4081. instance_uuid=instance.uuid)
  4082. return self.compute_rpcapi.attach_interface(context,
  4083. instance=instance, network_id=network_id, port_id=port_id,
  4084. requested_ip=requested_ip, tag=tag)
  4085. @check_instance_lock
  4086. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4087. vm_states.STOPPED],
  4088. task_state=[None])
  4089. def detach_interface(self, context, instance, port_id):
  4090. """Detach an network adapter from an instance."""
  4091. self._record_action_start(
  4092. context, instance, instance_actions.DETACH_INTERFACE)
  4093. self.compute_rpcapi.detach_interface(context, instance=instance,
  4094. port_id=port_id)
  4095. def get_instance_metadata(self, context, instance):
  4096. """Get all metadata associated with an instance."""
  4097. return self.db.instance_metadata_get(context, instance.uuid)
  4098. @check_instance_lock
  4099. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4100. vm_states.SUSPENDED, vm_states.STOPPED],
  4101. task_state=None)
  4102. def delete_instance_metadata(self, context, instance, key):
  4103. """Delete the given metadata item from an instance."""
  4104. instance.delete_metadata_key(key)
  4105. self.compute_rpcapi.change_instance_metadata(context,
  4106. instance=instance,
  4107. diff={key: ['-']})
  4108. @check_instance_lock
  4109. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  4110. vm_states.SUSPENDED, vm_states.STOPPED],
  4111. task_state=None)
  4112. def update_instance_metadata(self, context, instance,
  4113. metadata, delete=False):
  4114. """Updates or creates instance metadata.
  4115. If delete is True, metadata items that are not specified in the
  4116. `metadata` argument will be deleted.
  4117. """
  4118. orig = dict(instance.metadata)
  4119. if delete:
  4120. _metadata = metadata
  4121. else:
  4122. _metadata = dict(instance.metadata)
  4123. _metadata.update(metadata)
  4124. self._check_metadata_properties_quota(context, _metadata)
  4125. instance.metadata = _metadata
  4126. instance.save()
  4127. diff = _diff_dict(orig, instance.metadata)
  4128. self.compute_rpcapi.change_instance_metadata(context,
  4129. instance=instance,
  4130. diff=diff)
  4131. return _metadata
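# NOTE: Hedged sketch of the metadata diff passed to the compute RPC API
# above. The real helper is _diff_dict() (defined elsewhere in this module);
# the values below only illustrate the assumed format, where removed keys map
# to ['-'] and added or changed keys map to ['+', new_value].
#
#     orig = {'role': 'web', 'tier': 'gold'}
#     new = {'role': 'db'}
#     # assumed diff: {'tier': ['-'], 'role': ['+', 'db']}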
  4132. @reject_sev_instances(instance_actions.LIVE_MIGRATION)
  4133. @check_instance_lock
  4134. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED])
  4135. def live_migrate(self, context, instance, block_migration,
  4136. disk_over_commit, host_name, force=None, async_=False):
  4137. """Migrate a server lively to a new host."""
  4138. LOG.debug("Going to try to live migrate instance to %s",
  4139. host_name or "another host", instance=instance)
  4140. if host_name:
  4141. # Validate the specified host before changing the instance task
  4142. # state.
  4143. nodes = objects.ComputeNodeList.get_all_by_host(context, host_name)
  4144. request_spec = objects.RequestSpec.get_by_instance_uuid(
  4145. context, instance.uuid)
  4146. instance.task_state = task_states.MIGRATING
  4147. instance.save(expected_task_state=[None])
  4148. self._record_action_start(context, instance,
  4149. instance_actions.LIVE_MIGRATION)
  4150. # NOTE(sbauza): Force is a boolean by the new related API version
  4151. if force is False and host_name:
  4152. # Unset the host to make sure we call the scheduler
  4153. # from the conductor LiveMigrationTask. Yes this is tightly-coupled
  4154. # to behavior in conductor and not great.
  4155. host_name = None
  4156. # FIXME(sbauza): Since only Ironic driver uses more than one
  4157. # compute per service but doesn't support live migrations,
  4158. # let's provide the first one.
  4159. target = nodes[0]
  4160. destination = objects.Destination(
  4161. host=target.host,
  4162. node=target.hypervisor_hostname
  4163. )
  4164. # This is essentially a hint to the scheduler to only consider
  4165. # the specified host but still run it through the filters.
  4166. request_spec.requested_destination = destination
  4167. try:
  4168. self.compute_task_api.live_migrate_instance(context, instance,
  4169. host_name, block_migration=block_migration,
  4170. disk_over_commit=disk_over_commit,
  4171. request_spec=request_spec, async_=async_)
  4172. except oslo_exceptions.MessagingTimeout as messaging_timeout:
  4173. with excutils.save_and_reraise_exception():
  4174. # NOTE(pkoniszewski): It is possible that MessagingTimeout
  4175. # occurs, but LM will still be in progress, so write
  4176. # instance fault to database
  4177. compute_utils.add_instance_fault_from_exc(context,
  4178. instance,
  4179. messaging_timeout)
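# NOTE: Illustrative usage sketch only, not part of this module; `ctxt` and
# `instance` are placeholders. With force=False and a host name, the named
# host is validated by the scheduler as a requested destination rather than
# bypassing the scheduler entirely.
#
#     from nova.compute import api as compute_api
#
#     compute_api.API().live_migrate(
#         ctxt, instance,
#         block_migration=False,
#         disk_over_commit=False,
#         host_name='compute-02',
#         force=False)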
  4180. @check_instance_lock
  4181. @check_instance_state(vm_state=[vm_states.ACTIVE],
  4182. task_state=[task_states.MIGRATING])
  4183. def live_migrate_force_complete(self, context, instance, migration_id):
  4184. """Force live migration to complete.
  4185. :param context: Security context
  4186. :param instance: The instance that is being migrated
  4187. :param migration_id: ID of ongoing migration
  4188. """
  4189. LOG.debug("Going to try to force live migration to complete",
  4190. instance=instance)
  4191. # NOTE(pkoniszewski): Get migration object to check if there is ongoing
  4192. # live migration for particular instance. Also pass migration id to
  4193. # compute to double check and avoid possible race condition.
  4194. migration = objects.Migration.get_by_id_and_instance(
  4195. context, migration_id, instance.uuid)
  4196. if migration.status != 'running':
  4197. raise exception.InvalidMigrationState(migration_id=migration_id,
  4198. instance_uuid=instance.uuid,
  4199. state=migration.status,
  4200. method='force complete')
  4201. self._record_action_start(
  4202. context, instance, instance_actions.LIVE_MIGRATION_FORCE_COMPLETE)
  4203. self.compute_rpcapi.live_migration_force_complete(
  4204. context, instance, migration)
  4205. @check_instance_lock
  4206. @check_instance_state(task_state=[task_states.MIGRATING])
  4207. def live_migrate_abort(self, context, instance, migration_id,
  4208. support_abort_in_queue=False):
  4209. """Abort an in-progress live migration.
  4210. :param context: Security context
  4211. :param instance: The instance that is being migrated
  4212. :param migration_id: ID of in-progress live migration
  4213. :param support_abort_in_queue: Flag indicating whether we can support
  4214. abort migrations in "queued" or "preparing" status.
  4215. """
  4216. migration = objects.Migration.get_by_id_and_instance(context,
  4217. migration_id, instance.uuid)
  4218. LOG.debug("Going to cancel live migration %s",
  4219. migration.id, instance=instance)
  4220. # If the microversion does not support abort migration in queue,
4221. # we are only able to abort migrations with `running` status;
  4222. # if it is supported, we are able to also abort migrations in
  4223. # `queued` and `preparing` status.
  4224. allowed_states = ['running']
  4225. queued_states = ['queued', 'preparing']
  4226. if support_abort_in_queue:
  4227. # The user requested a microversion that supports aborting a queued
  4228. # or preparing live migration. But we need to check that the
  4229. # compute service hosting the instance is new enough to support
  4230. # aborting a queued/preparing live migration, so we check the
  4231. # service version here.
  4232. allowed_states.extend(queued_states)
  4233. if migration.status not in allowed_states:
  4234. raise exception.InvalidMigrationState(migration_id=migration_id,
  4235. instance_uuid=instance.uuid,
  4236. state=migration.status,
  4237. method='abort live migration')
  4238. self._record_action_start(context, instance,
  4239. instance_actions.LIVE_MIGRATION_CANCEL)
  4240. self.compute_rpcapi.live_migration_abort(context,
  4241. instance, migration.id)
  4242. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  4243. vm_states.ERROR])
  4244. def evacuate(self, context, instance, host, on_shared_storage,
  4245. admin_password=None, force=None):
  4246. """Running evacuate to target host.
  4247. Checking vm compute host state, if the host not in expected_state,
  4248. raising an exception.
  4249. :param instance: The instance to evacuate
  4250. :param host: Target host. if not set, the scheduler will pick up one
  4251. :param on_shared_storage: True if instance files on shared storage
  4252. :param admin_password: password to set on rebuilt instance
  4253. :param force: Force the evacuation to the specific host target
  4254. """
  4255. LOG.debug('vm evacuation scheduled', instance=instance)
  4256. inst_host = instance.host
  4257. service = objects.Service.get_by_compute_host(context, inst_host)
  4258. if self.servicegroup_api.service_is_up(service):
  4259. LOG.error('Instance compute service state on %s '
  4260. 'expected to be down, but it was up.', inst_host)
  4261. raise exception.ComputeServiceInUse(host=inst_host)
  4262. request_spec = objects.RequestSpec.get_by_instance_uuid(
  4263. context, instance.uuid)
  4264. instance.task_state = task_states.REBUILDING
  4265. instance.save(expected_task_state=[None])
  4266. self._record_action_start(context, instance, instance_actions.EVACUATE)
  4267. # NOTE(danms): Create this as a tombstone for the source compute
  4268. # to find and cleanup. No need to pass it anywhere else.
  4269. migration = objects.Migration(context,
  4270. source_compute=instance.host,
  4271. source_node=instance.node,
  4272. instance_uuid=instance.uuid,
  4273. status='accepted',
  4274. migration_type='evacuation')
  4275. if host:
  4276. migration.dest_compute = host
  4277. migration.create()
  4278. compute_utils.notify_about_instance_usage(
  4279. self.notifier, context, instance, "evacuate")
  4280. compute_utils.notify_about_instance_action(
  4281. context, instance, CONF.host,
  4282. action=fields_obj.NotificationAction.EVACUATE,
  4283. source=fields_obj.NotificationSource.API)
  4284. # NOTE(sbauza): Force is a boolean by the new related API version
  4285. # TODO(stephenfin): Any reason we can't use 'not force' here to handle
  4286. # the pre-v2.29 API microversion, which wouldn't set force
  4287. if force is False and host:
  4288. nodes = objects.ComputeNodeList.get_all_by_host(context, host)
  4289. # NOTE(sbauza): Unset the host to make sure we call the scheduler
  4290. host = None
  4291. # FIXME(sbauza): Since only Ironic driver uses more than one
  4292. # compute per service but doesn't support evacuations,
  4293. # let's provide the first one.
  4294. target = nodes[0]
  4295. destination = objects.Destination(
  4296. host=target.host,
  4297. node=target.hypervisor_hostname
  4298. )
  4299. request_spec.requested_destination = destination
  4300. return self.compute_task_api.rebuild_instance(context,
  4301. instance=instance,
  4302. new_pass=admin_password,
  4303. injected_files=None,
  4304. image_ref=None,
  4305. orig_image_ref=None,
  4306. orig_sys_metadata=None,
  4307. bdms=None,
  4308. recreate=True,
  4309. on_shared_storage=on_shared_storage,
  4310. host=host,
  4311. request_spec=request_spec,
  4312. )
  4313. def get_migrations(self, context, filters):
  4314. """Get all migrations for the given filters."""
  4315. load_cells()
  4316. migrations = []
  4317. for cell in CELLS:
  4318. if cell.uuid == objects.CellMapping.CELL0_UUID:
  4319. continue
  4320. with nova_context.target_cell(context, cell) as cctxt:
  4321. migrations.extend(objects.MigrationList.get_by_filters(
  4322. cctxt, filters).objects)
  4323. return objects.MigrationList(objects=migrations)
  4324. def get_migrations_sorted(self, context, filters, sort_dirs=None,
  4325. sort_keys=None, limit=None, marker=None):
  4326. """Get all migrations for the given parameters."""
  4327. mig_objs = migration_list.get_migration_objects_sorted(
  4328. context, filters, limit, marker, sort_keys, sort_dirs)
  4329. # Due to cross-cell resize, we could have duplicate migration records
  4330. # while the instance is in VERIFY_RESIZE state in the destination cell
  4331. # but the original migration record still exists in the source cell.
  4332. # Filter out duplicate migration records here based on which record
  4333. # is newer (last updated).
  4334. def _get_newer_obj(obj1, obj2):
  4335. # created_at will always be set.
  4336. created_at1 = obj1.created_at
  4337. created_at2 = obj2.created_at
  4338. # updated_at might be None
  4339. updated_at1 = obj1.updated_at
  4340. updated_at2 = obj2.updated_at
  4341. # If both have updated_at, compare using that field.
  4342. if updated_at1 and updated_at2:
  4343. if updated_at1 > updated_at2:
  4344. return obj1
  4345. return obj2
  4346. # Compare created_at versus updated_at.
  4347. if updated_at1:
  4348. if updated_at1 > created_at2:
  4349. return obj1
  4350. return obj2
  4351. if updated_at2:
  4352. if updated_at2 > created_at1:
  4353. return obj2
  4354. return obj1
  4355. # Compare created_at only.
  4356. if created_at1 > created_at2:
  4357. return obj1
  4358. return obj2
  4359. # TODO(mriedem): This could be easier if we leveraged the "hidden"
  4360. # field on the Migration record and then just did like
  4361. # _get_unique_filter_method in the get_all() method for instances.
  4362. migrations_by_uuid = collections.OrderedDict() # maintain sort order
  4363. for migration in mig_objs:
  4364. if migration.uuid not in migrations_by_uuid:
  4365. migrations_by_uuid[migration.uuid] = migration
  4366. else:
  4367. # We have a collision, keep the newer record.
  4368. # Note that using updated_at could be wrong if changes-since or
  4369. # changes-before filters are being used but we have the same
  4370. # issue in _get_unique_filter_method for instances.
  4371. doppelganger = migrations_by_uuid[migration.uuid]
  4372. newer = _get_newer_obj(doppelganger, migration)
  4373. migrations_by_uuid[migration.uuid] = newer
  4374. return objects.MigrationList(objects=list(migrations_by_uuid.values()))
  4375. def get_migrations_in_progress_by_instance(self, context, instance_uuid,
  4376. migration_type=None):
  4377. """Get all migrations of an instance in progress."""
  4378. return objects.MigrationList.get_in_progress_by_instance(
  4379. context, instance_uuid, migration_type)
  4380. def get_migration_by_id_and_instance(self, context,
  4381. migration_id, instance_uuid):
  4382. """Get the migration of an instance by id."""
  4383. return objects.Migration.get_by_id_and_instance(
  4384. context, migration_id, instance_uuid)
  4385. def _get_bdm_by_volume_id(self, context, volume_id, expected_attrs=None):
  4386. """Retrieve a BDM without knowing its cell.
  4387. .. note:: The context will be targeted to the cell in which the
  4388. BDM is found, if any.
  4389. :param context: The API request context.
  4390. :param volume_id: The ID of the volume.
  4391. :param expected_attrs: list of any additional attributes that should
  4392. be joined when the BDM is loaded from the database.
  4393. :raises: nova.exception.VolumeBDMNotFound if not found in any cell
  4394. """
  4395. load_cells()
  4396. for cell in CELLS:
  4397. nova_context.set_target_cell(context, cell)
  4398. try:
  4399. return objects.BlockDeviceMapping.get_by_volume(
  4400. context, volume_id, expected_attrs=expected_attrs)
  4401. except exception.NotFound:
  4402. continue
  4403. raise exception.VolumeBDMNotFound(volume_id=volume_id)
  4404. def volume_snapshot_create(self, context, volume_id, create_info):
  4405. bdm = self._get_bdm_by_volume_id(
  4406. context, volume_id, expected_attrs=['instance'])
  4407. # We allow creating the snapshot in any vm_state as long as there is
  4408. # no task being performed on the instance and it has a host.
  4409. @check_instance_host
  4410. @check_instance_state(vm_state=None)
  4411. def do_volume_snapshot_create(self, context, instance):
  4412. self.compute_rpcapi.volume_snapshot_create(context, instance,
  4413. volume_id, create_info)
  4414. snapshot = {
  4415. 'snapshot': {
  4416. 'id': create_info.get('id'),
  4417. 'volumeId': volume_id
  4418. }
  4419. }
  4420. return snapshot
  4421. return do_volume_snapshot_create(self, context, bdm.instance)
  4422. def volume_snapshot_delete(self, context, volume_id, snapshot_id,
  4423. delete_info):
  4424. bdm = self._get_bdm_by_volume_id(
  4425. context, volume_id, expected_attrs=['instance'])
  4426. # We allow deleting the snapshot in any vm_state as long as there is
  4427. # no task being performed on the instance and it has a host.
  4428. @check_instance_host
  4429. @check_instance_state(vm_state=None)
  4430. def do_volume_snapshot_delete(self, context, instance):
  4431. self.compute_rpcapi.volume_snapshot_delete(context, instance,
  4432. volume_id, snapshot_id, delete_info)
  4433. do_volume_snapshot_delete(self, context, bdm.instance)
  4434. def external_instance_event(self, api_context, instances, events):
  4435. # NOTE(danms): The external API consumer just provides events,
  4436. # but doesn't know where they go. We need to collate lists
  4437. # by the host the affected instance is on and dispatch them
  4438. # according to host
  4439. instances_by_host = collections.defaultdict(list)
  4440. events_by_host = collections.defaultdict(list)
  4441. hosts_by_instance = collections.defaultdict(list)
  4442. cell_contexts_by_host = {}
  4443. for instance in instances:
  4444. # instance._context is used here since it's already targeted to
  4445. # the cell that the instance lives in, and we need to use that
  4446. # cell context to lookup any migrations associated to the instance.
  4447. for host in self._get_relevant_hosts(instance._context, instance):
  4448. # NOTE(danms): All instances on a host must have the same
  4449. # mapping, so just use that
  4450. # NOTE(mdbooth): We don't currently support migrations between
  4451. # cells, and given that the Migration record is hosted in the
  4452. # cell _get_relevant_hosts will likely have to change before we
  4453. # do. Consequently we can currently assume that the context for
  4454. # both the source and destination hosts of a migration is the
  4455. # same.
  4456. if host not in cell_contexts_by_host:
  4457. cell_contexts_by_host[host] = instance._context
  4458. instances_by_host[host].append(instance)
  4459. hosts_by_instance[instance.uuid].append(host)
  4460. for event in events:
  4461. if event.name == 'volume-extended':
  4462. # Volume extend is a user-initiated operation starting in the
  4463. # Block Storage service API. We record an instance action so
  4464. # the user can monitor the operation to completion.
  4465. host = hosts_by_instance[event.instance_uuid][0]
  4466. cell_context = cell_contexts_by_host[host]
  4467. objects.InstanceAction.action_start(
  4468. cell_context, event.instance_uuid,
  4469. instance_actions.EXTEND_VOLUME, want_result=False)
  4470. elif event.name == 'power-update':
  4471. host = hosts_by_instance[event.instance_uuid][0]
  4472. cell_context = cell_contexts_by_host[host]
  4473. if event.tag == external_event_obj.POWER_ON:
  4474. inst_action = instance_actions.START
  4475. elif event.tag == external_event_obj.POWER_OFF:
  4476. inst_action = instance_actions.STOP
  4477. else:
  4478. LOG.warning("Invalid power state %s. Cannot process "
  4479. "the event %s. Skipping it.", event.tag,
  4480. event)
  4481. continue
  4482. objects.InstanceAction.action_start(
  4483. cell_context, event.instance_uuid, inst_action,
  4484. want_result=False)
  4485. for host in hosts_by_instance[event.instance_uuid]:
  4486. events_by_host[host].append(event)
  4487. for host in instances_by_host:
  4488. cell_context = cell_contexts_by_host[host]
  4489. # TODO(salv-orlando): Handle exceptions raised by the rpc api layer
  4490. # in order to ensure that a failure in processing events on a host
  4491. # will not prevent processing events on other hosts
  4492. self.compute_rpcapi.external_instance_event(
  4493. cell_context, instances_by_host[host], events_by_host[host],
  4494. host=host)
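# NOTE: Minimal standalone sketch of the collation idea above: group events by
# the host of the affected instance so each compute host receives a single
# RPC call. The event and mapping shapes below are simplified assumptions.
#
#     import collections
#
#     def collate(events, host_by_instance_uuid):
#         events_by_host = collections.defaultdict(list)
#         for event in events:
#             host = host_by_instance_uuid[event['instance_uuid']]
#             events_by_host[host].append(event)
#         return events_by_host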
  4495. def _get_relevant_hosts(self, context, instance):
  4496. hosts = set()
  4497. hosts.add(instance.host)
  4498. if instance.migration_context is not None:
  4499. migration_id = instance.migration_context.migration_id
  4500. migration = objects.Migration.get_by_id(context, migration_id)
  4501. hosts.add(migration.dest_compute)
  4502. hosts.add(migration.source_compute)
  4503. LOG.debug('Instance %(instance)s is migrating, '
  4504. 'copying events to all relevant hosts: '
  4505. '%(hosts)s', {'instance': instance.uuid,
  4506. 'hosts': hosts})
  4507. return hosts
  4508. def get_instance_host_status(self, instance):
  4509. if instance.host:
  4510. try:
  4511. service = [service for service in instance.services if
  4512. service.binary == 'nova-compute'][0]
  4513. if service.forced_down:
  4514. host_status = fields_obj.HostStatus.DOWN
  4515. elif service.disabled:
  4516. host_status = fields_obj.HostStatus.MAINTENANCE
  4517. else:
  4518. alive = self.servicegroup_api.service_is_up(service)
  4519. host_status = ((alive and fields_obj.HostStatus.UP) or
  4520. fields_obj.HostStatus.UNKNOWN)
  4521. except IndexError:
  4522. host_status = fields_obj.HostStatus.NONE
  4523. else:
  4524. host_status = fields_obj.HostStatus.NONE
  4525. return host_status
  4526. def get_instances_host_statuses(self, instance_list):
  4527. host_status_dict = dict()
  4528. host_statuses = dict()
  4529. for instance in instance_list:
  4530. if instance.host:
  4531. if instance.host not in host_status_dict:
  4532. host_status = self.get_instance_host_status(instance)
  4533. host_status_dict[instance.host] = host_status
  4534. else:
  4535. host_status = host_status_dict[instance.host]
  4536. else:
  4537. host_status = fields_obj.HostStatus.NONE
  4538. host_statuses[instance.uuid] = host_status
  4539. return host_statuses
  4540. def target_host_cell(fn):
  4541. """Target a host-based function to a cell.
  4542. Expects to wrap a function of signature:
  4543. func(self, context, host, ...)
  4544. """
  4545. @functools.wraps(fn)
  4546. def targeted(self, context, host, *args, **kwargs):
  4547. mapping = objects.HostMapping.get_by_host(context, host)
  4548. nova_context.set_target_cell(context, mapping.cell_mapping)
  4549. return fn(self, context, host, *args, **kwargs)
  4550. return targeted
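# NOTE: Illustrative sketch of how the decorator above is meant to be applied;
# ExampleHostAPI and its ping() method are hypothetical, not part of this
# module.
#
#     class ExampleHostAPI(object):
#         @target_host_cell
#         def ping(self, context, host):
#             # By the time we get here, `context` has been targeted at the
#             # cell owning `host` via its HostMapping.
#             return host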
  4551. def _find_service_in_cell(context, service_id=None, service_host=None):
  4552. """Find a service by id or hostname by searching all cells.
  4553. If one matching service is found, return it. If none or multiple
  4554. are found, raise an exception.
  4555. :param context: A context.RequestContext
  4556. :param service_id: If not none, the DB ID of the service to find
  4557. :param service_host: If not None, the hostname of the service to find
  4558. :returns: An objects.Service
  4559. :raises: ServiceNotUnique if multiple matching IDs are found
  4560. :raises: NotFound if no matches are found
  4561. :raises: NovaException if called with neither search option
  4562. """
  4563. load_cells()
  4564. service = None
  4565. found_in_cell = None
  4566. is_uuid = False
  4567. if service_id is not None:
  4568. is_uuid = uuidutils.is_uuid_like(service_id)
  4569. if is_uuid:
  4570. lookup_fn = lambda c: objects.Service.get_by_uuid(c, service_id)
  4571. else:
  4572. lookup_fn = lambda c: objects.Service.get_by_id(c, service_id)
  4573. elif service_host is not None:
  4574. lookup_fn = lambda c: (
  4575. objects.Service.get_by_compute_host(c, service_host))
  4576. else:
  4577. LOG.exception('_find_service_in_cell called with no search parameters')
  4578. # This is intentionally cryptic so we don't leak implementation details
  4579. # out of the API.
  4580. raise exception.NovaException()
  4581. for cell in CELLS:
  4582. # NOTE(danms): Services can be in cell0, so don't skip it here
  4583. try:
  4584. with nova_context.target_cell(context, cell) as cctxt:
  4585. cell_service = lookup_fn(cctxt)
  4586. except exception.NotFound:
  4587. # NOTE(danms): Keep looking in other cells
  4588. continue
  4589. if service and cell_service:
  4590. raise exception.ServiceNotUnique()
  4591. service = cell_service
  4592. found_in_cell = cell
  4593. if service and is_uuid:
  4594. break
  4595. if service:
  4596. # NOTE(danms): Set the cell on the context so it remains
  4597. # when we return to our caller
  4598. nova_context.set_target_cell(context, found_in_cell)
  4599. return service
  4600. else:
  4601. raise exception.NotFound()
  4602. class HostAPI(base.Base):
  4603. """Sub-set of the Compute Manager API for managing host operations."""
  4604. def __init__(self, rpcapi=None, servicegroup_api=None):
  4605. self.rpcapi = rpcapi or compute_rpcapi.ComputeAPI()
  4606. self.servicegroup_api = servicegroup_api or servicegroup.API()
  4607. super(HostAPI, self).__init__()
  4608. def _assert_host_exists(self, context, host_name, must_be_up=False):
  4609. """Raise HostNotFound if compute host doesn't exist."""
  4610. service = objects.Service.get_by_compute_host(context, host_name)
  4611. if not service:
  4612. raise exception.HostNotFound(host=host_name)
  4613. if must_be_up and not self.servicegroup_api.service_is_up(service):
  4614. raise exception.ComputeServiceUnavailable(host=host_name)
  4615. return service['host']
  4616. @wrap_exception()
  4617. @target_host_cell
  4618. def set_host_enabled(self, context, host_name, enabled):
  4619. """Sets the specified host's ability to accept new instances."""
  4620. host_name = self._assert_host_exists(context, host_name)
  4621. payload = {'host_name': host_name, 'enabled': enabled}
  4622. compute_utils.notify_about_host_update(context,
  4623. 'set_enabled.start',
  4624. payload)
  4625. result = self.rpcapi.set_host_enabled(context, enabled=enabled,
  4626. host=host_name)
  4627. compute_utils.notify_about_host_update(context,
  4628. 'set_enabled.end',
  4629. payload)
  4630. return result
  4631. @target_host_cell
  4632. def get_host_uptime(self, context, host_name):
  4633. """Returns the result of calling "uptime" on the target host."""
  4634. host_name = self._assert_host_exists(context, host_name,
  4635. must_be_up=True)
  4636. return self.rpcapi.get_host_uptime(context, host=host_name)
  4637. @wrap_exception()
  4638. @target_host_cell
  4639. def host_power_action(self, context, host_name, action):
  4640. """Reboots, shuts down or powers up the host."""
  4641. host_name = self._assert_host_exists(context, host_name)
  4642. payload = {'host_name': host_name, 'action': action}
  4643. compute_utils.notify_about_host_update(context,
  4644. 'power_action.start',
  4645. payload)
  4646. result = self.rpcapi.host_power_action(context, action=action,
  4647. host=host_name)
  4648. compute_utils.notify_about_host_update(context,
  4649. 'power_action.end',
  4650. payload)
  4651. return result
  4652. @wrap_exception()
  4653. @target_host_cell
  4654. def set_host_maintenance(self, context, host_name, mode):
  4655. """Start/Stop host maintenance window. On start, it triggers
  4656. guest VMs evacuation.
  4657. """
  4658. host_name = self._assert_host_exists(context, host_name)
  4659. payload = {'host_name': host_name, 'mode': mode}
  4660. compute_utils.notify_about_host_update(context,
  4661. 'set_maintenance.start',
  4662. payload)
  4663. result = self.rpcapi.host_maintenance_mode(context,
  4664. host_param=host_name, mode=mode, host=host_name)
  4665. compute_utils.notify_about_host_update(context,
  4666. 'set_maintenance.end',
  4667. payload)
  4668. return result
  4669. def service_get_all(self, context, filters=None, set_zones=False,
  4670. all_cells=False, cell_down_support=False):
  4671. """Returns a list of services, optionally filtering the results.
4672. If specified, 'filters' should be a dictionary containing service
4673. attributes and matching values. E.g., to get a list of services for
  4674. the 'compute' topic, use filters={'topic': 'compute'}.
  4675. If all_cells=True, then scan all cells and merge the results.
  4676. If cell_down_support=True then return minimal service records
  4677. for cells that do not respond based on what we have in the
  4678. host mappings. These will have only 'binary' and 'host' set.
  4679. """
  4680. if filters is None:
  4681. filters = {}
  4682. disabled = filters.pop('disabled', None)
  4683. if 'availability_zone' in filters:
  4684. set_zones = True
  4685. # NOTE(danms): Eventually this all_cells nonsense should go away
  4686. # and we should always iterate over the cells. However, certain
  4687. # callers need the legacy behavior for now.
  4688. if all_cells:
  4689. services = []
  4690. service_dict = nova_context.scatter_gather_all_cells(context,
  4691. objects.ServiceList.get_all, disabled, set_zones=set_zones)
  4692. for cell_uuid, service in service_dict.items():
  4693. if not nova_context.is_cell_failure_sentinel(service):
  4694. services.extend(service)
  4695. elif cell_down_support:
  4696. unavailable_services = objects.ServiceList()
  4697. cid = [cm.id for cm in nova_context.CELLS
  4698. if cm.uuid == cell_uuid]
  4699. # We know cid[0] is in the list because we are using the
  4700. # same list that scatter_gather_all_cells used
  4701. hms = objects.HostMappingList.get_by_cell_id(context,
  4702. cid[0])
  4703. for hm in hms:
  4704. unavailable_services.objects.append(objects.Service(
  4705. binary='nova-compute', host=hm.host))
  4706. LOG.warning("Cell %s is not responding and hence only "
  4707. "partial results are available from this "
  4708. "cell.", cell_uuid)
  4709. services.extend(unavailable_services)
  4710. else:
  4711. LOG.warning("Cell %s is not responding and hence skipped "
  4712. "from the results.", cell_uuid)
  4713. else:
  4714. services = objects.ServiceList.get_all(context, disabled,
  4715. set_zones=set_zones)
  4716. ret_services = []
  4717. for service in services:
  4718. for key, val in filters.items():
  4719. if service[key] != val:
  4720. break
  4721. else:
  4722. # All filters matched.
  4723. ret_services.append(service)
  4724. return ret_services
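# Example (sketch, reusing the assumed "host_api" and admin context "ctxt"
# from the earlier example): listing nova-compute services across all
# cells while filtering on the Service 'binary' column.
#
#     services = host_api.service_get_all(
#         ctxt, filters={'binary': 'nova-compute'},
#         set_zones=True, all_cells=True)
#     for svc in services:
#         print(svc.host, svc.disabled)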
  4725. def service_get_by_id(self, context, service_id):
  4726. """Get service entry for the given service id or uuid."""
  4727. try:
  4728. return _find_service_in_cell(context, service_id=service_id)
  4729. except exception.NotFound:
  4730. raise exception.ServiceNotFound(service_id=service_id)
  4731. @target_host_cell
  4732. def service_get_by_compute_host(self, context, host_name):
  4733. """Get service entry for the given compute hostname."""
  4734. return objects.Service.get_by_compute_host(context, host_name)
  4735. def _update_compute_provider_status(self, context, service):
  4736. """Calls the compute service to sync the COMPUTE_STATUS_DISABLED trait.
  4737. There are two cases where the API will not call the compute service:
  4738. * The compute service is down. In this case the trait is synchronized
  4739. when the compute service is restarted.
  4740. * The compute service is old. In this case the trait is synchronized
  4741. when the compute service is upgraded and restarted.
  4742. :param context: nova auth RequestContext
  4743. :param service: nova.objects.Service object which has been enabled
  4744. or disabled (see ``service_update``).
  4745. """
  4746. # Make sure the service is up so we can make the RPC call.
  4747. if not self.servicegroup_api.service_is_up(service):
  4748. LOG.info('Compute service on host %s is down. The '
  4749. 'COMPUTE_STATUS_DISABLED trait will be synchronized '
  4750. 'when the service is restarted.', service.host)
  4751. return
  4752. # Make sure the compute service is new enough for the trait sync
  4753. # behavior.
  4754. # TODO(mriedem): Remove this compat check in the U release.
  4755. if service.version < MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED:
  4756. LOG.info('Compute service on host %s is too old to sync the '
  4757. 'COMPUTE_STATUS_DISABLED trait in Placement. The '
  4758. 'trait will be synchronized when the service is '
  4759. 'upgraded and restarted.', service.host)
  4760. return
  4761. enabled = not service.disabled
  4762. # Avoid leaking errors out of the API.
  4763. try:
  4764. LOG.debug('Calling the compute service on host %s to sync the '
  4765. 'COMPUTE_STATUS_DISABLED trait.', service.host)
  4766. self.rpcapi.set_host_enabled(context, service.host, enabled)
  4767. except Exception:
  4768. LOG.exception('An error occurred while updating the '
  4769. 'COMPUTE_STATUS_DISABLED trait on compute node '
  4770. 'resource providers managed by host %s. The trait '
  4771. 'will be synchronized automatically by the compute '
  4772. 'service when the update_available_resource '
  4773. 'periodic task runs.', service.host)
  4774. def service_update(self, context, service):
  4775. """Performs the actual service update operation.
  4776. If the "disabled" field is changed, potentially calls the compute
  4777. service to sync the COMPUTE_STATUS_DISABLED trait on the compute node
  4778. resource providers managed by this compute service.
  4779. :param context: nova auth RequestContext
  4780. :param service: nova.objects.Service object with changes already
  4781. set on the object
  4782. """
  4783. # Before persisting changes and resetting the changed fields on the
  4784. # Service object, determine if the disabled field changed.
  4785. update_placement = 'disabled' in service.obj_what_changed()
  4786. # Persist the Service object changes to the database.
  4787. service.save()
  4788. # If the disabled field changed, potentially call the compute service
  4789. # to sync the COMPUTE_STATUS_DISABLED trait.
  4790. if update_placement:
  4791. self._update_compute_provider_status(context, service)
  4792. return service
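# Example (sketch of the versioned-object pattern service_update relies
# on; "service" is assumed to be a loaded objects.Service): setting the
# field before save() is what makes 'disabled' appear in
# obj_what_changed() and triggers the trait sync above.
#
#     service.disabled = True
#     service.disabled_reason = 'maintenance'
#     assert 'disabled' in service.obj_what_changed()
#     host_api.service_update(ctxt, service)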
  4793. @target_host_cell
  4794. def service_update_by_host_and_binary(self, context, host_name, binary,
  4795. params_to_update):
  4796. """Enable / Disable a service.
  4797. Determines the cell that the service is in using the HostMapping.
  4798. For compute services, this stops new builds and migrations going to
  4799. the host.
  4800. See also ``service_update``.
  4801. :param context: nova auth RequestContext
  4802. :param host_name: hostname of the service
  4803. :param binary: service binary (really only supports "nova-compute")
  4804. :param params_to_update: dict of changes to make to the Service object
  4805. :raises: HostMappingNotFound if the host is not mapped to a cell
  4806. :raises: HostBinaryNotFound if a services table record is not found
  4807. with the given host_name and binary
  4808. """
  4809. # TODO(mriedem): Service.get_by_args is deprecated; we should use
  4810. # get_by_compute_host here (remember to update the "raises" docstring).
  4811. service = objects.Service.get_by_args(context, host_name, binary)
  4812. service.update(params_to_update)
  4813. return self.service_update(context, service)
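# Example (sketch): disabling the nova-compute service on a host by name;
# the dict keys map directly onto objects.Service fields.
#
#     host_api.service_update_by_host_and_binary(
#         ctxt, 'compute-01', 'nova-compute',
#         {'disabled': True, 'disabled_reason': 'maintenance window'})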
  4814. @target_host_cell
  4815. def instance_get_all_by_host(self, context, host_name):
  4816. """Return all instances on the given host."""
  4817. return objects.InstanceList.get_by_host(context, host_name)
  4818. def task_log_get_all(self, context, task_name, period_beginning,
  4819. period_ending, host=None, state=None):
  4820. """Return the task logs within a given range, optionally
  4821. filtering by host and/or state.
  4822. """
  4823. return self.db.task_log_get_all(context, task_name,
  4824. period_beginning,
  4825. period_ending,
  4826. host=host,
  4827. state=state)
  4828. def compute_node_get(self, context, compute_id):
  4829. """Return compute node entry for particular integer ID or UUID."""
  4830. load_cells()
  4831. # NOTE(danms): Unfortunately this API exposes database identifiers
  4832. # which means we really can't do something efficient here
  4833. is_uuid = uuidutils.is_uuid_like(compute_id)
  4834. for cell in CELLS:
  4835. if cell.uuid == objects.CellMapping.CELL0_UUID:
  4836. continue
  4837. with nova_context.target_cell(context, cell) as cctxt:
  4838. try:
  4839. if is_uuid:
  4840. return objects.ComputeNode.get_by_uuid(cctxt,
  4841. compute_id)
  4842. return objects.ComputeNode.get_by_id(cctxt,
  4843. int(compute_id))
  4844. except exception.ComputeHostNotFound:
  4845. # NOTE(danms): Keep looking in other cells
  4846. continue
  4847. raise exception.ComputeHostNotFound(host=compute_id)
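# Example (sketch; the id values are assumed): both identifier forms are
# accepted. The integer id is a per-cell database key, while the uuid is
# globally unique, so the uuid form is preferred once cells are involved.
#
#     node = host_api.compute_node_get(ctxt, 42)
#     same_node = host_api.compute_node_get(ctxt, node.uuid)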
  4848. def compute_node_get_all(self, context, limit=None, marker=None):
  4849. load_cells()
  4850. computes = []
  4851. uuid_marker = marker and uuidutils.is_uuid_like(marker)
  4852. for cell in CELLS:
  4853. if cell.uuid == objects.CellMapping.CELL0_UUID:
  4854. continue
  4855. with nova_context.target_cell(context, cell) as cctxt:
  4856. # If we have a marker and it's a uuid, see if the compute node
  4857. # is in this cell.
  4858. if marker and uuid_marker:
  4859. try:
  4860. compute_marker = objects.ComputeNode.get_by_uuid(
  4861. cctxt, marker)
  4862. # We found the marker compute node, so use its id as the
  4863. # actual marker for paging in this cell's db.
  4864. marker = compute_marker.id
  4865. except exception.ComputeHostNotFound:
  4866. # The marker node isn't in this cell so keep looking.
  4867. continue
  4868. try:
  4869. cell_computes = objects.ComputeNodeList.get_by_pagination(
  4870. cctxt, limit=limit, marker=marker)
  4871. except exception.MarkerNotFound:
  4872. # NOTE(danms): Keep looking through cells
  4873. continue
  4874. computes.extend(cell_computes)
  4875. # NOTE(danms): We must have found the marker, so continue on
  4876. # without one
  4877. marker = None
  4878. if limit:
  4879. limit -= len(cell_computes)
  4880. if limit <= 0:
  4881. break
  4882. if marker is not None and len(computes) == 0:
  4883. # NOTE(danms): If we did not find the marker in any cell,
  4884. # mimic the db_api behavior here.
  4885. raise exception.MarkerNotFound(marker=marker)
  4886. return objects.ComputeNodeList(objects=computes)
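# Example (sketch): paging through compute nodes 100 at a time; the uuid
# of the last node on each page is passed as the marker for the next
# call so that paging can continue across cell boundaries.
#
#     page = host_api.compute_node_get_all(ctxt, limit=100)
#     while page:
#         last_uuid = page[-1].uuid
#         page = host_api.compute_node_get_all(ctxt, limit=100,
#                                              marker=last_uuid)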
  4887. def compute_node_search_by_hypervisor(self, context, hypervisor_match):
  4888. load_cells()
  4889. computes = []
  4890. for cell in CELLS:
  4891. if cell.uuid == objects.CellMapping.CELL0_UUID:
  4892. continue
  4893. with nova_context.target_cell(context, cell) as cctxt:
  4894. cell_computes = objects.ComputeNodeList.get_by_hypervisor(
  4895. cctxt, hypervisor_match)
  4896. computes.extend(cell_computes)
  4897. return objects.ComputeNodeList(objects=computes)
  4898. def compute_node_statistics(self, context):
  4899. load_cells()
  4900. cell_stats = []
  4901. for cell in CELLS:
  4902. if cell.uuid == objects.CellMapping.CELL0_UUID:
  4903. continue
  4904. with nova_context.target_cell(context, cell) as cctxt:
  4905. cell_stats.append(self.db.compute_node_statistics(cctxt))
  4906. if cell_stats:
  4907. keys = cell_stats[0].keys()
  4908. return {k: sum(stats[k] for stats in cell_stats)
  4909. for k in keys}
  4910. else:
  4911. return {}
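# Example (sketch of the merge step above, with made-up numbers): the
# per-cell statistics dictionaries share the same keys and are summed
# key by key.
#
#     cell_stats = [{'vcpus': 32, 'running_vms': 5},
#                   {'vcpus': 64, 'running_vms': 9}]
#     totals = {k: sum(s[k] for s in cell_stats) for k in cell_stats[0]}
#     # totals == {'vcpus': 96, 'running_vms': 14}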
  4912. class InstanceActionAPI(base.Base):
  4913. """Sub-set of the Compute Manager API for managing instance actions."""
  4914. def actions_get(self, context, instance, limit=None, marker=None,
  4915. filters=None):
  4916. return objects.InstanceActionList.get_by_instance_uuid(
  4917. context, instance.uuid, limit, marker, filters)
  4918. def action_get_by_request_id(self, context, instance, request_id):
  4919. return objects.InstanceAction.get_by_request_id(
  4920. context, instance.uuid, request_id)
  4921. def action_events_get(self, context, instance, action_id):
  4922. return objects.InstanceActionEventList.get_by_action(
  4923. context, action_id)
  4924. class AggregateAPI(base.Base):
  4925. """Sub-set of the Compute Manager API for managing host aggregates."""
  4926. def __init__(self, **kwargs):
  4927. self.compute_rpcapi = compute_rpcapi.ComputeAPI()
  4928. self.query_client = query.SchedulerQueryClient()
  4929. self._placement_client = None # Lazy-load on first access.
  4930. super(AggregateAPI, self).__init__(**kwargs)
  4931. @property
  4932. def placement_client(self):
  4933. if self._placement_client is None:
  4934. self._placement_client = report.SchedulerReportClient()
  4935. return self._placement_client
  4936. @wrap_exception()
  4937. def create_aggregate(self, context, aggregate_name, availability_zone):
  4938. """Creates the model for the aggregate."""
  4939. aggregate = objects.Aggregate(context=context)
  4940. aggregate.name = aggregate_name
  4941. if availability_zone:
  4942. aggregate.metadata = {'availability_zone': availability_zone}
  4943. aggregate.create()
  4944. self.query_client.update_aggregates(context, [aggregate])
  4945. return aggregate
  4946. def get_aggregate(self, context, aggregate_id):
  4947. """Get an aggregate by id."""
  4948. return objects.Aggregate.get_by_id(context, aggregate_id)
  4949. def get_aggregate_list(self, context):
  4950. """Get all the aggregates."""
  4951. return objects.AggregateList.get_all(context)
  4952. def get_aggregates_by_host(self, context, compute_host):
  4953. """Get all the aggregates where the given host is presented."""
  4954. return objects.AggregateList.get_by_host(context, compute_host)
  4955. @wrap_exception()
  4956. def update_aggregate(self, context, aggregate_id, values):
  4957. """Update the properties of an aggregate."""
  4958. aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
  4959. if 'name' in values:
  4960. aggregate.name = values.pop('name')
  4961. aggregate.save()
  4962. self.is_safe_to_update_az(context, values, aggregate=aggregate,
  4963. action_name=AGGREGATE_ACTION_UPDATE,
  4964. check_no_instances_in_az=True)
  4965. if values:
  4966. aggregate.update_metadata(values)
  4967. aggregate.updated_at = timeutils.utcnow()
  4968. self.query_client.update_aggregates(context, [aggregate])
  4969. # If the updated values include availability_zone, the cache
  4970. # that maps hosts to availability zones needs to be reset.
  4971. if values.get('availability_zone'):
  4972. availability_zones.reset_cache()
  4973. return aggregate
  4974. @wrap_exception()
  4975. def update_aggregate_metadata(self, context, aggregate_id, metadata):
  4976. """Updates the aggregate metadata."""
  4977. aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
  4978. self.is_safe_to_update_az(context, metadata, aggregate=aggregate,
  4979. action_name=AGGREGATE_ACTION_UPDATE_META,
  4980. check_no_instances_in_az=True)
  4981. aggregate.update_metadata(metadata)
  4982. self.query_client.update_aggregates(context, [aggregate])
  4983. # If the updated metadata includes availability_zone, the cache
  4984. # that maps hosts to availability zones needs to be reset.
  4985. if metadata and metadata.get('availability_zone'):
  4986. availability_zones.reset_cache()
  4987. aggregate.updated_at = timeutils.utcnow()
  4988. return aggregate
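# Example (sketch; "agg_id" is an assumed aggregate id): moving an
# aggregate into a new availability zone. is_safe_to_update_az() above
# rejects the change if a member host already belongs to a different AZ
# or still hosts instances.
#
#     aggregate_api = compute_api.AggregateAPI()
#     aggregate_api.update_aggregate_metadata(
#         ctxt, agg_id, {'availability_zone': 'az-east'})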
  4989. @wrap_exception()
  4990. def delete_aggregate(self, context, aggregate_id):
  4991. """Deletes the aggregate."""
  4992. aggregate_payload = {'aggregate_id': aggregate_id}
  4993. compute_utils.notify_about_aggregate_update(context,
  4994. "delete.start",
  4995. aggregate_payload)
  4996. aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
  4997. compute_utils.notify_about_aggregate_action(
  4998. context=context,
  4999. aggregate=aggregate,
  5000. action=fields_obj.NotificationAction.DELETE,
  5001. phase=fields_obj.NotificationPhase.START)
  5002. if len(aggregate.hosts) > 0:
  5003. msg = _("Host aggregate is not empty")
  5004. raise exception.InvalidAggregateActionDelete(
  5005. aggregate_id=aggregate_id, reason=msg)
  5006. aggregate.destroy()
  5007. self.query_client.delete_aggregate(context, aggregate)
  5008. compute_utils.notify_about_aggregate_update(context,
  5009. "delete.end",
  5010. aggregate_payload)
  5011. compute_utils.notify_about_aggregate_action(
  5012. context=context,
  5013. aggregate=aggregate,
  5014. action=fields_obj.NotificationAction.DELETE,
  5015. phase=fields_obj.NotificationPhase.END)
  5016. def is_safe_to_update_az(self, context, metadata, aggregate,
  5017. hosts=None,
  5018. action_name=AGGREGATE_ACTION_ADD,
  5019. check_no_instances_in_az=False):
  5020. """Determine if updates alter an aggregate's availability zone.
  5021. :param context: local context
  5022. :param metadata: Target metadata for updating aggregate
  5023. :param aggregate: Aggregate to update
  5024. :param hosts: Hosts to check. If None, aggregate.hosts is used
  5025. :type hosts: list
  5026. :param action_name: Calling method for logging purposes
  5027. :param check_no_instances_in_az: if True, it checks that
  5028. there are no instances on any host of the aggregate
  5029. """
  5030. if 'availability_zone' in metadata:
  5031. if not metadata['availability_zone']:
  5032. msg = _("Aggregate %s does not support empty named "
  5033. "availability zone") % aggregate.name
  5034. self._raise_invalid_aggregate_exc(action_name, aggregate.id,
  5035. msg)
  5036. _hosts = hosts or aggregate.hosts
  5037. host_aggregates = objects.AggregateList.get_by_metadata_key(
  5038. context, 'availability_zone', hosts=_hosts)
  5039. conflicting_azs = [
  5040. agg.availability_zone for agg in host_aggregates
  5041. if agg.availability_zone != metadata['availability_zone'] and
  5042. agg.id != aggregate.id]
  5043. if conflicting_azs:
  5044. msg = _("One or more hosts already in availability zone(s) "
  5045. "%s") % conflicting_azs
  5046. self._raise_invalid_aggregate_exc(action_name, aggregate.id,
  5047. msg)
  5048. same_az_name = (aggregate.availability_zone ==
  5049. metadata['availability_zone'])
  5050. if check_no_instances_in_az and not same_az_name:
  5051. instance_count_by_cell = (
  5052. nova_context.scatter_gather_skip_cell0(
  5053. context,
  5054. objects.InstanceList.get_count_by_hosts,
  5055. _hosts))
  5056. if any(cnt for cnt in instance_count_by_cell.values()):
  5057. msg = _("One or more hosts contain instances in this zone")
  5058. self._raise_invalid_aggregate_exc(
  5059. action_name, aggregate.id, msg)
  5060. def _raise_invalid_aggregate_exc(self, action_name, aggregate_id, reason):
  5061. if action_name == AGGREGATE_ACTION_ADD:
  5062. raise exception.InvalidAggregateActionAdd(
  5063. aggregate_id=aggregate_id, reason=reason)
  5064. elif action_name == AGGREGATE_ACTION_UPDATE:
  5065. raise exception.InvalidAggregateActionUpdate(
  5066. aggregate_id=aggregate_id, reason=reason)
  5067. elif action_name == AGGREGATE_ACTION_UPDATE_META:
  5068. raise exception.InvalidAggregateActionUpdateMeta(
  5069. aggregate_id=aggregate_id, reason=reason)
  5070. elif action_name == AGGREGATE_ACTION_DELETE:
  5071. raise exception.InvalidAggregateActionDelete(
  5072. aggregate_id=aggregate_id, reason=reason)
  5073. raise exception.NovaException(
  5074. _("Unexpected aggregate action %s") % action_name)
  5075. def _update_az_cache_for_host(self, context, host_name, aggregate_meta):
  5076. # Update the availability_zone cache so that a stale
  5077. # availability_zone is not returned during the cache retention
  5078. # time when a host is added to or removed from an aggregate.
  5079. if aggregate_meta and aggregate_meta.get('availability_zone'):
  5080. availability_zones.update_host_availability_zone_cache(context,
  5081. host_name)
  5082. @wrap_exception()
  5083. def add_host_to_aggregate(self, context, aggregate_id, host_name):
  5084. """Adds the host to an aggregate."""
  5085. aggregate_payload = {'aggregate_id': aggregate_id,
  5086. 'host_name': host_name}
  5087. compute_utils.notify_about_aggregate_update(context,
  5088. "addhost.start",
  5089. aggregate_payload)
  5090. # validates the host; HostMappingNotFound or ComputeHostNotFound
  5091. # is raised if invalid
  5092. try:
  5093. mapping = objects.HostMapping.get_by_host(context, host_name)
  5094. nova_context.set_target_cell(context, mapping.cell_mapping)
  5095. service = objects.Service.get_by_compute_host(context, host_name)
  5096. except exception.HostMappingNotFound:
  5097. try:
  5098. # NOTE(danms): This targets our cell
  5099. service = _find_service_in_cell(context,
  5100. service_host=host_name)
  5101. except exception.NotFound:
  5102. raise exception.ComputeHostNotFound(host=host_name)
  5103. if service.host != host_name:
  5104. # NOTE(danms): If we found a service but it is not an
  5105. # exact match, we may have a case-insensitive backend
  5106. # database (like mysql) which will end up with us
  5107. # adding the host-aggregate mapping with a
  5108. # non-matching hostname.
  5109. raise exception.ComputeHostNotFound(host=host_name)
  5110. aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
  5111. compute_utils.notify_about_aggregate_action(
  5112. context=context,
  5113. aggregate=aggregate,
  5114. action=fields_obj.NotificationAction.ADD_HOST,
  5115. phase=fields_obj.NotificationPhase.START)
  5116. self.is_safe_to_update_az(context, aggregate.metadata,
  5117. hosts=[host_name], aggregate=aggregate)
  5118. aggregate.add_host(host_name)
  5119. self.query_client.update_aggregates(context, [aggregate])
  5120. try:
  5121. self.placement_client.aggregate_add_host(
  5122. context, aggregate.uuid, host_name=host_name)
  5123. except (exception.ResourceProviderNotFound,
  5124. exception.ResourceProviderAggregateRetrievalFailed,
  5125. exception.ResourceProviderUpdateFailed,
  5126. exception.ResourceProviderUpdateConflict) as err:
  5127. # NOTE(jaypipes): We don't want a failure to perform the mirroring
  5128. # action in the placement service to be returned to the user (they
  5129. # probably don't know anything about the placement service and
  5130. # would just be confused). So, we just log a warning here, noting
  5131. # that on the next run of nova-manage placement sync_aggregates
  5132. # things will go back to normal
  5133. LOG.warning("Failed to associate %s with a placement "
  5134. "aggregate: %s. This may be corrected after running "
  5135. "nova-manage placement sync_aggregates.",
  5136. host_name, err)
  5137. self._update_az_cache_for_host(context, host_name, aggregate.metadata)
  5138. # NOTE(jogo): Send message to host to support resource pools
  5139. self.compute_rpcapi.add_aggregate_host(context,
  5140. aggregate=aggregate, host_param=host_name, host=host_name)
  5141. aggregate_payload.update({'name': aggregate.name})
  5142. compute_utils.notify_about_aggregate_update(context,
  5143. "addhost.end",
  5144. aggregate_payload)
  5145. compute_utils.notify_about_aggregate_action(
  5146. context=context,
  5147. aggregate=aggregate,
  5148. action=fields_obj.NotificationAction.ADD_HOST,
  5149. phase=fields_obj.NotificationPhase.END)
  5150. return aggregate
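# Example (sketch, reusing the assumed "aggregate_api" and "agg_id"):
# adding a host also mirrors the membership into a placement aggregate;
# if the placement call fails only a warning is logged and
# `nova-manage placement sync_aggregates` can reconcile later.
#
#     aggregate_api.add_host_to_aggregate(ctxt, agg_id, 'compute-01')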
  5151. @wrap_exception()
  5152. def remove_host_from_aggregate(self, context, aggregate_id, host_name):
  5153. """Removes host from the aggregate."""
  5154. aggregate_payload = {'aggregate_id': aggregate_id,
  5155. 'host_name': host_name}
  5156. compute_utils.notify_about_aggregate_update(context,
  5157. "removehost.start",
  5158. aggregate_payload)
  5159. # validates the host; HostMappingNotFound or ComputeHostNotFound
  5160. # is raised if invalid
  5161. mapping = objects.HostMapping.get_by_host(context, host_name)
  5162. nova_context.set_target_cell(context, mapping.cell_mapping)
  5163. objects.Service.get_by_compute_host(context, host_name)
  5164. aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
  5165. compute_utils.notify_about_aggregate_action(
  5166. context=context,
  5167. aggregate=aggregate,
  5168. action=fields_obj.NotificationAction.REMOVE_HOST,
  5169. phase=fields_obj.NotificationPhase.START)
  5170. # Remove the resource provider from the provider aggregate before
  5171. # we change anything on the nova side, because if we did the nova
  5172. # work first we could not re-attempt this from the compute API when
  5173. # cleaning up placement fails.
  5174. try:
  5175. # Anything else this raises is handled in the route handler as
  5176. # either a 409 (ResourceProviderUpdateConflict) or 500.
  5177. self.placement_client.aggregate_remove_host(
  5178. context, aggregate.uuid, host_name)
  5179. except exception.ResourceProviderNotFound as err:
  5180. # If the resource provider is not found then it's likely not part
  5181. # of the aggregate anymore anyway since provider aggregates are
  5182. # not resources themselves with metadata like nova aggregates, they
  5183. # are just a grouping concept around resource providers. Log and
  5184. # continue.
  5185. LOG.warning("Failed to remove association of %s with a placement "
  5186. "aggregate: %s.", host_name, err)
  5187. aggregate.delete_host(host_name)
  5188. self.query_client.update_aggregates(context, [aggregate])
  5189. self._update_az_cache_for_host(context, host_name, aggregate.metadata)
  5190. self.compute_rpcapi.remove_aggregate_host(context,
  5191. aggregate=aggregate, host_param=host_name, host=host_name)
  5192. compute_utils.notify_about_aggregate_update(context,
  5193. "removehost.end",
  5194. aggregate_payload)
  5195. compute_utils.notify_about_aggregate_action(
  5196. context=context,
  5197. aggregate=aggregate,
  5198. action=fields_obj.NotificationAction.REMOVE_HOST,
  5199. phase=fields_obj.NotificationPhase.END)
  5200. return aggregate
  5201. class KeypairAPI(base.Base):
  5202. """Subset of the Compute Manager API for managing key pairs."""
  5203. get_notifier = functools.partial(rpc.get_notifier, service='api')
  5204. wrap_exception = functools.partial(exception_wrapper.wrap_exception,
  5205. get_notifier=get_notifier,
  5206. binary='nova-api')
  5207. def _notify(self, context, event_suffix, keypair_name):
  5208. payload = {
  5209. 'tenant_id': context.project_id,
  5210. 'user_id': context.user_id,
  5211. 'key_name': keypair_name,
  5212. }
  5213. notify = self.get_notifier()
  5214. notify.info(context, 'keypair.%s' % event_suffix, payload)
  5215. def _validate_new_key_pair(self, context, user_id, key_name, key_type):
  5216. safe_chars = "_- " + string.digits + string.ascii_letters
  5217. clean_value = "".join(x for x in key_name if x in safe_chars)
  5218. if clean_value != key_name:
  5219. raise exception.InvalidKeypair(
  5220. reason=_("Keypair name contains unsafe characters"))
  5221. try:
  5222. utils.check_string_length(key_name, min_length=1, max_length=255)
  5223. except exception.InvalidInput:
  5224. raise exception.InvalidKeypair(
  5225. reason=_('Keypair name must be string and between '
  5226. '1 and 255 characters long'))
  5227. try:
  5228. objects.Quotas.check_deltas(context, {'key_pairs': 1}, user_id)
  5229. except exception.OverQuota:
  5230. raise exception.KeypairLimitExceeded()
  5231. @wrap_exception()
  5232. def import_key_pair(self, context, user_id, key_name, public_key,
  5233. key_type=keypair_obj.KEYPAIR_TYPE_SSH):
  5234. """Import a key pair using an existing public key."""
  5235. self._validate_new_key_pair(context, user_id, key_name, key_type)
  5236. self._notify(context, 'import.start', key_name)
  5237. keypair = objects.KeyPair(context)
  5238. keypair.user_id = user_id
  5239. keypair.name = key_name
  5240. keypair.type = key_type
  5241. keypair.fingerprint = None
  5242. keypair.public_key = public_key
  5243. compute_utils.notify_about_keypair_action(
  5244. context=context,
  5245. keypair=keypair,
  5246. action=fields_obj.NotificationAction.IMPORT,
  5247. phase=fields_obj.NotificationPhase.START)
  5248. fingerprint = self._generate_fingerprint(public_key, key_type)
  5249. keypair.fingerprint = fingerprint
  5250. keypair.create()
  5251. compute_utils.notify_about_keypair_action(
  5252. context=context,
  5253. keypair=keypair,
  5254. action=fields_obj.NotificationAction.IMPORT,
  5255. phase=fields_obj.NotificationPhase.END)
  5256. self._notify(context, 'import.end', key_name)
  5257. return keypair
  5258. @wrap_exception()
  5259. def create_key_pair(self, context, user_id, key_name,
  5260. key_type=keypair_obj.KEYPAIR_TYPE_SSH):
  5261. """Create a new key pair."""
  5262. self._validate_new_key_pair(context, user_id, key_name, key_type)
  5263. keypair = objects.KeyPair(context)
  5264. keypair.user_id = user_id
  5265. keypair.name = key_name
  5266. keypair.type = key_type
  5267. keypair.fingerprint = None
  5268. keypair.public_key = None
  5269. self._notify(context, 'create.start', key_name)
  5270. compute_utils.notify_about_keypair_action(
  5271. context=context,
  5272. keypair=keypair,
  5273. action=fields_obj.NotificationAction.CREATE,
  5274. phase=fields_obj.NotificationPhase.START)
  5275. private_key, public_key, fingerprint = self._generate_key_pair(
  5276. user_id, key_type)
  5277. keypair.fingerprint = fingerprint
  5278. keypair.public_key = public_key
  5279. keypair.create()
  5280. # NOTE(melwitt): We recheck the quota after creating the object to
  5281. # prevent users from allocating more resources than their allowed quota
  5282. # in the event of a race. This is configurable because it can be
  5283. # expensive if strict quota limits are not required in a deployment.
  5284. if CONF.quota.recheck_quota:
  5285. try:
  5286. objects.Quotas.check_deltas(context, {'key_pairs': 0}, user_id)
  5287. except exception.OverQuota:
  5288. keypair.destroy()
  5289. raise exception.KeypairLimitExceeded()
  5290. compute_utils.notify_about_keypair_action(
  5291. context=context,
  5292. keypair=keypair,
  5293. action=fields_obj.NotificationAction.CREATE,
  5294. phase=fields_obj.NotificationPhase.END)
  5295. self._notify(context, 'create.end', key_name)
  5296. return keypair, private_key
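# Example (sketch): creating an SSH key pair for the requesting user; the
# private key is only returned to the caller here, while the stored
# KeyPair object keeps just the public key and fingerprint.
#
#     keypair_api = compute_api.KeypairAPI()
#     keypair, private_key = keypair_api.create_key_pair(
#         ctxt, ctxt.user_id, 'my-key',
#         key_type=keypair_obj.KEYPAIR_TYPE_SSH)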
  5297. def _generate_fingerprint(self, public_key, key_type):
  5298. if key_type == keypair_obj.KEYPAIR_TYPE_SSH:
  5299. return crypto.generate_fingerprint(public_key)
  5300. elif key_type == keypair_obj.KEYPAIR_TYPE_X509:
  5301. return crypto.generate_x509_fingerprint(public_key)
  5302. def _generate_key_pair(self, user_id, key_type):
  5303. if key_type == keypair_obj.KEYPAIR_TYPE_SSH:
  5304. return crypto.generate_key_pair()
  5305. elif key_type == keypair_obj.KEYPAIR_TYPE_X509:
  5306. return crypto.generate_winrm_x509_cert(user_id)
  5307. @wrap_exception()
  5308. def delete_key_pair(self, context, user_id, key_name):
  5309. """Delete a keypair by name."""
  5310. self._notify(context, 'delete.start', key_name)
  5311. keypair = self.get_key_pair(context, user_id, key_name)
  5312. compute_utils.notify_about_keypair_action(
  5313. context=context,
  5314. keypair=keypair,
  5315. action=fields_obj.NotificationAction.DELETE,
  5316. phase=fields_obj.NotificationPhase.START)
  5317. objects.KeyPair.destroy_by_name(context, user_id, key_name)
  5318. compute_utils.notify_about_keypair_action(
  5319. context=context,
  5320. keypair=keypair,
  5321. action=fields_obj.NotificationAction.DELETE,
  5322. phase=fields_obj.NotificationPhase.END)
  5323. self._notify(context, 'delete.end', key_name)
  5324. def get_key_pairs(self, context, user_id, limit=None, marker=None):
  5325. """List key pairs."""
  5326. return objects.KeyPairList.get_by_user(
  5327. context, user_id, limit=limit, marker=marker)
  5328. def get_key_pair(self, context, user_id, key_name):
  5329. """Get a keypair by name."""
  5330. return objects.KeyPair.get_by_name(context, user_id, key_name)
  5331. class SecurityGroupAPI(base.Base, security_group_base.SecurityGroupBase):
  5332. """Sub-set of the Compute API related to managing security groups
  5333. and security group rules
  5334. """
  5335. # The nova security group api does not use a uuid for the id.
  5336. id_is_uuid = False
  5337. def __init__(self, **kwargs):
  5338. super(SecurityGroupAPI, self).__init__(**kwargs)
  5339. self.compute_rpcapi = compute_rpcapi.ComputeAPI()
  5340. def validate_property(self, value, property, allowed):
  5341. """Validate given security group property.
  5342. :param value: the value to validate, as a string or unicode
  5343. :param property: the property, either 'name' or 'description'
  5344. :param allowed: the range of characters allowed
  5345. """
  5346. try:
  5347. val = value.strip()
  5348. except AttributeError:
  5349. msg = _("Security group %s is not a string or unicode") % property
  5350. self.raise_invalid_property(msg)
  5351. utils.check_string_length(val, name=property, min_length=1,
  5352. max_length=255)
  5353. if allowed and not re.match(allowed, val):
  5354. # Some validation to ensure that values match API spec.
  5355. # - Alphanumeric characters, spaces, dashes, and underscores.
  5356. # TODO(Daviey): LP: #813685 extend beyond group_name checking, and
  5357. # probably create a param validator that can be used elsewhere.
  5358. msg = (_("Value (%(value)s) for parameter Group%(property)s is "
  5359. "invalid. Content limited to '%(allowed)s'.") %
  5360. {'value': value, 'allowed': allowed,
  5361. 'property': property.capitalize()})
  5362. self.raise_invalid_property(msg)
  5363. def ensure_default(self, context):
  5364. """Ensure that a context has a security group.
  5365. Creates a security group for the security context if it does not
  5366. already exist.
  5367. :param context: the security context
  5368. """
  5369. self.db.security_group_ensure_default(context)
  5370. def create_security_group(self, context, name, description):
  5371. try:
  5372. objects.Quotas.check_deltas(context, {'security_groups': 1},
  5373. context.project_id,
  5374. user_id=context.user_id)
  5375. except exception.OverQuota:
  5376. msg = _("Quota exceeded, too many security groups.")
  5377. self.raise_over_quota(msg)
  5378. LOG.info("Create Security Group %s", name)
  5379. self.ensure_default(context)
  5380. group = {'user_id': context.user_id,
  5381. 'project_id': context.project_id,
  5382. 'name': name,
  5383. 'description': description}
  5384. try:
  5385. group_ref = self.db.security_group_create(context, group)
  5386. except exception.SecurityGroupExists:
  5387. msg = _('Security group %s already exists') % name
  5388. self.raise_group_already_exists(msg)
  5389. # NOTE(melwitt): We recheck the quota after creating the object to
  5390. # prevent users from allocating more resources than their allowed quota
  5391. # in the event of a race. This is configurable because it can be
  5392. # expensive if strict quota limits are not required in a deployment.
  5393. if CONF.quota.recheck_quota:
  5394. try:
  5395. objects.Quotas.check_deltas(context, {'security_groups': 0},
  5396. context.project_id,
  5397. user_id=context.user_id)
  5398. except exception.OverQuota:
  5399. self.db.security_group_destroy(context, group_ref['id'])
  5400. msg = _("Quota exceeded, too many security groups.")
  5401. self.raise_over_quota(msg)
  5402. return group_ref
  5403. def update_security_group(self, context, security_group,
  5404. name, description):
  5405. if security_group['name'] in RO_SECURITY_GROUPS:
  5406. msg = (_("Unable to update system group '%s'") %
  5407. security_group['name'])
  5408. self.raise_invalid_group(msg)
  5409. group = {'name': name,
  5410. 'description': description}
  5411. columns_to_join = ['rules.grantee_group']
  5412. group_ref = self.db.security_group_update(context,
  5413. security_group['id'],
  5414. group,
  5415. columns_to_join=columns_to_join)
  5416. return group_ref
  5417. def get(self, context, name=None, id=None, map_exception=False):
  5418. self.ensure_default(context)
  5419. cols = ['rules']
  5420. try:
  5421. if name:
  5422. return self.db.security_group_get_by_name(context,
  5423. context.project_id,
  5424. name,
  5425. columns_to_join=cols)
  5426. elif id:
  5427. return self.db.security_group_get(context, id,
  5428. columns_to_join=cols)
  5429. except exception.NotFound as exp:
  5430. if map_exception:
  5431. msg = exp.format_message()
  5432. self.raise_not_found(msg)
  5433. else:
  5434. raise
  5435. def list(self, context, names=None, ids=None, project=None,
  5436. search_opts=None):
  5437. self.ensure_default(context)
  5438. groups = []
  5439. if names or ids:
  5440. if names:
  5441. for name in names:
  5442. groups.append(self.db.security_group_get_by_name(context,
  5443. project,
  5444. name))
  5445. if ids:
  5446. for id in ids:
  5447. groups.append(self.db.security_group_get(context, id))
  5448. elif context.is_admin:
  5449. # TODO(eglynn): support a wider set of search options than just
  5450. # all_tenants, at least include the standard filters defined for
  5451. # the EC2 DescribeSecurityGroups API for the non-admin case also
  5452. if (search_opts and 'all_tenants' in search_opts):
  5453. groups = self.db.security_group_get_all(context)
  5454. else:
  5455. groups = self.db.security_group_get_by_project(context,
  5456. project)
  5457. elif project:
  5458. groups = self.db.security_group_get_by_project(context, project)
  5459. return groups
  5460. def destroy(self, context, security_group):
  5461. if security_group['name'] in RO_SECURITY_GROUPS:
  5462. msg = _("Unable to delete system group '%s'") % \
  5463. security_group['name']
  5464. self.raise_invalid_group(msg)
  5465. if self.db.security_group_in_use(context, security_group['id']):
  5466. msg = _("Security group is still in use")
  5467. self.raise_invalid_group(msg)
  5468. LOG.info("Delete security group %s", security_group['name'])
  5469. self.db.security_group_destroy(context, security_group['id'])
  5470. def is_associated_with_server(self, security_group, instance_uuid):
  5471. """Check if the security group is already associated
  5472. with the instance. If Yes, return True.
  5473. """
  5474. if not security_group:
  5475. return False
  5476. instances = security_group.get('instances')
  5477. if not instances:
  5478. return False
  5479. for inst in instances:
  5480. if (instance_uuid == inst['uuid']):
  5481. return True
  5482. return False
  5483. def add_to_instance(self, context, instance, security_group_name):
  5484. """Add security group to the instance."""
  5485. security_group = self.db.security_group_get_by_name(context,
  5486. context.project_id,
  5487. security_group_name)
  5488. instance_uuid = instance.uuid
  5489. # check if the security group is associated with the server
  5490. if self.is_associated_with_server(security_group, instance_uuid):
  5491. raise exception.SecurityGroupExistsForInstance(
  5492. security_group_id=security_group['id'],
  5493. instance_id=instance_uuid)
  5494. self.db.instance_add_security_group(context.elevated(),
  5495. instance_uuid,
  5496. security_group['id'])
  5497. if instance.host:
  5498. self.compute_rpcapi.refresh_instance_security_rules(
  5499. context, instance, instance.host)
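# Example (sketch; "instance" is assumed to be an objects.Instance):
# adding an existing group to a server. A duplicate association raises
# SecurityGroupExistsForInstance, and an instance with a host gets a
# rules refresh over RPC.
#
#     secgroup_api = compute_api.SecurityGroupAPI()
#     secgroup_api.add_to_instance(ctxt, instance, 'web-servers')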
  5500. def remove_from_instance(self, context, instance, security_group_name):
  5501. """Remove the security group associated with the instance."""
  5502. security_group = self.db.security_group_get_by_name(context,
  5503. context.project_id,
  5504. security_group_name)
  5505. instance_uuid = instance.uuid
  5506. # check if the security group is associated with the server
  5507. if not self.is_associated_with_server(security_group, instance_uuid):
  5508. raise exception.SecurityGroupNotExistsForInstance(
  5509. security_group_id=security_group['id'],
  5510. instance_id=instance_uuid)
  5511. self.db.instance_remove_security_group(context.elevated(),
  5512. instance_uuid,
  5513. security_group['id'])
  5514. if instance.host:
  5515. self.compute_rpcapi.refresh_instance_security_rules(
  5516. context, instance, instance.host)
  5517. def get_rule(self, context, id):
  5518. self.ensure_default(context)
  5519. try:
  5520. return self.db.security_group_rule_get(context, id)
  5521. except exception.NotFound:
  5522. msg = _("Rule (%s) not found") % id
  5523. self.raise_not_found(msg)
  5524. def add_rules(self, context, id, name, vals):
  5525. """Add security group rule(s) to security group.
  5526. Note: the Nova security group API doesn't support adding multiple
  5527. security group rules at once but the EC2 one does. Therefore,
  5528. this function is written to support both.
  5529. """
  5530. try:
  5531. objects.Quotas.check_deltas(context,
  5532. {'security_group_rules': len(vals)},
  5533. id)
  5534. except exception.OverQuota:
  5535. msg = _("Quota exceeded, too many security group rules.")
  5536. self.raise_over_quota(msg)
  5537. msg = ("Security group %(name)s added %(protocol)s ingress "
  5538. "(%(from_port)s:%(to_port)s)")
  5539. rules = []
  5540. for v in vals:
  5541. rule = self.db.security_group_rule_create(context, v)
  5542. # NOTE(melwitt): We recheck the quota after creating the object to
  5543. # prevent users from allocating more resources than their allowed
  5544. # quota in the event of a race. This is configurable because it can
  5545. # be expensive if strict quota limits are not required in a
  5546. # deployment.
  5547. if CONF.quota.recheck_quota:
  5548. try:
  5549. objects.Quotas.check_deltas(context,
  5550. {'security_group_rules': 0},
  5551. id)
  5552. except exception.OverQuota:
  5553. self.db.security_group_rule_destroy(context, rule['id'])
  5554. msg = _("Quota exceeded, too many security group rules.")
  5555. self.raise_over_quota(msg)
  5556. rules.append(rule)
  5557. LOG.info(msg, {'name': name,
  5558. 'protocol': rule.protocol,
  5559. 'from_port': rule.from_port,
  5560. 'to_port': rule.to_port})
  5561. self.trigger_rules_refresh(context, id=id)
  5562. return rules
  5563. def remove_rules(self, context, security_group, rule_ids):
  5564. msg = ("Security group %(name)s removed %(protocol)s ingress "
  5565. "(%(from_port)s:%(to_port)s)")
  5566. for rule_id in rule_ids:
  5567. rule = self.get_rule(context, rule_id)
  5568. LOG.info(msg, {'name': security_group['name'],
  5569. 'protocol': rule.protocol,
  5570. 'from_port': rule.from_port,
  5571. 'to_port': rule.to_port})
  5572. self.db.security_group_rule_destroy(context, rule_id)
  5573. # NOTE(vish): we removed some rules, so refresh
  5574. self.trigger_rules_refresh(context, id=security_group['id'])
  5575. def validate_id(self, id):
  5576. try:
  5577. return int(id)
  5578. except ValueError:
  5579. msg = _("Security group id should be integer")
  5580. self.raise_invalid_property(msg)
  5581. def _refresh_instance_security_rules(self, context, instances):
  5582. for instance in instances:
  5583. if instance.host is not None:
  5584. self.compute_rpcapi.refresh_instance_security_rules(
  5585. context, instance, instance.host)
  5586. def trigger_rules_refresh(self, context, id):
  5587. """Called when a rule is added to or removed from a security_group."""
  5588. instances = objects.InstanceList.get_by_security_group_id(context, id)
  5589. self._refresh_instance_security_rules(context, instances)
  5590. def trigger_members_refresh(self, context, group_ids):
  5591. """Called when a security group gains a new or loses a member.
  5592. Sends an update request to each compute node for each instance for
  5593. which this is relevant.
  5594. """
  5595. instances = objects.InstanceList.get_by_grantee_security_group_ids(
  5596. context, group_ids)
  5597. self._refresh_instance_security_rules(context, instances)
  5598. def get_instance_security_groups(self, context, instance, detailed=False):
  5599. if detailed:
  5600. return self.db.security_group_get_by_instance(context,
  5601. instance.uuid)
  5602. return [{'name': group.name} for group in instance.security_groups]