OpenStack Compute (Nova)

6224 lines · 287 KB

# Copyright 2010 United States Government as represented by the
# Administrator of the National Aeronautics and Space Administration.
# Copyright 2011 Piston Cloud Computing, Inc.
# Copyright 2012-2013 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""Handles all requests relating to compute resources (e.g. guest VMs,
networking and storage of VMs, and compute hosts on which they run)."""

import collections
import functools
import re
import string

from castellan import key_manager
from oslo_log import log as logging
from oslo_messaging import exceptions as oslo_exceptions
from oslo_serialization import base64 as base64utils
from oslo_utils import excutils
from oslo_utils import strutils
from oslo_utils import timeutils
from oslo_utils import units
from oslo_utils import uuidutils
import six
from six.moves import range

from nova import availability_zones
from nova import block_device
from nova.compute import flavors
from nova.compute import instance_actions
from nova.compute import instance_list
from nova.compute import migration_list
from nova.compute import power_state
from nova.compute import rpcapi as compute_rpcapi
from nova.compute import task_states
from nova.compute import utils as compute_utils
from nova.compute.utils import wrap_instance_event
from nova.compute import vm_states
from nova import conductor
import nova.conf
from nova import context as nova_context
from nova import crypto
from nova.db import base
from nova.db.sqlalchemy import api as db_api
from nova import exception
from nova import exception_wrapper
from nova import hooks
from nova.i18n import _
from nova import image
from nova import network
from nova.network import model as network_model
from nova.network.neutronv2 import constants
from nova.network.security_group import openstack_driver
from nova.network.security_group import security_group_base
from nova import objects
from nova.objects import base as obj_base
from nova.objects import block_device as block_device_obj
from nova.objects import external_event as external_event_obj
from nova.objects import fields as fields_obj
from nova.objects import keypair as keypair_obj
from nova.objects import quotas as quotas_obj
from nova.pci import request as pci_request
from nova.policies import servers as servers_policies
import nova.policy
from nova import profiler
from nova import rpc
from nova.scheduler.client import query
from nova.scheduler.client import report
from nova.scheduler import utils as scheduler_utils
from nova import servicegroup
from nova import utils
from nova.virt import hardware
from nova.volume import cinder

LOG = logging.getLogger(__name__)

get_notifier = functools.partial(rpc.get_notifier, service='compute')
# NOTE(gibi): legacy notification used compute as a service but these
# calls still run on the client side of the compute service which is
# nova-api. By setting the binary to nova-api below, we can make sure
# that the new versioned notifications has the right publisher_id but the
# legacy notifications does not change.
wrap_exception = functools.partial(exception_wrapper.wrap_exception,
                                   get_notifier=get_notifier,
                                   binary='nova-api')

CONF = nova.conf.CONF

RO_SECURITY_GROUPS = ['default']

AGGREGATE_ACTION_UPDATE = 'Update'
AGGREGATE_ACTION_UPDATE_META = 'UpdateMeta'
AGGREGATE_ACTION_DELETE = 'Delete'
AGGREGATE_ACTION_ADD = 'Add'

MIN_COMPUTE_ABORT_QUEUED_LIVE_MIGRATION = 34
MIN_COMPUTE_VOLUME_TYPE = 36
MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED = 38

# FIXME(danms): Keep a global cache of the cells we find the
# first time we look. This needs to be refreshed on a timer or
# trigger.
CELLS = []


def check_instance_state(vm_state=None, task_state=(None,),
        must_have_launched=True):
    """Decorator to check VM and/or task state before entry to API functions.

    If the instance is in the wrong state, or has not been successfully
    started at least once the wrapper will raise an exception.
    """
    if vm_state is not None and not isinstance(vm_state, set):
        vm_state = set(vm_state)
    if task_state is not None and not isinstance(task_state, set):
        task_state = set(task_state)

    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            if vm_state is not None and instance.vm_state not in vm_state:
                raise exception.InstanceInvalidState(
                    attr='vm_state',
                    instance_uuid=instance.uuid,
                    state=instance.vm_state,
                    method=f.__name__)
            if (task_state is not None and
                    instance.task_state not in task_state):
                raise exception.InstanceInvalidState(
                    attr='task_state',
                    instance_uuid=instance.uuid,
                    state=instance.task_state,
                    method=f.__name__)
            if must_have_launched and not instance.launched_at:
                raise exception.InstanceInvalidState(
                    attr='launched_at',
                    instance_uuid=instance.uuid,
                    state=instance.launched_at,
                    method=f.__name__)
            return f(self, context, instance, *args, **kw)
        return inner
    return outer
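
# Illustrative usage (hypothetical method name; the decorator-argument pattern
# shown is an assumption based on how such checks are typically applied):
#
#     @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED],
#                           task_state=[None])
#     def example_api_method(self, context, instance):
#         ...
#
# With this wrapping, calling the method on an instance in any other vm_state
# or task_state raises InstanceInvalidState before the method body runs.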


def _set_or_none(q):
    return q if q is None or isinstance(q, set) else set(q)


def reject_instance_state(vm_state=None, task_state=None):
    """Decorator. Raise InstanceInvalidState if instance is in any of the
    given states.
    """
    vm_state = _set_or_none(vm_state)
    task_state = _set_or_none(task_state)

    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            _InstanceInvalidState = functools.partial(
                exception.InstanceInvalidState,
                instance_uuid=instance.uuid,
                method=f.__name__)
            if vm_state is not None and instance.vm_state in vm_state:
                raise _InstanceInvalidState(
                    attr='vm_state', state=instance.vm_state)
            if task_state is not None and instance.task_state in task_state:
                raise _InstanceInvalidState(
                    attr='task_state', state=instance.task_state)
            return f(self, context, instance, *args, **kw)
        return inner
    return outer


def check_instance_host(function):
    @six.wraps(function)
    def wrapped(self, context, instance, *args, **kwargs):
        if not instance.host:
            raise exception.InstanceNotReady(instance_id=instance.uuid)
        return function(self, context, instance, *args, **kwargs)
    return wrapped


def check_instance_lock(function):
    @six.wraps(function)
    def inner(self, context, instance, *args, **kwargs):
        if instance.locked and not context.is_admin:
            raise exception.InstanceIsLocked(instance_uuid=instance.uuid)
        return function(self, context, instance, *args, **kwargs)
    return inner


def reject_sev_instances(operation):
    """Decorator. Raise OperationNotSupportedForSEV if instance has SEV
    enabled.
    """
    def outer(f):
        @six.wraps(f)
        def inner(self, context, instance, *args, **kw):
            if hardware.get_mem_encryption_constraint(instance.flavor,
                                                      instance.image_meta):
                raise exception.OperationNotSupportedForSEV(
                    instance_uuid=instance.uuid,
                    operation=operation)
            return f(self, context, instance, *args, **kw)
        return inner
    return outer


def _diff_dict(orig, new):
    """Return a dict describing how to change orig to new. The keys
    correspond to values that have changed; the value will be a list
    of one or two elements. The first element of the list will be
    either '+' or '-', indicating whether the key was updated or
    deleted; if the key was updated, the list will contain a second
    element, giving the updated value.
    """
    # Figure out what keys went away
    result = {k: ['-'] for k in set(orig.keys()) - set(new.keys())}
    # Compute the updates
    for key, value in new.items():
        if key not in orig or value != orig[key]:
            result[key] = ['+', value]
    return result
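
# Illustrative example of _diff_dict (values chosen arbitrarily):
#     _diff_dict({'a': 1, 'b': 2}, {'b': 3, 'c': 4})
# returns {'a': ['-'], 'b': ['+', 3], 'c': ['+', 4]}, i.e. 'a' was deleted,
# 'b' was updated to 3 and 'c' was added with value 4.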


def load_cells():
    global CELLS
    if not CELLS:
        CELLS = objects.CellMappingList.get_all(
            nova_context.get_admin_context())
        LOG.debug('Found %(count)i cells: %(cells)s',
                  dict(count=len(CELLS),
                       cells=','.join([c.identity for c in CELLS])))
    if not CELLS:
        LOG.error('No cells are configured, unable to continue')


def _get_image_meta_obj(image_meta_dict):
    try:
        image_meta = objects.ImageMeta.from_dict(image_meta_dict)
    except ValueError as e:
        # there must be invalid values in the image meta properties so
        # consider this an invalid request
        msg = _('Invalid image metadata. Error: %s') % six.text_type(e)
        raise exception.InvalidRequest(msg)
    return image_meta


@profiler.trace_cls("compute_api")
class API(base.Base):
    """API for interacting with the compute manager."""

    def __init__(self, image_api=None, network_api=None, volume_api=None,
                 security_group_api=None, **kwargs):
        self.image_api = image_api or image.API()
        self.network_api = network_api or network.API()
        self.volume_api = volume_api or cinder.API()
        self._placementclient = None  # Lazy-load on first access.
        self.security_group_api = (security_group_api or
            openstack_driver.get_openstack_security_group_driver())
        self.compute_rpcapi = compute_rpcapi.ComputeAPI()
        self.compute_task_api = conductor.ComputeTaskAPI()
        self.servicegroup_api = servicegroup.API()
        self.host_api = HostAPI(self.compute_rpcapi, self.servicegroup_api)
        self.notifier = rpc.get_notifier('compute', CONF.host)
        if CONF.ephemeral_storage_encryption.enabled:
            self.key_manager = key_manager.API()
        # Help us to record host in EventReporter
        self.host = CONF.host
        super(API, self).__init__(**kwargs)

    def _record_action_start(self, context, instance, action):
        objects.InstanceAction.action_start(context, instance.uuid,
                                            action, want_result=False)

    def _check_injected_file_quota(self, context, injected_files):
        """Enforce quota limits on injected files.

        Raises a QuotaError if any limit is exceeded.
        """
        if injected_files is None:
            return
        # Check number of files first
        try:
            objects.Quotas.limit_check(context,
                                       injected_files=len(injected_files))
        except exception.OverQuota:
            raise exception.OnsetFileLimitExceeded()
        # OK, now count path and content lengths; we're looking for
        # the max...
        max_path = 0
        max_content = 0
        for path, content in injected_files:
            max_path = max(max_path, len(path))
            max_content = max(max_content, len(content))
        try:
            objects.Quotas.limit_check(context,
                                       injected_file_path_bytes=max_path,
                                       injected_file_content_bytes=max_content)
        except exception.OverQuota as exc:
            # Favor path limit over content limit for reporting
            # purposes
            if 'injected_file_path_bytes' in exc.kwargs['overs']:
                raise exception.OnsetFilePathLimitExceeded(
                    allowed=exc.kwargs['quotas']['injected_file_path_bytes'])
            else:
                raise exception.OnsetFileContentLimitExceeded(
                    allowed=exc.kwargs['quotas']['injected_file_content_bytes'])

    def _check_metadata_properties_quota(self, context, metadata=None):
        """Enforce quota limits on metadata properties."""
        if not metadata:
            metadata = {}
        if not isinstance(metadata, dict):
            msg = (_("Metadata type should be dict."))
            raise exception.InvalidMetadata(reason=msg)
        num_metadata = len(metadata)
        try:
            objects.Quotas.limit_check(context, metadata_items=num_metadata)
        except exception.OverQuota as exc:
            quota_metadata = exc.kwargs['quotas']['metadata_items']
            raise exception.MetadataLimitExceeded(allowed=quota_metadata)
        # Because metadata is stored in the DB, we hard-code the size limits
        # In future, we may support more variable length strings, so we act
        # as if this is quota-controlled for forwards compatibility.
        # Those are only used in V2 API, from V2.1 API, those checks are
        # validated at API layer schema validation.
        for k, v in metadata.items():
            try:
                utils.check_string_length(v)
                utils.check_string_length(k, min_length=1)
            except exception.InvalidInput as e:
                raise exception.InvalidMetadata(reason=e.format_message())
            if len(k) > 255:
                msg = _("Metadata property key greater than 255 characters")
                raise exception.InvalidMetadataSize(reason=msg)
            if len(v) > 255:
                msg = _("Metadata property value greater than 255 characters")
                raise exception.InvalidMetadataSize(reason=msg)

    def _check_requested_secgroups(self, context, secgroups):
        """Check if the security group requested exists and belongs to
        the project.

        :param context: The nova request context.
        :type context: nova.context.RequestContext
        :param secgroups: list of requested security group names, or uuids in
            the case of Neutron.
        :type secgroups: list
        :returns: list of requested security group names unmodified if using
            nova-network. If using Neutron, the list returned is all uuids.
            Note that 'default' is a special case and will be unmodified if
            it's requested.
        """
        security_groups = []
        for secgroup in secgroups:
            # NOTE(sdague): default is handled special
            if secgroup == "default":
                security_groups.append(secgroup)
                continue
            secgroup_dict = self.security_group_api.get(context, secgroup)
            if not secgroup_dict:
                raise exception.SecurityGroupNotFoundForProject(
                    project_id=context.project_id, security_group_id=secgroup)
            # Check to see if it's a nova-network or neutron type.
            if isinstance(secgroup_dict['id'], int):
                # This is nova-network so just return the requested name.
                security_groups.append(secgroup)
            else:
                # The id for neutron is a uuid, so we return the id (uuid).
                security_groups.append(secgroup_dict['id'])
        return security_groups

    def _check_requested_networks(self, context, requested_networks,
                                  max_count):
        """Check if the networks requested belongs to the project
        and the fixed IP address for each network provided is within
        same the network block
        """
        if requested_networks is not None:
            if requested_networks.no_allocate:
                # If the network request was specifically 'none' meaning don't
                # allocate any networks, we just return the number of requested
                # instances since quotas don't change at all.
                return max_count
            # NOTE(danms): Temporary transition
            requested_networks = requested_networks.as_tuples()
        return self.network_api.validate_networks(context, requested_networks,
                                                   max_count)

    def _handle_kernel_and_ramdisk(self, context, kernel_id, ramdisk_id,
                                   image):
        """Choose kernel and ramdisk appropriate for the instance.

        The kernel and ramdisk can be chosen in one of two ways:
        1. Passed in with create-instance request.
        2. Inherited from image metadata.

        If inherited from image metadata, and if that image metadata value is
        set to 'nokernel', both kernel and ramdisk will default to None.
        """
        # Inherit from image if not specified
        image_properties = image.get('properties', {})
        if kernel_id is None:
            kernel_id = image_properties.get('kernel_id')
        if ramdisk_id is None:
            ramdisk_id = image_properties.get('ramdisk_id')
        # Force to None if kernel_id indicates that a kernel is not to be used
        if kernel_id == 'nokernel':
            kernel_id = None
            ramdisk_id = None
        # Verify kernel and ramdisk exist (fail-fast)
        if kernel_id is not None:
            kernel_image = self.image_api.get(context, kernel_id)
            # kernel_id could have been a URI, not a UUID, so to keep behaviour
            # from before, which leaked that implementation detail out to the
            # caller, we return the image UUID of the kernel image and ramdisk
            # image (below) and not any image URIs that might have been
            # supplied.
            # TODO(jaypipes): Get rid of this silliness once we move to a real
            # Image object and hide all of that stuff within nova.image.api.
            kernel_id = kernel_image['id']
        if ramdisk_id is not None:
            ramdisk_image = self.image_api.get(context, ramdisk_id)
            ramdisk_id = ramdisk_image['id']
        return kernel_id, ramdisk_id
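
    # Illustrative behaviour of _handle_kernel_and_ramdisk: if neither ID is
    # passed in and the image properties contain {'kernel_id': 'nokernel'},
    # both return values are forced to None and no Glance lookup is made;
    # otherwise any supplied or inherited IDs are resolved via
    # self.image_api.get() and the corresponding image UUIDs are returned.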

    @staticmethod
    def parse_availability_zone(context, availability_zone):
        # NOTE(vish): We have a legacy hack to allow admins to specify hosts
        #             via az using az:host:node. It might be nice to expose an
        #             api to specify specific hosts to force onto, but for
        #             now it just supports this legacy hack.
        # NOTE(deva): It is also possible to specify az::node, in which case
        #             the host manager will determine the correct host.
        forced_host = None
        forced_node = None
        if availability_zone and ':' in availability_zone:
            c = availability_zone.count(':')
            if c == 1:
                availability_zone, forced_host = availability_zone.split(':')
            elif c == 2:
                if '::' in availability_zone:
                    availability_zone, forced_node = \
                        availability_zone.split('::')
                else:
                    availability_zone, forced_host, forced_node = \
                        availability_zone.split(':')
            else:
                raise exception.InvalidInput(
                    reason="Unable to parse availability_zone")
        if not availability_zone:
            availability_zone = CONF.default_schedule_zone
        return availability_zone, forced_host, forced_node
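
    # Illustrative parse_availability_zone results (zone/host/node names are
    # hypothetical):
    #     'az1'             -> ('az1', None, None)
    #     'az1:host1'       -> ('az1', 'host1', None)
    #     'az1::node1'      -> ('az1', None, 'node1')
    #     'az1:host1:node1' -> ('az1', 'host1', 'node1')
    # An empty zone after parsing falls back to CONF.default_schedule_zone,
    # and more than two ':' separators raises InvalidInput.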

    def _ensure_auto_disk_config_is_valid(self, auto_disk_config_img,
                                          auto_disk_config, image):
        auto_disk_config_disabled = \
            utils.is_auto_disk_config_disabled(auto_disk_config_img)
        if auto_disk_config_disabled and auto_disk_config:
            raise exception.AutoDiskConfigDisabledByImage(image=image)

    def _inherit_properties_from_image(self, image, auto_disk_config):
        image_properties = image.get('properties', {})
        auto_disk_config_img = \
            utils.get_auto_disk_config_from_image_props(image_properties)
        self._ensure_auto_disk_config_is_valid(auto_disk_config_img,
                                               auto_disk_config,
                                               image.get("id"))
        if auto_disk_config is None:
            auto_disk_config = strutils.bool_from_string(auto_disk_config_img)
        return {
            'os_type': image_properties.get('os_type'),
            'architecture': image_properties.get('architecture'),
            'vm_mode': image_properties.get('vm_mode'),
            'auto_disk_config': auto_disk_config
        }

    def _check_config_drive(self, config_drive):
        if config_drive:
            try:
                bool_val = strutils.bool_from_string(config_drive,
                                                     strict=True)
            except ValueError:
                raise exception.ConfigDriveInvalidValue(option=config_drive)
        else:
            bool_val = False
        # FIXME(comstud): Bug ID 1193438 filed for this. This looks silly,
        # but this is because the config drive column is a String. False
        # is represented by using an empty string. And for whatever
        # reason, we rely on the DB to cast True to a String.
        return True if bool_val else ''
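
    # Illustrative behaviour of _check_config_drive: recognised true strings
    # such as 'true' or '1' return True; None, '' and the recognised false
    # strings return '' (the DB representation of False noted above); an
    # unrecognised value raises ConfigDriveInvalidValue.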

    def _validate_flavor_image(self, context, image_id, image,
                               instance_type, root_bdm, validate_numa=True):
        """Validate the flavor and image.

        This is called from the API service to ensure that the flavor
        extra-specs and image properties are self-consistent and compatible
        with each other.

        :param context: A context.RequestContext
        :param image_id: UUID of the image
        :param image: a dict representation of the image including properties,
            enforces the image status is active.
        :param instance_type: Flavor object
        :param root_bdm: BlockDeviceMapping for root disk. Will be None for
            the resize case.
        :param validate_numa: Flag to indicate whether or not to validate
            the NUMA-related metadata.
        :raises: Many different possible exceptions. See
            api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
            for the full list.
        """
        if image and image['status'] != 'active':
            raise exception.ImageNotActive(image_id=image_id)
        self._validate_flavor_image_nostatus(context, image, instance_type,
                                             root_bdm, validate_numa)

    @staticmethod
    def _validate_flavor_image_nostatus(context, image, instance_type,
                                        root_bdm, validate_numa=True,
                                        validate_pci=False):
        """Validate the flavor and image.

        This is called from the API service to ensure that the flavor
        extra-specs and image properties are self-consistent and compatible
        with each other.

        :param context: A context.RequestContext
        :param image: a dict representation of the image including properties
        :param instance_type: Flavor object
        :param root_bdm: BlockDeviceMapping for root disk. Will be None for
            the resize case.
        :param validate_numa: Flag to indicate whether or not to validate
            the NUMA-related metadata.
        :param validate_pci: Flag to indicate whether or not to validate
            the PCI-related metadata.
        :raises: Many different possible exceptions. See
            api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
            for the full list.
        """
        if not image:
            return
        image_properties = image.get('properties', {})
        config_drive_option = image_properties.get(
            'img_config_drive', 'optional')
        if config_drive_option not in ['optional', 'mandatory']:
            raise exception.InvalidImageConfigDrive(
                config_drive=config_drive_option)
        if instance_type['memory_mb'] < int(image.get('min_ram') or 0):
            raise exception.FlavorMemoryTooSmall()
        # Image min_disk is in gb, size is in bytes. For sanity, have them both
        # in bytes.
        image_min_disk = int(image.get('min_disk') or 0) * units.Gi
        image_size = int(image.get('size') or 0)
        # Target disk is a volume. Don't check flavor disk size because it
        # doesn't make sense, and check min_disk against the volume size.
        if root_bdm is not None and root_bdm.is_volume:
            # There are 2 possibilities here:
            #
            # 1. The target volume already exists but bdm.volume_size is not
            #    yet set because this method is called before
            #    _bdm_validate_set_size_and_instance during server create.
            # 2. The target volume doesn't exist, in which case the bdm will
            #    contain the intended volume size
            #
            # Note that rebuild also calls this method with potentially a new
            # image but you can't rebuild a volume-backed server with a new
            # image (yet).
            #
            # Cinder does its own check against min_disk, so if the target
            # volume already exists this has already been done and we don't
            # need to check it again here. In this case, volume_size may not be
            # set on the bdm.
            #
            # If we're going to create the volume, the bdm will contain
            # volume_size. Therefore we should check it if it exists. This will
            # still be checked again by cinder when the volume is created, but
            # that will not happen until the request reaches a host. By
            # checking it here, the user gets an immediate and useful failure
            # indication.
            #
            # The third possibility is that we have failed to consider
            # something, and there are actually more than 2 possibilities. In
            # this case cinder will still do the check at volume creation time.
            # The behaviour will still be correct, but the user will not get an
            # immediate failure from the api, and will instead have to
            # determine why the instance is in an error state with a task of
            # block_device_mapping.
            #
            # We could reasonably refactor this check into _validate_bdm at
            # some future date, as the various size logic is already split out
            # in there.
            dest_size = root_bdm.volume_size
            if dest_size is not None:
                dest_size *= units.Gi
                if image_min_disk > dest_size:
                    raise exception.VolumeSmallerThanMinDisk(
                        volume_size=dest_size, image_min_disk=image_min_disk)
        # Target disk is a local disk whose size is taken from the flavor
        else:
            dest_size = instance_type['root_gb'] * units.Gi
            # NOTE(johannes): root_gb is allowed to be 0 for legacy reasons
            # since libvirt interpreted the value differently than other
            # drivers. A value of 0 means don't check size.
            if dest_size != 0:
                if image_size > dest_size:
                    raise exception.FlavorDiskSmallerThanImage(
                        flavor_size=dest_size, image_size=image_size)
                if image_min_disk > dest_size:
                    raise exception.FlavorDiskSmallerThanMinDisk(
                        flavor_size=dest_size, image_min_disk=image_min_disk)
            else:
                # The user is attempting to create a server with a 0-disk
                # image-backed flavor, which can lead to issues with a large
                # image consuming an unexpectedly large amount of local disk
                # on the compute host. Check to see if the deployment will
                # allow that.
                if not context.can(
                        servers_policies.ZERO_DISK_FLAVOR, fatal=False):
                    raise exception.BootFromVolumeRequiredForZeroDiskFlavor()
        API._validate_flavor_image_numa_pci(
            image, instance_type, validate_numa=validate_numa,
            validate_pci=validate_pci)

    @staticmethod
    def _validate_flavor_image_numa_pci(image, instance_type,
                                        validate_numa=True,
                                        validate_pci=False):
        """Validate the flavor and image NUMA/PCI values.

        This is called from the API service to ensure that the flavor
        extra-specs and image properties are self-consistent and compatible
        with each other.

        :param image: a dict representation of the image including properties
        :param instance_type: Flavor object
        :param validate_numa: Flag to indicate whether or not to validate
            the NUMA-related metadata.
        :param validate_pci: Flag to indicate whether or not to validate
            the PCI-related metadata.
        :raises: Many different possible exceptions. See
            api.openstack.compute.servers.INVALID_FLAVOR_IMAGE_EXCEPTIONS
            for the full list.
        """
        image_meta = _get_image_meta_obj(image)
        API._validate_flavor_image_mem_encryption(instance_type, image_meta)
        # validate PMU extra spec and image metadata
        flavor_pmu = instance_type.extra_specs.get('hw:pmu')
        image_pmu = image_meta.properties.get('hw_pmu')
        if (flavor_pmu is not None and image_pmu is not None and
                image_pmu != strutils.bool_from_string(flavor_pmu)):
            raise exception.ImagePMUConflict()
        # Only validate values of flavor/image so the return results of
        # following 'get' functions are not used.
        hardware.get_number_of_serial_ports(instance_type, image_meta)
        if hardware.is_realtime_enabled(instance_type):
            hardware.vcpus_realtime_topology(instance_type, image_meta)
        hardware.get_cpu_topology_constraints(instance_type, image_meta)
        if validate_numa:
            hardware.numa_get_constraints(instance_type, image_meta)
        if validate_pci:
            pci_request.get_pci_requests_from_flavor(instance_type)

    @staticmethod
    def _validate_flavor_image_mem_encryption(instance_type, image):
        """Validate that the flavor and image don't make contradictory
        requests regarding memory encryption.

        :param instance_type: Flavor object
        :param image: an ImageMeta object
        :raises: nova.exception.FlavorImageConflict
        """
        # This library function will raise the exception for us if
        # necessary; if not, we can ignore the result returned.
        hardware.get_mem_encryption_constraint(instance_type, image)

    def _get_image_defined_bdms(self, instance_type, image_meta,
                                root_device_name):
        image_properties = image_meta.get('properties', {})
        # Get the block device mappings defined by the image.
        image_defined_bdms = image_properties.get('block_device_mapping', [])
        legacy_image_defined = not image_properties.get('bdm_v2', False)
        image_mapping = image_properties.get('mappings', [])
        if legacy_image_defined:
            image_defined_bdms = block_device.from_legacy_mapping(
                image_defined_bdms, None, root_device_name)
        else:
            image_defined_bdms = list(map(block_device.BlockDeviceDict,
                                          image_defined_bdms))
        if image_mapping:
            image_mapping = self._prepare_image_mapping(instance_type,
                                                        image_mapping)
            image_defined_bdms = self._merge_bdms_lists(
                image_mapping, image_defined_bdms)
        return image_defined_bdms

    def _get_flavor_defined_bdms(self, instance_type, block_device_mapping):
        flavor_defined_bdms = []
        have_ephemeral_bdms = any(filter(
            block_device.new_format_is_ephemeral, block_device_mapping))
        have_swap_bdms = any(filter(
            block_device.new_format_is_swap, block_device_mapping))
        if instance_type.get('ephemeral_gb') and not have_ephemeral_bdms:
            flavor_defined_bdms.append(
                block_device.create_blank_bdm(instance_type['ephemeral_gb']))
        if instance_type.get('swap') and not have_swap_bdms:
            flavor_defined_bdms.append(
                block_device.create_blank_bdm(instance_type['swap'], 'swap'))
        return flavor_defined_bdms

    def _merge_bdms_lists(self, overridable_mappings, overrider_mappings):
        """Override any block devices from the first list by device name

        :param overridable_mappings: list which items are overridden
        :param overrider_mappings: list which items override

        :returns: A merged list of bdms
        """
        device_names = set(bdm['device_name'] for bdm in overrider_mappings
                           if bdm['device_name'])
        return (overrider_mappings +
                [bdm for bdm in overridable_mappings
                 if bdm['device_name'] not in device_names])
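
    # Illustrative example of _merge_bdms_lists (device names hypothetical):
    # with overridable_mappings containing entries for 'vda' and 'vdb' and
    # overrider_mappings containing an entry for 'vdb', the result is the
    # overriding 'vdb' entry plus the original 'vda' entry; overrider entries
    # without a device_name never mask anything.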

    def _check_and_transform_bdm(self, context, base_options, instance_type,
                                 image_meta, min_count, max_count,
                                 block_device_mapping, legacy_bdm):
        # NOTE (ndipanov): Assume root dev name is 'vda' if not supplied.
        #                  It's needed for legacy conversion to work.
        root_device_name = (base_options.get('root_device_name') or 'vda')
        image_ref = base_options.get('image_ref', '')
        # If the instance is booted by image and has a volume attached,
        # the volume cannot have the same device name as root_device_name
        if image_ref:
            for bdm in block_device_mapping:
                if (bdm.get('destination_type') == 'volume' and
                        block_device.strip_dev(bdm.get(
                            'device_name')) == root_device_name):
                    msg = _('The volume cannot be assigned the same device'
                            ' name as the root device %s') % root_device_name
                    raise exception.InvalidRequest(msg)
        image_defined_bdms = self._get_image_defined_bdms(
            instance_type, image_meta, root_device_name)
        root_in_image_bdms = (
            block_device.get_root_bdm(image_defined_bdms) is not None)
        if legacy_bdm:
            block_device_mapping = block_device.from_legacy_mapping(
                block_device_mapping, image_ref, root_device_name,
                no_root=root_in_image_bdms)
        elif root_in_image_bdms:
            # NOTE (ndipanov): client will insert an image mapping into the v2
            # block_device_mapping, but if there is a bootable device in image
            # mappings - we need to get rid of the inserted image
            # NOTE (gibi): another case is when a server is booted with an
            # image to bdm mapping where the image only contains a bdm to a
            # snapshot. In this case the other image to bdm mapping
            # contains an unnecessary device with boot_index == 0.
            # Also in this case the image_ref is None as we are booting from
            # an image to volume bdm.
            def not_image_and_root_bdm(bdm):
                return not (bdm.get('boot_index') == 0 and
                            bdm.get('source_type') == 'image')

            block_device_mapping = list(
                filter(not_image_and_root_bdm, block_device_mapping))
        block_device_mapping = self._merge_bdms_lists(
            image_defined_bdms, block_device_mapping)
        if min_count > 1 or max_count > 1:
            if any(map(lambda bdm: bdm['source_type'] == 'volume',
                       block_device_mapping)):
                msg = _('Cannot attach one or more volumes to multiple'
                        ' instances')
                raise exception.InvalidRequest(msg)
        block_device_mapping += self._get_flavor_defined_bdms(
            instance_type, block_device_mapping)
        return block_device_obj.block_device_make_list_from_dicts(
            context, block_device_mapping)

    def _get_image(self, context, image_href):
        if not image_href:
            return None, {}
        image = self.image_api.get(context, image_href)
        return image['id'], image

    def _checks_for_create_and_rebuild(self, context, image_id, image,
                                       instance_type, metadata,
                                       files_to_inject, root_bdm,
                                       validate_numa=True):
        self._check_metadata_properties_quota(context, metadata)
        self._check_injected_file_quota(context, files_to_inject)
        self._validate_flavor_image(context, image_id, image,
                                    instance_type, root_bdm,
                                    validate_numa=validate_numa)

    def _validate_and_build_base_options(self, context, instance_type,
            boot_meta, image_href, image_id,
            kernel_id, ramdisk_id, display_name,
            display_description, key_name,
            key_data, security_groups,
            availability_zone, user_data,
            metadata, access_ip_v4, access_ip_v6,
            requested_networks, config_drive,
            auto_disk_config, reservation_id,
            max_count,
            supports_port_resource_request):
        """Verify all the input parameters regardless of the provisioning
        strategy being performed.
        """
        if instance_type['disabled']:
            raise exception.FlavorNotFound(flavor_id=instance_type['id'])
        if user_data:
            try:
                base64utils.decode_as_bytes(user_data)
            except TypeError:
                raise exception.InstanceUserDataMalformed()
        # When using Neutron, _check_requested_secgroups will translate and
        # return any requested security group names to uuids.
        security_groups = (
            self._check_requested_secgroups(context, security_groups))
        # Note: max_count is the number of instances requested by the user,
        # max_network_count is the maximum number of instances taking into
        # account any network quotas
        max_network_count = self._check_requested_networks(context,
            requested_networks, max_count)
        kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
            context, kernel_id, ramdisk_id, boot_meta)
        config_drive = self._check_config_drive(config_drive)
        if key_data is None and key_name is not None:
            key_pair = objects.KeyPair.get_by_name(context,
                                                   context.user_id,
                                                   key_name)
            key_data = key_pair.public_key
        else:
            key_pair = None
        root_device_name = block_device.prepend_dev(
            block_device.properties_root_device_name(
                boot_meta.get('properties', {})))
        image_meta = _get_image_meta_obj(boot_meta)
        numa_topology = hardware.numa_get_constraints(
            instance_type, image_meta)
        system_metadata = {}
        # PCI requests come from two sources: instance flavor and
        # requested_networks. The first call in below returns an
        # InstancePCIRequests object which is a list of InstancePCIRequest
        # objects. The second call in below creates an InstancePCIRequest
        # object for each SR-IOV port, and append it to the list in the
        # InstancePCIRequests object
        pci_request_info = pci_request.get_pci_requests_from_flavor(
            instance_type)
        result = self.network_api.create_resource_requests(
            context, requested_networks, pci_request_info)
        network_metadata, port_resource_requests = result
        # Creating servers with ports that have resource requests, like QoS
        # minimum bandwidth rules, is only supported in a requested minimum
        # microversion.
        if port_resource_requests and not supports_port_resource_request:
            raise exception.CreateWithPortResourceRequestOldVersion()
        base_options = {
            'reservation_id': reservation_id,
            'image_ref': image_href,
            'kernel_id': kernel_id or '',
            'ramdisk_id': ramdisk_id or '',
            'power_state': power_state.NOSTATE,
            'vm_state': vm_states.BUILDING,
            'config_drive': config_drive,
            'user_id': context.user_id,
            'project_id': context.project_id,
            'instance_type_id': instance_type['id'],
            'memory_mb': instance_type['memory_mb'],
            'vcpus': instance_type['vcpus'],
            'root_gb': instance_type['root_gb'],
            'ephemeral_gb': instance_type['ephemeral_gb'],
            'display_name': display_name,
            'display_description': display_description,
            'user_data': user_data,
            'key_name': key_name,
            'key_data': key_data,
            'locked': False,
            'metadata': metadata or {},
            'access_ip_v4': access_ip_v4,
            'access_ip_v6': access_ip_v6,
            'availability_zone': availability_zone,
            'root_device_name': root_device_name,
            'progress': 0,
            'pci_requests': pci_request_info,
            'numa_topology': numa_topology,
            'system_metadata': system_metadata,
            'port_resource_requests': port_resource_requests}
        options_from_image = self._inherit_properties_from_image(
            boot_meta, auto_disk_config)
        base_options.update(options_from_image)
        # return the validated options and maximum number of instances allowed
        # by the network quotas
        return (base_options, max_network_count, key_pair, security_groups,
                network_metadata)

    @staticmethod
    @db_api.api_context_manager.writer
    def _create_reqspec_buildreq_instmapping(context, rs, br, im):
        """Create the request spec, build request, and instance mapping in a
        single database transaction.

        The RequestContext must be passed in to this method so that the
        database transaction context manager decorator will nest properly and
        include each create() into the same transaction context.
        """
        rs.create()
        br.create()
        im.create()

    def _validate_host_or_node(self, context, host, hypervisor_hostname):
        """Check whether compute nodes exist by validating the host
        and/or the hypervisor_hostname. There are three cases:
        1. If only host is supplied, we can lookup the HostMapping in
        the API DB.
        2. If only node is supplied, we can query a resource provider
        with that name in placement.
        3. If both host and node are supplied, we can get the cell from
        HostMapping and from that lookup the ComputeNode with the
        given cell.

        :param context: The API request context.
        :param host: Target host.
        :param hypervisor_hostname: Target node.
        :raises: ComputeHostNotFound if we find no compute nodes with host
            and/or hypervisor_hostname.
        """
        if host:
            # When host is specified.
            try:
                host_mapping = objects.HostMapping.get_by_host(context, host)
            except exception.HostMappingNotFound:
                LOG.warning('No host-to-cell mapping found for host '
                            '%(host)s.', {'host': host})
                raise exception.ComputeHostNotFound(host=host)
            # When both host and node are specified.
            if hypervisor_hostname:
                cell = host_mapping.cell_mapping
                with nova_context.target_cell(context, cell) as cctxt:
                    # Here we only do an existence check, so we don't
                    # need to store the return value into a variable.
                    objects.ComputeNode.get_by_host_and_nodename(
                        cctxt, host, hypervisor_hostname)
        elif hypervisor_hostname:
            # When only node is specified.
            try:
                self.placementclient.get_provider_by_name(
                    context, hypervisor_hostname)
            except exception.ResourceProviderNotFound:
                raise exception.ComputeHostNotFound(host=hypervisor_hostname)

    def _provision_instances(self, context, instance_type, min_count,
            max_count, base_options, boot_meta, security_groups,
            block_device_mapping, shutdown_terminate,
            instance_group, check_server_group_quota, filter_properties,
            key_pair, tags, trusted_certs, supports_multiattach,
            network_metadata=None, requested_host=None,
            requested_hypervisor_hostname=None):
        # NOTE(boxiang): Check whether compute nodes exist by validating
        # the host and/or the hypervisor_hostname. Pass the destination
        # to the scheduler with host and/or hypervisor_hostname(node).
        destination = None
        if requested_host or requested_hypervisor_hostname:
            self._validate_host_or_node(context, requested_host,
                                        requested_hypervisor_hostname)
            destination = objects.Destination()
            if requested_host:
                destination.host = requested_host
            destination.node = requested_hypervisor_hostname
        # Check quotas
        num_instances = compute_utils.check_num_instances_quota(
            context, instance_type, min_count, max_count)
        security_groups = self.security_group_api.populate_security_groups(
            security_groups)
        self.security_group_api.ensure_default(context)
        port_resource_requests = base_options.pop('port_resource_requests')
        LOG.debug("Going to run %s instances...", num_instances)
        instances_to_build = []
        try:
            for i in range(num_instances):
                # Create a uuid for the instance so we can store the
                # RequestSpec before the instance is created.
                instance_uuid = uuidutils.generate_uuid()
                # Store the RequestSpec that will be used for scheduling.
                req_spec = objects.RequestSpec.from_components(context,
                        instance_uuid, boot_meta, instance_type,
                        base_options['numa_topology'],
                        base_options['pci_requests'], filter_properties,
                        instance_group, base_options['availability_zone'],
                        security_groups=security_groups,
                        port_resource_requests=port_resource_requests)
                if block_device_mapping:
                    # Record whether or not we are a BFV instance
                    root = block_device_mapping.root_bdm()
                    req_spec.is_bfv = bool(root and root.is_volume)
                else:
                    # If we have no BDMs, we're clearly not BFV
                    req_spec.is_bfv = False
                # NOTE(danms): We need to record num_instances on the request
                # spec as this is how the conductor knows how many were in this
                # batch.
                req_spec.num_instances = num_instances
                # NOTE(stephenfin): The network_metadata field is not persisted
                # inside RequestSpec object.
                if network_metadata:
                    req_spec.network_metadata = network_metadata
                if destination:
                    req_spec.requested_destination = destination
                # Create an instance object, but do not store in db yet.
                instance = objects.Instance(context=context)
                instance.uuid = instance_uuid
                instance.update(base_options)
                instance.keypairs = objects.KeyPairList(objects=[])
                if key_pair:
                    instance.keypairs.objects.append(key_pair)
                instance.trusted_certs = self._retrieve_trusted_certs_object(
                    context, trusted_certs)
                instance = self.create_db_entry_for_new_instance(context,
                        instance_type, boot_meta, instance, security_groups,
                        block_device_mapping, num_instances, i,
                        shutdown_terminate, create_instance=False)
                block_device_mapping = (
                    self._bdm_validate_set_size_and_instance(context,
                        instance, instance_type, block_device_mapping,
                        supports_multiattach))
                instance_tags = self._transform_tags(tags, instance.uuid)
                build_request = objects.BuildRequest(context,
                        instance=instance, instance_uuid=instance.uuid,
                        project_id=instance.project_id,
                        block_device_mappings=block_device_mapping,
                        tags=instance_tags)
                # Create an instance_mapping. The null cell_mapping indicates
                # that the instance doesn't yet exist in a cell, and lookups
                # for it need to instead look for the RequestSpec.
                # cell_mapping will be populated after scheduling, with a
                # scheduling failure using the cell_mapping for the special
                # cell0.
                inst_mapping = objects.InstanceMapping(context=context)
                inst_mapping.instance_uuid = instance_uuid
                inst_mapping.project_id = context.project_id
                inst_mapping.user_id = context.user_id
                inst_mapping.cell_mapping = None
                # Create the request spec, build request, and instance mapping
                # records in a single transaction so that if a DBError is
                # raised from any of them, all INSERTs will be rolled back and
                # no orphaned records will be left behind.
                self._create_reqspec_buildreq_instmapping(context, req_spec,
                                                          build_request,
                                                          inst_mapping)
                instances_to_build.append(
                    (req_spec, build_request, inst_mapping))
                if instance_group:
                    if check_server_group_quota:
                        try:
                            objects.Quotas.check_deltas(
                                context, {'server_group_members': 1},
                                instance_group, context.user_id)
                        except exception.OverQuota:
                            msg = _("Quota exceeded, too many servers in "
                                    "group")
                            raise exception.QuotaError(msg)
                    members = objects.InstanceGroup.add_members(
                        context, instance_group.uuid, [instance.uuid])
                    # NOTE(melwitt): We recheck the quota after creating the
                    # object to prevent users from allocating more resources
                    # than their allowed quota in the event of a race. This is
                    # configurable because it can be expensive if strict quota
                    # limits are not required in a deployment.
                    if CONF.quota.recheck_quota and check_server_group_quota:
                        try:
                            objects.Quotas.check_deltas(
                                context, {'server_group_members': 0},
                                instance_group, context.user_id)
                        except exception.OverQuota:
                            objects.InstanceGroup._remove_members_in_db(
                                context, instance_group.id, [instance.uuid])
                            msg = _("Quota exceeded, too many servers in "
                                    "group")
                            raise exception.QuotaError(msg)
                    # list of members added to servers group in this iteration
                    # is needed to check quota of server group during add next
                    # instance
                    instance_group.members.extend(members)
        # In the case of any exceptions, attempt DB cleanup
        except Exception:
            with excutils.save_and_reraise_exception():
                self._cleanup_build_artifacts(None, instances_to_build)
        return instances_to_build

    @staticmethod
    def _retrieve_trusted_certs_object(context, trusted_certs, rebuild=False):
        """Convert user-requested trusted cert IDs to TrustedCerts object

        Also validates that the deployment is new enough to support trusted
        image certification validation.

        :param context: The user request auth context
        :param trusted_certs: list of user-specified trusted cert string IDs,
            may be None
        :param rebuild: True if rebuilding the server, False if creating a
            new server
        :returns: nova.objects.TrustedCerts object or None if no user-specified
            trusted cert IDs were given and nova is not configured with
            default trusted cert IDs
        """
        # Retrieve trusted_certs parameter, or use CONF value if certificate
        # validation is enabled
        if trusted_certs:
            certs_to_return = objects.TrustedCerts(ids=trusted_certs)
        elif (CONF.glance.verify_glance_signatures and
              CONF.glance.enable_certificate_validation and
              CONF.glance.default_trusted_certificate_ids):
            certs_to_return = objects.TrustedCerts(
                ids=CONF.glance.default_trusted_certificate_ids)
        else:
            return None
        return certs_to_return

    def _get_bdm_image_metadata(self, context, block_device_mapping,
                                legacy_bdm=True):
        """If we are booting from a volume, we need to get the
        volume details from Cinder and make sure we pass the
        metadata back accordingly.
        """
        if not block_device_mapping:
            return {}
        for bdm in block_device_mapping:
            if (legacy_bdm and
                    block_device.get_device_letter(
                        bdm.get('device_name', '')) != 'a'):
                continue
            elif not legacy_bdm and bdm.get('boot_index') != 0:
                continue
            volume_id = bdm.get('volume_id')
            snapshot_id = bdm.get('snapshot_id')
            if snapshot_id:
                # NOTE(alaski): A volume snapshot inherits metadata from the
                # originating volume, but the API does not expose metadata
                # on the snapshot itself. So we query the volume for it below.
                snapshot = self.volume_api.get_snapshot(context, snapshot_id)
                volume_id = snapshot['volume_id']
            if bdm.get('image_id'):
                try:
                    image_id = bdm['image_id']
                    image_meta = self.image_api.get(context, image_id)
                    return image_meta
                except Exception:
                    raise exception.InvalidBDMImage(id=image_id)
            elif volume_id:
                try:
                    volume = self.volume_api.get(context, volume_id)
                except exception.CinderConnectionFailed:
                    raise
                except Exception:
                    raise exception.InvalidBDMVolume(id=volume_id)
                if not volume.get('bootable', True):
                    raise exception.InvalidBDMVolumeNotBootable(id=volume_id)
                return utils.get_image_metadata_from_volume(volume)
        return {}

    @staticmethod
    def _get_requested_instance_group(context, filter_properties):
        if (not filter_properties or
                not filter_properties.get('scheduler_hints')):
            return
        group_hint = filter_properties.get('scheduler_hints').get('group')
        if not group_hint:
            return
        return objects.InstanceGroup.get_by_uuid(context, group_hint)
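
    # Illustrative example: filter_properties of the form
    #     {'scheduler_hints': {'group': '<server group uuid>'}}
    # causes the named InstanceGroup to be loaded and returned; with no
    # 'group' hint (or no scheduler_hints at all) the method returns None.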
  1104. def _create_instance(self, context, instance_type,
  1105. image_href, kernel_id, ramdisk_id,
  1106. min_count, max_count,
  1107. display_name, display_description,
  1108. key_name, key_data, security_groups,
  1109. availability_zone, user_data, metadata, injected_files,
  1110. admin_password, access_ip_v4, access_ip_v6,
  1111. requested_networks, config_drive,
  1112. block_device_mapping, auto_disk_config, filter_properties,
  1113. reservation_id=None, legacy_bdm=True, shutdown_terminate=False,
  1114. check_server_group_quota=False, tags=None,
  1115. supports_multiattach=False, trusted_certs=None,
  1116. supports_port_resource_request=False,
  1117. requested_host=None, requested_hypervisor_hostname=None):
  1118. """Verify all the input parameters regardless of the provisioning
  1119. strategy being performed and schedule the instance(s) for
  1120. creation.
  1121. """
  1122. # Normalize and setup some parameters
  1123. if reservation_id is None:
  1124. reservation_id = utils.generate_uid('r')
  1125. security_groups = security_groups or ['default']
  1126. min_count = min_count or 1
  1127. max_count = max_count or min_count
  1128. block_device_mapping = block_device_mapping or []
  1129. tags = tags or []
  1130. if image_href:
  1131. image_id, boot_meta = self._get_image(context, image_href)
  1132. else:
  1133. # This is similar to the logic in _retrieve_trusted_certs_object.
  1134. if (trusted_certs or
  1135. (CONF.glance.verify_glance_signatures and
  1136. CONF.glance.enable_certificate_validation and
  1137. CONF.glance.default_trusted_certificate_ids)):
  1138. msg = _("Image certificate validation is not supported "
  1139. "when booting from volume")
  1140. raise exception.CertificateValidationFailed(message=msg)
  1141. image_id = None
  1142. boot_meta = self._get_bdm_image_metadata(
  1143. context, block_device_mapping, legacy_bdm)
  1144. self._check_auto_disk_config(image=boot_meta,
  1145. auto_disk_config=auto_disk_config)
  1146. base_options, max_net_count, key_pair, security_groups, \
  1147. network_metadata = self._validate_and_build_base_options(
  1148. context, instance_type, boot_meta, image_href, image_id,
  1149. kernel_id, ramdisk_id, display_name, display_description,
  1150. key_name, key_data, security_groups, availability_zone,
  1151. user_data, metadata, access_ip_v4, access_ip_v6,
  1152. requested_networks, config_drive, auto_disk_config,
  1153. reservation_id, max_count, supports_port_resource_request)
  1154. # max_net_count is the maximum number of instances requested by the
  1155. # user adjusted for any network quota constraints, including
  1156. # consideration of connections to each requested network
  1157. if max_net_count < min_count:
  1158. raise exception.PortLimitExceeded()
  1159. elif max_net_count < max_count:
  1160. LOG.info("max count reduced from %(max_count)d to "
  1161. "%(max_net_count)d due to network port quota",
  1162. {'max_count': max_count,
  1163. 'max_net_count': max_net_count})
  1164. max_count = max_net_count
  1165. block_device_mapping = self._check_and_transform_bdm(context,
  1166. base_options, instance_type, boot_meta, min_count, max_count,
  1167. block_device_mapping, legacy_bdm)
  1168. # We can't do this check earlier because we need bdms from all sources
  1169. # to have been merged in order to get the root bdm.
  1170. # Set validate_numa=False since numa validation is already done by
  1171. # _validate_and_build_base_options().
  1172. self._checks_for_create_and_rebuild(context, image_id, boot_meta,
  1173. instance_type, metadata, injected_files,
  1174. block_device_mapping.root_bdm(), validate_numa=False)
  1175. instance_group = self._get_requested_instance_group(context,
  1176. filter_properties)
  1177. tags = self._create_tag_list_obj(context, tags)
  1178. instances_to_build = self._provision_instances(
  1179. context, instance_type, min_count, max_count, base_options,
  1180. boot_meta, security_groups, block_device_mapping,
  1181. shutdown_terminate, instance_group, check_server_group_quota,
  1182. filter_properties, key_pair, tags, trusted_certs,
  1183. supports_multiattach, network_metadata,
  1184. requested_host, requested_hypervisor_hostname)
  1185. instances = []
  1186. request_specs = []
  1187. build_requests = []
  1188. for rs, build_request, im in instances_to_build:
  1189. build_requests.append(build_request)
  1190. instance = build_request.get_new_instance(context)
  1191. instances.append(instance)
  1192. request_specs.append(rs)
  1193. self.compute_task_api.schedule_and_build_instances(
  1194. context,
  1195. build_requests=build_requests,
  1196. request_spec=request_specs,
  1197. image=boot_meta,
  1198. admin_password=admin_password,
  1199. injected_files=injected_files,
  1200. requested_networks=requested_networks,
  1201. block_device_mapping=block_device_mapping,
  1202. tags=tags)
  1203. return instances, reservation_id
  1204. @staticmethod
  1205. def _cleanup_build_artifacts(instances, instances_to_build):
  1206. # instances_to_build is a list of tuples:
  1207. # (RequestSpec, BuildRequest, InstanceMapping)
  1208. # Be paranoid about artifacts being deleted underneath us.
  1209. for instance in instances or []:
  1210. try:
  1211. instance.destroy()
  1212. except exception.InstanceNotFound:
  1213. pass
  1214. for rs, build_request, im in instances_to_build or []:
  1215. try:
  1216. rs.destroy()
  1217. except exception.RequestSpecNotFound:
  1218. pass
  1219. try:
  1220. build_request.destroy()
  1221. except exception.BuildRequestNotFound:
  1222. pass
  1223. try:
  1224. im.destroy()
  1225. except exception.InstanceMappingNotFound:
  1226. pass
  1227. @staticmethod
  1228. def _volume_size(instance_type, bdm):
  1229. size = bdm.get('volume_size')
  1230. # NOTE (ndipanov): inherit flavor size only for swap and ephemeral
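# Illustrative example (assumed flavor values): for a blank/local bdm with
# guest_format 'swap' and a flavor with swap=1024 and ephemeral_gb=20, the
# returned size would be 1024; for any other blank/local bdm it would be 20.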
  1231. if (size is None and bdm.get('source_type') == 'blank' and
  1232. bdm.get('destination_type') == 'local'):
  1233. if bdm.get('guest_format') == 'swap':
  1234. size = instance_type.get('swap', 0)
  1235. else:
  1236. size = instance_type.get('ephemeral_gb', 0)
  1237. return size
  1238. def _prepare_image_mapping(self, instance_type, mappings):
  1239. """Extract and format blank devices from image mappings."""
  1240. prepared_mappings = []
  1241. for bdm in block_device.mappings_prepend_dev(mappings):
  1242. LOG.debug("Image bdm %s", bdm)
  1243. virtual_name = bdm['virtual']
  1244. if virtual_name == 'ami' or virtual_name == 'root':
  1245. continue
  1246. if not block_device.is_swap_or_ephemeral(virtual_name):
  1247. continue
  1248. guest_format = bdm.get('guest_format')
  1249. if virtual_name == 'swap':
  1250. guest_format = 'swap'
  1251. if not guest_format:
  1252. guest_format = CONF.default_ephemeral_format
  1253. values = block_device.BlockDeviceDict({
  1254. 'device_name': bdm['device'],
  1255. 'source_type': 'blank',
  1256. 'destination_type': 'local',
  1257. 'device_type': 'disk',
  1258. 'guest_format': guest_format,
  1259. 'delete_on_termination': True,
  1260. 'boot_index': -1})
  1261. values['volume_size'] = self._volume_size(
  1262. instance_type, values)
  1263. if values['volume_size'] == 0:
  1264. continue
  1265. prepared_mappings.append(values)
  1266. return prepared_mappings
  1267. def _bdm_validate_set_size_and_instance(self, context, instance,
  1268. instance_type,
  1269. block_device_mapping,
  1270. supports_multiattach=False):
  1271. """Ensure the bdms are valid, then set size and associate with instance
  1272. Because this method can be called multiple times when more than one
  1273. instance is booted in a single request it makes a copy of the bdm list.
  1274. """
  1275. LOG.debug("block_device_mapping %s", list(block_device_mapping),
  1276. instance_uuid=instance.uuid)
  1277. self._validate_bdm(
  1278. context, instance, instance_type, block_device_mapping,
  1279. supports_multiattach)
  1280. instance_block_device_mapping = block_device_mapping.obj_clone()
  1281. for bdm in instance_block_device_mapping:
  1282. bdm.volume_size = self._volume_size(instance_type, bdm)
  1283. bdm.instance_uuid = instance.uuid
  1284. return instance_block_device_mapping
  1285. @staticmethod
  1286. def _check_requested_volume_type(bdm, volume_type_id_or_name,
  1287. volume_types):
  1288. """If we are specifying a volume type, we need to get the
  1289. volume type details from Cinder and make sure the ``volume_type``
  1290. is available.
  1291. """
  1292. # NOTE(brinzhang): Verify that the specified volume type exists.
  1293. # And save the volume type name internally for consistency in the
  1294. # BlockDeviceMapping object.
  1295. for vol_type in volume_types:
  1296. if (volume_type_id_or_name == vol_type['id'] or
  1297. volume_type_id_or_name == vol_type['name']):
  1298. bdm.volume_type = vol_type['name']
  1299. break
  1300. else:
  1301. raise exception.VolumeTypeNotFound(
  1302. id_or_name=volume_type_id_or_name)
  1303. @staticmethod
  1304. def _check_compute_supports_volume_type(context):
  1305. # NOTE(brinzhang): Checking the minimum nova-compute service
  1306. # version across the deployment. Just make sure the volume
  1307. # type can be supported when the bdm.volume_type is requested.
  1308. min_compute_version = objects.service.get_minimum_version_all_cells(
  1309. context, ['nova-compute'])
  1310. if min_compute_version < MIN_COMPUTE_VOLUME_TYPE:
  1311. raise exception.VolumeTypeSupportNotYetAvailable()
  1312. def _validate_bdm(self, context, instance, instance_type,
  1313. block_device_mappings, supports_multiattach=False):
  1314. # Make sure that the boot indexes make sense.
  1315. # Setting a negative value or None indicates that the device should not
  1316. # be used for booting.
  1317. boot_indexes = sorted([bdm.boot_index
  1318. for bdm in block_device_mappings
  1319. if bdm.boot_index is not None and
  1320. bdm.boot_index >= 0])
  1321. # Each device which is capable of being used as boot device should
  1322. # be given a unique boot index, starting from 0 in ascending order, and
  1323. # there needs to be at least one boot device.
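# For example, boot indexes [0] or [0, 1, 2] are valid, while [] (no boot
# device), [1, 2] (does not start at 0) and [0, 0, 1] (duplicate index) all
# fail the check below and raise InvalidBDMBootSequence.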
  1324. if not boot_indexes or any(i != v for i, v in enumerate(boot_indexes)):
  1325. # Convert the BlockDeviceMappingList to a list for repr details.
  1326. LOG.debug('Invalid block device mapping boot sequence for '
  1327. 'instance: %s', list(block_device_mappings),
  1328. instance=instance)
  1329. raise exception.InvalidBDMBootSequence()
  1330. volume_types = None
  1331. volume_type_is_supported = False
  1332. for bdm in block_device_mappings:
  1333. volume_type = bdm.volume_type
  1334. if volume_type:
  1335. if not volume_type_is_supported:
  1336. # The following method raises
  1337. # VolumeTypeSupportNotYetAvailable if the minimum
  1338. # nova-compute service version across the deployment is
  1339. # not new enough to support creating volumes with a
  1340. # specific type.
  1341. self._check_compute_supports_volume_type(context)
  1342. # Set the flag to avoid calling
  1343. # _check_compute_supports_volume_type more than once in
  1344. # this for loop.
  1345. volume_type_is_supported = True
  1346. if not volume_types:
1347. # To reduce the number of Cinder API calls, initialize our
1348. # cache of volume types.
  1349. volume_types = self.volume_api.get_all_volume_types(
  1350. context)
  1351. # NOTE(brinzhang): Ensure the validity of volume_type.
  1352. self._check_requested_volume_type(bdm, volume_type,
  1353. volume_types)
  1354. # NOTE(vish): For now, just make sure the volumes are accessible.
  1355. # Additionally, check that the volume can be attached to this
  1356. # instance.
  1357. snapshot_id = bdm.snapshot_id
  1358. volume_id = bdm.volume_id
  1359. image_id = bdm.image_id
  1360. if image_id is not None:
  1361. if image_id != instance.get('image_ref'):
  1362. try:
  1363. self._get_image(context, image_id)
  1364. except Exception:
  1365. raise exception.InvalidBDMImage(id=image_id)
  1366. if (bdm.source_type == 'image' and
  1367. bdm.destination_type == 'volume' and
  1368. not bdm.volume_size):
  1369. raise exception.InvalidBDM(message=_("Images with "
  1370. "destination_type 'volume' need to have a non-zero "
  1371. "size specified"))
  1372. elif volume_id is not None:
  1373. try:
  1374. volume = self.volume_api.get(context, volume_id)
  1375. self._check_attach_and_reserve_volume(
  1376. context, volume, instance, bdm, supports_multiattach)
  1377. bdm.volume_size = volume.get('size')
  1378. # NOTE(mnaser): If we end up reserving the volume, it will
  1379. # not have an attachment_id which is needed
  1380. # for cleanups. This can be removed once
  1381. # all calls to reserve_volume are gone.
  1382. if 'attachment_id' not in bdm:
  1383. bdm.attachment_id = None
  1384. except (exception.CinderConnectionFailed,
  1385. exception.InvalidVolume,
  1386. exception.MultiattachNotSupportedOldMicroversion):
  1387. raise
  1388. except exception.InvalidInput as exc:
  1389. raise exception.InvalidVolume(reason=exc.format_message())
  1390. except Exception as e:
  1391. LOG.info('Failed validating volume %s. Error: %s',
  1392. volume_id, e)
  1393. raise exception.InvalidBDMVolume(id=volume_id)
  1394. elif snapshot_id is not None:
  1395. try:
  1396. snap = self.volume_api.get_snapshot(context, snapshot_id)
  1397. bdm.volume_size = bdm.volume_size or snap.get('size')
  1398. except exception.CinderConnectionFailed:
  1399. raise
  1400. except Exception:
  1401. raise exception.InvalidBDMSnapshot(id=snapshot_id)
  1402. elif (bdm.source_type == 'blank' and
  1403. bdm.destination_type == 'volume' and
  1404. not bdm.volume_size):
  1405. raise exception.InvalidBDM(message=_("Blank volumes "
  1406. "(source: 'blank', dest: 'volume') need to have non-zero "
  1407. "size"))
  1408. ephemeral_size = sum(bdm.volume_size or instance_type['ephemeral_gb']
  1409. for bdm in block_device_mappings
  1410. if block_device.new_format_is_ephemeral(bdm))
  1411. if ephemeral_size > instance_type['ephemeral_gb']:
  1412. raise exception.InvalidBDMEphemeralSize()
  1413. # There should be only one swap
  1414. swap_list = block_device.get_bdm_swap_list(block_device_mappings)
  1415. if len(swap_list) > 1:
  1416. msg = _("More than one swap drive requested.")
  1417. raise exception.InvalidBDMFormat(details=msg)
  1418. if swap_list:
  1419. swap_size = swap_list[0].volume_size or 0
  1420. if swap_size > instance_type['swap']:
  1421. raise exception.InvalidBDMSwapSize()
  1422. max_local = CONF.max_local_block_devices
  1423. if max_local >= 0:
  1424. num_local = len([bdm for bdm in block_device_mappings
  1425. if bdm.destination_type == 'local'])
  1426. if num_local > max_local:
  1427. raise exception.InvalidBDMLocalsLimit()
  1428. def _populate_instance_names(self, instance, num_instances, index):
  1429. """Populate instance display_name and hostname.
  1430. :param instance: The instance to set the display_name, hostname for
  1431. :type instance: nova.objects.Instance
  1432. :param num_instances: Total number of instances being created in this
  1433. request
  1434. :param index: The 0-based index of this particular instance
  1435. """
  1436. # NOTE(mriedem): This is only here for test simplicity since a server
  1437. # name is required in the REST API.
  1438. if 'display_name' not in instance or instance.display_name is None:
  1439. instance.display_name = 'Server %s' % instance.uuid
  1440. # if we're booting multiple instances, we need to add an indexing
  1441. # suffix to both instance.hostname and instance.display_name. This is
  1442. # not necessary for a single instance.
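# For example (hypothetical request): booting 3 instances named 'web' would
# roughly produce display names 'web-1', 'web-2' and 'web-3', with hostnames
# derived from those via utils.sanitize_hostname().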
  1443. if num_instances == 1:
  1444. default_hostname = 'Server-%s' % instance.uuid
  1445. instance.hostname = utils.sanitize_hostname(
  1446. instance.display_name, default_hostname)
  1447. elif num_instances > 1:
  1448. old_display_name = instance.display_name
  1449. new_display_name = '%s-%d' % (old_display_name, index + 1)
  1450. if utils.sanitize_hostname(old_display_name) == "":
  1451. instance.hostname = 'Server-%s' % instance.uuid
  1452. else:
  1453. instance.hostname = utils.sanitize_hostname(
  1454. new_display_name)
  1455. instance.display_name = new_display_name
  1456. def _populate_instance_for_create(self, context, instance, image,
  1457. index, security_groups, instance_type,
  1458. num_instances, shutdown_terminate):
  1459. """Build the beginning of a new instance."""
  1460. instance.launch_index = index
  1461. instance.vm_state = vm_states.BUILDING
  1462. instance.task_state = task_states.SCHEDULING
  1463. info_cache = objects.InstanceInfoCache()
  1464. info_cache.instance_uuid = instance.uuid
  1465. info_cache.network_info = network_model.NetworkInfo()
  1466. instance.info_cache = info_cache
  1467. instance.flavor = instance_type
  1468. instance.old_flavor = None
  1469. instance.new_flavor = None
  1470. if CONF.ephemeral_storage_encryption.enabled:
  1471. # NOTE(kfarr): dm-crypt expects the cipher in a
  1472. # hyphenated format: cipher-chainmode-ivmode
  1473. # (ex: aes-xts-plain64). The algorithm needs
  1474. # to be parsed out to pass to the key manager (ex: aes).
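# e.g. the cipher 'aes-xts-plain64' is split on '-' below so that only
# 'aes' is handed to the key manager as the algorithm.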
  1475. cipher = CONF.ephemeral_storage_encryption.cipher
  1476. algorithm = cipher.split('-')[0] if cipher else None
  1477. instance.ephemeral_key_uuid = self.key_manager.create_key(
  1478. context,
  1479. algorithm=algorithm,
  1480. length=CONF.ephemeral_storage_encryption.key_size)
  1481. else:
  1482. instance.ephemeral_key_uuid = None
  1483. # Store image properties so we can use them later
  1484. # (for notifications, etc). Only store what we can.
  1485. if not instance.obj_attr_is_set('system_metadata'):
  1486. instance.system_metadata = {}
  1487. # Make sure we have the dict form that we need for instance_update.
  1488. instance.system_metadata = utils.instance_sys_meta(instance)
  1489. system_meta = utils.get_system_metadata_from_image(
  1490. image, instance_type)
  1491. # In case we couldn't find any suitable base_image
  1492. system_meta.setdefault('image_base_image_ref', instance.image_ref)
  1493. system_meta['owner_user_name'] = context.user_name
  1494. system_meta['owner_project_name'] = context.project_name
  1495. instance.system_metadata.update(system_meta)
  1496. if CONF.use_neutron:
  1497. # For Neutron we don't actually store anything in the database, we
  1498. # proxy the security groups on the instance from the ports
  1499. # attached to the instance.
  1500. instance.security_groups = objects.SecurityGroupList()
  1501. else:
  1502. instance.security_groups = security_groups
  1503. self._populate_instance_names(instance, num_instances, index)
  1504. instance.shutdown_terminate = shutdown_terminate
  1505. return instance
  1506. def _create_tag_list_obj(self, context, tags):
  1507. """Create TagList objects from simple string tags.
  1508. :param context: security context.
  1509. :param tags: simple string tags from API request.
  1510. :returns: TagList object.
  1511. """
  1512. tag_list = [objects.Tag(context=context, tag=t) for t in tags]
  1513. tag_list_obj = objects.TagList(objects=tag_list)
  1514. return tag_list_obj
  1515. def _transform_tags(self, tags, resource_id):
  1516. """Change the resource_id of the tags according to the input param.
  1517. Because this method can be called multiple times when more than one
  1518. instance is booted in a single request it makes a copy of the tags
  1519. list.
  1520. :param tags: TagList object.
  1521. :param resource_id: string.
  1522. :returns: TagList object.
  1523. """
  1524. instance_tags = tags.obj_clone()
  1525. for tag in instance_tags:
  1526. tag.resource_id = resource_id
  1527. return instance_tags
  1528. # This method remains because cellsv1 uses it in the scheduler
  1529. def create_db_entry_for_new_instance(self, context, instance_type, image,
  1530. instance, security_group, block_device_mapping, num_instances,
  1531. index, shutdown_terminate=False, create_instance=True):
  1532. """Create an entry in the DB for this new instance,
  1533. including any related table updates (such as security group,
  1534. etc).
  1535. This is called by the scheduler after a location for the
  1536. instance has been determined.
  1537. :param create_instance: Determines if the instance is created here or
  1538. just populated for later creation. This is done so that this code
  1539. can be shared with cellsv1 which needs the instance creation to
  1540. happen here. It should be removed and this method cleaned up when
  1541. cellsv1 is a distant memory.
  1542. """
  1543. self._populate_instance_for_create(context, instance, image, index,
  1544. security_group, instance_type,
  1545. num_instances, shutdown_terminate)
  1546. if create_instance:
  1547. instance.create()
  1548. return instance
  1549. def _check_multiple_instances_with_neutron_ports(self,
  1550. requested_networks):
  1551. """Check whether multiple instances are created from port id(s)."""
  1552. for requested_net in requested_networks:
  1553. if requested_net.port_id:
  1554. msg = _("Unable to launch multiple instances with"
  1555. " a single configured port ID. Please launch your"
  1556. " instance one by one with different ports.")
  1557. raise exception.MultiplePortsNotApplicable(reason=msg)
  1558. def _check_multiple_instances_with_specified_ip(self, requested_networks):
  1559. """Check whether multiple instances are created with specified ip."""
  1560. for requested_net in requested_networks:
  1561. if requested_net.network_id and requested_net.address:
  1562. msg = _("max_count cannot be greater than 1 if an fixed_ip "
  1563. "is specified.")
  1564. raise exception.InvalidFixedIpAndMaxCountRequest(reason=msg)
  1565. @hooks.add_hook("create_instance")
  1566. def create(self, context, instance_type,
  1567. image_href, kernel_id=None, ramdisk_id=None,
  1568. min_count=None, max_count=None,
  1569. display_name=None, display_description=None,
  1570. key_name=None, key_data=None, security_groups=None,
  1571. availability_zone=None, forced_host=None, forced_node=None,
  1572. user_data=None, metadata=None, injected_files=None,
  1573. admin_password=None, block_device_mapping=None,
  1574. access_ip_v4=None, access_ip_v6=None, requested_networks=None,
  1575. config_drive=None, auto_disk_config=None, scheduler_hints=None,
  1576. legacy_bdm=True, shutdown_terminate=False,
  1577. check_server_group_quota=False, tags=None,
  1578. supports_multiattach=False, trusted_certs=None,
  1579. supports_port_resource_request=False,
  1580. requested_host=None, requested_hypervisor_hostname=None):
  1581. """Provision instances, sending instance information to the
  1582. scheduler. The scheduler will determine where the instance(s)
  1583. go and will handle creating the DB entries.
  1584. Returns a tuple of (instances, reservation_id)
  1585. """
  1586. if requested_networks and max_count is not None and max_count > 1:
  1587. self._check_multiple_instances_with_specified_ip(
  1588. requested_networks)
  1589. if utils.is_neutron():
  1590. self._check_multiple_instances_with_neutron_ports(
  1591. requested_networks)
  1592. if availability_zone:
  1593. available_zones = availability_zones.\
  1594. get_availability_zones(context.elevated(), self.host_api,
  1595. get_only_available=True)
  1596. if forced_host is None and availability_zone not in \
  1597. available_zones:
  1598. msg = _('The requested availability zone is not available')
  1599. raise exception.InvalidRequest(msg)
  1600. filter_properties = scheduler_utils.build_filter_properties(
  1601. scheduler_hints, forced_host, forced_node, instance_type)
  1602. return self._create_instance(
  1603. context, instance_type,
  1604. image_href, kernel_id, ramdisk_id,
  1605. min_count, max_count,
  1606. display_name, display_description,
  1607. key_name, key_data, security_groups,
  1608. availability_zone, user_data, metadata,
  1609. injected_files, admin_password,
  1610. access_ip_v4, access_ip_v6,
  1611. requested_networks, config_drive,
  1612. block_device_mapping, auto_disk_config,
  1613. filter_properties=filter_properties,
  1614. legacy_bdm=legacy_bdm,
  1615. shutdown_terminate=shutdown_terminate,
  1616. check_server_group_quota=check_server_group_quota,
  1617. tags=tags, supports_multiattach=supports_multiattach,
  1618. trusted_certs=trusted_certs,
  1619. supports_port_resource_request=supports_port_resource_request,
  1620. requested_host=requested_host,
  1621. requested_hypervisor_hostname=requested_hypervisor_hostname)
  1622. def _check_auto_disk_config(self, instance=None, image=None,
  1623. **extra_instance_updates):
  1624. auto_disk_config = extra_instance_updates.get("auto_disk_config")
  1625. if auto_disk_config is None:
  1626. return
  1627. if not image and not instance:
  1628. return
  1629. if image:
  1630. image_props = image.get("properties", {})
  1631. auto_disk_config_img = \
  1632. utils.get_auto_disk_config_from_image_props(image_props)
  1633. image_ref = image.get("id")
  1634. else:
  1635. sys_meta = utils.instance_sys_meta(instance)
  1636. image_ref = sys_meta.get('image_base_image_ref')
  1637. auto_disk_config_img = \
  1638. utils.get_auto_disk_config_from_instance(sys_meta=sys_meta)
  1639. self._ensure_auto_disk_config_is_valid(auto_disk_config_img,
  1640. auto_disk_config,
  1641. image_ref)
  1642. def _lookup_instance(self, context, uuid):
  1643. '''Helper method for pulling an instance object from a database.
  1644. During the transition to cellsv2 there is some complexity around
  1645. retrieving an instance from the database which this method hides. If
  1646. there is an instance mapping then query the cell for the instance, if
  1647. no mapping exists then query the configured nova database.
  1648. Once we are past the point that all deployments can be assumed to be
  1649. migrated to cellsv2 this method can go away.
  1650. '''
  1651. inst_map = None
  1652. try:
  1653. inst_map = objects.InstanceMapping.get_by_instance_uuid(
  1654. context, uuid)
  1655. except exception.InstanceMappingNotFound:
  1656. # TODO(alaski): This exception block can be removed once we're
  1657. # guaranteed everyone is using cellsv2.
  1658. pass
  1659. if inst_map is None or inst_map.cell_mapping is None:
  1660. # If inst_map is None then the deployment has not migrated to
  1661. # cellsv2 yet.
  1662. # If inst_map.cell_mapping is None then the instance is not in a
  1663. # cell yet. Until instance creation moves to the conductor the
  1664. # instance can be found in the configured database, so attempt
  1665. # to look it up.
  1666. cell = None
  1667. try:
  1668. instance = objects.Instance.get_by_uuid(context, uuid)
  1669. except exception.InstanceNotFound:
  1670. # If we get here then the conductor is in charge of writing the
  1671. # instance to the database and hasn't done that yet. It's up to
  1672. # the caller of this method to determine what to do with that
  1673. # information.
  1674. return None, None
  1675. else:
  1676. cell = inst_map.cell_mapping
  1677. with nova_context.target_cell(context, cell) as cctxt:
  1678. try:
  1679. instance = objects.Instance.get_by_uuid(cctxt, uuid)
  1680. except exception.InstanceNotFound:
  1681. # Since the cell_mapping exists we know the instance is in
  1682. # the cell, however InstanceNotFound means it's already
  1683. # deleted.
  1684. return None, None
  1685. return cell, instance
  1686. def _delete_while_booting(self, context, instance):
  1687. """Handle deletion if the instance has not reached a cell yet
  1688. Deletion before an instance reaches a cell needs to be handled
  1689. differently. What we're attempting to do is delete the BuildRequest
  1690. before the api level conductor does. If we succeed here then the boot
  1691. request stops before reaching a cell. If not then the instance will
  1692. need to be looked up in a cell db and the normal delete path taken.
  1693. """
  1694. deleted = self._attempt_delete_of_buildrequest(context, instance)
  1695. if deleted:
  1696. # If we've reached this block the successful deletion of the
  1697. # buildrequest indicates that the build process should be halted by
  1698. # the conductor.
  1699. # NOTE(alaski): Though the conductor halts the build process it
  1700. # does not currently delete the instance record. This is
  1701. # because in the near future the instance record will not be
  1702. # created if the buildrequest has been deleted here. For now we
  1703. # ensure the instance has been set to deleted at this point.
  1704. # Yes this directly contradicts the comment earlier in this
  1705. # method, but this is a temporary measure.
  1706. # Look up the instance because the current instance object was
  1707. # stashed on the buildrequest and therefore not complete enough
  1708. # to run .destroy().
  1709. try:
  1710. instance_uuid = instance.uuid
  1711. cell, instance = self._lookup_instance(context, instance_uuid)
  1712. if instance is not None:
  1713. # If instance is None it has already been deleted.
  1714. if cell:
  1715. with nova_context.target_cell(context, cell) as cctxt:
  1716. # FIXME: When the instance context is targeted,
  1717. # we can remove this
  1718. with compute_utils.notify_about_instance_delete(
  1719. self.notifier, cctxt, instance):
  1720. instance.destroy()
  1721. else:
  1722. instance.destroy()
  1723. except exception.InstanceNotFound:
  1724. pass
  1725. return True
  1726. return False
  1727. def _attempt_delete_of_buildrequest(self, context, instance):
  1728. # If there is a BuildRequest then the instance may not have been
  1729. # written to a cell db yet. Delete the BuildRequest here, which
  1730. # will indicate that the Instance build should not proceed.
  1731. try:
  1732. build_req = objects.BuildRequest.get_by_instance_uuid(
  1733. context, instance.uuid)
  1734. build_req.destroy()
  1735. except exception.BuildRequestNotFound:
  1736. # This means that conductor has deleted the BuildRequest so the
  1737. # instance is now in a cell and the delete needs to proceed
  1738. # normally.
  1739. return False
  1740. # We need to detach from any volumes so they aren't orphaned.
  1741. self._local_cleanup_bdm_volumes(
  1742. build_req.block_device_mappings, instance, context)
  1743. return True
  1744. def _delete(self, context, instance, delete_type, cb, **instance_attrs):
  1745. if instance.disable_terminate:
  1746. LOG.info('instance termination disabled', instance=instance)
  1747. return
  1748. cell = None
  1749. # If there is an instance.host (or the instance is shelved-offloaded or
  1750. # in error state), the instance has been scheduled and sent to a
  1751. # cell/compute which means it was pulled from the cell db.
  1752. # Normal delete should be attempted.
  1753. may_have_ports_or_volumes = compute_utils.may_have_ports_or_volumes(
  1754. instance)
  1755. if not instance.host and not may_have_ports_or_volumes:
  1756. try:
  1757. if self._delete_while_booting(context, instance):
  1758. return
  1759. # If instance.host was not set it's possible that the Instance
  1760. # object here was pulled from a BuildRequest object and is not
  1761. # fully populated. Notably it will be missing an 'id' field
  1762. # which will prevent instance.destroy from functioning
  1763. # properly. A lookup is attempted which will either return a
  1764. # full Instance or None if not found. If not found then it's
  1765. # acceptable to skip the rest of the delete processing.
  1766. cell, instance = self._lookup_instance(context, instance.uuid)
  1767. if cell and instance:
  1768. try:
  1769. # Now destroy the instance from the cell it lives in.
  1770. with compute_utils.notify_about_instance_delete(
  1771. self.notifier, context, instance):
  1772. instance.destroy()
  1773. except exception.InstanceNotFound:
  1774. pass
  1775. # The instance was deleted or is already gone.
  1776. return
  1777. if not instance:
  1778. # Instance is already deleted.
  1779. return
  1780. except exception.ObjectActionError:
  1781. # NOTE(melwitt): This means the instance.host changed
  1782. # under us indicating the instance became scheduled
  1783. # during the destroy(). Refresh the instance from the DB and
  1784. # continue on with the delete logic for a scheduled instance.
  1785. # NOTE(danms): If instance.host is set, we should be able to
  1786. # do the following lookup. If not, there's not much we can
  1787. # do to recover.
  1788. cell, instance = self._lookup_instance(context, instance.uuid)
  1789. if not instance:
  1790. # Instance is already deleted
  1791. return
  1792. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  1793. context, instance.uuid)
1794. # In these states an instance has an associated snapshot.
  1795. if instance.vm_state in (vm_states.SHELVED,
  1796. vm_states.SHELVED_OFFLOADED):
  1797. snapshot_id = instance.system_metadata.get('shelved_image_id')
  1798. LOG.info("Working on deleting snapshot %s "
  1799. "from shelved instance...",
  1800. snapshot_id, instance=instance)
  1801. try:
  1802. self.image_api.delete(context, snapshot_id)
  1803. except (exception.ImageNotFound,
  1804. exception.ImageNotAuthorized) as exc:
  1805. LOG.warning("Failed to delete snapshot "
  1806. "from shelved instance (%s).",
  1807. exc.format_message(), instance=instance)
  1808. except Exception:
  1809. LOG.exception("Something wrong happened when trying to "
  1810. "delete snapshot from shelved instance.",
  1811. instance=instance)
  1812. original_task_state = instance.task_state
  1813. try:
  1814. # NOTE(maoy): no expected_task_state needs to be set
  1815. instance.update(instance_attrs)
  1816. instance.progress = 0
  1817. instance.save()
  1818. if not instance.host and not may_have_ports_or_volumes:
  1819. try:
  1820. with compute_utils.notify_about_instance_delete(
  1821. self.notifier, context, instance,
  1822. delete_type
  1823. if delete_type != 'soft_delete'
  1824. else 'delete'):
  1825. instance.destroy()
  1826. LOG.info('Instance deleted and does not have host '
  1827. 'field, its vm_state is %(state)s.',
  1828. {'state': instance.vm_state},
  1829. instance=instance)
  1830. return
  1831. except exception.ObjectActionError as ex:
  1832. # The instance's host likely changed under us as
  1833. # this instance could be building and has since been
  1834. # scheduled. Continue with attempts to delete it.
  1835. LOG.debug('Refreshing instance because: %s', ex,
  1836. instance=instance)
  1837. instance.refresh()
  1838. if instance.vm_state == vm_states.RESIZED:
  1839. self._confirm_resize_on_deleting(context, instance)
  1840. # NOTE(neha_alhat): After confirm resize vm_state will become
  1841. # 'active' and task_state will be set to 'None'. But for soft
  1842. # deleting a vm, the _do_soft_delete callback requires
  1843. # task_state in 'SOFT_DELETING' status. So, we need to set
  1844. # task_state as 'SOFT_DELETING' again for soft_delete case.
  1845. # After confirm resize and before saving the task_state to
  1846. # "SOFT_DELETING", during the short window, user can submit
  1847. # soft delete vm request again and system will accept and
  1848. # process it without any errors.
  1849. if delete_type == 'soft_delete':
  1850. instance.task_state = instance_attrs['task_state']
  1851. instance.save()
  1852. is_local_delete = True
  1853. try:
  1854. # instance.host must be set in order to look up the service.
  1855. if instance.host is not None:
  1856. service = objects.Service.get_by_compute_host(
  1857. context.elevated(), instance.host)
  1858. is_local_delete = not self.servicegroup_api.service_is_up(
  1859. service)
  1860. if not is_local_delete:
  1861. if original_task_state in (task_states.DELETING,
  1862. task_states.SOFT_DELETING):
  1863. LOG.info('Instance is already in deleting state, '
  1864. 'ignoring this request',
  1865. instance=instance)
  1866. return
  1867. self._record_action_start(context, instance,
  1868. instance_actions.DELETE)
  1869. cb(context, instance, bdms)
  1870. except exception.ComputeHostNotFound:
  1871. LOG.debug('Compute host %s not found during service up check, '
  1872. 'going to local delete instance', instance.host,
  1873. instance=instance)
  1874. if is_local_delete:
  1875. # If instance is in shelved_offloaded state or compute node
  1876. # isn't up, delete instance from db and clean bdms info and
  1877. # network info
  1878. if cell is None:
  1879. # NOTE(danms): If we didn't get our cell from one of the
  1880. # paths above, look it up now.
  1881. try:
  1882. im = objects.InstanceMapping.get_by_instance_uuid(
  1883. context, instance.uuid)
  1884. cell = im.cell_mapping
  1885. except exception.InstanceMappingNotFound:
  1886. LOG.warning('During local delete, failed to find '
  1887. 'instance mapping', instance=instance)
  1888. return
  1889. LOG.debug('Doing local delete in cell %s', cell.identity,
  1890. instance=instance)
  1891. with nova_context.target_cell(context, cell) as cctxt:
  1892. self._local_delete(cctxt, instance, bdms, delete_type, cb)
  1893. except exception.InstanceNotFound:
  1894. # NOTE(comstud): Race condition. Instance already gone.
  1895. pass
  1896. def _confirm_resize_on_deleting(self, context, instance):
  1897. # If in the middle of a resize, use confirm_resize to
  1898. # ensure the original instance is cleaned up too along
  1899. # with its allocations (and migration-based allocations)
  1900. # in placement.
  1901. migration = None
  1902. for status in ('finished', 'confirming'):
  1903. try:
  1904. migration = objects.Migration.get_by_instance_and_status(
  1905. context.elevated(), instance.uuid, status)
  1906. LOG.info('Found an unconfirmed migration during delete, '
  1907. 'id: %(id)s, status: %(status)s',
  1908. {'id': migration.id,
  1909. 'status': migration.status},
  1910. instance=instance)
  1911. break
  1912. except exception.MigrationNotFoundByStatus:
  1913. pass
  1914. if not migration:
  1915. LOG.info('Instance may have been confirmed during delete',
  1916. instance=instance)
  1917. return
  1918. src_host = migration.source_compute
  1919. self._record_action_start(context, instance,
  1920. instance_actions.CONFIRM_RESIZE)
  1921. self.compute_rpcapi.confirm_resize(context,
  1922. instance, migration, src_host, cast=False)
  1923. def _local_cleanup_bdm_volumes(self, bdms, instance, context):
  1924. """The method deletes the bdm records and, if a bdm is a volume, call
  1925. the terminate connection and the detach volume via the Volume API.
  1926. """
  1927. elevated = context.elevated()
  1928. for bdm in bdms:
  1929. if bdm.is_volume:
  1930. try:
  1931. if bdm.attachment_id:
  1932. self.volume_api.attachment_delete(context,
  1933. bdm.attachment_id)
  1934. else:
  1935. connector = compute_utils.get_stashed_volume_connector(
  1936. bdm, instance)
  1937. if connector:
  1938. self.volume_api.terminate_connection(context,
  1939. bdm.volume_id,
  1940. connector)
  1941. else:
  1942. LOG.debug('Unable to find connector for volume %s,'
  1943. ' not attempting terminate_connection.',
  1944. bdm.volume_id, instance=instance)
  1945. # Attempt to detach the volume. If there was no
  1946. # connection made in the first place this is just
  1947. # cleaning up the volume state in the Cinder DB.
  1948. self.volume_api.detach(elevated, bdm.volume_id,
  1949. instance.uuid)
  1950. if bdm.delete_on_termination:
  1951. self.volume_api.delete(context, bdm.volume_id)
  1952. except Exception as exc:
  1953. LOG.warning("Ignoring volume cleanup failure due to %s",
  1954. exc, instance=instance)
  1955. # If we're cleaning up volumes from an instance that wasn't yet
  1956. # created in a cell, i.e. the user deleted the server while
  1957. # the BuildRequest still existed, then the BDM doesn't actually
  1958. # exist in the DB to destroy it.
  1959. if 'id' in bdm:
  1960. bdm.destroy()
  1961. @property
  1962. def placementclient(self):
  1963. if self._placementclient is None:
  1964. self._placementclient = report.SchedulerReportClient()
  1965. return self._placementclient
  1966. def _local_delete(self, context, instance, bdms, delete_type, cb):
  1967. if instance.vm_state == vm_states.SHELVED_OFFLOADED:
  1968. LOG.info("instance is in SHELVED_OFFLOADED state, cleanup"
  1969. " the instance's info from database.",
  1970. instance=instance)
  1971. else:
  1972. LOG.warning("instance's host %s is down, deleting from "
  1973. "database", instance.host, instance=instance)
  1974. with compute_utils.notify_about_instance_delete(
  1975. self.notifier, context, instance,
  1976. delete_type if delete_type != 'soft_delete' else 'delete'):
  1977. elevated = context.elevated()
1978. # NOTE(liusheng): In the nova-network multi_host scenario, deleting
  1979. # network info of the instance may need instance['host'] as
  1980. # destination host of RPC call. If instance in
  1981. # SHELVED_OFFLOADED state, instance['host'] is None, here, use
  1982. # shelved_host as host to deallocate network info and reset
  1983. # instance['host'] after that. Here we shouldn't use
  1984. # instance.save(), because this will mislead user who may think
  1985. # the instance's host has been changed, and actually, the
  1986. # instance.host is always None.
  1987. orig_host = instance.host
  1988. try:
  1989. if instance.vm_state == vm_states.SHELVED_OFFLOADED:
  1990. sysmeta = getattr(instance,
  1991. obj_base.get_attrname(
  1992. 'system_metadata'))
  1993. instance.host = sysmeta.get('shelved_host')
  1994. self.network_api.deallocate_for_instance(elevated,
  1995. instance)
  1996. finally:
  1997. instance.host = orig_host
  1998. # cleanup volumes
  1999. self._local_cleanup_bdm_volumes(bdms, instance, context)
  2000. # Cleanup allocations in Placement since we can't do it from the
  2001. # compute service.
  2002. self.placementclient.delete_allocation_for_instance(
  2003. context, instance.uuid)
  2004. cb(context, instance, bdms, local=True)
  2005. instance.destroy()
  2006. @staticmethod
  2007. def _update_queued_for_deletion(context, instance, qfd):
  2008. # NOTE(tssurya): We query the instance_mapping record of this instance
  2009. # and update the queued_for_delete flag to True (or False according to
  2010. # the state of the instance). This just means that the instance is
  2011. # queued for deletion (or is no longer queued for deletion). It does
  2012. # not guarantee its successful deletion (or restoration). Hence the
  2013. # value could be stale which is fine, considering its use is only
2014. # during a down-cell (desperate) situation.
  2015. im = objects.InstanceMapping.get_by_instance_uuid(context,
  2016. instance.uuid)
  2017. im.queued_for_delete = qfd
  2018. im.save()
  2019. def _do_delete(self, context, instance, bdms, local=False):
  2020. if local:
  2021. instance.vm_state = vm_states.DELETED
  2022. instance.task_state = None
  2023. instance.terminated_at = timeutils.utcnow()
  2024. instance.save()
  2025. else:
  2026. self.compute_rpcapi.terminate_instance(context, instance, bdms)
  2027. self._update_queued_for_deletion(context, instance, True)
  2028. def _do_soft_delete(self, context, instance, bdms, local=False):
  2029. if local:
  2030. instance.vm_state = vm_states.SOFT_DELETED
  2031. instance.task_state = None
  2032. instance.terminated_at = timeutils.utcnow()
  2033. instance.save()
  2034. else:
  2035. self.compute_rpcapi.soft_delete_instance(context, instance)
  2036. self._update_queued_for_deletion(context, instance, True)
  2037. # NOTE(maoy): we allow delete to be called no matter what vm_state says.
  2038. @check_instance_lock
  2039. @check_instance_state(vm_state=None, task_state=None,
  2040. must_have_launched=True)
  2041. def soft_delete(self, context, instance):
  2042. """Terminate an instance."""
  2043. LOG.debug('Going to try to soft delete instance',
  2044. instance=instance)
  2045. self._delete(context, instance, 'soft_delete', self._do_soft_delete,
  2046. task_state=task_states.SOFT_DELETING,
  2047. deleted_at=timeutils.utcnow())
  2048. def _delete_instance(self, context, instance):
  2049. self._delete(context, instance, 'delete', self._do_delete,
  2050. task_state=task_states.DELETING)
  2051. @check_instance_lock
  2052. @check_instance_state(vm_state=None, task_state=None,
  2053. must_have_launched=False)
  2054. def delete(self, context, instance):
  2055. """Terminate an instance."""
  2056. LOG.debug("Going to try to terminate instance", instance=instance)
  2057. self._delete_instance(context, instance)
  2058. @check_instance_lock
  2059. @check_instance_state(vm_state=[vm_states.SOFT_DELETED])
  2060. def restore(self, context, instance):
  2061. """Restore a previously deleted (but not reclaimed) instance."""
  2062. # Check quotas
  2063. flavor = instance.get_flavor()
  2064. project_id, user_id = quotas_obj.ids_from_instance(context, instance)
  2065. compute_utils.check_num_instances_quota(context, flavor, 1, 1,
  2066. project_id=project_id, user_id=user_id)
  2067. self._record_action_start(context, instance, instance_actions.RESTORE)
  2068. if instance.host:
  2069. instance.task_state = task_states.RESTORING
  2070. instance.deleted_at = None
  2071. instance.save(expected_task_state=[None])
  2072. # TODO(melwitt): We're not rechecking for strict quota here to
  2073. # guard against going over quota during a race at this time because
  2074. # the resource consumption for this operation is written to the
  2075. # database by compute.
  2076. self.compute_rpcapi.restore_instance(context, instance)
  2077. else:
  2078. instance.vm_state = vm_states.ACTIVE
  2079. instance.task_state = None
  2080. instance.deleted_at = None
  2081. instance.save(expected_task_state=[None])
  2082. self._update_queued_for_deletion(context, instance, False)
  2083. @check_instance_lock
  2084. @check_instance_state(task_state=None,
  2085. must_have_launched=False)
  2086. def force_delete(self, context, instance):
  2087. """Force delete an instance in any vm_state/task_state."""
  2088. self._delete(context, instance, 'force_delete', self._do_delete,
  2089. task_state=task_states.DELETING)
  2090. def force_stop(self, context, instance, do_cast=True, clean_shutdown=True):
  2091. LOG.debug("Going to try to stop instance", instance=instance)
  2092. instance.task_state = task_states.POWERING_OFF
  2093. instance.progress = 0
  2094. instance.save(expected_task_state=[None])
  2095. self._record_action_start(context, instance, instance_actions.STOP)
  2096. self.compute_rpcapi.stop_instance(context, instance, do_cast=do_cast,
  2097. clean_shutdown=clean_shutdown)
  2098. @check_instance_lock
  2099. @check_instance_host
  2100. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.ERROR])
  2101. def stop(self, context, instance, do_cast=True, clean_shutdown=True):
  2102. """Stop an instance."""
  2103. self.force_stop(context, instance, do_cast, clean_shutdown)
  2104. @check_instance_lock
  2105. @check_instance_host
  2106. @check_instance_state(vm_state=[vm_states.STOPPED])
  2107. def start(self, context, instance):
  2108. """Start an instance."""
  2109. LOG.debug("Going to try to start instance", instance=instance)
  2110. instance.task_state = task_states.POWERING_ON
  2111. instance.save(expected_task_state=[None])
  2112. self._record_action_start(context, instance, instance_actions.START)
  2113. self.compute_rpcapi.start_instance(context, instance)
  2114. @check_instance_lock
  2115. @check_instance_host
  2116. @check_instance_state(vm_state=vm_states.ALLOW_TRIGGER_CRASH_DUMP)
  2117. def trigger_crash_dump(self, context, instance):
  2118. """Trigger crash dump in an instance."""
  2119. LOG.debug("Try to trigger crash dump", instance=instance)
  2120. self._record_action_start(context, instance,
  2121. instance_actions.TRIGGER_CRASH_DUMP)
  2122. self.compute_rpcapi.trigger_crash_dump(context, instance)
  2123. def _generate_minimal_construct_for_down_cells(self, context,
  2124. down_cell_uuids,
  2125. project, limit):
  2126. """Generate a list of minimal instance constructs for a given list of
  2127. cells that did not respond to a list operation. This will list
  2128. every instance mapping in the affected cells and return a minimal
  2129. objects.Instance for each (non-queued-for-delete) mapping.
  2130. :param context: RequestContext
  2131. :param down_cell_uuids: A list of cell UUIDs that did not respond
  2132. :param project: A project ID to filter mappings, or None
  2133. :param limit: A numeric limit on the number of results, or None
  2134. :returns: An InstanceList() of partial Instance() objects
  2135. """
  2136. unavailable_servers = objects.InstanceList()
  2137. for cell_uuid in down_cell_uuids:
  2138. LOG.warning("Cell %s is not responding and hence only "
  2139. "partial results are available from this "
  2140. "cell if any.", cell_uuid)
  2141. instance_mappings = (objects.InstanceMappingList.
  2142. get_not_deleted_by_cell_and_project(context, cell_uuid,
  2143. project, limit=limit))
  2144. for im in instance_mappings:
  2145. unavailable_servers.objects.append(
  2146. objects.Instance(
  2147. context=context,
  2148. uuid=im.instance_uuid,
  2149. project_id=im.project_id,
  2150. created_at=im.created_at
  2151. )
  2152. )
  2153. if limit is not None:
  2154. limit -= len(instance_mappings)
  2155. if limit <= 0:
  2156. break
  2157. return unavailable_servers
  2158. def _get_instance_map_or_none(self, context, instance_uuid):
  2159. try:
  2160. inst_map = objects.InstanceMapping.get_by_instance_uuid(
  2161. context, instance_uuid)
  2162. except exception.InstanceMappingNotFound:
2163. # An InstanceMapping should generally always be found. This
2164. # exception may be raised if a deployment has only partially
2165. # migrated the nova-api services.
  2166. inst_map = None
  2167. return inst_map
  2168. @staticmethod
  2169. def _save_user_id_in_instance_mapping(mapping, instance):
  2170. # TODO(melwitt): We take the opportunity to migrate user_id on the
  2171. # instance mapping if it's not yet been migrated. This can be removed
  2172. # in a future release, when all migrations are complete.
  2173. # If the instance came from a RequestSpec because of a down cell, its
  2174. # user_id could be None and the InstanceMapping.user_id field is
  2175. # non-nullable. Avoid trying to set/save the user_id in that case.
  2176. if 'user_id' not in mapping and instance.user_id is not None:
  2177. mapping.user_id = instance.user_id
  2178. mapping.save()
  2179. def _get_instance_from_cell(self, context, im, expected_attrs,
  2180. cell_down_support):
  2181. # NOTE(danms): Even though we're going to scatter/gather to the
  2182. # right cell, other code depends on this being force targeted when
  2183. # the get call returns.
  2184. nova_context.set_target_cell(context, im.cell_mapping)
  2185. uuid = im.instance_uuid
  2186. result = nova_context.scatter_gather_single_cell(context,
  2187. im.cell_mapping, objects.Instance.get_by_uuid, uuid,
  2188. expected_attrs=expected_attrs)
  2189. cell_uuid = im.cell_mapping.uuid
  2190. if not nova_context.is_cell_failure_sentinel(result[cell_uuid]):
  2191. inst = result[cell_uuid]
  2192. self._save_user_id_in_instance_mapping(im, inst)
  2193. return inst
  2194. elif isinstance(result[cell_uuid], exception.InstanceNotFound):
  2195. raise exception.InstanceNotFound(instance_id=uuid)
  2196. elif cell_down_support:
  2197. if im.queued_for_delete:
2198. # This should be treated like a deleted instance.
  2199. raise exception.InstanceNotFound(instance_id=uuid)
  2200. # instance in down cell, return a minimal construct
  2201. LOG.warning("Cell %s is not responding and hence only "
  2202. "partial results are available from this "
  2203. "cell.", cell_uuid)
  2204. try:
  2205. rs = objects.RequestSpec.get_by_instance_uuid(context,
  2206. uuid)
2207. # For the BFV case, we could have rs.image but rs.image.id might
  2208. # still not be set. So we check the existence of both image
  2209. # and its id.
  2210. image_ref = (rs.image.id if rs.image and
  2211. 'id' in rs.image else None)
  2212. inst = objects.Instance(context=context, power_state=0,
  2213. uuid=uuid,
  2214. project_id=im.project_id,
  2215. created_at=im.created_at,
  2216. user_id=rs.user_id,
  2217. flavor=rs.flavor,
  2218. image_ref=image_ref,
  2219. availability_zone=rs.availability_zone)
  2220. self._save_user_id_in_instance_mapping(im, inst)
  2221. return inst
  2222. except exception.RequestSpecNotFound:
  2223. # could be that a deleted instance whose request
  2224. # spec has been archived is being queried.
  2225. raise exception.InstanceNotFound(instance_id=uuid)
  2226. else:
  2227. raise exception.NovaException(
  2228. _("Cell %s is not responding and hence instance "
  2229. "info is not available.") % cell_uuid)
  2230. def _get_instance(self, context, instance_uuid, expected_attrs,
  2231. cell_down_support=False):
  2232. inst_map = self._get_instance_map_or_none(context, instance_uuid)
  2233. if inst_map and (inst_map.cell_mapping is not None):
  2234. instance = self._get_instance_from_cell(context, inst_map,
  2235. expected_attrs, cell_down_support)
  2236. elif inst_map and (inst_map.cell_mapping is None):
  2237. # This means the instance has not been scheduled and put in
  2238. # a cell yet. For now it also may mean that the deployer
  2239. # has not created their cell(s) yet.
  2240. try:
  2241. build_req = objects.BuildRequest.get_by_instance_uuid(
  2242. context, instance_uuid)
  2243. instance = build_req.instance
  2244. except exception.BuildRequestNotFound:
  2245. # Instance was mapped and the BuildRequest was deleted
  2246. # while fetching. Try again.
  2247. inst_map = self._get_instance_map_or_none(context,
  2248. instance_uuid)
  2249. if inst_map and (inst_map.cell_mapping is not None):
  2250. instance = self._get_instance_from_cell(context, inst_map,
  2251. expected_attrs, cell_down_support)
  2252. else:
  2253. raise exception.InstanceNotFound(instance_id=instance_uuid)
  2254. else:
  2255. # If we got here, we don't have an instance mapping, but we aren't
  2256. # sure why. The instance mapping might be missing because the
  2257. # upgrade is incomplete (map_instances wasn't run). Or because the
  2258. # instance was deleted and the DB was archived at which point the
  2259. # mapping is deleted. The former case is bad, but because of the
  2260. # latter case we can't really log any kind of warning/error here
  2261. # since it might be normal.
  2262. raise exception.InstanceNotFound(instance_id=instance_uuid)
  2263. return instance
  2264. def get(self, context, instance_id, expected_attrs=None,
  2265. cell_down_support=False):
  2266. """Get a single instance with the given instance_id.
  2267. :param cell_down_support: True if the API (and caller) support
  2268. returning a minimal instance
  2269. construct if the relevant cell is
  2270. down. If False, an error is raised
  2271. since the instance cannot be retrieved
  2272. due to the cell being down.
  2273. """
  2274. if not expected_attrs:
  2275. expected_attrs = []
  2276. expected_attrs.extend(['metadata', 'system_metadata',
  2277. 'security_groups', 'info_cache'])
  2278. # NOTE(ameade): we still need to support integer ids for ec2
  2279. try:
  2280. if uuidutils.is_uuid_like(instance_id):
  2281. LOG.debug("Fetching instance by UUID",
  2282. instance_uuid=instance_id)
  2283. instance = self._get_instance(context, instance_id,
  2284. expected_attrs, cell_down_support=cell_down_support)
  2285. else:
  2286. LOG.debug("Failed to fetch instance by id %s", instance_id)
  2287. raise exception.InstanceNotFound(instance_id=instance_id)
  2288. except exception.InvalidID:
  2289. LOG.debug("Invalid instance id %s", instance_id)
  2290. raise exception.InstanceNotFound(instance_id=instance_id)
  2291. return instance
  2292. def get_all(self, context, search_opts=None, limit=None, marker=None,
  2293. expected_attrs=None, sort_keys=None, sort_dirs=None,
  2294. cell_down_support=False, all_tenants=False):
  2295. """Get all instances filtered by one of the given parameters.
  2296. If there is no filter and the context is an admin, it will retrieve
  2297. all instances in the system.
  2298. Deleted instances will be returned by default, unless there is a
  2299. search option that says otherwise.
  2300. The results will be sorted based on the list of sort keys in the
  2301. 'sort_keys' parameter (first value is primary sort key, second value is
2302. secondary sort key, etc.). For each sort key, the associated sort
  2303. direction is based on the list of sort directions in the 'sort_dirs'
  2304. parameter.
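For example, sort_keys=['display_name', 'created_at'] with
sort_dirs=['asc', 'desc'] sorts primarily by name ascending and breaks
ties by newest creation time first.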
  2305. :param cell_down_support: True if the API (and caller) support
  2306. returning a minimal instance
  2307. construct if the relevant cell is
  2308. down. If False, instances from
  2309. unreachable cells will be omitted.
  2310. :param all_tenants: True if the "all_tenants" filter was passed.
  2311. """
  2312. if search_opts is None:
  2313. search_opts = {}
  2314. LOG.debug("Searching by: %s", str(search_opts))
  2315. # Fixups for the DB call
  2316. filters = {}
  2317. def _remap_flavor_filter(flavor_id):
  2318. flavor = objects.Flavor.get_by_flavor_id(context, flavor_id)
  2319. filters['instance_type_id'] = flavor.id
  2320. def _remap_fixed_ip_filter(fixed_ip):
  2321. # Turn fixed_ip into a regexp match. Since '.' matches
  2322. # any character, we need to use regexp escaping for it.
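# e.g. a fixed_ip of '10.0.0.5' becomes the anchored pattern
# '^10\.0\.0\.5$'.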
  2323. filters['ip'] = '^%s$' % fixed_ip.replace('.', '\\.')
  2324. # search_option to filter_name mapping.
  2325. filter_mapping = {
  2326. 'image': 'image_ref',
  2327. 'name': 'display_name',
  2328. 'tenant_id': 'project_id',
  2329. 'flavor': _remap_flavor_filter,
  2330. 'fixed_ip': _remap_fixed_ip_filter}
  2331. # copy from search_opts, doing various remappings as necessary
  2332. for opt, value in search_opts.items():
  2333. # Do remappings.
  2334. # Values not in the filter_mapping table are copied as-is.
  2335. # If remapping is None, option is not copied
  2336. # If the remapping is a string, it is the filter_name to use
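# For example (illustrative values): {'name': 'web'} is remapped to
# filters['display_name'] = 'web', {'flavor': '42'} is handled by
# _remap_flavor_filter(), and an unknown option like {'foo': 'bar'} is
# copied through unchanged.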
  2337. try:
  2338. remap_object = filter_mapping[opt]
  2339. except KeyError:
  2340. filters[opt] = value
  2341. else:
  2342. # Remaps are strings to translate to, or functions to call
  2343. # to do the translating as defined by the table above.
  2344. if isinstance(remap_object, six.string_types):
  2345. filters[remap_object] = value
  2346. else:
  2347. try:
  2348. remap_object(value)
  2349. # We already know we can't match the filter, so
  2350. # return an empty list
  2351. except ValueError:
  2352. return objects.InstanceList()
  2353. # IP address filtering cannot be applied at the DB layer, remove any DB
  2354. # limit so that it can be applied after the IP filter.
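# e.g. with limit=5 and an 'ip' filter, limiting at the DB layer could
# return 5 instances none of which match the requested IP, so the original
# limit (orig_limit) is re-applied only after the in-memory IP filtering.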
  2355. filter_ip = 'ip6' in filters or 'ip' in filters
  2356. skip_build_request = False
  2357. orig_limit = limit
  2358. if filter_ip:
2359. # We cannot skip build requests if there is a marker since
  2360. # the marker could be a build request.
  2361. skip_build_request = marker is None
  2362. if self.network_api.has_substr_port_filtering_extension(context):
  2363. # We're going to filter by IP using Neutron so set filter_ip
  2364. # to False so we don't attempt post-DB query filtering in
  2365. # memory below.
  2366. filter_ip = False
  2367. instance_uuids = self._ip_filter_using_neutron(context,
  2368. filters)
  2369. if instance_uuids:
  2370. # Note that 'uuid' is not in the 2.1 GET /servers query
  2371. # parameter schema, however, we allow additionalProperties
  2372. # so someone could filter instances by uuid, which doesn't
  2373. # make a lot of sense but we have to account for it.
  2374. if 'uuid' in filters and filters['uuid']:
  2375. filter_uuids = filters['uuid']
  2376. if isinstance(filter_uuids, list):
  2377. instance_uuids.extend(filter_uuids)
  2378. else:
  2379. # Assume a string. If it's a dict or tuple or
  2380. # something, well...that's too bad. This is why
  2381. # we have query parameter schema definitions.
  2382. if filter_uuids not in instance_uuids:
  2383. instance_uuids.append(filter_uuids)
  2384. filters['uuid'] = instance_uuids
  2385. else:
  2386. # No matches on the ip filter(s), return an empty list.
  2387. return objects.InstanceList()
  2388. elif limit:
  2389. LOG.debug('Removing limit for DB query due to IP filter')
  2390. limit = None
  2391. # Skip get BuildRequest if filtering by IP address, as building
  2392. # instances will not have IP addresses.
  2393. if skip_build_request:
  2394. build_requests = objects.BuildRequestList()
  2395. else:
  2396. # The ordering of instances will be
  2397. # [sorted instances with no host] + [sorted instances with host].
  2398. # This means BuildRequest and cell0 instances first, then cell
  2399. # instances
  2400. try:
  2401. build_requests = objects.BuildRequestList.get_by_filters(
  2402. context, filters, limit=limit, marker=marker,
  2403. sort_keys=sort_keys, sort_dirs=sort_dirs)
2404. # If we found the marker in the build requests we need to set it
2405. # to None so we don't expect to find it in the cells below.
  2406. marker = None
  2407. except exception.MarkerNotFound:
  2408. # If we didn't find the marker in the build requests then keep
  2409. # looking for it in the cells.
  2410. build_requests = objects.BuildRequestList()
  2411. build_req_instances = objects.InstanceList(
  2412. objects=[build_req.instance for build_req in build_requests])
  2413. # Only subtract from limit if it is not None
  2414. limit = (limit - len(build_req_instances)) if limit else limit
  2415. # We could arguably avoid joining on security_groups if we're using
  2416. # neutron (which is the default) but if you're using neutron then the
  2417. # security_group_instance_association table should be empty anyway
  2418. # and the DB should optimize out that join, making it insignificant.
  2419. fields = ['metadata', 'info_cache', 'security_groups']
  2420. if expected_attrs:
  2421. fields.extend(expected_attrs)
  2422. insts, down_cell_uuids = instance_list.get_instance_objects_sorted(
  2423. context, filters, limit, marker, fields, sort_keys, sort_dirs,
  2424. cell_down_support=cell_down_support)
  2425. def _get_unique_filter_method():
  2426. seen_uuids = set()
  2427. def _filter(instance):
  2428. # During a cross-cell move operation we could have the instance
  2429. # in more than one cell database so we not only have to filter
  2430. # duplicates but we want to make sure we only return the
  2431. # "current" one which should also be the one that the instance
  2432. # mapping points to, but we don't want to do that expensive
  2433. # lookup here. The DB API will filter out hidden instances by
  2434. # default but there is a small window where two copies of an
  2435. # instance could be hidden=False in separate cell DBs.
  2436. # NOTE(mriedem): We could make this better in the case that we
  2437. # have duplicate instances that are both hidden=False by
  2438. # showing the one with the newer updated_at value, but that
  2439. # could be tricky if the user is filtering on
  2440. # changes-since/before or updated_at, or sorting on updated_at,
  2441. # but technically that was already potentially broken with this
  2442. # _filter method if we return an older BuildRequest.instance,
  2443. # and given the window should be very small where we have
  2444. # duplicates, it's probably not worth the complexity.
  2445. if instance.uuid in seen_uuids:
  2446. return False
  2447. seen_uuids.add(instance.uuid)
  2448. return True
  2449. return _filter
  2450. filter_method = _get_unique_filter_method()
  2451. # Only subtract from limit if it is not None
  2452. limit = (limit - len(insts)) if limit else limit
  2453. # TODO(alaski): Clean up the objects concatenation when List objects
  2454. # support it natively.
  2455. instances = objects.InstanceList(
  2456. objects=list(filter(filter_method,
  2457. build_req_instances.objects +
  2458. insts.objects)))
  2459. if filter_ip:
  2460. instances = self._ip_filter(instances, filters, orig_limit)
  2461. if cell_down_support:
  2462. # API and client want minimal construct instances for any cells
  2463. # that didn't return, so generate and prefix those to the actual
  2464. # results.
  2465. project = search_opts.get('project_id', context.project_id)
  2466. if all_tenants:
  2467. # NOTE(tssurya): The only scenario where project has to be None
  2468. # is when using "all_tenants" in which case we do not want
  2469. # the query to be restricted based on the project_id.
  2470. project = None
  2471. limit = (orig_limit - len(instances)) if limit else limit
  2472. return (self._generate_minimal_construct_for_down_cells(context,
  2473. down_cell_uuids, project, limit) + instances)
  2474. return instances
  2475. @staticmethod
  2476. def _ip_filter(inst_models, filters, limit):
  2477. ipv4_f = re.compile(str(filters.get('ip')))
  2478. ipv6_f = re.compile(str(filters.get('ip6')))
  2479. def _match_instance(instance):
  2480. nw_info = instance.get_network_info()
  2481. for vif in nw_info:
  2482. for fixed_ip in vif.fixed_ips():
  2483. address = fixed_ip.get('address')
  2484. if not address:
  2485. continue
  2486. version = fixed_ip.get('version')
  2487. if ((version == 4 and ipv4_f.match(address)) or
  2488. (version == 6 and ipv6_f.match(address))):
  2489. return True
  2490. return False
  2491. result_objs = []
  2492. for instance in inst_models:
  2493. if _match_instance(instance):
  2494. result_objs.append(instance)
  2495. if limit and len(result_objs) == limit:
  2496. break
  2497. return objects.InstanceList(objects=result_objs)
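# Illustrative sketch of the regex matching in _ip_filter above
# (hypothetical values): filters={'ip': '10.0.0.1'} compiles that string
# as a regex, so
#
#     ipv4_f = re.compile('10.0.0.1')
#     ipv4_f.match('10.0.0.12')    # also matches: re.match() is a prefix
#                                  # match and '.' is a wildcard
#
# Absent 'ip'/'ip6' keys compile the literal pattern 'None', which never
# matches a real address; callers only reach this helper when an IP
# filter was actually requested and Neutron-side substring filtering is
# not available.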
  2498. def _ip_filter_using_neutron(self, context, filters):
  2499. ip4_address = filters.get('ip')
  2500. ip6_address = filters.get('ip6')
  2501. addresses = [ip4_address, ip6_address]
  2502. uuids = []
  2503. for address in addresses:
  2504. if address:
  2505. try:
  2506. ports = self.network_api.list_ports(
  2507. context, fixed_ips='ip_address_substr=' + address,
  2508. fields=['device_id'])['ports']
  2509. for port in ports:
  2510. uuids.append(port['device_id'])
  2511. except Exception as e:
  2512. LOG.error('An error occurred while listing ports '
  2513. 'with an ip_address filter value of "%s". '
  2514. 'Error: %s',
  2515. address, six.text_type(e))
  2516. return uuids
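# Illustrative sketch (hypothetical value) of the Neutron query built
# above: for filters={'ip': '10.0.0'} the helper issues roughly
#
#     self.network_api.list_ports(
#         context, fixed_ips='ip_address_substr=10.0.0',
#         fields=['device_id'])
#
# and collects each returned port's device_id (the instance UUID) so
# the caller can turn the IP filter into a 'uuid' filter that the DB
# layer can apply.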
  2517. def update_instance(self, context, instance, updates):
  2518. """Updates a single Instance object with some updates dict.
  2519. Returns the updated instance.
  2520. """
  2521. # NOTE(sbauza): Given we only persist the Instance object after we
  2522. # create the BuildRequest, we are sure that if the Instance object
  2523. # has an ID field set, then it was persisted in the right Cell DB.
  2524. if instance.obj_attr_is_set('id'):
  2525. instance.update(updates)
  2526. instance.save()
  2527. else:
  2528. # Instance is not yet mapped to a cell, so we need to update
  2529. # BuildRequest instead
  2530. # TODO(sbauza): Fix the possible race conditions where BuildRequest
  2531. # could be deleted because of either a concurrent instance delete
  2532. # or because the scheduler just returned a destination right
  2533. # after we called the instance in the API.
  2534. try:
  2535. build_req = objects.BuildRequest.get_by_instance_uuid(
  2536. context, instance.uuid)
  2537. instance = build_req.instance
  2538. instance.update(updates)
  2539. # FIXME(sbauza): Here we are updating the current
  2540. # thread-related BuildRequest object. Given that another worker
  2541. # could be looking up that BuildRequest in the API, it
  2542. # means that it could pass it down to the conductor without
  2543. # seeing this update, so we could have some race
  2544. # condition where it would be missing the updated fields, but
  2545. # that's something we could discuss once the instance record
  2546. # is persisted by the conductor.
  2547. build_req.save()
  2548. except exception.BuildRequestNotFound:
  2549. # Instance was mapped and the BuildRequest was deleted
  2550. # while fetching (and possibly the instance could have been
  2551. # deleted as well). We need to look up the Instance object
  2552. # again in order to correctly update it.
  2553. # TODO(sbauza): Figure out a good way to know the expected
  2554. # attributes by checking which fields are set or not.
  2555. expected_attrs = ['flavor', 'pci_devices', 'numa_topology',
  2556. 'tags', 'metadata', 'system_metadata',
  2557. 'security_groups', 'info_cache']
  2558. inst_map = self._get_instance_map_or_none(context,
  2559. instance.uuid)
  2560. if inst_map and (inst_map.cell_mapping is not None):
  2561. with nova_context.target_cell(
  2562. context,
  2563. inst_map.cell_mapping) as cctxt:
  2564. instance = objects.Instance.get_by_uuid(
  2565. cctxt, instance.uuid,
  2566. expected_attrs=expected_attrs)
  2567. instance.update(updates)
  2568. instance.save()
  2569. else:
  2570. # Conductor doesn't delete the BuildRequest until after the
  2571. # InstanceMapping record is created, so if we didn't get
  2572. # that and the BuildRequest doesn't exist, then the
  2573. # instance is already gone and we need to just error out.
  2574. raise exception.InstanceNotFound(instance_id=instance.uuid)
  2575. return instance
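# Illustrative usage sketch (hypothetical names): a caller such as the
# API layer would do something like
#
#     instance = compute_api.update_instance(
#         ctxt, instance, {'display_name': 'renamed'})
#
# If the Instance already has a database 'id' it is saved directly in
# its cell; otherwise the updates land on the BuildRequest.instance, or,
# if the BuildRequest has just been deleted, on the freshly looked-up
# cell Instance via the InstanceMapping.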
  2576. # NOTE(melwitt): We don't check instance lock for backup because lock is
  2577. # intended to prevent accidental change/delete of instances
  2578. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2579. vm_states.PAUSED, vm_states.SUSPENDED])
  2580. def backup(self, context, instance, name, backup_type, rotation,
  2581. extra_properties=None):
  2582. """Backup the given instance
  2583. :param instance: nova.objects.instance.Instance object
  2584. :param name: name of the backup
  2585. :param backup_type: 'daily' or 'weekly'
  2586. :param rotation: int representing how many backups to keep around;
  2587. None if rotation shouldn't be used (as in the case of snapshots)
  2588. :param extra_properties: dict of extra image properties to include
  2589. when creating the image.
  2590. :returns: A dict containing image metadata
  2591. """
  2592. props_copy = dict(extra_properties or {}, backup_type=backup_type)
  2593. if compute_utils.is_volume_backed_instance(context, instance):
  2594. LOG.info("It's not supported to backup volume backed "
  2595. "instance.", instance=instance)
  2596. raise exception.InvalidRequest(
  2597. _('Backup is not supported for volume-backed instances.'))
  2598. else:
  2599. image_meta = compute_utils.create_image(
  2600. context, instance, name, 'backup', self.image_api,
  2601. extra_properties=props_copy)
  2602. instance.task_state = task_states.IMAGE_BACKUP
  2603. instance.save(expected_task_state=[None])
  2604. self._record_action_start(context, instance,
  2605. instance_actions.BACKUP)
  2606. self.compute_rpcapi.backup_instance(context, instance,
  2607. image_meta['id'],
  2608. backup_type,
  2609. rotation)
  2610. return image_meta
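# Illustrative usage sketch (hypothetical names and values):
#
#     image_meta = compute_api.backup(
#         ctxt, instance, 'nightly-backup', backup_type='daily',
#         rotation=7, extra_properties={'team': 'dbs'})
#
# creates a 'backup'-typed image and casts backup_instance() with
# rotation=7 so the compute service can rotate out older daily backups;
# rotation=None means no rotation, as with regular snapshots.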
  2611. # NOTE(melwitt): We don't check instance lock for snapshot because lock is
  2612. # intended to prevent accidental change/delete of instances
  2613. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2614. vm_states.PAUSED, vm_states.SUSPENDED])
  2615. def snapshot(self, context, instance, name, extra_properties=None):
  2616. """Snapshot the given instance.
  2617. :param instance: nova.objects.instance.Instance object
  2618. :param name: name of the snapshot
  2619. :param extra_properties: dict of extra image properties to include
  2620. when creating the image.
  2621. :returns: A dict containing image metadata
  2622. """
  2623. image_meta = compute_utils.create_image(
  2624. context, instance, name, 'snapshot', self.image_api,
  2625. extra_properties=extra_properties)
  2626. instance.task_state = task_states.IMAGE_SNAPSHOT_PENDING
  2627. try:
  2628. instance.save(expected_task_state=[None])
  2629. except (exception.InstanceNotFound,
  2630. exception.UnexpectedDeletingTaskStateError) as ex:
  2631. # Work out the attribute/state to use when raising the
  2632. # InstanceInvalidState exception below
  2633. LOG.debug('Instance disappeared during snapshot.',
  2634. instance=instance)
  2635. try:
  2636. image_id = image_meta['id']
  2637. self.image_api.delete(context, image_id)
  2638. LOG.info('Image %s deleted because instance '
  2639. 'deleted before snapshot started.',
  2640. image_id, instance=instance)
  2641. except exception.ImageNotFound:
  2642. pass
  2643. except Exception as exc:
  2644. LOG.warning("Error while trying to clean up image %(img_id)s: "
  2645. "%(error_msg)s",
  2646. {"img_id": image_meta['id'],
  2647. "error_msg": six.text_type(exc)})
  2648. attr = 'task_state'
  2649. state = task_states.DELETING
  2650. if type(ex) == exception.InstanceNotFound:
  2651. attr = 'vm_state'
  2652. state = vm_states.DELETED
  2653. raise exception.InstanceInvalidState(attr=attr,
  2654. instance_uuid=instance.uuid,
  2655. state=state,
  2656. method='snapshot')
  2657. self._record_action_start(context, instance,
  2658. instance_actions.CREATE_IMAGE)
  2659. self.compute_rpcapi.snapshot_instance(context, instance,
  2660. image_meta['id'])
  2661. return image_meta
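# Illustrative note on the failure path above: if the instance is
# deleted between create_image() and instance.save(), the freshly
# created image is deleted again and the error surfaces as
# InstanceInvalidState, e.g. attr='vm_state'/state=DELETED when the
# instance is gone (InstanceNotFound), or attr='task_state'/
# state=DELETING when its task state changed underneath us.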
  2662. # NOTE(melwitt): We don't check instance lock for snapshot because lock is
  2663. # intended to prevent accidental change/delete of instances
  2664. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2665. vm_states.SUSPENDED])
  2666. def snapshot_volume_backed(self, context, instance, name,
  2667. extra_properties=None):
  2668. """Snapshot the given volume-backed instance.
  2669. :param instance: nova.objects.instance.Instance object
  2670. :param name: name of the backup or snapshot
  2671. :param extra_properties: dict of extra image properties to include
  2672. :returns: the new image metadata
  2673. """
  2674. image_meta = compute_utils.initialize_instance_snapshot_metadata(
  2675. context, instance, name, extra_properties)
  2676. # the new image is simply a bucket of properties (particularly the
  2677. # block device mapping, kernel and ramdisk IDs) with no image data,
  2678. # hence the zero size
  2679. image_meta['size'] = 0
  2680. for attr in ('container_format', 'disk_format'):
  2681. image_meta.pop(attr, None)
  2682. properties = image_meta['properties']
  2683. # clean properties before filling
  2684. for key in ('block_device_mapping', 'bdm_v2', 'root_device_name'):
  2685. properties.pop(key, None)
  2686. if instance.root_device_name:
  2687. properties['root_device_name'] = instance.root_device_name
  2688. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  2689. context, instance.uuid)
  2690. mapping = [] # list of BDM dicts that can go into the image properties
  2691. # Do some up-front filtering of the list of BDMs from
  2692. # which we are going to create snapshots.
  2693. volume_bdms = []
  2694. for bdm in bdms:
  2695. if bdm.no_device:
  2696. continue
  2697. if bdm.is_volume:
  2698. # These will be handled below.
  2699. volume_bdms.append(bdm)
  2700. else:
  2701. mapping.append(bdm.get_image_mapping())
  2702. # Check limits in Cinder before creating snapshots to avoid going over
  2703. # quota in the middle of a list of volumes. This is a best-effort check
  2704. # but concurrently running snapshot requests from the same project
  2705. # could still fail to create volume snapshots if they go over limit.
  2706. if volume_bdms:
  2707. limits = self.volume_api.get_absolute_limits(context)
  2708. total_snapshots_used = limits['totalSnapshotsUsed']
  2709. max_snapshots = limits['maxTotalSnapshots']
  2710. # -1 means there is unlimited quota for snapshots
  2711. if (max_snapshots > -1 and
  2712. len(volume_bdms) + total_snapshots_used > max_snapshots):
  2713. LOG.debug('Unable to create volume snapshots for instance. '
  2714. 'Currently has %s snapshots, requesting %s new '
  2715. 'snapshots, with a limit of %s.',
  2716. total_snapshots_used, len(volume_bdms),
  2717. max_snapshots, instance=instance)
  2718. raise exception.OverQuota(overs='snapshots')
  2719. quiesced = False
  2720. if instance.vm_state == vm_states.ACTIVE:
  2721. try:
  2722. LOG.info("Attempting to quiesce instance before volume "
  2723. "snapshot.", instance=instance)
  2724. self.compute_rpcapi.quiesce_instance(context, instance)
  2725. quiesced = True
  2726. except (exception.InstanceQuiesceNotSupported,
  2727. exception.QemuGuestAgentNotEnabled,
  2728. exception.NovaException, NotImplementedError) as err:
  2729. if strutils.bool_from_string(instance.system_metadata.get(
  2730. 'image_os_require_quiesce')):
  2731. raise
  2732. if isinstance(err, exception.NovaException):
  2733. LOG.info('Skipping quiescing instance: %(reason)s.',
  2734. {'reason': err.format_message()},
  2735. instance=instance)
  2736. else:
  2737. LOG.info('Skipping quiescing instance because the '
  2738. 'operation is not supported by the underlying '
  2739. 'compute driver.', instance=instance)
  2740. # NOTE(tasker): discovered that an uncaught exception could occur
  2741. # after the instance has been frozen. Catch and thaw.
  2742. except Exception as ex:
  2743. with excutils.save_and_reraise_exception():
  2744. LOG.error("An error occurred during quiesce of instance. "
  2745. "Unquiescing to ensure instance is thawed. "
  2746. "Error: %s", six.text_type(ex),
  2747. instance=instance)
  2748. self.compute_rpcapi.unquiesce_instance(context, instance,
  2749. mapping=None)
  2750. @wrap_instance_event(prefix='api')
  2751. def snapshot_instance(self, context, instance, bdms):
  2752. try:
  2753. for bdm in volume_bdms:
  2754. # create snapshot based on volume_id
  2755. volume = self.volume_api.get(context, bdm.volume_id)
  2756. # NOTE(yamahata): Should we wait for snapshot creation?
  2757. # Linux LVM snapshot creation completes in a
  2758. # short time, so it doesn't matter for now.
  2759. name = _('snapshot for %s') % image_meta['name']
  2760. LOG.debug('Creating snapshot from volume %s.',
  2761. volume['id'], instance=instance)
  2762. snapshot = self.volume_api.create_snapshot_force(
  2763. context, volume['id'],
  2764. name, volume['display_description'])
  2765. mapping_dict = block_device.snapshot_from_bdm(
  2766. snapshot['id'], bdm)
  2767. mapping_dict = mapping_dict.get_image_mapping()
  2768. mapping.append(mapping_dict)
  2769. return mapping
  2770. # NOTE(tasker): No error handling is done in the above for loop.
  2771. # This means that if the snapshot fails and throws an exception
  2772. # the traceback will skip right over the unquiesce needed below.
  2773. # Here, catch any exception, unquiesce the instance, and raise the
  2774. # error so that the calling function can do what it needs to in
  2775. # order to properly treat a failed snap.
  2776. except Exception:
  2777. with excutils.save_and_reraise_exception():
  2778. if quiesced:
  2779. LOG.info("Unquiescing instance after volume snapshot "
  2780. "failure.", instance=instance)
  2781. self.compute_rpcapi.unquiesce_instance(
  2782. context, instance, mapping)
  2783. self._record_action_start(context, instance,
  2784. instance_actions.CREATE_IMAGE)
  2785. mapping = snapshot_instance(self, context, instance, bdms)
  2786. if quiesced:
  2787. self.compute_rpcapi.unquiesce_instance(context, instance, mapping)
  2788. if mapping:
  2789. properties['block_device_mapping'] = mapping
  2790. properties['bdm_v2'] = True
  2791. return self.image_api.create(context, image_meta)
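# Illustrative sketch (hypothetical IDs) of the image this method
# registers: for a server with a single root volume the metadata ends up
# roughly as
#
#     {'size': 0,
#      'properties': {'root_device_name': '/dev/vda',
#                     'bdm_v2': True,
#                     'block_device_mapping': [
#                         {'source_type': 'snapshot',
#                          'destination_type': 'volume',
#                          'snapshot_id': '<new snapshot uuid>',
#                          'boot_index': 0, ...}]}}
#
# i.e. a bucket of BDM properties rather than image data, as noted
# above.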
  2792. @check_instance_lock
  2793. def reboot(self, context, instance, reboot_type):
  2794. """Reboot the given instance."""
  2795. if reboot_type == 'SOFT':
  2796. self._soft_reboot(context, instance)
  2797. else:
  2798. self._hard_reboot(context, instance)
  2799. @check_instance_state(vm_state=set(vm_states.ALLOW_SOFT_REBOOT),
  2800. task_state=[None])
  2801. def _soft_reboot(self, context, instance):
  2802. expected_task_state = [None]
  2803. instance.task_state = task_states.REBOOTING
  2804. instance.save(expected_task_state=expected_task_state)
  2805. self._record_action_start(context, instance, instance_actions.REBOOT)
  2806. self.compute_rpcapi.reboot_instance(context, instance=instance,
  2807. block_device_info=None,
  2808. reboot_type='SOFT')
  2809. @check_instance_state(vm_state=set(vm_states.ALLOW_HARD_REBOOT),
  2810. task_state=task_states.ALLOW_REBOOT)
  2811. def _hard_reboot(self, context, instance):
  2812. instance.task_state = task_states.REBOOTING_HARD
  2813. instance.save(expected_task_state=task_states.ALLOW_REBOOT)
  2814. self._record_action_start(context, instance, instance_actions.REBOOT)
  2815. self.compute_rpcapi.reboot_instance(context, instance=instance,
  2816. block_device_info=None,
  2817. reboot_type='HARD')
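# Illustrative note (hypothetical call): reboot(ctxt, instance, 'SOFT')
# requires that no task is in flight (task_state=[None]) and an
# ALLOW_SOFT_REBOOT vm_state, while any other reboot_type takes the
# _hard_reboot path, which is additionally allowed while a previous
# reboot task is still set (task_states.ALLOW_REBOOT), e.g. to escalate
# a pending soft reboot to a hard one.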
  2818. # TODO(stephenfin): We should expand kwargs out to named args
  2819. @check_instance_lock
  2820. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  2821. vm_states.ERROR])
  2822. def rebuild(self, context, instance, image_href, admin_password,
  2823. files_to_inject=None, **kwargs):
  2824. """Rebuild the given instance with the provided attributes."""
  2825. files_to_inject = files_to_inject or []
  2826. metadata = kwargs.get('metadata', {})
  2827. preserve_ephemeral = kwargs.get('preserve_ephemeral', False)
  2828. auto_disk_config = kwargs.get('auto_disk_config')
  2829. if 'key_name' in kwargs:
  2830. key_name = kwargs.pop('key_name')
  2831. if key_name:
  2832. # NOTE(liuyulong): we are intentionally using the user_id from
  2833. # the request context rather than the instance.user_id because
  2834. # users own keys but instances are owned by projects, and
  2835. # another user in the same project can rebuild an instance
  2836. # even if they didn't create it.
  2837. key_pair = objects.KeyPair.get_by_name(context,
  2838. context.user_id,
  2839. key_name)
  2840. instance.key_name = key_pair.name
  2841. instance.key_data = key_pair.public_key
  2842. instance.keypairs = objects.KeyPairList(objects=[key_pair])
  2843. else:
  2844. instance.key_name = None
  2845. instance.key_data = None
  2846. instance.keypairs = objects.KeyPairList(objects=[])
  2847. # Use trusted_certs value from kwargs to create TrustedCerts object
  2848. trusted_certs = None
  2849. if 'trusted_certs' in kwargs:
  2850. # Note that the user can set, change, or unset / reset trusted
  2851. # certs. If they are explicitly specifying
  2852. # trusted_image_certificates=None, that means we'll either unset
  2853. # them on the instance *or* reset to use the defaults (if defaults
  2854. # are configured).
  2855. trusted_certs = kwargs.pop('trusted_certs')
  2856. instance.trusted_certs = self._retrieve_trusted_certs_object(
  2857. context, trusted_certs, rebuild=True)
  2858. image_id, image = self._get_image(context, image_href)
  2859. self._check_auto_disk_config(image=image, **kwargs)
  2860. flavor = instance.get_flavor()
  2861. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  2862. context, instance.uuid)
  2863. root_bdm = compute_utils.get_root_bdm(context, instance, bdms)
  2864. # Check to see if the image is changing and we have a volume-backed
  2865. # server. The compute doesn't support changing the image in the
  2866. # root disk of a volume-backed server, so we need to just fail fast.
  2867. is_volume_backed = compute_utils.is_volume_backed_instance(
  2868. context, instance, bdms)
  2869. if is_volume_backed:
  2870. if trusted_certs:
  2871. # The only way we can get here is if the user tried to set
  2872. # trusted certs or specified trusted_image_certificates=None
  2873. # and default_trusted_certificate_ids is configured.
  2874. msg = _("Image certificate validation is not supported "
  2875. "for volume-backed servers.")
  2876. raise exception.CertificateValidationFailed(message=msg)
  2877. # For boot from volume, instance.image_ref is empty, so we need to
  2878. # query the image from the volume.
  2879. if root_bdm is None:
  2880. # This shouldn't happen and is an error, so we need to fail. This
  2881. # is not the user's fault, it's an internal error. Without a
  2882. # root BDM we have no way of knowing the backing volume (or
  2883. # image in that volume) for this instance.
  2884. raise exception.NovaException(
  2885. _('Unable to find root block device mapping for '
  2886. 'volume-backed instance.'))
  2887. volume = self.volume_api.get(context, root_bdm.volume_id)
  2888. volume_image_metadata = volume.get('volume_image_metadata', {})
  2889. orig_image_ref = volume_image_metadata.get('image_id')
  2890. if orig_image_ref != image_href:
  2891. # Leave a breadcrumb.
  2892. LOG.debug('Requested to rebuild instance with a new image %s '
  2893. 'for a volume-backed server with image %s in its '
  2894. 'root volume which is not supported.', image_href,
  2895. orig_image_ref, instance=instance)
  2896. msg = _('Unable to rebuild with a different image for a '
  2897. 'volume-backed server.')
  2898. raise exception.ImageUnacceptable(
  2899. image_id=image_href, reason=msg)
  2900. else:
  2901. orig_image_ref = instance.image_ref
  2902. request_spec = objects.RequestSpec.get_by_instance_uuid(
  2903. context, instance.uuid)
  2904. self._checks_for_create_and_rebuild(context, image_id, image,
  2905. flavor, metadata, files_to_inject, root_bdm)
  2906. kernel_id, ramdisk_id = self._handle_kernel_and_ramdisk(
  2907. context, None, None, image)
  2908. def _reset_image_metadata():
  2909. """Remove old image properties that we're storing as instance
  2910. system metadata. These properties start with 'image_'.
  2911. Then add the properties for the new image.
  2912. """
  2913. # FIXME(comstud): There's a race condition here in that if
  2914. # the system_metadata for this instance is updated after
  2915. # we do the previous save() and before we update... those
  2916. # other updates will be lost. Since this problem exists in
  2917. # a lot of other places, I think it should be addressed in
  2918. # a DB layer overhaul.
  2919. orig_sys_metadata = dict(instance.system_metadata)
  2920. # Remove the old keys
  2921. for key in list(instance.system_metadata.keys()):
  2922. if key.startswith(utils.SM_IMAGE_PROP_PREFIX):
  2923. del instance.system_metadata[key]
  2924. # Add the new ones
  2925. new_sys_metadata = utils.get_system_metadata_from_image(
  2926. image, flavor)
  2927. instance.system_metadata.update(new_sys_metadata)
  2928. instance.save()
  2929. return orig_sys_metadata
  2930. # Since image might have changed, we may have new values for
  2931. # os_type, vm_mode, etc
  2932. options_from_image = self._inherit_properties_from_image(
  2933. image, auto_disk_config)
  2934. instance.update(options_from_image)
  2935. instance.task_state = task_states.REBUILDING
  2936. # An empty instance.image_ref is currently used as an indication
  2937. # of BFV. Preserve that over a rebuild to not break users.
  2938. if not is_volume_backed:
  2939. instance.image_ref = image_href
  2940. instance.kernel_id = kernel_id or ""
  2941. instance.ramdisk_id = ramdisk_id or ""
  2942. instance.progress = 0
  2943. instance.update(kwargs)
  2944. instance.save(expected_task_state=[None])
  2945. # On a rebuild, since we're potentially changing images, we need to
  2946. # wipe out the old image properties that we're storing as instance
  2947. # system metadata... and copy in the properties for the new image.
  2948. orig_sys_metadata = _reset_image_metadata()
  2949. self._record_action_start(context, instance, instance_actions.REBUILD)
  2950. # NOTE(sbauza): The migration script we provided in Newton should make
  2951. # sure that all our instances are currently migrated to have an
  2952. # attached RequestSpec object but let's consider that the operator only
  2953. # half migrated all their instances in the meantime.
  2954. host = instance.host
  2955. # If a new image is provided on rebuild, we will need to run
  2956. # through the scheduler again, but we want the instance to be
  2957. # rebuilt on the same host it's already on.
  2958. if orig_image_ref != image_href:
  2959. # We have to modify the request spec that goes to the scheduler
  2960. # to contain the new image. We persist this since we've already
  2961. # changed the instance.image_ref above so we're being
  2962. # consistent.
  2963. request_spec.image = objects.ImageMeta.from_dict(image)
  2964. request_spec.save()
  2965. if 'scheduler_hints' not in request_spec:
  2966. request_spec.scheduler_hints = {}
  2967. # Nuke the id on this so we can't accidentally save
  2968. # this hint hack later
  2969. del request_spec.id
  2970. # NOTE(danms): Passing host=None tells conductor to
  2971. # call the scheduler. The _nova_check_type hint
  2972. # requires that the scheduler returns only the same
  2973. # host that we are currently on and only checks
  2974. # rebuild-related filters.
  2975. request_spec.scheduler_hints['_nova_check_type'] = ['rebuild']
  2976. request_spec.force_hosts = [instance.host]
  2977. request_spec.force_nodes = [instance.node]
  2978. host = None
  2979. self.compute_task_api.rebuild_instance(context, instance=instance,
  2980. new_pass=admin_password, injected_files=files_to_inject,
  2981. image_ref=image_href, orig_image_ref=orig_image_ref,
  2982. orig_sys_metadata=orig_sys_metadata, bdms=bdms,
  2983. preserve_ephemeral=preserve_ephemeral, host=host,
  2984. request_spec=request_spec)
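# Illustrative sketch of the image-change path above: when a new image
# is requested the rebuild goes back through the scheduler with
# host=None and a request spec roughly like
#
#     request_spec.scheduler_hints = {'_nova_check_type': ['rebuild']}
#     request_spec.force_hosts = [instance.host]
#     request_spec.force_nodes = [instance.node]
#
# so only rebuild-related filters run and only the current host can be
# returned; a rebuild with the unchanged image keeps host set and skips
# scheduling entirely.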
  2985. @staticmethod
  2986. def _check_quota_for_upsize(context, instance, current_flavor, new_flavor):
  2987. project_id, user_id = quotas_obj.ids_from_instance(context,
  2988. instance)
  2989. # Deltas will be empty if the resize is not an upsize.
  2990. deltas = compute_utils.upsize_quota_delta(new_flavor,
  2991. current_flavor)
  2992. if deltas:
  2993. try:
  2994. res_deltas = {'cores': deltas.get('cores', 0),
  2995. 'ram': deltas.get('ram', 0)}
  2996. objects.Quotas.check_deltas(context, res_deltas,
  2997. project_id, user_id=user_id,
  2998. check_project_id=project_id,
  2999. check_user_id=user_id)
  3000. except exception.OverQuota as exc:
  3001. quotas = exc.kwargs['quotas']
  3002. overs = exc.kwargs['overs']
  3003. usages = exc.kwargs['usages']
  3004. headroom = compute_utils.get_headroom(quotas, usages,
  3005. deltas)
  3006. (overs, reqs, total_alloweds,
  3007. useds) = compute_utils.get_over_quota_detail(headroom,
  3008. overs,
  3009. quotas,
  3010. deltas)
  3011. LOG.info("%(overs)s quota exceeded for %(pid)s,"
  3012. " tried to resize instance.",
  3013. {'overs': overs, 'pid': context.project_id})
  3014. raise exception.TooManyInstances(overs=overs,
  3015. req=reqs,
  3016. used=useds,
  3017. allowed=total_alloweds)
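# Illustrative sketch (hypothetical flavors): upsizing from 2 vCPUs /
# 4096 MB RAM to 4 vCPUs / 8192 MB yields deltas like
#
#     {'cores': 2, 'ram': 4096}
#
# which are passed to objects.Quotas.check_deltas(); a same-size or
# downsize resize produces empty deltas and skips the quota check.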
  3018. @check_instance_lock
  3019. @check_instance_state(vm_state=[vm_states.RESIZED])
  3020. def revert_resize(self, context, instance):
  3021. """Reverts a resize or cold migration, deleting the 'new' instance in
  3022. the process.
  3023. """
  3024. elevated = context.elevated()
  3025. migration = objects.Migration.get_by_instance_and_status(
  3026. elevated, instance.uuid, 'finished')
  3027. # If this is a resize down, a revert might go over quota.
  3028. self._check_quota_for_upsize(context, instance, instance.flavor,
  3029. instance.old_flavor)
  3030. # The AZ for the server may have changed when it was migrated so while
  3031. # we are in the API and have access to the API DB, update the
  3032. # instance.availability_zone before casting off to the compute service.
  3033. # Note that we do this in the API to avoid an "up-call" from the
  3034. # compute service to the API DB. This is not great in case something
  3035. # fails during revert before the instance.host is updated to the
  3036. # original source host, but it is good enough for now. Long-term we
  3037. # could consider passing the AZ down to compute so it can set it when
  3038. # the instance.host value is set in finish_revert_resize.
  3039. instance.availability_zone = (
  3040. availability_zones.get_host_availability_zone(
  3041. context, migration.source_compute))
  3042. # Conductor updated the RequestSpec.flavor during the initial resize
  3043. # operation to point at the new flavor, so we need to update the
  3044. # RequestSpec to point back at the original flavor, otherwise
  3045. # subsequent move operations through the scheduler will be using the
  3046. # wrong flavor.
  3047. reqspec = objects.RequestSpec.get_by_instance_uuid(
  3048. context, instance.uuid)
  3049. reqspec.flavor = instance.old_flavor
  3050. reqspec.save()
  3051. # NOTE(gibi): This is a performance optimization. If the network info
  3052. # cache does not have ports with allocations in the binding profile
  3053. # then we can skip reading port resource request from neutron below.
  3054. # If a port has resource request then that would have already caused
  3055. # that the finish_resize call put allocation in the binding profile
  3056. # during the resize.
  3057. if instance.get_network_info().has_port_with_allocation():
  3058. # TODO(gibi): do not directly overwrite the
  3059. # RequestSpec.requested_resources as others like cyborg might have
  3060. # already added things there
  3061. # NOTE(gibi): We need to collect the requested resource again as it
  3062. # is intentionally not persisted in nova. Note that this needs to
  3063. # be done here as the nova API code directly calls revert on the
  3064. # dest compute service skipping the conductor.
  3065. port_res_req = (
  3066. self.network_api.get_requested_resource_for_instance(
  3067. context, instance.uuid))
  3068. reqspec.requested_resources = port_res_req
  3069. instance.task_state = task_states.RESIZE_REVERTING
  3070. instance.save(expected_task_state=[None])
  3071. migration.status = 'reverting'
  3072. migration.save()
  3073. self._record_action_start(context, instance,
  3074. instance_actions.REVERT_RESIZE)
  3075. # TODO(melwitt): We're not rechecking for strict quota here to guard
  3076. # against going over quota during a race at this time because the
  3077. # resource consumption for this operation is written to the database
  3078. # by compute.
  3079. self.compute_rpcapi.revert_resize(context, instance,
  3080. migration,
  3081. migration.dest_compute,
  3082. reqspec)
  3083. @check_instance_lock
  3084. @check_instance_state(vm_state=[vm_states.RESIZED])
  3085. def confirm_resize(self, context, instance, migration=None):
  3086. """Confirms a migration/resize and deletes the 'old' instance."""
  3087. elevated = context.elevated()
  3088. # NOTE(melwitt): We're not checking quota here because there isn't a
  3089. # change in resource usage when confirming a resize. Resource
  3090. # consumption for resizes are written to the database by compute, so
  3091. # a confirm resize is just a clean up of the migration objects and a
  3092. # state change in compute.
  3093. if migration is None:
  3094. migration = objects.Migration.get_by_instance_and_status(
  3095. elevated, instance.uuid, 'finished')
  3096. migration.status = 'confirming'
  3097. migration.save()
  3098. self._record_action_start(context, instance,
  3099. instance_actions.CONFIRM_RESIZE)
  3100. self.compute_rpcapi.confirm_resize(context,
  3101. instance,
  3102. migration,
  3103. migration.source_compute)
  3104. # TODO(mriedem): It looks like for resize (not cold migrate) the only
  3105. # possible kwarg here is auto_disk_config. Drop this dumb **kwargs and make
  3106. # it explicitly an auto_disk_config param
  3107. @check_instance_lock
  3108. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED])
  3109. def resize(self, context, instance, flavor_id=None, clean_shutdown=True,
  3110. host_name=None, **extra_instance_updates):
  3111. """Resize (ie, migrate) a running instance.
  3112. If flavor_id is None, the process is considered a migration, keeping
  3113. the original flavor_id. If flavor_id is not None, the instance should
  3114. be migrated to a new host and resized to the new flavor_id.
  3115. host_name is always None in the resize case; it can only be set
  3116. in the cold migration case.
  3117. """
  3118. if host_name is not None:
  3119. # Cannot migrate to the host where the instance already lives
  3120. # because that would be a no-op.
  3121. if host_name == instance.host:
  3122. raise exception.CannotMigrateToSameHost()
  3123. # Check whether host exists or not.
  3124. node = objects.ComputeNode.get_first_node_by_host_for_old_compat(
  3125. context, host_name, use_slave=True)
  3126. self._check_auto_disk_config(instance, **extra_instance_updates)
  3127. current_instance_type = instance.get_flavor()
  3128. # If flavor_id is not provided, only migrate the instance.
  3129. volume_backed = None
  3130. if not flavor_id:
  3131. LOG.debug("flavor_id is None. Assuming migration.",
  3132. instance=instance)
  3133. new_instance_type = current_instance_type
  3134. else:
  3135. new_instance_type = flavors.get_flavor_by_flavor_id(
  3136. flavor_id, read_deleted="no")
  3137. # Check to see if we're resizing to a zero-disk flavor which is
  3138. # only supported with volume-backed servers.
  3139. if (new_instance_type.get('root_gb') == 0 and
  3140. current_instance_type.get('root_gb') != 0):
  3141. volume_backed = compute_utils.is_volume_backed_instance(
  3142. context, instance)
  3143. if not volume_backed:
  3144. reason = _('Resize to zero disk flavor is not allowed.')
  3145. raise exception.CannotResizeDisk(reason=reason)
  3146. current_instance_type_name = current_instance_type['name']
  3147. new_instance_type_name = new_instance_type['name']
  3148. LOG.debug("Old instance type %(current_instance_type_name)s, "
  3149. "new instance type %(new_instance_type_name)s",
  3150. {'current_instance_type_name': current_instance_type_name,
  3151. 'new_instance_type_name': new_instance_type_name},
  3152. instance=instance)
  3153. same_instance_type = (current_instance_type['id'] ==
  3154. new_instance_type['id'])
  3155. # NOTE(sirp): We don't want to force a customer to change their flavor
  3156. # when Ops is migrating off of a failed host.
  3157. if not same_instance_type and new_instance_type.get('disabled'):
  3158. raise exception.FlavorNotFound(flavor_id=flavor_id)
  3159. if same_instance_type and flavor_id:
  3160. raise exception.CannotResizeToSameFlavor()
  3161. # ensure there is sufficient headroom for upsizes
  3162. if flavor_id:
  3163. self._check_quota_for_upsize(context, instance,
  3164. current_instance_type,
  3165. new_instance_type)
  3166. if not same_instance_type:
  3167. image = utils.get_image_from_system_metadata(
  3168. instance.system_metadata)
  3169. # Figure out if the instance is volume-backed but only if we didn't
  3170. # already figure that out above (avoid the extra db hit).
  3171. if volume_backed is None:
  3172. volume_backed = compute_utils.is_volume_backed_instance(
  3173. context, instance)
  3174. # If the server is volume-backed, we still want to validate numa
  3175. # and pci information in the new flavor, but we don't call
  3176. # _validate_flavor_image_nostatus because its disk size
  3177. # validation was not intended for a volume-backed
  3178. # resize case.
  3179. if volume_backed:
  3180. self._validate_flavor_image_numa_pci(
  3181. image, new_instance_type, validate_pci=True)
  3182. else:
  3183. self._validate_flavor_image_nostatus(
  3184. context, image, new_instance_type, root_bdm=None,
  3185. validate_pci=True)
  3186. filter_properties = {'ignore_hosts': []}
  3187. if not CONF.allow_resize_to_same_host:
  3188. filter_properties['ignore_hosts'].append(instance.host)
  3189. request_spec = objects.RequestSpec.get_by_instance_uuid(
  3190. context, instance.uuid)
  3191. request_spec.ignore_hosts = filter_properties['ignore_hosts']
  3192. instance.task_state = task_states.RESIZE_PREP
  3193. instance.progress = 0
  3194. instance.update(extra_instance_updates)
  3195. instance.save(expected_task_state=[None])
  3196. if not flavor_id:
  3197. self._record_action_start(context, instance,
  3198. instance_actions.MIGRATE)
  3199. else:
  3200. self._record_action_start(context, instance,
  3201. instance_actions.RESIZE)
  3202. # TODO(melwitt): We're not rechecking for strict quota here to guard
  3203. # against going over quota during a race at this time because the
  3204. # resource consumption for this operation is written to the database
  3205. # by compute.
  3206. scheduler_hint = {'filter_properties': filter_properties}
  3207. if host_name is None:
  3208. # If 'host_name' is not specified,
  3209. # clear the 'requested_destination' field of the RequestSpec.
  3210. request_spec.requested_destination = None
  3211. else:
  3212. # Set the host and the node so that the scheduler will
  3213. # validate them.
  3214. request_spec.requested_destination = objects.Destination(
  3215. host=node.host, node=node.hypervisor_hostname)
  3216. self.compute_task_api.resize_instance(context, instance,
  3217. scheduler_hint=scheduler_hint,
  3218. flavor=new_instance_type,
  3219. clean_shutdown=clean_shutdown,
  3220. request_spec=request_spec)
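# Illustrative usage sketch (hypothetical names): this single entry
# point backs both cold migration and resize, e.g.
#
#     compute_api.resize(ctxt, instance)                    # migrate
#     compute_api.resize(ctxt, instance, flavor_id='42')    # resize
#     compute_api.resize(ctxt, instance, host_name='node2') # targeted
#
# flavor_id=None keeps the current flavor and records a MIGRATE action,
# a flavor_id records RESIZE, and host_name pre-populates
# RequestSpec.requested_destination so the scheduler validates the
# requested host/node.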
  3221. @check_instance_lock
  3222. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  3223. vm_states.PAUSED, vm_states.SUSPENDED])
  3224. def shelve(self, context, instance, clean_shutdown=True):
  3225. """Shelve an instance.
  3226. Shuts down an instance and frees it up to be removed from the
  3227. hypervisor.
  3228. """
  3229. instance.task_state = task_states.SHELVING
  3230. instance.save(expected_task_state=[None])
  3231. self._record_action_start(context, instance, instance_actions.SHELVE)
  3232. if not compute_utils.is_volume_backed_instance(context, instance):
  3233. name = '%s-shelved' % instance.display_name
  3234. image_meta = compute_utils.create_image(
  3235. context, instance, name, 'snapshot', self.image_api)
  3236. image_id = image_meta['id']
  3237. self.compute_rpcapi.shelve_instance(context, instance=instance,
  3238. image_id=image_id, clean_shutdown=clean_shutdown)
  3239. else:
  3240. self.compute_rpcapi.shelve_offload_instance(context,
  3241. instance=instance, clean_shutdown=clean_shutdown)
  3242. @check_instance_lock
  3243. @check_instance_state(vm_state=[vm_states.SHELVED])
  3244. def shelve_offload(self, context, instance, clean_shutdown=True):
  3245. """Remove a shelved instance from the hypervisor."""
  3246. instance.task_state = task_states.SHELVING_OFFLOADING
  3247. instance.save(expected_task_state=[None])
  3248. self._record_action_start(context, instance,
  3249. instance_actions.SHELVE_OFFLOAD)
  3250. self.compute_rpcapi.shelve_offload_instance(context, instance=instance,
  3251. clean_shutdown=clean_shutdown)
  3252. def _validate_unshelve_az(self, context, instance, availability_zone):
  3253. """Verify the specified availability_zone during unshelve.
  3254. Verifies that the server is shelved offloaded, the AZ exists and
  3255. if [cinder]/cross_az_attach=False, that any attached volumes are in
  3256. the same AZ.
  3257. :param context: nova auth RequestContext for the unshelve action
  3258. :param instance: Instance object for the server being unshelved
  3259. :param availability_zone: The user-requested availability zone in
  3260. which to unshelve the server.
  3261. :raises: UnshelveInstanceInvalidState if the server is not shelved
  3262. offloaded
  3263. :raises: InvalidRequest if the requested AZ does not exist
  3264. :raises: MismatchVolumeAZException if [cinder]/cross_az_attach=False
  3265. and any attached volumes are not in the requested AZ
  3266. """
  3267. if instance.vm_state != vm_states.SHELVED_OFFLOADED:
  3268. # NOTE(brinzhang): If the server status is 'SHELVED', it still
  3269. # belongs to a host, so the availability_zone has not changed.
  3270. # Unshelving a shelved offloaded server will go through the
  3271. # scheduler to find a new host.
  3272. raise exception.UnshelveInstanceInvalidState(
  3273. state=instance.vm_state, instance_uuid=instance.uuid)
  3274. available_zones = availability_zones.get_availability_zones(
  3275. context, self.host_api, get_only_available=True)
  3276. if availability_zone not in available_zones:
  3277. msg = _('The requested availability zone is not available')
  3278. raise exception.InvalidRequest(msg)
  3279. # NOTE(brinzhang): When specifying an availability zone to unshelve
  3280. # a shelved offloaded server with [cinder]/cross_az_attach=False, we
  3281. # need to check that each attached volume's AZ matches the given AZ.
  3282. if not CONF.cinder.cross_az_attach:
  3283. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  3284. context, instance.uuid)
  3285. for bdm in bdms:
  3286. if bdm.is_volume and bdm.volume_id:
  3287. volume = self.volume_api.get(context, bdm.volume_id)
  3288. if availability_zone != volume['availability_zone']:
  3289. msg = _("The specified availability zone does not "
  3290. "match the volume %(vol_id)s attached to the "
  3291. "server. Specified availability zone is "
  3292. "%(az)s. Volume is in %(vol_zone)s.") % {
  3293. "vol_id": volume['id'],
  3294. "az": availability_zone,
  3295. "vol_zone": volume['availability_zone']}
  3296. raise exception.MismatchVolumeAZException(reason=msg)
  3297. @check_instance_lock
  3298. @check_instance_state(vm_state=[vm_states.SHELVED,
  3299. vm_states.SHELVED_OFFLOADED])
  3300. def unshelve(self, context, instance, new_az=None):
  3301. """Restore a shelved instance."""
  3302. request_spec = objects.RequestSpec.get_by_instance_uuid(
  3303. context, instance.uuid)
  3304. if new_az:
  3305. self._validate_unshelve_az(context, instance, new_az)
  3306. LOG.debug("Replace the old AZ %(old_az)s in RequestSpec "
  3307. "with a new AZ %(new_az)s of the instance.",
  3308. {"old_az": request_spec.availability_zone,
  3309. "new_az": new_az}, instance=instance)
  3310. # Unshelving a shelved offloaded server will go through the
  3311. # scheduler to pick a new host, so we update the
  3312. # RequestSpec.availability_zone here. Note that if scheduling
  3313. # fails the RequestSpec will remain updated, which is not great,
  3314. # but if we want to change that we need to defer updating the
  3315. # RequestSpec until conductor which probably means RPC changes to
  3316. # pass the new_az variable to conductor. This is likely low
  3317. # priority since the RequestSpec.availability_zone on a shelved
  3318. # offloaded server does not mean much anyway and clearly the user
  3319. # is trying to put the server in the target AZ.
  3320. request_spec.availability_zone = new_az
  3321. request_spec.save()
  3322. instance.task_state = task_states.UNSHELVING
  3323. instance.save(expected_task_state=[None])
  3324. self._record_action_start(context, instance, instance_actions.UNSHELVE)
  3325. self.compute_task_api.unshelve_instance(context, instance,
  3326. request_spec)
  3327. @check_instance_lock
  3328. def add_fixed_ip(self, context, instance, network_id):
  3329. """Add fixed_ip from specified network to given instance."""
  3330. self.compute_rpcapi.add_fixed_ip_to_instance(context,
  3331. instance=instance, network_id=network_id)
  3332. @check_instance_lock
  3333. def remove_fixed_ip(self, context, instance, address):
  3334. """Remove fixed_ip from specified network to given instance."""
  3335. self.compute_rpcapi.remove_fixed_ip_from_instance(context,
  3336. instance=instance, address=address)
  3337. @check_instance_lock
  3338. @check_instance_state(vm_state=[vm_states.ACTIVE])
  3339. def pause(self, context, instance):
  3340. """Pause the given instance."""
  3341. instance.task_state = task_states.PAUSING
  3342. instance.save(expected_task_state=[None])
  3343. self._record_action_start(context, instance, instance_actions.PAUSE)
  3344. self.compute_rpcapi.pause_instance(context, instance)
  3345. @check_instance_lock
  3346. @check_instance_state(vm_state=[vm_states.PAUSED])
  3347. def unpause(self, context, instance):
  3348. """Unpause the given instance."""
  3349. instance.task_state = task_states.UNPAUSING
  3350. instance.save(expected_task_state=[None])
  3351. self._record_action_start(context, instance, instance_actions.UNPAUSE)
  3352. self.compute_rpcapi.unpause_instance(context, instance)
  3353. @check_instance_host
  3354. def get_diagnostics(self, context, instance):
  3355. """Retrieve diagnostics for the given instance."""
  3356. return self.compute_rpcapi.get_diagnostics(context, instance=instance)
  3357. @check_instance_host
  3358. def get_instance_diagnostics(self, context, instance):
  3359. """Retrieve diagnostics for the given instance."""
  3360. return self.compute_rpcapi.get_instance_diagnostics(context,
  3361. instance=instance)
  3362. @reject_sev_instances(instance_actions.SUSPEND)
  3363. @check_instance_lock
  3364. @check_instance_state(vm_state=[vm_states.ACTIVE])
  3365. def suspend(self, context, instance):
  3366. """Suspend the given instance."""
  3367. instance.task_state = task_states.SUSPENDING
  3368. instance.save(expected_task_state=[None])
  3369. self._record_action_start(context, instance, instance_actions.SUSPEND)
  3370. self.compute_rpcapi.suspend_instance(context, instance)
  3371. @check_instance_lock
  3372. @check_instance_state(vm_state=[vm_states.SUSPENDED])
  3373. def resume(self, context, instance):
  3374. """Resume the given instance."""
  3375. instance.task_state = task_states.RESUMING
  3376. instance.save(expected_task_state=[None])
  3377. self._record_action_start(context, instance, instance_actions.RESUME)
  3378. self.compute_rpcapi.resume_instance(context, instance)
  3379. @check_instance_lock
  3380. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  3381. vm_states.ERROR])
  3382. def rescue(self, context, instance, rescue_password=None,
  3383. rescue_image_ref=None, clean_shutdown=True):
  3384. """Rescue the given instance."""
  3385. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid(
  3386. context, instance.uuid)
  3387. for bdm in bdms:
  3388. if bdm.volume_id:
  3389. vol = self.volume_api.get(context, bdm.volume_id)
  3390. self.volume_api.check_attached(context, vol)
  3391. if compute_utils.is_volume_backed_instance(context, instance, bdms):
  3392. reason = _("Cannot rescue a volume-backed instance")
  3393. raise exception.InstanceNotRescuable(instance_id=instance.uuid,
  3394. reason=reason)
  3395. instance.task_state = task_states.RESCUING
  3396. instance.save(expected_task_state=[None])
  3397. self._record_action_start(context, instance, instance_actions.RESCUE)
  3398. self.compute_rpcapi.rescue_instance(context, instance=instance,
  3399. rescue_password=rescue_password, rescue_image_ref=rescue_image_ref,
  3400. clean_shutdown=clean_shutdown)
  3401. @check_instance_lock
  3402. @check_instance_state(vm_state=[vm_states.RESCUED])
  3403. def unrescue(self, context, instance):
  3404. """Unrescue the given instance."""
  3405. instance.task_state = task_states.UNRESCUING
  3406. instance.save(expected_task_state=[None])
  3407. self._record_action_start(context, instance, instance_actions.UNRESCUE)
  3408. self.compute_rpcapi.unrescue_instance(context, instance=instance)
  3409. @check_instance_lock
  3410. @check_instance_state(vm_state=[vm_states.ACTIVE])
  3411. def set_admin_password(self, context, instance, password=None):
  3412. """Set the root/admin password for the given instance.
  3413. :param context: Nova auth context.
  3414. :param instance: Nova instance object.
  3415. :param password: The admin password for the instance.
  3416. """
  3417. instance.task_state = task_states.UPDATING_PASSWORD
  3418. instance.save(expected_task_state=[None])
  3419. self._record_action_start(context, instance,
  3420. instance_actions.CHANGE_PASSWORD)
  3421. self.compute_rpcapi.set_admin_password(context,
  3422. instance=instance,
  3423. new_pass=password)
  3424. @check_instance_host
  3425. @reject_instance_state(
  3426. task_state=[task_states.DELETING, task_states.MIGRATING])
  3427. def get_vnc_console(self, context, instance, console_type):
  3428. """Get a url to an instance Console."""
  3429. connect_info = self.compute_rpcapi.get_vnc_console(context,
  3430. instance=instance, console_type=console_type)
  3431. return {'url': connect_info['access_url']}
  3432. @check_instance_host
  3433. @reject_instance_state(
  3434. task_state=[task_states.DELETING, task_states.MIGRATING])
  3435. def get_spice_console(self, context, instance, console_type):
  3436. """Get a url to an instance Console."""
  3437. connect_info = self.compute_rpcapi.get_spice_console(context,
  3438. instance=instance, console_type=console_type)
  3439. return {'url': connect_info['access_url']}
  3440. @check_instance_host
  3441. @reject_instance_state(
  3442. task_state=[task_states.DELETING, task_states.MIGRATING])
  3443. def get_rdp_console(self, context, instance, console_type):
  3444. """Get a url to an instance Console."""
  3445. connect_info = self.compute_rpcapi.get_rdp_console(context,
  3446. instance=instance, console_type=console_type)
  3447. return {'url': connect_info['access_url']}
  3448. @check_instance_host
  3449. @reject_instance_state(
  3450. task_state=[task_states.DELETING, task_states.MIGRATING])
  3451. def get_serial_console(self, context, instance, console_type):
  3452. """Get a url to a serial console."""
  3453. connect_info = self.compute_rpcapi.get_serial_console(context,
  3454. instance=instance, console_type=console_type)
  3455. return {'url': connect_info['access_url']}
  3456. @check_instance_host
  3457. @reject_instance_state(
  3458. task_state=[task_states.DELETING, task_states.MIGRATING])
  3459. def get_mks_console(self, context, instance, console_type):
  3460. """Get a url to a MKS console."""
  3461. connect_info = self.compute_rpcapi.get_mks_console(context,
  3462. instance=instance, console_type=console_type)
  3463. return {'url': connect_info['access_url']}
  3464. @check_instance_host
  3465. def get_console_output(self, context, instance, tail_length=None):
  3466. """Get console output for an instance."""
  3467. return self.compute_rpcapi.get_console_output(context,
  3468. instance=instance, tail_length=tail_length)
  3469. def lock(self, context, instance, reason=None):
  3470. """Lock the given instance."""
  3471. # Only update the lock if we are an admin (non-owner)
  3472. is_owner = instance.project_id == context.project_id
  3473. if instance.locked and is_owner:
  3474. return
  3475. context = context.elevated()
  3476. self._record_action_start(context, instance,
  3477. instance_actions.LOCK)
  3478. @wrap_instance_event(prefix='api')
  3479. def lock(self, context, instance, reason=None):
  3480. LOG.debug('Locking', instance=instance)
  3481. instance.locked = True
  3482. instance.locked_by = 'owner' if is_owner else 'admin'
  3483. if reason:
  3484. instance.system_metadata['locked_reason'] = reason
  3485. instance.save()
  3486. lock(self, context, instance, reason=reason)
  3487. compute_utils.notify_about_instance_action(
  3488. context, instance, CONF.host,
  3489. action=fields_obj.NotificationAction.LOCK,
  3490. source=fields_obj.NotificationSource.API)
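# Illustrative sketch of the lock semantics above (hypothetical call):
#
#     compute_api.lock(ctxt, instance, reason='maintenance')
#
# records locked_by='owner' when ctxt.project_id owns the instance and
# 'admin' otherwise; re-locking an already locked instance is a no-op
# only for the owner, and the optional reason is kept in
# system_metadata['locked_reason'] until unlock() pops it.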
  3491. def is_expected_locked_by(self, context, instance):
  3492. is_owner = instance.project_id == context.project_id
  3493. expect_locked_by = 'owner' if is_owner else 'admin'
  3494. locked_by = instance.locked_by
  3495. if locked_by and locked_by != expect_locked_by:
  3496. return False
  3497. return True
  3498. def unlock(self, context, instance):
  3499. """Unlock the given instance."""
  3500. context = context.elevated()
  3501. self._record_action_start(context, instance,
  3502. instance_actions.UNLOCK)
  3503. @wrap_instance_event(prefix='api')
  3504. def unlock(self, context, instance):
  3505. LOG.debug('Unlocking', instance=instance)
  3506. instance.locked = False
  3507. instance.locked_by = None
  3508. instance.system_metadata.pop('locked_reason', None)
  3509. instance.save()
  3510. unlock(self, context, instance)
  3511. compute_utils.notify_about_instance_action(
  3512. context, instance, CONF.host,
  3513. action=fields_obj.NotificationAction.UNLOCK,
  3514. source=fields_obj.NotificationSource.API)
  3515. @check_instance_lock
  3516. def reset_network(self, context, instance):
  3517. """Reset networking on the instance."""
  3518. self.compute_rpcapi.reset_network(context, instance=instance)
  3519. @check_instance_lock
  3520. def inject_network_info(self, context, instance):
  3521. """Inject network info for the instance."""
  3522. self.compute_rpcapi.inject_network_info(context, instance=instance)
  3523. def _create_volume_bdm(self, context, instance, device, volume,
  3524. disk_bus, device_type, is_local_creation=False,
  3525. tag=None, delete_on_termination=False):
  3526. volume_id = volume['id']
  3527. if is_local_creation:
  3528. # when the creation is done locally we can't specify the device
  3529. # name as we do not have a way to check that the name specified is
  3530. # a valid one.
  3531. # We defer setting that value until the actual attach
  3532. # happens on the compute manager.
  3533. # NOTE(artom) Local attach (to a shelved-offload instance) cannot
  3534. # support device tagging because we have no way to call the compute
  3535. # manager to check that it supports device tagging. In fact, we
  3536. # don't even know which compute manager the instance will
  3537. # eventually end up on when it's unshelved.
  3538. volume_bdm = objects.BlockDeviceMapping(
  3539. context=context,
  3540. source_type='volume', destination_type='volume',
  3541. instance_uuid=instance.uuid, boot_index=None,
  3542. volume_id=volume_id,
  3543. device_name=None, guest_format=None,
  3544. disk_bus=disk_bus, device_type=device_type,
  3545. delete_on_termination=delete_on_termination)
  3546. volume_bdm.create()
  3547. else:
  3548. # NOTE(vish): This is done on the compute host because we want
  3549. # to avoid a race where two devices are requested at
  3550. # the same time. When db access is removed from
  3551. # compute, the bdm will be created here and we will
  3552. # have to make sure that they are assigned atomically.
  3553. volume_bdm = self.compute_rpcapi.reserve_block_device_name(
  3554. context, instance, device, volume_id, disk_bus=disk_bus,
  3555. device_type=device_type, tag=tag,
  3556. multiattach=volume['multiattach'])
  3557. volume_bdm.delete_on_termination = delete_on_termination
  3558. volume_bdm.save()
  3559. return volume_bdm
  3560. def _check_volume_already_attached_to_instance(self, context, instance,
  3561. volume_id):
  3562. """Avoid attaching the same volume to the same instance twice.
3563. The new Cinder flow (microversion 3.44) handles the checks
3564. differently and allows attaching the same volume to the same
3565. instance twice (to enable live migration), so for the new flow we
3566. check whether a BDM already exists for this combination and fail
3567. if it does.
  3568. """
  3569. try:
  3570. objects.BlockDeviceMapping.get_by_volume_and_instance(
  3571. context, volume_id, instance.uuid)
  3572. msg = _("volume %s already attached") % volume_id
  3573. raise exception.InvalidVolume(reason=msg)
  3574. except exception.VolumeBDMNotFound:
  3575. pass
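# Illustrative sketch (not part of the original module): the duplicate-attach
# guard above boils down to "look up the (volume, instance) pair and fail if
# a BDM already exists".  The names below are hypothetical stand-ins for the
# real object and exception classes.

class _AlreadyAttached(Exception):
    pass


def _guard_duplicate_attach(existing_bdms, volume_id, instance_uuid):
    # existing_bdms: a set of (volume_id, instance_uuid) tuples standing in
    # for BlockDeviceMapping rows looked up in the cell database.
    if (volume_id, instance_uuid) in existing_bdms:
        raise _AlreadyAttached('volume %s already attached' % volume_id)


# Example: _guard_duplicate_attach({('vol-1', 'inst-1')}, 'vol-1', 'inst-1')
# raises _AlreadyAttached, while a not-yet-attached pair passes silently.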
  3576. def _check_attach_and_reserve_volume(self, context, volume, instance,
  3577. bdm, supports_multiattach=False):
  3578. volume_id = volume['id']
  3579. self.volume_api.check_availability_zone(context, volume,
  3580. instance=instance)
  3581. # If volume.multiattach=True and the microversion to
  3582. # support multiattach is not used, fail the request.
  3583. if volume['multiattach'] and not supports_multiattach:
  3584. raise exception.MultiattachNotSupportedOldMicroversion()
  3585. attachment_id = self.volume_api.attachment_create(
  3586. context, volume_id, instance.uuid)['id']
  3587. bdm.attachment_id = attachment_id
  3588. # NOTE(ildikov): In case of boot from volume the BDM at this
  3589. # point is not yet created in a cell database, so we can't
  3590. # call save(). When attaching a volume to an existing
  3591. # instance, the instance is already in a cell and the BDM has
  3592. # been created in that same cell so updating here in that case
  3593. # is "ok".
  3594. if bdm.obj_attr_is_set('id'):
  3595. bdm.save()
  3596. # TODO(stephenfin): Fold this back in now that cells v1 no longer needs to
  3597. # override it.
  3598. def _attach_volume(self, context, instance, volume, device,
  3599. disk_bus, device_type, tag=None,
  3600. supports_multiattach=False,
  3601. delete_on_termination=False):
  3602. """Attach an existing volume to an existing instance.
  3603. This method is separated to make it possible for cells version
  3604. to override it.
  3605. """
  3606. volume_bdm = self._create_volume_bdm(
  3607. context, instance, device, volume, disk_bus=disk_bus,
  3608. device_type=device_type, tag=tag,
  3609. delete_on_termination=delete_on_termination)
  3610. try:
  3611. self._check_attach_and_reserve_volume(context, volume, instance,
  3612. volume_bdm,
  3613. supports_multiattach)
  3614. self._record_action_start(
  3615. context, instance, instance_actions.ATTACH_VOLUME)
  3616. self.compute_rpcapi.attach_volume(context, instance, volume_bdm)
  3617. except Exception:
  3618. with excutils.save_and_reraise_exception():
  3619. volume_bdm.destroy()
  3620. return volume_bdm.device_name
  3621. def _attach_volume_shelved_offloaded(self, context, instance, volume,
  3622. device, disk_bus, device_type,
  3623. delete_on_termination):
  3624. """Attach an existing volume to an instance in shelved offloaded state.
3625. Attaching a volume to an instance in shelved offloaded state requires
3626. performing the regular check to see if we can attach and reserve the
3627. volume, and then calling the attach method on the volume API to mark
3628. the volume as 'in-use'.
3629. The instance at this stage is not managed by a compute manager,
3630. therefore the actual attachment will be performed once the
3631. instance is unshelved.
  3632. """
  3633. volume_id = volume['id']
  3634. @wrap_instance_event(prefix='api')
  3635. def attach_volume(self, context, v_id, instance, dev, attachment_id):
  3636. if attachment_id:
  3637. # Normally we wouldn't complete an attachment without a host
  3638. # connector, but we do this to make the volume status change
  3639. # to "in-use" to maintain the API semantics with the old flow.
  3640. # When unshelving the instance, the compute service will deal
  3641. # with this disconnected attachment.
  3642. self.volume_api.attachment_complete(context, attachment_id)
  3643. else:
  3644. self.volume_api.attach(context,
  3645. v_id,
  3646. instance.uuid,
  3647. dev)
  3648. volume_bdm = self._create_volume_bdm(
  3649. context, instance, device, volume, disk_bus=disk_bus,
  3650. device_type=device_type, is_local_creation=True,
  3651. delete_on_termination=delete_on_termination)
  3652. try:
  3653. self._check_attach_and_reserve_volume(context, volume, instance,
  3654. volume_bdm)
  3655. self._record_action_start(
  3656. context, instance,
  3657. instance_actions.ATTACH_VOLUME)
  3658. attach_volume(self, context, volume_id, instance, device,
  3659. volume_bdm.attachment_id)
  3660. except Exception:
  3661. with excutils.save_and_reraise_exception():
  3662. volume_bdm.destroy()
  3663. return volume_bdm.device_name
  3664. @check_instance_lock
  3665. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  3666. vm_states.STOPPED, vm_states.RESIZED,
  3667. vm_states.SOFT_DELETED, vm_states.SHELVED,
  3668. vm_states.SHELVED_OFFLOADED])
  3669. def attach_volume(self, context, instance, volume_id, device=None,
  3670. disk_bus=None, device_type=None, tag=None,
  3671. supports_multiattach=False,
  3672. delete_on_termination=False):
  3673. """Attach an existing volume to an existing instance."""
  3674. # NOTE(vish): Fail fast if the device is not going to pass. This
  3675. # will need to be removed along with the test if we
  3676. # change the logic in the manager for what constitutes
  3677. # a valid device.
  3678. if device and not block_device.match_device(device):
  3679. raise exception.InvalidDevicePath(path=device)
  3680. # Make sure the volume isn't already attached to this instance
  3681. # because we'll use the v3.44 attachment flow in
  3682. # _check_attach_and_reserve_volume and Cinder will allow multiple
  3683. # attachments between the same volume and instance but the old flow
  3684. # API semantics don't allow that so we enforce it here.
  3685. self._check_volume_already_attached_to_instance(context,
  3686. instance,
  3687. volume_id)
  3688. volume = self.volume_api.get(context, volume_id)
  3689. is_shelved_offloaded = instance.vm_state == vm_states.SHELVED_OFFLOADED
  3690. if is_shelved_offloaded:
  3691. if tag:
  3692. # NOTE(artom) Local attach (to a shelved-offload instance)
  3693. # cannot support device tagging because we have no way to call
  3694. # the compute manager to check that it supports device tagging.
3695. # In fact, we don't even know which compute manager the
  3696. # instance will eventually end up on when it's unshelved.
  3697. raise exception.VolumeTaggedAttachToShelvedNotSupported()
  3698. if volume['multiattach']:
  3699. # NOTE(mriedem): Similar to tagged attach, we don't support
  3700. # attaching a multiattach volume to shelved offloaded instances
  3701. # because we can't tell if the compute host (since there isn't
  3702. # one) supports it. This could possibly be supported in the
  3703. # future if the scheduler was made aware of which computes
  3704. # support multiattach volumes.
  3705. raise exception.MultiattachToShelvedNotSupported()
  3706. return self._attach_volume_shelved_offloaded(context,
  3707. instance,
  3708. volume,
  3709. device,
  3710. disk_bus,
  3711. device_type,
  3712. delete_on_termination)
  3713. return self._attach_volume(context, instance, volume, device,
  3714. disk_bus, device_type, tag=tag,
  3715. supports_multiattach=supports_multiattach,
  3716. delete_on_termination=delete_on_termination)
  3717. # TODO(stephenfin): Fold this back in now that cells v1 no longer needs to
  3718. # override it.
  3719. def _detach_volume(self, context, instance, volume):
  3720. """Detach volume from instance.
  3721. This method is separated to make it easier for cells version
  3722. to override.
  3723. """
  3724. try:
  3725. self.volume_api.begin_detaching(context, volume['id'])
  3726. except exception.InvalidInput as exc:
  3727. raise exception.InvalidVolume(reason=exc.format_message())
  3728. attachments = volume.get('attachments', {})
  3729. attachment_id = None
  3730. if attachments and instance.uuid in attachments:
  3731. attachment_id = attachments[instance.uuid]['attachment_id']
  3732. self._record_action_start(
  3733. context, instance, instance_actions.DETACH_VOLUME)
  3734. self.compute_rpcapi.detach_volume(context, instance=instance,
  3735. volume_id=volume['id'], attachment_id=attachment_id)
  3736. def _detach_volume_shelved_offloaded(self, context, instance, volume):
  3737. """Detach a volume from an instance in shelved offloaded state.
3738. If the instance is shelved offloaded we just need to clean up the
3739. volume by calling the volume api detach and terminate_connection
3740. methods and by deleting the bdm record.
  3741. If the volume has delete_on_termination option set then we call the
  3742. volume api delete as well.
  3743. """
  3744. @wrap_instance_event(prefix='api')
  3745. def detach_volume(self, context, instance, bdms):
  3746. self._local_cleanup_bdm_volumes(bdms, instance, context)
  3747. bdms = [objects.BlockDeviceMapping.get_by_volume_id(
  3748. context, volume['id'], instance.uuid)]
  3749. # The begin_detaching() call only works with in-use volumes,
  3750. # which will not be the case for volumes attached to a shelved
  3751. # offloaded server via the attachments API since those volumes
  3752. # will have `reserved` status.
  3753. if not bdms[0].attachment_id:
  3754. try:
  3755. self.volume_api.begin_detaching(context, volume['id'])
  3756. except exception.InvalidInput as exc:
  3757. raise exception.InvalidVolume(reason=exc.format_message())
  3758. self._record_action_start(
  3759. context, instance,
  3760. instance_actions.DETACH_VOLUME)
  3761. detach_volume(self, context, instance, bdms)
  3762. @check_instance_lock
  3763. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  3764. vm_states.STOPPED, vm_states.RESIZED,
  3765. vm_states.SOFT_DELETED, vm_states.SHELVED,
  3766. vm_states.SHELVED_OFFLOADED])
  3767. def detach_volume(self, context, instance, volume):
  3768. """Detach a volume from an instance."""
  3769. if instance.vm_state == vm_states.SHELVED_OFFLOADED:
  3770. self._detach_volume_shelved_offloaded(context, instance, volume)
  3771. else:
  3772. self._detach_volume(context, instance, volume)
  3773. def _count_attachments_for_swap(self, ctxt, volume):
  3774. """Counts the number of attachments for a swap-related volume.
  3775. Attempts to only count read/write attachments if the volume attachment
  3776. records exist, otherwise simply just counts the number of attachments
  3777. regardless of attach mode.
  3778. :param ctxt: nova.context.RequestContext - user request context
  3779. :param volume: nova-translated volume dict from nova.volume.cinder.
  3780. :returns: count of attachments for the volume
  3781. """
  3782. # This is a dict, keyed by server ID, to a dict of attachment_id and
  3783. # mountpoint.
  3784. attachments = volume.get('attachments', {})
  3785. # Multiattach volumes can have more than one attachment, so if there
  3786. # is more than one attachment, attempt to count the read/write
  3787. # attachments.
  3788. if len(attachments) > 1:
  3789. count = 0
  3790. for attachment in attachments.values():
  3791. attachment_id = attachment['attachment_id']
  3792. # Get the attachment record for this attachment so we can
  3793. # get the attach_mode.
  3794. # TODO(mriedem): This could be optimized if we had
  3795. # GET /attachments/detail?volume_id=volume['id'] in Cinder.
  3796. try:
  3797. attachment_record = self.volume_api.attachment_get(
  3798. ctxt, attachment_id)
  3799. # Note that the attachment record from Cinder has
  3800. # attach_mode in the top-level of the resource but the
  3801. # nova.volume.cinder code translates it and puts the
  3802. # attach_mode in the connection_info for some legacy
  3803. # reason...
  3804. if attachment_record['attach_mode'] == 'rw':
  3805. count += 1
  3806. except exception.VolumeAttachmentNotFound:
  3807. # attachments are read/write by default so count it
  3808. count += 1
  3809. else:
  3810. count = len(attachments)
  3811. return count
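# Illustrative sketch (not part of the original module): the read/write
# attachment counting above, reduced to plain data.  'attachment_records'
# is a hypothetical stand-in for what volume_api.attachment_get returns.

def _count_rw_attachments(attachments, attachment_records):
    # attachments: {server_id: {'attachment_id': ..., 'mountpoint': ...}}
    # attachment_records: {attachment_id: {'attach_mode': 'rw' or 'ro'}}
    if len(attachments) <= 1:
        return len(attachments)
    count = 0
    for attachment in attachments.values():
        record = attachment_records.get(attachment['attachment_id'])
        # Attachments are read/write by default, so an unknown record is
        # counted as well, mirroring the VolumeAttachmentNotFound branch.
        if record is None or record['attach_mode'] == 'rw':
            count += 1
    return count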
  3812. @check_instance_lock
  3813. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  3814. vm_states.RESIZED])
  3815. def swap_volume(self, context, instance, old_volume, new_volume):
  3816. """Swap volume attached to an instance."""
  3817. # The caller likely got the instance from volume['attachments']
  3818. # in the first place, but let's sanity check.
  3819. if not old_volume.get('attachments', {}).get(instance.uuid):
  3820. msg = _("Old volume is attached to a different instance.")
  3821. raise exception.InvalidVolume(reason=msg)
  3822. if new_volume['attach_status'] == 'attached':
  3823. msg = _("New volume must be detached in order to swap.")
  3824. raise exception.InvalidVolume(reason=msg)
  3825. if int(new_volume['size']) < int(old_volume['size']):
  3826. msg = _("New volume must be the same size or larger.")
  3827. raise exception.InvalidVolume(reason=msg)
  3828. self.volume_api.check_availability_zone(context, new_volume,
  3829. instance=instance)
  3830. try:
  3831. self.volume_api.begin_detaching(context, old_volume['id'])
  3832. except exception.InvalidInput as exc:
  3833. raise exception.InvalidVolume(reason=exc.format_message())
  3834. # Disallow swapping from multiattach volumes that have more than one
  3835. # read/write attachment. We know the old_volume has at least one
  3836. # attachment since it's attached to this server. The new_volume
  3837. # can't have any attachments because of the attach_status check above.
  3838. # We do this count after calling "begin_detaching" to lock against
  3839. # concurrent attachments being made while we're counting.
  3840. try:
  3841. if self._count_attachments_for_swap(context, old_volume) > 1:
  3842. raise exception.MultiattachSwapVolumeNotSupported()
  3843. except Exception: # This is generic to handle failures while counting
  3844. # We need to reset the detaching status before raising.
  3845. with excutils.save_and_reraise_exception():
  3846. self.volume_api.roll_detaching(context, old_volume['id'])
  3847. # Get the BDM for the attached (old) volume so we can tell if it was
  3848. # attached with the new-style Cinder 3.44 API.
  3849. bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
  3850. context, old_volume['id'], instance.uuid)
  3851. new_attachment_id = None
  3852. if bdm.attachment_id is None:
  3853. # This is an old-style attachment so reserve the new volume before
  3854. # we cast to the compute host.
  3855. self.volume_api.reserve_volume(context, new_volume['id'])
  3856. else:
  3857. try:
  3858. self._check_volume_already_attached_to_instance(
  3859. context, instance, new_volume['id'])
  3860. except exception.InvalidVolume:
  3861. with excutils.save_and_reraise_exception():
  3862. self.volume_api.roll_detaching(context, old_volume['id'])
  3863. # This is a new-style attachment so for the volume that we are
  3864. # going to swap to, create a new volume attachment.
  3865. new_attachment_id = self.volume_api.attachment_create(
  3866. context, new_volume['id'], instance.uuid)['id']
  3867. self._record_action_start(
  3868. context, instance, instance_actions.SWAP_VOLUME)
  3869. try:
  3870. self.compute_rpcapi.swap_volume(
  3871. context, instance=instance,
  3872. old_volume_id=old_volume['id'],
  3873. new_volume_id=new_volume['id'],
  3874. new_attachment_id=new_attachment_id)
  3875. except Exception:
  3876. with excutils.save_and_reraise_exception():
  3877. self.volume_api.roll_detaching(context, old_volume['id'])
  3878. if new_attachment_id is None:
  3879. self.volume_api.unreserve_volume(context, new_volume['id'])
  3880. else:
  3881. self.volume_api.attachment_delete(
  3882. context, new_attachment_id)
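# Illustrative sketch (not part of the original module): the pre-swap
# validations above, reduced to a plain checker over the volume dicts.

def _swap_volume_errors(instance_uuid, old_volume, new_volume):
    errors = []
    if instance_uuid not in old_volume.get('attachments', {}):
        errors.append('old volume is attached to a different instance')
    if new_volume.get('attach_status') == 'attached':
        errors.append('new volume must be detached in order to swap')
    if int(new_volume['size']) < int(old_volume['size']):
        errors.append('new volume must be the same size or larger')
    return errors


# Example: swapping to a smaller, already-attached volume reports both
# problems instead of silently proceeding.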
  3883. @check_instance_lock
  3884. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  3885. vm_states.STOPPED],
  3886. task_state=[None])
  3887. def attach_interface(self, context, instance, network_id, port_id,
  3888. requested_ip, tag=None):
  3889. """Use hotplug to add an network adapter to an instance."""
  3890. self._record_action_start(
  3891. context, instance, instance_actions.ATTACH_INTERFACE)
  3892. # NOTE(gibi): Checking if the requested port has resource request as
  3893. # such ports are currently not supported as they would at least
  3894. # need resource allocation manipulation in placement but might also
  3895. # need a new scheduling if resource on this host is not available.
  3896. if port_id:
  3897. port = self.network_api.show_port(context, port_id)
  3898. if port['port'].get(constants.RESOURCE_REQUEST):
  3899. raise exception.AttachInterfaceWithQoSPolicyNotSupported(
  3900. instance_uuid=instance.uuid)
  3901. return self.compute_rpcapi.attach_interface(context,
  3902. instance=instance, network_id=network_id, port_id=port_id,
  3903. requested_ip=requested_ip, tag=tag)
  3904. @check_instance_lock
  3905. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  3906. vm_states.STOPPED],
  3907. task_state=[None])
  3908. def detach_interface(self, context, instance, port_id):
  3909. """Detach an network adapter from an instance."""
  3910. self._record_action_start(
  3911. context, instance, instance_actions.DETACH_INTERFACE)
  3912. self.compute_rpcapi.detach_interface(context, instance=instance,
  3913. port_id=port_id)
  3914. def get_instance_metadata(self, context, instance):
  3915. """Get all metadata associated with an instance."""
  3916. return self.db.instance_metadata_get(context, instance.uuid)
  3917. @check_instance_lock
  3918. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  3919. vm_states.SUSPENDED, vm_states.STOPPED],
  3920. task_state=None)
  3921. def delete_instance_metadata(self, context, instance, key):
  3922. """Delete the given metadata item from an instance."""
  3923. instance.delete_metadata_key(key)
  3924. self.compute_rpcapi.change_instance_metadata(context,
  3925. instance=instance,
  3926. diff={key: ['-']})
  3927. @check_instance_lock
  3928. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED,
  3929. vm_states.SUSPENDED, vm_states.STOPPED],
  3930. task_state=None)
  3931. def update_instance_metadata(self, context, instance,
  3932. metadata, delete=False):
  3933. """Updates or creates instance metadata.
  3934. If delete is True, metadata items that are not specified in the
  3935. `metadata` argument will be deleted.
  3936. """
  3937. orig = dict(instance.metadata)
  3938. if delete:
  3939. _metadata = metadata
  3940. else:
  3941. _metadata = dict(instance.metadata)
  3942. _metadata.update(metadata)
  3943. self._check_metadata_properties_quota(context, _metadata)
  3944. instance.metadata = _metadata
  3945. instance.save()
  3946. diff = _diff_dict(orig, instance.metadata)
  3947. self.compute_rpcapi.change_instance_metadata(context,
  3948. instance=instance,
  3949. diff=diff)
  3950. return _metadata
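# Illustrative sketch (assumption): _diff_dict, defined elsewhere in this
# module, is expected to produce the diff format consumed by
# change_instance_metadata above -- removed keys map to ['-'] and
# added or changed keys map to ['+', value].  A minimal stand-in with that
# behaviour:

def _diff_dict_sketch(orig, new):
    diff = {key: ['-'] for key in orig if key not in new}
    for key, value in new.items():
        if orig.get(key) != value:
            diff[key] = ['+', value]
    return diff


# _diff_dict_sketch({'a': '1', 'b': '2'}, {'b': '3'})
# -> {'a': ['-'], 'b': ['+', '3']}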
  3951. @reject_sev_instances(instance_actions.LIVE_MIGRATION)
  3952. @check_instance_lock
  3953. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.PAUSED])
  3954. def live_migrate(self, context, instance, block_migration,
  3955. disk_over_commit, host_name, force=None, async_=False):
  3956. """Migrate a server lively to a new host."""
  3957. LOG.debug("Going to try to live migrate instance to %s",
  3958. host_name or "another host", instance=instance)
  3959. if host_name:
  3960. # Validate the specified host before changing the instance task
  3961. # state.
  3962. nodes = objects.ComputeNodeList.get_all_by_host(context, host_name)
  3963. request_spec = objects.RequestSpec.get_by_instance_uuid(
  3964. context, instance.uuid)
  3965. instance.task_state = task_states.MIGRATING
  3966. instance.save(expected_task_state=[None])
  3967. self._record_action_start(context, instance,
  3968. instance_actions.LIVE_MIGRATION)
  3969. # NOTE(sbauza): Force is a boolean by the new related API version
  3970. if force is False and host_name:
  3971. # Unset the host to make sure we call the scheduler
  3972. # from the conductor LiveMigrationTask. Yes this is tightly-coupled
  3973. # to behavior in conductor and not great.
  3974. host_name = None
  3975. # FIXME(sbauza): Since only Ironic driver uses more than one
  3976. # compute per service but doesn't support live migrations,
  3977. # let's provide the first one.
  3978. target = nodes[0]
  3979. destination = objects.Destination(
  3980. host=target.host,
  3981. node=target.hypervisor_hostname
  3982. )
  3983. # This is essentially a hint to the scheduler to only consider
  3984. # the specified host but still run it through the filters.
  3985. request_spec.requested_destination = destination
  3986. try:
  3987. self.compute_task_api.live_migrate_instance(context, instance,
  3988. host_name, block_migration=block_migration,
  3989. disk_over_commit=disk_over_commit,
  3990. request_spec=request_spec, async_=async_)
  3991. except oslo_exceptions.MessagingTimeout as messaging_timeout:
  3992. with excutils.save_and_reraise_exception():
  3993. # NOTE(pkoniszewski): It is possible that MessagingTimeout
  3994. # occurs, but LM will still be in progress, so write
  3995. # instance fault to database
  3996. compute_utils.add_instance_fault_from_exc(context,
  3997. instance,
  3998. messaging_timeout)
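# Illustrative sketch (not part of the original module): the force=False
# handling above -- a user-requested host is converted into a scheduler
# constraint (requested_destination) rather than bypassing the scheduler.
# 'first_node' is a hypothetical (host, node) pair for the first compute
# node of the requested host.

def _resolve_live_migrate_target(host_name, force, first_node):
    if force is False and host_name:
        # Still run the scheduler and its filters, but only consider the
        # requested destination.
        destination = {'host': first_node[0], 'node': first_node[1]}
        return None, destination
    # Forced (or no host given): pass the host straight through.
    return host_name, None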
  3999. @check_instance_lock
  4000. @check_instance_state(vm_state=[vm_states.ACTIVE],
  4001. task_state=[task_states.MIGRATING])
  4002. def live_migrate_force_complete(self, context, instance, migration_id):
  4003. """Force live migration to complete.
  4004. :param context: Security context
  4005. :param instance: The instance that is being migrated
  4006. :param migration_id: ID of ongoing migration
  4007. """
  4008. LOG.debug("Going to try to force live migration to complete",
  4009. instance=instance)
  4010. # NOTE(pkoniszewski): Get migration object to check if there is ongoing
  4011. # live migration for particular instance. Also pass migration id to
  4012. # compute to double check and avoid possible race condition.
  4013. migration = objects.Migration.get_by_id_and_instance(
  4014. context, migration_id, instance.uuid)
  4015. if migration.status != 'running':
  4016. raise exception.InvalidMigrationState(migration_id=migration_id,
  4017. instance_uuid=instance.uuid,
  4018. state=migration.status,
  4019. method='force complete')
  4020. self._record_action_start(
  4021. context, instance, instance_actions.LIVE_MIGRATION_FORCE_COMPLETE)
  4022. self.compute_rpcapi.live_migration_force_complete(
  4023. context, instance, migration)
  4024. @check_instance_lock
  4025. @check_instance_state(task_state=[task_states.MIGRATING])
  4026. def live_migrate_abort(self, context, instance, migration_id,
  4027. support_abort_in_queue=False):
  4028. """Abort an in-progress live migration.
  4029. :param context: Security context
  4030. :param instance: The instance that is being migrated
  4031. :param migration_id: ID of in-progress live migration
  4032. :param support_abort_in_queue: Flag indicating whether we can support
  4033. abort migrations in "queued" or "preparing" status.
  4034. """
  4035. migration = objects.Migration.get_by_id_and_instance(context,
  4036. migration_id, instance.uuid)
  4037. LOG.debug("Going to cancel live migration %s",
  4038. migration.id, instance=instance)
4039. # If the microversion does not support aborting a queued migration,
4040. # we are only able to abort migrations with `running` status;
  4041. # if it is supported, we are able to also abort migrations in
  4042. # `queued` and `preparing` status.
  4043. allowed_states = ['running']
  4044. queued_states = ['queued', 'preparing']
  4045. if support_abort_in_queue:
  4046. # The user requested a microversion that supports aborting a queued
  4047. # or preparing live migration. But we need to check that the
  4048. # compute service hosting the instance is new enough to support
  4049. # aborting a queued/preparing live migration, so we check the
  4050. # service version here.
  4051. # TODO(Kevin_Zheng): This service version check can be removed in
  4052. # Stein (at the earliest) when the API only supports Rocky or
  4053. # newer computes.
  4054. if migration.status in queued_states:
  4055. service = objects.Service.get_by_compute_host(
  4056. context, instance.host)
  4057. if service.version < MIN_COMPUTE_ABORT_QUEUED_LIVE_MIGRATION:
  4058. raise exception.AbortQueuedLiveMigrationNotYetSupported(
  4059. migration_id=migration_id, status=migration.status)
  4060. allowed_states.extend(queued_states)
  4061. if migration.status not in allowed_states:
  4062. raise exception.InvalidMigrationState(migration_id=migration_id,
  4063. instance_uuid=instance.uuid,
  4064. state=migration.status,
  4065. method='abort live migration')
  4066. self._record_action_start(context, instance,
  4067. instance_actions.LIVE_MIGRATION_CANCEL)
  4068. self.compute_rpcapi.live_migration_abort(context,
  4069. instance, migration.id)
  4070. @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED,
  4071. vm_states.ERROR])
  4072. def evacuate(self, context, instance, host, on_shared_storage,
  4073. admin_password=None, force=None):
  4074. """Running evacuate to target host.
  4075. Checking vm compute host state, if the host not in expected_state,
  4076. raising an exception.
  4077. :param instance: The instance to evacuate
4078. :param host: Target host. If not set, the scheduler will pick one
  4079. :param on_shared_storage: True if instance files on shared storage
  4080. :param admin_password: password to set on rebuilt instance
  4081. :param force: Force the evacuation to the specific host target
  4082. """
  4083. LOG.debug('vm evacuation scheduled', instance=instance)
  4084. inst_host = instance.host
  4085. service = objects.Service.get_by_compute_host(context, inst_host)
  4086. if self.servicegroup_api.service_is_up(service):
  4087. LOG.error('Instance compute service state on %s '
  4088. 'expected to be down, but it was up.', inst_host)
  4089. raise exception.ComputeServiceInUse(host=inst_host)
  4090. request_spec = objects.RequestSpec.get_by_instance_uuid(
  4091. context, instance.uuid)
  4092. instance.task_state = task_states.REBUILDING
  4093. instance.save(expected_task_state=[None])
  4094. self._record_action_start(context, instance, instance_actions.EVACUATE)
  4095. # NOTE(danms): Create this as a tombstone for the source compute
  4096. # to find and cleanup. No need to pass it anywhere else.
  4097. migration = objects.Migration(context,
  4098. source_compute=instance.host,
  4099. source_node=instance.node,
  4100. instance_uuid=instance.uuid,
  4101. status='accepted',
  4102. migration_type='evacuation')
  4103. if host:
  4104. migration.dest_compute = host
  4105. migration.create()
  4106. compute_utils.notify_about_instance_usage(
  4107. self.notifier, context, instance, "evacuate")
  4108. compute_utils.notify_about_instance_action(
  4109. context, instance, CONF.host,
  4110. action=fields_obj.NotificationAction.EVACUATE,
  4111. source=fields_obj.NotificationSource.API)
  4112. # NOTE(sbauza): Force is a boolean by the new related API version
  4113. # TODO(stephenfin): Any reason we can't use 'not force' here to handle
  4114. # the pre-v2.29 API microversion, which wouldn't set force
  4115. if force is False and host:
  4116. nodes = objects.ComputeNodeList.get_all_by_host(context, host)
  4117. # NOTE(sbauza): Unset the host to make sure we call the scheduler
  4118. host = None
  4119. # FIXME(sbauza): Since only Ironic driver uses more than one
  4120. # compute per service but doesn't support evacuations,
  4121. # let's provide the first one.
  4122. target = nodes[0]
  4123. destination = objects.Destination(
  4124. host=target.host,
  4125. node=target.hypervisor_hostname
  4126. )
  4127. request_spec.requested_destination = destination
  4128. return self.compute_task_api.rebuild_instance(context,
  4129. instance=instance,
  4130. new_pass=admin_password,
  4131. injected_files=None,
  4132. image_ref=None,
  4133. orig_image_ref=None,
  4134. orig_sys_metadata=None,
  4135. bdms=None,
  4136. recreate=True,
  4137. on_shared_storage=on_shared_storage,
  4138. host=host,
  4139. request_spec=request_spec,
  4140. )
  4141. def get_migrations(self, context, filters):
  4142. """Get all migrations for the given filters."""
  4143. load_cells()
  4144. migrations = []
  4145. for cell in CELLS:
  4146. if cell.uuid == objects.CellMapping.CELL0_UUID:
  4147. continue
  4148. with nova_context.target_cell(context, cell) as cctxt:
  4149. migrations.extend(objects.MigrationList.get_by_filters(
  4150. cctxt, filters).objects)
  4151. return objects.MigrationList(objects=migrations)
  4152. def get_migrations_sorted(self, context, filters, sort_dirs=None,
  4153. sort_keys=None, limit=None, marker=None):
  4154. """Get all migrations for the given parameters."""
  4155. mig_objs = migration_list.get_migration_objects_sorted(
  4156. context, filters, limit, marker, sort_keys, sort_dirs)
  4157. return mig_objs
  4158. def get_migrations_in_progress_by_instance(self, context, instance_uuid,
  4159. migration_type=None):
  4160. """Get all migrations of an instance in progress."""
  4161. return objects.MigrationList.get_in_progress_by_instance(
  4162. context, instance_uuid, migration_type)
  4163. def get_migration_by_id_and_instance(self, context,
  4164. migration_id, instance_uuid):
  4165. """Get the migration of an instance by id."""
  4166. return objects.Migration.get_by_id_and_instance(
  4167. context, migration_id, instance_uuid)
  4168. def _get_bdm_by_volume_id(self, context, volume_id, expected_attrs=None):
  4169. """Retrieve a BDM without knowing its cell.
  4170. .. note:: The context will be targeted to the cell in which the
  4171. BDM is found, if any.
  4172. :param context: The API request context.
  4173. :param volume_id: The ID of the volume.
  4174. :param expected_attrs: list of any additional attributes that should
  4175. be joined when the BDM is loaded from the database.
  4176. :raises: nova.exception.VolumeBDMNotFound if not found in any cell
  4177. """
  4178. load_cells()
  4179. for cell in CELLS:
  4180. nova_context.set_target_cell(context, cell)
  4181. try:
  4182. return objects.BlockDeviceMapping.get_by_volume(
  4183. context, volume_id, expected_attrs=expected_attrs)
  4184. except exception.NotFound:
  4185. continue
  4186. raise exception.VolumeBDMNotFound(volume_id=volume_id)
  4187. def volume_snapshot_create(self, context, volume_id, create_info):
  4188. bdm = self._get_bdm_by_volume_id(
  4189. context, volume_id, expected_attrs=['instance'])
  4190. # We allow creating the snapshot in any vm_state as long as there is
  4191. # no task being performed on the instance and it has a host.
  4192. @check_instance_host
  4193. @check_instance_state(vm_state=None)
  4194. def do_volume_snapshot_create(self, context, instance):
  4195. self.compute_rpcapi.volume_snapshot_create(context, instance,
  4196. volume_id, create_info)
  4197. snapshot = {
  4198. 'snapshot': {
  4199. 'id': create_info.get('id'),
  4200. 'volumeId': volume_id
  4201. }
  4202. }
  4203. return snapshot
  4204. return do_volume_snapshot_create(self, context, bdm.instance)
  4205. def volume_snapshot_delete(self, context, volume_id, snapshot_id,
  4206. delete_info):
  4207. bdm = self._get_bdm_by_volume_id(
  4208. context, volume_id, expected_attrs=['instance'])
  4209. # We allow deleting the snapshot in any vm_state as long as there is
  4210. # no task being performed on the instance and it has a host.
  4211. @check_instance_host
  4212. @check_instance_state(vm_state=None)
  4213. def do_volume_snapshot_delete(self, context, instance):
  4214. self.compute_rpcapi.volume_snapshot_delete(context, instance,
  4215. volume_id, snapshot_id, delete_info)
  4216. do_volume_snapshot_delete(self, context, bdm.instance)
  4217. def external_instance_event(self, api_context, instances, events):
  4218. # NOTE(danms): The external API consumer just provides events,
  4219. # but doesn't know where they go. We need to collate lists
  4220. # by the host the affected instance is on and dispatch them
  4221. # according to host
  4222. instances_by_host = collections.defaultdict(list)
  4223. events_by_host = collections.defaultdict(list)
  4224. hosts_by_instance = collections.defaultdict(list)
  4225. cell_contexts_by_host = {}
  4226. for instance in instances:
  4227. # instance._context is used here since it's already targeted to
  4228. # the cell that the instance lives in, and we need to use that
  4229. # cell context to lookup any migrations associated to the instance.
  4230. for host in self._get_relevant_hosts(instance._context, instance):
  4231. # NOTE(danms): All instances on a host must have the same
  4232. # mapping, so just use that
  4233. # NOTE(mdbooth): We don't currently support migrations between
  4234. # cells, and given that the Migration record is hosted in the
  4235. # cell _get_relevant_hosts will likely have to change before we
  4236. # do. Consequently we can currently assume that the context for
  4237. # both the source and destination hosts of a migration is the
  4238. # same.
  4239. if host not in cell_contexts_by_host:
  4240. cell_contexts_by_host[host] = instance._context
  4241. instances_by_host[host].append(instance)
  4242. hosts_by_instance[instance.uuid].append(host)
  4243. for event in events:
  4244. if event.name == 'volume-extended':
  4245. # Volume extend is a user-initiated operation starting in the
  4246. # Block Storage service API. We record an instance action so
  4247. # the user can monitor the operation to completion.
  4248. host = hosts_by_instance[event.instance_uuid][0]
  4249. cell_context = cell_contexts_by_host[host]
  4250. objects.InstanceAction.action_start(
  4251. cell_context, event.instance_uuid,
  4252. instance_actions.EXTEND_VOLUME, want_result=False)
  4253. elif event.name == 'power-update':
  4254. host = hosts_by_instance[event.instance_uuid][0]
  4255. cell_context = cell_contexts_by_host[host]
  4256. if event.tag == external_event_obj.POWER_ON:
  4257. inst_action = instance_actions.START
  4258. elif event.tag == external_event_obj.POWER_OFF:
  4259. inst_action = instance_actions.STOP
  4260. else:
  4261. LOG.warning("Invalid power state %s. Cannot process "
  4262. "the event %s. Skipping it.", event.tag,
  4263. event)
  4264. continue
  4265. objects.InstanceAction.action_start(
  4266. cell_context, event.instance_uuid, inst_action,
  4267. want_result=False)
  4268. for host in hosts_by_instance[event.instance_uuid]:
  4269. events_by_host[host].append(event)
  4270. for host in instances_by_host:
  4271. cell_context = cell_contexts_by_host[host]
  4272. # TODO(salv-orlando): Handle exceptions raised by the rpc api layer
  4273. # in order to ensure that a failure in processing events on a host
  4274. # will not prevent processing events on other hosts
  4275. self.compute_rpcapi.external_instance_event(
  4276. cell_context, instances_by_host[host], events_by_host[host],
  4277. host=host)
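# Illustrative sketch (not part of the original module): the per-host
# collation above, reduced to plain dicts -- each event is fanned out to
# every host its instance is currently associated with.

import collections


def _collate_events_by_host(instance_hosts, events):
    # instance_hosts: {instance_uuid: [host, ...]}
    # events: iterable of (instance_uuid, event_name) tuples
    events_by_host = collections.defaultdict(list)
    for instance_uuid, name in events:
        for host in instance_hosts.get(instance_uuid, []):
            events_by_host[host].append((instance_uuid, name))
    return dict(events_by_host)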
  4278. def _get_relevant_hosts(self, context, instance):
  4279. hosts = set()
  4280. hosts.add(instance.host)
  4281. if instance.migration_context is not None:
  4282. migration_id = instance.migration_context.migration_id
  4283. migration = objects.Migration.get_by_id(context, migration_id)
  4284. hosts.add(migration.dest_compute)
  4285. hosts.add(migration.source_compute)
  4286. LOG.debug('Instance %(instance)s is migrating, '
  4287. 'copying events to all relevant hosts: '
  4288. '%(hosts)s', {'instance': instance.uuid,
  4289. 'hosts': hosts})
  4290. return hosts
  4291. def get_instance_host_status(self, instance):
  4292. if instance.host:
  4293. try:
  4294. service = [service for service in instance.services if
  4295. service.binary == 'nova-compute'][0]
  4296. if service.forced_down:
  4297. host_status = fields_obj.HostStatus.DOWN
  4298. elif service.disabled:
  4299. host_status = fields_obj.HostStatus.MAINTENANCE
  4300. else:
  4301. alive = self.servicegroup_api.service_is_up(service)
  4302. host_status = ((alive and fields_obj.HostStatus.UP) or
  4303. fields_obj.HostStatus.UNKNOWN)
  4304. except IndexError:
  4305. host_status = fields_obj.HostStatus.NONE
  4306. else:
  4307. host_status = fields_obj.HostStatus.NONE
  4308. return host_status
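# Illustrative sketch (not part of the original module): the host-status
# decision above as a plain function over booleans.  The string values are
# assumed to mirror fields_obj.HostStatus.

def _host_status(forced_down, disabled, alive):
    if forced_down:
        return 'DOWN'
    if disabled:
        return 'MAINTENANCE'
    return 'UP' if alive else 'UNKNOWN'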
  4309. def get_instances_host_statuses(self, instance_list):
  4310. host_status_dict = dict()
  4311. host_statuses = dict()
  4312. for instance in instance_list:
  4313. if instance.host:
  4314. if instance.host not in host_status_dict:
  4315. host_status = self.get_instance_host_status(instance)
  4316. host_status_dict[instance.host] = host_status
  4317. else:
  4318. host_status = host_status_dict[instance.host]
  4319. else:
  4320. host_status = fields_obj.HostStatus.NONE
  4321. host_statuses[instance.uuid] = host_status
  4322. return host_statuses
  4323. def target_host_cell(fn):
  4324. """Target a host-based function to a cell.
  4325. Expects to wrap a function of signature:
  4326. func(self, context, host, ...)
  4327. """
  4328. @functools.wraps(fn)
  4329. def targeted(self, context, host, *args, **kwargs):
  4330. mapping = objects.HostMapping.get_by_host(context, host)
  4331. nova_context.set_target_cell(context, mapping.cell_mapping)
  4332. return fn(self, context, host, *args, **kwargs)
  4333. return targeted
  4334. def _find_service_in_cell(context, service_id=None, service_host=None):
  4335. """Find a service by id or hostname by searching all cells.
  4336. If one matching service is found, return it. If none or multiple
  4337. are found, raise an exception.
  4338. :param context: A context.RequestContext
  4339. :param service_id: If not none, the DB ID of the service to find
  4340. :param service_host: If not None, the hostname of the service to find
  4341. :returns: An objects.Service
  4342. :raises: ServiceNotUnique if multiple matching IDs are found
  4343. :raises: NotFound if no matches are found
  4344. :raises: NovaException if called with neither search option
  4345. """
  4346. load_cells()
  4347. service = None
  4348. found_in_cell = None
  4349. is_uuid = False
  4350. if service_id is not None:
  4351. is_uuid = uuidutils.is_uuid_like(service_id)
  4352. if is_uuid:
  4353. lookup_fn = lambda c: objects.Service.get_by_uuid(c, service_id)
  4354. else:
  4355. lookup_fn = lambda c: objects.Service.get_by_id(c, service_id)
  4356. elif service_host is not None:
  4357. lookup_fn = lambda c: (
  4358. objects.Service.get_by_compute_host(c, service_host))
  4359. else:
  4360. LOG.exception('_find_service_in_cell called with no search parameters')
  4361. # This is intentionally cryptic so we don't leak implementation details
  4362. # out of the API.
  4363. raise exception.NovaException()
  4364. for cell in CELLS:
  4365. # NOTE(danms): Services can be in cell0, so don't skip it here
  4366. try:
  4367. with nova_context.target_cell(context, cell) as cctxt:
  4368. cell_service = lookup_fn(cctxt)
  4369. except exception.NotFound:
  4370. # NOTE(danms): Keep looking in other cells
  4371. continue
  4372. if service and cell_service:
  4373. raise exception.ServiceNotUnique()
  4374. service = cell_service
  4375. found_in_cell = cell
  4376. if service and is_uuid:
  4377. break
  4378. if service:
  4379. # NOTE(danms): Set the cell on the context so it remains
  4380. # when we return to our caller
  4381. nova_context.set_target_cell(context, found_in_cell)
  4382. return service
  4383. else:
  4384. raise exception.NotFound()
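# Illustrative sketch (not part of the original module): the cross-cell
# search above reduced to plain data -- return the single match, raise if
# none or more than one is found.  The exception names are hypothetical.

class _NotFound(Exception):
    pass


class _NotUnique(Exception):
    pass


def _find_one(cells, predicate):
    # cells: {cell_name: [service_dict, ...]}
    found = None
    for services in cells.values():
        for service in services:
            if predicate(service):
                if found is not None:
                    raise _NotUnique()
                found = service
    if found is None:
        raise _NotFound()
    return found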
  4385. class HostAPI(base.Base):
  4386. """Sub-set of the Compute Manager API for managing host operations."""
  4387. def __init__(self, rpcapi=None, servicegroup_api=None):
  4388. self.rpcapi = rpcapi or compute_rpcapi.ComputeAPI()
  4389. self.servicegroup_api = servicegroup_api or servicegroup.API()
  4390. super(HostAPI, self).__init__()
  4391. def _assert_host_exists(self, context, host_name, must_be_up=False):
  4392. """Raise HostNotFound if compute host doesn't exist."""
  4393. service = objects.Service.get_by_compute_host(context, host_name)
  4394. if not service:
  4395. raise exception.HostNotFound(host=host_name)
  4396. if must_be_up and not self.servicegroup_api.service_is_up(service):
  4397. raise exception.ComputeServiceUnavailable(host=host_name)
  4398. return service['host']
  4399. @wrap_exception()
  4400. @target_host_cell
  4401. def set_host_enabled(self, context, host_name, enabled):
  4402. """Sets the specified host's ability to accept new instances."""
  4403. host_name = self._assert_host_exists(context, host_name)
  4404. payload = {'host_name': host_name, 'enabled': enabled}
  4405. compute_utils.notify_about_host_update(context,
  4406. 'set_enabled.start',
  4407. payload)
  4408. result = self.rpcapi.set_host_enabled(context, enabled=enabled,
  4409. host=host_name)
  4410. compute_utils.notify_about_host_update(context,
  4411. 'set_enabled.end',
  4412. payload)
  4413. return result
  4414. @target_host_cell
  4415. def get_host_uptime(self, context, host_name):
  4416. """Returns the result of calling "uptime" on the target host."""
  4417. host_name = self._assert_host_exists(context, host_name,
  4418. must_be_up=True)
  4419. return self.rpcapi.get_host_uptime(context, host=host_name)
  4420. @wrap_exception()
  4421. @target_host_cell
  4422. def host_power_action(self, context, host_name, action):
  4423. """Reboots, shuts down or powers up the host."""
  4424. host_name = self._assert_host_exists(context, host_name)
  4425. payload = {'host_name': host_name, 'action': action}
  4426. compute_utils.notify_about_host_update(context,
  4427. 'power_action.start',
  4428. payload)
  4429. result = self.rpcapi.host_power_action(context, action=action,
  4430. host=host_name)
  4431. compute_utils.notify_about_host_update(context,
  4432. 'power_action.end',
  4433. payload)
  4434. return result
  4435. @wrap_exception()
  4436. @target_host_cell
  4437. def set_host_maintenance(self, context, host_name, mode):
  4438. """Start/Stop host maintenance window. On start, it triggers
  4439. guest VMs evacuation.
  4440. """
  4441. host_name = self._assert_host_exists(context, host_name)
  4442. payload = {'host_name': host_name, 'mode': mode}
  4443. compute_utils.notify_about_host_update(context,
  4444. 'set_maintenance.start',
  4445. payload)
  4446. result = self.rpcapi.host_maintenance_mode(context,
  4447. host_param=host_name, mode=mode, host=host_name)
  4448. compute_utils.notify_about_host_update(context,
  4449. 'set_maintenance.end',
  4450. payload)
  4451. return result
  4452. def service_get_all(self, context, filters=None, set_zones=False,
  4453. all_cells=False, cell_down_support=False):
  4454. """Returns a list of services, optionally filtering the results.
4455. If specified, 'filters' should be a dictionary containing service
4456. attributes and matching values. For example, to get a list of services for
  4457. the 'compute' topic, use filters={'topic': 'compute'}.
  4458. If all_cells=True, then scan all cells and merge the results.
  4459. If cell_down_support=True then return minimal service records
  4460. for cells that do not respond based on what we have in the
  4461. host mappings. These will have only 'binary' and 'host' set.
  4462. """
  4463. if filters is None:
  4464. filters = {}
  4465. disabled = filters.pop('disabled', None)
  4466. if 'availability_zone' in filters:
  4467. set_zones = True
  4468. # NOTE(danms): Eventually this all_cells nonsense should go away
  4469. # and we should always iterate over the cells. However, certain
  4470. # callers need the legacy behavior for now.
  4471. if all_cells:
  4472. services = []
  4473. service_dict = nova_context.scatter_gather_all_cells(context,
  4474. objects.ServiceList.get_all, disabled, set_zones=set_zones)
  4475. for cell_uuid, service in service_dict.items():
  4476. if not nova_context.is_cell_failure_sentinel(service):
  4477. services.extend(service)
  4478. elif cell_down_support:
  4479. unavailable_services = objects.ServiceList()
  4480. cid = [cm.id for cm in nova_context.CELLS
  4481. if cm.uuid == cell_uuid]
  4482. # We know cid[0] is in the list because we are using the
  4483. # same list that scatter_gather_all_cells used
  4484. hms = objects.HostMappingList.get_by_cell_id(context,
  4485. cid[0])
  4486. for hm in hms:
  4487. unavailable_services.objects.append(objects.Service(
  4488. binary='nova-compute', host=hm.host))
  4489. LOG.warning("Cell %s is not responding and hence only "
  4490. "partial results are available from this "
  4491. "cell.", cell_uuid)
  4492. services.extend(unavailable_services)
  4493. else:
  4494. LOG.warning("Cell %s is not responding and hence skipped "
  4495. "from the results.", cell_uuid)
  4496. else:
  4497. services = objects.ServiceList.get_all(context, disabled,
  4498. set_zones=set_zones)
  4499. ret_services = []
  4500. for service in services:
  4501. for key, val in filters.items():
  4502. if service[key] != val:
  4503. break
  4504. else:
  4505. # All filters matched.
  4506. ret_services.append(service)
  4507. return ret_services
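# Illustrative sketch (not part of the original module): the for/else idiom
# used for the post-filtering above -- the else branch runs only when the
# loop finished without hitting break, i.e. every filter matched.

def _matches_all(record, filters):
    for key, value in filters.items():
        if record.get(key) != value:
            break
    else:
        return True
    return False


# _matches_all({'topic': 'compute', 'host': 'h1'}, {'topic': 'compute'})
# -> True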
  4508. def service_get_by_id(self, context, service_id):
  4509. """Get service entry for the given service id or uuid."""
  4510. try:
  4511. return _find_service_in_cell(context, service_id=service_id)
  4512. except exception.NotFound:
  4513. raise exception.ServiceNotFound(service_id=service_id)
  4514. @target_host_cell
  4515. def service_get_by_compute_host(self, context, host_name):
  4516. """Get service entry for the given compute hostname."""
  4517. return objects.Service.get_by_compute_host(context, host_name)
  4518. def _update_compute_provider_status(self, context, service):
  4519. """Calls the compute service to sync the COMPUTE_STATUS_DISABLED trait.
  4520. There are two cases where the API will not call the compute service:
  4521. * The compute service is down. In this case the trait is synchronized
  4522. when the compute service is restarted.
  4523. * The compute service is old. In this case the trait is synchronized
  4524. when the compute service is upgraded and restarted.
  4525. :param context: nova auth RequestContext
  4526. :param service: nova.objects.Service object which has been enabled
  4527. or disabled (see ``service_update``).
  4528. """
  4529. # Make sure the service is up so we can make the RPC call.
  4530. if not self.servicegroup_api.service_is_up(service):
  4531. LOG.info('Compute service on host %s is down. The '
  4532. 'COMPUTE_STATUS_DISABLED trait will be synchronized '
  4533. 'when the service is restarted.', service.host)
  4534. return
  4535. # Make sure the compute service is new enough for the trait sync
  4536. # behavior.
  4537. # TODO(mriedem): Remove this compat check in the U release.
  4538. if service.version < MIN_COMPUTE_SYNC_COMPUTE_STATUS_DISABLED:
  4539. LOG.info('Compute service on host %s is too old to sync the '
  4540. 'COMPUTE_STATUS_DISABLED trait in Placement. The '
  4541. 'trait will be synchronized when the service is '
  4542. 'upgraded and restarted.', service.host)
  4543. return
  4544. enabled = not service.disabled
  4545. # Avoid leaking errors out of the API.
  4546. try:
  4547. LOG.debug('Calling the compute service on host %s to sync the '
  4548. 'COMPUTE_STATUS_DISABLED trait.', service.host)
  4549. self.rpcapi.set_host_enabled(context, service.host, enabled)
  4550. except Exception:
  4551. LOG.exception('An error occurred while updating the '
  4552. 'COMPUTE_STATUS_DISABLED trait on compute node '
  4553. 'resource providers managed by host %s. The trait '
  4554. 'will be synchronized automatically by the compute '
  4555. 'service when the update_available_resource '
  4556. 'periodic task runs.', service.host)
  4557. def service_update(self, context, service):
  4558. """Performs the actual service update operation.
  4559. If the "disabled" field is changed, potentially calls the compute
  4560. service to sync the COMPUTE_STATUS_DISABLED trait on the compute node
  4561. resource providers managed by this compute service.
  4562. :param context: nova auth RequestContext
  4563. :param service: nova.objects.Service object with changes already
  4564. set on the object
  4565. """
  4566. # Before persisting changes and resetting the changed fields on the
  4567. # Service object, determine if the disabled field changed.
  4568. update_placement = 'disabled' in service.obj_what_changed()
  4569. # Persist the Service object changes to the database.
  4570. service.save()
  4571. # If the disabled field changed, potentially call the compute service
  4572. # to sync the COMPUTE_STATUS_DISABLED trait.
  4573. if update_placement:
  4574. self._update_compute_provider_status(context, service)
  4575. return service
  4576. @target_host_cell
  4577. def service_update_by_host_and_binary(self, context, host_name, binary,
  4578. params_to_update):
  4579. """Enable / Disable a service.
  4580. Determines the cell that the service is in using the HostMapping.
  4581. For compute services, this stops new builds and migrations going to
  4582. the host.
  4583. See also ``service_update``.
  4584. :param context: nova auth RequestContext
  4585. :param host_name: hostname of the service
  4586. :param binary: service binary (really only supports "nova-compute")
  4587. :param params_to_update: dict of changes to make to the Service object
  4588. :raises: HostMappingNotFound if the host is not mapped to a cell
  4589. :raises: HostBinaryNotFound if a services table record is not found
  4590. with the given host_name and binary
  4591. """
  4592. # TODO(mriedem): Service.get_by_args is deprecated; we should use
  4593. # get_by_compute_host here (remember to update the "raises" docstring).
  4594. service = objects.Service.get_by_args(context, host_name, binary)
  4595. service.update(params_to_update)
  4596. return self.service_update(context, service)
  4597. def _service_delete(self, context, service_id):
  4598. """Performs the actual Service deletion operation."""
  4599. try:
  4600. service = _find_service_in_cell(context, service_id=service_id)
  4601. except exception.NotFound:
  4602. raise exception.ServiceNotFound(service_id=service_id)
  4603. service.destroy()
  4604. # TODO(mriedem): Nothing outside of tests is using this now so we should
  4605. # be able to remove it.
  4606. def service_delete(self, context, service_id):
  4607. """Deletes the specified service found via id or uuid."""
  4608. self._service_delete(context, service_id)
  4609. @target_host_cell
  4610. def instance_get_all_by_host(self, context, host_name):
  4611. """Return all instances on the given host."""
  4612. return objects.InstanceList.get_by_host(context, host_name)
  4613. def task_log_get_all(self, context, task_name, period_beginning,
  4614. period_ending, host=None, state=None):
  4615. """Return the task logs within a given range, optionally
  4616. filtering by host and/or state.
  4617. """
  4618. return self.db.task_log_get_all(context, task_name,
  4619. period_beginning,
  4620. period_ending,
  4621. host=host,
  4622. state=state)
  4623. def compute_node_get(self, context, compute_id):
  4624. """Return compute node entry for particular integer ID or UUID."""
  4625. load_cells()
  4626. # NOTE(danms): Unfortunately this API exposes database identifiers
  4627. # which means we really can't do something efficient here
  4628. is_uuid = uuidutils.is_uuid_like(compute_id)
  4629. for cell in CELLS:
  4630. if cell.uuid == objects.CellMapping.CELL0_UUID:
  4631. continue
  4632. with nova_context.target_cell(context, cell) as cctxt:
  4633. try:
  4634. if is_uuid:
  4635. return objects.ComputeNode.get_by_uuid(cctxt,
  4636. compute_id)
  4637. return objects.ComputeNode.get_by_id(cctxt,
  4638. int(compute_id))
  4639. except exception.ComputeHostNotFound:
  4640. # NOTE(danms): Keep looking in other cells
  4641. continue
  4642. raise exception.ComputeHostNotFound(host=compute_id)
  4643. def compute_node_get_all(self, context, limit=None, marker=None):
  4644. load_cells()
  4645. computes = []
  4646. uuid_marker = marker and uuidutils.is_uuid_like(marker)
  4647. for cell in CELLS:
  4648. if cell.uuid == objects.CellMapping.CELL0_UUID:
  4649. continue
  4650. with nova_context.target_cell(context, cell) as cctxt:
  4651. # If we have a marker and it's a uuid, see if the compute node
  4652. # is in this cell.
  4653. if marker and uuid_marker:
  4654. try:
  4655. compute_marker = objects.ComputeNode.get_by_uuid(
  4656. cctxt, marker)
4657. # we found the marker compute node, so use its id
  4658. # for the actual marker for paging in this cell's db
  4659. marker = compute_marker.id
  4660. except exception.ComputeHostNotFound:
  4661. # The marker node isn't in this cell so keep looking.
  4662. continue
  4663. try:
  4664. cell_computes = objects.ComputeNodeList.get_by_pagination(
  4665. cctxt, limit=limit, marker=marker)
  4666. except exception.MarkerNotFound:
  4667. # NOTE(danms): Keep looking through cells
  4668. continue
  4669. computes.extend(cell_computes)
  4670. # NOTE(danms): We must have found the marker, so continue on
  4671. # without one
  4672. marker = None
  4673. if limit:
  4674. limit -= len(cell_computes)
  4675. if limit <= 0:
  4676. break
  4677. if marker is not None and len(computes) == 0:
  4678. # NOTE(danms): If we did not find the marker in any cell,
  4679. # mimic the db_api behavior here.
  4680. raise exception.MarkerNotFound(marker=marker)
  4681. return objects.ComputeNodeList(objects=computes)
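# Illustrative sketch (not part of the original module): the cross-cell
# paging above reduced to sorted lists of ids -- locate the marker in some
# cell, page from just after it, then keep consuming the remaining limit
# from later cells.

class _MarkerNotFound(Exception):
    pass


def _paged_across_cells(cells, limit=None, marker=None):
    # cells: list of lists of ids, one (already sorted) list per cell
    results = []
    for cell in cells:
        if marker is not None:
            if marker not in cell:
                # The marker node isn't in this cell, so keep looking.
                continue
            cell = cell[cell.index(marker) + 1:]
            marker = None
        if limit is not None:
            cell = cell[:limit]
            limit -= len(cell)
        results.extend(cell)
        if limit is not None and limit <= 0:
            break
    if marker is not None and not results:
        # Marker was never found in any cell; mimic the db_api behaviour.
        raise _MarkerNotFound(marker)
    return results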
  4682. def compute_node_search_by_hypervisor(self, context, hypervisor_match):
  4683. load_cells()
  4684. computes = []
  4685. for cell in CELLS:
  4686. if cell.uuid == objects.CellMapping.CELL0_UUID:
  4687. continue
  4688. with nova_context.target_cell(context, cell) as cctxt:
  4689. cell_computes = objects.ComputeNodeList.get_by_hypervisor(
  4690. cctxt, hypervisor_match)
  4691. computes.extend(cell_computes)
  4692. return objects.ComputeNodeList(objects=computes)
  4693. def compute_node_statistics(self, context):
  4694. load_cells()
  4695. cell_stats = []
  4696. for cell in CELLS:
  4697. if cell.uuid == objects.CellMapping.CELL0_UUID:
  4698. continue
  4699. with nova_context.target_cell(context, cell) as cctxt:
  4700. cell_stats.append(self.db.compute_node_statistics(cctxt))
  4701. if cell_stats:
  4702. keys = cell_stats[0].keys()
  4703. return {k: sum(stats[k] for stats in cell_stats)
  4704. for k in keys}
  4705. else:
  4706. return {}


class InstanceActionAPI(base.Base):
    """Sub-set of the Compute Manager API for managing instance actions."""

    def actions_get(self, context, instance, limit=None, marker=None,
                    filters=None):
        return objects.InstanceActionList.get_by_instance_uuid(
            context, instance.uuid, limit, marker, filters)

    def action_get_by_request_id(self, context, instance, request_id):
        return objects.InstanceAction.get_by_request_id(
            context, instance.uuid, request_id)

    def action_events_get(self, context, instance, action_id):
        return objects.InstanceActionEventList.get_by_action(
            context, action_id)
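
    # Illustrative usage (comment only): listing the actions recorded for an
    # instance and the events of the most recent one. 'ctxt' and 'instance'
    # are assumed to be a RequestContext and an Instance object.
    #
    #   action_api = InstanceActionAPI()
    #   actions = action_api.actions_get(ctxt, instance, limit=10)
    #   if actions:
    #       events = action_api.action_events_get(ctxt, instance,
    #                                             actions[0].id)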


class AggregateAPI(base.Base):
    """Sub-set of the Compute Manager API for managing host aggregates."""

    def __init__(self, **kwargs):
        self.compute_rpcapi = compute_rpcapi.ComputeAPI()
        self.query_client = query.SchedulerQueryClient()
        self._placement_client = None  # Lazy-load on first access.
        super(AggregateAPI, self).__init__(**kwargs)

    @property
    def placement_client(self):
        if self._placement_client is None:
            self._placement_client = report.SchedulerReportClient()
        return self._placement_client

    @wrap_exception()
    def create_aggregate(self, context, aggregate_name, availability_zone):
        """Creates the model for the aggregate."""
        aggregate = objects.Aggregate(context=context)
        aggregate.name = aggregate_name
        if availability_zone:
            aggregate.metadata = {'availability_zone': availability_zone}
        aggregate.create()
        self.query_client.update_aggregates(context, [aggregate])
        return aggregate
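
    # Illustrative sketch (comment only): creating an aggregate that also
    # defines an availability zone. 'ctxt' and the names used are
    # assumptions for the example.
    #
    #   agg_api = AggregateAPI()
    #   agg = agg_api.create_aggregate(ctxt, 'rack1-aggregate', 'az-rack1')
    #
    # Passing an availability_zone simply seeds the aggregate metadata with
    # an 'availability_zone' key; the zone itself is implied by that
    # metadata rather than stored as a separate object.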

    def get_aggregate(self, context, aggregate_id):
        """Get an aggregate by id."""
        return objects.Aggregate.get_by_id(context, aggregate_id)

    def get_aggregate_list(self, context):
        """Get all the aggregates."""
        return objects.AggregateList.get_all(context)

    def get_aggregates_by_host(self, context, compute_host):
        """Get all the aggregates where the given host is present."""
        return objects.AggregateList.get_by_host(context, compute_host)

    @wrap_exception()
    def update_aggregate(self, context, aggregate_id, values):
        """Update the properties of an aggregate."""
        aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
        if 'name' in values:
            aggregate.name = values.pop('name')
            aggregate.save()
        self.is_safe_to_update_az(context, values, aggregate=aggregate,
                                  action_name=AGGREGATE_ACTION_UPDATE,
                                  check_no_instances_in_az=True)
        if values:
            aggregate.update_metadata(values)
            aggregate.updated_at = timeutils.utcnow()
        self.query_client.update_aggregates(context, [aggregate])
        # If the updated values include availability_zone, the cache which
        # stores the availability_zone/host mapping needs to be reset.
        if values.get('availability_zone'):
            availability_zones.reset_cache()
        return aggregate

    @wrap_exception()
    def update_aggregate_metadata(self, context, aggregate_id, metadata):
        """Updates the aggregate metadata."""
        aggregate = objects.Aggregate.get_by_id(context, aggregate_id)
        self.is_safe_to_update_az(context, metadata, aggregate=aggregate,
                                  action_name=AGGREGATE_ACTION_UPDATE_META,
                                  check_no_instances_in_az=True)
        aggregate.update_metadata(metadata)
        self.query_client.update_aggregates(context, [aggregate])
        # If the updated metadata includes availability_zone, the cache which
        # stores the availability_zone/host mapping needs to be reset.
        if metadata and metadata.get('availability_zone'):
            availability_zones.reset_cache()
        aggregate.updated_at = timeutils.utcnow()
        return aggregate

    @wrap_exception()
    def delete_aggregate(self, context, aggregate_id):
        """Deletes the aggregate."""
        aggregate_payload = {'aggregate_id': aggregate_id}
        compute_utils.notify_about_aggregate_update(context,
                                                    "delete.start",
                                                    aggregate_payload)
        aggregate = objects.Aggregate.get_by_id(context, aggregate_id)

        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.DELETE,
            phase=fields_obj.NotificationPhase.START)

        if len(aggregate.hosts) > 0:
            msg = _("Host aggregate is not empty")
            raise exception.InvalidAggregateActionDelete(
                aggregate_id=aggregate_id, reason=msg)
        aggregate.destroy()
        self.query_client.delete_aggregate(context, aggregate)
        compute_utils.notify_about_aggregate_update(context,
                                                    "delete.end",
                                                    aggregate_payload)
        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.DELETE,
            phase=fields_obj.NotificationPhase.END)

    def is_safe_to_update_az(self, context, metadata, aggregate,
                             hosts=None,
                             action_name=AGGREGATE_ACTION_ADD,
                             check_no_instances_in_az=False):
        """Determine if updates alter an aggregate's availability zone.

        :param context: local context
        :param metadata: Target metadata for updating aggregate
        :param aggregate: Aggregate to update
        :param hosts: Hosts to check. If None, aggregate.hosts is used
        :type hosts: list
        :param action_name: Calling method for logging purposes
        :param check_no_instances_in_az: if True, it checks that there are
            no instances on any hosts of the aggregate
        """
        if 'availability_zone' in metadata:
            if not metadata['availability_zone']:
                msg = _("Aggregate %s does not support empty named "
                        "availability zone") % aggregate.name
                self._raise_invalid_aggregate_exc(action_name, aggregate.id,
                                                  msg)
            _hosts = hosts or aggregate.hosts
            host_aggregates = objects.AggregateList.get_by_metadata_key(
                context, 'availability_zone', hosts=_hosts)
            conflicting_azs = [
                agg.availability_zone for agg in host_aggregates
                if agg.availability_zone != metadata['availability_zone'] and
                agg.id != aggregate.id]
            if conflicting_azs:
                msg = _("One or more hosts already in availability zone(s) "
                        "%s") % conflicting_azs
                self._raise_invalid_aggregate_exc(action_name, aggregate.id,
                                                  msg)
            same_az_name = (aggregate.availability_zone ==
                            metadata['availability_zone'])
            if check_no_instances_in_az and not same_az_name:
                instance_count_by_cell = (
                    nova_context.scatter_gather_skip_cell0(
                        context,
                        objects.InstanceList.get_count_by_hosts,
                        _hosts))
                if any(cnt for cnt in instance_count_by_cell.values()):
                    msg = _("One or more hosts contain instances in this zone")
                    self._raise_invalid_aggregate_exc(
                        action_name, aggregate.id, msg)
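
    # Illustrative example (comment only) of the check above: if host
    # 'compute-01' already belongs to an aggregate whose metadata sets
    # availability_zone='az-a', then adding the same host to another
    # aggregate with availability_zone='az-b' is rejected, because a host
    # cannot sit in two different availability zones at once. The host and
    # zone names are assumptions for the example.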

    def _raise_invalid_aggregate_exc(self, action_name, aggregate_id, reason):
        if action_name == AGGREGATE_ACTION_ADD:
            raise exception.InvalidAggregateActionAdd(
                aggregate_id=aggregate_id, reason=reason)
        elif action_name == AGGREGATE_ACTION_UPDATE:
            raise exception.InvalidAggregateActionUpdate(
                aggregate_id=aggregate_id, reason=reason)
        elif action_name == AGGREGATE_ACTION_UPDATE_META:
            raise exception.InvalidAggregateActionUpdateMeta(
                aggregate_id=aggregate_id, reason=reason)
        elif action_name == AGGREGATE_ACTION_DELETE:
            raise exception.InvalidAggregateActionDelete(
                aggregate_id=aggregate_id, reason=reason)

        raise exception.NovaException(
            _("Unexpected aggregate action %s") % action_name)

    def _update_az_cache_for_host(self, context, host_name, aggregate_meta):
        # Update the availability_zone cache to avoid getting the wrong
        # availability_zone during the cache retention time when a host is
        # added to or removed from an aggregate.
        if aggregate_meta and aggregate_meta.get('availability_zone'):
            availability_zones.update_host_availability_zone_cache(context,
                                                                   host_name)

    @wrap_exception()
    def add_host_to_aggregate(self, context, aggregate_id, host_name):
        """Adds the host to an aggregate."""
        aggregate_payload = {'aggregate_id': aggregate_id,
                             'host_name': host_name}
        compute_utils.notify_about_aggregate_update(context,
                                                    "addhost.start",
                                                    aggregate_payload)

        # validates the host; HostMappingNotFound or ComputeHostNotFound
        # is raised if invalid
        try:
            mapping = objects.HostMapping.get_by_host(context, host_name)
            nova_context.set_target_cell(context, mapping.cell_mapping)
            service = objects.Service.get_by_compute_host(context, host_name)
        except exception.HostMappingNotFound:
            try:
                # NOTE(danms): This targets our cell
                service = _find_service_in_cell(context,
                                                service_host=host_name)
            except exception.NotFound:
                raise exception.ComputeHostNotFound(host=host_name)

        if service.host != host_name:
            # NOTE(danms): If we found a service but it is not an
            # exact match, we may have a case-insensitive backend
            # database (like mysql) which will end up with us
            # adding the host-aggregate mapping with a
            # non-matching hostname.
            raise exception.ComputeHostNotFound(host=host_name)

        aggregate = objects.Aggregate.get_by_id(context, aggregate_id)

        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.ADD_HOST,
            phase=fields_obj.NotificationPhase.START)

        self.is_safe_to_update_az(context, aggregate.metadata,
                                  hosts=[host_name], aggregate=aggregate)

        aggregate.add_host(host_name)
        self.query_client.update_aggregates(context, [aggregate])
        try:
            self.placement_client.aggregate_add_host(
                context, aggregate.uuid, host_name=host_name)
        except exception.PlacementAPIConnectFailure:
            # NOTE(jaypipes): Rocky should be able to tolerate the nova-api
            # service not communicating with the Placement API, so just log a
            # warning here.
            # TODO(jaypipes): Remove this in Stein, when placement must be
            # able to be contacted from the nova-api service.
            LOG.warning("Failed to associate %s with a placement "
                        "aggregate: %s. There was a failure to communicate "
                        "with the placement service.",
                        host_name, aggregate.uuid)
        except (exception.ResourceProviderNotFound,
                exception.ResourceProviderAggregateRetrievalFailed,
                exception.ResourceProviderUpdateFailed,
                exception.ResourceProviderUpdateConflict) as err:
            # NOTE(jaypipes): We don't want a failure to perform the mirroring
            # action in the placement service to be returned to the user (they
            # probably don't know anything about the placement service and
            # would just be confused). So, we just log a warning here, noting
            # that on the next run of nova-manage placement sync_aggregates
            # things will go back to normal.
            LOG.warning("Failed to associate %s with a placement "
                        "aggregate: %s. This may be corrected after running "
                        "nova-manage placement sync_aggregates.",
                        host_name, err)
        self._update_az_cache_for_host(context, host_name, aggregate.metadata)
        # NOTE(jogo): Send message to host to support resource pools
        self.compute_rpcapi.add_aggregate_host(context,
                aggregate=aggregate, host_param=host_name, host=host_name)
        aggregate_payload.update({'name': aggregate.name})
        compute_utils.notify_about_aggregate_update(context,
                                                    "addhost.end",
                                                    aggregate_payload)
        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.ADD_HOST,
            phase=fields_obj.NotificationPhase.END)

        return aggregate
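
    # Illustrative note (comment only): adding a host updates the aggregate
    # in the API database, mirrors the membership to the scheduler and the
    # placement service, refreshes the availability-zone cache, and finally
    # notifies the compute host over RPC. For example (names are
    # assumptions):
    #
    #   agg_api.add_host_to_aggregate(ctxt, agg.id, 'compute-01')
    #
    # Placement failures are deliberately tolerated here and can be
    # reconciled later with 'nova-manage placement sync_aggregates'.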

    @wrap_exception()
    def remove_host_from_aggregate(self, context, aggregate_id, host_name):
        """Removes host from the aggregate."""
        aggregate_payload = {'aggregate_id': aggregate_id,
                             'host_name': host_name}
        compute_utils.notify_about_aggregate_update(context,
                                                    "removehost.start",
                                                    aggregate_payload)
        # validates the host; HostMappingNotFound or ComputeHostNotFound
        # is raised if invalid
        mapping = objects.HostMapping.get_by_host(context, host_name)
        nova_context.set_target_cell(context, mapping.cell_mapping)
        objects.Service.get_by_compute_host(context, host_name)
        aggregate = objects.Aggregate.get_by_id(context, aggregate_id)

        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.REMOVE_HOST,
            phase=fields_obj.NotificationPhase.START)

        aggregate.delete_host(host_name)
        self.query_client.update_aggregates(context, [aggregate])
        try:
            self.placement_client.aggregate_remove_host(
                context, aggregate.uuid, host_name)
        except exception.PlacementAPIConnectFailure:
            # NOTE(jaypipes): Rocky should be able to tolerate the nova-api
            # service not communicating with the Placement API, so just log a
            # warning here.
            # TODO(jaypipes): Remove this in Stein, when placement must be
            # able to be contacted from the nova-api service.
            LOG.warning("Failed to remove association of %s with a placement "
                        "aggregate: %s. There was a failure to communicate "
                        "with the placement service.",
                        host_name, aggregate.uuid)
        except (exception.ResourceProviderNotFound,
                exception.ResourceProviderAggregateRetrievalFailed,
                exception.ResourceProviderUpdateFailed,
                exception.ResourceProviderUpdateConflict) as err:
            # NOTE(jaypipes): We don't want a failure to perform the mirroring
            # action in the placement service to be returned to the user (they
            # probably don't know anything about the placement service and
            # would just be confused). So, we just log a warning here, noting
            # that on the next run of nova-manage placement sync_aggregates
            # things will go back to normal.
            LOG.warning("Failed to remove association of %s with a placement "
                        "aggregate: %s. This may be corrected after running "
                        "nova-manage placement sync_aggregates.",
                        host_name, err)
        self._update_az_cache_for_host(context, host_name, aggregate.metadata)
        self.compute_rpcapi.remove_aggregate_host(context,
                aggregate=aggregate, host_param=host_name, host=host_name)
        compute_utils.notify_about_aggregate_update(context,
                                                    "removehost.end",
                                                    aggregate_payload)
        compute_utils.notify_about_aggregate_action(
            context=context,
            aggregate=aggregate,
            action=fields_obj.NotificationAction.REMOVE_HOST,
            phase=fields_obj.NotificationPhase.END)

        return aggregate


class KeypairAPI(base.Base):
    """Subset of the Compute Manager API for managing key pairs."""

    get_notifier = functools.partial(rpc.get_notifier, service='api')
    wrap_exception = functools.partial(exception_wrapper.wrap_exception,
                                       get_notifier=get_notifier,
                                       binary='nova-api')

    def _notify(self, context, event_suffix, keypair_name):
        payload = {
            'tenant_id': context.project_id,
            'user_id': context.user_id,
            'key_name': keypair_name,
        }
        notify = self.get_notifier()
        notify.info(context, 'keypair.%s' % event_suffix, payload)

    def _validate_new_key_pair(self, context, user_id, key_name, key_type):
        safe_chars = "_- " + string.digits + string.ascii_letters
        clean_value = "".join(x for x in key_name if x in safe_chars)
        if clean_value != key_name:
            raise exception.InvalidKeypair(
                reason=_("Keypair name contains unsafe characters"))

        try:
            utils.check_string_length(key_name, min_length=1, max_length=255)
        except exception.InvalidInput:
            raise exception.InvalidKeypair(
                reason=_('Keypair name must be string and between '
                         '1 and 255 characters long'))

        try:
            objects.Quotas.check_deltas(context, {'key_pairs': 1}, user_id)
        except exception.OverQuota:
            raise exception.KeypairLimitExceeded()

    @wrap_exception()
    def import_key_pair(self, context, user_id, key_name, public_key,
                        key_type=keypair_obj.KEYPAIR_TYPE_SSH):
        """Import a key pair using an existing public key."""
        self._validate_new_key_pair(context, user_id, key_name, key_type)

        self._notify(context, 'import.start', key_name)

        keypair = objects.KeyPair(context)
        keypair.user_id = user_id
        keypair.name = key_name
        keypair.type = key_type
        keypair.fingerprint = None
        keypair.public_key = public_key

        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.IMPORT,
            phase=fields_obj.NotificationPhase.START)

        fingerprint = self._generate_fingerprint(public_key, key_type)

        keypair.fingerprint = fingerprint
        keypair.create()

        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.IMPORT,
            phase=fields_obj.NotificationPhase.END)
        self._notify(context, 'import.end', key_name)

        return keypair

    @wrap_exception()
    def create_key_pair(self, context, user_id, key_name,
                        key_type=keypair_obj.KEYPAIR_TYPE_SSH):
        """Create a new key pair."""
        self._validate_new_key_pair(context, user_id, key_name, key_type)

        keypair = objects.KeyPair(context)
        keypair.user_id = user_id
        keypair.name = key_name
        keypair.type = key_type
        keypair.fingerprint = None
        keypair.public_key = None

        self._notify(context, 'create.start', key_name)
        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.CREATE,
            phase=fields_obj.NotificationPhase.START)

        private_key, public_key, fingerprint = self._generate_key_pair(
            user_id, key_type)

        keypair.fingerprint = fingerprint
        keypair.public_key = public_key
        keypair.create()

        # NOTE(melwitt): We recheck the quota after creating the object to
        # prevent users from allocating more resources than their allowed
        # quota in the event of a race. This is configurable because it can
        # be expensive if strict quota limits are not required in a
        # deployment.
        if CONF.quota.recheck_quota:
            try:
                objects.Quotas.check_deltas(context, {'key_pairs': 0}, user_id)
            except exception.OverQuota:
                keypair.destroy()
                raise exception.KeypairLimitExceeded()

        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.CREATE,
            phase=fields_obj.NotificationPhase.END)

        self._notify(context, 'create.end', key_name)
        return keypair, private_key
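
    # Illustrative usage (comment only): creating and importing key pairs.
    # 'ctxt' and the key material are assumptions for the example.
    #
    #   keypair_api = KeypairAPI()
    #   kp, private_key = keypair_api.create_key_pair(
    #       ctxt, ctxt.user_id, 'my-ssh-key')
    #   imported = keypair_api.import_key_pair(
    #       ctxt, ctxt.user_id, 'laptop-key', 'ssh-rsa AAAA... user@host')
    #
    # create_key_pair generates and returns the private key exactly once;
    # import_key_pair only stores the supplied public key and fingerprint.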

    def _generate_fingerprint(self, public_key, key_type):
        if key_type == keypair_obj.KEYPAIR_TYPE_SSH:
            return crypto.generate_fingerprint(public_key)
        elif key_type == keypair_obj.KEYPAIR_TYPE_X509:
            return crypto.generate_x509_fingerprint(public_key)

    def _generate_key_pair(self, user_id, key_type):
        if key_type == keypair_obj.KEYPAIR_TYPE_SSH:
            return crypto.generate_key_pair()
        elif key_type == keypair_obj.KEYPAIR_TYPE_X509:
            return crypto.generate_winrm_x509_cert(user_id)

    @wrap_exception()
    def delete_key_pair(self, context, user_id, key_name):
        """Delete a keypair by name."""
        self._notify(context, 'delete.start', key_name)
        keypair = self.get_key_pair(context, user_id, key_name)
        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.DELETE,
            phase=fields_obj.NotificationPhase.START)
        objects.KeyPair.destroy_by_name(context, user_id, key_name)
        compute_utils.notify_about_keypair_action(
            context=context,
            keypair=keypair,
            action=fields_obj.NotificationAction.DELETE,
            phase=fields_obj.NotificationPhase.END)
        self._notify(context, 'delete.end', key_name)

    def get_key_pairs(self, context, user_id, limit=None, marker=None):
        """List key pairs."""
        return objects.KeyPairList.get_by_user(
            context, user_id, limit=limit, marker=marker)

    def get_key_pair(self, context, user_id, key_name):
        """Get a keypair by name."""
        return objects.KeyPair.get_by_name(context, user_id, key_name)


class SecurityGroupAPI(base.Base, security_group_base.SecurityGroupBase):
    """Sub-set of the Compute API related to managing security groups
    and security group rules
    """

    # The nova security group api does not use a uuid for the id.
    id_is_uuid = False

    def __init__(self, **kwargs):
        super(SecurityGroupAPI, self).__init__(**kwargs)
        self.compute_rpcapi = compute_rpcapi.ComputeAPI()

    def validate_property(self, value, property, allowed):
        """Validate given security group property.

        :param value: the value to validate, as a string or unicode
        :param property: the property, either 'name' or 'description'
        :param allowed: the range of characters allowed
        """
        try:
            val = value.strip()
        except AttributeError:
            msg = _("Security group %s is not a string or unicode") % property
            self.raise_invalid_property(msg)
        utils.check_string_length(val, name=property, min_length=1,
                                  max_length=255)
        if allowed and not re.match(allowed, val):
            # Some validation to ensure that values match API spec.
            # - Alphanumeric characters, spaces, dashes, and underscores.
            # TODO(Daviey): LP: #813685 extend beyond group_name checking, and
            # probably create a param validator that can be used elsewhere.
            msg = (_("Value (%(value)s) for parameter Group%(property)s is "
                     "invalid. Content limited to '%(allowed)s'.") %
                   {'value': value, 'allowed': allowed,
                    'property': property.capitalize()})
            self.raise_invalid_property(msg)

    def ensure_default(self, context):
        """Ensure that a context has a security group.

        Creates a security group for the security context if it does not
        already exist.

        :param context: the security context
        """
        self.db.security_group_ensure_default(context)

    def create_security_group(self, context, name, description):
        try:
            objects.Quotas.check_deltas(context, {'security_groups': 1},
                                        context.project_id,
                                        user_id=context.user_id)
        except exception.OverQuota:
            msg = _("Quota exceeded, too many security groups.")
            self.raise_over_quota(msg)

        LOG.info("Create Security Group %s", name)

        self.ensure_default(context)

        group = {'user_id': context.user_id,
                 'project_id': context.project_id,
                 'name': name,
                 'description': description}
        try:
            group_ref = self.db.security_group_create(context, group)
        except exception.SecurityGroupExists:
            msg = _('Security group %s already exists') % name
            self.raise_group_already_exists(msg)

        # NOTE(melwitt): We recheck the quota after creating the object to
        # prevent users from allocating more resources than their allowed
        # quota in the event of a race. This is configurable because it can
        # be expensive if strict quota limits are not required in a
        # deployment.
        if CONF.quota.recheck_quota:
            try:
                objects.Quotas.check_deltas(context, {'security_groups': 0},
                                            context.project_id,
                                            user_id=context.user_id)
            except exception.OverQuota:
                self.db.security_group_destroy(context, group_ref['id'])
                msg = _("Quota exceeded, too many security groups.")
                self.raise_over_quota(msg)

        return group_ref

    def update_security_group(self, context, security_group,
                              name, description):
        if security_group['name'] in RO_SECURITY_GROUPS:
            msg = (_("Unable to update system group '%s'") %
                   security_group['name'])
            self.raise_invalid_group(msg)

        group = {'name': name,
                 'description': description}

        columns_to_join = ['rules.grantee_group']
        group_ref = self.db.security_group_update(context,
                security_group['id'],
                group,
                columns_to_join=columns_to_join)
        return group_ref

    def get(self, context, name=None, id=None, map_exception=False):
        self.ensure_default(context)
        cols = ['rules']
        try:
            if name:
                return self.db.security_group_get_by_name(context,
                                                           context.project_id,
                                                           name,
                                                           columns_to_join=cols)
            elif id:
                return self.db.security_group_get(context, id,
                                                  columns_to_join=cols)
        except exception.NotFound as exp:
            if map_exception:
                msg = exp.format_message()
                self.raise_not_found(msg)
            else:
                raise

    def list(self, context, names=None, ids=None, project=None,
             search_opts=None):
        self.ensure_default(context)

        groups = []
        if names or ids:
            if names:
                for name in names:
                    groups.append(self.db.security_group_get_by_name(context,
                                                                     project,
                                                                     name))
            if ids:
                for id in ids:
                    groups.append(self.db.security_group_get(context, id))

        elif context.is_admin:
            # TODO(eglynn): support a wider set of search options than just
            # all_tenants, at least include the standard filters defined for
            # the EC2 DescribeSecurityGroups API for the non-admin case also
            if (search_opts and 'all_tenants' in search_opts):
                groups = self.db.security_group_get_all(context)
            else:
                groups = self.db.security_group_get_by_project(context,
                                                               project)

        elif project:
            groups = self.db.security_group_get_by_project(context, project)

        return groups

    def destroy(self, context, security_group):
        if security_group['name'] in RO_SECURITY_GROUPS:
            msg = _("Unable to delete system group '%s'") % \
                  security_group['name']
            self.raise_invalid_group(msg)

        if self.db.security_group_in_use(context, security_group['id']):
            msg = _("Security group is still in use")
            self.raise_invalid_group(msg)

        LOG.info("Delete security group %s", security_group['name'])
        self.db.security_group_destroy(context, security_group['id'])

    def is_associated_with_server(self, security_group, instance_uuid):
        """Check if the security group is already associated
        with the instance. If so, return True.
        """
        if not security_group:
            return False

        instances = security_group.get('instances')
        if not instances:
            return False

        for inst in instances:
            if (instance_uuid == inst['uuid']):
                return True

        return False

    def add_to_instance(self, context, instance, security_group_name):
        """Add security group to the instance."""
        security_group = self.db.security_group_get_by_name(context,
                context.project_id,
                security_group_name)

        instance_uuid = instance.uuid

        # check if the security group is associated with the server
        if self.is_associated_with_server(security_group, instance_uuid):
            raise exception.SecurityGroupExistsForInstance(
                security_group_id=security_group['id'],
                instance_id=instance_uuid)

        self.db.instance_add_security_group(context.elevated(),
                                            instance_uuid,
                                            security_group['id'])
        if instance.host:
            self.compute_rpcapi.refresh_instance_security_rules(
                context, instance, instance.host)

    def remove_from_instance(self, context, instance, security_group_name):
        """Remove the security group associated with the instance."""
        security_group = self.db.security_group_get_by_name(context,
                context.project_id,
                security_group_name)

        instance_uuid = instance.uuid

        # check if the security group is associated with the server
        if not self.is_associated_with_server(security_group, instance_uuid):
            raise exception.SecurityGroupNotExistsForInstance(
                security_group_id=security_group['id'],
                instance_id=instance_uuid)

        self.db.instance_remove_security_group(context.elevated(),
                                               instance_uuid,
                                               security_group['id'])
        if instance.host:
            self.compute_rpcapi.refresh_instance_security_rules(
                context, instance, instance.host)

    def get_rule(self, context, id):
        self.ensure_default(context)
        try:
            return self.db.security_group_rule_get(context, id)
        except exception.NotFound:
            msg = _("Rule (%s) not found") % id
            self.raise_not_found(msg)

    def add_rules(self, context, id, name, vals):
        """Add security group rule(s) to security group.

        Note: the Nova security group API doesn't support adding multiple
        security group rules at once but the EC2 one does. Therefore,
        this function is written to support both.
        """
        try:
            objects.Quotas.check_deltas(context,
                                        {'security_group_rules': len(vals)},
                                        id)
        except exception.OverQuota:
            msg = _("Quota exceeded, too many security group rules.")
            self.raise_over_quota(msg)

        msg = ("Security group %(name)s added %(protocol)s ingress "
               "(%(from_port)s:%(to_port)s)")
        rules = []
        for v in vals:
            rule = self.db.security_group_rule_create(context, v)

            # NOTE(melwitt): We recheck the quota after creating the object to
            # prevent users from allocating more resources than their allowed
            # quota in the event of a race. This is configurable because it
            # can be expensive if strict quota limits are not required in a
            # deployment.
            if CONF.quota.recheck_quota:
                try:
                    objects.Quotas.check_deltas(context,
                                                {'security_group_rules': 0},
                                                id)
                except exception.OverQuota:
                    self.db.security_group_rule_destroy(context, rule['id'])
                    msg = _("Quota exceeded, too many security group rules.")
                    self.raise_over_quota(msg)

            rules.append(rule)
            LOG.info(msg, {'name': name,
                           'protocol': rule.protocol,
                           'from_port': rule.from_port,
                           'to_port': rule.to_port})

        self.trigger_rules_refresh(context, id=id)
        return rules
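
    # Illustrative example (comment only): the rule dicts passed in 'vals'
    # follow the nova-network security group rule schema; the field values
    # below are assumptions for the example.
    #
    #   vals = [{'protocol': 'tcp',
    #            'from_port': 22,
    #            'to_port': 22,
    #            'cidr': '0.0.0.0/0',
    #            'parent_group_id': sec_group['id']}]
    #   rules = sg_api.add_rules(ctxt, sec_group['id'],
    #                            sec_group['name'], vals)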

    def remove_rules(self, context, security_group, rule_ids):
        msg = ("Security group %(name)s removed %(protocol)s ingress "
               "(%(from_port)s:%(to_port)s)")
        for rule_id in rule_ids:
            rule = self.get_rule(context, rule_id)
            LOG.info(msg, {'name': security_group['name'],
                           'protocol': rule.protocol,
                           'from_port': rule.from_port,
                           'to_port': rule.to_port})

            self.db.security_group_rule_destroy(context, rule_id)

        # NOTE(vish): we removed some rules, so refresh
        self.trigger_rules_refresh(context, id=security_group['id'])

    def remove_default_rules(self, context, rule_ids):
        for rule_id in rule_ids:
            self.db.security_group_default_rule_destroy(context, rule_id)

    def add_default_rules(self, context, vals):
        rules = [self.db.security_group_default_rule_create(context, v)
                 for v in vals]
        return rules

    def default_rule_exists(self, context, values):
        """Indicates whether the specified rule values are already
        defined in the default security group rules.
        """
        for rule in self.db.security_group_default_rule_list(context):
            keys = ('cidr', 'from_port', 'to_port', 'protocol')
            for key in keys:
                if rule.get(key) != values.get(key):
                    break
            else:
                return rule.get('id') or True
        return False

    def get_all_default_rules(self, context):
        try:
            rules = self.db.security_group_default_rule_list(context)
        except Exception:
            msg = 'cannot get default security group rules'
            raise exception.SecurityGroupDefaultRuleNotFound(msg)

        return rules

    def get_default_rule(self, context, id):
        return self.db.security_group_default_rule_get(context, id)

    def validate_id(self, id):
        try:
            return int(id)
        except ValueError:
            msg = _("Security group id should be integer")
            self.raise_invalid_property(msg)

    def _refresh_instance_security_rules(self, context, instances):
        for instance in instances:
            if instance.host is not None:
                self.compute_rpcapi.refresh_instance_security_rules(
                    context, instance, instance.host)

    def trigger_rules_refresh(self, context, id):
        """Called when a rule is added to or removed from a security_group."""
        instances = objects.InstanceList.get_by_security_group_id(context, id)
        self._refresh_instance_security_rules(context, instances)

    def trigger_members_refresh(self, context, group_ids):
        """Called when a security group gains or loses a member.

        Sends an update request to each compute node for each instance for
        which this is relevant.
        """
        instances = objects.InstanceList.get_by_grantee_security_group_ids(
            context, group_ids)
        self._refresh_instance_security_rules(context, instances)

    def get_instance_security_groups(self, context, instance, detailed=False):
        if detailed:
            return self.db.security_group_get_by_instance(context,
                                                          instance.uuid)
        return [{'name': group.name} for group in instance.security_groups]
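
    # Illustrative usage (comment only): 'ctxt' and 'instance' are assumed to
    # be a RequestContext and an Instance with security_groups loaded.
    #
    #   sg_api = SecurityGroupAPI()
    #   names = sg_api.get_instance_security_groups(ctxt, instance)
    #   # e.g. [{'name': 'default'}]
    #   detailed = sg_api.get_instance_security_groups(ctxt, instance,
    #                                                  detailed=True)
    #
    # With detailed=False only group names are returned from the loaded
    # instance object; detailed=True queries the database for the full
    # security group records.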