Make conductor pass and use host_lists

The earlier patches in the series generated alternates and Selection
objects, and modified the RPC calls to send them to the conductor. This
patch has the conductor pass these host_lists to the compute for the
build process and, if the build fails, has the compute pass the
host_list back to the conductor so that the build can be retried on the
next alternate host.

Also fixes a bug in the scheduler manager exposed by this change
when using the CachingScheduler in a reschedule functional test.

Blueprint: return-alternate-hosts

Change-Id: Iae904afb6cb4fcea8bb27741d774ffbe986a5fb4
Ed Leafe 2017-10-10 23:25:29 +00:00 committed by Matt Riedemann
parent f8747407fc
commit d913de7471
14 changed files with 365 additions and 101 deletions
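
A hedged, self-contained sketch of the flow this series sets up (the class
and function names below are illustrative, not nova's interfaces, and the
conductor/compute round trip is collapsed into a single loop): the conductor
pops the selected host off each host_list, hands the remaining alternates to
the compute along with the build request, and a failed build hands that same
list back so the next alternate can be tried until the list is exhausted.

import collections

Selection = collections.namedtuple('Selection', ['service_host', 'nodename'])


def conductor_build(compute, host_list):
    """Pop the selected host, passing the untried alternates to compute."""
    while host_list:
        host = host_list.pop(0)
        if compute(host, host_list):
            return host
    raise RuntimeError('Exhausted all hosts available for retrying build')


def flaky_compute(host, remaining_alternates):
    """Pretend every build fails except the one landing on host2."""
    return host.service_host == 'host2'


hosts = [Selection('host1', 'node1'), Selection('host2', 'node2'),
         Selection('host3', 'node3')]
print(conductor_build(flaky_compute, hosts).service_host)  # -> host2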

View File

@ -485,7 +485,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
class ComputeManager(manager.Manager):
"""Manages the running instances from creation to destruction."""
target = messaging.Target(version='4.18')
target = messaging.Target(version='4.19')
# How long to wait in seconds before re-issuing a shutdown
# signal to an instance during power off. The overall
@ -1738,7 +1738,7 @@ class ComputeManager(manager.Manager):
filter_properties, admin_password=None,
injected_files=None, requested_networks=None,
security_groups=None, block_device_mapping=None,
node=None, limits=None):
node=None, limits=None, host_list=None):
@utils.synchronized(instance.uuid)
def _locked_do_build_and_run_instance(*args, **kwargs):
@ -1760,24 +1760,24 @@ class ComputeManager(manager.Manager):
result = build_results.FAILED
raise
finally:
fails = (build_results.FAILED,
build_results.RESCHEDULED)
if result in fails:
# Remove the allocation records from Placement for
# the instance if the build failed or is being
# rescheduled to another node. The instance.host is
if result == build_results.FAILED:
# Remove the allocation records from Placement for the
# instance if the build failed. The instance.host is
# likely set to None in _do_build_and_run_instance
# which means if the user deletes the instance, it will
# be deleted in the API, not the compute service.
# which means if the user deletes the instance, it
# will be deleted in the API, not the compute service.
# Setting the instance.host to None in
# _do_build_and_run_instance means that the
# ResourceTracker will no longer consider this instance
# to be claiming resources against it, so we want to
# reflect that same thing in Placement.
rt = self._get_resource_tracker()
rt.reportclient.delete_allocation_for_instance(
instance.uuid)
# reflect that same thing in Placement. No need to
# call this for a reschedule, as the allocations will
# have already been removed in
# self._do_build_and_run_instance().
self._delete_allocation_for_instance(instance.uuid)
if result in (build_results.FAILED,
build_results.RESCHEDULED):
self._build_failed()
else:
self._failed_builds = 0
@ -1789,7 +1789,11 @@ class ComputeManager(manager.Manager):
context, instance, image, request_spec,
filter_properties, admin_password, injected_files,
requested_networks, security_groups,
block_device_mapping, node, limits)
block_device_mapping, node, limits, host_list)
def _delete_allocation_for_instance(self, instance_uuid):
rt = self._get_resource_tracker()
rt.reportclient.delete_allocation_for_instance(instance_uuid)
def _check_device_tagging(self, requested_networks, block_device_mapping):
tagging_requested = False
@ -1817,7 +1821,7 @@ class ComputeManager(manager.Manager):
def _do_build_and_run_instance(self, context, instance, image,
request_spec, filter_properties, admin_password, injected_files,
requested_networks, security_groups, block_device_mapping,
node=None, limits=None):
node=None, limits=None, host_list=None):
try:
LOG.debug('Starting instance...', instance=instance)
@ -1895,11 +1899,18 @@ class ComputeManager(manager.Manager):
self._nil_out_instance_obj_host_and_node(instance)
instance.task_state = task_states.SCHEDULING
instance.save()
# The instance will have already claimed resources from this host
# before this build was attempted. Now that it has failed, we need
# to unclaim those resources before casting to the conductor, so
# that if there are alternate hosts available for a retry, it can
# claim resources on that new host for the instance.
self._delete_allocation_for_instance(instance.uuid)
self.compute_task_api.build_instances(context, [instance],
image, filter_properties, admin_password,
injected_files, requested_networks, security_groups,
block_device_mapping, request_spec=request_spec)
block_device_mapping, request_spec=request_spec,
host_lists=[host_list])
return build_results.RESCHEDULED
except (exception.InstanceNotFound,
exception.UnexpectedDeletingTaskStateError):
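
A hedged sketch of the compute-side reschedule path added above, with
stand-in stubs for the report client and conductor API (not nova's real
objects): before casting back to the conductor, the compute frees its
Placement allocation so a retry can claim resources on an alternate host,
then forwards the host_list it was given.

class FakeReportClient(object):
    def delete_allocation_for_instance(self, instance_uuid):
        print('freed allocation for %s' % instance_uuid)


class FakeConductorAPI(object):
    def build_instances(self, context, instances, host_lists=None, **kwargs):
        print('rescheduling with %d untried alternates' % len(host_lists[0]))


def reschedule(report_client, conductor_api, context, instance_uuid,
               host_list):
    # Drop this host's allocation so Placement stops counting the failed
    # build against it...
    report_client.delete_allocation_for_instance(instance_uuid)
    # ...then hand the untried alternates back to the conductor.
    conductor_api.build_instances(context, [instance_uuid],
                                  host_lists=[host_list])


reschedule(FakeReportClient(), FakeConductorAPI(), None, 'fake-uuid',
           ['alt-1', 'alt-2'])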

View File

@ -329,6 +329,14 @@ class ComputeAPI(object):
* 4.16 - Add tag argument to attach_interface()
* 4.17 - Add new_attachment_id to swap_volume.
* 4.18 - Add migration to prep_resize()
... Pike supports messaging version 4.18. So any changes to existing
methods in 4.x after that point should be done so that they can handle
the version_cap being set to 4.18.
* 4.19 - build_and_run_instance() now gets a 'host_list' parameter
representing potential alternate hosts for retries within a
cell.
'''
VERSION_ALIASES = {
@ -339,6 +347,7 @@ class ComputeAPI(object):
'mitaka': '4.11',
'newton': '4.13',
'ocata': '4.13',
'pike': '4.18',
}
def __init__(self):
@ -1116,22 +1125,31 @@ class ComputeAPI(object):
def build_and_run_instance(self, ctxt, instance, host, image, request_spec,
filter_properties, admin_password=None, injected_files=None,
requested_networks=None, security_groups=None,
block_device_mapping=None, node=None, limits=None):
block_device_mapping=None, node=None, limits=None,
host_list=None):
# NOTE(edleafe): compute nodes can only use the dict form of limits.
if isinstance(limits, objects.SchedulerLimits):
limits = limits.to_dict()
version = '4.0'
cctxt = self.router.client(ctxt).prepare(
server=host, version=version)
cctxt.cast(ctxt, 'build_and_run_instance', instance=instance,
image=image, request_spec=request_spec,
filter_properties=filter_properties,
admin_password=admin_password,
injected_files=injected_files,
requested_networks=requested_networks,
security_groups=security_groups,
block_device_mapping=block_device_mapping, node=node,
limits=limits)
kwargs = {"instance": instance,
"image": image,
"request_spec": request_spec,
"filter_properties": filter_properties,
"admin_password": admin_password,
"injected_files": injected_files,
"requested_networks": requested_networks,
"security_groups": security_groups,
"block_device_mapping": block_device_mapping,
"node": node,
"limits": limits,
"host_list": host_list,
}
client = self.router.client(ctxt)
version = '4.19'
if not client.can_send_version(version):
version = '4.0'
kwargs.pop("host_list")
cctxt = client.prepare(server=host, version=version)
cctxt.cast(ctxt, 'build_and_run_instance', **kwargs)
def quiesce_instance(self, ctxt, instance):
version = '4.0'
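
A hedged sketch of the version-negotiation pattern used in
build_and_run_instance() above (FakeClient is a stand-in, not
oslo.messaging; the real code delegates the version comparison to the RPC
client): when the target compute is still capped at 4.18, the new host_list
kwarg is dropped so the cast stays compatible. The conductor RPC API change
later in this commit chains the same check across several caps
(1.19, 1.18, 1.10, 1.9, 1.8, 1.5), dropping or converting arguments at each
step.

class FakeClient(object):
    def __init__(self, version_cap):
        self.version_cap = version_cap

    def can_send_version(self, version):
        # Crude stand-in for oslo.messaging's version comparison.
        return float(version) <= float(self.version_cap)


def build_kwargs(client, host_list):
    kwargs = {'host_list': host_list}
    version = '4.19'
    if not client.can_send_version(version):
        version = '4.0'
        kwargs.pop('host_list')
    return version, kwargs


print(build_kwargs(FakeClient('4.18'), ['alt']))  # ('4.0', {})
print(build_kwargs(FakeClient('4.19'), ['alt']))  # ('4.19', {'host_list': ['alt']})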

View File

@ -113,7 +113,7 @@ class ComputeTaskAPI(object):
def build_instances(self, context, instances, image, filter_properties,
admin_password, injected_files, requested_networks,
security_groups, block_device_mapping, legacy_bdm=True,
request_spec=None):
request_spec=None, host_lists=None):
self.conductor_compute_rpcapi.build_instances(context,
instances=instances, image=image,
filter_properties=filter_properties,
@ -121,7 +121,8 @@ class ComputeTaskAPI(object):
requested_networks=requested_networks,
security_groups=security_groups,
block_device_mapping=block_device_mapping,
legacy_bdm=legacy_bdm, request_spec=request_spec)
legacy_bdm=legacy_bdm, request_spec=request_spec,
host_lists=host_lists)
def schedule_and_build_instances(self, context, build_requests,
request_spec, image,

View File

@ -21,6 +21,7 @@ import functools
from oslo_config import cfg
from oslo_log import log as logging
import oslo_messaging as messaging
from oslo_serialization import jsonutils
from oslo_utils import excutils
from oslo_utils import versionutils
import six
@ -218,7 +219,7 @@ class ComputeTaskManager(base.Base):
may involve coordinating activities on multiple compute nodes.
"""
target = messaging.Target(namespace='compute_task', version='1.18')
target = messaging.Target(namespace='compute_task', version='1.19')
def __init__(self):
super(ComputeTaskManager, self).__init__()
@ -227,6 +228,7 @@ class ComputeTaskManager(base.Base):
self.network_api = network.API()
self.servicegroup_api = servicegroup.API()
self.scheduler_client = scheduler_client.SchedulerClient()
self.report_client = self.scheduler_client.reportclient
self.notifier = rpc.get_notifier('compute', CONF.host)
def reset(self):
@ -516,7 +518,7 @@ class ComputeTaskManager(base.Base):
def build_instances(self, context, instances, image, filter_properties,
admin_password, injected_files, requested_networks,
security_groups, block_device_mapping=None, legacy_bdm=True,
request_spec=None):
request_spec=None, host_lists=None):
# TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version
# 2.0 of the RPC API.
# TODO(danms): Remove this in version 2.0 of the RPC API
@ -548,6 +550,11 @@ class ComputeTaskManager(base.Base):
# work on a RequestSpec object rather than filter_properties.
request_spec = request_spec.to_legacy_request_spec_dict()
# 'host_lists' will be None in one of two cases: when running cellsv1,
# or during a reschedule from a pre-Queens compute. In all other cases,
# it will be a list of lists, though the lists may be empty if there
# are no more hosts left in a rescheduling situation.
is_reschedule = host_lists is not None
try:
# check retry policy. Rather ugly use of instances[0]...
# but if we've exceeded max retries... then we really only
@ -559,8 +566,22 @@ class ComputeTaskManager(base.Base):
instance_uuids = [instance.uuid for instance in instances]
spec_obj = objects.RequestSpec.from_primitives(
context, request_spec, filter_properties)
host_lists = self._schedule_instances(context, spec_obj,
instance_uuids, return_alternates=True)
LOG.debug("Rescheduling: %s", is_reschedule)
if is_reschedule:
# Make sure that we have a host, as we may have exhausted all
# our alternates
if not host_lists[0]:
# We have an empty list of hosts, so this instance has
# failed to build.
msg = ("Exhausted all hosts available for retrying build "
"failures for instance %(instance_uuid)s." %
{"instance_uuid": instances[0].uuid})
raise exception.MaxRetriesExceeded(reason=msg)
else:
# This is not a reschedule, so we need to call the scheduler to
# get appropriate hosts for the request.
host_lists = self._schedule_instances(context, spec_obj,
instance_uuids, return_alternates=True)
except Exception as exc:
num_attempts = filter_properties.get(
'retry', {}).get('num_attempts', 1)
@ -585,8 +606,45 @@ class ComputeTaskManager(base.Base):
context, instance, requested_networks)
return
elevated = context.elevated()
for (instance, host_list) in six.moves.zip(instances, host_lists):
host = host_list[0]
host = host_list.pop(0)
if is_reschedule:
# If this runs in the superconductor, the first instance will
# already have its resources claimed in placement. If this is a
# retry, though, this is running in the cell conductor, and we
# need to claim first to ensure that the alternate host still
# has its resources available. Note that there are schedulers
# that don't support Placement, so must assume that the host is
# still available.
host_available = False
while host and not host_available:
if host.allocation_request:
alloc_req = jsonutils.loads(host.allocation_request)
else:
alloc_req = None
if alloc_req:
host_available = scheduler_utils.claim_resources(
elevated, self.report_client, spec_obj,
instance.uuid, alloc_req,
host.allocation_request_version)
else:
# Some deployments use different schedulers that do not
# use Placement, so they will not have an
# allocation_request to claim with. For those cases,
# there is no concept of claiming, so just assume that
# the host is valid.
host_available = True
if not host_available:
# Insufficient resources remain on that host, so
# discard it and try the next.
host = host_list.pop(0) if host_list else None
if not host_available:
# No more available hosts for retrying the build.
msg = ("Exhausted all hosts available for retrying build "
"failures for instance %(instance_uuid)s." %
{"instance_uuid": instance.uuid})
raise exception.MaxRetriesExceeded(reason=msg)
instance.availability_zone = (
availability_zones.get_host_availability_zone(context,
host.service_host))
@ -634,6 +692,10 @@ class ComputeTaskManager(base.Base):
inst_mapping.destroy()
return
alts = [(alt.service_host, alt.nodename) for alt in host_list]
LOG.debug("Selected host: %s; Selected node: %s; Alternates: %s",
host.service_host, host.nodename, alts, instance=instance)
self.compute_rpcapi.build_and_run_instance(context,
instance=instance, host=host.service_host, image=image,
request_spec=local_reqspec,
@ -643,7 +705,7 @@ class ComputeTaskManager(base.Base):
requested_networks=requested_networks,
security_groups=security_groups,
block_device_mapping=bdms, node=host.nodename,
limits=host.limits)
limits=host.limits, host_list=host_list)
def _schedule_instances(self, context, request_spec,
instance_uuids=None, return_alternates=False):
@ -803,8 +865,7 @@ class ComputeTaskManager(base.Base):
# in the API.
try:
scheduler_utils.claim_resources_on_destination(
self.scheduler_client.reportclient, instance,
source_node, dest_node)
self.report_client, instance, source_node, dest_node)
except exception.NoValidHost as ex:
with excutils.save_and_reraise_exception():
self._set_vm_state_and_notify(
@ -1134,7 +1195,10 @@ class ComputeTaskManager(base.Base):
cell = cell_mapping_cache[instance.uuid]
# host_list is a list of one or more Selection objects, the first
# of which has been selected and its resources claimed.
host = host_list[0]
host = host_list.pop(0)
alts = [(alt.service_host, alt.nodename) for alt in host_list]
LOG.debug("Selected host: %s; Selected node: %s; Alternates: %s",
host.service_host, host.nodename, alts, instance=instance)
filter_props = request_spec.to_legacy_filter_properties_dict()
scheduler_utils.populate_retry(filter_props, instance.uuid)
scheduler_utils.populate_filter_properties(filter_props,
@ -1192,7 +1256,7 @@ class ComputeTaskManager(base.Base):
security_groups=legacy_secgroups,
block_device_mapping=instance_bdms,
host=host.service_host, node=host.nodename,
limits=host.limits)
limits=host.limits, host_list=host_list)
def _cleanup_build_artifacts(self, context, exc, instances, build_requests,
request_specs, cell_mapping_cache):
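
A hedged sketch of the claiming loop added to build_instances() above
(claim_resources here is an injected stand-in for
nova.scheduler.utils.claim_resources): on a reschedule the cell conductor
walks the alternates in order, claims in Placement where a Selection carries
an allocation request, treats Placement-less schedulers as always claimable,
and gives up only when the list is exhausted.

import collections

Alt = collections.namedtuple('Alt', ['service_host', 'allocation_request'])


def pick_retry_host(host_list, claim_resources):
    """Walk the alternates in order, claiming in Placement where possible."""
    host = host_list.pop(0) if host_list else None
    while host is not None:
        if host.allocation_request is None:
            # Schedulers that bypass Placement have nothing to claim, so
            # assume the host is still usable.
            return host
        if claim_resources(host.allocation_request):
            return host
        # The claim failed -- that alternate is full -- so try the next one.
        host = host_list.pop(0) if host_list else None
    raise RuntimeError('Exhausted all hosts available for retrying build')


alts = [Alt('host1', '{"allocations": []}'), Alt('host2', None)]
# With every Placement claim rejected, host1 is discarded and host2 (which
# carries no allocation request) is accepted.
print(pick_retry_host(alts, claim_resources=lambda req: False).service_host)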

View File

@ -275,6 +275,9 @@ class ComputeTaskAPI(object):
1.16 - Added schedule_and_build_instances
1.17 - Added tags to schedule_and_build_instances()
1.18 - Added request_spec to build_instances().
1.19 - build_instances() now gets a 'host_lists' parameter that represents
potential alternate hosts for retries within a cell for each
instance.
"""
def __init__(self):
@ -329,39 +332,42 @@ class ComputeTaskAPI(object):
def build_instances(self, context, instances, image, filter_properties,
admin_password, injected_files, requested_networks,
security_groups, block_device_mapping, legacy_bdm=True,
request_spec=None):
request_spec=None, host_lists=None):
image_p = jsonutils.to_primitive(image)
version = '1.18'
send_reqspec = True
kwargs = {"instances": instances, "image": image_p,
"filter_properties": filter_properties,
"admin_password": admin_password,
"injected_files": injected_files,
"requested_networks": requested_networks,
"security_groups": security_groups,
"request_spec": request_spec,
"host_lists": host_lists}
version = '1.19'
if not self.client.can_send_version(version):
version = '1.18'
kwargs.pop("host_lists")
if not self.client.can_send_version(version):
send_reqspec = False
version = '1.10'
kwargs.pop("request_spec")
if not self.client.can_send_version(version):
version = '1.9'
if 'instance_type' in filter_properties:
flavor = filter_properties['instance_type']
flavor_p = objects_base.obj_to_primitive(flavor)
filter_properties = dict(filter_properties,
instance_type=flavor_p)
kw = {'instances': instances, 'image': image_p,
'filter_properties': filter_properties,
'admin_password': admin_password,
'injected_files': injected_files,
'requested_networks': requested_networks,
'security_groups': security_groups}
if send_reqspec:
kw['request_spec'] = request_spec
kwargs["filter_properties"] = dict(filter_properties,
instance_type=flavor_p)
if not self.client.can_send_version(version):
version = '1.8'
kw['requested_networks'] = kw['requested_networks'].as_tuples()
nets = kwargs['requested_networks'].as_tuples()
kwargs['requested_networks'] = nets
if not self.client.can_send_version('1.7'):
version = '1.5'
bdm_p = objects_base.obj_to_primitive(block_device_mapping)
kw.update({'block_device_mapping': bdm_p,
'legacy_bdm': legacy_bdm})
kwargs.update({'block_device_mapping': bdm_p,
'legacy_bdm': legacy_bdm})
cctxt = self.client.prepare(version=version)
cctxt.cast(context, 'build_instances', **kw)
cctxt.cast(context, 'build_instances', **kwargs)
def schedule_and_build_instances(self, context, build_requests,
request_specs,

View File

@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
# NOTE(danms): This is the global service version counter
SERVICE_VERSION = 25
SERVICE_VERSION = 26
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@ -120,6 +120,8 @@ SERVICE_VERSION_HISTORY = (
# Version 25: Compute hosts allow migration-based allocations
# for live migration.
{'compute_rpc': '4.18'},
# Version 26: Adds a 'host_list' parameter to build_and_run_instance()
{'compute_rpc': '4.19'},
)
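
A simplified, hypothetical illustration of why the history entry above
matters (this is not nova's actual helper): the lowest nova-compute service
version running in the deployment is looked up against this history to
derive the compute RPC cap, so a deployment still running a version-25
compute pins callers to 4.18 and the new host_list argument gets dropped by
the RPC client fallback shown earlier.

RPC_CAPS = {25: '4.18', 26: '4.19'}  # hypothetical excerpt of the history


def compute_rpc_cap(minimum_service_version):
    return RPC_CAPS[minimum_service_version]


print(compute_rpc_cap(25))  # -> '4.18', so host_list cannot be sent yet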

View File

@ -345,7 +345,14 @@ class FilterScheduler(driver.Scheduler):
selections_to_return = []
for selected_host in selected_hosts:
# This is the list of hosts for one particular instance.
selection = objects.Selection.from_host_state(selected_host)
if alloc_reqs_by_rp_uuid:
selected_alloc_req = alloc_reqs_by_rp_uuid.get(
selected_host.uuid)[0]
else:
selected_alloc_req = None
selection = objects.Selection.from_host_state(selected_host,
allocation_request=selected_alloc_req,
allocation_request_version=allocation_request_version)
selected_plus_alts = [selection]
cell_uuid = selected_host.cell_uuid
# This will populate the alternates with many of the same unclaimed

View File

@ -116,7 +116,8 @@ class SchedulerManager(manager.Manager):
request_spec,
filter_properties)
resources = utils.resources_from_request_spec(spec_obj)
alloc_reqs_by_rp_uuid, provider_summaries = None, None
alloc_reqs_by_rp_uuid, provider_summaries, allocation_request_version \
= None, None, None
if self.driver.USES_ALLOCATION_CANDIDATES:
res = self.placement_client.get_allocation_candidates(resources)
if res is None:
@ -144,7 +145,7 @@ class SchedulerManager(manager.Manager):
rp_uuid = rr['resource_provider']['uuid']
alloc_reqs_by_rp_uuid[rp_uuid].append(ar)
# Only return alteranates if both return_objects and return_alternates
# Only return alternates if both return_objects and return_alternates
# are True.
return_alternates = return_alternates and return_objects
selections = self.driver.select_destinations(ctxt, spec_obj,
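
A hedged reconstruction of the bug the hunk above fixes (variable names
follow the diff, the control flow is simplified, and the exact failure mode
is inferred): allocation_request_version was previously bound only on the
Placement path, so a driver that skips allocation candidates, such as the
CachingScheduler exercised by the new reschedule functional test, presumably
hit an UnboundLocalError when the value was later passed to
select_destinations(). Initializing it to None alongside the other two
locals avoids that.

def old_style_select_destinations(uses_allocation_candidates):
    # Old initialization: allocation_request_version missing from this line.
    alloc_reqs_by_rp_uuid, provider_summaries = None, None
    if uses_allocation_candidates:
        allocation_request_version = '1.12'  # illustrative value
    return allocation_request_version  # unbound when the branch is skipped


try:
    old_style_select_destinations(False)
except UnboundLocalError as err:
    print('CachingScheduler path failed: %s' % err)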

View File

@ -148,3 +148,11 @@ class TestRetryBetweenComputeNodeBuilds(test.TestCase):
# Assert that we retried.
self.assertEqual(2, self.attempts)
class TestRetryBetweenComputeNodeBuildsCachingScheduler(
TestRetryBetweenComputeNodeBuilds):
"""Tests the reschedule scenario using the CachingScheduler."""
def setUp(self):
self.flags(driver='caching_scheduler', group='scheduler')
super(TestRetryBetweenComputeNodeBuildsCachingScheduler, self).setUp()

View File

@ -73,6 +73,7 @@ from nova.volume import cinder
CONF = nova.conf.CONF
fake_host_list = [mock.sentinel.host1]
class ComputeManagerUnitTestCase(test.NoDBTestCase):
@ -4365,7 +4366,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
self._assert_build_instance_hook_called(mock_hooks,
build_results.ACTIVE)
@ -4396,7 +4397,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
port_id=uuids.port_instance)],
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
requested_network = mock_build_and_run.call_args[0][5][0]
self.assertEqual('fake_network_id', requested_network.network_id)
self.assertEqual('10.0.0.1', str(requested_network.address))
@ -4431,7 +4432,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
self._instance_action_events(mock_start, mock_finish)
self._assert_build_instance_update(mock_save)
@ -4480,7 +4481,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping,
node=self.node, limits=self.limits)
node=self.node, limits=self.limits,
host_list=fake_host_list)
self._assert_build_instance_hook_called(mock_hooks,
build_results.RESCHEDULED)
@ -4498,7 +4500,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
[self.instance], self.image, self.filter_properties,
self.admin_pass, self.injected_files, self.requested_networks,
self.security_groups, self.block_device_mapping,
request_spec={})
request_spec={}, host_lists=[fake_host_list])
@mock.patch.object(manager.ComputeManager, '_shutdown_instance')
@mock.patch.object(manager.ComputeManager, '_build_networks_for_instance')
@ -4569,7 +4571,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
mock_build_and_run.assert_called_once_with(self.context,
instance,
@ -4583,7 +4585,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
[instance], self.image, self.filter_properties,
self.admin_pass, self.injected_files, self.requested_networks,
self.security_groups, self.block_device_mapping,
request_spec={})
request_spec={}, host_lists=[fake_host_list])
@mock.patch.object(manager.ComputeManager, '_build_and_run_instance')
@mock.patch.object(conductor_api.ComputeTaskAPI, 'build_instances')
@ -4625,7 +4627,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
mock_build_and_run.assert_called_once_with(self.context,
instance,
@ -4639,7 +4641,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
[instance], self.image, self.filter_properties,
self.admin_pass, self.injected_files, self.requested_networks,
self.security_groups, self.block_device_mapping,
request_spec={})
request_spec={}, host_lists=[fake_host_list])
@mock.patch.object(objects.InstanceActionEvent,
'event_finish_with_failure')
@ -4668,7 +4670,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
self._assert_build_instance_hook_called(mock_hooks,
build_results.FAILED)
@ -4721,7 +4723,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping,
node=self.node, limits=self.limits)
node=self.node, limits=self.limits,
host_list=fake_host_list)
self._assert_build_instance_hook_called(mock_hooks,
build_results.RESCHEDULED)
@ -4740,7 +4743,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
[self.instance], self.image, self.filter_properties,
self.admin_pass, self.injected_files, self.requested_networks,
self.security_groups, self.block_device_mapping,
request_spec={})
request_spec={}, host_lists=[fake_host_list])
@mock.patch.object(objects.InstanceActionEvent,
'event_finish_with_failure')
@ -4770,7 +4773,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
self._assert_build_instance_hook_called(mock_hooks,
build_results.RESCHEDULED)
@ -4789,7 +4792,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
[self.instance], self.image, self.filter_properties,
self.admin_pass, self.injected_files, self.requested_networks,
self.security_groups, self.block_device_mapping,
request_spec={})
request_spec={}, host_lists=[fake_host_list])
@mock.patch.object(objects.InstanceActionEvent,
'event_finish_with_failure')
@ -4820,7 +4823,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
self._assert_build_instance_hook_called(mock_hooks,
build_results.FAILED)
@ -5163,7 +5166,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping,
node=self.node, limits=self.limits)
node=self.node, limits=self.limits,
host_list=fake_host_list)
self._instance_action_events(mock_start, mock_finish)
self._assert_build_instance_update(mock_save, reschedule_update=True)
@ -5177,7 +5181,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
self.image, self.filter_properties, self.admin_pass,
self.injected_files, self.requested_networks,
self.security_groups, self.block_device_mapping,
request_spec={})
request_spec={}, host_lists=[fake_host_list])
mock_nil.assert_called_once_with(self.instance)
mock_clean.assert_called_once_with(self.context, self.instance,
self.compute.host)

View File

@ -16,6 +16,7 @@
Unit Tests for nova.compute.rpcapi
"""
import copy
import mock
from oslo_serialization import jsonutils
@ -23,6 +24,7 @@ from nova.compute import rpcapi as compute_rpcapi
import nova.conf
from nova import context
from nova import exception
from nova import objects
from nova.objects import block_device as objects_block_dev
from nova.objects import migrate_data as migrate_data_obj
from nova.objects import migration as migration_obj
@ -731,7 +733,49 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
admin_password='passwd', injected_files=None,
requested_networks=['network1'], security_groups=None,
block_device_mapping=None, node='node', limits=[],
version='4.0')
host_list=None, version='4.19')
def test_build_and_run_instance_4_18(self):
ctxt = context.RequestContext('fake_user', 'fake_project')
rpcapi = compute_rpcapi.ComputeAPI()
mock_client = mock.Mock()
rpcapi.router.client = mock.Mock(return_value=mock_client)
mock_client.can_send_version = mock.Mock(return_value=False)
prepare_mock = mock.Mock()
prepare_mock.cast = mock.Mock()
mock_client.prepare.return_value = prepare_mock
fake_limit = {"memory_mb": 1024, "disk_gb": 100, "vcpus": 2,
"numa_topology": None}
fake_limit_obj = objects.SchedulerLimits.from_dict(fake_limit)
args = (self.fake_instance_obj, "host", "image", "request_spec",
"filter_properties")
kwargs = {
"admin_password": 'passwd',
"injected_files": None,
"requested_networks": ['network1'],
"security_groups": None,
"block_device_mapping": None,
"node": 'node',
"limits": fake_limit_obj,
"host_list": ["host"],
}
expected_kwargs = copy.deepcopy(kwargs)
# Since we're failing the 'can_send_version' check, the host_list
# should be removed, and the limits objects should be converted to the
# older dict format.
expected_kwargs.pop("host_list")
expected_kwargs["limits"] = fake_limit_obj.to_dict()
# Add in the args, which will be added to the kwargs dict in the RPC
# call
expected_kwargs["instance"] = self.fake_instance_obj
expected_kwargs["image"] = "image"
expected_kwargs["request_spec"] = "request_spec"
expected_kwargs["filter_properties"] = "filter_properties"
rpcapi.build_and_run_instance(ctxt, *args, **kwargs)
prepare_mock.cast.assert_called_once_with(ctxt,
"build_and_run_instance", **expected_kwargs)
def test_quiesce_instance(self):
self._test_compute_api('quiesce_instance', 'call',

View File

@ -65,10 +65,40 @@ from nova import utils
CONF = conf.CONF
fake_alloc1 = {"allocations": [
{"resource_provider": {"uuid": uuids.host1},
"resources": {"VCPU": 1,
"MEMORY_MB": 1024,
"DISK_GB": 100}
}]}
fake_alloc2 = {"allocations": [
{"resource_provider": {"uuid": uuids.host2},
"resources": {"VCPU": 1,
"MEMORY_MB": 1024,
"DISK_GB": 100}
}]}
fake_alloc3 = {"allocations": [
{"resource_provider": {"uuid": uuids.host3},
"resources": {"VCPU": 1,
"MEMORY_MB": 1024,
"DISK_GB": 100}
}]}
fake_alloc_json1 = jsonutils.dumps(fake_alloc1)
fake_alloc_json2 = jsonutils.dumps(fake_alloc2)
fake_alloc_json3 = jsonutils.dumps(fake_alloc3)
fake_alloc_version = "1.23"
fake_selection1 = objects.Selection(service_host="host1", nodename="node1",
cell_uuid=uuids.cell, limits=None)
cell_uuid=uuids.cell, limits=None, allocation_request=fake_alloc_json1,
allocation_request_version=fake_alloc_version)
fake_selection2 = objects.Selection(service_host="host2", nodename="node2",
cell_uuid=uuids.cell, limits=None)
cell_uuid=uuids.cell, limits=None, allocation_request=fake_alloc_json2,
allocation_request_version=fake_alloc_version)
fake_selection3 = objects.Selection(service_host="host3", nodename="node3",
cell_uuid=uuids.cell, limits=None, allocation_request=fake_alloc_json3,
allocation_request_version=fake_alloc_version)
fake_host_lists1 = [[fake_selection1]]
fake_host_lists2 = [[fake_selection1], [fake_selection2]]
fake_host_lists_alt = [[fake_selection1, fake_selection2, fake_selection3]]
class FakeContext(context.RequestContext):
@ -422,9 +452,10 @@ class _BaseTaskTestCase(object):
'instance_type': instance_type_p,
'num_instances': 2}
filter_properties = {'retry': {'num_attempts': 1, 'hosts': []}}
sched_return = copy.deepcopy(fake_host_lists2)
self.conductor_manager._schedule_instances(self.context,
fake_spec, [uuids.fake, uuids.fake], return_alternates=True
).AndReturn([[fake_selection1], [fake_selection2]])
).AndReturn(sched_return)
db.block_device_mapping_get_all_by_instance(self.context,
instances[0].uuid).AndReturn([])
filter_properties2 = {'retry': {'num_attempts': 1,
@ -442,7 +473,7 @@ class _BaseTaskTestCase(object):
requested_networks=None,
security_groups='security_groups',
block_device_mapping=mox.IgnoreArg(),
node='node1', limits=None)
node='node1', limits=None, host_list=sched_return[0])
db.block_device_mapping_get_all_by_instance(self.context,
instances[1].uuid).AndReturn([])
filter_properties3 = {'limits': {},
@ -460,7 +491,7 @@ class _BaseTaskTestCase(object):
requested_networks=None,
security_groups='security_groups',
block_device_mapping=mox.IgnoreArg(),
node='node2', limits=None)
node='node2', limits=None, host_list=sched_return[1])
self.mox.ReplayAll()
# build_instances() is a cast, we need to wait for it to complete
@ -477,7 +508,7 @@ class _BaseTaskTestCase(object):
requested_networks=None,
security_groups='security_groups',
block_device_mapping='block_device_mapping',
legacy_bdm=False)
legacy_bdm=False, host_lists=None)
mock_getaz.assert_has_calls([
mock.call(self.context, 'host1'),
mock.call(self.context, 'host2')])
@ -670,6 +701,62 @@ class _BaseTaskTestCase(object):
mock.call(self.context, instances[1].uuid)])
self.assertFalse(mock_get_by_host.called)
@mock.patch("nova.scheduler.utils.claim_resources", return_value=False)
@mock.patch.object(objects.Instance, 'save')
def test_build_instances_exhaust_host_list(self, _mock_save, mock_claim):
# A list of three alternate hosts for one instance
host_lists = copy.deepcopy(fake_host_lists_alt)
instance = fake_instance.fake_instance_obj(self.context)
image = {'fake-data': 'should_pass_silently'}
expected_claim_count = len(host_lists[0])
# build_instances() is a cast, we need to wait for it to complete
self.useFixture(cast_as_call.CastAsCall(self))
# Since claim_resources() is mocked to always return False, we will run
# out of alternate hosts, and MaxRetriesExceeded should be raised.
self.assertRaises(exc.MaxRetriesExceeded,
self.conductor.build_instances, context=self.context,
instances=[instance], image=image, filter_properties={},
admin_password='admin_password',
injected_files='injected_files', requested_networks=None,
security_groups='security_groups',
block_device_mapping=None, legacy_bdm=None,
host_lists=host_lists)
self.assertEqual(expected_claim_count, mock_claim.call_count)
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_destroy_build_request')
@mock.patch.object(conductor_manager.LOG, 'debug')
@mock.patch("nova.scheduler.utils.claim_resources", return_value=True)
@mock.patch.object(objects.Instance, 'save')
def test_build_instances_logs_selected_and_alts(self, _mock_save,
mock_claim, mock_debug, mock_destroy):
# A list of three alternate hosts for one instance
host_lists = copy.deepcopy(fake_host_lists_alt)
expected_host = host_lists[0][0]
expected_alts = host_lists[0][1:]
instance = fake_instance.fake_instance_obj(self.context)
image = {'fake-data': 'should_pass_silently'}
# build_instances() is a cast, we need to wait for it to complete
self.useFixture(cast_as_call.CastAsCall(self))
with mock.patch.object(self.conductor_manager.compute_rpcapi,
'build_and_run_instance'):
self.conductor.build_instances(context=self.context,
instances=[instance], image=image, filter_properties={},
admin_password='admin_password',
injected_files='injected_files', requested_networks=None,
security_groups='security_groups',
block_device_mapping=None, legacy_bdm=None,
host_lists=host_lists)
# The last LOG.debug call should record the selected host name and the
# list of alternates.
last_call = mock_debug.call_args_list[-1][0]
self.assertIn(expected_host.service_host, last_call)
expected_alt_hosts = [(alt.service_host, alt.nodename)
for alt in expected_alts]
self.assertIn(expected_alt_hosts, last_call)
@mock.patch.object(objects.BuildRequest, 'get_by_instance_uuid')
@mock.patch.object(objects.Instance, 'save')
@mock.patch.object(objects.InstanceMapping, 'get_by_instance_uuid')
@ -804,7 +891,8 @@ class _BaseTaskTestCase(object):
requested_networks=None,
security_groups='security_groups',
block_device_mapping='block_device_mapping',
legacy_bdm=False)
legacy_bdm=False,
host_lists=None)
do_test()
@ -849,8 +937,11 @@ class _BaseTaskTestCase(object):
requested_networks=None,
security_groups='security_groups',
block_device_mapping='block_device_mapping',
legacy_bdm=False)
legacy_bdm=False, host_lists=None)
expected_build_run_host_list = copy.copy(fake_host_lists1[0])
if expected_build_run_host_list:
expected_build_run_host_list.pop(0)
mock_build_and_run.assert_called_once_with(
self.context,
instance=mock.ANY,
@ -866,7 +957,8 @@ class _BaseTaskTestCase(object):
security_groups='security_groups',
block_device_mapping=test.MatchType(
objects.BlockDeviceMappingList),
node='node1', limits=None)
node='node1', limits=None,
host_list=expected_build_run_host_list)
mock_pop_inst_map.assert_not_called()
mock_destroy_build_req.assert_not_called()
@ -2396,9 +2488,11 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
mox.IgnoreArg()).AndReturn(spec)
filter_properties = {'retry': {'num_attempts': 1, 'hosts': []}}
inst_uuids = [inst.uuid for inst in instances]
sched_return = copy.deepcopy(fake_host_lists2)
self.conductor_manager._schedule_instances(self.context,
fake_spec, inst_uuids, return_alternates=True).AndReturn(
[[fake_selection1], [fake_selection2]])
sched_return)
instances[0].save().AndRaise(
exc.InstanceNotFound(instance_id=instances[0].uuid))
instances[1].save()
@ -2415,7 +2509,7 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
requested_networks=None,
security_groups='security_groups',
block_device_mapping=mox.IsA(objects.BlockDeviceMappingList),
node='node2', limits=None)
node='node2', limits=None, host_list=[])
self.mox.ReplayAll()
# build_instances() is a cast, we need to wait for it to complete
@ -2430,7 +2524,7 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
requested_networks=None,
security_groups='security_groups',
block_device_mapping='block_device_mapping',
legacy_bdm=False)
legacy_bdm=False, host_lists=None)
# RequestSpec.from_primitives is called once before we call the
# scheduler to select_destinations and then once per instance that
# gets build in the compute.
@ -2496,7 +2590,7 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
requested_networks=None,
security_groups='security_groups',
block_device_mapping=mock.ANY,
node='node2', limits=None)
node='node2', limits=None, host_list=[])
@mock.patch('nova.objects.Instance.save')
def test_build_instances_max_retries_exceeded(self, mock_save):
@ -2828,7 +2922,7 @@ class ConductorTaskRPCAPITestCase(_BaseTaskTestCase,
cctxt_mock = mock.MagicMock()
@mock.patch.object(self.conductor.client, 'can_send_version',
return_value=True)
side_effect=(False, True, True, True, True))
@mock.patch.object(self.conductor.client, 'prepare',
return_value=cctxt_mock)
def _test(prepare_mock, can_send_mock):
@ -2859,7 +2953,7 @@ class ConductorTaskRPCAPITestCase(_BaseTaskTestCase,
cctxt_mock = mock.MagicMock()
@mock.patch.object(self.conductor.client, 'can_send_version',
side_effect=(False, True, True, True))
side_effect=(False, False, True, True, True))
@mock.patch.object(self.conductor.client, 'prepare',
return_value=cctxt_mock)
def _test(prepare_mock, can_send_mock):

View File

@ -470,7 +470,7 @@ class TestServiceVersion(test.TestCase):
self.assertEqual(
current, calculated,
'Changes detected that require a SERVICE_VERSION change. Please '
'increment nova.objects.service.SERVICE_VERSION, and make sure it'
'increment nova.objects.service.SERVICE_VERSION, and make sure it '
'is equal to nova.compute.manager.ComputeManager.target.version.')
def test_version_in_init(self):

View File

@ -201,7 +201,9 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
selected_hosts = self.driver._schedule(ctx, spec_obj, instance_uuids,
alloc_reqs_by_rp_uuid, mock.sentinel.provider_summaries)
expected_selection = [[objects.Selection.from_host_state(host_state)]]
sel_obj = objects.Selection.from_host_state(host_state,
allocation_request=fake_alloc)
expected_selection = [[sel_obj]]
mock_get_all_states.assert_called_once_with(
ctx.elevated.return_value, spec_obj,
mock.sentinel.provider_summaries)
@ -387,7 +389,8 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
alloc_reqs_by_rp_uuid, mock.sentinel.provider_summaries,
return_alternates=True)
sel0 = objects.Selection.from_host_state(host_state0)
sel0 = objects.Selection.from_host_state(host_state0,
allocation_request=fake_alloc0)
sel1 = objects.Selection.from_host_state(host_state1,
allocation_request=fake_alloc1)
sel2 = objects.Selection.from_host_state(host_state2,
@ -452,7 +455,8 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
alloc_reqs_by_rp_uuid, mock.sentinel.provider_summaries,
return_alternates=False)
sel0 = objects.Selection.from_host_state(host_state0)
sel0 = objects.Selection.from_host_state(host_state0,
allocation_request=fake_alloc0)
expected_selection = [[sel0]]
self.assertEqual(expected_selection, selected_hosts)