Make conductor pass and use host_lists

The earlier patches in the series generated alternates and Selection
objects, and modified the RPC calls to send them to the conductor. This
patch has the conductor pass these host_lists to the compute for the
build process and, if the build fails, has the compute pass the
host_list back to the conductor so that the build can be retried on the
next alternate host.

Also fixes a bug in the scheduler manager exposed by this change
when using the CachingScheduler in a reschedule functional test.

Blueprint: return-alternate-hosts

Change-Id: Iae904afb6cb4fcea8bb27741d774ffbe986a5fb4
Ed Leafe 2017-10-10 23:25:29 +00:00 committed by Matt Riedemann
parent f8747407fc
commit d913de7471
14 changed files with 365 additions and 101 deletions
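
A hedged, self-contained sketch of the flow this series sets up (the class
and function names below are illustrative, not nova's interfaces, and the
conductor/compute round trip is collapsed into a single loop): the conductor
pops the selected host off each host_list, hands the remaining alternates to
the compute along with the build request, and a failed build hands that same
list back so the next alternate can be tried until the list is exhausted.

import collections

Selection = collections.namedtuple('Selection', ['service_host', 'nodename'])


def conductor_build(compute, host_list):
    """Pop the selected host, passing the untried alternates to compute."""
    while host_list:
        host = host_list.pop(0)
        if compute(host, host_list):
            return host
    raise RuntimeError('Exhausted all hosts available for retrying build')


def flaky_compute(host, remaining_alternates):
    """Pretend every build fails except the one landing on host2."""
    return host.service_host == 'host2'


hosts = [Selection('host1', 'node1'), Selection('host2', 'node2'),
         Selection('host3', 'node3')]
print(conductor_build(flaky_compute, hosts).service_host)  # -> host2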

View File

@ -485,7 +485,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
class ComputeManager(manager.Manager):
"""Manages the running instances from creation to destruction."""
target = messaging.Target(version='4.18')
target = messaging.Target(version='4.19')
# How long to wait in seconds before re-issuing a shutdown
# signal to an instance during power off. The overall
@ -1738,7 +1738,7 @@ class ComputeManager(manager.Manager):
filter_properties, admin_password=None,
injected_files=None, requested_networks=None,
security_groups=None, block_device_mapping=None,
node=None, limits=None):
node=None, limits=None, host_list=None):
@utils.synchronized(instance.uuid)
def _locked_do_build_and_run_instance(*args, **kwargs):
@ -1760,24 +1760,24 @@ class ComputeManager(manager.Manager):
result = build_results.FAILED
raise
finally:
fails = (build_results.FAILED,
build_results.RESCHEDULED)
if result in fails:
# Remove the allocation records from Placement for
# the instance if the build failed or is being
# rescheduled to another node. The instance.host is
if result == build_results.FAILED:
# Remove the allocation records from Placement for the
# instance if the build failed. The instance.host is
# likely set to None in _do_build_and_run_instance
# which means if the user deletes the instance, it will
# be deleted in the API, not the compute service.
# which means if the user deletes the instance, it
# will be deleted in the API, not the compute service.
# Setting the instance.host to None in
# _do_build_and_run_instance means that the
# ResourceTracker will no longer consider this instance
# to be claiming resources against it, so we want to
# reflect that same thing in Placement.
rt = self._get_resource_tracker()
rt.reportclient.delete_allocation_for_instance(
instance.uuid)
# reflect that same thing in Placement. No need to
# call this for a reschedule, as the allocations will
# have already been removed in
# self._do_build_and_run_instance().
self._delete_allocation_for_instance(instance.uuid)
if result in (build_results.FAILED,
build_results.RESCHEDULED):
self._build_failed()
else:
self._failed_builds = 0
@ -1789,7 +1789,11 @@ class ComputeManager(manager.Manager):
context, instance, image, request_spec,
filter_properties, admin_password, injected_files,
requested_networks, security_groups,
block_device_mapping, node, limits)
block_device_mapping, node, limits, host_list)
def _delete_allocation_for_instance(self, instance_uuid):
rt = self._get_resource_tracker()
rt.reportclient.delete_allocation_for_instance(instance_uuid)
def _check_device_tagging(self, requested_networks, block_device_mapping):
tagging_requested = False
@ -1817,7 +1821,7 @@ class ComputeManager(manager.Manager):
def _do_build_and_run_instance(self, context, instance, image,
request_spec, filter_properties, admin_password, injected_files,
requested_networks, security_groups, block_device_mapping,
node=None, limits=None):
node=None, limits=None, host_list=None):
try:
LOG.debug('Starting instance...', instance=instance)
@ -1895,11 +1899,18 @@ class ComputeManager(manager.Manager):
self._nil_out_instance_obj_host_and_node(instance)
instance.task_state = task_states.SCHEDULING
instance.save()
# The instance will have already claimed resources from this host
# before this build was attempted. Now that it has failed, we need
# to unclaim those resources before casting to the conductor, so
# that if there are alternate hosts available for a retry, it can
# claim resources on that new host for the instance.
self._delete_allocation_for_instance(instance.uuid)
self.compute_task_api.build_instances(context, [instance],
image, filter_properties, admin_password,
injected_files, requested_networks, security_groups,
block_device_mapping, request_spec=request_spec)
block_device_mapping, request_spec=request_spec,
host_lists=[host_list])
return build_results.RESCHEDULED
except (exception.InstanceNotFound,
exception.UnexpectedDeletingTaskStateError):
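
A hedged sketch of the compute-side reschedule path added above, with
stand-in stubs for the report client and conductor API (not nova's real
objects): before casting back to the conductor, the compute frees its
Placement allocation so a retry can claim resources on an alternate host,
then forwards the host_list it was given.

class FakeReportClient(object):
    def delete_allocation_for_instance(self, instance_uuid):
        print('freed allocation for %s' % instance_uuid)


class FakeConductorAPI(object):
    def build_instances(self, context, instances, host_lists=None, **kwargs):
        print('rescheduling with %d untried alternates' % len(host_lists[0]))


def reschedule(report_client, conductor_api, context, instance_uuid,
               host_list):
    # Drop this host's allocation so Placement stops counting the failed
    # build against it...
    report_client.delete_allocation_for_instance(instance_uuid)
    # ...then hand the untried alternates back to the conductor.
    conductor_api.build_instances(context, [instance_uuid],
                                  host_lists=[host_list])


reschedule(FakeReportClient(), FakeConductorAPI(), None, 'fake-uuid',
           ['alt-1', 'alt-2'])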

View File

@ -329,6 +329,14 @@ class ComputeAPI(object):
* 4.16 - Add tag argument to attach_interface()
* 4.17 - Add new_attachment_id to swap_volume.
* 4.18 - Add migration to prep_resize()
... Pike supports messaging version 4.18. So any changes to existing
methods in 4.x after that point should be done so that they can handle
the version_cap being set to 4.18.
* 4.19 - build_and_run_instance() now gets a 'host_list' parameter
representing potential alternate hosts for retries within a
cell.
'''
VERSION_ALIASES = {
@ -339,6 +347,7 @@ class ComputeAPI(object):
'mitaka': '4.11',
'newton': '4.13',
'ocata': '4.13',
'pike': '4.18',
}
def __init__(self):
@ -1116,22 +1125,31 @@ class ComputeAPI(object):
def build_and_run_instance(self, ctxt, instance, host, image, request_spec,
filter_properties, admin_password=None, injected_files=None,
requested_networks=None, security_groups=None,
block_device_mapping=None, node=None, limits=None):
block_device_mapping=None, node=None, limits=None,
host_list=None):
# NOTE(edleafe): compute nodes can only use the dict form of limits.
if isinstance(limits, objects.SchedulerLimits):
limits = limits.to_dict()
version = '4.0'
cctxt = self.router.client(ctxt).prepare(
server=host, version=version)
cctxt.cast(ctxt, 'build_and_run_instance', instance=instance,
image=image, request_spec=request_spec,
filter_properties=filter_properties,
admin_password=admin_password,
injected_files=injected_files,
requested_networks=requested_networks,
security_groups=security_groups,
block_device_mapping=block_device_mapping, node=node,
limits=limits)
kwargs = {"instance": instance,
"image": image,
"request_spec": request_spec,
"filter_properties": filter_properties,
"admin_password": admin_password,
"injected_files": injected_files,
"requested_networks": requested_networks,
"security_groups": security_groups,
"block_device_mapping": block_device_mapping,
"node": node,
"limits": limits,
"host_list": host_list,
}
client = self.router.client(ctxt)
version = '4.19'
if not client.can_send_version(version):
version = '4.0'
kwargs.pop("host_list")
cctxt = client.prepare(server=host, version=version)
cctxt.cast(ctxt, 'build_and_run_instance', **kwargs)
def quiesce_instance(self, ctxt, instance):
version = '4.0'
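
A hedged sketch of the version-negotiation pattern used in
build_and_run_instance() above (FakeClient is a stand-in, not
oslo.messaging; the real code delegates the version comparison to the RPC
client): when the target compute is still capped at 4.18, the new host_list
kwarg is dropped so the cast stays compatible. The conductor RPC API change
later in this commit chains the same check across several caps
(1.19, 1.18, 1.10, 1.9, 1.8, 1.5), dropping or converting arguments at each
step.

class FakeClient(object):
    def __init__(self, version_cap):
        self.version_cap = version_cap

    def can_send_version(self, version):
        # Crude stand-in for oslo.messaging's version comparison.
        return float(version) <= float(self.version_cap)


def build_kwargs(client, host_list):
    kwargs = {'host_list': host_list}
    version = '4.19'
    if not client.can_send_version(version):
        version = '4.0'
        kwargs.pop('host_list')
    return version, kwargs


print(build_kwargs(FakeClient('4.18'), ['alt']))  # ('4.0', {})
print(build_kwargs(FakeClient('4.19'), ['alt']))  # ('4.19', {'host_list': ['alt']})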

View File

@ -113,7 +113,7 @@ class ComputeTaskAPI(object):
def build_instances(self, context, instances, image, filter_properties,
admin_password, injected_files, requested_networks,
security_groups, block_device_mapping, legacy_bdm=True,
request_spec=None):
request_spec=None, host_lists=None):
self.conductor_compute_rpcapi.build_instances(context,
instances=instances, image=image,
filter_properties=filter_properties,
@ -121,7 +121,8 @@ class ComputeTaskAPI(object):
requested_networks=requested_networks,
security_groups=security_groups,
block_device_mapping=block_device_mapping,
legacy_bdm=legacy_bdm, request_spec=request_spec)
legacy_bdm=legacy_bdm, request_spec=request_spec,
host_lists=host_lists)
def schedule_and_build_instances(self, context, build_requests,
request_spec, image,

View File

@ -21,6 +21,7 @@ import functools
from oslo_config import cfg
from oslo_log import log as logging
import oslo_messaging as messaging
from oslo_serialization import jsonutils
from oslo_utils import excutils
from oslo_utils import versionutils
import six
@ -218,7 +219,7 @@ class ComputeTaskManager(base.Base):
may involve coordinating activities on multiple compute nodes.
"""
target = messaging.Target(namespace='compute_task', version='1.18')
target = messaging.Target(namespace='compute_task', version='1.19')
def __init__(self):
super(ComputeTaskManager, self).__init__()
@ -227,6 +228,7 @@ class ComputeTaskManager(base.Base):
self.network_api = network.API()
self.servicegroup_api = servicegroup.API()
self.scheduler_client = scheduler_client.SchedulerClient()
self.report_client = self.scheduler_client.reportclient
self.notifier = rpc.get_notifier('compute', CONF.host)
def reset(self):
@ -516,7 +518,7 @@ class ComputeTaskManager(base.Base):
def build_instances(self, context, instances, image, filter_properties,
admin_password, injected_files, requested_networks,
security_groups, block_device_mapping=None, legacy_bdm=True,
request_spec=None):
request_spec=None, host_lists=None):
# TODO(ndipanov): Remove block_device_mapping and legacy_bdm in version
# 2.0 of the RPC API.
# TODO(danms): Remove this in version 2.0 of the RPC API
@ -548,6 +550,11 @@ class ComputeTaskManager(base.Base):
# work on a RequestSpec object rather than filter_properties.
request_spec = request_spec.to_legacy_request_spec_dict()
# 'host_lists' will be None in one of two cases: when running cellsv1,
# or during a reschedule from a pre-Queens compute. In all other cases,
# it will be a list of lists, though the lists may be empty if there
# are no more hosts left in a rescheduling situation.
is_reschedule = host_lists is not None
try:
# check retry policy. Rather ugly use of instances[0]...
# but if we've exceeded max retries... then we really only
@ -559,8 +566,22 @@ class ComputeTaskManager(base.Base):
instance_uuids = [instance.uuid for instance in instances]
spec_obj = objects.RequestSpec.from_primitives(
context, request_spec, filter_properties)
host_lists = self._schedule_instances(context, spec_obj,
instance_uuids, return_alternates=True)
LOG.debug("Rescheduling: %s", is_reschedule)
if is_reschedule:
# Make sure that we have a host, as we may have exhausted all
# our alternates
if not host_lists[0]:
# We have an empty list of hosts, so this instance has
# failed to build.
msg = ("Exhausted all hosts available for retrying build "
"failures for instance %(instance_uuid)s." %
{"instance_uuid": instances[0].uuid})
raise exception.MaxRetriesExceeded(reason=msg)
else:
# This is not a reschedule, so we need to call the scheduler to
# get appropriate hosts for the request.
host_lists = self._schedule_instances(context, spec_obj,
instance_uuids, return_alternates=True)
except Exception as exc:
num_attempts = filter_properties.get(
'retry', {}).get('num_attempts', 1)
@ -585,8 +606,45 @@ class ComputeTaskManager(base.Base):
context, instance, requested_networks)
return
elevated = context.elevated()
for (instance, host_list) in six.moves.zip(instances, host_lists):
host = host_list[0]
host = host_list.pop(0)
if is_reschedule:
# If this runs in the superconductor, the first instance will
# already have its resources claimed in placement. If this is a
# retry, though, this is running in the cell conductor, and we
# need to claim first to ensure that the alternate host still
# has its resources available. Note that there are schedulers
# that don't support Placement, so must assume that the host is
# still available.
host_available = False
while host and not host_available:
if host.allocation_request:
alloc_req = jsonutils.loads(host.allocation_request)
else:
alloc_req = None
if alloc_req:
host_available = scheduler_utils.claim_resources(
elevated, self.report_client, spec_obj,
instance.uuid, alloc_req,
host.allocation_request_version)
else:
# Some deployments use different schedulers that do not
# use Placement, so they will not have an
# allocation_request to claim with. For those cases,
# there is no concept of claiming, so just assume that
# the host is valid.
host_available = True
if not host_available:
# Insufficient resources remain on that host, so
# discard it and try the next.
host = host_list.pop(0) if host_list else None
if not host_available:
# No more available hosts for retrying the build.
msg = ("Exhausted all hosts available for retrying build "
"failures for instance %(instance_uuid)s." %
{"instance_uuid": instance.uuid})
raise exception.MaxRetriesExceeded(reason=msg)
instance.availability_zone = (
availability_zones.get_host_availability_zone(context,
host.service_host))
@ -634,6 +692,10 @@ class ComputeTaskManager(base.Base):
inst_mapping.destroy()
return
alts = [(alt.service_host, alt.nodename) for alt in host_list]
LOG.debug("Selected host: %s; Selected node: %s; Alternates: %s",
host.service_host, host.nodename, alts, instance=instance)
self.compute_rpcapi.build_and_run_instance(context,
instance=instance, host=host.service_host, image=image,
request_spec=local_reqspec,
@ -643,7 +705,7 @@ class ComputeTaskManager(base.Base):
requested_networks=requested_networks,
security_groups=security_groups,
block_device_mapping=bdms, node=host.nodename,
limits=host.limits)
limits=host.limits, host_list=host_list)
def _schedule_instances(self, context, request_spec,
instance_uuids=None, return_alternates=False):
@ -803,8 +865,7 @@ class ComputeTaskManager(base.Base):
# in the API.
try:
scheduler_utils.claim_resources_on_destination(
self.scheduler_client.reportclient, instance,
source_node, dest_node)
self.report_client, instance, source_node, dest_node)
except exception.NoValidHost as ex:
with excutils.save_and_reraise_exception():
self._set_vm_state_and_notify(
@ -1134,7 +1195,10 @@ class ComputeTaskManager(base.Base):
cell = cell_mapping_cache[instance.uuid]
# host_list is a list of one or more Selection objects, the first
# of which has been selected and its resources claimed.
host = host_list[0]
host = host_list.pop(0)
alts = [(alt.service_host, alt.nodename) for alt in host_list]
LOG.debug("Selected host: %s; Selected node: %s; Alternates: %s",
host.service_host, host.nodename, alts, instance=instance)
filter_props = request_spec.to_legacy_filter_properties_dict()
scheduler_utils.populate_retry(filter_props, instance.uuid)
scheduler_utils.populate_filter_properties(filter_props,
@ -1192,7 +1256,7 @@ class ComputeTaskManager(base.Base):
security_groups=legacy_secgroups,
block_device_mapping=instance_bdms,
host=host.service_host, node=host.nodename,
limits=host.limits)
limits=host.limits, host_list=host_list)
def _cleanup_build_artifacts(self, context, exc, instances, build_requests,
request_specs, cell_mapping_cache):
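
A hedged sketch of the claiming loop added to build_instances() above
(claim_resources here is an injected stand-in for
nova.scheduler.utils.claim_resources): on a reschedule the cell conductor
walks the alternates in order, claims in Placement where a Selection carries
an allocation request, treats Placement-less schedulers as always claimable,
and gives up only when the list is exhausted.

import collections

Alt = collections.namedtuple('Alt', ['service_host', 'allocation_request'])


def pick_retry_host(host_list, claim_resources):
    """Walk the alternates in order, claiming in Placement where possible."""
    host = host_list.pop(0) if host_list else None
    while host is not None:
        if host.allocation_request is None:
            # Schedulers that bypass Placement have nothing to claim, so
            # assume the host is still usable.
            return host
        if claim_resources(host.allocation_request):
            return host
        # The claim failed -- that alternate is full -- so try the next one.
        host = host_list.pop(0) if host_list else None
    raise RuntimeError('Exhausted all hosts available for retrying build')


alts = [Alt('host1', '{"allocations": []}'), Alt('host2', None)]
# With every Placement claim rejected, host1 is discarded and host2 (which
# carries no allocation request) is accepted.
print(pick_retry_host(alts, claim_resources=lambda req: False).service_host)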

View File

@ -275,6 +275,9 @@ class ComputeTaskAPI(object):
1.16 - Added schedule_and_build_instances
1.17 - Added tags to schedule_and_build_instances()
1.18 - Added request_spec to build_instances().
1.19 - build_instances() now gets a 'host_lists' parameter that represents
potential alternate hosts for retries within a cell for each
instance.
"""
def __init__(self):
@ -329,39 +332,42 @@ class ComputeTaskAPI(object):
def build_instances(self, context, instances, image, filter_properties,
admin_password, injected_files, requested_networks,
security_groups, block_device_mapping, legacy_bdm=True,
request_spec=None):
request_spec=None, host_lists=None):
image_p = jsonutils.to_primitive(image)
version = '1.18'
send_reqspec = True
kwargs = {"instances": instances, "image": image_p,
"filter_properties": filter_properties,
"admin_password": admin_password,
"injected_files": injected_files,
"requested_networks": requested_networks,
"security_groups": security_groups,
"request_spec": request_spec,
"host_lists": host_lists}
version = '1.19'
if not self.client.can_send_version(version):
version = '1.18'
kwargs.pop("host_lists")
if not self.client.can_send_version(version):
send_reqspec = False
version = '1.10'
kwargs.pop("request_spec")
if not self.client.can_send_version(version):
version = '1.9'
if 'instance_type' in filter_properties:
flavor = filter_properties['instance_type']
flavor_p = objects_base.obj_to_primitive(flavor)
filter_properties = dict(filter_properties,
instance_type=flavor_p)
kw = {'instances': instances, 'image': image_p,
'filter_properties': filter_properties,
'admin_password': admin_password,
'injected_files': injected_files,
'requested_networks': requested_networks,
'security_groups': security_groups}
if send_reqspec:
kw['request_spec'] = request_spec
kwargs["filter_properties"] = dict(filter_properties,
instance_type=flavor_p)
if not self.client.can_send_version(version):
version = '1.8'
kw['requested_networks'] = kw['requested_networks'].as_tuples()
nets = kwargs['requested_networks'].as_tuples()
kwargs['requested_networks'] = nets
if not self.client.can_send_version('1.7'):
version = '1.5'
bdm_p = objects_base.obj_to_primitive(block_device_mapping)
kw.update({'block_device_mapping': bdm_p,
'legacy_bdm': legacy_bdm})
kwargs.update({'block_device_mapping': bdm_p,
'legacy_bdm': legacy_bdm})
cctxt = self.client.prepare(version=version)
cctxt.cast(context, 'build_instances', **kw)
cctxt.cast(context, 'build_instances', **kwargs)
def schedule_and_build_instances(self, context, build_requests,
request_specs,

View File

@ -31,7 +31,7 @@ LOG = logging.getLogger(__name__)
# NOTE(danms): This is the global service version counter
SERVICE_VERSION = 25
SERVICE_VERSION = 26
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@ -120,6 +120,8 @@ SERVICE_VERSION_HISTORY = (
# Version 25: Compute hosts allow migration-based allocations
# for live migration.
{'compute_rpc': '4.18'},
# Version 26: Adds a 'host_list' parameter to build_and_run_instance()
{'compute_rpc': '4.19'},
)
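
A simplified, hypothetical illustration of why the history entry above
matters (this is not nova's actual helper): the lowest nova-compute service
version running in the deployment is looked up against this history to
derive the compute RPC cap, so a deployment still running a version-25
compute pins callers to 4.18 and the new host_list argument gets dropped by
the RPC client fallback shown earlier.

RPC_CAPS = {25: '4.18', 26: '4.19'}  # hypothetical excerpt of the history


def compute_rpc_cap(minimum_service_version):
    return RPC_CAPS[minimum_service_version]


print(compute_rpc_cap(25))  # -> '4.18', so host_list cannot be sent yet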

View File

@ -345,7 +345,14 @@ class FilterScheduler(driver.Scheduler):
selections_to_return = []
for selected_host in selected_hosts:
# This is the list of hosts for one particular instance.
selection = objects.Selection.from_host_state(selected_host)
if alloc_reqs_by_rp_uuid:
selected_alloc_req = alloc_reqs_by_rp_uuid.get(
selected_host.uuid)[0]
else:
selected_alloc_req = None
selection = objects.Selection.from_host_state(selected_host,
allocation_request=selected_alloc_req,
allocation_request_version=allocation_request_version)
selected_plus_alts = [selection]
cell_uuid = selected_host.cell_uuid
# This will populate the alternates with many of the same unclaimed

View File

@ -116,7 +116,8 @@ class SchedulerManager(manager.Manager):
request_spec,
filter_properties)
resources = utils.resources_from_request_spec(spec_obj)
alloc_reqs_by_rp_uuid, provider_summaries = None, None
alloc_reqs_by_rp_uuid, provider_summaries, allocation_request_version \
= None, None, None
if self.driver.USES_ALLOCATION_CANDIDATES:
res = self.placement_client.get_allocation_candidates(resources)
if res is None:
@ -144,7 +145,7 @@ class SchedulerManager(manager.Manager):
rp_uuid = rr['resource_provider']['uuid']
alloc_reqs_by_rp_uuid[rp_uuid].append(ar)
# Only return alteranates if both return_objects and return_alternates
# Only return alternates if both return_objects and return_alternates
# are True.
return_alternates = return_alternates and return_objects
selections = self.driver.select_destinations(ctxt, spec_obj,
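
A hedged reconstruction of the bug the hunk above fixes (variable names
follow the diff, the control flow is simplified, and the exact failure mode
is inferred): allocation_request_version was previously bound only on the
Placement path, so a driver that skips allocation candidates, such as the
CachingScheduler exercised by the new reschedule functional test, presumably
hit an UnboundLocalError when the value was later passed to
select_destinations(). Initializing it to None alongside the other two
locals avoids that.

def old_style_select_destinations(uses_allocation_candidates):
    # Old initialization: allocation_request_version missing from this line.
    alloc_reqs_by_rp_uuid, provider_summaries = None, None
    if uses_allocation_candidates:
        allocation_request_version = '1.12'  # illustrative value
    return allocation_request_version  # unbound when the branch is skipped


try:
    old_style_select_destinations(False)
except UnboundLocalError as err:
    print('CachingScheduler path failed: %s' % err)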

View File

@ -148,3 +148,11 @@ class TestRetryBetweenComputeNodeBuilds(test.TestCase):
# Assert that we retried.
self.assertEqual(2, self.attempts)
class TestRetryBetweenComputeNodeBuildsCachingScheduler(
TestRetryBetweenComputeNodeBuilds):
"""Tests the reschedule scenario using the CachingScheduler."""
def setUp(self):
self.flags(driver='caching_scheduler', group='scheduler')
super(TestRetryBetweenComputeNodeBuildsCachingScheduler, self).setUp()

View File

@ -73,6 +73,7 @@ from nova.volume import cinder
CONF = nova.conf.CONF
fake_host_list = [mock.sentinel.host1]
class ComputeManagerUnitTestCase(test.NoDBTestCase):
@ -4365,7 +4366,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
self._assert_build_instance_hook_called(mock_hooks,
build_results.ACTIVE)
@ -4396,7 +4397,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
port_id=uuids.port_instance)],
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
requested_network = mock_build_and_run.call_args[0][5][0]
self.assertEqual('fake_network_id', requested_network.network_id)
self.assertEqual('10.0.0.1', str(requested_network.address))
@ -4431,7 +4432,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
self._instance_action_events(mock_start, mock_finish)
self._assert_build_instance_update(mock_save)
@ -4480,7 +4481,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping,
node=self.node, limits=self.limits)
node=self.node, limits=self.limits,
host_list=fake_host_list)
self._assert_build_instance_hook_called(mock_hooks,
build_results.RESCHEDULED)
@ -4498,7 +4500,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
[self.instance], self.image, self.filter_properties,
self.admin_pass, self.injected_files, self.requested_networks,
self.security_groups, self.block_device_mapping,
request_spec={})
request_spec={}, host_lists=[fake_host_list])
@mock.patch.object(manager.ComputeManager, '_shutdown_instance')
@mock.patch.object(manager.ComputeManager, '_build_networks_for_instance')
@ -4569,7 +4571,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
mock_build_and_run.assert_called_once_with(self.context,
instance,
@ -4583,7 +4585,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
[instance], self.image, self.filter_properties,
self.admin_pass, self.injected_files, self.requested_networks,
self.security_groups, self.block_device_mapping,
request_spec={})
request_spec={}, host_lists=[fake_host_list])
@mock.patch.object(manager.ComputeManager, '_build_and_run_instance')
@mock.patch.object(conductor_api.ComputeTaskAPI, 'build_instances')
@ -4625,7 +4627,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
mock_build_and_run.assert_called_once_with(self.context,
instance,
@ -4639,7 +4641,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
[instance], self.image, self.filter_properties,
self.admin_pass, self.injected_files, self.requested_networks,
self.security_groups, self.block_device_mapping,
request_spec={})
request_spec={}, host_lists=[fake_host_list])
@mock.patch.object(objects.InstanceActionEvent,
'event_finish_with_failure')
@ -4668,7 +4670,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
self._assert_build_instance_hook_called(mock_hooks,
build_results.FAILED)
@ -4721,7 +4723,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping,
node=self.node, limits=self.limits)
node=self.node, limits=self.limits,
host_list=fake_host_list)
self._assert_build_instance_hook_called(mock_hooks,
build_results.RESCHEDULED)
@ -4740,7 +4743,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
[self.instance], self.image, self.filter_properties,
self.admin_pass, self.injected_files, self.requested_networks,
self.security_groups, self.block_device_mapping,
request_spec={})
request_spec={}, host_lists=[fake_host_list])
@mock.patch.object(objects.InstanceActionEvent,
'event_finish_with_failure')
@ -4770,7 +4773,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
self._assert_build_instance_hook_called(mock_hooks,
build_results.RESCHEDULED)
@ -4789,7 +4792,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
[self.instance], self.image, self.filter_properties,
self.admin_pass, self.injected_files, self.requested_networks,
self.security_groups, self.block_device_mapping,
request_spec={})
request_spec={}, host_lists=[fake_host_list])
@mock.patch.object(objects.InstanceActionEvent,
'event_finish_with_failure')
@ -4820,7 +4823,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping, node=self.node,
limits=self.limits)
limits=self.limits, host_list=fake_host_list)
self._assert_build_instance_hook_called(mock_hooks,
build_results.FAILED)
@ -5163,7 +5166,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
requested_networks=self.requested_networks,
security_groups=self.security_groups,
block_device_mapping=self.block_device_mapping,
node=self.node, limits=self.limits)
node=self.node, limits=self.limits,
host_list=fake_host_list)
self._instance_action_events(mock_start, mock_finish)
self._assert_build_instance_update(mock_save, reschedule_update=True)
@ -5177,7 +5181,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
self.image, self.filter_properties, self.admin_pass,
self.injected_files, self.requested_networks,
self.security_groups, self.block_device_mapping,
request_spec={})
request_spec={}, host_lists=[fake_host_list])
mock_nil.assert_called_once_with(self.instance)
mock_clean.assert_called_once_with(self.context, self.instance,
self.compute.host)

View File

@ -16,6 +16,7 @@
Unit Tests for nova.compute.rpcapi
"""
import copy
import mock
from oslo_serialization import jsonutils
@ -23,6 +24,7 @@ from nova.compute import rpcapi as compute_rpcapi
import nova.conf
from nova import context
from nova import exception
from nova import objects
from nova.objects import block_device as objects_block_dev
from nova.objects import migrate_data as migrate_data_obj
from nova.objects import migration as migration_obj
@ -731,7 +733,49 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
admin_password='passwd', injected_files=None,
requested_networks=['network1'], security_groups=None,
block_device_mapping=None, node='node', limits=[],
version='4.0')
host_list=None, version='4.19')
def test_build_and_run_instance_4_18(self):
ctxt = context.RequestContext('fake_user', 'fake_project')
rpcapi = compute_rpcapi.ComputeAPI()
mock_client = mock.Mock()
rpcapi.router.client = mock.Mock(return_value=mock_client)
mock_client.can_send_version = mock.Mock(return_value=False)
prepare_mock = mock.Mock()
prepare_mock.cast = mock.Mock()
mock_client.prepare.return_value = prepare_mock
fake_limit = {"memory_mb": 1024, "disk_gb": 100, "vcpus": 2,
"numa_topology": None}
fake_limit_obj = objects.SchedulerLimits.from_dict(fake_limit)
args = (self.fake_instance_obj, "host", "image", "request_spec",
"filter_properties")
kwargs = {
"admin_password": 'passwd',
"injected_files": None,
"requested_networks": ['network1'],
"security_groups": None,
"block_device_mapping": None,
"node": 'node',
"limits": fake_limit_obj,
"host_list": ["host"],
}
expected_kwargs = copy.deepcopy(kwargs)
# Since we're failing the 'can_send_version' check, the host_list
# should be removed, and the limits objects should be converted to the
# older dict format.
expected_kwargs.pop("host_list")
expected_kwargs["limits"] = fake_limit_obj.to_dict()
# Add in the args, which will be added to the kwargs dict in the RPC
# call
expected_kwargs["instance"] = self.fake_instance_obj
expected_kwargs["image"] = "image"
expected_kwargs["request_spec"] = "request_spec"
expected_kwargs["filter_properties"] = "filter_properties"
rpcapi.build_and_run_instance(ctxt, *args, **kwargs)
prepare_mock.cast.assert_called_once_with(ctxt,
"build_and_run_instance", **expected_kwargs)
def test_quiesce_instance(self):
self._test_compute_api('quiesce_instance', 'call',

View File

@ -65,10 +65,40 @@ from nova import utils
CONF = conf.CONF
fake_alloc1 = {"allocations": [
{"resource_provider": {"uuid": uuids.host1},
"resources": {"VCPU": 1,
"MEMORY_MB": 1024,
"DISK_GB": 100}
}]}
fake_alloc2 = {"allocations": [
{"resource_provider": {"uuid": uuids.host2},
"resources": {"VCPU": 1,
"MEMORY_MB": 1024,
"DISK_GB": 100}
}]}
fake_alloc3 = {"allocations": [
{"resource_provider": {"uuid": uuids.host3},
"resources": {"VCPU": 1,
"MEMORY_MB": 1024,
"DISK_GB": 100}
}]}
fake_alloc_json1 = jsonutils.dumps(fake_alloc1)
fake_alloc_json2 = jsonutils.dumps(fake_alloc2)
fake_alloc_json3 = jsonutils.dumps(fake_alloc3)
fake_alloc_version = "1.23"
fake_selection1 = objects.Selection(service_host="host1", nodename="node1",
cell_uuid=uuids.cell, limits=None)
cell_uuid=uuids.cell, limits=None, allocation_request=fake_alloc_json1,
allocation_request_version=fake_alloc_version)
fake_selection2 = objects.Selection(service_host="host2", nodename="node2",
cell_uuid=uuids.cell, limits=None)
cell_uuid=uuids.cell, limits=None, allocation_request=fake_alloc_json2,
allocation_request_version=fake_alloc_version)
fake_selection3 = objects.Selection(service_host="host3", nodename="node3",
cell_uuid=uuids.cell, limits=None, allocation_request=fake_alloc_json3,
allocation_request_version=fake_alloc_version)
fake_host_lists1 = [[fake_selection1]]
fake_host_lists2 = [[fake_selection1], [fake_selection2]]
fake_host_lists_alt = [[fake_selection1, fake_selection2, fake_selection3]]
class FakeContext(context.RequestContext):
@ -422,9 +452,10 @@ class _BaseTaskTestCase(object):
'instance_type': instance_type_p,
'num_instances': 2}
filter_properties = {'retry': {'num_attempts': 1, 'hosts': []}}
sched_return = copy.deepcopy(fake_host_lists2)
self.conductor_manager._schedule_instances(self.context,
fake_spec, [uuids.fake, uuids.fake], return_alternates=True
).AndReturn([[fake_selection1], [fake_selection2]])
).AndReturn(sched_return)
db.block_device_mapping_get_all_by_instance(self.context,
instances[0].uuid).AndReturn([])
filter_properties2 = {'retry': {'num_attempts': 1,
@ -442,7 +473,7 @@ class _BaseTaskTestCase(object):
requested_networks=None,
security_groups='security_groups',
block_device_mapping=mox.IgnoreArg(),
node='node1', limits=None)
node='node1', limits=None, host_list=sched_return[0])
db.block_device_mapping_get_all_by_instance(self.context,
instances[1].uuid).AndReturn([])
filter_properties3 = {'limits': {},
@ -460,7 +491,7 @@ class _BaseTaskTestCase(object):
requested_networks=None,
security_groups='security_groups',
block_device_mapping=mox.IgnoreArg(),
node='node2', limits=None)
node='node2', limits=None, host_list=sched_return[1])
self.mox.ReplayAll()
# build_instances() is a cast, we need to wait for it to complete
@ -477,7 +508,7 @@ class _BaseTaskTestCase(object):
requested_networks=None,
security_groups='security_groups',
block_device_mapping='block_device_mapping',
legacy_bdm=False)
legacy_bdm=False, host_lists=None)
mock_getaz.assert_has_calls([
mock.call(self.context, 'host1'),
mock.call(self.context, 'host2')])
@ -670,6 +701,62 @@ class _BaseTaskTestCase(object):
mock.call(self.context, instances[1].uuid)])
self.assertFalse(mock_get_by_host.called)
@mock.patch("nova.scheduler.utils.claim_resources", return_value=False)
@mock.patch.object(objects.Instance, 'save')
def test_build_instances_exhaust_host_list(self, _mock_save, mock_claim):
# A list of three alternate hosts for one instance
host_lists = copy.deepcopy(fake_host_lists_alt)
instance = fake_instance.fake_instance_obj(self.context)
image = {'fake-data': 'should_pass_silently'}
expected_claim_count = len(host_lists[0])
# build_instances() is a cast, we need to wait for it to complete
self.useFixture(cast_as_call.CastAsCall(self))
# Since claim_resources() is mocked to always return False, we will run
# out of alternate hosts, and MaxRetriesExceeded should be raised.
self.assertRaises(exc.MaxRetriesExceeded,
self.conductor.build_instances, context=self.context,
instances=[instance], image=image, filter_properties={},
admin_password='admin_password',
injected_files='injected_files', requested_networks=None,
security_groups='security_groups',
block_device_mapping=None, legacy_bdm=None,
host_lists=host_lists)
self.assertEqual(expected_claim_count, mock_claim.call_count)
@mock.patch.object(conductor_manager.ComputeTaskManager,
'_destroy_build_request')
@mock.patch.object(conductor_manager.LOG, 'debug')
@mock.patch("nova.scheduler.utils.claim_resources", return_value=True)
@mock.patch.object(objects.Instance, 'save')
def test_build_instances_logs_selected_and_alts(self, _mock_save,
mock_claim, mock_debug, mock_destroy):
# A list of three alternate hosts for one instance
host_lists = copy.deepcopy(fake_host_lists_alt)
expected_host = host_lists[0][0]
expected_alts = host_lists[0][1:]
instance = fake_instance.fake_instance_obj(self.context)
image = {'fake-data': 'should_pass_silently'}
# build_instances() is a cast, we need to wait for it to complete
self.useFixture(cast_as_call.CastAsCall(self))
with mock.patch.object(self.conductor_manager.compute_rpcapi,
'build_and_run_instance'):
self.conductor.build_instances(context=self.context,
instances=[instance], image=image, filter_properties={},
admin_password='admin_password',
injected_files='injected_files', requested_networks=None,
security_groups='security_groups',
block_device_mapping=None, legacy_bdm=None,
host_lists=host_lists)
# The last LOG.debug call should record the selected host name and the
# list of alternates.
last_call = mock_debug.call_args_list[-1][0]
self.assertIn(expected_host.service_host, last_call)
expected_alt_hosts = [(alt.service_host, alt.nodename)
for alt in expected_alts]
self.assertIn(expected_alt_hosts, last_call)
@mock.patch.object(objects.BuildRequest, 'get_by_instance_uuid')
@mock.patch.object(objects.Instance, 'save')
@mock.patch.object(objects.InstanceMapping, 'get_by_instance_uuid')
@ -804,7 +891,8 @@ class _BaseTaskTestCase(object):
requested_networks=None,
security_groups='security_groups',
block_device_mapping='block_device_mapping',
legacy_bdm=False)
legacy_bdm=False,
host_lists=None)
do_test()
@ -849,8 +937,11 @@ class _BaseTaskTestCase(object):
requested_networks=None,
security_groups='security_groups',
block_device_mapping='block_device_mapping',
legacy_bdm=False)
legacy_bdm=False, host_lists=None)
expected_build_run_host_list = copy.copy(fake_host_lists1[0])
if expected_build_run_host_list:
expected_build_run_host_list.pop(0)
mock_build_and_run.assert_called_once_with(
self.context,
instance=mock.ANY,
@ -866,7 +957,8 @@ class _BaseTaskTestCase(object):
security_groups='security_groups',
block_device_mapping=test.MatchType(
objects.BlockDeviceMappingList),
node='node1', limits=None)
node='node1', limits=None,
host_list=expected_build_run_host_list)
mock_pop_inst_map.assert_not_called()
mock_destroy_build_req.assert_not_called()
@ -2396,9 +2488,11 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
mox.IgnoreArg()).AndReturn(spec)
filter_properties = {'retry': {'num_attempts': 1, 'hosts': []}}
inst_uuids = [inst.uuid for inst in instances]
sched_return = copy.deepcopy(fake_host_lists2)
self.conductor_manager._schedule_instances(self.context,
fake_spec, inst_uuids, return_alternates=True).AndReturn(
[[fake_selection1], [fake_selection2]])
sched_return)
instances[0].save().AndRaise(
exc.InstanceNotFound(instance_id=instances[0].uuid))
instances[1].save()
@ -2415,7 +2509,7 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
requested_networks=None,
security_groups='security_groups',
block_device_mapping=mox.IsA(objects.BlockDeviceMappingList),
node='node2', limits=None)
node='node2', limits=None, host_list=[])
self.mox.ReplayAll()
# build_instances() is a cast, we need to wait for it to complete
@ -2430,7 +2524,7 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
requested_networks=None,
security_groups='security_groups',
block_device_mapping='block_device_mapping',
legacy_bdm=False)
legacy_bdm=False, host_lists=None)
# RequestSpec.from_primitives is called once before we call the
# scheduler to select_destinations and then once per instance that
# gets build in the compute.
@ -2496,7 +2590,7 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
requested_networks=None,
security_groups='security_groups',
block_device_mapping=mock.ANY,
node='node2', limits=None)
node='node2', limits=None, host_list=[])
@mock.patch('nova.objects.Instance.save')
def test_build_instances_max_retries_exceeded(self, mock_save):
@ -2828,7 +2922,7 @@ class ConductorTaskRPCAPITestCase(_BaseTaskTestCase,
cctxt_mock = mock.MagicMock()
@mock.patch.object(self.conductor.client, 'can_send_version',
return_value=True)
side_effect=(False, True, True, True, True))
@mock.patch.object(self.conductor.client, 'prepare',
return_value=cctxt_mock)
def _test(prepare_mock, can_send_mock):
@ -2859,7 +2953,7 @@ class ConductorTaskRPCAPITestCase(_BaseTaskTestCase,
cctxt_mock = mock.MagicMock()
@mock.patch.object(self.conductor.client, 'can_send_version',
side_effect=(False, True, True, True))
side_effect=(False, False, True, True, True))
@mock.patch.object(self.conductor.client, 'prepare',
return_value=cctxt_mock)
def _test(prepare_mock, can_send_mock):

View File

@ -470,7 +470,7 @@ class TestServiceVersion(test.TestCase):
self.assertEqual(
current, calculated,
'Changes detected that require a SERVICE_VERSION change. Please '
'increment nova.objects.service.SERVICE_VERSION, and make sure it'
'increment nova.objects.service.SERVICE_VERSION, and make sure it '
'is equal to nova.compute.manager.ComputeManager.target.version.')
def test_version_in_init(self):

View File

@ -201,7 +201,9 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
selected_hosts = self.driver._schedule(ctx, spec_obj, instance_uuids,
alloc_reqs_by_rp_uuid, mock.sentinel.provider_summaries)
expected_selection = [[objects.Selection.from_host_state(host_state)]]
sel_obj = objects.Selection.from_host_state(host_state,
allocation_request=fake_alloc)
expected_selection = [[sel_obj]]
mock_get_all_states.assert_called_once_with(
ctx.elevated.return_value, spec_obj,
mock.sentinel.provider_summaries)
@ -387,7 +389,8 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
alloc_reqs_by_rp_uuid, mock.sentinel.provider_summaries,
return_alternates=True)
sel0 = objects.Selection.from_host_state(host_state0)
sel0 = objects.Selection.from_host_state(host_state0,
allocation_request=fake_alloc0)
sel1 = objects.Selection.from_host_state(host_state1,
allocation_request=fake_alloc1)
sel2 = objects.Selection.from_host_state(host_state2,
@ -452,7 +455,8 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
alloc_reqs_by_rp_uuid, mock.sentinel.provider_summaries,
return_alternates=False)
sel0 = objects.Selection.from_host_state(host_state0)
sel0 = objects.Selection.from_host_state(host_state0,
allocation_request=fake_alloc0)
expected_selection = [[sel0]]
self.assertEqual(expected_selection, selected_hosts)