Create and bind Cyborg ARQs.
* Call Cyborg with device profile name to get ARQs (Accelerator Requests). Each ARQ corresponds to a single device profile group, which corrresponds to a single request group in request spec. * Match each ARQ to associated request group, and thereby obtain the corresponding RP for that ARQ. * Call Cyborg to bind the ARQ to that host/device-RP. * When Cyborg sends the ARQ bind notification events, wait for those events with a timeout. Change-Id: I0f8b6bf2b4f4510da6c84fede532533602b6af7f Blueprint: nova-cyborg-interaction
This commit is contained in:
parent
0c52730f6a
commit
cc630b4eb6
|
@ -80,6 +80,7 @@ def get_device_profile_request_groups(context, dp_name):
|
|||
|
||||
class _CyborgClient(object):
|
||||
DEVICE_PROFILE_URL = "/device_profiles"
|
||||
ARQ_URL = "/accelerator_requests"
|
||||
|
||||
def __init__(self, context):
|
||||
auth = service_auth.get_auth_plugin(context)
|
||||
|
@ -145,3 +146,120 @@ class _CyborgClient(object):
|
|||
rg.add_trait(trait_name=name, trait_type=val)
|
||||
request_groups.append(rg)
|
||||
return request_groups
|
||||
|
||||
def _create_arqs(self, dp_name):
|
||||
data = {"device_profile_name": dp_name}
|
||||
resp, err_msg = self._call_cyborg(self._client.post,
|
||||
self.ARQ_URL, json=data)
|
||||
|
||||
if err_msg:
|
||||
raise exception.AcceleratorRequestOpFailed(
|
||||
op=_('create'), msg=err_msg)
|
||||
|
||||
return resp.json().get('arqs')
|
||||
|
||||
def create_arqs_and_match_resource_providers(self, dp_name, rg_rp_map):
|
||||
"""Create ARQs, match them with request groups and thereby
|
||||
determine their corresponding RPs.
|
||||
|
||||
:param dp_name: Device profile name
|
||||
:param rg_rp_map: Request group - Resource Provider map
|
||||
{requester_id: [resource_provider_uuid]}
|
||||
:returns:
|
||||
[arq], with each ARQ associated with an RP
|
||||
:raises: DeviceProfileError, AcceleratorRequestOpFailed
|
||||
"""
|
||||
LOG.info('Creating ARQs for device profile %s', dp_name)
|
||||
arqs = self._create_arqs(dp_name)
|
||||
if not arqs or len(arqs) == 0:
|
||||
msg = _('device profile name %s') % dp_name
|
||||
raise exception.AcceleratorRequestOpFailed(op=_('create'), msg=msg)
|
||||
for arq in arqs:
|
||||
dp_group_id = arq['device_profile_group_id']
|
||||
arq['device_rp_uuid'] = None
|
||||
requester_id = (
|
||||
get_device_profile_group_requester_id(dp_group_id))
|
||||
arq['device_rp_uuid'] = rg_rp_map[requester_id][0]
|
||||
return arqs
|
||||
|
||||
def bind_arqs(self, bindings):
|
||||
"""Initiate Cyborg bindings.
|
||||
|
||||
Handles RFC 6902-compliant JSON patching, sparing
|
||||
calling Nova code from those details.
|
||||
|
||||
:param bindings:
|
||||
{ "$arq_uuid": {
|
||||
"hostname": STRING
|
||||
"device_rp_uuid": UUID
|
||||
"instance_uuid": UUID
|
||||
},
|
||||
...
|
||||
}
|
||||
:returns: nothing
|
||||
:raises: AcceleratorRequestOpFailed
|
||||
"""
|
||||
LOG.info('Binding ARQs.')
|
||||
# Create a JSON patch in RFC 6902 format
|
||||
patch_list = {}
|
||||
for arq_uuid, binding in bindings.items():
|
||||
patch = [{"path": "/" + field,
|
||||
"op": "add",
|
||||
"value": value
|
||||
} for field, value in binding.items()]
|
||||
patch_list[arq_uuid] = patch
|
||||
|
||||
resp, err_msg = self._call_cyborg(self._client.patch,
|
||||
self.ARQ_URL, json=patch_list)
|
||||
if err_msg:
|
||||
msg = _(' Binding failed for ARQ UUIDs: ')
|
||||
err_msg = err_msg + msg + ','.join(bindings.keys())
|
||||
raise exception.AcceleratorRequestOpFailed(
|
||||
op=_('bind'), msg=err_msg)
|
||||
|
||||
def get_arqs_for_instance(self, instance_uuid, only_resolved=False):
|
||||
"""Get ARQs for the instance.
|
||||
|
||||
:param instance_uuid: Instance UUID
|
||||
:param only_resolved: flag to return only resolved ARQs
|
||||
:returns: List of ARQs for the instance:
|
||||
if only_resolved: only those ARQs which have completed binding
|
||||
else: all ARQs
|
||||
The format of the returned data structure is as below:
|
||||
[
|
||||
{'uuid': $arq_uuid,
|
||||
'device_profile_name': $dp_name,
|
||||
'device_profile_group_id': $dp_request_group_index,
|
||||
'state': 'Bound',
|
||||
'device_rp_uuid': $resource_provider_uuid,
|
||||
'hostname': $host_nodename,
|
||||
'instance_uuid': $instance_uuid,
|
||||
'attach_handle_info': { # PCI bdf
|
||||
'bus': '0c', 'device': '0',
|
||||
'domain': '0000', 'function': '0'},
|
||||
'attach_handle_type': 'PCI'
|
||||
# or 'TEST_PCI' for Cyborg fake driver
|
||||
}
|
||||
]
|
||||
:raises: AcceleratorRequestOpFailed
|
||||
"""
|
||||
query = {"instance": instance_uuid}
|
||||
resp, err_msg = self._call_cyborg(self._client.get,
|
||||
self.ARQ_URL, params=query)
|
||||
|
||||
if err_msg:
|
||||
err_msg = err_msg + _(' Instance %s') % instance_uuid
|
||||
raise exception.AcceleratorRequestOpFailed(
|
||||
op=_('get'), msg=err_msg)
|
||||
|
||||
arqs = resp.json().get('arqs')
|
||||
if not arqs:
|
||||
err_msg = _('Cyborg returned no accelerator requests for '
|
||||
'instance %s') % instance_uuid
|
||||
raise exception.AcceleratorRequestOpFailed(
|
||||
op=_('get'), msg=err_msg)
|
||||
|
||||
if only_resolved:
|
||||
arqs = [arq for arq in arqs if
|
||||
arq['state'] in ['Bound', 'BindFailed', 'Deleting']]
|
||||
return arqs
|
||||
|
|
|
@ -56,6 +56,7 @@ from oslo_utils import units
|
|||
import six
|
||||
from six.moves import range
|
||||
|
||||
from nova.accelerator import cyborg
|
||||
from nova import block_device
|
||||
from nova.compute import api as compute
|
||||
from nova.compute import build_results
|
||||
|
@ -2512,6 +2513,14 @@ class ComputeManager(manager.Manager):
|
|||
self.host, phase=fields.NotificationPhase.END,
|
||||
bdms=block_device_mapping)
|
||||
|
||||
def _build_resources_cleanup(self, instance, network_info):
|
||||
# Make sure the async call finishes
|
||||
if network_info is not None:
|
||||
network_info.wait(do_raise=False)
|
||||
self.driver.clean_networks_preparation(instance,
|
||||
network_info)
|
||||
self.driver.failed_spawn_cleanup(instance)
|
||||
|
||||
@contextlib.contextmanager
|
||||
def _build_resources(self, context, instance, requested_networks,
|
||||
security_groups, image_meta, block_device_mapping,
|
||||
|
@ -2566,33 +2575,34 @@ class ComputeManager(manager.Manager):
|
|||
except (exception.InstanceNotFound,
|
||||
exception.UnexpectedDeletingTaskStateError):
|
||||
with excutils.save_and_reraise_exception():
|
||||
# Make sure the async call finishes
|
||||
if network_info is not None:
|
||||
network_info.wait(do_raise=False)
|
||||
self.driver.clean_networks_preparation(instance,
|
||||
network_info)
|
||||
self.driver.failed_spawn_cleanup(instance)
|
||||
self._build_resources_cleanup(instance, network_info)
|
||||
except (exception.UnexpectedTaskStateError,
|
||||
exception.OverQuota, exception.InvalidBDM) as e:
|
||||
# Make sure the async call finishes
|
||||
if network_info is not None:
|
||||
network_info.wait(do_raise=False)
|
||||
self.driver.clean_networks_preparation(instance, network_info)
|
||||
self.driver.failed_spawn_cleanup(instance)
|
||||
self._build_resources_cleanup(instance, network_info)
|
||||
raise exception.BuildAbortException(instance_uuid=instance.uuid,
|
||||
reason=e.format_message())
|
||||
except Exception:
|
||||
LOG.exception('Failure prepping block device',
|
||||
instance=instance)
|
||||
# Make sure the async call finishes
|
||||
if network_info is not None:
|
||||
network_info.wait(do_raise=False)
|
||||
self.driver.clean_networks_preparation(instance, network_info)
|
||||
self.driver.failed_spawn_cleanup(instance)
|
||||
self._build_resources_cleanup(instance, network_info)
|
||||
msg = _('Failure prepping block device.')
|
||||
raise exception.BuildAbortException(instance_uuid=instance.uuid,
|
||||
reason=msg)
|
||||
|
||||
arqs = []
|
||||
dp_name = instance.flavor.extra_specs.get('accel:device_profile')
|
||||
try:
|
||||
if dp_name:
|
||||
arqs = self._get_bound_arq_resources(
|
||||
context, dp_name, instance)
|
||||
except (Exception, eventlet.timeout.Timeout) as exc:
|
||||
LOG.exception(exc.format_message())
|
||||
self._build_resources_cleanup(instance, network_info)
|
||||
msg = _('Failure getting accelerator requests.')
|
||||
raise exception.BuildAbortException(instance_uuid=instance.uuid,
|
||||
reason=msg)
|
||||
resources['accel_info'] = arqs
|
||||
|
||||
try:
|
||||
yield resources
|
||||
except Exception as exc:
|
||||
|
@ -2623,6 +2633,48 @@ class ComputeManager(manager.Manager):
|
|||
instance_uuid=instance.uuid,
|
||||
reason=six.text_type(exc))
|
||||
|
||||
def _get_bound_arq_resources(self, context, dp_name, instance):
|
||||
"""Get bound accelerator requests.
|
||||
|
||||
The ARQ binding was kicked off in the conductor as an async
|
||||
operation. Here we wait for the notification from Cyborg.
|
||||
|
||||
If the notification arrived before this point, which can happen
|
||||
in many/most cases (see [1]), it will be lost. To handle that,
|
||||
we use exit_wait_early.
|
||||
[1] https://review.opendev.org/#/c/631244/46/nova/compute/
|
||||
manager.py@2627
|
||||
|
||||
:param dp_name: Device profile name. Caller ensures this is valid.
|
||||
:param instance: instance object
|
||||
:returns: List of ARQs for which bindings have completed,
|
||||
successfully or otherwise
|
||||
"""
|
||||
|
||||
cyclient = cyborg.get_client(context)
|
||||
arqs = cyclient.get_arqs_for_instance(instance.uuid)
|
||||
events = [('accelerator-request-bound', arq['uuid']) for arq in arqs]
|
||||
timeout = CONF.arq_binding_timeout
|
||||
with self.virtapi.wait_for_instance_event(
|
||||
instance, events, deadline=timeout):
|
||||
resolved_arqs = cyclient.get_arqs_for_instance(
|
||||
instance.uuid, only_resolved=True)
|
||||
# Events for these resolved ARQs may have already arrived.
|
||||
# Such 'early' events need to be ignored.
|
||||
early_events = [('accelerator-request-bound', arq['uuid'])
|
||||
for arq in resolved_arqs]
|
||||
if early_events:
|
||||
self.virtapi.exit_wait_early(early_events)
|
||||
|
||||
# Since a timeout in wait_for_instance_event will raise, we get
|
||||
# here only if all binding events have been received.
|
||||
if sorted(resolved_arqs) != sorted(arqs):
|
||||
# Query Cyborg to get all.
|
||||
arqs = cyclient.get_arqs_for_instance(instance.uuid)
|
||||
else:
|
||||
arqs = resolved_arqs # latter has the right arq.state
|
||||
return arqs
|
||||
|
||||
def _cleanup_allocated_networks(self, context, instance,
|
||||
requested_networks):
|
||||
"""Cleanup networks allocated for instance.
|
||||
|
|
|
@ -31,6 +31,7 @@ from oslo_utils import timeutils
|
|||
from oslo_utils import versionutils
|
||||
import six
|
||||
|
||||
from nova.accelerator import cyborg
|
||||
from nova import availability_zones
|
||||
from nova.compute import instance_actions
|
||||
from nova.compute import rpcapi as compute_rpcapi
|
||||
|
@ -835,6 +836,18 @@ class ComputeTaskManager(base.Base):
|
|||
LOG.debug("Selected host: %s; Selected node: %s; Alternates: %s",
|
||||
host.service_host, host.nodename, alts, instance=instance)
|
||||
|
||||
try:
|
||||
resource_provider_mapping = (
|
||||
local_reqspec.get_request_group_mapping())
|
||||
self._create_and_bind_arqs(
|
||||
context, instance.uuid, instance.flavor.extra_specs,
|
||||
host.nodename, resource_provider_mapping)
|
||||
except Exception as exc:
|
||||
LOG.exception('Failed to reschedule. Reason: %s', exc)
|
||||
self._cleanup_when_reschedule_fails(context, instance, exc,
|
||||
legacy_request_spec, requested_networks)
|
||||
continue
|
||||
|
||||
self.compute_rpcapi.build_and_run_instance(context,
|
||||
instance=instance, host=host.service_host, image=image,
|
||||
request_spec=local_reqspec,
|
||||
|
@ -1604,6 +1617,21 @@ class ComputeTaskManager(base.Base):
|
|||
# this one.
|
||||
continue
|
||||
|
||||
try:
|
||||
resource_provider_mapping = (
|
||||
request_spec.get_request_group_mapping())
|
||||
# Using nodename instead of hostname. See:
|
||||
# http://lists.openstack.org/pipermail/openstack-discuss/2019-November/011044.html # noqa
|
||||
self._create_and_bind_arqs(
|
||||
context, instance.uuid, instance.flavor.extra_specs,
|
||||
host.nodename, resource_provider_mapping)
|
||||
except Exception as exc:
|
||||
# If anything failed here we need to cleanup and bail out.
|
||||
with excutils.save_and_reraise_exception():
|
||||
self._cleanup_build_artifacts(
|
||||
context, exc, instances, build_requests, request_specs,
|
||||
block_device_mapping, tags, cell_mapping_cache)
|
||||
|
||||
# NOTE(danms): Compute RPC expects security group names or ids
|
||||
# not objects, so convert this to a list of names until we can
|
||||
# pass the objects.
|
||||
|
@ -1622,6 +1650,33 @@ class ComputeTaskManager(base.Base):
|
|||
host=host.service_host, node=host.nodename,
|
||||
limits=host.limits, host_list=host_list)
|
||||
|
||||
def _create_and_bind_arqs(self, context, instance_uuid, extra_specs,
|
||||
hostname, resource_provider_mapping):
|
||||
"""Create ARQs, determine their RPs and initiate ARQ binding.
|
||||
|
||||
The binding is asynchronous; Cyborg will notify on completion.
|
||||
The notification will be handled in the compute manager.
|
||||
"""
|
||||
dp_name = extra_specs.get('accel:device_profile')
|
||||
if not dp_name:
|
||||
return
|
||||
|
||||
LOG.debug('Calling Cyborg to get ARQs. dp_name=%s instance=%s',
|
||||
dp_name, instance_uuid)
|
||||
cyclient = cyborg.get_client(context)
|
||||
arqs = cyclient.create_arqs_and_match_resource_providers(
|
||||
dp_name, resource_provider_mapping)
|
||||
LOG.debug('Got ARQs with resource provider mapping %s', arqs)
|
||||
|
||||
bindings = {arq['uuid']:
|
||||
{"hostname": hostname,
|
||||
"device_rp_uuid": arq['device_rp_uuid'],
|
||||
"instance_uuid": instance_uuid
|
||||
}
|
||||
for arq in arqs}
|
||||
# Initiate Cyborg binding asynchronously
|
||||
cyclient.bind_arqs(bindings=bindings)
|
||||
|
||||
@staticmethod
|
||||
def _map_instance_to_cell(context, instance, cell):
|
||||
"""Update the instance mapping to point at the given cell.
|
||||
|
|
|
@ -176,6 +176,17 @@ Related options:
|
|||
* vif_plugging_is_fatal - If ``vif_plugging_timeout`` is set to zero and
|
||||
``vif_plugging_is_fatal`` is False, events should not be expected to
|
||||
arrive at all.
|
||||
"""),
|
||||
cfg.IntOpt('arq_binding_timeout',
|
||||
default=300,
|
||||
min=1,
|
||||
help="""
|
||||
Timeout for Accelerator Request (ARQ) bind event message arrival.
|
||||
|
||||
Number of seconds to wait for ARQ bind resolution event to arrive.
|
||||
The event indicates that every ARQ for an instance has either bound
|
||||
successfully or failed to bind. If it does not arrive, instance bringup
|
||||
is aborted with an exception.
|
||||
"""),
|
||||
cfg.StrOpt('injected_network_template',
|
||||
default=paths.basedir_def('nova/virt/interfaces.template'),
|
||||
|
|
|
@ -2293,3 +2293,7 @@ class UnexpectedResourceProviderNameForPCIRequest(NovaException):
|
|||
|
||||
class DeviceProfileError(NovaException):
|
||||
msg_fmt = _("Device profile name %(name)s: %(msg)s")
|
||||
|
||||
|
||||
class AcceleratorRequestOpFailed(NovaException):
|
||||
msg_fmt = _("Failed to %(op)s accelerator requests: %(msg)s")
|
||||
|
|
|
@ -2526,14 +2526,136 @@ def _get_device_profile(dp_name, trait):
|
|||
return dp
|
||||
|
||||
|
||||
def get_arqs(dp_name):
|
||||
arq = {
|
||||
'uuid': 'b59d34d3-787b-4fb0-a6b9-019cd81172f8',
|
||||
'device_profile_name': dp_name,
|
||||
'device_profile_group_id': 0,
|
||||
'state': 'Initial',
|
||||
'device_rp_uuid': None,
|
||||
'hostname': None,
|
||||
'instance_uuid': None,
|
||||
'attach_handle_info': {},
|
||||
'attach_handle_type': '',
|
||||
}
|
||||
bound_arq = copy.deepcopy(arq)
|
||||
bound_arq.update(
|
||||
{'state': 'Bound',
|
||||
'attach_handle_type': 'TEST_PCI',
|
||||
'attach_handle_info': {
|
||||
'bus': '0c',
|
||||
'device': '0',
|
||||
'domain': '0000',
|
||||
'function': '0'
|
||||
},
|
||||
})
|
||||
return [arq], [bound_arq]
|
||||
|
||||
|
||||
class CyborgFixture(fixtures.Fixture):
|
||||
"""Fixture that mocks Cyborg APIs used by nova/accelerator/cyborg.py"""
|
||||
|
||||
dp_name = 'fakedev-dp'
|
||||
trait = 'CUSTOM_FAKE_DEVICE'
|
||||
arq_list, bound_arq_list = get_arqs(dp_name)
|
||||
|
||||
# NOTE(Sundar): The bindings passed to the fake_bind_arqs() from the
|
||||
# conductor are indexed by ARQ UUID and include the host name, device
|
||||
# RP UUID and instance UUID. (See params to fake_bind_arqs below.)
|
||||
#
|
||||
# Later, when the compute manager calls fake_get_arqs_for_instance() with
|
||||
# the instance UUID, the returned ARQs must contain the host name and
|
||||
# device RP UUID. But these can vary from test to test.
|
||||
#
|
||||
# So, fake_bind_arqs() below takes bindings indexed by ARQ UUID and
|
||||
# converts them to bindings indexed by instance UUID, which are then
|
||||
# stored in the dict below. This dict looks like:
|
||||
# { $instance_uuid: [
|
||||
# {'hostname': $hostname,
|
||||
# 'device_rp_uuid': $device_rp_uuid,
|
||||
# 'arq_uuid': $arq_uuid
|
||||
# }
|
||||
# ]
|
||||
# }
|
||||
# Since it is indexed by instance UUID, and that is presumably unique
|
||||
# across concurrently executing tests, this should be safe for
|
||||
# concurrent access.
|
||||
bindings_by_instance = {}
|
||||
|
||||
@staticmethod
|
||||
def fake_bind_arqs(bindings):
|
||||
"""Simulate Cyborg ARQ bindings.
|
||||
|
||||
Since Nova calls Cyborg for binding on per-instance basis, the
|
||||
instance UUIDs would be the same for all ARQs in a single call.
|
||||
|
||||
This function converts bindings indexed by ARQ UUID to bindings
|
||||
indexed by instance UUID, so that fake_get_arqs_for_instance can
|
||||
retrieve them later.
|
||||
|
||||
:param bindings:
|
||||
{ "$arq_uuid": {
|
||||
"hostname": STRING
|
||||
"device_rp_uuid": UUID
|
||||
"instance_uuid": UUID
|
||||
},
|
||||
...
|
||||
}
|
||||
:returns: None
|
||||
"""
|
||||
binding_by_instance = collections.defaultdict(list)
|
||||
for index, arq_uuid in enumerate(bindings):
|
||||
arq_binding = bindings[arq_uuid]
|
||||
# instance_uuid is same for all ARQs in a single call.
|
||||
instance_uuid = arq_binding['instance_uuid']
|
||||
newbinding = {
|
||||
'hostname': arq_binding['hostname'],
|
||||
'device_rp_uuid': arq_binding['device_rp_uuid'],
|
||||
'arq_uuid': arq_uuid,
|
||||
}
|
||||
binding_by_instance[instance_uuid].append(newbinding)
|
||||
|
||||
CyborgFixture.bindings_by_instance.update(binding_by_instance)
|
||||
|
||||
@staticmethod
|
||||
def fake_get_arqs_for_instance(instance_uuid, only_resolved=False):
|
||||
"""Get list of bound ARQs for this instance.
|
||||
|
||||
This function uses bindings indexed by instance UUID to
|
||||
populate the bound ARQ templates in CyborgFixture.bound_arq_list.
|
||||
"""
|
||||
arq_host_rp_list = CyborgFixture.bindings_by_instance[instance_uuid]
|
||||
# The above looks like:
|
||||
# [{'hostname': $hostname,
|
||||
# 'device_rp_uuid': $device_rp_uuid,
|
||||
# 'arq_uuid': $arq_uuid
|
||||
# }]
|
||||
|
||||
bound_arq_list = copy.deepcopy(CyborgFixture.bound_arq_list)
|
||||
for arq in bound_arq_list:
|
||||
match = [(arq_host_rp['hostname'],
|
||||
arq_host_rp['device_rp_uuid'],
|
||||
instance_uuid)
|
||||
for arq_host_rp in arq_host_rp_list
|
||||
if arq_host_rp['arq_uuid'] == arq['uuid']
|
||||
]
|
||||
# Only 1 ARQ UUID would match, so len(match) == 1
|
||||
arq['hostname'], arq['device_rp_uuid'], arq['instance_uuid'] = (
|
||||
match[0][0], match[0][1], match[0][2])
|
||||
return bound_arq_list
|
||||
|
||||
def setUp(self):
|
||||
super(CyborgFixture, self).setUp()
|
||||
self.mock_get_dp = self.useFixture(fixtures.MockPatch(
|
||||
'nova.accelerator.cyborg._CyborgClient._get_device_profile_list',
|
||||
return_value=_get_device_profile(self.dp_name, self.trait))).mock
|
||||
self.mock_create_arqs = self.useFixture(fixtures.MockPatch(
|
||||
'nova.accelerator.cyborg._CyborgClient._create_arqs',
|
||||
return_value=self.arq_list)).mock
|
||||
self.mock_bind_arqs = self.useFixture(fixtures.MockPatch(
|
||||
'nova.accelerator.cyborg._CyborgClient.bind_arqs',
|
||||
side_effect=self.fake_bind_arqs)).mock
|
||||
self.mock_get_arqs = self.useFixture(fixtures.MockPatch(
|
||||
'nova.accelerator.cyborg._CyborgClient.'
|
||||
'get_arqs_for_instance',
|
||||
side_effect=self.fake_get_arqs_for_instance)).mock
|
||||
|
|
|
@ -7835,6 +7835,20 @@ class AcceleratorServerBase(integrated_helpers.ProviderUsageBaseTestCase):
|
|||
extra_spec=extra_specs)
|
||||
return flavor_id
|
||||
|
||||
def _check_allocations_usage(self, server_uuid):
|
||||
host_rp_uuid = self.compute_rp_uuids[0]
|
||||
device_rp_uuid = self.device_rp_map[host_rp_uuid]
|
||||
expected_host_alloc = {
|
||||
'resources': {'VCPU': 2, 'MEMORY_MB': 2048, 'DISK_GB': 20},
|
||||
}
|
||||
expected_device_alloc = {'resources': {'FPGA': 1}}
|
||||
|
||||
host_alloc = self._get_allocations_by_provider_uuid(host_rp_uuid)
|
||||
self.assertEqual(expected_host_alloc, host_alloc[server_uuid])
|
||||
|
||||
device_alloc = self._get_allocations_by_provider_uuid(device_rp_uuid)
|
||||
self.assertEqual(expected_device_alloc, device_alloc[server_uuid])
|
||||
|
||||
|
||||
class AcceleratorServerTest(AcceleratorServerBase):
|
||||
def setUp(self):
|
||||
|
@ -7844,10 +7858,110 @@ class AcceleratorServerTest(AcceleratorServerBase):
|
|||
def test_create_server(self):
|
||||
flavor_id = self._create_acc_flavor()
|
||||
server_name = 'accel_server1'
|
||||
self._create_server(
|
||||
server = self._create_server(
|
||||
server_name, flavor_id=flavor_id,
|
||||
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
||||
networks='none', expected_state='ACTIVE')
|
||||
|
||||
self.cyborg.mock_get_dp.assert_called_once_with(self.cyborg.dp_name)
|
||||
# TODO(Sundar): Add checks for Placement allocations.
|
||||
# Verify that the host name and the device rp UUID are set properly.
|
||||
# Other fields in the ARQ are hardcoded data from the fixture.
|
||||
arqs = self.cyborg.fake_get_arqs_for_instance(server['id'])
|
||||
self.assertEqual(self.device_rp_uuids[0], arqs[0]['device_rp_uuid'])
|
||||
self.assertEqual(server['OS-EXT-SRV-ATTR:host'], arqs[0]['hostname'])
|
||||
|
||||
# Check allocations and usage
|
||||
self._check_allocations_usage(server['id'])
|
||||
|
||||
|
||||
class AcceleratorServerReschedTest(AcceleratorServerBase):
|
||||
|
||||
def setUp(self):
|
||||
self.NUM_HOSTS = 2
|
||||
super(AcceleratorServerReschedTest, self).setUp()
|
||||
|
||||
def _check_allocations_usage_resched(self, server):
|
||||
# Check allocations on host where instance is running
|
||||
server_uuid = server['id']
|
||||
|
||||
hostname = server['OS-EXT-SRV-ATTR:host']
|
||||
server_host_rp_uuid = self._get_provider_uuid_by_host(hostname)
|
||||
expected_host_alloc = {
|
||||
'resources': {'VCPU': 2, 'MEMORY_MB': 2048, 'DISK_GB': 20},
|
||||
}
|
||||
expected_device_alloc = {'resources': {'FPGA': 1}}
|
||||
|
||||
for i in range(self.NUM_HOSTS):
|
||||
compute_uuid = self.compute_rp_uuids[i]
|
||||
device_uuid = self.device_rp_map[compute_uuid]
|
||||
host_alloc = self._get_allocations_by_provider_uuid(compute_uuid)
|
||||
device_alloc = self._get_allocations_by_provider_uuid(device_uuid)
|
||||
if compute_uuid == server_host_rp_uuid:
|
||||
self.assertEqual(expected_host_alloc, host_alloc[server_uuid])
|
||||
self.assertEqual(expected_device_alloc,
|
||||
device_alloc[server_uuid])
|
||||
else:
|
||||
self.assertEqual({}, host_alloc)
|
||||
self.assertEqual({}, device_alloc)
|
||||
|
||||
# NOTE(Sundar): ARQs for an instance could come from different
|
||||
# devices in the same host, in general. But, in this test case,
|
||||
# there is only one device in the host. So, we check for that.
|
||||
device_rp_uuid = self.device_rp_map[server_host_rp_uuid]
|
||||
expected_arq_bind_info = set([('Bound', hostname,
|
||||
device_rp_uuid, server_uuid)])
|
||||
arqs = nova_fixtures.CyborgFixture.fake_get_arqs_for_instance(
|
||||
server_uuid)
|
||||
# The state is hardcoded but other fields come from the test case.
|
||||
arq_bind_info = {(arq['state'], arq['hostname'],
|
||||
arq['device_rp_uuid'], arq['instance_uuid'])
|
||||
for arq in arqs}
|
||||
self.assertSetEqual(expected_arq_bind_info, arq_bind_info)
|
||||
|
||||
def _check_no_allocations(self, server_uuid):
|
||||
allocs = self._get_allocations_by_server_uuid(server_uuid)
|
||||
self.assertEqual(allocs, {})
|
||||
|
||||
for i in range(self.NUM_HOSTS):
|
||||
usage = self._get_provider_usages(
|
||||
self.device_rp_uuids[i]).get('FPGA')
|
||||
self.assertEqual(usage, 0)
|
||||
|
||||
def test_resched(self):
|
||||
orig_spawn = fake.FakeDriver.spawn
|
||||
|
||||
def fake_spawn(*args, **kwargs):
|
||||
fake_spawn.count += 1
|
||||
if fake_spawn.count == 1:
|
||||
raise exception.ComputeResourcesUnavailable(
|
||||
reason='First host fake fail.', instance_uuid='fake')
|
||||
else:
|
||||
orig_spawn(*args, **kwargs)
|
||||
fake_spawn.count = 0
|
||||
|
||||
with mock.patch('nova.virt.fake.FakeDriver.spawn', new=fake_spawn):
|
||||
flavor_id = self._create_acc_flavor()
|
||||
server_name = 'accel_server1'
|
||||
server = self._create_server(
|
||||
server_name, flavor_id=flavor_id,
|
||||
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
||||
networks='none', expected_state='ACTIVE')
|
||||
|
||||
self.assertEqual(2, fake_spawn.count)
|
||||
self._check_allocations_usage_resched(server)
|
||||
|
||||
def test_resched_fails(self):
|
||||
|
||||
def throw_error(*args, **kwargs):
|
||||
raise exception.ComputeResourcesUnavailable(reason='',
|
||||
instance_uuid='fake')
|
||||
|
||||
self.stub_out('nova.virt.fake.FakeDriver.spawn', throw_error)
|
||||
|
||||
flavor_id = self._create_acc_flavor()
|
||||
server_name = 'accel_server1'
|
||||
server = self._create_server(
|
||||
server_name, flavor_id=flavor_id,
|
||||
image_uuid='155d900f-4e14-4e4c-a73d-069cbf4541e6',
|
||||
networks='none', expected_state='ERROR')
|
||||
|
||||
self._check_no_allocations(server['id'])
|
||||
|
|
|
@ -12,16 +12,20 @@
|
|||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import itertools
|
||||
import mock
|
||||
|
||||
from keystoneauth1 import exceptions as ks_exc
|
||||
from requests.models import Response
|
||||
|
||||
from oslo_serialization import jsonutils
|
||||
|
||||
from nova.accelerator import cyborg
|
||||
from nova import context
|
||||
from nova import exception
|
||||
from nova.objects import request_spec
|
||||
from nova import test
|
||||
from nova.tests.unit import fake_requests
|
||||
|
||||
|
||||
class CyborgTestCase(test.NoDBTestCase):
|
||||
|
@ -130,3 +134,216 @@ class CyborgTestCase(test.NoDBTestCase):
|
|||
self.assertRaises(exception.DeviceProfileError,
|
||||
self.client.get_device_profile_groups,
|
||||
dp_name='mydp')
|
||||
|
||||
def _get_arqs_and_request_groups(self):
|
||||
arq_common = {
|
||||
# All ARQs for an instance have same device profile name.
|
||||
"device_profile_name": "noprog-dp",
|
||||
"device_rp_uuid": "",
|
||||
"hostname": "",
|
||||
"instance_uuid": "",
|
||||
"state": "Initial",
|
||||
}
|
||||
arq_variants = [
|
||||
{"device_profile_group_id": 0,
|
||||
"uuid": "edbba496-3cc8-4256-94ca-dfe3413348eb"},
|
||||
{"device_profile_group_id": 1,
|
||||
"uuid": "20125bcb-9f55-4e13-8e8c-3fee30e54cca"},
|
||||
]
|
||||
arqs = [dict(arq_common, **variant) for variant in arq_variants]
|
||||
rg_rp_map = {
|
||||
'device_profile_0': ['c532cf11-02ed-4b03-9dd8-3e9a454131dc'],
|
||||
'device_profile_1': ['2c332d7b-daaf-4726-a80d-ecf5212da4b8'],
|
||||
}
|
||||
return arqs, rg_rp_map
|
||||
|
||||
def _get_bound_arqs(self):
|
||||
arqs, rg_rp_map = self._get_arqs_and_request_groups()
|
||||
common = {
|
||||
'host_name': 'myhost',
|
||||
'instance_uuid': '15d3acf8-df76-400b-bfc9-484a5208daa1',
|
||||
}
|
||||
bindings = {
|
||||
arqs[0]['uuid']: dict(
|
||||
common, device_rp_uuid=rg_rp_map['device_profile_0'][0]),
|
||||
arqs[1]['uuid']: dict(
|
||||
common, device_rp_uuid=rg_rp_map['device_profile_1'][0]),
|
||||
}
|
||||
bound_arq_common = {
|
||||
"attach_handle_info": {
|
||||
"bus": "01",
|
||||
"device": "00",
|
||||
"domain": "0000",
|
||||
"function": "0" # will vary function ID later
|
||||
},
|
||||
"attach_handle_type": "PCI",
|
||||
"state": "Bound",
|
||||
# Devic eprofile name is common to all bound ARQs
|
||||
"device_profile_name": arqs[0]["device_profile_name"],
|
||||
**common
|
||||
}
|
||||
bound_arqs = [
|
||||
{'uuid': arq['uuid'],
|
||||
'device_profile_group_id': arq['device_profile_group_id'],
|
||||
'device_rp_uuid': bindings[arq['uuid']]['device_rp_uuid'],
|
||||
**bound_arq_common} for arq in arqs]
|
||||
for index, bound_arq in enumerate(bound_arqs):
|
||||
bound_arq['attach_handle_info']['function'] = index # fix func ID
|
||||
return bindings, bound_arqs
|
||||
|
||||
@mock.patch('keystoneauth1.adapter.Adapter.post')
|
||||
def test_create_arqs_failure(self, mock_cyborg_post):
|
||||
# If Cyborg returns invalid response, raise exception.
|
||||
mock_cyborg_post.return_value = None
|
||||
self.assertRaises(exception.AcceleratorRequestOpFailed,
|
||||
self.client._create_arqs,
|
||||
dp_name='mydp')
|
||||
|
||||
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
|
||||
'_create_arqs')
|
||||
def test_create_arq_and_match_rps(self, mock_create_arqs):
|
||||
# Happy path
|
||||
arqs, rg_rp_map = self._get_arqs_and_request_groups()
|
||||
dp_name = arqs[0]["device_profile_name"]
|
||||
|
||||
mock_create_arqs.return_value = arqs
|
||||
|
||||
ret_arqs = self.client.create_arqs_and_match_resource_providers(
|
||||
dp_name, rg_rp_map)
|
||||
|
||||
# Each value in rg_rp_map is a list. We merge them into a single list.
|
||||
expected_rp_uuids = sorted(list(
|
||||
itertools.chain.from_iterable(rg_rp_map.values())))
|
||||
ret_rp_uuids = sorted([arq['device_rp_uuid'] for arq in ret_arqs])
|
||||
self.assertEqual(expected_rp_uuids, ret_rp_uuids)
|
||||
|
||||
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
|
||||
'_create_arqs')
|
||||
def test_create_arq_and_match_rps_exception(self, mock_create_arqs):
|
||||
# If Cyborg response does not contain ARQs, raise
|
||||
arqs, rg_rp_map = self._get_arqs_and_request_groups()
|
||||
dp_name = arqs[0]["device_profile_name"]
|
||||
|
||||
mock_create_arqs.return_value = None
|
||||
self.assertRaises(
|
||||
exception.AcceleratorRequestOpFailed,
|
||||
self.client.create_arqs_and_match_resource_providers,
|
||||
dp_name, rg_rp_map)
|
||||
|
||||
@mock.patch('keystoneauth1.adapter.Adapter.patch')
|
||||
def test_bind_arqs(self, mock_cyborg_patch):
|
||||
bindings, bound_arqs = self._get_bound_arqs()
|
||||
arq_uuid = bound_arqs[0]['uuid']
|
||||
|
||||
patch_list = {}
|
||||
for arq_uuid, binding in bindings.items():
|
||||
patch = [{"path": "/" + field,
|
||||
"op": "add",
|
||||
"value": value
|
||||
} for field, value in binding.items()]
|
||||
patch_list[arq_uuid] = patch
|
||||
|
||||
self.client.bind_arqs(bindings)
|
||||
|
||||
mock_cyborg_patch.assert_called_once_with(
|
||||
self.client.ARQ_URL, json=mock.ANY)
|
||||
called_params = mock_cyborg_patch.call_args.kwargs['json']
|
||||
self.assertEqual(sorted(called_params), sorted(patch_list))
|
||||
|
||||
@mock.patch('nova.accelerator.cyborg._CyborgClient._call_cyborg')
|
||||
def test_bind_arqs_exception(self, mock_call_cyborg):
|
||||
# If Cyborg returns invalid response, raise exception.
|
||||
bindings, _ = self._get_bound_arqs()
|
||||
mock_call_cyborg.return_value = None, 'Some error'
|
||||
self.assertRaises(exception.AcceleratorRequestOpFailed,
|
||||
self.client.bind_arqs, bindings=bindings)
|
||||
|
||||
@mock.patch('keystoneauth1.adapter.Adapter.get')
def test_get_arqs_for_instance(self, mock_cyborg_get):
    # Happy path, without only_resolved=True: the client queries
    # Cyborg's ARQ URL filtered by instance UUID and passes the
    # returned ARQs through unchanged.
    _, bound_arqs = self._get_bound_arqs()
    instance_uuid = bound_arqs[0]['instance_uuid']

    query = {"instance": instance_uuid}
    content = jsonutils.dumps({'arqs': bound_arqs})
    resp = fake_requests.FakeResponse(200, content)
    mock_cyborg_get.return_value = resp

    ret_arqs = self.client.get_arqs_for_instance(instance_uuid)

    mock_cyborg_get.assert_called_once_with(
        self.client.ARQ_URL, params=query)

    # Sort both sides by UUID so the pairwise comparison below does
    # not depend on the order Cyborg returned the ARQs in.
    bound_arqs.sort(key=lambda x: x['uuid'])
    ret_arqs.sort(key=lambda x: x['uuid'])
    for ret_arq, bound_arq in zip(ret_arqs, bound_arqs):
        self.assertDictEqual(ret_arq, bound_arq)
|
||||
|
||||
@mock.patch('keystoneauth1.adapter.Adapter.get')
def test_get_arqs_for_instance_exception(self, mock_cyborg_get):
    # A non-2xx reply from Cyborg must raise
    # AcceleratorRequestOpFailed.
    _, bound_arqs = self._get_bound_arqs()
    instance_uuid = bound_arqs[0]['instance_uuid']

    mock_cyborg_get.return_value = fake_requests.FakeResponse(
        404, content='')
    self.assertRaises(
        exception.AcceleratorRequestOpFailed,
        self.client.get_arqs_for_instance, instance_uuid)
|
||||
|
||||
@mock.patch('keystoneauth1.adapter.Adapter.get')
def test_get_arqs_for_instance_exception_no_resp(self, mock_cyborg_get):
    # A 200 reply whose body carries no 'arqs' key must raise with a
    # message naming the instance.
    _, bound_arqs = self._get_bound_arqs()
    instance_uuid = bound_arqs[0]['instance_uuid']

    mock_cyborg_get.return_value = fake_requests.FakeResponse(
        200, jsonutils.dumps({'noarqs': 'oops'}))
    self.assertRaisesRegex(
        exception.AcceleratorRequestOpFailed,
        'Cyborg returned no accelerator requests for ',
        self.client.get_arqs_for_instance, instance_uuid)
|
||||
|
||||
@mock.patch('keystoneauth1.adapter.Adapter.get')
def test_get_arqs_for_instance_all_resolved(self, mock_cyborg_get):
    # If all ARQs are resolved, only_resolved=True returns the full
    # list unchanged (nothing gets filtered out).
    _, bound_arqs = self._get_bound_arqs()
    instance_uuid = bound_arqs[0]['instance_uuid']

    query = {"instance": instance_uuid}
    content = jsonutils.dumps({'arqs': bound_arqs})
    resp = fake_requests.FakeResponse(200, content)
    mock_cyborg_get.return_value = resp

    ret_arqs = self.client.get_arqs_for_instance(
        instance_uuid, only_resolved=True)

    mock_cyborg_get.assert_called_once_with(
        self.client.ARQ_URL, params=query)

    # Sort both sides by UUID so the pairwise comparison is
    # order-independent.
    bound_arqs.sort(key=lambda x: x['uuid'])
    ret_arqs.sort(key=lambda x: x['uuid'])
    for ret_arq, bound_arq in zip(ret_arqs, bound_arqs):
        self.assertDictEqual(ret_arq, bound_arq)
|
||||
|
||||
@mock.patch('keystoneauth1.adapter.Adapter.get')
def test_get_arqs_for_instance_some_resolved(self, mock_cyborg_get):
    # With only_resolved=True, unresolved (unbound) ARQs must be
    # filtered out and only the resolved ones returned.
    unbound_arqs, _ = self._get_arqs_and_request_groups()
    _, bound_arqs = self._get_bound_arqs()
    # Mix one unbound and one bound ARQ in Cyborg's reply.
    mixed_arqs = [unbound_arqs[0], bound_arqs[0]]
    instance_uuid = bound_arqs[0]['instance_uuid']

    mock_cyborg_get.return_value = fake_requests.FakeResponse(
        200, jsonutils.dumps({'arqs': mixed_arqs}))

    ret_arqs = self.client.get_arqs_for_instance(
        instance_uuid, only_resolved=True)

    mock_cyborg_get.assert_called_once_with(
        self.client.ARQ_URL, params={"instance": instance_uuid})
    self.assertEqual(ret_arqs, [bound_arqs[0]])
|
||||
|
|
|
@ -5993,6 +5993,188 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
|||
mock_hooks.setdefault().run_post.assert_called_once_with(
|
||||
'build_instance', result, mock.ANY, mock.ANY, f=None)
|
||||
|
||||
@mock.patch.object(objects.Instance, 'save')
@mock.patch.object(nova.compute.manager.ComputeManager,
                   '_default_block_device_names')
@mock.patch.object(nova.compute.manager.ComputeManager,
                   '_prep_block_device')
@mock.patch.object(virt_driver.ComputeDriver,
                   'prepare_for_spawn')
@mock.patch.object(nova.compute.manager.ComputeManager,
                   '_build_networks_for_instance')
@mock.patch.object(virt_driver.ComputeDriver,
                   'prepare_networks_before_block_device_mapping')
def _test_accel_build_resources(self, mock_prep_net, mock_build_net,
        mock_prep_spawn, mock_prep_bd, mock_bdnames, mock_save):
    # Helper: drive ComputeManager._build_resources() with everything
    # except the accelerator path mocked out, and return the
    # resources dict the context manager yields.
    args = (self.context, self.instance, self.requested_networks,
            self.security_groups, self.image, self.block_device_mapping,
            self.resource_provider_mapping)

    resources = []
    # Enter and immediately exit the context manager; the yielded
    # resources are all the callers of this helper need.
    with self.compute._build_resources(*args) as resources:
        pass

    return resources
|
||||
|
||||
@mock.patch.object(nova.compute.manager.ComputeManager,
                   '_get_bound_arq_resources')
def test_accel_build_resources_no_device_profile(self, mock_get_arqs):
    # If dp_name is None, accel path is a no-op: no flavor device
    # profile means _get_bound_arq_resources must never be called.
    self.instance.flavor.extra_specs = {}
    self._test_accel_build_resources()
    mock_get_arqs.assert_not_called()
|
||||
|
||||
@mock.patch.object(nova.compute.manager.ComputeManager,
                   '_get_bound_arq_resources')
def test_accel_build_resources(self, mock_get_arqs):
    # Happy path for accels in build_resources: the bound ARQ list
    # must land in resources['accel_info'].
    dp_name = "mydp"
    self.instance.flavor.extra_specs = {"accel:device_profile": dp_name}
    arq_list = fixtures.CyborgFixture.bound_arq_list
    mock_get_arqs.return_value = arq_list

    resources = self._test_accel_build_resources()

    mock_get_arqs.assert_called_once_with(self.context,
        dp_name, self.instance)
    # NOTE(review): sorted() over a list of dicts raises TypeError if
    # it ever has to compare two dicts -- presumably the fixture's
    # bound_arq_list makes this safe; confirm against the fixture.
    self.assertEqual(sorted(resources['accel_info']), sorted(arq_list))
|
||||
|
||||
@mock.patch.object(virt_driver.ComputeDriver,
                   'clean_networks_preparation')
@mock.patch.object(nova.compute.manager.ComputeManager,
                   '_get_bound_arq_resources')
def test_accel_build_resources_exception(self, mock_get_arqs,
        mock_clean_net):
    # When fetching bound ARQs fails, _build_resources must re-raise
    # and unwind the network preparation.
    self.instance.flavor.extra_specs = {"accel:device_profile": "mydp"}
    mock_get_arqs.side_effect = exception.AcceleratorRequestOpFailed(
        op='get', msg='')

    self.assertRaises(exception.NovaException,
                      self._test_accel_build_resources)
    mock_clean_net.assert_called_once()
|
||||
|
||||
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'exit_wait_early')
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'wait_for_instance_event')
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
            'get_arqs_for_instance')
def test_arq_bind_wait_exit_early(self, mock_get_arqs,
        mock_wait_inst_ev, mock_exit_wait_early):
    # Bound ARQs available on first query, quit early.
    dp_name = fixtures.CyborgFixture.dp_name
    arq_list = fixtures.CyborgFixture.bound_arq_list
    self.instance.flavor.extra_specs = {"accel:device_profile": dp_name}
    # One 'accelerator-request-bound' external event per ARQ.
    arq_events = [('accelerator-request-bound', arq['uuid'])
                  for arq in arq_list]

    # get_arqs_for_instance() is called twice, once to get all ARQs
    # for the instance, once to get only the resolved ARQs
    mock_get_arqs.return_value = arq_list

    ret_arqs = self.compute._get_bound_arq_resources(
        self.context, dp_name, self.instance)

    mock_wait_inst_ev.assert_called_once_with(
        self.instance, arq_events, deadline=mock.ANY)
    # Since everything resolved on the first query, the waiter must
    # be told to stop waiting for the ARQ events.
    mock_exit_wait_early.assert_called_once_with(arq_events)

    mock_get_arqs.assert_has_calls([
        mock.call(self.instance.uuid),
        mock.call(self.instance.uuid, only_resolved=True)])

    self.assertEqual(sorted(ret_arqs), sorted(arq_list))
|
||||
|
||||
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'exit_wait_early')
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'wait_for_instance_event')
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
            'get_arqs_for_instance')
def test_arq_bind_wait(self, mock_get_arqs,
        mock_wait_inst_ev, mock_exit_wait_early):
    # If binding is in progress, must wait.
    dp_name = fixtures.CyborgFixture.dp_name
    arq_list = fixtures.CyborgFixture.bound_arq_list
    self.instance.flavor.extra_specs = {"accel:device_profile": dp_name}
    arq_events = [('accelerator-request-bound', arq['uuid'])
                  for arq in arq_list]
    # get_arqs_for_instance gets called 3 times, returning the full
    # ARQ list first, [] as resolved ARQs next, and the full list finally
    mock_get_arqs.side_effect = [arq_list, [], arq_list]

    ret_arqs = self.compute._get_bound_arq_resources(
        self.context, dp_name, self.instance)

    mock_wait_inst_ev.assert_called_once_with(
        self.instance, arq_events, deadline=mock.ANY)
    # Nothing was resolved on the first check, so the early-exit path
    # must NOT have been taken -- the code waited for the events.
    mock_exit_wait_early.assert_not_called()
    self.assertEqual(sorted(ret_arqs), sorted(arq_list))
    mock_get_arqs.assert_has_calls([
        mock.call(self.instance.uuid),
        mock.call(self.instance.uuid, only_resolved=True),
        mock.call(self.instance.uuid)])
|
||||
|
||||
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'exit_wait_early')
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'wait_for_instance_event')
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
            'get_arqs_for_instance')
def test_arq_bind_timeout(self, mock_get_arqs,
        mock_wait_inst_ev, mock_exit_wait_early):
    # If binding fails even after wait, exception is thrown
    dp_name = fixtures.CyborgFixture.dp_name
    arq_list = fixtures.CyborgFixture.bound_arq_list
    self.instance.flavor.extra_specs = {"accel:device_profile": dp_name}
    arq_events = [('accelerator-request-bound', arq['uuid'])
                  for arq in arq_list]

    mock_get_arqs.return_value = arq_list
    # Simulate the event wait timing out.
    mock_wait_inst_ev.side_effect = eventlet_timeout.Timeout

    # The Timeout must propagate out of _get_bound_arq_resources.
    self.assertRaises(eventlet_timeout.Timeout,
                      self.compute._get_bound_arq_resources,
                      self.context, dp_name, self.instance)

    mock_wait_inst_ev.assert_called_once_with(
        self.instance, arq_events, deadline=mock.ANY)
    mock_exit_wait_early.assert_not_called()
    # Only the initial query happened; the wait aborted before the
    # only_resolved follow-up query.
    mock_get_arqs.assert_called_once_with(self.instance.uuid)
|
||||
|
||||
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'exit_wait_early')
@mock.patch.object(nova.compute.manager.ComputeVirtAPI,
                   'wait_for_instance_event')
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
            'get_arqs_for_instance')
def test_arq_bind_exception(self, mock_get_arqs,
        mock_wait_inst_ev, mock_exit_wait_early):
    # If the code inside the context manager of _get_bound_arq_resources
    # raises an exception, that exception must be handled.
    dp_name = fixtures.CyborgFixture.dp_name
    arq_list = fixtures.CyborgFixture.bound_arq_list
    self.instance.flavor.extra_specs = {"accel:device_profile": dp_name}
    arq_events = [('accelerator-request-bound', arq['uuid'])
                  for arq in arq_list]

    # First query succeeds; the only_resolved follow-up raises.
    mock_get_arqs.side_effect = [
        arq_list,
        exception.AcceleratorRequestOpFailed(op='', msg='')]

    self.assertRaises(exception.AcceleratorRequestOpFailed,
                      self.compute._get_bound_arq_resources,
                      self.context, dp_name, self.instance)

    mock_wait_inst_ev.assert_called_once_with(
        self.instance, arq_events, deadline=mock.ANY)
    mock_exit_wait_early.assert_not_called()
    mock_get_arqs.assert_has_calls([
        mock.call(self.instance.uuid),
        mock.call(self.instance.uuid, only_resolved=True)])
|
||||
|
||||
def test_build_and_run_instance_called_with_proper_args(self):
    # Delegates entirely to the shared helper; no extra setup needed.
    self._test_build_and_run_instance()
|
||||
|
||||
|
|
|
@ -437,6 +437,8 @@ class _BaseTaskTestCase(object):
|
|||
def test_cold_migrate_forced_shutdown(self):
    # clean_shutdown=False exercises the forced power-off variant of
    # the shared cold-migrate helper.
    self._test_cold_migrate(clean_shutdown=False)
|
||||
|
||||
@mock.patch.object(conductor_manager.ComputeTaskManager,
|
||||
'_create_and_bind_arqs')
|
||||
@mock.patch.object(compute_rpcapi.ComputeAPI, 'build_and_run_instance')
|
||||
@mock.patch.object(db, 'block_device_mapping_get_all_by_instance',
|
||||
return_value=[])
|
||||
|
@ -448,7 +450,7 @@ class _BaseTaskTestCase(object):
|
|||
@mock.patch.object(objects.RequestSpec, 'from_primitives')
|
||||
def test_build_instances(self, mock_fp, mock_save, mock_getaz,
|
||||
mock_buildreq, mock_schedule, mock_bdm,
|
||||
mock_build):
|
||||
mock_build, mock_create_bind_arqs):
|
||||
"""Tests creating two instances and the scheduler returns a unique
|
||||
host/node combo for each instance.
|
||||
"""
|
||||
|
@ -530,6 +532,86 @@ class _BaseTaskTestCase(object):
|
|||
security_groups='security_groups',
|
||||
block_device_mapping=mock.ANY,
|
||||
node='node2', limits=None, host_list=sched_return[1])])
|
||||
mock_create_bind_arqs.assert_has_calls([
|
||||
mock.call(self.context, instances[0].uuid,
|
||||
instances[0].flavor.extra_specs, 'node1', mock.ANY),
|
||||
mock.call(self.context, instances[1].uuid,
|
||||
instances[1].flavor.extra_specs, 'node2', mock.ANY),
|
||||
])
|
||||
|
||||
@mock.patch.object(conductor_manager.ComputeTaskManager,
                   '_cleanup_when_reschedule_fails')
@mock.patch.object(conductor_manager.ComputeTaskManager,
                   '_create_and_bind_arqs')
@mock.patch.object(compute_rpcapi.ComputeAPI, 'build_and_run_instance')
@mock.patch.object(db, 'block_device_mapping_get_all_by_instance',
                   return_value=[])
@mock.patch.object(conductor_manager.ComputeTaskManager,
                   '_schedule_instances')
@mock.patch('nova.objects.BuildRequest.get_by_instance_uuid')
@mock.patch('nova.availability_zones.get_host_availability_zone')
@mock.patch('nova.objects.Instance.save')
@mock.patch.object(objects.RequestSpec, 'from_primitives')
def test_build_instances_arq_failure(self, mock_fp, mock_save, mock_getaz,
        mock_buildreq, mock_schedule, mock_bdm,
        mock_build, mock_create_bind_arqs, mock_cleanup):
    """If _create_and_bind_arqs throws an exception,
    _destroy_build_request must be called for each instance.
    """
    # NOTE(review): the docstring mentions _destroy_build_request but
    # the assertions below check _cleanup_when_reschedule_fails --
    # presumably that is the path which destroys the build request;
    # confirm against the conductor code.
    fake_spec = objects.RequestSpec()
    mock_fp.return_value = fake_spec
    instance_type = objects.Flavor.get_by_name(self.context, 'm1.small')
    # NOTE(danms): Avoid datetime timezone issues with converted flavors
    instance_type.created_at = None
    instances = [objects.Instance(context=self.context,
                                  id=i,
                                  uuid=uuids.fake,
                                  flavor=instance_type) for i in range(2)]
    instance_properties = obj_base.obj_to_primitive(instances[0])
    instance_properties['system_metadata'] = flavors.save_flavor_info(
        {}, instance_type)

    sched_return = copy.deepcopy(fake_host_lists2)
    mock_schedule.return_value = sched_return

    # build_instances() is a cast, we need to wait for it to complete
    self.useFixture(cast_as_call.CastAsCall(self))

    mock_getaz.return_value = 'myaz'
    # Force the ARQ create/bind step to fail for every instance.
    mock_create_bind_arqs.side_effect = (
        exc.AcceleratorRequestOpFailed(op='', msg=''))

    self.conductor.build_instances(self.context,
                      instances=instances,
                      image={'fake_data': 'should_pass_silently'},
                      filter_properties={},
                      admin_password='admin_password',
                      injected_files='injected_files',
                      requested_networks=None,
                      security_groups='security_groups',
                      block_device_mapping='block_device_mapping',
                      legacy_bdm=False, host_lists=None)
    # Both instances must still have had an ARQ create/bind attempt,
    # one per scheduled node.
    mock_create_bind_arqs.assert_has_calls([
        mock.call(self.context, instances[0].uuid,
                  instances[0].flavor.extra_specs, 'node1', mock.ANY),
        mock.call(self.context, instances[1].uuid,
                  instances[1].flavor.extra_specs, 'node2', mock.ANY),
        ])
    # Comparing instances fails because the instance objects have changed
    # in the above flow. So, we compare the fields instead.
    mock_cleanup.assert_has_calls([
        mock.call(self.context, test.MatchType(objects.Instance),
                  test.MatchType(exc.AcceleratorRequestOpFailed),
                  test.MatchType(dict), None),
        mock.call(self.context, test.MatchType(objects.Instance),
                  test.MatchType(exc.AcceleratorRequestOpFailed),
                  test.MatchType(dict), None),
        ])
    call_list = mock_cleanup.call_args_list
    for idx, instance in enumerate(instances):
        actual_inst = call_list[idx][0][1]
        self.assertEqual(actual_inst['uuid'], instance['uuid'])
        self.assertEqual(actual_inst['flavor']['extra_specs'], {})
|
||||
|
||||
@mock.patch.object(scheduler_utils, 'build_request_spec')
|
||||
@mock.patch.object(scheduler_utils, 'setup_instance_group')
|
||||
|
@ -1880,6 +1962,45 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
|
|||
self.params = params
|
||||
self.flavor = objects.Flavor.get_by_name(self.ctxt, 'm1.tiny')
|
||||
|
||||
@mock.patch('nova.accelerator.cyborg.get_client')
def test_create_bind_arqs_no_device_profile(self, mock_get_client):
    # Without a device profile in the flavor, _create_and_bind_arqs
    # must be a no-op: it must not even construct a Cyborg client.
    instance = fake_instance.fake_instance_obj(self.context)
    instance.flavor.extra_specs = {}

    self.conductor._create_and_bind_arqs(
        self.context, instance.uuid, instance.flavor.extra_specs,
        'myhost', resource_provider_mapping=mock.ANY)

    mock_get_client.assert_not_called()
|
||||
|
||||
@mock.patch('nova.accelerator.cyborg._CyborgClient.bind_arqs')
@mock.patch('nova.accelerator.cyborg._CyborgClient.'
            'create_arqs_and_match_resource_providers')
def test_create_bind_arqs(self, mock_create, mock_bind):
    # Happy path: ARQs are created for the flavor's device profile
    # and then bound to the chosen host.
    hostname = 'myhost'
    instance = fake_instance.fake_instance_obj(self.context)
    dp_name = 'mydp'
    instance.flavor.extra_specs = {'accel:device_profile': dp_name}

    in_arq_list, _ = fixtures.get_arqs(dp_name)
    mock_create.return_value = in_arq_list

    self.conductor._create_and_bind_arqs(self.context,
        instance.uuid, instance.flavor.extra_specs,
        hostname, resource_provider_mapping=mock.ANY)

    mock_create.assert_called_once_with(dp_name, mock.ANY)

    # NOTE(review): the ARQ UUID below is hard-coded to match what
    # fixtures.get_arqs returns -- presumably it stays in sync with
    # that fixture; confirm if the fixture changes.
    expected_bindings = {
        'b59d34d3-787b-4fb0-a6b9-019cd81172f8':
            {'hostname': hostname,
             'device_rp_uuid': mock.ANY,
             'instance_uuid': instance.uuid}
    }
    mock_bind.assert_called_once_with(bindings=expected_bindings)
|
||||
|
||||
@mock.patch('nova.availability_zones.get_host_availability_zone')
|
||||
@mock.patch('nova.compute.rpcapi.ComputeAPI.build_and_run_instance')
|
||||
@mock.patch('nova.scheduler.rpcapi.SchedulerAPI.select_destinations')
|
||||
|
@ -2399,6 +2520,44 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
|
|||
self.params['request_specs'][0].requested_resources = []
|
||||
self._do_schedule_and_build_instances_test(self.params)
|
||||
|
||||
@mock.patch.object(conductor_manager.ComputeTaskManager,
                   '_create_and_bind_arqs')
def test_schedule_and_build_instances_with_arqs_bind_ok(
        self, mock_create_bind_arqs):
    # When the flavor carries a device profile,
    # schedule_and_build_instances must create and bind ARQs for the
    # scheduled node.
    extra_specs = {'accel:device_profile': 'mydp'}
    instance = self.params['build_requests'][0].instance
    instance.flavor.extra_specs = extra_specs

    self._do_schedule_and_build_instances_test(self.params)

    # NOTE(Sundar): At this point, the instance has not been
    # associated with a host yet. The default host.nodename is
    # 'node1'.
    mock_create_bind_arqs.assert_called_once_with(
        self.params['context'], instance.uuid, extra_specs,
        'node1', mock.ANY)
|
||||
|
||||
@mock.patch.object(conductor_manager.ComputeTaskManager,
                   '_cleanup_build_artifacts')
@mock.patch.object(conductor_manager.ComputeTaskManager,
                   '_create_and_bind_arqs')
def test_schedule_and_build_instances_with_arqs_bind_exception(
        self, mock_create_bind_arqs, mock_cleanup):
    # Exceptions in _create_and_bind_arqs result in cleanup
    mock_create_bind_arqs.side_effect = (
        exc.AcceleratorRequestOpFailed(op='', msg=''))

    # The exception may or may not surface to the caller here; the
    # try/except tolerates either outcome -- the assertion that
    # matters is the cleanup call below.
    try:
        self._do_schedule_and_build_instances_test(self.params)
    except exc.AcceleratorRequestOpFailed:
        pass

    mock_cleanup.assert_called_once_with(
        self.params['context'], mock.ANY, mock.ANY,
        self.params['build_requests'], self.params['request_specs'],
        self.params['block_device_mapping'], self.params['tags'],
        mock.ANY)
|
||||
|
||||
def test_map_instance_to_cell_already_mapped(self):
|
||||
"""Tests a scenario where an instance is already mapped to a cell
|
||||
during scheduling.
|
||||
|
|
Loading…
Reference in New Issue