smartnic support - cleanup arqs
Delete ARQs in the following cases: an ARQ is deleted during port unbind; the create operation failed and the ARQs were never bound to an instance; an ARQ is bound to an instance but not yet bound to a port. Implements: blueprint sriov-smartnic-support Change-Id: Idab0ee38750d018de409699a0dbdff106d9e11fb
This commit is contained in:
parent
120fe8af93
commit
e19fa1a199
@ -2579,7 +2579,11 @@ class ComputeManager(manager.Manager):
|
||||
spec_arqs, network_arqs)
|
||||
except (Exception, eventlet.timeout.Timeout) as exc:
|
||||
LOG.exception(exc)
|
||||
compute_utils.delete_arqs_if_needed(context, instance)
|
||||
# ARQs created for instance or ports.
|
||||
# The port binding isn't done yet.
|
||||
# Unbind port won't clean port ARQs.
|
||||
compute_utils.delete_arqs_if_needed(
|
||||
context, instance, accel_uuids)
|
||||
msg = _('Failure getting accelerator requests.')
|
||||
raise exception.BuildAbortException(
|
||||
reason=msg, instance_uuid=instance.uuid)
|
||||
@ -2678,7 +2682,12 @@ class ComputeManager(manager.Manager):
|
||||
reason=str(exc))
|
||||
finally:
|
||||
# Call Cyborg to delete accelerator requests
|
||||
compute_utils.delete_arqs_if_needed(context, instance)
|
||||
if accel_uuids:
|
||||
# ARQs created for instance or ports.
|
||||
# Port bind may not successful.
|
||||
# unbind port won't clean port ARQs.
|
||||
compute_utils.delete_arqs_if_needed(
|
||||
context, instance, accel_uuids)
|
||||
|
||||
def _get_bound_arq_resources(self, context, instance, arq_uuids):
|
||||
"""Get bound accelerator requests.
|
||||
|
@ -1537,16 +1537,28 @@ def update_pci_request_spec_with_allocated_interface_name(
|
||||
spec['parent_ifname'] = rp_name_pieces[2]
|
||||
|
||||
|
||||
def delete_arqs_if_needed(context, instance, arq_uuids=None):
    """Delete Cyborg accelerator requests (ARQs) for an instance.

    Handles two distinct cleanup paths:

    * ARQs already bound to the instance (flavor-based, identified via
      the ``accel:device_profile`` extra spec) are deleted by instance
      UUID.
    * ARQs created for ports that were never bound to the instance are
      deleted individually by their UUIDs.

    :param context: nova request context used to build the Cyborg client
    :param instance: the instance that owns the ARQs
    :param arq_uuids: optional list of ARQ UUIDs to delete directly;
        used for ARQs that did not get bound to the instance yet
        (e.g. port ARQs when port binding failed)
    """
    cyclient = cyborg.get_client(context)
    dp_name = instance.flavor.extra_specs.get('accel:device_profile')

    if dp_name:
        # Flavor requested a device profile, so instance-bound ARQs may
        # exist in Cyborg; delete them by instance UUID.
        LOG.debug('Calling Cyborg to delete ARQs for instance %(instance)s',
                  {'instance': instance.uuid})
        try:
            cyclient.delete_arqs_for_instance(instance.uuid)
        except exception.AcceleratorRequestOpFailed as e:
            # Best-effort cleanup: log and continue so that any
            # uuid-based deletion below still runs.
            LOG.exception('Failed to delete accelerator requests for '
                          'instance %s. Exception: %s', instance.uuid, e)

    if arq_uuids:
        # ARQs that never got bound to the instance (e.g. port ARQs)
        # are not covered by the instance-based deletion above, so they
        # must be deleted individually by UUID.
        LOG.debug('Calling Cyborg to delete ARQs by uuids %(uuid)s for'
                  ' instance %(instance)s',
                  {'instance': instance.uuid,
                   'uuid': arq_uuids})
        cyclient.delete_arqs_by_uuid(arq_uuids)
|
||||
|
@ -592,7 +592,13 @@ class ComputeTaskManager:
|
||||
legacy_request_spec)
|
||||
self._cleanup_allocated_networks(
|
||||
context, instance, requested_networks)
|
||||
compute_utils.delete_arqs_if_needed(context, instance)
|
||||
|
||||
arq_uuids = None
|
||||
# arqs have not bound to port/instance yet
|
||||
if requested_networks:
|
||||
arq_uuids = [req.arq_uuid
|
||||
for req in requested_networks if req.arq_uuid]
|
||||
compute_utils.delete_arqs_if_needed(context, instance, arq_uuids)
|
||||
|
||||
# NOTE(danms): This is never cell-targeted because it is only used for
|
||||
# n-cpu reschedules which go to the cell conductor and thus are always
|
||||
|
@ -653,11 +653,19 @@ class API:
|
||||
).get('network')
|
||||
networks[net_id] = network
|
||||
|
||||
# Unbind Port device
|
||||
if port_profile.get('arq_uuid'):
|
||||
"""Delete device profile by arq uuid."""
|
||||
cyclient = cyborg.get_client(context)
|
||||
cyclient.delete_arqs_by_uuid([port_profile['arq_uuid']])
|
||||
LOG.debug('Delete ARQs %s for port %s',
|
||||
port_profile['arq_uuid'], port_id)
|
||||
|
||||
# NOTE: We're doing this to remove the binding information
|
||||
# for the physical device but don't want to overwrite the other
|
||||
# information in the binding profile.
|
||||
for profile_key in ('pci_vendor_info', 'pci_slot',
|
||||
constants.ALLOCATION):
|
||||
constants.ALLOCATION, 'arq_uuid'):
|
||||
if profile_key in port_profile:
|
||||
del port_profile[profile_key]
|
||||
port_req_body['port'][constants.BINDING_PROFILE] = port_profile
|
||||
|
@ -6235,9 +6235,12 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
||||
self.context, self.instance, arq_uuids)
|
||||
self.assertEqual(sorted(resources['accel_info']), sorted(arq_list))
|
||||
|
||||
@mock.patch.object(compute_utils,
|
||||
'delete_arqs_if_needed')
|
||||
@mock.patch.object(nova.compute.manager.ComputeManager,
|
||||
'_get_bound_arq_resources')
|
||||
def test_accel_build_resources_exception(self, mock_get_arqs):
|
||||
def test_accel_build_resources_exception(self, mock_get_arqs,
|
||||
mock_clean_arq):
|
||||
dp_name = "mydp"
|
||||
self.instance.flavor.extra_specs = {"accel:device_profile": dp_name}
|
||||
arq_list = fixtures.CyborgFixture.bound_arq_list
|
||||
@ -6252,6 +6255,8 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
||||
self._test_accel_build_resources,
|
||||
arq_uuids, None,
|
||||
self.requested_networks)
|
||||
mock_clean_arq.assert_called_once_with(
|
||||
self.context, self.instance, arq_uuids)
|
||||
|
||||
@mock.patch.object(nova.compute.manager.ComputeManager,
|
||||
'_get_bound_arq_resources')
|
||||
|
@ -1658,3 +1658,22 @@ class AcceleratorRequestTestCase(test.NoDBTestCase):
|
||||
mock_log_exc.assert_called_once()
|
||||
self.assertIn('Failed to delete accelerator requests for instance',
|
||||
mock_log_exc.call_args[0][0])
|
||||
|
||||
@mock.patch.object(cyborgclient, 'delete_arqs_by_uuid')
def test_delete_with_arq_uuid(self, mock_del_arq):
    """Without a device profile in the flavor, only the uuid-based
    deletion path is taken.
    """
    flavor = objects.Flavor(**test_flavor.fake_flavor)
    instance = fake_instance.fake_instance_obj(
        self.context, flavor=flavor)
    uuid_list = [uuids.arq1, uuids.arq2]

    compute_utils.delete_arqs_if_needed(
        self.context, instance, uuid_list)

    mock_del_arq.assert_called_once_with(uuid_list)
|
||||
|
||||
@mock.patch.object(cyborgclient, 'delete_arqs_by_uuid')
@mock.patch.object(cyborgclient, 'delete_arqs_for_instance')
def test_delete_with_arq_uuid_and_dp(self, mock_del_inst, mock_del_uuid):
    """With both a device profile and explicit ARQ uuids, both
    deletion paths are exercised.
    """
    flavor = objects.Flavor(**test_flavor.fake_flavor)
    flavor['extra_specs'] = {'accel:device_profile': 'mydp'}
    instance = fake_instance.fake_instance_obj(
        self.context, flavor=flavor)
    uuid_list = [uuids.arq1, uuids.arq2]

    compute_utils.delete_arqs_if_needed(
        self.context, instance, uuid_list)

    mock_del_inst.assert_called_once_with(instance.uuid)
    mock_del_uuid.assert_called_once_with(uuid_list)
|
||||
|
@ -4144,7 +4144,25 @@ class ConductorTaskTestCase(_BaseTaskTestCase, test_compute.BaseTestCase):
|
||||
self.conductor_manager._cleanup_when_reschedule_fails(
|
||||
self.context, instance, exception=None,
|
||||
legacy_request_spec=None, requested_networks=None)
|
||||
mock_del_arqs.assert_called_once_with(self.context, instance)
|
||||
mock_del_arqs.assert_called_once_with(
|
||||
self.context, instance, None)
|
||||
|
||||
@mock.patch.object(conductor_manager.ComputeTaskManager,
                   '_cleanup_allocated_networks')
@mock.patch.object(conductor_manager.ComputeTaskManager,
                   '_set_vm_state_and_notify')
@mock.patch.object(compute_utils, 'delete_arqs_if_needed')
def test_cleanup_arqs_on_reschedule_by_uuid(self, mock_del_arqs,
                                            mock_set_vm, mock_clean_net):
    """Reschedule-failure cleanup passes the ARQ uuids collected from
    the requested networks on to the ARQ deletion helper.
    """
    instance = fake_instance.fake_instance_obj(self.context)
    # One network request carrying an ARQ uuid in the fifth slot.
    request_tuples = [('123', '1.2.3.4', uuids.fakeid,
                       None, uuids.arq_uuid, None)]
    requests = objects.NetworkRequestList.from_tuples(request_tuples)

    self.conductor_manager._cleanup_when_reschedule_fails(
        self.context, instance, exception=None,
        legacy_request_spec=None, requested_networks=requests)

    mock_del_arqs.assert_called_once_with(self.context,
                                          instance, [uuids.arq_uuid])
|
||||
|
||||
def test_cleanup_allocated_networks_none_requested(self):
|
||||
# Tests that we don't deallocate networks if 'none' were specifically
|
||||
|
@ -7065,6 +7065,26 @@ class TestAPIPortbinding(TestAPIBase):
|
||||
self.api.delete_port_binding(self.context, port_id,
|
||||
'fake-host')
|
||||
|
||||
@mock.patch('nova.accelerator.cyborg._CyborgClient.delete_arqs_by_uuid')
@mock.patch('nova.network.neutron.get_binding_profile')
@mock.patch('nova.network.neutron.API._show_port')
@mock.patch('nova.network.neutron.get_client')
def test_unbind_ports_clean_arq(self, mock_neutron, mock_show,
                                mock_bind, mock_delete_arq):
    """Unbinding a port with an ARQ deletes the ARQ in Cyborg and
    strips 'arq_uuid' from the binding profile.
    """
    mock_client = mock.Mock()
    mock_ctx = mock.Mock(is_admin=False)
    ports = ["1"]
    mock_show.return_value = {'id': uuids.port}
    mock_bind.return_value = {'arq_uuid': self.arqs[0]['uuid'],
                              'key': 'val'}

    api = neutronapi.API()
    api._unbind_ports(mock_ctx, ports, mock_neutron, mock_client)

    mock_delete_arq.assert_called_once_with([self.arqs[0]['uuid']])
    # verify binding profile key 'arq_uuid' deleted
    call_args = mock_client.update_port.call_args[0][1]
    self.assertEqual(call_args['port']['binding:profile'],
                     {'key': 'val'})
|
||||
|
||||
|
||||
class TestAllocateForInstance(test.NoDBTestCase):
|
||||
def setUp(self):
|
||||
|
Loading…
Reference in New Issue
Block a user