From e9d572ddfad10da18603c55da1aad2e70da15381 Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Fri, 3 May 2019 16:14:48 -0600 Subject: [PATCH] Log when port resource is leaked during port delete When a bound port is deleted in neutron, nova gets a network-vif-deleted notification and the port is detached from the server. However if the port had resource request then the resulting allocation is leaked. This patch makes sure nova at least logs an ERROR. Also this patch asserts that the leaked allocation is reclaimed when the server is deleted. Change-Id: I5d905aeb5b25f84d406dbf238d0d3a46f0f81161 Related-Bug: #1820588 --- nova/compute/manager.py | 13 +++- nova/tests/functional/integrated_helpers.py | 8 ++ nova/tests/functional/test_servers.py | 82 +++++++++++++++++++++ 3 files changed, 101 insertions(+), 2 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 8524e11a5b6b..f1186ecd9934 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -8288,6 +8288,17 @@ class ComputeManager(manager.Manager): 'deleting it from the info cache', {'intf': vif['id']}, instance=instance) + profile = vif.get('profile', {}) or {} # profile can be None + if profile.get('allocation'): + LOG.error( + 'The bound port %(port_id)s is deleted in Neutron but ' + 'the resource allocation on the resource provider ' + '%(rp_uuid)s is leaked until the server ' + '%(server_uuid)s is deleted.', + {'port_id': vif['id'], + 'rp_uuid': vif['profile']['allocation'], + 'server_uuid': instance.uuid}) + del network_info[index] base_net_api.update_instance_cache_with_nw_info( self.network_api, context, @@ -8388,8 +8399,6 @@ class ComputeManager(manager.Manager): instance=instance) elif event.name == 'network-vif-deleted': try: - # TODO(gibi): If the vif had resource allocation then - # it needs to be deallocated in placement. self._process_instance_vif_deleted_event(context, instance, event.tag) diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py index 5866cb1b396e..92eab1fd6b72 100644 --- a/nova/tests/functional/integrated_helpers.py +++ b/nova/tests/functional/integrated_helpers.py @@ -357,6 +357,14 @@ class InstanceHelperMixin(object): self.fail('Timed out waiting for migration with status "%s" for ' 'instance: %s' % (expected_statuses, server['id'])) + def _wait_for_log(self, log_line): + for i in range(10): + if log_line in self.stdlog.logger.output: + return + time.sleep(0.5) + + self.fail('The line "%(log_line)s" did not appear in the log') + class ProviderUsageBaseTestCase(test.TestCase, InstanceHelperMixin): """Base test class for functional tests that check provider usage diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index 13123bc4fb3a..9097eb2eb6a2 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -6653,6 +6653,88 @@ class PortResourceRequestBasedSchedulingTest( binding_profile = updated_port['binding:profile'] self.assertNotIn('allocation', binding_profile) + def test_delete_bound_port_in_neutron_with_resource_request(self): + """Neutron sends a network-vif-deleted os-server-external-events + notification to nova when a bound port is deleted. Nova detaches the + vif from the server. If the port had a resource allocation then that + allocation is leaked. This test makes sure that 1) an ERROR is logged + when the leak happens. 2) the leaked resource is reclaimed when the + server is deleted. + """ + port = self.neutron.port_with_resource_request + + # create a server + server = self._create_server( + flavor=self.flavor, + networks=[{'port': port['id']}]) + server = self._wait_for_state_change(self.admin_api, server, 'ACTIVE') + + allocations = self.placement_api.get( + '/allocations/%s' % server['id']).body['allocations'] + # We expect one set of allocations for the compute resources on the + # compute rp and one set for the networking resources on the ovs bridge + # rp due to the port resource request + self.assertEqual(2, len(allocations)) + compute_allocations = allocations[self.compute1_rp_uuid]['resources'] + network_allocations = allocations[ + self.ovs_bridge_rp_per_host[self.compute1_rp_uuid]]['resources'] + + self.assertEqual(self._resources_from_flavor(self.flavor), + compute_allocations) + self.assertPortMatchesAllocation(port, network_allocations) + + # We expect that only the RP uuid of the networking RP having the port + # allocation is sent in the port binding for the port having resource + # request + updated_port = self.neutron.show_port(port['id'])['port'] + binding_profile = updated_port['binding:profile'] + self.assertEqual(self.ovs_bridge_rp_per_host[self.compute1_rp_uuid], + binding_profile['allocation']) + + # neutron is faked in the functional test so this test just sends in + # a os-server-external-events notification to trigger the + # detach + ERROR log. + events = { + "events": [ + { + "name": "network-vif-deleted", + "server_uuid": server['id'], + "tag": port['id'], + } + ] + } + response = self.api.api_post('/os-server-external-events', events).body + self.assertEqual(200, response['events'][0]['code']) + + port_rp_uuid = self.ovs_bridge_rp_per_host[self.compute1_rp_uuid] + + # 1) Nova logs an ERROR about the leak + self._wait_for_log( + 'ERROR [nova.compute.manager] The bound port %(port_id)s is ' + 'deleted in Neutron but the resource allocation on the resource ' + 'provider %(rp_uuid)s is leaked until the server %(server_uuid)s ' + 'is deleted.' + % {'port_id': port['id'], + 'rp_uuid': port_rp_uuid, + 'server_uuid': server['id']}) + + allocations = self.placement_api.get( + '/allocations/%s' % server['id']).body['allocations'] + + # Nova leaks the port allocation so the server still has the same + # allocation before the port delete. + self.assertEqual(2, len(allocations)) + compute_allocations = allocations[self.compute1_rp_uuid]['resources'] + network_allocations = allocations[port_rp_uuid]['resources'] + + self.assertEqual(self._resources_from_flavor(self.flavor), + compute_allocations) + self.assertPortMatchesAllocation(port, network_allocations) + + # 2) Also nova will reclaim the leaked resource during the server + # delete + self._delete_and_check_allocations(server) + def test_two_sriov_ports_one_with_request_two_available_pfs(self): """Verify that the port's bandwidth allocated from the same PF as the allocated VF.