Remove race from wait_for_interface_detach waiter

Nova list the interfaces attached to a server based on list of ports bound to the server. However during detach interface nova unbounds the port first and then deallocates the resources used by that port in placement. The current detach waiter only waits until the interface disappears from the interface list. This can cause that waiter returns before the resource allocation is removed from placement cause a race in the test asserting such allocation. So this patch changes the waiter to wait for the successful detach os-instance-action event for the server as that is only recorded after the port is fully deallocated. blueprint: qos-minimum-guaranteed-packet-rate Change-Id: I8740f8e0cc18ffea31a9a068bccee50bf4e6fe28
2021-10-05 11:22:30 +02:00 · 2021-10-05 11:22:30 +02:00 · 55414580c2
parent f46bcdf5e4
commit 55414580c2
3 changed files with 109 additions and 31 deletions
--- a/tempest/api/compute/servers/test_device_tagging.py
+++ b/tempest/api/compute/servers/test_device_tagging.py
@ -35,6 +35,8 @@ LOG = logging.getLogger(__name__)

 class DeviceTaggingBase(base.BaseV2ComputeTest):

+    credentials = ['primary', 'admin']
+
    @classmethod
    def skip_checks(cls):
        super(DeviceTaggingBase, cls).skip_checks()
@ -54,6 +56,7 @@ class DeviceTaggingBase(base.BaseV2ComputeTest):
        cls.ports_client = cls.os_primary.ports_client
        cls.subnets_client = cls.os_primary.subnets_client
        cls.interfaces_client = cls.os_primary.interfaces_client
+        cls.servers_admin_client = cls.os_admin.servers_client

    @classmethod
    def setup_credentials(cls):
@ -422,11 +425,13 @@ class TaggedAttachmentsTest(DeviceTaggingBase):
        self.servers_client.detach_volume(server['id'], volume['id'])
        waiters.wait_for_volume_resource_status(self.volumes_client,
                                                volume['id'], 'available')
-        self.interfaces_client.delete_interface(server['id'],
-                                                interface['port_id'])
-        waiters.wait_for_interface_detach(self.interfaces_client,
+        req_id = self.interfaces_client.delete_interface(
+            server['id'], interface['port_id']
+        ).response['x-openstack-request-id']
+        waiters.wait_for_interface_detach(self.servers_admin_client,
                                          server['id'],
-                                          interface['port_id'])
+                                          interface['port_id'],
+                                          req_id)
        # FIXME(mriedem): The assertion that the tagged devices are removed
        # from the metadata for the server is being skipped until bug 1775947
        # is fixed.
--- a/tempest/common/waiters.py
+++ b/tempest/common/waiters.py
@ -489,18 +489,34 @@ def wait_for_interface_status(client, server_id, port_id, status):
    return body


-def wait_for_interface_detach(client, server_id, port_id):
+def wait_for_interface_detach(client, server_id, port_id, detach_request_id):
    """Waits for an interface to be detached from a server."""
-    body = client.list_interfaces(server_id)['interfaceAttachments']
-    ports = [iface['port_id'] for iface in body]
+    def _get_detach_event_results():
+        # NOTE(gibi): The obvious choice for this waiter would be to wait
+        # until the interface disappears from the client.list_interfaces()
+        # response. However that response is based on the binding status of the
+        # port in Neutron. Nova deallocates the port resources _after the port
+        # was  unbound in Neutron. This can cause that the naive waiter would
+        # return before the port is fully deallocated. Wait instead of the
+        # os-instance-action to succeed as that is recorded after both the
+        # port is fully deallocated.
+        events = client.show_instance_action(
+            server_id, detach_request_id)['instanceAction'].get('events', [])
+        return [
+            event['result'] for event in events
+            if event['event'] == 'compute_detach_interface'
+        ]
+
+    detach_event_results = _get_detach_event_results()
+
    start = int(time.time())

-    while port_id in ports:
+    while "Success" not in detach_event_results:
        time.sleep(client.build_interval)
-        body = client.list_interfaces(server_id)['interfaceAttachments']
-        ports = [iface['port_id'] for iface in body]
-        if port_id not in ports:
-            return body
+        detach_event_results = _get_detach_event_results()
+        if "Success" in detach_event_results:
+            return client.show_instance_action(
+                server_id, detach_request_id)['instanceAction']

        timed_out = int(time.time()) - start >= client.build_timeout
        if timed_out:
--- a/tempest/tests/common/test_waiters.py
+++ b/tempest/tests/common/test_waiters.py
@ -186,37 +186,94 @@ class TestInterfaceWaiters(base.TestCase):
                                         mock.call('server_id', 'port_id')])
        sleep.assert_called_once_with(client.build_interval)

-    one_interface = {'interfaceAttachments': [{'port_id': 'port_one'}]}
-    two_interfaces = {'interfaceAttachments': [{'port_id': 'port_one'},
-                                               {'port_id': 'port_two'}]}
-
    def test_wait_for_interface_detach(self):
-        list_interfaces = mock.MagicMock(
-            side_effect=[self.two_interfaces, self.one_interface])
-        client = self.mock_client(list_interfaces=list_interfaces)
+        no_event = {
+            'instanceAction': {
+                'events': []
+            }
+        }
+        one_event_without_result = {
+            'instanceAction': {
+                'events': [
+                    {
+                        'event': 'compute_detach_interface',
+                        'result': None
+                    }
+
+                ]
+            }
+        }
+        one_event_successful = {
+            'instanceAction': {
+                'events': [
+                    {
+                        'event': 'compute_detach_interface',
+                        'result': 'Success'
+                    }
+                ]
+            }
+        }
+
+        show_instance_action = mock.MagicMock(
+            # there is an extra call to return the result from the waiter
+            side_effect=[
+                no_event,
+                one_event_without_result,
+                one_event_successful,
+                one_event_successful,
+            ]
+        )
+        client = self.mock_client(show_instance_action=show_instance_action)
        self.patch('time.time', return_value=0.)
        sleep = self.patch('time.sleep')

        result = waiters.wait_for_interface_detach(
-            client, 'server_id', 'port_two')
+            client, mock.sentinel.server_id, mock.sentinel.port_id,
+            mock.sentinel.detach_request_id
+        )

-        self.assertIs(self.one_interface['interfaceAttachments'], result)
-        list_interfaces.assert_has_calls([mock.call('server_id'),
-                                          mock.call('server_id')])
-        sleep.assert_called_once_with(client.build_interval)
+        self.assertIs(one_event_successful['instanceAction'], result)
+        show_instance_action.assert_has_calls(
+            # there is an extra call to return the result from the waiter
+            [
+                mock.call(
+                    mock.sentinel.server_id, mock.sentinel.detach_request_id)
+            ] * 4
+        )
+        sleep.assert_has_calls([mock.call(client.build_interval)] * 2)

    def test_wait_for_interface_detach_timeout(self):
-        list_interfaces = mock.MagicMock(return_value=self.one_interface)
-        client = self.mock_client(list_interfaces=list_interfaces)
+        one_event_without_result = {
+            'instanceAction': {
+                'events': [
+                    {
+                        'event': 'compute_detach_interface',
+                        'result': None
+                    }
+
+                ]
+            }
+        }
+
+        show_instance_action = mock.MagicMock(
+            return_value=one_event_without_result)
+        client = self.mock_client(show_instance_action=show_instance_action)
        self.patch('time.time', side_effect=[0., client.build_timeout + 1.])
        sleep = self.patch('time.sleep')

-        self.assertRaises(lib_exc.TimeoutException,
-                          waiters.wait_for_interface_detach,
-                          client, 'server_id', 'port_one')
+        self.assertRaises(
+            lib_exc.TimeoutException,
+            waiters.wait_for_interface_detach,
+            client, mock.sentinel.server_id, mock.sentinel.port_id,
+            mock.sentinel.detach_request_id
+        )

-        list_interfaces.assert_has_calls([mock.call('server_id'),
-                                          mock.call('server_id')])
+        show_instance_action.assert_has_calls(
+            [
+                mock.call(
+                    mock.sentinel.server_id, mock.sentinel.detach_request_id)
+            ] * 2
+        )
        sleep.assert_called_once_with(client.build_interval)

    def test_wait_for_guest_os_boot(self):