Remove race from wait_for_interface_detach waiter

Nova list the interfaces attached to a server based on list of ports
bound to the server. However during detach interface nova unbounds the
port first and then deallocates the resources used by that port in
placement. The current detach waiter only waits until the interface
disappears from the interface list. This can cause that waiter returns
before the resource allocation is removed from placement cause a race in
the test asserting such allocation.

So this patch changes the waiter to wait for the successful detach
os-instance-action event for the server as that is only recorded after
the port is fully deallocated.

blueprint: qos-minimum-guaranteed-packet-rate
Change-Id: I8740f8e0cc18ffea31a9a068bccee50bf4e6fe28
This commit is contained in:
Balazs Gibizer 2021-10-05 11:22:30 +02:00
parent f46bcdf5e4
commit 55414580c2
3 changed files with 109 additions and 31 deletions

View File

@ -35,6 +35,8 @@ LOG = logging.getLogger(__name__)
class DeviceTaggingBase(base.BaseV2ComputeTest):
credentials = ['primary', 'admin']
@classmethod
def skip_checks(cls):
super(DeviceTaggingBase, cls).skip_checks()
@ -54,6 +56,7 @@ class DeviceTaggingBase(base.BaseV2ComputeTest):
cls.ports_client = cls.os_primary.ports_client
cls.subnets_client = cls.os_primary.subnets_client
cls.interfaces_client = cls.os_primary.interfaces_client
cls.servers_admin_client = cls.os_admin.servers_client
@classmethod
def setup_credentials(cls):
@ -422,11 +425,13 @@ class TaggedAttachmentsTest(DeviceTaggingBase):
self.servers_client.detach_volume(server['id'], volume['id'])
waiters.wait_for_volume_resource_status(self.volumes_client,
volume['id'], 'available')
self.interfaces_client.delete_interface(server['id'],
interface['port_id'])
waiters.wait_for_interface_detach(self.interfaces_client,
req_id = self.interfaces_client.delete_interface(
server['id'], interface['port_id']
).response['x-openstack-request-id']
waiters.wait_for_interface_detach(self.servers_admin_client,
server['id'],
interface['port_id'])
interface['port_id'],
req_id)
# FIXME(mriedem): The assertion that the tagged devices are removed
# from the metadata for the server is being skipped until bug 1775947
# is fixed.

View File

@ -489,18 +489,34 @@ def wait_for_interface_status(client, server_id, port_id, status):
return body
def wait_for_interface_detach(client, server_id, port_id):
def wait_for_interface_detach(client, server_id, port_id, detach_request_id):
"""Waits for an interface to be detached from a server."""
body = client.list_interfaces(server_id)['interfaceAttachments']
ports = [iface['port_id'] for iface in body]
def _get_detach_event_results():
# NOTE(gibi): The obvious choice for this waiter would be to wait
# until the interface disappears from the client.list_interfaces()
# response. However that response is based on the binding status of the
# port in Neutron. Nova deallocates the port resources _after the port
# was unbound in Neutron. This can cause that the naive waiter would
# return before the port is fully deallocated. Wait instead of the
# os-instance-action to succeed as that is recorded after both the
# port is fully deallocated.
events = client.show_instance_action(
server_id, detach_request_id)['instanceAction'].get('events', [])
return [
event['result'] for event in events
if event['event'] == 'compute_detach_interface'
]
detach_event_results = _get_detach_event_results()
start = int(time.time())
while port_id in ports:
while "Success" not in detach_event_results:
time.sleep(client.build_interval)
body = client.list_interfaces(server_id)['interfaceAttachments']
ports = [iface['port_id'] for iface in body]
if port_id not in ports:
return body
detach_event_results = _get_detach_event_results()
if "Success" in detach_event_results:
return client.show_instance_action(
server_id, detach_request_id)['instanceAction']
timed_out = int(time.time()) - start >= client.build_timeout
if timed_out:

View File

@ -186,37 +186,94 @@ class TestInterfaceWaiters(base.TestCase):
mock.call('server_id', 'port_id')])
sleep.assert_called_once_with(client.build_interval)
one_interface = {'interfaceAttachments': [{'port_id': 'port_one'}]}
two_interfaces = {'interfaceAttachments': [{'port_id': 'port_one'},
{'port_id': 'port_two'}]}
def test_wait_for_interface_detach(self):
list_interfaces = mock.MagicMock(
side_effect=[self.two_interfaces, self.one_interface])
client = self.mock_client(list_interfaces=list_interfaces)
no_event = {
'instanceAction': {
'events': []
}
}
one_event_without_result = {
'instanceAction': {
'events': [
{
'event': 'compute_detach_interface',
'result': None
}
]
}
}
one_event_successful = {
'instanceAction': {
'events': [
{
'event': 'compute_detach_interface',
'result': 'Success'
}
]
}
}
show_instance_action = mock.MagicMock(
# there is an extra call to return the result from the waiter
side_effect=[
no_event,
one_event_without_result,
one_event_successful,
one_event_successful,
]
)
client = self.mock_client(show_instance_action=show_instance_action)
self.patch('time.time', return_value=0.)
sleep = self.patch('time.sleep')
result = waiters.wait_for_interface_detach(
client, 'server_id', 'port_two')
client, mock.sentinel.server_id, mock.sentinel.port_id,
mock.sentinel.detach_request_id
)
self.assertIs(self.one_interface['interfaceAttachments'], result)
list_interfaces.assert_has_calls([mock.call('server_id'),
mock.call('server_id')])
sleep.assert_called_once_with(client.build_interval)
self.assertIs(one_event_successful['instanceAction'], result)
show_instance_action.assert_has_calls(
# there is an extra call to return the result from the waiter
[
mock.call(
mock.sentinel.server_id, mock.sentinel.detach_request_id)
] * 4
)
sleep.assert_has_calls([mock.call(client.build_interval)] * 2)
def test_wait_for_interface_detach_timeout(self):
list_interfaces = mock.MagicMock(return_value=self.one_interface)
client = self.mock_client(list_interfaces=list_interfaces)
one_event_without_result = {
'instanceAction': {
'events': [
{
'event': 'compute_detach_interface',
'result': None
}
]
}
}
show_instance_action = mock.MagicMock(
return_value=one_event_without_result)
client = self.mock_client(show_instance_action=show_instance_action)
self.patch('time.time', side_effect=[0., client.build_timeout + 1.])
sleep = self.patch('time.sleep')
self.assertRaises(lib_exc.TimeoutException,
waiters.wait_for_interface_detach,
client, 'server_id', 'port_one')
self.assertRaises(
lib_exc.TimeoutException,
waiters.wait_for_interface_detach,
client, mock.sentinel.server_id, mock.sentinel.port_id,
mock.sentinel.detach_request_id
)
list_interfaces.assert_has_calls([mock.call('server_id'),
mock.call('server_id')])
show_instance_action.assert_has_calls(
[
mock.call(
mock.sentinel.server_id, mock.sentinel.detach_request_id)
] * 2
)
sleep.assert_called_once_with(client.build_interval)
def test_wait_for_guest_os_boot(self):