compute: Rework attach_volume waiters and cleanup

This change reworks both the waiters while attaching a volume *and* the
ordering of the waiters when detaching.

This is done to ensure the wait_for_volume_attachment_remove_from_server
waiter is called first as this  uses the servers client and is able to
dump the contents of the instance console when we hit a timeout.  The
contents of the instance console being incredibly useful to debug issues
within the guestOS when detaching devices as seen in bug #.

The wait_for_volume_attachment_remove_from_server waiter is also
extended to ignore missing Nova volume attachments as this can easily
happen if tests have manually detached volumes ahead of the cleanups
being called.

TODOs are also left to move away from using the volume status to
determine when a given volume has been attached and instead use the
state of volume attachments both in Nova and Cinder.

Related-Bug: #1931702
Change-Id: I8f7986dc6d8689d569b7fba74cca38de4236c6d6
This commit is contained in:
Lee Yarwood 2021-06-04 10:18:35 +01:00
parent 3c7159d2e3
commit 1bd6059454
3 changed files with 68 additions and 24 deletions

View File

@ -570,24 +570,33 @@ class BaseV2ComputeTest(api_version_utils.BaseMicroversionTest,
attachment = self.servers_client.attach_volume(
server['id'], **attach_kwargs)['volumeAttachment']
# On teardown detach the volume and for multiattach volumes wait for
# the attachment to be removed. For non-multiattach volumes wait for
# the state of the volume to change to available. This is so we don't
# error out when trying to delete the volume during teardown.
if volume['multiattach']:
att = waiters.wait_for_volume_attachment_create(
self.volumes_client, volume['id'], server['id'])
self.addCleanup(waiters.wait_for_volume_attachment_remove,
self.volumes_client, volume['id'],
att['attachment_id'])
else:
self.addCleanup(waiters.wait_for_volume_resource_status,
self.volumes_client, volume['id'], 'available')
waiters.wait_for_volume_resource_status(self.volumes_client,
volume['id'], 'in-use')
# Ignore 404s on detach in case the server is deleted or the volume
# is already detached.
# NOTE(lyarwood): During attach we initially wait for the volume
# attachment and then check the volume state.
waiters.wait_for_volume_attachment_create(
self.volumes_client, volume['id'], server['id'])
# TODO(lyarwood): Remove the following volume status checks and move to
# attachment status checks across all volumes now with the 3.27
# microversion somehow.
if not volume['multiattach']:
waiters.wait_for_volume_resource_status(
self.volumes_client, volume['id'], 'in-use')
# NOTE(lyarwood): On teardown (LIFO) initially wait for the volume
# attachment in Nova to be removed. While this technically happens last
# we want this to be the first waiter as if it fails we can then dump
# the contents of the console log. The final check of the volume state
# should be a no-op by this point and is just added for completeness
# when detaching non-multiattach volumes.
if not volume['multiattach']:
self.addCleanup(
waiters.wait_for_volume_resource_status, self.volumes_client,
volume['id'], 'available')
self.addCleanup(
waiters.wait_for_volume_attachment_remove_from_server,
self.servers_client, server['id'], volume['id'])
self.addCleanup(self._detach_volume, server, volume)
return attachment
def create_volume_snapshot(self, volume_id, name=None, description=None,

View File

@ -356,23 +356,36 @@ def wait_for_volume_attachment_remove_from_server(
This waiter checks the compute API if the volume attachment is removed.
"""
start = int(time.time())
volumes = client.list_volume_attachments(server_id)['volumeAttachments']
try:
volumes = client.list_volume_attachments(
server_id)['volumeAttachments']
except lib_exc.NotFound:
# Ignore 404s on detach in case the server is deleted or the volume
# is already detached.
return
while any(volume for volume in volumes if volume['volumeId'] == volume_id):
time.sleep(client.build_interval)
timed_out = int(time.time()) - start >= client.build_timeout
if timed_out:
console_output = client.get_console_output(server_id)['output']
LOG.debug('Console output for %s\nbody=\n%s',
server_id, console_output)
message = ('Volume %s failed to detach from server %s within '
'the required time (%s s) from the compute API '
'perspective' %
(volume_id, server_id, client.build_timeout))
raise lib_exc.TimeoutException(message)
volumes = client.list_volume_attachments(server_id)[
'volumeAttachments']
return volumes
try:
volumes = client.list_volume_attachments(
server_id)['volumeAttachments']
except lib_exc.NotFound:
# Ignore 404s on detach in case the server is deleted or the volume
# is already detached.
return
return
def wait_for_volume_migration(client, volume_id, new_host):

View File

@ -453,11 +453,14 @@ class TestVolumeWaiters(base.TestCase):
"volumeAttachments": [{"volumeId": uuids.volume_id}]}
mock_list_volume_attachments = mock.Mock(
side_effect=[volume_attached, volume_attached])
mock_get_console_output = mock.Mock(
return_value={'output': 'output'})
mock_client = mock.Mock(
spec=servers_client.ServersClient,
build_interval=1,
build_timeout=1,
list_volume_attachments=mock_list_volume_attachments)
list_volume_attachments=mock_list_volume_attachments,
get_console_output=mock_get_console_output)
self.patch(
'time.time',
side_effect=[0., 0.5, mock_client.build_timeout + 1.])
@ -473,3 +476,22 @@ class TestVolumeWaiters(base.TestCase):
mock_list_volume_attachments.assert_has_calls([
mock.call(uuids.server_id),
mock.call(uuids.server_id)])
# Assert that we fetch console output
mock_get_console_output.assert_called_once_with(uuids.server_id)
def test_wait_for_volume_attachment_remove_from_server_not_found(self):
mock_list_volume_attachments = mock.Mock(
side_effect=lib_exc.NotFound)
mock_client = mock.Mock(
spec=servers_client.ServersClient,
list_volume_attachments=mock_list_volume_attachments)
# Assert that nothing is raised when lib_exc_NotFound is raised
# by the client call to list_volume_attachments
waiters.wait_for_volume_attachment_remove_from_server(
mock_client, mock.sentinel.server_id, mock.sentinel.volume_id)
# Assert that list_volume_attachments was actually called
mock_list_volume_attachments.assert_called_once_with(
mock.sentinel.server_id)