Add regression test for bug #1943431

This regression test asserts the current broken behaviour of Nova during
a Cinder orchestrated volume migration that leaves the stashed
connection_info of the attachment pointing at a now deleted temporary
volume UUID used during the migration.

This is slightly different to the Nova orchestrated pure swap_volume
flow so an additional test is included to assert the current correct
behaviour there and to hopefully illustrate the differences better to
reviewers.

Numerous TODOs are also left in place for clean ups in the regression
test, Cinder fixture and integrated helpers.

Change-Id: Ic060523c30fd4dae086afcf7566452be0b2ff266
This commit is contained in:
Lee Yarwood
2021-09-23 15:36:55 +01:00
parent 3fd27fc6c0
commit 674a5e8dca
3 changed files with 235 additions and 6 deletions

View File

@@ -37,7 +37,12 @@ class CinderFixture(fixtures.Fixture):
SWAP_ERR_OLD_VOL = '828419fa-3efb-4533-b458-4267ca5fe9b1'
SWAP_ERR_NEW_VOL = '9c6d9c2d-7a8f-4c80-938d-3bf062b8d489'
SWAP_ERR_ATTACH_ID = '4a3cd440-b9c2-11e1-afa6-0800200c9a66'
MULTIATTACH_VOL = '4757d51f-54eb-4442-8684-3399a6431f67'
MULTIATTACH_RO_SWAP_OLD_VOL = uuids.multiattach_ro_swap_old_vol
MULTIATTACH_RO_SWAP_NEW_VOL = uuids.multiattach_ro_swap_new_vol
MULTIATTACH_RO_MIGRATE_OLD_VOL = uuids.multiattach_ro_migrate_old_vol
MULTIATTACH_RO_MIGRATE_NEW_VOL = uuids.multiattach_ro_migrate_new_vol
# This represents a bootable image-backed volume to test
# boot-from-volume scenarios.
@@ -78,6 +83,7 @@ class CinderFixture(fixtures.Fixture):
self.swap_volume_instance_uuid = None
self.swap_volume_instance_error_uuid = None
self.attachment_error_id = None
self.multiattach_ro_migrated = False
self.az = az
# A dict, keyed by volume id, to a dict, keyed by attachment id,
# with keys:
@@ -93,10 +99,20 @@ class CinderFixture(fixtures.Fixture):
def setUp(self):
super().setUp()
def _is_multiattach(volume_id):
    """Return True if the fixture treats this volume as multiattach."""
    # All volumes the fixture advertises as multiattach-capable,
    # including the read-only swap/migrate regression volumes.
    multiattach_volumes = (
        self.MULTIATTACH_VOL,
        self.MULTIATTACH_RO_SWAP_OLD_VOL,
        self.MULTIATTACH_RO_SWAP_NEW_VOL,
        self.MULTIATTACH_RO_MIGRATE_OLD_VOL,
        self.MULTIATTACH_RO_MIGRATE_NEW_VOL,
    )
    return volume_id in multiattach_volumes
def fake_get(self_api, context, volume_id, microversion=None):
# TODO(lyarwood): Refactor this block into something sensible and
# reusable by other tests.
# Check for the special swap volumes.
attachments = self.volume_to_attachment[volume_id]
if volume_id in (self.SWAP_OLD_VOL, self.SWAP_ERR_OLD_VOL):
volume = {
'status': 'available',
@@ -144,7 +160,7 @@ class CinderFixture(fixtures.Fixture):
'display_name': volume_id,
'attach_status': 'attached',
'id': volume_id,
'multiattach': volume_id == self.MULTIATTACH_VOL,
'multiattach': _is_multiattach(volume_id),
'size': 1,
'attachments': {
attachment['instance_uuid']: {
@@ -157,10 +173,10 @@ class CinderFixture(fixtures.Fixture):
# This is a test that does not care about the actual details.
volume = {
'status': 'available',
'display_name': 'TEST2',
'display_name': volume_id,
'attach_status': 'detached',
'id': volume_id,
'multiattach': volume_id == self.MULTIATTACH_VOL,
'multiattach': _is_multiattach(volume_id),
'size': 1
}
@@ -186,11 +202,35 @@ class CinderFixture(fixtures.Fixture):
"trait:HW_CPU_X86_SGX": "required",
}
# If we haven't called migrate_volume_completion then return
# a migration_status of migrating
if (
volume_id == self.MULTIATTACH_RO_MIGRATE_OLD_VOL and
not self.multiattach_ro_migrated
):
volume['migration_status'] = 'migrating'
# If we have migrated and callers are still GET'ing the temporary
# new volume then raise VolumeNotFound
if (
volume_id == self.MULTIATTACH_RO_MIGRATE_NEW_VOL and
self.multiattach_ro_migrated
):
raise exception.VolumeNotFound(
volume_id=self.MULTIATTACH_RO_MIGRATE_NEW_VOL)
return volume
def fake_migrate_volume_completion(
    _self, context, old_volume_id, new_volume_id, error,
):
    """Fake out Cinder's os-migrate_volume_completion volume action.

    Returns the volume UUID Nova should save against the BDM, as the
    real Cinder API does.
    """
    if new_volume_id != self.MULTIATTACH_RO_MIGRATE_NEW_VOL:
        return {'save_volume_id': new_volume_id}
    # Mimic the behaviour of Cinder here that renames the new
    # volume to the old UUID once the migration is complete.
    # fake_get uses this flag to raise VolumeNotFound for the
    # temporary new volume UUID once the migration has completed.
    self.multiattach_ro_migrated = True
    return {'save_volume_id': old_volume_id}
def _find_attachment(attachment_id):
@@ -212,14 +252,15 @@ class CinderFixture(fixtures.Fixture):
"""Find the connection_info associated with an attachment
:returns: A connection_info dict based on a deepcopy associated
with the volume_id but containing the attachment_id, making it
unique for the attachment.
with the volume_id but containing both the attachment_id and
volume_id, making it unique for the attachment.
"""
connection_info = copy.deepcopy(
self.VOLUME_CONNECTION_INFO.get(
volume_id, self.VOLUME_CONNECTION_INFO.get('fake')
)
)
connection_info['data']['volume_id'] = volume_id
connection_info['data']['attachment_id'] = attachment_id
return connection_info
@@ -241,6 +282,13 @@ class CinderFixture(fixtures.Fixture):
'instance_uuid': instance_uuid,
'connector': connector,
}
if volume_id in [self.MULTIATTACH_RO_SWAP_OLD_VOL,
self.MULTIATTACH_RO_SWAP_NEW_VOL,
self.MULTIATTACH_RO_MIGRATE_OLD_VOL,
self.MULTIATTACH_RO_MIGRATE_NEW_VOL]:
attachment['attach_mode'] = 'ro'
LOG.info(
'Created attachment %s for volume %s. Total attachments '
'for volume: %d',

View File

@@ -202,6 +202,7 @@ class InstanceHelperMixin:
'actions: %s. Events in the last matching action: %s'
% (event_name, actions, events))
# TODO(lyarwood): Rewrite this waiter to use os-volume_attachments
def _wait_for_volume_attach(self, server_id, volume_id):
timeout = 0.0
server = self.api.get_server(server_id)
@@ -220,6 +221,25 @@ class InstanceHelperMixin:
'server %s. Currently attached volumes: %s' %
(volume_id, server_id, attached_vols))
# TODO(lyarwood): Rewrite this waiter to use os-volume_attachments
def _wait_for_volume_detach(self, server_id, volume_id):
timeout = 0.0
server = self.api.get_server(server_id)
attached_vols = [vol['id'] for vol in
server['os-extended-volumes:volumes_attached']]
while volume_id in attached_vols and timeout < 10.0:
time.sleep(.1)
timeout += .1
server = self.api.get_server(server_id)
attached_vols = [vol['id'] for vol in
server['os-extended-volumes:volumes_attached']]
if volume_id in attached_vols:
self.fail('Timed out waiting for volume %s to be detached from '
'server %s. Currently attached volumes: %s' %
(volume_id, server_id, attached_vols))
def _assert_resize_migrate_action_fail(self, server, action, error_in_tb):
"""Waits for the conductor_migrate_server action event to fail for
the given action and asserts the error is in the event traceback.

View File

@@ -0,0 +1,161 @@
# Copyright 2021, Red Hat, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_serialization import jsonutils
from nova import context
from nova import objects
from nova.tests.functional.api import client
from nova.tests.functional import integrated_helpers
from nova.tests.functional.libvirt import base
from nova.virt import block_device as driver_block_device
class TestLibvirtROMultiattachMigrate(
    base.ServersTestBase,
    integrated_helpers.InstanceHelperMixin
):
    """Regression test for bug 1943431

    This regression test asserts the current broken behaviour of Nova during
    a Cinder orchestrated volume migration that leaves the stashed
    connection_info of the attachment pointing at a now deleted temporary
    volume UUID used during the migration.

    This is slightly different to the Nova orchestrated pure swap_volume
    flow so an additional test is included to assert the current correct
    behaviour there and to hopefully illustrate the differences better to
    reviewers.
    """

    microversion = 'latest'
    ADMIN_API = True

    def setUp(self):
        super().setUp()
        self.start_compute()

    def test_ro_multiattach_swap_volume(self):
        """Assert the correct behaviour of a pure Nova swap_volume flow."""
        server_id = self._create_server(networks='none')['id']
        self.api.post_server_volume(
            server_id,
            {
                'volumeAttachment': {
                    'volumeId': self.cinder.MULTIATTACH_RO_SWAP_OLD_VOL
                }
            }
        )
        self._wait_for_volume_attach(
            server_id, self.cinder.MULTIATTACH_RO_SWAP_OLD_VOL)

        # Swap between the old and new volumes
        self.api.put_server_volume(
            server_id,
            self.cinder.MULTIATTACH_RO_SWAP_OLD_VOL,
            self.cinder.MULTIATTACH_RO_SWAP_NEW_VOL)

        # Wait until the old volume is detached and new volume is attached
        self._wait_for_volume_detach(
            server_id, self.cinder.MULTIATTACH_RO_SWAP_OLD_VOL)
        self._wait_for_volume_attach(
            server_id, self.cinder.MULTIATTACH_RO_SWAP_NEW_VOL)

        bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
            context.get_admin_context(),
            self.cinder.MULTIATTACH_RO_SWAP_NEW_VOL,
            server_id)
        connection_info = jsonutils.loads(bdm.connection_info)

        # Assert that only the new volume UUID is referenced within the stashed
        # connection_info and returned by driver_block_device.get_volume_id
        self.assertIn('volume_id', connection_info.get('data'))
        self.assertEqual(
            self.cinder.MULTIATTACH_RO_SWAP_NEW_VOL,
            connection_info['data']['volume_id'])
        self.assertIn('serial', connection_info)
        self.assertEqual(
            self.cinder.MULTIATTACH_RO_SWAP_NEW_VOL,
            connection_info.get('serial'))
        self.assertEqual(
            self.cinder.MULTIATTACH_RO_SWAP_NEW_VOL,
            driver_block_device.get_volume_id(connection_info))

        # Assert that the new volume can be detached from the instance
        self.api.delete_server_volume(
            server_id, self.cinder.MULTIATTACH_RO_SWAP_NEW_VOL)
        self._wait_for_volume_detach(
            server_id, self.cinder.MULTIATTACH_RO_SWAP_NEW_VOL)

    def test_ro_multiattach_migrate_volume(self):
        """Assert the broken behaviour of a Cinder orchestrated migration."""
        server_id = self._create_server(networks='none')['id']
        self.api.post_server_volume(
            server_id,
            {
                'volumeAttachment': {
                    'volumeId': self.cinder.MULTIATTACH_RO_MIGRATE_OLD_VOL
                }
            }
        )
        self._wait_for_volume_attach(
            server_id, self.cinder.MULTIATTACH_RO_MIGRATE_OLD_VOL)

        # Mimic Cinder during the volume migration flow by calling swap_volume
        # with a source volume that has a migration_status of migrating.
        self.api.put_server_volume(
            server_id,
            self.cinder.MULTIATTACH_RO_MIGRATE_OLD_VOL,
            self.cinder.MULTIATTACH_RO_MIGRATE_NEW_VOL)

        bdm = objects.BlockDeviceMapping.get_by_volume_and_instance(
            context.get_admin_context(),
            self.cinder.MULTIATTACH_RO_MIGRATE_OLD_VOL,
            server_id)
        connection_info = jsonutils.loads(bdm.connection_info)

        # FIXME(lyarwood): This is bug #1943431 where only the serial within
        # the connection_info of the temporary volume has been updated to point
        # to the old volume UUID with the stashed volume_id provided by the
        # backend still pointing to the temporary volume UUID that has now been
        # deleted by cinder.
        self.assertIn('serial', connection_info)
        self.assertEqual(
            self.cinder.MULTIATTACH_RO_MIGRATE_OLD_VOL,
            connection_info.get('serial'))
        self.assertIn('volume_id', connection_info.get('data'))
        self.assertEqual(
            self.cinder.MULTIATTACH_RO_MIGRATE_NEW_VOL,
            connection_info['data']['volume_id'])
        self.assertEqual(
            self.cinder.MULTIATTACH_RO_MIGRATE_NEW_VOL,
            driver_block_device.get_volume_id(connection_info))

        # FIXME(lyarwood): As a result of the above any request to detach the
        # migrated multiattach volume from the instance or any action that
        # would cause the _disconnect_volume and _should_disconnect_target
        # logic to trigger in the libvirt driver will fail as
        # driver_block_device.get_volume_id points to the now deleted temporary
        # volume used during the migration.
        #
        # Replace this with the following once fixed:
        #
        # self.api.delete_server_volume(
        #     server_id, self.cinder.MULTIATTACH_RO_MIGRATE_OLD_VOL)
        # self._wait_for_volume_detach(
        #     server_id, self.cinder.MULTIATTACH_RO_MIGRATE_OLD_VOL)
        ex = self.assertRaises(
            client.OpenStackApiException,
            self.api.delete_server_volume,
            server_id,
            self.cinder.MULTIATTACH_RO_MIGRATE_OLD_VOL)
        self.assertEqual(500, ex.response.status_code)