From 65a888fbaa8b4ee7c409e00ba069c5cc343c0f63 Mon Sep 17 00:00:00 2001 From: Lee Yarwood Date: Mon, 27 Jul 2020 19:27:24 +0100 Subject: [PATCH] compute: Don't delete the original attachment during pre LM rollback I0bfb11296430dfffe9b091ae7c3a793617bd9d0d introduced support for live migration with cinderv3 volume attachments during Queens. This initial support handled failures in pre_live_migration directly by removing any attachments created on the destination and reverting to the original attachment ids before re-raising the caught exception to the source compute. It also added rollback code within the main _rollback_live_migration method but missed that this would also be called during a pre_live_migration rollback. As a result, after a failure in pre_live_migration, _rollback_live_migration will attempt to delete the source host volume attachments referenced by the bdm before updating the bdms with the now non-existent attachment ids, leaving the volumes in an `available` state in Cinder as they have no attachment records associated with them anymore. This change aims to resolve this within _rollback_volume_bdms by ensuring that the current and original attachment_ids are not equal before requesting that the current attachment referenced by the bdm is deleted. When called after a failure in pre_live_migration, this should result in no attempt being made to remove the original source host attachments from Cinder. Note that the following changes muddy the waters slightly here but introduced no actual changes to the logic within _rollback_live_migration: * I0f3ab6604d8b79bdb75cf67571e359cfecc039d8 reworked some of the error handling in Rocky but isn't the source of the issue here. * Ibe9215c07a1ee00e0e121c69bcf7ee1b1b80fae0 reworked _rollback_live_migration to use the provided source_bdms. * I6bc73e8c8f98d9955f33f309beb8a7c56981b553 then refactored _rollback_live_migration, moving the logic into a self-contained _rollback_volume_bdms method. 
Closes-Bug: #1889108 Change-Id: I9edb36c4df1cc0d8b529e669f06540de71766085 (cherry picked from commit 2102f1834a6ac9fd870bfb457b28a2172f33e281) (cherry picked from commit 034b2fa8fea0e34fed95a2ba728e4387ce4e78de) (cherry picked from commit c6c3483a41c2a53bcfafdfe3475283b0379f7f1d) (cherry picked from commit 6007c41aa830e1d1e227a8c66285b2eb32fe9a7c) (cherry picked from commit b18e93a2a486cc91e3867cefd68d223359d14bed) --- nova/compute/manager.py | 7 ++++++- .../functional/regressions/test_bug_1889108.py | 12 +++++------- nova/tests/unit/compute/test_compute_mgr.py | 13 +++++++++++++ 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 099097641229..adc6696b3dc3 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -6881,7 +6881,12 @@ class ComputeManager(manager.Manager): for bdm in bdms: try: original_bdm = original_bdms_by_volid[bdm.volume_id] - if bdm.attachment_id and original_bdm.attachment_id: + # NOTE(lyarwood): Only delete the referenced attachment if it + # is different to the original in order to avoid accidentally + # removing the source host volume attachment after it has + # already been rolled back by a failure in pre_live_migration. + if (bdm.attachment_id and original_bdm.attachment_id and + bdm.attachment_id != original_bdm.attachment_id): # NOTE(lyarwood): 3.44 cinder api flow. Delete the # attachment used by the bdm and reset it to that of # the original bdm. diff --git a/nova/tests/functional/regressions/test_bug_1889108.py b/nova/tests/functional/regressions/test_bug_1889108.py index 4f6d6441364a..68f954baae06 100644 --- a/nova/tests/functional/regressions/test_bug_1889108.py +++ b/nova/tests/functional/regressions/test_bug_1889108.py @@ -27,8 +27,8 @@ class TestVolAttachmentsDuringPreLiveMigration( """Regression test for bug 1889108. 
This regression test asserts that the original source volume attachments - are incorrectly removed during the rollback from pre_live_migration - failures on the destination. + are not removed during the rollback from pre_live_migration failures on the + destination. """ api_major_version = 'v2.1' microversion = 'latest' @@ -114,9 +114,7 @@ class TestVolAttachmentsDuringPreLiveMigration( server = self.api.get_server(server['id']) self.assertEqual(src_host, server['OS-EXT-SRV-ATTR:host']) - # FIXME(lyarwood): Assert that both the src and dest attachments have - # been removed. Only the dest attachment should be removed during the - # rollback of a pre_live_migration failure. + # Assert that the src attachment is still present attachments = self.cinder.volume_to_attachment.get(volume_id) - self.assertNotIn(src_attachment_id, attachments.keys()) - self.assertEqual(0, len(attachments)) + self.assertIn(src_attachment_id, attachments.keys()) + self.assertEqual(1, len(attachments)) diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 09cc97dc0991..803fe72781b7 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -8284,6 +8284,19 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase): self.assertIn('Exception while attempting to rollback', mock_log.exception.call_args[0][0]) + @mock.patch('nova.volume.cinder.API.attachment_delete') + def test_rollback_volume_bdms_after_pre_failure( + self, mock_delete_attachment): + instance = fake_instance.fake_instance_obj( + self.context, uuid=uuids.instance) + original_bdms = bdms = self._generate_volume_bdm_list(instance) + self.compute._rollback_volume_bdms( + self.context, bdms, original_bdms, instance) + # Assert that attachment_delete isn't called when the bdms have already + # been rolled back by a failure in pre_live_migration to reference the + # source bdms. 
+ mock_delete_attachment.assert_not_called() + @mock.patch.object(objects.ComputeNode, 'get_first_node_by_host_for_old_compat') @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'