From 377dc5bda9369e50c0c4a893564fc218b4b31fb1 Mon Sep 17 00:00:00 2001 From: Lee Yarwood Date: Tue, 30 Apr 2019 11:59:48 +0100 Subject: [PATCH] compute: refactor volume bdm rollback error handling Previously any exception while rolling back the connection_info and attachment_id of volume bdms would result in the overall attempt to rollback a LM failing. This change refactors this specific bdm rollback logic into two self contained methods that ignore by default errors where possible to allow the LM rollback attempt to continue. Change-Id: I6bc73e8c8f98d9955f33f309beb8a7c56981b553 (cherry picked from commit 9524a5a1b5745f6064f88cbfbf5bbfae3a973bef) --- nova/compute/manager.py | 81 ++++++++----- nova/tests/unit/compute/test_compute_mgr.py | 124 ++++++++++++++++++++ 2 files changed, 178 insertions(+), 27 deletions(-) diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 87ef1440aa1f..399e8a255594 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -7738,6 +7738,50 @@ class ComputeManager(manager.Manager): action=fields.NotificationAction.LIVE_MIGRATION_POST_DEST, phase=fields.NotificationPhase.END) + def _remove_remote_volume_connections(self, context, dest, bdms, instance): + """Rollback remote volume connections on the dest""" + for bdm in bdms: + try: + # remove the connection on the destination host + # NOTE(lyarwood): This actually calls the cinderv2 + # os-terminate_connection API if required. + self.compute_rpcapi.remove_volume_connection( + context, instance, bdm.volume_id, dest) + except Exception: + LOG.warning("Ignoring exception while attempting " + "to rollback volume connections for " + "volume %s on host %s.", bdm.volume_id, + dest, instance=instance) + + def _rollback_volume_bdms(self, context, bdms, original_bdms, instance): + """Rollback the connection_info and attachment_id for each bdm""" + original_bdms_by_volid = {bdm.volume_id: bdm for bdm in original_bdms + if bdm.is_volume} + for bdm in bdms: + try: + original_bdm = original_bdms_by_volid[bdm.volume_id] + if bdm.attachment_id and original_bdm.attachment_id: + # NOTE(lyarwood): 3.44 cinder api flow. Delete the + # attachment used by the bdm and reset it to that of + # the original bdm. + self.volume_api.attachment_delete(context, + bdm.attachment_id) + bdm.attachment_id = original_bdm.attachment_id + # NOTE(lyarwood): Reset the connection_info to the original + bdm.connection_info = original_bdm.connection_info + bdm.save() + except cinder_exception.ClientException: + LOG.warning("Ignoring cinderclient exception when " + "attempting to delete attachment %s for volume" + "%s while rolling back volume bdms.", + bdm.attachment_id, bdm.volume_id, + instance=instance) + except Exception: + with excutils.save_and_reraise_exception(): + LOG.exception("Exception while attempting to rollback " + "BDM for volume %s.", bdm.volume_id, + instance=instance) + @wrap_exception() @wrap_instance_fault def _rollback_live_migration(self, context, instance, @@ -7806,35 +7850,18 @@ class ComputeManager(manager.Manager): self.driver.rollback_live_migration_at_source(context, instance, migrate_data) - source_bdms_by_volid = {bdm.volume_id: bdm for bdm in source_bdms - if bdm.is_volume} - - # NOTE(lyarwood): Fetch the current list of BDMs and delete any volume - # attachments used by the destination host before rolling back to the - # original and still valid source host volume attachments. + # NOTE(lyarwood): Fetch the current list of BDMs, disconnect any + # connected volumes from the dest and delete any volume attachments + # used by the destination host before rolling back to the original + # still valid source host volume attachments. bdms = objects.BlockDeviceMappingList.get_by_instance_uuid( context, instance.uuid) - for bdm in bdms: - if bdm.is_volume: - # remove the connection on the destination host - # NOTE(lyarwood): This actually calls the cinderv2 - # os-terminate_connection API if required. - self.compute_rpcapi.remove_volume_connection( - context, instance, bdm.volume_id, dest) - - source_bdm = source_bdms_by_volid[bdm.volume_id] - if bdm.attachment_id and source_bdm.attachment_id: - # NOTE(lyarwood): 3.44 cinder api flow. Delete the - # attachment used by the destination and reset the bdm - # attachment_id to the old attachment of the source host. - self.volume_api.attachment_delete(context, - bdm.attachment_id) - bdm.attachment_id = source_bdm.attachment_id - - # NOTE(lyarwood): Rollback the connection_info stored within - # the BDM to that used by the source and not the destination. - bdm.connection_info = source_bdm.connection_info - bdm.save() + # TODO(lyarwood): Turn the following into a lookup method within + # BlockDeviceMappingList. + vol_bdms = [bdm for bdm in bdms if bdm.is_volume] + self._remove_remote_volume_connections(context, dest, vol_bdms, + instance) + self._rollback_volume_bdms(context, vol_bdms, source_bdms, instance) self._notify_about_instance_usage(context, instance, "live_migration._rollback.start") diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 84f180914e80..b21a454cab83 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -9399,6 +9399,130 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase, _test() + def _generate_volume_bdm_list(self, instance, original=False): + # TODO(lyarwood): There are various methods generating fake bdms within + # this class, we should really look at writing a small number of + # generic reusable methods somewhere to replace all of these. + connection_info = "{'data': {'host': 'dest'}}" + if original: + connection_info = "{'data': {'host': 'original'}}" + + vol1_bdm = fake_block_device.fake_bdm_object( + self.context, + {'source_type': 'volume', 'destination_type': 'volume', + 'volume_id': uuids.vol1, 'device_name': '/dev/vdb', + 'instance_uuid': instance.uuid, + 'connection_info': connection_info}) + vol1_bdm.save = mock.Mock() + vol2_bdm = fake_block_device.fake_bdm_object( + self.context, + {'source_type': 'volume', 'destination_type': 'volume', + 'volume_id': uuids.vol2, 'device_name': '/dev/vdc', + 'instance_uuid': instance.uuid, + 'connection_info': connection_info}) + vol2_bdm.save = mock.Mock() + + if original: + vol1_bdm.attachment_id = uuids.vol1_attach_original + vol2_bdm.attachment_id = uuids.vol2_attach_original + else: + vol1_bdm.attachment_id = uuids.vol1_attach + vol2_bdm.attachment_id = uuids.vol2_attach + return objects.BlockDeviceMappingList(objects=[vol1_bdm, vol2_bdm]) + + @mock.patch('nova.compute.rpcapi.ComputeAPI.remove_volume_connection') + def test_remove_remote_volume_connections(self, mock_remove_vol_conn): + instance = fake_instance.fake_instance_obj(self.context, + uuid=uuids.instance) + bdms = self._generate_volume_bdm_list(instance) + + self.compute._remove_remote_volume_connections(self.context, 'fake', + bdms, instance) + mock_remove_vol_conn.assert_has_calls = [ + mock.call(self.context, instance, bdm.volume_id, 'fake') for + bdm in bdms] + + @mock.patch('nova.compute.rpcapi.ComputeAPI.remove_volume_connection') + def test_remove_remote_volume_connections_exc(self, mock_remove_vol_conn): + instance = fake_instance.fake_instance_obj(self.context, + uuid=uuids.instance) + bdms = self._generate_volume_bdm_list(instance) + + # Raise an exception for the second call to remove_volume_connections + mock_remove_vol_conn.side_effect = [None, test.TestingException] + + # Assert that errors are ignored + self.compute._remove_remote_volume_connections(self.context, + 'fake', bdms, instance) + + @mock.patch('nova.volume.cinder.API.attachment_delete') + def test_rollback_volume_bdms(self, mock_delete_attachment): + instance = fake_instance.fake_instance_obj(self.context, + uuid=uuids.instance) + bdms = self._generate_volume_bdm_list(instance) + original_bdms = self._generate_volume_bdm_list(instance, + original=True) + + self.compute._rollback_volume_bdms(self.context, bdms, + original_bdms, instance) + + # Assert that we delete the current attachments + mock_delete_attachment.assert_has_calls = [ + mock.call(self.context, uuids.vol1_attach), + mock.call(self.context, uuids.vol2_attach)] + # Assert that we switch the attachment ids and connection_info for each + # bdm back to their original values + self.assertEqual(uuids.vol1_attach_original, + bdms[0].attachment_id) + self.assertEqual("{'data': {'host': 'original'}}", + bdms[0].connection_info) + self.assertEqual(uuids.vol2_attach_original, + bdms[1].attachment_id) + self.assertEqual("{'data': {'host': 'original'}}", + bdms[1].connection_info) + # Assert that save is called for each bdm + bdms[0].save.assert_called_once() + bdms[1].save.assert_called_once() + + @mock.patch('nova.compute.manager.LOG') + @mock.patch('nova.volume.cinder.API.attachment_delete') + def test_rollback_volume_bdms_exc(self, mock_delete_attachment, mock_log): + instance = fake_instance.fake_instance_obj(self.context, + uuid=uuids.instance) + bdms = self._generate_volume_bdm_list(instance) + original_bdms = self._generate_volume_bdm_list(instance, + original=True) + + # Assert that we ignore cinderclient exceptions and continue to attempt + # to rollback any remaining bdms. + mock_delete_attachment.side_effect = [ + cinder_exception.ClientException(code=9001), None] + self.compute._rollback_volume_bdms(self.context, bdms, + original_bdms, instance) + self.assertEqual(uuids.vol2_attach_original, + bdms[1].attachment_id) + self.assertEqual("{'data': {'host': 'original'}}", + bdms[1].connection_info) + bdms[0].save.assert_not_called() + bdms[1].save.assert_called_once() + mock_log.warning.assert_called_once() + self.assertIn('Ignoring cinderclient exception', + mock_log.warning.call_args[0][0]) + + # Assert that we raise unknown Exceptions + mock_log.reset_mock() + bdms[0].save.reset_mock() + bdms[1].save.reset_mock() + mock_delete_attachment.side_effect = test.TestingException + self.assertRaises(test.TestingException, + self.compute._rollback_volume_bdms, self.context, + bdms, original_bdms, instance) + bdms[0].save.assert_not_called() + bdms[1].save.assert_not_called() + mock_log.exception.assert_called_once() + self.assertIn('Exception while attempting to rollback', + mock_log.exception.call_args[0][0]) + @mock.patch.object(objects.ComputeNode, 'get_first_node_by_host_for_old_compat') @mock.patch('nova.scheduler.client.report.SchedulerReportClient.'