From 87ade6d0f0aff986e5749398cea92949a66ae633 Mon Sep 17 00:00:00 2001 From: Matt Riedemann Date: Wed, 20 Nov 2019 11:47:36 -0500 Subject: [PATCH] Flesh out RevertResizeTask.rollback This implements the TODO in the RevertResizeTask.rollback method by logging an exception with some details and also setting the instance to ERROR state, recording a fault and sending an error notification, similar to the rollback method in ConfirmResizeTask. As noted inline, the exception message could be more detailed but I've left that for a later change in case people want more details. Part of blueprint cross-cell-resize Change-Id: I41296696da3226767d2bacba9345d829529ce4b6 --- nova/conductor/tasks/cross_cell_migrate.py | 49 ++++++++++++++----- .../tasks/test_cross_cell_migrate.py | 49 ++++++++++++++++--- 2 files changed, 81 insertions(+), 17 deletions(-) diff --git a/nova/conductor/tasks/cross_cell_migrate.py b/nova/conductor/tasks/cross_cell_migrate.py index a54342795e9c..1024c29d4cec 100644 --- a/nova/conductor/tasks/cross_cell_migrate.py +++ b/nova/conductor/tasks/cross_cell_migrate.py @@ -1139,7 +1139,9 @@ class RevertResizeTask(base.TaskBase): self.legacy_notifier = legacy_notifier self.compute_rpcapi = compute_rpcapi + # These are used for rollback handling. self._source_cell_migration = None + self._source_cell_instance = None self.volume_api = cinder.API() @@ -1395,6 +1397,7 @@ class RevertResizeTask(base.TaskBase): get_instance_from_source_cell( self.context, self.migration.source_compute, self.instance.uuid)) + self._source_cell_instance = source_cell_instance # Update the source cell database information based on the target cell # database, i.e. the instance/migration/BDMs/action records. Do all of @@ -1450,14 +1453,38 @@ class RevertResizeTask(base.TaskBase): source_cell_instance, fields.NotificationPhase.END) def rollback(self, ex): - # If we have updated the instance mapping to point at the source - # cell we update the migration from the source cell, otherwise we - # update the migration in the target cell. - migration = self._source_cell_migration \ - if self._source_cell_migration else self.migration - migration.status = 'error' - migration.save() - - # TODO(mriedem): Will have to think about setting the instance to - # ERROR status and/or recording a fault and sending an - # error notification (create new resize_revert.error notification?). + with excutils.save_and_reraise_exception(): + # If we have updated the instance mapping to point at the source + # cell we update the records in the source cell, otherwise we + # update the records in the target cell. + instance_at_source = self._source_cell_migration is not None + migration = self._source_cell_migration \ + if self._source_cell_migration else self.migration + instance = self._source_cell_instance \ + if self._source_cell_instance else self.instance + # NOTE(mriedem): This exception log is fairly generic. We could + # probably make this more targeted based on what we know of the + # state of the system if we want to make it more detailed, e.g. + # the execute method could "record" checkpoints to be used here + # or we could check to see if the instance was deleted from the + # target cell by trying to refresh it and handle InstanceNotFound. + LOG.exception( + 'An error occurred while reverting the resize for instance. ' + 'The instance is mapped to the %s cell %s. If the instance ' + 'was deleted from the target cell %s then the target host %s ' + 'was already cleaned up. If the instance is back in the ' + 'source cell then you can try hard-rebooting it to recover.', + ('source' if instance_at_source else 'target'), + migration._context.cell_uuid, self.context.cell_uuid, + migration.dest_compute, instance=instance) + # If anything failed set the migration status to 'error'. + migration.status = 'error' + migration.save() + # Put the instance into ERROR status, record a fault and send an + # error notification. + updates = {'vm_state': vm_states.ERROR, 'task_state': None} + request_spec = objects.RequestSpec.get_by_instance_uuid( + self.context, instance.uuid) + scheduler_utils.set_vm_state_and_notify( + instance._context, instance.uuid, 'compute_task', + 'migrate_server', updates, ex, request_spec) diff --git a/nova/tests/unit/conductor/tasks/test_cross_cell_migrate.py b/nova/tests/unit/conductor/tasks/test_cross_cell_migrate.py index b0a2bd535b1c..8ed22695aef8 100644 --- a/nova/tests/unit/conductor/tasks/test_cross_cell_migrate.py +++ b/nova/tests/unit/conductor/tasks/test_cross_cell_migrate.py @@ -1351,7 +1351,9 @@ class RevertResizeTaskTestCase(test.NoDBTestCase, ObjectComparatorMixin): source_cell_instance) _update_instance_mapping.assert_called_once_with( source_cell_instance, source_cell_mapping) - # _source_cell_migration should have been set for rollbacks + # _source_cell_instance and _source_cell_migration should have been + # set for rollbacks + self.assertIs(self.task._source_cell_instance, source_cell_instance) self.assertIs(self.task._source_cell_migration, mock.sentinel.source_cell_migration) # Cleanup at dest host. @@ -1377,26 +1379,61 @@ class RevertResizeTaskTestCase(test.NoDBTestCase, ObjectComparatorMixin): # Refresh the source cell instance so we have the latest data. mock_inst_refresh.assert_called_once_with() - def test_rollback_target_cell(self): + @mock.patch('nova.conductor.tasks.cross_cell_migrate.RevertResizeTask.' + '_execute') + @mock.patch('nova.objects.RequestSpec.get_by_instance_uuid') + @mock.patch('nova.scheduler.utils.set_vm_state_and_notify') + def test_rollback_target_cell( + self, mock_set_state_notify, mock_get_reqspec, mock_execute): """Tests the case that we did not update the instance mapping so we set the target cell migration to error status. """ + error = test.TestingException('zoinks!') + mock_execute.side_effect = error with mock.patch.object(self.task.migration, 'save') as mock_save: - self.task.rollback(test.TestingException('zoinks!')) + self.assertRaises(test.TestingException, self.task.execute) self.assertEqual('error', self.task.migration.status) mock_save.assert_called_once_with() + mock_get_reqspec.assert_called_once_with( + self.task.context, self.task.instance.uuid) + mock_set_state_notify.assert_called_once_with( + self.task.instance._context, self.task.instance.uuid, + 'compute_task', 'migrate_server', + {'vm_state': vm_states.ERROR, 'task_state': None}, error, + mock_get_reqspec.return_value) + self.assertIn('The instance is mapped to the target cell', + self.stdlog.logger.output) - def test_rollback_source_cell(self): + @mock.patch('nova.conductor.tasks.cross_cell_migrate.RevertResizeTask.' + '_execute') + @mock.patch('nova.objects.RequestSpec.get_by_instance_uuid') + @mock.patch('nova.scheduler.utils.set_vm_state_and_notify') + def test_rollback_source_cell( + self, mock_set_state_notify, mock_get_reqspec, mock_execute): """Tests the case that we did update the instance mapping so we set the source cell migration to error status. """ + source_cell_instance = self._generate_source_cell_instance() + source_cell_context = source_cell_instance._context + self.task._source_cell_instance = source_cell_instance self.task._source_cell_migration = objects.Migration( - status='reverting') + source_cell_context, status='reverting', dest_compute='dest-host') + error = test.TestingException('jinkies!') + mock_execute.side_effect = error with mock.patch.object(self.task._source_cell_migration, 'save') as mock_save: - self.task.rollback(test.TestingException('jinkies!')) + self.assertRaises(test.TestingException, self.task.execute) self.assertEqual('error', self.task._source_cell_migration.status) mock_save.assert_called_once_with() + mock_get_reqspec.assert_called_once_with( + self.task.context, self.task.instance.uuid) + mock_set_state_notify.assert_called_once_with( + source_cell_context, source_cell_instance.uuid, + 'compute_task', 'migrate_server', + {'vm_state': vm_states.ERROR, 'task_state': None}, error, + mock_get_reqspec.return_value) + self.assertIn('The instance is mapped to the source cell', + self.stdlog.logger.output) @mock.patch('nova.compute.utils.notify_about_instance_usage') @mock.patch('nova.compute.utils.notify_about_instance_action')