Error out migration when confirm_resize fails

If anything fails and raises an exception during
confirm_resize, the migration is left stuck in the
"confirming" status even though the instance status
may be "ERROR".

This change adds the errors_out_migration decorator
to the confirm_resize method to make sure the migration
status is "error" if an error is raised.

In bug 1821594 it was the driver.confirm_migration
method that raised an exception, so a unit test is
added here which simulates a similar scenario.

This only partially closes the bug because we are still
leaking allocations on the source node resource provider
since _delete_allocation_after_move is not called. That
will be dealt with in a separate patch.

Conflicts:
      nova/tests/functional/test_servers.py
      nova/tests/unit/compute/test_compute_mgr.py

NOTE(mriedem): The functional test conflict is due to not
having change I99427a52676826990d2a2ffc82cf30ad945b939c
in Rocky. The unit test conflict is due to not having
change I0851e2d54a1fdc82fe3291fb7e286e790f121e92 in Rocky.
The source_node attribute on the fake Migration object in
the unit test is added here because change
I312d61383345ea0ac1ab0c277b4c468e6aa94656 is not in Rocky.

Change-Id: Ic7d78ad43a2bad7f932c22c98944accbbed9e9e2
Partial-Bug: #1821594
(cherry picked from commit 408ef8f84a)
(cherry picked from commit 972d4e0eb3)
This commit is contained in:
Matt Riedemann 2019-03-25 13:16:42 -04:00
parent f1ac5183d4
commit 2a25d1e48b
2 changed files with 49 additions and 0 deletions

View File

@ -3825,6 +3825,7 @@ class ComputeManager(manager.Manager):
@wrap_exception() @wrap_exception()
@wrap_instance_event(prefix='compute') @wrap_instance_event(prefix='compute')
@errors_out_migration
@wrap_instance_fault @wrap_instance_fault
def confirm_resize(self, context, instance, migration): def confirm_resize(self, context, instance, migration):
"""Confirms a migration/resize and deletes the 'old' instance. """Confirms a migration/resize and deletes the 'old' instance.

View File

@ -6628,6 +6628,7 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
expected_attrs=['metadata', 'system_metadata', 'info_cache']) expected_attrs=['metadata', 'system_metadata', 'info_cache'])
self.migration = objects.Migration( self.migration = objects.Migration(
context=self.context.elevated(), context=self.context.elevated(),
id=1,
uuid=uuids.migration_uuid, uuid=uuids.migration_uuid,
instance_uuid=self.instance.uuid, instance_uuid=self.instance.uuid,
new_instance_type_id=7, new_instance_type_id=7,
@ -7043,6 +7044,53 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
mock_resources.return_value) mock_resources.return_value)
do_it() do_it()
# NOTE: stacked mock.patch decorators are applied bottom-up, so the mock
# arguments of the test arrive in the reverse of the decorator order
# (the Instance.save mock is the first argument after self).
@mock.patch('nova.compute.utils.add_instance_fault_from_exc')
@mock.patch('nova.objects.Migration.get_by_id')
@mock.patch('nova.objects.Instance.get_by_uuid')
@mock.patch('nova.compute.utils.notify_about_instance_usage')
@mock.patch('nova.compute.utils.notify_about_instance_action')
@mock.patch('nova.objects.Instance.save')
def test_confirm_resize_driver_confirm_migration_fails(
        self, instance_save, notify_action, notify_usage,
        instance_get_by_uuid, migration_get_by_id, add_fault):
    """Verify confirm_resize error handling when the driver call fails.

    Simulates driver.confirm_migration raising HypervisorUnavailable and
    checks that the exception is raised to the caller, that the instance
    vm_state ends up as ERROR, and that the migration status is set to
    'error' and saved.
    """
    self.migration.status = 'confirming'
    # Have the patched lookups hand back the fixtures used by this test.
    migration_get_by_id.return_value = self.migration
    instance_get_by_uuid.return_value = self.instance
    # The failure injected into the virt driver call below.
    error = exception.HypervisorUnavailable(
        host=self.migration.source_compute)
    with test.nested(
        mock.patch.object(self.compute, 'network_api'),
        mock.patch.object(self.compute.driver, 'confirm_migration',
                          side_effect=error)
    ) as (
        network_api, confirm_migration
    ):
        # The driver error is expected to propagate out of confirm_resize.
        self.assertRaises(exception.HypervisorUnavailable,
                          self.compute.confirm_resize,
                          self.context, self.instance, self.migration)
    # Make sure the instance is in ERROR status.
    self.assertEqual(vm_states.ERROR, self.instance.vm_state)
    # Make sure the migration is in error status.
    self.assertEqual('error', self.migration.status)
    # Instance.save is called twice, once to clear the resize metadata
    # and once to set the instance to ERROR status.
    self.assertEqual(2, instance_save.call_count)
    # The migration.status should have been saved.
    self.migration.save.assert_called_once_with()
    # Assert other mocks we care less about.
    notify_usage.assert_called_once()
    notify_action.assert_called_once()
    add_fault.assert_called_once()
    confirm_migration.assert_called_once()
    network_api.setup_networks_on_host.assert_called_once()
    instance_get_by_uuid.assert_called_once()
    migration_get_by_id.assert_called_once()
@mock.patch('nova.scheduler.utils.resources_from_flavor') @mock.patch('nova.scheduler.utils.resources_from_flavor')
def test_delete_allocation_after_move_confirm_by_migration(self, mock_rff): def test_delete_allocation_after_move_confirm_by_migration(self, mock_rff):
mock_rff.return_value = {} mock_rff.return_value = {}