Browse Source

Error out migration when confirm_resize fails

If anything fails and raises an exception during
confirm_resize, the migration status is stuck in
"confirming" status even though the instance status
may be "ERROR".

This change adds the errors_out_migration decorator
to the confirm_resize method to make sure the migration
status is "error" if an error is raised.

In bug 1821594 it was the driver.confirm_migration
method that raised some exception, so a unit test is
added here which simulates a similar scenario.

This only partially closes the bug because we are still
leaking allocations on the source node resource provider
since _delete_allocation_after_move is not called. That
will be dealt with in a separate patch.

Conflicts:
      nova/tests/unit/compute/test_compute_mgr.py

NOTE(mriedem): The conflict is due to not having change
Ia05525058e47efb806cf8820410c8bc80eccca25 in Queens.

Change-Id: Ic7d78ad43a2bad7f932c22c98944accbbed9e9e2
Partial-Bug: #1821594
(cherry picked from commit 408ef8f84a)
(cherry picked from commit 972d4e0eb3)
(cherry picked from commit e3f69c8af0)
tags/17.0.11
Matt Riedemann 5 months ago
parent
commit
c6a42cd3b3
2 changed files with 50 additions and 0 deletions
  1. 1
    0
      nova/compute/manager.py
  2. 49
    0
      nova/tests/unit/compute/test_compute_mgr.py

+ 1
- 0
nova/compute/manager.py View File

@@ -3696,6 +3696,7 @@ class ComputeManager(manager.Manager):
3696 3696
 
3697 3697
     @wrap_exception()
3698 3698
     @wrap_instance_event(prefix='compute')
3699
+    @errors_out_migration
3699 3700
     @wrap_instance_fault
3700 3701
     def confirm_resize(self, context, instance, migration):
3701 3702
         """Confirms a migration/resize and deletes the 'old' instance.

+ 49
- 0
nova/tests/unit/compute/test_compute_mgr.py View File

@@ -6256,12 +6256,14 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
6256 6256
                 expected_attrs=['metadata', 'system_metadata', 'info_cache'])
6257 6257
         self.migration = objects.Migration(
6258 6258
             context=self.context.elevated(),
6259
+            id=1,
6259 6260
             uuid=mock.sentinel.uuid,
6260 6261
             instance_uuid=self.instance.uuid,
6261 6262
             new_instance_type_id=7,
6262 6263
             dest_compute=None,
6263 6264
             dest_node=None,
6264 6265
             dest_host=None,
6266
+            source_compute='source_compute',
6265 6267
             status='migrating')
6266 6268
         self.migration.save = mock.MagicMock()
6267 6269
         self.useFixture(fixtures.SpawnIsSynchronousFixture())
@@ -6664,6 +6666,53 @@ class ComputeManagerMigrationTestCase(test.NoDBTestCase):
6664 6666
                 mock_resources.return_value)
6665 6667
         do_it()
6666 6668
 
6669
+    @mock.patch('nova.compute.utils.add_instance_fault_from_exc')
6670
+    @mock.patch('nova.objects.Migration.get_by_id')
6671
+    @mock.patch('nova.objects.Instance.get_by_uuid')
6672
+    @mock.patch('nova.compute.utils.notify_about_instance_usage')
6673
+    @mock.patch('nova.compute.utils.notify_about_instance_action')
6674
+    @mock.patch('nova.objects.Instance.save')
6675
+    def test_confirm_resize_driver_confirm_migration_fails(
6676
+            self, instance_save, notify_action, notify_usage,
6677
+            instance_get_by_uuid, migration_get_by_id, add_fault):
6678
+        """Tests the scenario that driver.confirm_migration raises some error
6679
+        to make sure the error is properly handled, like the instance and
6680
+        migration status is set to 'error'.
6681
+        """
6682
+        self.migration.status = 'confirming'
6683
+        migration_get_by_id.return_value = self.migration
6684
+        instance_get_by_uuid.return_value = self.instance
6685
+
6686
+        error = exception.HypervisorUnavailable(
6687
+            host=self.migration.source_compute)
6688
+        with test.nested(
6689
+            mock.patch.object(self.compute, 'network_api'),
6690
+            mock.patch.object(self.compute.driver, 'confirm_migration',
6691
+                              side_effect=error)
6692
+        ) as (
6693
+            network_api, confirm_migration
6694
+        ):
6695
+            self.assertRaises(exception.HypervisorUnavailable,
6696
+                              self.compute.confirm_resize,
6697
+                              self.context, self.instance, self.migration)
6698
+        # Make sure the instance is in ERROR status.
6699
+        self.assertEqual(vm_states.ERROR, self.instance.vm_state)
6700
+        # Make sure the migration is in error status.
6701
+        self.assertEqual('error', self.migration.status)
6702
+        # Instance.save is called twice, once to clear the resize metadata
6703
+        # and once to set the instance to ERROR status.
6704
+        self.assertEqual(2, instance_save.call_count)
6705
+        # The migration.status should have been saved.
6706
+        self.migration.save.assert_called_once_with()
6707
+        # Assert other mocks we care less about.
6708
+        notify_usage.assert_called_once()
6709
+        notify_action.assert_called_once()
6710
+        add_fault.assert_called_once()
6711
+        confirm_migration.assert_called_once()
6712
+        network_api.setup_networks_on_host.assert_called_once()
6713
+        instance_get_by_uuid.assert_called_once()
6714
+        migration_get_by_id.assert_called_once()
6715
+
6667 6716
     @mock.patch('nova.scheduler.utils.resources_from_flavor')
6668 6717
     def test_delete_allocation_after_move_confirm_by_migration(self, mock_rff):
6669 6718
         mock_rff.return_value = {}

Loading…
Cancel
Save