From a3f286f43d866cd343d26d9bafadecab1c225e4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?El=C5=91d=20Ill=C3=A9s?= Date: Wed, 30 Aug 2017 16:54:36 +0200 Subject: [PATCH] Set error state after failed evacuation When evacuation fails with NoValidHost, the migration status remains 'accepted' instead of 'error'. This causes problem in case the compute service starts up again and looks for evacuations with status 'accepted', as it then removes the local instances for those evacuations even though the instance was never actually evacuated to another host. Change-Id: I06d78c744fa75ae5f34c5cfa76bc3c9460767b84 Closes-Bug: #1713783 (cherry picked from commit a8ebf5f1aac080854704e27146e8c98b053c6224) --- nova/conductor/manager.py | 23 ++++++++++++------- .../regressions/test_bug_1713783.py | 5 +--- nova/tests/unit/conductor/test_conductor.py | 13 +++++++++++ 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py index dd1e41efff23..14e369e61483 100644 --- a/nova/conductor/manager.py +++ b/nova/conductor/manager.py @@ -807,6 +807,15 @@ class ComputeTaskManager(base.Base): with compute_utils.EventReporter(context, 'rebuild_server', instance.uuid): node = limits = None + + try: + migration = objects.Migration.get_by_instance_and_status( + context, instance.uuid, 'accepted') + except exception.MigrationNotFoundByStatus: + LOG.debug("No migration record for the rebuild/evacuate " + "request.", instance=instance) + migration = None + # The host variable is passed in two cases: # 1. rebuild - the instance.host is passed to rebuild on the # same host and bypass the scheduler. @@ -857,6 +866,9 @@ class ComputeTaskManager(base.Base): host_dict['nodename'], host_dict['limits']) except exception.NoValidHost as ex: + if migration: + migration.status = 'error' + migration.save() request_spec = request_spec.to_legacy_request_spec_dict() with excutils.save_and_reraise_exception(): self._set_vm_state_and_notify(context, instance.uuid, @@ -866,6 +878,9 @@ class ComputeTaskManager(base.Base): LOG.warning("No valid host found for rebuild", instance=instance) except exception.UnsupportedPolicyException as ex: + if migration: + migration.status = 'error' + migration.save() request_spec = request_spec.to_legacy_request_spec_dict() with excutils.save_and_reraise_exception(): self._set_vm_state_and_notify(context, instance.uuid, @@ -875,14 +890,6 @@ class ComputeTaskManager(base.Base): LOG.warning("Server with unsupported policy " "cannot be rebuilt", instance=instance) - try: - migration = objects.Migration.get_by_instance_and_status( - context, instance.uuid, 'accepted') - except exception.MigrationNotFoundByStatus: - LOG.debug("No migration record for the rebuild/evacuate " - "request.", instance=instance) - migration = None - compute_utils.notify_about_instance_usage( self.notifier, context, instance, "rebuild.scheduled") diff --git a/nova/tests/functional/regressions/test_bug_1713783.py b/nova/tests/functional/regressions/test_bug_1713783.py index 4da5c6086497..4233511aaf67 100644 --- a/nova/tests/functional/regressions/test_bug_1713783.py +++ b/nova/tests/functional/regressions/test_bug_1713783.py @@ -118,7 +118,4 @@ class FailedEvacuateStateTests(test.TestCase, self.assertEqual('evacuation', migrations[0]['migration_type']) self.assertEqual(server['id'], migrations[0]['instance_uuid']) self.assertEqual(self.hostname, migrations[0]['source_compute']) - self.assertEqual('accepted', migrations[0]['status']) - # NOTE(elod.illes): Migration status should be 'error' and not - # 'accepted'. Needs to be replaced when bug #1713783 is fixed. - # self.assertEqual('error', migrations[0]['status']) + self.assertEqual('error', migrations[0]['status']) diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py index ad96d1c1282b..b4885cc7f6de 100644 --- a/nova/tests/unit/conductor/test_conductor.py +++ b/nova/tests/unit/conductor/test_conductor.py @@ -1282,6 +1282,15 @@ class _BaseTaskTestCase(object): # build_instances() is a cast, we need to wait for it to complete self.useFixture(cast_as_call.CastAsCall(self)) + # Create the migration record (normally created by the compute API). + migration = objects.Migration(self.context, + source_compute=inst_obj.host, + source_node=inst_obj.node, + instance_uuid=inst_obj.uuid, + status='accepted', + migration_type='evacuation') + migration.create() + self.assertRaises(exc.UnsupportedPolicyException, self.conductor.rebuild_instance, self.context, @@ -1294,6 +1303,10 @@ class _BaseTaskTestCase(object): self.assertFalse(select_dest_mock.called) self.assertFalse(rebuild_mock.called) + # Assert the migration status was updated. + migration = objects.Migration.get_by_id(self.context, migration.id) + self.assertEqual('error', migration.status) + def test_rebuild_instance_evacuate_migration_record(self): inst_obj = self._create_fake_instance_obj() migration = objects.Migration(context=self.context,