diff --git a/nova/conductor/manager.py b/nova/conductor/manager.py index b5236a974634..bfd9769497ce 100644 --- a/nova/conductor/manager.py +++ b/nova/conductor/manager.py @@ -17,6 +17,7 @@ import contextlib import copy import functools +import sys from oslo_config import cfg from oslo_log import log as logging @@ -956,10 +957,12 @@ class ComputeTaskManager(base.Base): with excutils.save_and_reraise_exception(): self._set_vm_state_and_notify(context, instance.uuid, 'rebuild_server', - {'vm_state': instance.vm_state, + {'vm_state': vm_states.ERROR, 'task_state': None}, ex, request_spec) LOG.warning("No valid host found for rebuild", instance=instance) + compute_utils.add_instance_fault_from_exc(context, + instance, ex, sys.exc_info()) except exception.UnsupportedPolicyException as ex: if migration: migration.status = 'error' @@ -968,10 +971,12 @@ class ComputeTaskManager(base.Base): with excutils.save_and_reraise_exception(): self._set_vm_state_and_notify(context, instance.uuid, 'rebuild_server', - {'vm_state': instance.vm_state, + {'vm_state': vm_states.ERROR, 'task_state': None}, ex, request_spec) LOG.warning("Server with unsupported policy " "cannot be rebuilt", instance=instance) + compute_utils.add_instance_fault_from_exc(context, + instance, ex, sys.exc_info()) compute_utils.notify_about_instance_usage( self.notifier, context, instance, "rebuild.scheduled") diff --git a/nova/tests/functional/regressions/test_bug_1713783.py b/nova/tests/functional/regressions/test_bug_1713783.py index 4233511aaf67..bc023e779c6b 100644 --- a/nova/tests/functional/regressions/test_bug_1713783.py +++ b/nova/tests/functional/regressions/test_bug_1713783.py @@ -109,7 +109,7 @@ class FailedEvacuateStateTests(test.TestCase, self._wait_for_notification_event_type('compute_task.rebuild_server') - server = self._wait_for_state_change(self.api, server, 'ACTIVE') + server = self._wait_for_state_change(self.api, server, 'ERROR') self.assertEqual(self.hostname, server['OS-EXT-SRV-ATTR:host']) # Check migrations diff --git a/nova/tests/functional/test_server_group.py b/nova/tests/functional/test_server_group.py index 61c68854c79a..9e8ea76e713d 100644 --- a/nova/tests/functional/test_server_group.py +++ b/nova/tests/functional/test_server_group.py @@ -455,7 +455,7 @@ class ServerGroupTestV21(ServerGroupTestBase): self.admin_api.post_server_action(servers[1]['id'], post) self._wait_for_migration_status(servers[1], 'error') server_after_failed_evac = self._wait_for_state_change( - self.admin_api, servers[1], 'ACTIVE') + self.admin_api, servers[1], 'ERROR') # assert that after a failed evac the server active on the same host # as before @@ -479,7 +479,7 @@ class ServerGroupTestV21(ServerGroupTestBase): self.admin_api.post_server_action(servers[1]['id'], post) self._wait_for_migration_status(servers[1], 'error') server_after_failed_evac = self._wait_for_state_change( - self.admin_api, servers[1], 'ACTIVE') + self.admin_api, servers[1], 'ERROR') # assert that after a failed evac the server active on the same host # as before @@ -653,7 +653,7 @@ class ServerGroupTestV215(ServerGroupTestV21): self.admin_api.post_server_action(servers[1]['id'], post) self._wait_for_migration_status(servers[1], 'error') server_after_failed_evac = self._wait_for_state_change( - self.admin_api, servers[1], 'ACTIVE') + self.admin_api, servers[1], 'ERROR') # assert that after a failed evac the server active on the same host # as before @@ -677,7 +677,7 @@ class ServerGroupTestV215(ServerGroupTestV21): self.admin_api.post_server_action(servers[1]['id'], post) self._wait_for_migration_status(servers[1], 'error') server_after_failed_evac = self._wait_for_state_change( - self.admin_api, servers[1], 'ACTIVE') + self.admin_api, servers[1], 'ERROR') # assert that after a failed evac the server active on the same host # as before diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py index a22db98aba41..ed485f0215b7 100644 --- a/nova/tests/functional/test_servers.py +++ b/nova/tests/functional/test_servers.py @@ -1205,6 +1205,10 @@ class ServerRebuildTestCase(integrated_helpers._IntegratedTestBase, server = self.api.get_server(server['id']) self.assertEqual(rebuild_image_ref, server['image']['id']) + # The server should be in ERROR state + self.assertEqual('ERROR', server['status']) + self.assertIn('No valid host', server['fault']['message']) + def test_rebuild_with_new_image(self): """Rebuilds a server with a different image which will run it through the scheduler to validate the image is still OK with the compute host diff --git a/nova/tests/unit/conductor/test_conductor.py b/nova/tests/unit/conductor/test_conductor.py index 4c9d35eaa70a..7f9b756ebdeb 100644 --- a/nova/tests/unit/conductor/test_conductor.py +++ b/nova/tests/unit/conductor/test_conductor.py @@ -1338,8 +1338,10 @@ class _BaseTaskTestCase(object): 'select_destinations', side_effect=exc.NoValidHost(reason='')), mock.patch('nova.scheduler.utils.build_request_spec', - return_value=request_spec) - ) as (rebuild_mock, sig_mock, fp_mock, select_dest_mock, bs_mock): + return_value=request_spec), + mock.patch.object(scheduler_utils, 'set_vm_state_and_notify') + ) as (rebuild_mock, sig_mock, fp_mock, + select_dest_mock, bs_mock, set_vm_state_and_notify_mock): self.assertRaises(exc.NoValidHost, self.conductor_manager.rebuild_instance, context=self.context, instance=inst_obj, @@ -1349,7 +1351,11 @@ class _BaseTaskTestCase(object): select_dest_mock.assert_called_once_with(self.context, fake_spec, [inst_obj.uuid], return_objects=True, return_alternates=False) + self.assertEqual( + set_vm_state_and_notify_mock.call_args[0][4]['vm_state'], + vm_states.ERROR) self.assertFalse(rebuild_mock.called) + self.assertIn('No valid host', inst_obj.fault.message) @mock.patch.object(conductor_manager.compute_rpcapi.ComputeAPI, 'rebuild_instance') @@ -1390,12 +1396,14 @@ class _BaseTaskTestCase(object): self.context, inst_obj, **rebuild_args) - updates = {'vm_state': vm_states.ACTIVE, 'task_state': None} + updates = {'vm_state': vm_states.ERROR, 'task_state': None} state_mock.assert_called_once_with(self.context, inst_obj.uuid, 'rebuild_server', updates, exception, mock.ANY) self.assertFalse(select_dest_mock.called) self.assertFalse(rebuild_mock.called) + self.assertIn('ServerGroup policy is not supported', + inst_obj.fault.message) # Assert the migration status was updated. migration = objects.Migration.get_by_id(self.context, migration.id) diff --git a/releasenotes/notes/bug-1744325-rebuild-error-status-9e2da03f3f81fd6e.yaml b/releasenotes/notes/bug-1744325-rebuild-error-status-9e2da03f3f81fd6e.yaml new file mode 100644 index 000000000000..086d4094fb4d --- /dev/null +++ b/releasenotes/notes/bug-1744325-rebuild-error-status-9e2da03f3f81fd6e.yaml @@ -0,0 +1,7 @@ +--- +fixes: + - | + If scheduling fails during rebuild the server instance will go to ERROR + state and a fault will be recorded. `Bug 1744325`_ + + .. _Bug 1744325: https://bugs.launchpad.net/nova/+bug/1744325