
Set server status to ERROR if rebuild failed

Currently there is no indication that the rebuild was refused,
and worse, the instance may be left with the wrong imageref.

This patch sets the instance to ERROR status if the rebuild fails in
the scheduling stage. The user can then rebuild the instance with a
valid image to get it out of the ERROR state and restore the correct
instance metadata and properties.

Closes-Bug: 1744325

Change-Id: Ibb7bee15a3d4ee6f0ef53ba12e8b41f65a1fe999
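Note (not part of this change): the recovery step described in the commit message is an ordinary server rebuild with a known-good image. A minimal sketch using python-novaclient, where the auth details and the server/image IDs are placeholders:

    # Minimal sketch, not part of this change: rebuild an ERROR'd server with a
    # known-good image so the conductor can reschedule it and reset its metadata.
    # Assumes a reachable Keystone endpoint; all credentials/IDs are placeholders.
    from keystoneauth1 import session
    from keystoneauth1.identity import v3
    from novaclient import client

    auth = v3.Password(auth_url='http://keystone:5000/v3',
                       username='demo', password='secret',
                       project_name='demo',
                       user_domain_id='default', project_domain_id='default')
    nova = client.Client('2.1', session=session.Session(auth=auth))

    server = nova.servers.get('SERVER_UUID')          # currently in ERROR
    nova.servers.rebuild(server, 'VALID_IMAGE_UUID')  # schedules a new rebuild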
Ref: changes/68/536268/7
Author: int32bit (4 years ago), committed by Matt Riedemann
Commit: d03a890a34
Changed files:
  1. nova/conductor/manager.py (9 changed lines)
  2. nova/tests/functional/regressions/test_bug_1713783.py (2 changed lines)
  3. nova/tests/functional/test_server_group.py (8 changed lines)
  4. nova/tests/functional/test_servers.py (4 changed lines)
  5. nova/tests/unit/conductor/test_conductor.py (14 changed lines)
  6. releasenotes/notes/bug-1744325-rebuild-error-status-9e2da03f3f81fd6e.yaml (7 changed lines)

nova/conductor/manager.py (9 changed lines)

@@ -17,6 +17,7 @@
 import contextlib
 import copy
 import functools
+import sys

 from oslo_config import cfg
 from oslo_log import log as logging
@@ -956,10 +957,12 @@ class ComputeTaskManager(base.Base):
             with excutils.save_and_reraise_exception():
                 self._set_vm_state_and_notify(context, instance.uuid,
                         'rebuild_server',
-                        {'vm_state': instance.vm_state,
+                        {'vm_state': vm_states.ERROR,
                          'task_state': None}, ex, request_spec)
                 LOG.warning("No valid host found for rebuild",
                             instance=instance)
+                compute_utils.add_instance_fault_from_exc(context,
+                    instance, ex, sys.exc_info())
         except exception.UnsupportedPolicyException as ex:
             if migration:
                 migration.status = 'error'
@@ -968,10 +971,12 @@ class ComputeTaskManager(base.Base):
             with excutils.save_and_reraise_exception():
                 self._set_vm_state_and_notify(context, instance.uuid,
                         'rebuild_server',
-                        {'vm_state': instance.vm_state,
+                        {'vm_state': vm_states.ERROR,
                          'task_state': None}, ex, request_spec)
                 LOG.warning("Server with unsupported policy "
                             "cannot be rebuilt", instance=instance)
+                compute_utils.add_instance_fault_from_exc(context,
+                    instance, ex, sys.exc_info())

     compute_utils.notify_about_instance_usage(
         self.notifier, context, instance, "rebuild.scheduled")
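
For readers unfamiliar with the oslo.utils helper used in both handlers, here is a standalone sketch (not Nova code) of the pattern this change relies on: record the ERROR state and the fault inside save_and_reraise_exception, so the original scheduling exception still propagates to the caller. set_error_state and record_fault below are hypothetical stand-ins for _set_vm_state_and_notify and compute_utils.add_instance_fault_from_exc.

    # Standalone sketch of the error-handling pattern above (not Nova code).
    import sys

    from oslo_utils import excutils


    def set_error_state(instance):
        # Hypothetical stand-in for _set_vm_state_and_notify.
        instance['vm_state'] = 'error'
        instance['task_state'] = None


    def record_fault(instance, ex, exc_info):
        # Hypothetical stand-in for compute_utils.add_instance_fault_from_exc.
        instance['fault'] = {'message': str(ex), 'exc_info': exc_info}


    def schedule_rebuild(instance):
        try:
            raise RuntimeError('No valid host was found.')  # simulated failure
        except RuntimeError as ex:
            # The with-block body runs, then the original exception is re-raised.
            with excutils.save_and_reraise_exception():
                set_error_state(instance)
                record_fault(instance, ex, sys.exc_info())

Calling schedule_rebuild({}) still raises the RuntimeError, but the instance dict is left in the 'error' state with a recorded fault, which is the observable behaviour the tests below assert.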

nova/tests/functional/regressions/test_bug_1713783.py (2 changed lines)

@@ -109,7 +109,7 @@ class FailedEvacuateStateTests(test.TestCase,
         self._wait_for_notification_event_type('compute_task.rebuild_server')
-        server = self._wait_for_state_change(self.api, server, 'ACTIVE')
+        server = self._wait_for_state_change(self.api, server, 'ERROR')
         self.assertEqual(self.hostname, server['OS-EXT-SRV-ATTR:host'])

         # Check migrations

nova/tests/functional/test_server_group.py (8 changed lines)

@@ -455,7 +455,7 @@ class ServerGroupTestV21(ServerGroupTestBase):
         self.admin_api.post_server_action(servers[1]['id'], post)
         self._wait_for_migration_status(servers[1], 'error')
         server_after_failed_evac = self._wait_for_state_change(
-            self.admin_api, servers[1], 'ACTIVE')
+            self.admin_api, servers[1], 'ERROR')

         # assert that after a failed evac the server active on the same host
         # as before
@@ -479,7 +479,7 @@ class ServerGroupTestV21(ServerGroupTestBase):
         self.admin_api.post_server_action(servers[1]['id'], post)
         self._wait_for_migration_status(servers[1], 'error')
         server_after_failed_evac = self._wait_for_state_change(
-            self.admin_api, servers[1], 'ACTIVE')
+            self.admin_api, servers[1], 'ERROR')

         # assert that after a failed evac the server active on the same host
         # as before
@@ -653,7 +653,7 @@ class ServerGroupTestV215(ServerGroupTestV21):
         self.admin_api.post_server_action(servers[1]['id'], post)
         self._wait_for_migration_status(servers[1], 'error')
         server_after_failed_evac = self._wait_for_state_change(
-            self.admin_api, servers[1], 'ACTIVE')
+            self.admin_api, servers[1], 'ERROR')

         # assert that after a failed evac the server active on the same host
         # as before
@@ -677,7 +677,7 @@ class ServerGroupTestV215(ServerGroupTestV21):
         self.admin_api.post_server_action(servers[1]['id'], post)
         self._wait_for_migration_status(servers[1], 'error')
         server_after_failed_evac = self._wait_for_state_change(
-            self.admin_api, servers[1], 'ACTIVE')
+            self.admin_api, servers[1], 'ERROR')

         # assert that after a failed evac the server active on the same host
         # as before

nova/tests/functional/test_servers.py (4 changed lines)

@@ -1205,6 +1205,10 @@ class ServerRebuildTestCase(integrated_helpers._IntegratedTestBase,
         server = self.api.get_server(server['id'])
         self.assertEqual(rebuild_image_ref, server['image']['id'])
+        # The server should be in ERROR state
+        self.assertEqual('ERROR', server['status'])
+        self.assertIn('No valid host', server['fault']['message'])
+
     def test_rebuild_with_new_image(self):
         """Rebuilds a server with a different image which will run it through
         the scheduler to validate the image is still OK with the compute host

nova/tests/unit/conductor/test_conductor.py (14 changed lines)

@@ -1338,8 +1338,10 @@ class _BaseTaskTestCase(object):
                               'select_destinations',
                               side_effect=exc.NoValidHost(reason='')),
             mock.patch('nova.scheduler.utils.build_request_spec',
-                       return_value=request_spec)
-        ) as (rebuild_mock, sig_mock, fp_mock, select_dest_mock, bs_mock):
+                       return_value=request_spec),
+            mock.patch.object(scheduler_utils, 'set_vm_state_and_notify')
+        ) as (rebuild_mock, sig_mock, fp_mock,
+              select_dest_mock, bs_mock, set_vm_state_and_notify_mock):
             self.assertRaises(exc.NoValidHost,
                               self.conductor_manager.rebuild_instance,
                               context=self.context, instance=inst_obj,
@@ -1349,7 +1351,11 @@ class _BaseTaskTestCase(object):
             select_dest_mock.assert_called_once_with(self.context, fake_spec,
                     [inst_obj.uuid], return_objects=True,
                     return_alternates=False)
+            self.assertEqual(
+                set_vm_state_and_notify_mock.call_args[0][4]['vm_state'],
+                vm_states.ERROR)
             self.assertFalse(rebuild_mock.called)
+            self.assertIn('No valid host', inst_obj.fault.message)

     @mock.patch.object(conductor_manager.compute_rpcapi.ComputeAPI,
                        'rebuild_instance')
@@ -1390,12 +1396,14 @@ class _BaseTaskTestCase(object):
                           self.context,
                           inst_obj,
                           **rebuild_args)
-        updates = {'vm_state': vm_states.ACTIVE, 'task_state': None}
+        updates = {'vm_state': vm_states.ERROR, 'task_state': None}
         state_mock.assert_called_once_with(self.context, inst_obj.uuid,
                                            'rebuild_server', updates,
                                            exception, mock.ANY)
        self.assertFalse(select_dest_mock.called)
        self.assertFalse(rebuild_mock.called)
+        self.assertIn('ServerGroup policy is not supported',
+                      inst_obj.fault.message)

         # Assert the migration status was updated.
         migration = objects.Migration.get_by_id(self.context, migration.id)
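
As a generic illustration of the call_args indexing used in the assertion above (this is not Nova's test code; scheduler_utils here is a simplified stand-in namespace): call_args[0] is the tuple of positional arguments the mock received, so [0][4] picks out the updates dict.

    # Generic illustration of mock's call_args indexing (not Nova's test code).
    import types
    from unittest import mock

    # Stand-in for nova.scheduler.utils with a simplified signature.
    scheduler_utils = types.SimpleNamespace(
        set_vm_state_and_notify=lambda ctx, uuid, service, method, updates: None)

    with mock.patch.object(scheduler_utils, 'set_vm_state_and_notify') as state_mock:
        scheduler_utils.set_vm_state_and_notify(
            'ctx', 'uuid-1', 'compute_task', 'rebuild_server',
            {'vm_state': 'error', 'task_state': None})
        # call_args[0] is the positional-args tuple; index 4 is the updates dict.
        assert state_mock.call_args[0][4]['vm_state'] == 'error'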

releasenotes/notes/bug-1744325-rebuild-error-status-9e2da03f3f81fd6e.yaml (7 changed lines)

@@ -0,0 +1,7 @@
+---
+fixes:
+  - |
+    If scheduling fails during rebuild the server instance will go to ERROR
+    state and a fault will be recorded. `Bug 1744325`_
+
+    .. _Bug 1744325: https://bugs.launchpad.net/nova/+bug/1744325