diff --git a/nova/api/openstack/common.py b/nova/api/openstack/common.py index d4b8b8b5eb..dd8d588768 100644 --- a/nova/api/openstack/common.py +++ b/nova/api/openstack/common.py @@ -59,7 +59,11 @@ _STATE_MAP = { vm_states.ACTIVE: { 'default': 'ACTIVE', task_states.REBOOTING: 'REBOOT', + task_states.REBOOT_PENDING: 'REBOOT', + task_states.REBOOT_STARTED: 'REBOOT', task_states.REBOOTING_HARD: 'HARD_REBOOT', + task_states.REBOOT_PENDING_HARD: 'HARD_REBOOT', + task_states.REBOOT_STARTED_HARD: 'HARD_REBOOT', task_states.UPDATING_PASSWORD: 'PASSWORD', task_states.REBUILDING: 'REBUILD', task_states.REBUILD_BLOCK_DEVICE_MAPPING: 'REBUILD', diff --git a/nova/compute/api.py b/nova/compute/api.py index 20f1958948..36e89974d0 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -2050,9 +2050,8 @@ class API(base.Base): @wrap_check_policy @check_instance_lock - @check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED, - vm_states.PAUSED, vm_states.SUSPENDED, - vm_states.ERROR], + @check_instance_state(vm_state=set( + vm_states.ALLOW_SOFT_REBOOT + vm_states.ALLOW_HARD_REBOOT), task_state=[None, task_states.REBOOTING, task_states.REBOOTING_HARD, task_states.RESUMING, @@ -2062,10 +2061,7 @@ class API(base.Base): def reboot(self, context, instance, reboot_type): """Reboot the given instance.""" if (reboot_type == 'SOFT' and - (instance['vm_state'] in [vm_states.STOPPED, - vm_states.PAUSED, - vm_states.SUSPENDED, - vm_states.ERROR])): + (instance['vm_state'] not in vm_states.ALLOW_SOFT_REBOOT)): raise exception.InstanceInvalidState( attr='vm_state', instance_uuid=instance['uuid'], diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 2a19fa782b..f82ed764cf 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -848,6 +848,34 @@ class ComputeManager(manager.Manager): finally: return + try_reboot, reboot_type = self._retry_reboot(context, instance) + current_power_state = self._get_power_state(context, instance) + + if try_reboot: + LOG.debug(_("Instance in transitional state (%(task_state)s) at " + "start-up and power state is (%(power_state)s), " + "triggering reboot"), + {'task_state': instance['task_state'], + 'power_state': current_power_state}, + instance=instance) + self.compute_rpcapi.reboot_instance(context, instance, + block_device_info=None, + reboot_type=reboot_type) + return + + elif (current_power_state == power_state.RUNNING and + instance.task_state in [task_states.REBOOT_STARTED, + task_states.REBOOT_STARTED_HARD]): + LOG.warning(_("Instance in transitional state " + "(%(task_state)s) at start-up and power state " + "is (%(power_state)s), clearing task state"), + {'task_state': instance['task_state'], + 'power_state': current_power_state}, + instance=instance) + instance = self._instance_update(context, instance.uuid, + vm_state=vm_states.ACTIVE, + task_state=None) + net_info = compute_utils.get_nw_info_for_instance(instance) try: self.driver.plug_vifs(instance, net_info) @@ -917,6 +945,27 @@ class ComputeManager(manager.Manager): LOG.warning(_('Hypervisor driver does not support ' 'firewall rules'), instance=instance) + def _retry_reboot(self, context, instance): + current_power_state = self._get_power_state(context, instance) + current_task_state = instance.task_state + retry_reboot = False + reboot_type = compute_utils.get_reboot_type(current_task_state, + current_power_state) + + pending_soft = (current_task_state == task_states.REBOOT_PENDING and + instance.vm_state in vm_states.ALLOW_SOFT_REBOOT) + pending_hard = (current_task_state == task_states.REBOOT_PENDING_HARD + and instance.vm_state in vm_states.ALLOW_HARD_REBOOT) + started_not_running = (current_task_state in + [task_states.REBOOT_STARTED, + task_states.REBOOT_STARTED_HARD] and + current_power_state != power_state.RUNNING) + + if pending_soft or pending_hard or started_not_running: + retry_reboot = True + + return retry_reboot, reboot_type + def handle_lifecycle_event(self, event): LOG.info(_("Lifecycle event %(state)d on VM %(uuid)s") % {'state': event.get_transition(), @@ -2528,6 +2577,17 @@ class ComputeManager(manager.Manager): def reboot_instance(self, context, instance, block_device_info, reboot_type): """Reboot an instance on this host.""" + # acknowledge the request made it to the manager + if reboot_type == "SOFT": + instance.task_state = task_states.REBOOT_PENDING + expected_states = (task_states.REBOOTING, + task_states.REBOOT_PENDING, + task_states.REBOOT_STARTED) + else: + instance.task_state = task_states.REBOOT_PENDING_HARD + expected_states = (task_states.REBOOTING_HARD, + task_states.REBOOT_PENDING_HARD, + task_states.REBOOT_STARTED_HARD) context = context.elevated() LOG.audit(_("Rebooting instance"), context=context, instance=instance) @@ -2541,7 +2601,7 @@ class ComputeManager(manager.Manager): current_power_state = self._get_power_state(context, instance) instance.power_state = current_power_state - instance.save() + instance.save(expected_task_state=expected_states) if instance['power_state'] != power_state.RUNNING: state = instance['power_state'] @@ -2562,7 +2622,13 @@ class ComputeManager(manager.Manager): else: new_vm_state = vm_states.ACTIVE new_power_state = None - + if reboot_type == "SOFT": + instance.task_state = task_states.REBOOT_STARTED + expected_state = task_states.REBOOT_PENDING + else: + instance.task_state = task_states.REBOOT_STARTED_HARD + expected_state = task_states.REBOOT_PENDING_HARD + instance.save(expected_task_state=expected_state) self.driver.reboot(context, instance, network_info, reboot_type, diff --git a/nova/compute/task_states.py b/nova/compute/task_states.py index 53cf0f5241..44adaa4ea9 100644 --- a/nova/compute/task_states.py +++ b/nova/compute/task_states.py @@ -55,7 +55,11 @@ RESIZE_CONFIRMING = 'resize_confirming' # possible task states during reboot() REBOOTING = 'rebooting' +REBOOT_PENDING = 'reboot_pending' +REBOOT_STARTED = 'reboot_started' REBOOTING_HARD = 'rebooting_hard' +REBOOT_PENDING_HARD = 'reboot_pending_hard' +REBOOT_STARTED_HARD = 'reboot_started_hard' # possible task states during pause() PAUSING = 'pausing' diff --git a/nova/compute/utils.py b/nova/compute/utils.py index 8ea35bedb7..119510cad6 100644 --- a/nova/compute/utils.py +++ b/nova/compute/utils.py @@ -23,6 +23,8 @@ from oslo.config import cfg from nova import block_device from nova.compute import flavors +from nova.compute import power_state +from nova.compute import task_states from nova import exception from nova.network import model as network_model from nova import notifications @@ -448,6 +450,16 @@ def usage_volume_info(vol_usage): return usage_info +def get_reboot_type(task_state, current_power_state): + """Checks if the current instance state requires a HARD reboot.""" + if current_power_state != power_state.RUNNING: + return 'HARD' + soft_types = [task_states.REBOOT_STARTED, task_states.REBOOT_PENDING, + task_states.REBOOTING] + reboot_type = 'SOFT' if task_state in soft_types else 'HARD' + return reboot_type + + class EventReporter(object): """Context manager to report instance action events.""" diff --git a/nova/compute/vm_states.py b/nova/compute/vm_states.py index 5fe639773a..8062b9099a 100644 --- a/nova/compute/vm_states.py +++ b/nova/compute/vm_states.py @@ -46,3 +46,7 @@ ERROR = 'error' SHELVED = 'shelved' # VM is powered off, resources still on hypervisor SHELVED_OFFLOADED = 'shelved_offloaded' # VM and associated resources are # not on hypervisor + +ALLOW_SOFT_REBOOT = [ACTIVE] # states we can soft reboot from +ALLOW_HARD_REBOOT = ALLOW_SOFT_REBOOT + [STOPPED, PAUSED, SUSPENDED, ERROR] +# states we allow hard reboot from diff --git a/nova/tests/api/openstack/compute/plugins/v3/test_servers.py b/nova/tests/api/openstack/compute/plugins/v3/test_servers.py index 7b9a52378f..5e9643b8cd 100644 --- a/nova/tests/api/openstack/compute/plugins/v3/test_servers.py +++ b/nova/tests/api/openstack/compute/plugins/v3/test_servers.py @@ -955,7 +955,10 @@ class ServersControllerTest(ControllerTest): expected_attrs=[]): self.assertIsNotNone(search_opts) self.assertIn('task_state', search_opts) - self.assertEqual(search_opts['task_state'], [task_state]) + self.assertEqual([task_states.REBOOT_PENDING, + task_states.REBOOT_STARTED, + task_states.REBOOTING], + search_opts['task_state']) db_list = [fakes.stub_instance(100, uuid=server_uuid, task_state=task_state)] return instance_obj._make_instance_list( diff --git a/nova/tests/api/openstack/compute/test_servers.py b/nova/tests/api/openstack/compute/test_servers.py index a93bdc3389..182cd5dce8 100644 --- a/nova/tests/api/openstack/compute/test_servers.py +++ b/nova/tests/api/openstack/compute/test_servers.py @@ -876,7 +876,10 @@ class ServersControllerTest(ControllerTest): limit=None, marker=None, want_objects=False): self.assertIsNotNone(search_opts) self.assertIn('task_state', search_opts) - self.assertEqual(search_opts['task_state'], [task_state]) + self.assertEqual([task_states.REBOOT_PENDING, + task_states.REBOOT_STARTED, + task_states.REBOOTING], + search_opts['task_state']) db_list = [fakes.stub_instance(100, uuid=server_uuid, task_state=task_state)] return instance_obj._make_instance_list( diff --git a/nova/tests/api/openstack/test_common.py b/nova/tests/api/openstack/test_common.py index 9a656b9f29..dde9b515d7 100644 --- a/nova/tests/api/openstack/test_common.py +++ b/nova/tests/api/openstack/test_common.py @@ -360,7 +360,9 @@ class MiscFunctionsTest(test.TestCase): def test_task_and_vm_state_from_status(self): fixture1 = 'reboot' actual = common.task_and_vm_state_from_status(fixture1) - expected = [vm_states.ACTIVE], [task_states.REBOOTING] + expected = [vm_states.ACTIVE], [task_states.REBOOT_PENDING, + task_states.REBOOT_STARTED, + task_states.REBOOTING] self.assertEqual(expected, actual) fixture2 = 'resize' diff --git a/nova/tests/compute/test_compute.py b/nova/tests/compute/test_compute.py index d7a393dfad..194cbd732d 100644 --- a/nova/tests/compute/test_compute.py +++ b/nova/tests/compute/test_compute.py @@ -2287,6 +2287,21 @@ class ComputeTestCase(BaseTestCase): def _test_reboot(self, soft, test_delete=False, test_unrescue=False, fail_reboot=False, fail_running=False): + + reboot_type = soft and 'SOFT' or 'HARD' + task_pending = (soft and task_states.REBOOT_PENDING + or task_states.REBOOT_PENDING_HARD) + task_started = (soft and task_states.REBOOT_STARTED + or task_states.REBOOT_STARTED_HARD) + expected_task = (soft and task_states.REBOOTING + or task_states.REBOOTING_HARD) + expected_tasks = (soft and (task_states.REBOOTING, + task_states.REBOOT_PENDING, + task_states.REBOOT_STARTED) + or (task_states.REBOOTING_HARD, + task_states.REBOOT_PENDING_HARD, + task_states.REBOOT_STARTED_HARD)) + # This is a true unit test, so we don't need the network stubs. fake_network.unset_stub_network_methods(self.stubs) @@ -2309,6 +2324,7 @@ class ComputeTestCase(BaseTestCase): **dict(uuid='fake-instance', power_state=power_state.NOSTATE, vm_state=vm_states.ACTIVE, + task_state=expected_task, launched_at=timeutils.utcnow())) instance = instance_obj.Instance._from_db_object( econtext, instance_obj.Instance(), db_instance) @@ -2317,11 +2333,13 @@ class ComputeTestCase(BaseTestCase): **dict(uuid='updated-instance1', power_state=10003, vm_state=vm_states.ACTIVE, + task_state=expected_task, launched_at=timeutils.utcnow())) updated_dbinstance2 = fake_instance.fake_db_instance( **dict(uuid='updated-instance2', power_state=10003, vm_state=vm_states.ACTIVE, + task_state=expected_task, launched_at=timeutils.utcnow())) if test_unrescue: @@ -2334,7 +2352,6 @@ class ComputeTestCase(BaseTestCase): fake_power_state1 = 10001 fake_power_state2 = power_state.RUNNING fake_power_state3 = 10002 - reboot_type = soft and 'SOFT' or 'HARD' # Beginning of calls we expect. @@ -2352,13 +2369,22 @@ class ComputeTestCase(BaseTestCase): self.compute._get_power_state(econtext, instance).AndReturn(fake_power_state1) db.instance_update_and_get_original(econtext, instance['uuid'], - {'power_state': fake_power_state1}, - update_cells=False, - columns_to_join=['system_metadata'] - ).AndReturn((None, - updated_dbinstance1)) - + {'task_state': task_pending, + 'expected_task_state': expected_tasks, + 'power_state': fake_power_state1}, + update_cells=False, + columns_to_join=['system_metadata'] + ).AndReturn((None, + updated_dbinstance1)) expected_nw_info = fake_nw_model + db.instance_update_and_get_original(econtext, + updated_dbinstance1['uuid'], + {'task_state': task_started, + 'expected_task_state': task_pending}, + update_cells=False, + columns_to_join=['system_metadata'] + ).AndReturn((None, + updated_dbinstance1)) # Annoying. driver.reboot is wrapped in a try/except, and # doesn't re-raise. It eats exception generated by mox if diff --git a/nova/tests/compute/test_compute_mgr.py b/nova/tests/compute/test_compute_mgr.py index f878f22f52..73d0392413 100644 --- a/nova/tests/compute/test_compute_mgr.py +++ b/nova/tests/compute/test_compute_mgr.py @@ -274,6 +274,10 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase): '_set_instance_error_state') self.compute._get_power_state(mox.IgnoreArg(), instance).AndReturn(power_state.SHUTDOWN) + self.compute._get_power_state(mox.IgnoreArg(), + instance).AndReturn(power_state.SHUTDOWN) + self.compute._get_power_state(mox.IgnoreArg(), + instance).AndReturn(power_state.SHUTDOWN) self.compute.driver.plug_vifs(instance, mox.IgnoreArg()) self.compute._get_instance_volume_block_device_info(mox.IgnoreArg(), instance).AndReturn('fake-bdm') @@ -330,7 +334,10 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase): '_get_instance_volume_block_device_info') self.mox.StubOutWithMock(self.compute.driver, 'get_info') self.mox.StubOutWithMock(instance, 'save') + self.mox.StubOutWithMock(self.compute, '_retry_reboot') + self.compute._retry_reboot(self.context, instance).AndReturn( + (False, None)) compute_utils.get_nw_info_for_instance(instance).AndReturn( network_model.NetworkInfo()) self.compute.driver.plug_vifs(instance, []) @@ -341,6 +348,8 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase): instance.save() self.compute.driver.get_info(instance).AndReturn( {'state': power_state.SHUTDOWN}) + self.compute.driver.get_info(instance).AndReturn( + {'state': power_state.SHUTDOWN}) self.mox.ReplayAll() @@ -472,6 +481,91 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase): self.compute._init_instance(self.context, instance) self.mox.VerifyAll() + def _test_init_instance_retries_reboot(self, instance, reboot_type, + return_power_state): + with contextlib.nested( + mock.patch.object(self.compute, '_get_power_state', + return_value=return_power_state), + mock.patch.object(self.compute.compute_rpcapi, 'reboot_instance'), + mock.patch.object(compute_utils, 'get_nw_info_for_instance') + ) as ( + _get_power_state, + reboot_instance, + get_nw_info_for_instance + ): + self.compute._init_instance(self.context, instance) + call = mock.call(self.context, instance, block_device_info=None, + reboot_type=reboot_type) + reboot_instance.assert_has_calls([call]) + + def test_init_instance_retries_reboot_pending(self): + instance = instance_obj.Instance(self.context) + instance.uuid = 'foo' + instance.task_state = task_states.REBOOT_PENDING + for state in vm_states.ALLOW_SOFT_REBOOT: + instance.vm_state = state + self._test_init_instance_retries_reboot(instance, 'SOFT', + power_state.RUNNING) + + def test_init_instance_retries_reboot_pending_hard(self): + instance = instance_obj.Instance(self.context) + instance.uuid = 'foo' + instance.task_state = task_states.REBOOT_PENDING_HARD + for state in vm_states.ALLOW_HARD_REBOOT: + # NOTE(dave-mcnally) while a reboot of a vm in error state is + # possible we don't attempt to recover an error during init + if state == vm_states.ERROR: + continue + instance.vm_state = state + self._test_init_instance_retries_reboot(instance, 'HARD', + power_state.RUNNING) + + def test_init_instance_retries_reboot_started(self): + instance = instance_obj.Instance(self.context) + instance.uuid = 'foo' + instance.vm_state = vm_states.ACTIVE + instance.task_state = task_states.REBOOT_STARTED + self._test_init_instance_retries_reboot(instance, 'HARD', + power_state.NOSTATE) + + def test_init_instance_retries_reboot_started_hard(self): + instance = instance_obj.Instance(self.context) + instance.uuid = 'foo' + instance.vm_state = vm_states.ACTIVE + instance.task_state = task_states.REBOOT_STARTED_HARD + self._test_init_instance_retries_reboot(instance, 'HARD', + power_state.NOSTATE) + + def _test_init_instance_cleans_reboot_state(self, instance): + with contextlib.nested( + mock.patch.object(self.compute, '_get_power_state', + return_value=power_state.RUNNING), + mock.patch.object(self.compute, '_instance_update'), + mock.patch.object(compute_utils, 'get_nw_info_for_instance') + ) as ( + _get_power_state, + _instance_update, + get_nw_info_for_instance + ): + self.compute._init_instance(self.context, instance) + call = mock.call(self.context, 'foo', vm_state='active', + task_state=None) + _instance_update.assert_has_calls([call]) + + def test_init_instance_cleans_image_state_reboot_started(self): + instance = instance_obj.Instance(self.context) + instance.uuid = 'foo' + instance.vm_state = vm_states.ACTIVE + instance.task_state = task_states.REBOOT_STARTED + self._test_init_instance_cleans_reboot_state(instance) + + def test_init_instance_cleans_image_state_reboot_started_hard(self): + instance = instance_obj.Instance(self.context) + instance.uuid = 'foo' + instance.vm_state = vm_states.ACTIVE + instance.task_state = task_states.REBOOT_STARTED_HARD + self._test_init_instance_cleans_reboot_state(instance) + def test_get_instances_on_driver(self): fake_context = context.get_admin_context() @@ -935,6 +1029,74 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase): events[1]) do_test() + def test_retry_reboot_pending_soft(self): + instance = instance_obj.Instance(self.context) + instance.uuid = 'foo' + instance.task_state = task_states.REBOOT_PENDING + instance.vm_state = vm_states.ACTIVE + with mock.patch.object(self.compute, '_get_power_state', + return_value=power_state.RUNNING): + allow_reboot, reboot_type = self.compute._retry_reboot( + context, instance) + self.assertTrue(allow_reboot) + self.assertEqual(reboot_type, 'SOFT') + + def test_retry_reboot_pending_hard(self): + instance = instance_obj.Instance(self.context) + instance.uuid = 'foo' + instance.task_state = task_states.REBOOT_PENDING_HARD + instance.vm_state = vm_states.ACTIVE + with mock.patch.object(self.compute, '_get_power_state', + return_value=power_state.RUNNING): + allow_reboot, reboot_type = self.compute._retry_reboot( + context, instance) + self.assertTrue(allow_reboot) + self.assertEqual(reboot_type, 'HARD') + + def test_retry_reboot_starting_soft_off(self): + instance = instance_obj.Instance(self.context) + instance.uuid = 'foo' + instance.task_state = task_states.REBOOT_STARTED + with mock.patch.object(self.compute, '_get_power_state', + return_value=power_state.NOSTATE): + allow_reboot, reboot_type = self.compute._retry_reboot( + context, instance) + self.assertTrue(allow_reboot) + self.assertEqual(reboot_type, 'HARD') + + def test_retry_reboot_starting_hard_off(self): + instance = instance_obj.Instance(self.context) + instance.uuid = 'foo' + instance.task_state = task_states.REBOOT_STARTED_HARD + with mock.patch.object(self.compute, '_get_power_state', + return_value=power_state.NOSTATE): + allow_reboot, reboot_type = self.compute._retry_reboot( + context, instance) + self.assertTrue(allow_reboot) + self.assertEqual(reboot_type, 'HARD') + + def test_retry_reboot_starting_hard_on(self): + instance = instance_obj.Instance(self.context) + instance.uuid = 'foo' + instance.task_state = task_states.REBOOT_STARTED_HARD + with mock.patch.object(self.compute, '_get_power_state', + return_value=power_state.RUNNING): + allow_reboot, reboot_type = self.compute._retry_reboot( + context, instance) + self.assertFalse(allow_reboot) + self.assertEqual(reboot_type, 'HARD') + + def test_retry_reboot_no_reboot(self): + instance = instance_obj.Instance(self.context) + instance.uuid = 'foo' + instance.task_state = 'bar' + with mock.patch.object(self.compute, '_get_power_state', + return_value=power_state.RUNNING): + allow_reboot, reboot_type = self.compute._retry_reboot( + context, instance) + self.assertFalse(allow_reboot) + self.assertEqual(reboot_type, 'HARD') + class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): def setUp(self): diff --git a/nova/tests/compute/test_compute_utils.py b/nova/tests/compute/test_compute_utils.py index 667856a201..1e9702af5b 100644 --- a/nova/tests/compute/test_compute_utils.py +++ b/nova/tests/compute/test_compute_utils.py @@ -23,6 +23,8 @@ import mock from oslo.config import cfg from nova.compute import flavors +from nova.compute import power_state +from nova.compute import task_states from nova.compute import utils as compute_utils from nova import context from nova import db @@ -721,3 +723,27 @@ class ComputeUtilsGetNWInfo(test.TestCase): self.assertIsNone(inst['info_cache']) result = compute_utils.get_nw_info_for_instance(inst) self.assertEqual(jsonutils.dumps([]), result.json()) + + +class ComputeUtilsGetRebootTypes(test.TestCase): + def setUp(self): + super(ComputeUtilsGetRebootTypes, self).setUp() + self.context = context.RequestContext('fake', 'fake') + + def test_get_reboot_type_started_soft(self): + reboot_type = compute_utils.get_reboot_type(task_states.REBOOT_STARTED, + power_state.RUNNING) + self.assertEqual(reboot_type, 'SOFT') + + def test_get_reboot_type_pending_soft(self): + reboot_type = compute_utils.get_reboot_type(task_states.REBOOT_PENDING, + power_state.RUNNING) + self.assertEqual(reboot_type, 'SOFT') + + def test_get_reboot_type_hard(self): + reboot_type = compute_utils.get_reboot_type('foo', power_state.RUNNING) + self.assertEqual(reboot_type, 'HARD') + + def test_get_reboot_not_running_hard(self): + reboot_type = compute_utils.get_reboot_type('foo', 'bar') + self.assertEqual(reboot_type, 'HARD')