From d6a90cc6ac1f49286b1c6a53f934d60a579da9bf Mon Sep 17 00:00:00 2001 From: Tanvir Talukder Date: Wed, 4 Jan 2017 11:27:04 -0600 Subject: [PATCH] Fix for resources stuck in progress after engine crash When a stack is IN_PROGRESS and an UPDATE or RESTORE is called after an engine crash, we set status of the stack and all of its IN_PROGRESS resources to FAILED Change-Id: Ia3adbfeff16c69719f9e5365657ab46a0932ec9b Closes-Bug: #1570576 --- heat/engine/service.py | 17 ++----- heat/engine/stack.py | 13 ++++- .../tests/engine/service/test_stack_update.py | 38 ++++++++++++++ heat/tests/test_engine_service.py | 51 +------------------ 4 files changed, 55 insertions(+), 64 deletions(-) diff --git a/heat/engine/service.py b/heat/engine/service.py index d8a18851d8..f87a02c9e3 100644 --- a/heat/engine/service.py +++ b/heat/engine/service.py @@ -2397,17 +2397,6 @@ class EngineService(service.ServiceBase): LOG.debug('Service %s was aborted' % service_ref['id']) service_objects.Service.delete(cnxt, service_ref['id']) - def set_stack_and_resource_to_failed(self, stack): - for name, rsrc in six.iteritems(stack.resources): - if rsrc.status == rsrc.IN_PROGRESS: - status_reason = ('Engine went down ' - 'during resource %s' % rsrc.action) - rsrc.state_set(rsrc.action, - rsrc.FAILED, - six.text_type(status_reason)) - reason = ('Engine went down during stack %s' % stack.action) - stack.state_set(stack.action, stack.FAILED, six.text_type(reason)) - def reset_stack_status(self): cnxt = context.get_admin_context() filters = { @@ -2440,12 +2429,14 @@ class EngineService(service.ServiceBase): {'engine': engine_id, 'action': stk.action, 'stack_id': stk.id}) + reason = _('Engine went down during stack %s') % stk.action + # Set stack and resources status to FAILED in sub thread self.thread_group_mgr.start_with_acquired_lock( stk, lock, - self.set_stack_and_resource_to_failed, - stk + stk.reset_stack_and_resources_in_progress, + reason ) except exception.ActionInProgress: continue diff --git a/heat/engine/stack.py b/heat/engine/stack.py index 3af5354904..3b25d02156 100644 --- a/heat/engine/stack.py +++ b/heat/engine/stack.py @@ -1426,6 +1426,14 @@ class Stack(collections.Mapping): return self._convg_deps + def reset_stack_and_resources_in_progress(self, reason): + for name, rsrc in six.iteritems(self.resources): + if rsrc.status == rsrc.IN_PROGRESS: + rsrc.state_set(rsrc.action, + rsrc.FAILED, + six.text_type(reason)) + self.state_set(self.action, self.FAILED, six.text_type(reason)) + @scheduler.wrappertask def update_task(self, newstack, action=UPDATE, msg_queue=None): if action not in (self.UPDATE, self.ROLLBACK, self.RESTORE): @@ -1445,8 +1453,9 @@ class Stack(collections.Mapping): if action == self.ROLLBACK: LOG.debug("Starting update rollback for %s" % self.name) else: - self.state_set(action, self.FAILED, - 'State invalid for %s' % action) + reason = _('Attempted to %s an IN_PROGRESS ' + 'stack') % action + self.reset_stack_and_resources_in_progress(reason) return # Save a copy of the new template. To avoid two DB writes diff --git a/heat/tests/engine/service/test_stack_update.py b/heat/tests/engine/service/test_stack_update.py index 6ef3e4499d..c54f149ffc 100644 --- a/heat/tests/engine/service/test_stack_update.py +++ b/heat/tests/engine/service/test_stack_update.py @@ -1125,3 +1125,41 @@ resources: environment_files=environment_files) # Assertions done in _test_stack_update_preview + + def test_reset_stack_and_resources_in_progress(self): + + def mock_stack_resource(name, action, status): + rs = mock.MagicMock() + rs.name = name + rs.action = action + rs.status = status + rs.IN_PROGRESS = 'IN_PROGRESS' + rs.FAILED = 'FAILED' + + def mock_resource_state_set(a, s, reason='engine_down'): + rs.status = s + rs.action = a + rs.status_reason = reason + + rs.state_set = mock_resource_state_set + + return rs + + stk_name = 'test_stack' + stk = tools.get_stack(stk_name, self.ctx) + stk.action = 'CREATE' + stk.status = 'IN_PROGRESS' + + resources = {'r1': mock_stack_resource('r1', 'UPDATE', 'COMPLETE'), + 'r2': mock_stack_resource('r2', 'UPDATE', 'IN_PROGRESS'), + 'r3': mock_stack_resource('r3', 'UPDATE', 'FAILED')} + + stk._resources = resources + + reason = 'Test resetting stack and resources in progress' + + stk.reset_stack_and_resources_in_progress(reason) + self.assertEqual('FAILED', stk.status) + self.assertEqual('COMPLETE', stk.resources.get('r1').status) + self.assertEqual('FAILED', stk.resources.get('r2').status) + self.assertEqual('FAILED', stk.resources.get('r3').status) diff --git a/heat/tests/test_engine_service.py b/heat/tests/test_engine_service.py index ee00d99462..1b3d2ad72c 100644 --- a/heat/tests/test_engine_service.py +++ b/heat/tests/test_engine_service.py @@ -1340,59 +1340,12 @@ class StackServiceTest(common.HeatTestCase): service_check_defer=True, resource_validate=False) self.assertTrue(lock2.release.called) + reason = ('Engine went down during stack %s' % fake_stack.action) mock_thread.start_with_acquired_lock.assert_called_once_with( fake_stack, lock1, - self.eng.set_stack_and_resource_to_failed, fake_stack + fake_stack.reset_stack_and_resources_in_progress, reason ) - def test_set_stack_and_resource_to_failed(self): - - def fake_stack(): - stk = mock.MagicMock() - stk.action = 'CREATE' - stk.id = 'foo' - stk.status = 'IN_PROGRESS' - stk.FAILED = 'FAILED' - - def mock_stack_state_set(a, s, reason): - stk.status = s - stk.action = a - stk.status_reason = reason - - stk.state_set = mock_stack_state_set - - return stk - - def fake_stack_resource(name, action, status): - rs = mock.MagicMock() - rs.name = name - rs.action = action - rs.status = status - rs.IN_PROGRESS = 'IN_PROGRESS' - rs.FAILED = 'FAILED' - - def mock_resource_state_set(a, s, reason='engine_down'): - rs.status = s - rs.action = a - rs.status_reason = reason - - rs.state_set = mock_resource_state_set - - return rs - - test_stack = fake_stack() - - test_stack.resources = { - 'r1': fake_stack_resource('r1', 'UPDATE', 'COMPLETE'), - 'r2': fake_stack_resource('r2', 'UPDATE', 'IN_PROGRESS'), - 'r3': fake_stack_resource('r3', 'UPDATE', 'FAILED')} - - self.eng.set_stack_and_resource_to_failed(test_stack) - self.assertEqual('FAILED', test_stack.status) - self.assertEqual('COMPLETE', test_stack.resources.get('r1').status) - self.assertEqual('FAILED', test_stack.resources.get('r2').status) - self.assertEqual('FAILED', test_stack.resources.get('r3').status) - def test_parse_adopt_stack_data_without_parameters(self): cfg.CONF.set_override('enable_stack_adopt', True, enforce_type=True) template = {"heat_template_version": "2015-04-30",