Fix sync point delete

When a resource failed, the stack state was set to FAILED and current
traversal was set to emoty string. The actual traversal was lost and
there was no way to delete the sync points belonging to the actual
traversal.

This change keeps the current traversal when you do a state set, so that
later you can delete the sync points belonging to it. Also, the current
traversal is set to empty when the stack has failed and there is no need
to rollback.

Closes-Bug: #1618155

Change-Id: Iec3922af92b70b0628fb94b7b2d597247e6d42c4
This commit is contained in:
Anant Patil 2016-09-14 16:59:55 +05:30
parent 54feba728b
commit 2e281df428
4 changed files with 46 additions and 14 deletions

View File

@ -924,14 +924,9 @@ class Stack(collections.Mapping):
self._send_notification_and_add_event()
if self.convergence:
# do things differently for convergence
exp_trvsl = self.current_traversal
if self.status == self.FAILED:
self.current_traversal = ''
values['current_traversal'] = self.current_traversal
updated = stack_object.Stack.select_and_update(
self.context, self.id, values,
exp_trvsl=exp_trvsl)
exp_trvsl=self.current_traversal)
return updated
@ -2021,13 +2016,17 @@ class Stack(collections.Mapping):
"""
resource_objects.Resource.purge_deleted(self.context, self.id)
exp_trvsl = self.current_traversal
if self.status == self.FAILED:
self.current_traversal = ''
prev_tmpl_id = None
if (self.prev_raw_template_id is not None and
self.status != self.FAILED):
prev_tmpl_id = self.prev_raw_template_id
self.prev_raw_template_id = None
stack_id = self.store()
stack_id = self.store(exp_trvsl=exp_trvsl)
if stack_id is None:
# Failed concurrent update
LOG.warning(_LW("Failed to store stack %(name)s with traversal ID "
@ -2039,7 +2038,7 @@ class Stack(collections.Mapping):
if prev_tmpl_id is not None:
raw_template_object.RawTemplate.delete(self.context, prev_tmpl_id)
sync_point.delete_all(self.context, self.id, self.current_traversal)
sync_point.delete_all(self.context, self.id, exp_trvsl)
if (self.action, self.status) == (self.DELETE, self.COMPLETE):
if not self.owner_id:

View File

@ -18,6 +18,7 @@ import eventlet.queue
from oslo_log import log as logging
import oslo_messaging
from oslo_service import service
from oslo_utils import uuidutils
from osprofiler import profiler
from heat.common import context
@ -28,6 +29,7 @@ from heat.common import messaging as rpc_messaging
from heat.db import api as db_api
from heat.engine import check_resource
from heat.engine import sync_point
from heat.objects import stack as stack_objects
from heat.rpc import api as rpc_api
from heat.rpc import worker_client as rpc_client
@ -102,14 +104,24 @@ class WorkerService(service.Service):
in_progress resources to complete normally; no worker is stopped
abruptly.
"""
reason = 'User cancelled stack %s ' % stack.action
# state_set will update the current traversal to '' for FAILED state
old_trvsl = stack.current_traversal
updated = _update_current_traversal(stack)
if not updated:
LOG.warning(_LW("Failed to update stack %(name)s with new "
"traversal, aborting stack cancel"),
{'name': stack.name})
return
reason = 'User cancelled stack %s ' % stack.action
updated = stack.state_set(stack.action, stack.FAILED, reason)
if not updated:
LOG.warning(_LW("Failed to stop traversal %(trvsl)s of stack "
"%(name)s while cancelling the operation."),
{'name': stack.name, 'trvsl': old_trvsl})
LOG.warning(_LW("Failed to update stack %(name)s status"
" to %(action)_%(state)"),
{'name': stack.name, 'action': stack.action,
'state': stack.FAILED})
return
sync_point.delete_all(stack.context, stack.id, old_trvsl)
def stop_all_workers(self, stack):
# stop the traversal
@ -172,6 +184,15 @@ class WorkerService(service.Service):
_cancel_check_resource(stack_id, self.engine_id, self.thread_group_mgr)
def _update_current_traversal(stack):
previous_traversal = stack.current_traversal
stack.current_traversal = uuidutils.generate_uuid()
values = {'current_traversal': stack.current_traversal}
return stack_objects.Stack.select_and_update(
stack.context, stack.id, values,
exp_trvsl=previous_traversal)
def _cancel_check_resource(stack_id, engine_id, tgm):
LOG.debug('Cancelling workers for stack [%s] in engine [%s]',
stack_id, engine_id)

View File

@ -18,6 +18,7 @@ import mock
from heat.db import api as db_api
from heat.engine import check_resource
from heat.engine import worker
from heat.objects import stack as stack_objects
from heat.rpc import worker_client as wc
from heat.tests import common
from heat.tests import utils
@ -220,3 +221,13 @@ class WorkerServiceTest(common.HeatTestCase):
mock_cw.assert_called_with(stack, mock_tgm, 'engine-001',
_worker._rpc_client)
self.assertFalse(stack.rollback.called)
@mock.patch.object(stack_objects.Stack, 'select_and_update')
def test_update_current_traversal(self, mock_sau):
stack = mock.MagicMock()
stack.current_traversal = 'some-thing'
old_trvsl = stack.current_traversal
worker._update_current_traversal(stack)
self.assertNotEqual(old_trvsl, stack.current_traversal)
mock_sau.assert_called_once_with(mock.ANY, stack.id, mock.ANY,
exp_trvsl=old_trvsl)

View File

@ -375,8 +375,9 @@ class StackConvergenceCreateUpdateDeleteTest(common.HeatTestCase):
stack = tools.get_stack('test_stack', utils.dummy_context(),
template=tools.string_template_five,
convergence=True)
stack.status = stack.FAILED
stack.store()
stack.state_set(stack.action, stack.FAILED, 'test-reason')
stack.purge_db()
self.assertEqual('', stack.current_traversal)
@mock.patch.object(raw_template_object.RawTemplate, 'delete')