Don't acquire the lock when cancelling a stack

We used to try to acquire the stack lock in order to find out which engine
to cancel a running update on, in the misguided belief that it could never
succeed. Accordingly, we never released the lock.

Since it is entirely possible to encounter a race where the lock has
already been released, use the get_engine_id() method instead to look up
the ID of the engine holding the lock without attempting to acquire it.

Change-Id: I1d026f8c67dddcf840ccbc2f3f1537693dc266fb
Closes-Bug: #1624538
This commit is contained in:
Zane Bitter 2016-09-16 17:05:25 -04:00
parent e03a063084
commit e2ba3390cd
3 changed files with 36 additions and 6 deletions

View File

@ -1127,12 +1127,13 @@ class EngineService(service.Service):
self.thread_group_mgr.start(current_stack.id, func)
return
# stop the running update and take the lock
# as we cancel only running update, the acquire_result is
# always some engine_id, not None
lock = stack_lock.StackLock(cnxt, current_stack.id,
self.engine_id)
engine_id = lock.try_acquire()
engine_id = lock.get_engine_id()
if engine_id is None:
LOG.debug('No lock found on stack %s', db_stack.name)
return
if cancel_with_rollback:
cancel_message = rpc_api.THREAD_CANCEL_WITH_ROLLBACK
@ -1156,6 +1157,12 @@ class EngineService(service.Service):
raise exception.EventSendFailed(stack_name=current_stack.name,
engine_id=engine_id)
else:
LOG.warning(_('Cannot cancel stack %(stack_name)s: lock held by '
'unknown engine %(engine_id)s') % {
'stack_name': db_stack.name,
'engine_id': engine_id})
@context.request_context
def validate_template(self, cnxt, template, params=None, files=None,
environment_files=None, show_nested=False,

View File

@ -35,6 +35,10 @@ class StackLock(object):
self.listener = None
def get_engine_id(self):
"""Return the ID of the engine which currently holds the lock.
Returns None if there is no lock held on the stack.
"""
return stack_lock_object.StackLock.get_engine_id(self.context,
self.stack_id)

View File

@ -482,8 +482,10 @@ resources:
stk.disable_rollback = False
stk.store()
self.man.engine_id = service_utils.generate_engine_id()
self.patchobject(stack.Stack, 'load', return_value=stk)
self.patchobject(stack_lock.StackLock, 'try_acquire',
self.patchobject(stack_lock.StackLock, 'get_engine_id',
return_value=self.man.engine_id)
self.patchobject(self.man.thread_group_mgr, 'send')
@ -500,7 +502,7 @@ resources:
stk.disable_rollback = False
stk.store()
self.patchobject(stack.Stack, 'load', return_value=stk)
self.patchobject(stack_lock.StackLock, 'try_acquire',
self.patchobject(stack_lock.StackLock, 'get_engine_id',
return_value=str(uuid.uuid4()))
self.patchobject(service_utils, 'engine_alive',
return_value=True)
@ -514,6 +516,23 @@ resources:
self.man.stack_cancel_update,
self.ctx, stk.identifier())
def test_stack_cancel_update_no_lock(self):
stack_name = 'service_update_stack_test_cancel_same_engine'
stk = tools.get_stack(stack_name, self.ctx)
stk.state_set(stk.UPDATE, stk.IN_PROGRESS, 'test_override')
stk.disable_rollback = False
stk.store()
self.patchobject(stack.Stack, 'load', return_value=stk)
self.patchobject(stack_lock.StackLock, 'get_engine_id',
return_value=None)
self.patchobject(self.man.thread_group_mgr, 'send')
self.man.stack_cancel_update(self.ctx, stk.identifier(),
cancel_with_rollback=False)
self.assertFalse(self.man.thread_group_mgr.send.called)
def test_stack_cancel_update_wrong_state_fails(self):
stack_name = 'service_update_cancel_test_stack'
stk = tools.get_stack(stack_name, self.ctx)