Fix action (CREATE) in progress error for TemplateResource/RG

When StackResource 'update_with_template' is called and there
is no existing nested_stack, we call create_stack with an empty
template(TemplateResource CREATE_FAILED->UPDATE flow). We check
db for CREATE_COMPLETE before calling update_stack.There is a
possibility that the create_stack green thread has not finished
and released the stack lock. By persisting the stack state for
COMPLETE/FAILED in the same db session as releasing the lock,
when green thread finishes it's run, we can avoid this race
condition.

Change-Id: I1442a7bcedb4fa340ee4f0e6ebc3705544c2a9dd
Closes-Bug: #1498495
(cherry picked from commit e2294da861)
This commit is contained in:
Rabi Mishra 2015-09-24 12:10:43 +05:30 committed by Steve Baker
parent c74721943a
commit 49976fb7be
13 changed files with 143 additions and 27 deletions

View File

@ -201,6 +201,11 @@ def stack_lock_release(stack_id, engine_id):
return IMPL.stack_lock_release(stack_id, engine_id)
def persist_state_and_release_lock(context, stack_id, engine_id, values):
return IMPL.persist_state_and_release_lock(context, stack_id,
engine_id, values)
def stack_get_root_id(context, stack_id):
return IMPL.stack_get_root_id(context, stack_id)

View File

@ -557,6 +557,22 @@ def stack_lock_get_engine_id(stack_id):
return lock.engine_id
def persist_state_and_release_lock(context, stack_id, engine_id, values):
session = _session(context)
with session.begin():
rows_updated = (session.query(models.Stack)
.filter(models.Stack.id == stack_id)
.update(values, synchronize_session=False))
rows_affected = None
if rows_updated is not None and rows_updated > 0:
rows_affected = session.query(
models.StackLock
).filter_by(stack_id=stack_id, engine_id=engine_id).delete()
session.expire_all()
if not rows_affected:
return True
def stack_lock_steal(stack_id, old_engine_id, new_engine_id):
session = get_session()
with session.begin():

View File

@ -160,8 +160,16 @@ class ThreadGroupManager(object):
"""
def release(gt):
"""Callback function that will be passed to GreenThread.link()."""
lock.release()
"""Callback function that will be passed to GreenThread.link().
Persist the stack state to COMPLETE and FAILED close to
releasing the lock to avoid race condtitions.
"""
if stack is not None and stack.action not in (
stack.DELETE, stack.ROLLBACK):
stack.persist_state_and_release_lock(lock.engine_id)
else:
lock.release()
th = self.start(stack.id, func, *args, **kwargs)
th.link(release)

View File

@ -706,7 +706,7 @@ class Stack(collections.Mapping):
@profiler.trace('Stack.state_set', hide_args=False)
def state_set(self, action, status, reason):
"""Update the stack state in the database."""
"""Update the stack state."""
if action not in self.ACTIONS:
raise ValueError(_("Invalid action %s") % action)
@ -717,22 +717,47 @@ class Stack(collections.Mapping):
self.status = status
self.status_reason = reason
# Persist state to db only if status == IN_PROGRESS
# or action == self.DELETE/self.ROLLBACK. Else, it would
# be done before releasing the stack lock.
if status == self.IN_PROGRESS or action in (
self.DELETE, self.ROLLBACK):
self._persist_state()
def _persist_state(self):
"""Persist stack state to database"""
if self.id is None:
return
stack = stack_object.Stack.get_by_id(self.context, self.id)
if stack is not None:
notification.send(self)
self._add_event(action, status, reason)
LOG.info(_LI('Stack %(action)s %(status)s (%(name)s): '
'%(reason)s'),
{'action': action,
'status': status,
'name': self.name,
'reason': reason})
stack.update_and_save({'action': action,
'status': status,
'status_reason': reason})
values = {'action': self.action,
'status': self.status,
'status_reason': self.status_reason}
self._send_notification_and_add_event()
stack.update_and_save(values)
def _send_notification_and_add_event(self):
notification.send(self)
self._add_event(self.action, self.status, self.status_reason)
LOG.info(_LI('Stack %(action)s %(status)s (%(name)s): '
'%(reason)s'),
{'action': self.action,
'status': self.status,
'name': self.name,
'reason': self.status_reason})
def persist_state_and_release_lock(self, engine_id):
"""Persist stack state to database and release stack lock"""
if self.id is None:
return
stack = stack_object.Stack.get_by_id(self.context, self.id)
if stack is not None:
values = {'action': self.action,
'status': self.status,
'status_reason': self.status_reason}
self._send_notification_and_add_event()
stack.persist_state_and_release_lock(self.context, self.id,
engine_id, values)
@property
def state(self):

View File

@ -169,6 +169,12 @@ class Stack(
return db_api.stack_update(context, stack_id, values,
exp_trvsl=exp_trvsl)
@classmethod
def persist_state_and_release_lock(cls, context, stack_id,
engine_id, values):
return db_api.persist_state_and_release_lock(context, stack_id,
engine_id, values)
@classmethod
def delete(cls, context, stack_id):
db_api.stack_delete(context, stack_id)

View File

@ -778,6 +778,7 @@ class SqlAlchemyTest(common.HeatTestCase):
self._mock_create(self.m)
self.m.ReplayAll()
stack.create()
stack._persist_state()
self.m.UnsetStubs()
events = db_api.event_get_all_by_stack(self.ctx, UUID1)
@ -869,6 +870,7 @@ class SqlAlchemyTest(common.HeatTestCase):
self._mock_create(self.m)
self.m.ReplayAll()
stack.create()
stack._persist_state()
self.m.UnsetStubs()
num_events = db_api.event_count_all_by_stack(self.ctx, UUID1)
@ -889,6 +891,7 @@ class SqlAlchemyTest(common.HeatTestCase):
self._mock_create(self.m)
self.m.ReplayAll()
[s.create() for s in stacks]
[s._persist_state() for s in stacks]
self.m.UnsetStubs()
events = db_api.event_get_all_by_tenant(self.ctx)
@ -909,6 +912,7 @@ class SqlAlchemyTest(common.HeatTestCase):
self._mock_create(self.m)
self.m.ReplayAll()
[s.create() for s in stacks]
[s._persist_state() for s in stacks]
self.m.UnsetStubs()
events = db_api.event_get_all(self.ctx)
@ -1657,6 +1661,58 @@ class DBAPIStackTest(common.HeatTestCase):
exp_trvsl=diff_uuid)
self.assertFalse(updated)
def test_stack_set_status_release_lock(self):
stack = create_stack(self.ctx, self.template, self.user_creds)
values = {
'name': 'db_test_stack_name2',
'action': 'update',
'status': 'failed',
'status_reason': "update_failed",
'timeout': '90',
'current_traversal': 'another-dummy-uuid',
}
db_api.stack_lock_create(stack.id, UUID1)
observed = db_api.persist_state_and_release_lock(self.ctx, stack.id,
UUID1, values)
self.assertIsNone(observed)
stack = db_api.stack_get(self.ctx, stack.id)
self.assertEqual('db_test_stack_name2', stack.name)
self.assertEqual('update', stack.action)
self.assertEqual('failed', stack.status)
self.assertEqual('update_failed', stack.status_reason)
self.assertEqual(90, stack.timeout)
self.assertEqual('another-dummy-uuid', stack.current_traversal)
def test_stack_set_status_release_lock_failed(self):
stack = create_stack(self.ctx, self.template, self.user_creds)
values = {
'name': 'db_test_stack_name2',
'action': 'update',
'status': 'failed',
'status_reason': "update_failed",
'timeout': '90',
'current_traversal': 'another-dummy-uuid',
}
db_api.stack_lock_create(stack.id, UUID2)
observed = db_api.persist_state_and_release_lock(self.ctx, stack.id,
UUID1, values)
self.assertTrue(observed)
def test_stack_set_status_failed_release_lock(self):
stack = create_stack(self.ctx, self.template, self.user_creds)
values = {
'name': 'db_test_stack_name2',
'action': 'update',
'status': 'failed',
'status_reason': "update_failed",
'timeout': '90',
'current_traversal': 'another-dummy-uuid',
}
db_api.stack_lock_create(stack.id, UUID1)
observed = db_api.persist_state_and_release_lock(self.ctx, UUID2,
UUID1, values)
self.assertTrue(observed)
def test_stack_get_returns_a_stack(self):
stack = create_stack(self.ctx, self.template, self.user_creds)
ret_stack = db_api.stack_get(self.ctx, stack.id, show_deleted=False)

View File

@ -92,7 +92,7 @@ class StackServiceAdoptTest(common.HeatTestCase):
{'adopt_stack_data': str(adopt_data)})
stack = stack_object.Stack.get_by_id(self.ctx, result['stack_id'])
self.assertEqual((parser.Stack.ADOPT, parser.Stack.COMPLETE),
self.assertEqual((parser.Stack.ADOPT, parser.Stack.IN_PROGRESS),
(stack.action, stack.status))
def test_stack_adopt_disabled(self):

View File

@ -109,6 +109,7 @@ class StackEventTest(common.HeatTestCase):
result = self.eng.update_stack(self.ctx, self.stack.identifier(),
new_tmpl, None, None, {})
self.stack._persist_state()
# The self.stack reference needs to be updated. Since the underlying
# stack is updated in update_stack, the original reference is now
@ -120,7 +121,7 @@ class StackEventTest(common.HeatTestCase):
self.assertTrue(result['stack_id'])
events = self.eng.list_events(self.ctx, self.stack.identifier())
self.assertEqual(10, len(events))
self.assertEqual(11, len(events))
for ev in events:
self.assertIn('event_identity', ev)

View File

@ -57,12 +57,14 @@ class SnapshotServiceTest(common.HeatTestCase):
def test_show_snapshot_not_belong_to_stack(self):
stk1 = self._create_stack('stack_snaphot_not_belong_to_stack_1')
stk1._persist_state()
snapshot1 = self.engine.stack_snapshot(
self.ctx, stk1.identifier(), 'snap1')
self.engine.thread_group_mgr.groups[stk1.id].wait()
snapshot_id = snapshot1['id']
stk2 = self._create_stack('stack_snaphot_not_belong_to_stack_2')
stk2._persist_state()
ex = self.assertRaises(dispatcher.ExpectedException,
self.engine.show_snapshot,
self.ctx, stk2.identifier(),

View File

@ -439,13 +439,12 @@ class ServiceStackUpdateTest(common.HeatTestCase):
}
}
template = templatem.Template(tpl)
create_stack = stack.Stack(self.ctx, stack_name, template)
sid = create_stack.store()
create_stack.create()
self.assertEqual((create_stack.CREATE, create_stack.COMPLETE),
create_stack.state)
create_stack._persist_state()
s = stack_object.Stack.get_by_id(self.ctx, sid)
old_stack = stack.Stack.load(self.ctx, stack=s)
@ -460,6 +459,7 @@ class ServiceStackUpdateTest(common.HeatTestCase):
result = self.man.update_stack(self.ctx, create_stack.identifier(),
tpl, {}, None, {})
old_stack._persist_state()
self.assertEqual((old_stack.UPDATE, old_stack.COMPLETE),
old_stack.state)
self.assertEqual(create_stack.identifier(), result)

View File

@ -229,6 +229,7 @@ def setup_stack(stack_name, ctx, create_res=True):
setup_mocks(m, stack)
m.ReplayAll()
stack.create()
stack._persist_state()
m.UnsetStubs()
return stack

View File

@ -196,17 +196,10 @@ class StackTest(common.HeatTestCase):
status=stack.Stack.IN_PROGRESS)
self.stack.id = '1234'
# Simulate a deleted stack
self.m.StubOutWithMock(stack_object.Stack, 'get_by_id')
stack_object.Stack.get_by_id(self.stack.context,
self.stack.id).AndReturn(None)
self.m.ReplayAll()
self.stack.delete()
self.assertIsNone(self.stack.state_set(stack.Stack.CREATE,
stack.Stack.COMPLETE,
'test'))
self.m.VerifyAll()
def test_state_bad(self):
self.stack = stack.Stack(self.ctx, 'test_stack', self.tmpl,

View File

@ -269,6 +269,7 @@ class StackUpdateTest(common.HeatTestCase):
self.stack.store()
stack_id = self.stack.id
self.stack.create()
self.stack._persist_state()
self.assertEqual((stack.Stack.CREATE, stack.Stack.COMPLETE),
self.stack.state)
@ -292,6 +293,7 @@ class StackUpdateTest(common.HeatTestCase):
template.Template(tmpl2))
self.stack.update(updated_stack)
self.stack._persist_state()
self.assertEqual((stack.Stack.UPDATE, stack.Stack.COMPLETE),
self.stack.state)
mock_upd.assert_called_once_with(mock.ANY, mock.ANY, prop_diff1)
@ -309,6 +311,7 @@ class StackUpdateTest(common.HeatTestCase):
template.Template(tmpl3))
self.stack.update(updated_stack)
self.stack._persist_state()
self.assertEqual((stack.Stack.UPDATE, stack.Stack.COMPLETE),
self.stack.state)