Set owner for actions in waiting for lifecycle

- Set owner for that node stop or node delete actions that are created
  as part of cluster scale-in.  That way those actions will be cleaned
  up by garbage collection if the engine dies.
- Reset owner back to None if the node stop or node delete actions fail
  or the lifecycle timeout is hit.  This is necessary for those actions
  to be get picked to be executed by dispatcher.

Change-Id: I004386b069597af62da06fa88659babe91197229
Closes-Bug: #1811294
This commit is contained in:
Duc Truong 2019-01-10 23:04:44 +00:00
parent 125e403321
commit 719c1bc32e
3 changed files with 48 additions and 16 deletions

View File

@ -326,8 +326,11 @@ class ClusterAction(base.Action):
for action_id, node_id in child:
status = ao.Action.check_status(self.context, action_id, 0)
if (status == consts.ACTION_WAITING_LIFECYCLE_COMPLETION):
# update action status and reset owner back to None
# so that the action will get picked up by dispatcher
ao.Action.update(self.context, action_id,
{'status': base.Action.READY})
{'status': base.Action.READY,
'owner': None})
def _remove_nodes_with_hook(self, action_name, node_ids, lifecycle_hook,
inputs=None):
@ -368,6 +371,7 @@ class ClusterAction(base.Action):
for action_id, node_id in child:
# wait lifecycle complete if node exists and is active
node = no.Node.get(self.context, node_id)
owner = None
if not node:
LOG.warning('Node %s is not found. '
'Skipping wait for lifecycle completion.',
@ -382,9 +386,14 @@ class ClusterAction(base.Action):
child.remove((action_id, node_id))
else:
status = base.Action.WAITING_LIFECYCLE_COMPLETION
# set owner for actions in waiting for lifecycle completion
# so that they will get cleaned up by dead engine gc
# if the engine dies
owner = self.owner
ao.Action.update(self.context, action_id,
{'status': status})
{'status': status,
'owner': owner})
if status == base.Action.WAITING_LIFECYCLE_COMPLETION:
notifier.post_lifecycle_hook_message(
action_id, node_id, node.physical_id,
@ -1203,7 +1212,12 @@ def CompleteLifecycleProc(context, action_id):
raise exception.ResourceNotFound(type='action', id=action_id)
if action.get_status() == consts.ACTION_WAITING_LIFECYCLE_COMPLETION:
action.set_status(base.Action.RES_LIFECYCLE_COMPLETE)
# update action status and reset owner back to None
# so that the action will get picked up by dispatcher
ao.Action.update(context, action_id,
{'status': consts.ACTION_READY,
'status_reason': 'Lifecycle complete.',
'owner': None})
dispatcher.start_action()
else:
LOG.debug('Action %s status is not WAITING_LIFECYCLE. '

View File

@ -19,6 +19,7 @@ from senlin.engine.actions import cluster_action as ca
from senlin.engine import cluster as cm
from senlin.engine import dispatcher
from senlin.engine import senlin_lock
from senlin.objects import action as ao
from senlin.policies import base as pb
from senlin.tests.unit.common import base
from senlin.tests.unit.common import utils
@ -200,7 +201,8 @@ class CompleteLifecycleProcTest(base.SenlinTestCase):
@mock.patch.object(dispatcher, 'start_action')
@mock.patch.object(ab.Action, 'load')
def test_complete_lifecycle_proc_successful(self, mock_load,
@mock.patch.object(ao.Action, 'update')
def test_complete_lifecycle_proc_successful(self, mock_update, mock_load,
mock_dispatcher_start):
action = ab.Action(OBJID, 'OBJECT_ACTION', self.ctx)
mock_obj = mock.Mock()
@ -208,7 +210,6 @@ class CompleteLifecycleProcTest(base.SenlinTestCase):
mock_get_status = self.patchobject(action, 'get_status')
mock_get_status.return_value = \
consts.ACTION_WAITING_LIFECYCLE_COMPLETION
mock_set_status = self.patchobject(action, 'set_status')
mock_load.return_value = action
res = ca.CompleteLifecycleProc(self.ctx, 'ACTION_ID')
@ -217,7 +218,12 @@ class CompleteLifecycleProcTest(base.SenlinTestCase):
mock_load.assert_called_once_with(self.ctx, action_id='ACTION_ID',
project_safe=False)
mock_get_status.assert_called_once_with()
mock_set_status.assert_called_once_with(action.RES_LIFECYCLE_COMPLETE)
mock_update.assert_called_once_with(
self.ctx, 'ACTION_ID',
{'status': consts.ACTION_READY,
'status_reason': 'Lifecycle complete.',
'owner': None}
)
mock_dispatcher_start.assert_called_once_with()
@mock.patch.object(ab.Action, 'load')
@ -230,14 +236,14 @@ class CompleteLifecycleProcTest(base.SenlinTestCase):
@mock.patch.object(dispatcher, 'start_action')
@mock.patch.object(ab.Action, 'load')
def test_complete_lifecycle_proc_warning(self, mock_load,
@mock.patch.object(ao.Action, 'update')
def test_complete_lifecycle_proc_warning(self, mock_update, mock_load,
mock_dispatcher_start):
action = ab.Action(OBJID, 'OBJECT_ACTION', self.ctx)
mock_obj = mock.Mock()
action.entity = mock_obj
mock_get_status = self.patchobject(action, 'get_status')
mock_get_status.return_value = consts.ACTION_SUCCEEDED
mock_set_status = self.patchobject(action, 'set_status')
mock_load.return_value = action
res = ca.CompleteLifecycleProc(self.ctx, 'ACTION_ID')
@ -246,5 +252,5 @@ class CompleteLifecycleProcTest(base.SenlinTestCase):
mock_load.assert_called_once_with(self.ctx, action_id='ACTION_ID',
project_safe=False)
mock_get_status.assert_called_once_with()
mock_set_status.assert_not_called()
mock_update.assert_not_called()
mock_dispatcher_start.assert_not_called()

View File

@ -229,6 +229,7 @@ class ClusterDeleteTest(base.SenlinTestCase):
}
}
}
action.owner = 'OWNER_ID'
mock_wait.return_value = (action.RES_OK, 'All dependents completed')
mock_action.return_value = 'NODE_ACTION_ID'
mock_node_get.return_value = mock.Mock(
@ -248,7 +249,8 @@ class ClusterDeleteTest(base.SenlinTestCase):
inputs={})
update_calls = [
mock.call(action.context, 'NODE_ACTION_ID',
{'status': 'WAITING_LIFECYCLE_COMPLETION'}),
{'status': 'WAITING_LIFECYCLE_COMPLETION',
'owner': 'OWNER_ID'}),
]
mock_update.assert_has_calls(update_calls)
mock_post.assert_called_once_with('NODE_ACTION_ID', 'NODE_ID',
@ -304,6 +306,7 @@ class ClusterDeleteTest(base.SenlinTestCase):
}
}
}
action.owner = None
mock_wait.return_value = (action.RES_OK, 'All dependents completed')
mock_action.return_value = 'NODE_ACTION_ID'
mock_node_get.return_value = mock_node_obj
@ -320,7 +323,8 @@ class ClusterDeleteTest(base.SenlinTestCase):
cause='Derived Action with Lifecycle Hook', inputs={})
update_calls = [
mock.call(action.context, 'NODE_ACTION_ID',
{'status': 'READY'}),
{'status': 'READY',
'owner': None}),
]
mock_update.assert_has_calls(update_calls)
mock_post.assert_not_called()
@ -358,6 +362,7 @@ class ClusterDeleteTest(base.SenlinTestCase):
}
}
}
action.owner = 'OWNER_ID'
mock_wait.side_effect = [
(action.RES_LIFECYCLE_HOOK_TIMEOUT, 'Timeout'),
(action.RES_OK, 'All dependents completed')
@ -379,9 +384,11 @@ class ClusterDeleteTest(base.SenlinTestCase):
cause='Derived Action with Lifecycle Hook', inputs={})
update_calls = [
mock.call(action.context, 'NODE_ACTION_ID',
{'status': 'WAITING_LIFECYCLE_COMPLETION'}),
{'status': 'WAITING_LIFECYCLE_COMPLETION',
'owner': 'OWNER_ID'}),
mock.call(action.context, 'NODE_ACTION_ID',
{'status': 'READY'}),
{'status': 'READY',
'owner': None}),
]
mock_update.assert_has_calls(update_calls)
mock_post.assert_called_once_with('NODE_ACTION_ID', 'NODE_ID',
@ -828,6 +835,7 @@ class ClusterDeleteTest(base.SenlinTestCase):
}
}
}
action.owner = 'OWNER_ID'
mock_wait.return_value = (action.RES_OK, 'All dependents completed')
mock_action.return_value = 'NODE_ACTION_ID'
mock_node_get.return_value = mock.Mock(
@ -846,7 +854,8 @@ class ClusterDeleteTest(base.SenlinTestCase):
cause='Derived Action with Lifecycle Hook', inputs={})
update_calls = [
mock.call(action.context, 'NODE_ACTION_ID',
{'status': 'WAITING_LIFECYCLE_COMPLETION'}),
{'status': 'WAITING_LIFECYCLE_COMPLETION',
'owner': 'OWNER_ID'}),
]
mock_update.assert_has_calls(update_calls)
mock_post.assert_called_once_with('NODE_ACTION_ID', 'NODE_ID',
@ -945,6 +954,7 @@ class ClusterDeleteTest(base.SenlinTestCase):
}
}
}
action.owner = 'OWNER_ID'
mock_action.side_effect = ['NODE_ACTION_1', 'NODE_ACTION_2']
mock_wait.return_value = (action.RES_OK, 'All dependents completed')
node1 = mock.Mock(status=consts.NS_ACTIVE, id='NODE_1',
@ -961,8 +971,10 @@ class ClusterDeleteTest(base.SenlinTestCase):
self.assertEqual('All dependents completed', res_msg)
update_calls = [
mock.call(action.context, 'NODE_ACTION_1',
{'status': 'WAITING_LIFECYCLE_COMPLETION'}),
mock.call(action.context, 'NODE_ACTION_2', {'status': 'READY'})
{'status': 'WAITING_LIFECYCLE_COMPLETION',
'owner': 'OWNER_ID'}),
mock.call(action.context, 'NODE_ACTION_2', {'status': 'READY',
'owner': None})
]
mock_update.assert_has_calls(update_calls)
create_actions = [