From 719c1bc32e6cdd0efe9e9c676f286c27cef0178b Mon Sep 17 00:00:00 2001 From: Duc Truong Date: Thu, 10 Jan 2019 23:04:44 +0000 Subject: [PATCH] Set owner for actions in waiting for lifecycle - Set owner for that node stop or node delete actions that are created as part of cluster scale-in. That way those actions will be cleaned up by garbage collection if the engine dies. - Reset owner back to None if the node stop or node delete actions fail or the lifecycle timeout is hit. This is necessary for those actions to be get picked to be executed by dispatcher. Change-Id: I004386b069597af62da06fa88659babe91197229 Closes-Bug: #1811294 --- senlin/engine/actions/cluster_action.py | 20 +++++++++++--- .../engine/actions/test_cluster_action.py | 18 ++++++++----- .../tests/unit/engine/actions/test_delete.py | 26 ++++++++++++++----- 3 files changed, 48 insertions(+), 16 deletions(-) diff --git a/senlin/engine/actions/cluster_action.py b/senlin/engine/actions/cluster_action.py index 810218f56..fad956f8f 100755 --- a/senlin/engine/actions/cluster_action.py +++ b/senlin/engine/actions/cluster_action.py @@ -326,8 +326,11 @@ class ClusterAction(base.Action): for action_id, node_id in child: status = ao.Action.check_status(self.context, action_id, 0) if (status == consts.ACTION_WAITING_LIFECYCLE_COMPLETION): + # update action status and reset owner back to None + # so that the action will get picked up by dispatcher ao.Action.update(self.context, action_id, - {'status': base.Action.READY}) + {'status': base.Action.READY, + 'owner': None}) def _remove_nodes_with_hook(self, action_name, node_ids, lifecycle_hook, inputs=None): @@ -368,6 +371,7 @@ class ClusterAction(base.Action): for action_id, node_id in child: # wait lifecycle complete if node exists and is active node = no.Node.get(self.context, node_id) + owner = None if not node: LOG.warning('Node %s is not found. ' 'Skipping wait for lifecycle completion.', @@ -382,9 +386,14 @@ class ClusterAction(base.Action): child.remove((action_id, node_id)) else: status = base.Action.WAITING_LIFECYCLE_COMPLETION + # set owner for actions in waiting for lifecycle completion + # so that they will get cleaned up by dead engine gc + # if the engine dies + owner = self.owner ao.Action.update(self.context, action_id, - {'status': status}) + {'status': status, + 'owner': owner}) if status == base.Action.WAITING_LIFECYCLE_COMPLETION: notifier.post_lifecycle_hook_message( action_id, node_id, node.physical_id, @@ -1203,7 +1212,12 @@ def CompleteLifecycleProc(context, action_id): raise exception.ResourceNotFound(type='action', id=action_id) if action.get_status() == consts.ACTION_WAITING_LIFECYCLE_COMPLETION: - action.set_status(base.Action.RES_LIFECYCLE_COMPLETE) + # update action status and reset owner back to None + # so that the action will get picked up by dispatcher + ao.Action.update(context, action_id, + {'status': consts.ACTION_READY, + 'status_reason': 'Lifecycle complete.', + 'owner': None}) dispatcher.start_action() else: LOG.debug('Action %s status is not WAITING_LIFECYCLE. ' diff --git a/senlin/tests/unit/engine/actions/test_cluster_action.py b/senlin/tests/unit/engine/actions/test_cluster_action.py index 540c8230f..2795e970c 100755 --- a/senlin/tests/unit/engine/actions/test_cluster_action.py +++ b/senlin/tests/unit/engine/actions/test_cluster_action.py @@ -19,6 +19,7 @@ from senlin.engine.actions import cluster_action as ca from senlin.engine import cluster as cm from senlin.engine import dispatcher from senlin.engine import senlin_lock +from senlin.objects import action as ao from senlin.policies import base as pb from senlin.tests.unit.common import base from senlin.tests.unit.common import utils @@ -200,7 +201,8 @@ class CompleteLifecycleProcTest(base.SenlinTestCase): @mock.patch.object(dispatcher, 'start_action') @mock.patch.object(ab.Action, 'load') - def test_complete_lifecycle_proc_successful(self, mock_load, + @mock.patch.object(ao.Action, 'update') + def test_complete_lifecycle_proc_successful(self, mock_update, mock_load, mock_dispatcher_start): action = ab.Action(OBJID, 'OBJECT_ACTION', self.ctx) mock_obj = mock.Mock() @@ -208,7 +210,6 @@ class CompleteLifecycleProcTest(base.SenlinTestCase): mock_get_status = self.patchobject(action, 'get_status') mock_get_status.return_value = \ consts.ACTION_WAITING_LIFECYCLE_COMPLETION - mock_set_status = self.patchobject(action, 'set_status') mock_load.return_value = action res = ca.CompleteLifecycleProc(self.ctx, 'ACTION_ID') @@ -217,7 +218,12 @@ class CompleteLifecycleProcTest(base.SenlinTestCase): mock_load.assert_called_once_with(self.ctx, action_id='ACTION_ID', project_safe=False) mock_get_status.assert_called_once_with() - mock_set_status.assert_called_once_with(action.RES_LIFECYCLE_COMPLETE) + mock_update.assert_called_once_with( + self.ctx, 'ACTION_ID', + {'status': consts.ACTION_READY, + 'status_reason': 'Lifecycle complete.', + 'owner': None} + ) mock_dispatcher_start.assert_called_once_with() @mock.patch.object(ab.Action, 'load') @@ -230,14 +236,14 @@ class CompleteLifecycleProcTest(base.SenlinTestCase): @mock.patch.object(dispatcher, 'start_action') @mock.patch.object(ab.Action, 'load') - def test_complete_lifecycle_proc_warning(self, mock_load, + @mock.patch.object(ao.Action, 'update') + def test_complete_lifecycle_proc_warning(self, mock_update, mock_load, mock_dispatcher_start): action = ab.Action(OBJID, 'OBJECT_ACTION', self.ctx) mock_obj = mock.Mock() action.entity = mock_obj mock_get_status = self.patchobject(action, 'get_status') mock_get_status.return_value = consts.ACTION_SUCCEEDED - mock_set_status = self.patchobject(action, 'set_status') mock_load.return_value = action res = ca.CompleteLifecycleProc(self.ctx, 'ACTION_ID') @@ -246,5 +252,5 @@ class CompleteLifecycleProcTest(base.SenlinTestCase): mock_load.assert_called_once_with(self.ctx, action_id='ACTION_ID', project_safe=False) mock_get_status.assert_called_once_with() - mock_set_status.assert_not_called() + mock_update.assert_not_called() mock_dispatcher_start.assert_not_called() diff --git a/senlin/tests/unit/engine/actions/test_delete.py b/senlin/tests/unit/engine/actions/test_delete.py index 61ab5b271..a55241130 100755 --- a/senlin/tests/unit/engine/actions/test_delete.py +++ b/senlin/tests/unit/engine/actions/test_delete.py @@ -229,6 +229,7 @@ class ClusterDeleteTest(base.SenlinTestCase): } } } + action.owner = 'OWNER_ID' mock_wait.return_value = (action.RES_OK, 'All dependents completed') mock_action.return_value = 'NODE_ACTION_ID' mock_node_get.return_value = mock.Mock( @@ -248,7 +249,8 @@ class ClusterDeleteTest(base.SenlinTestCase): inputs={}) update_calls = [ mock.call(action.context, 'NODE_ACTION_ID', - {'status': 'WAITING_LIFECYCLE_COMPLETION'}), + {'status': 'WAITING_LIFECYCLE_COMPLETION', + 'owner': 'OWNER_ID'}), ] mock_update.assert_has_calls(update_calls) mock_post.assert_called_once_with('NODE_ACTION_ID', 'NODE_ID', @@ -304,6 +306,7 @@ class ClusterDeleteTest(base.SenlinTestCase): } } } + action.owner = None mock_wait.return_value = (action.RES_OK, 'All dependents completed') mock_action.return_value = 'NODE_ACTION_ID' mock_node_get.return_value = mock_node_obj @@ -320,7 +323,8 @@ class ClusterDeleteTest(base.SenlinTestCase): cause='Derived Action with Lifecycle Hook', inputs={}) update_calls = [ mock.call(action.context, 'NODE_ACTION_ID', - {'status': 'READY'}), + {'status': 'READY', + 'owner': None}), ] mock_update.assert_has_calls(update_calls) mock_post.assert_not_called() @@ -358,6 +362,7 @@ class ClusterDeleteTest(base.SenlinTestCase): } } } + action.owner = 'OWNER_ID' mock_wait.side_effect = [ (action.RES_LIFECYCLE_HOOK_TIMEOUT, 'Timeout'), (action.RES_OK, 'All dependents completed') @@ -379,9 +384,11 @@ class ClusterDeleteTest(base.SenlinTestCase): cause='Derived Action with Lifecycle Hook', inputs={}) update_calls = [ mock.call(action.context, 'NODE_ACTION_ID', - {'status': 'WAITING_LIFECYCLE_COMPLETION'}), + {'status': 'WAITING_LIFECYCLE_COMPLETION', + 'owner': 'OWNER_ID'}), mock.call(action.context, 'NODE_ACTION_ID', - {'status': 'READY'}), + {'status': 'READY', + 'owner': None}), ] mock_update.assert_has_calls(update_calls) mock_post.assert_called_once_with('NODE_ACTION_ID', 'NODE_ID', @@ -828,6 +835,7 @@ class ClusterDeleteTest(base.SenlinTestCase): } } } + action.owner = 'OWNER_ID' mock_wait.return_value = (action.RES_OK, 'All dependents completed') mock_action.return_value = 'NODE_ACTION_ID' mock_node_get.return_value = mock.Mock( @@ -846,7 +854,8 @@ class ClusterDeleteTest(base.SenlinTestCase): cause='Derived Action with Lifecycle Hook', inputs={}) update_calls = [ mock.call(action.context, 'NODE_ACTION_ID', - {'status': 'WAITING_LIFECYCLE_COMPLETION'}), + {'status': 'WAITING_LIFECYCLE_COMPLETION', + 'owner': 'OWNER_ID'}), ] mock_update.assert_has_calls(update_calls) mock_post.assert_called_once_with('NODE_ACTION_ID', 'NODE_ID', @@ -945,6 +954,7 @@ class ClusterDeleteTest(base.SenlinTestCase): } } } + action.owner = 'OWNER_ID' mock_action.side_effect = ['NODE_ACTION_1', 'NODE_ACTION_2'] mock_wait.return_value = (action.RES_OK, 'All dependents completed') node1 = mock.Mock(status=consts.NS_ACTIVE, id='NODE_1', @@ -961,8 +971,10 @@ class ClusterDeleteTest(base.SenlinTestCase): self.assertEqual('All dependents completed', res_msg) update_calls = [ mock.call(action.context, 'NODE_ACTION_1', - {'status': 'WAITING_LIFECYCLE_COMPLETION'}), - mock.call(action.context, 'NODE_ACTION_2', {'status': 'READY'}) + {'status': 'WAITING_LIFECYCLE_COMPLETION', + 'owner': 'OWNER_ID'}), + mock.call(action.context, 'NODE_ACTION_2', {'status': 'READY', + 'owner': None}) ] mock_update.assert_has_calls(update_calls) create_actions = [