revist lifecycle_hook logic

1. use one "Message" object to sink event
    2. add physical_id which is the actual to the event message
    3. check both node status and physical id before send event
    4. remove the actions that can be executed immediately from "child"
    to reduce DB interactions

Change-Id: Ie34fa1a44b575644685dc400fd70deb160c5c2e5
This commit is contained in:
ruijie 2018-04-26 02:14:14 +08:00
parent a7fea71b9e
commit 2d26ef7627
4 changed files with 97 additions and 16 deletions

View File

@ -329,6 +329,16 @@ class ClusterAction(base.Action):
if child:
dobj.Dependency.create(self.context, [aid for aid, nid in child],
self.id)
# lifecycle_hook_type has to be "zaqar"
# post message to zaqar
kwargs = {
'user': self.context.user_id,
'project': self.context.project_id,
'domain': self.context.domain_id
}
notifier = msg.Message(lifecycle_hook_target, **kwargs)
for action_id, node_id in child:
# wait lifecycle complete if node exists and is active
node = no.Node.get(self.context, node_id)
@ -337,28 +347,21 @@ class ClusterAction(base.Action):
'Skipping wait for lifecycle completion.',
node_id)
status = base.Action.READY
elif node.status != consts.NS_ACTIVE:
child.remove((action_id, node_id))
elif node.status != consts.NS_ACTIVE or not node.physical_id:
LOG.warning('Node %s is not in ACTIVE status. '
'Skipping wait for lifecycle completion.',
node_id)
status = base.Action.READY
child.remove((action_id, node_id))
else:
status = base.Action.WAITING_LIFECYCLE_COMPLETION
ao.Action.update(self.context, action_id,
{'status': status})
if status == base.Action.WAITING_LIFECYCLE_COMPLETION:
# lifecycle_hook_type has to be "zaqar"
# post message to zaqar
kwargs = {
'user': self.context.user_id,
'project': self.context.project_id,
'domain': self.context.domain_id
}
notifier = msg.Message(lifecycle_hook_target, **kwargs)
notifier.post_lifecycle_hook_message(
action_id, node_id,
action_id, node_id, node.physical_id,
consts.LIFECYCLE_NODE_TERMINATION)
dispatcher.start_action()

View File

@ -67,14 +67,15 @@ class Message(object):
return params
def post_lifecycle_hook_message(self, lifecycle_action_token,
node_id, lifecycle_transition_type):
def post_lifecycle_hook_message(self, lifecycle_action_token, node_id,
resource_id, lifecycle_transition_type):
try:
message_list = [{
"ttl": CONF.notification.ttl,
"body": {
"lifecycle_action_token": lifecycle_action_token,
"node_id": node_id,
"resource_id": resource_id,
"lifecycle_transition_type": lifecycle_transition_type
}
}]

View File

@ -180,7 +180,8 @@ class ClusterDeleteTest(base.SenlinTestCase):
}
mock_wait.return_value = (action.RES_OK, 'All dependents completed')
mock_action.return_value = 'NODE_ACTION_ID'
mock_node_get.return_value = mock.Mock(status=consts.NS_ACTIVE)
mock_node_get.return_value = mock.Mock(
status=consts.NS_ACTIVE, id='NODE_ID', physical_id="nova-server")
# do it
res_code, res_msg = action._delete_nodes(['NODE_ID'])
@ -198,6 +199,7 @@ class ClusterDeleteTest(base.SenlinTestCase):
]
mock_update.assert_has_calls(update_calls)
mock_post.assert_called_once_with('NODE_ACTION_ID', 'NODE_ID',
'nova-server',
consts.LIFECYCLE_NODE_TERMINATION)
mock_start.assert_called_once_with()
mock_wait.assert_called_once_with(action.data['hooks']['timeout'])
@ -308,7 +310,8 @@ class ClusterDeleteTest(base.SenlinTestCase):
(action.RES_OK, 'All dependents completed')
]
mock_action.return_value = 'NODE_ACTION_ID'
mock_node_get.return_value = mock.Mock(status=consts.NS_ACTIVE)
mock_node_get.return_value = mock.Mock(
status=consts.NS_ACTIVE, id='NODE_ID', physical_id="nova-server")
mock_check_status.return_value = 'WAITING_LIFECYCLE_COMPLETION'
# do it
res_code, res_msg = action._delete_nodes(['NODE_ID'])
@ -329,6 +332,7 @@ class ClusterDeleteTest(base.SenlinTestCase):
]
mock_update.assert_has_calls(update_calls)
mock_post.assert_called_once_with('NODE_ACTION_ID', 'NODE_ID',
'nova-server',
consts.LIFECYCLE_NODE_TERMINATION)
mock_start.assert_has_calls([mock.call(), mock.call()])
wait_calls = [
@ -736,7 +740,8 @@ class ClusterDeleteTest(base.SenlinTestCase):
}
mock_wait.return_value = (action.RES_OK, 'All dependents completed')
mock_action.return_value = 'NODE_ACTION_ID'
mock_node_get.return_value = mock.Mock(status=consts.NS_ACTIVE)
mock_node_get.return_value = mock.Mock(
status=consts.NS_ACTIVE, id='NODE_ID', physical_id="nova-server")
# do it
res_code, res_msg = action._delete_nodes_with_hook(
'NODE_DELETE', ['NODE_ID'], action.data['hooks'])
@ -755,6 +760,72 @@ class ClusterDeleteTest(base.SenlinTestCase):
]
mock_update.assert_has_calls(update_calls)
mock_post.assert_called_once_with('NODE_ACTION_ID', 'NODE_ID',
'nova-server',
consts.LIFECYCLE_NODE_TERMINATION)
mock_start.assert_called_once_with()
mock_wait.assert_called_once_with(action.data['hooks']['timeout'])
@mock.patch.object(ao.Action, 'update')
@mock.patch.object(ab.Action, 'create')
@mock.patch.object(no.Node, 'get')
@mock.patch.object(dobj.Dependency, 'create')
@mock.patch.object(msg.Message, 'post_lifecycle_hook_message')
@mock.patch.object(dispatcher, 'start_action')
@mock.patch.object(ca.ClusterAction, '_wait_for_dependents')
def test__delete_nodes_with_error_nodes(self, mock_wait, mock_start,
mock_post, mock_dep,
mock_node_get, mock_action,
mock_update, mock_load):
# prepare mocks
cluster = mock.Mock(id='CLUSTER_ID', desired_capacity=100)
mock_load.return_value = cluster
# cluster action is real
action = ca.ClusterAction(cluster.id, 'CLUSTER_DELETE', self.ctx)
action.id = 'CLUSTER_ACTION_ID'
action.data = {
'hooks': {
'timeout': 10,
'type': 'zaqar',
'params': {
'queue': 'myqueue'
}
}
}
mock_action.side_effect = ['NODE_ACTION_1', 'NODE_ACTION_2']
mock_wait.return_value = (action.RES_OK, 'All dependents completed')
node1 = mock.Mock(status=consts.NS_ACTIVE, id='NODE_1',
physical_id="nova-server-1")
node2 = mock.Mock(status=consts.NS_ACTIVE, id='NODE_2',
physical_id=None)
mock_node_get.side_effect = [node1, node2]
# do it
res_code, res_msg = action._delete_nodes_with_hook(
'NODE_DELETE', ['NODE_1', 'NODE_2'], action.data['hooks'])
# assertions
self.assertEqual(action.RES_OK, res_code)
self.assertEqual('All dependents completed', res_msg)
update_calls = [
mock.call(action.context, 'NODE_ACTION_1',
{'status': 'WAITING_LIFECYCLE_COMPLETION'}),
mock.call(action.context, 'NODE_ACTION_2', {'status': 'READY'})
]
mock_update.assert_has_calls(update_calls)
create_actions = [
mock.call(action.context, 'NODE_1', 'NODE_DELETE',
name='node_delete_NODE_1', cause='Derived Action with '
'Lifecycle Hook'),
mock.call(action.context, 'NODE_2', 'NODE_DELETE',
name='node_delete_NODE_2', cause='Derived Action with '
'Lifecycle Hook')
]
mock_action.assert_has_calls(create_actions)
mock_post.assert_called_once_with('NODE_ACTION_1', 'NODE_1',
node1.physical_id,
consts.LIFECYCLE_NODE_TERMINATION)
mock_start.assert_called_once_with()
mock_wait.assert_called_once_with(action.data['hooks']['timeout'])
self.assertEqual(1, mock_dep.call_count)

View File

@ -64,9 +64,11 @@ class TestMessage(base.SenlinTestCase):
lifecycle_action_token = 'ACTION_ID'
node_id = 'NODE_ID'
resource_id = 'RESOURCE_ID'
lifecycle_transition_type = 'TYPE'
message.post_lifecycle_hook_message(lifecycle_action_token, node_id,
resource_id,
lifecycle_transition_type)
mock_zc.queue_create.assert_not_called()
@ -76,6 +78,7 @@ class TestMessage(base.SenlinTestCase):
"body": {
"lifecycle_action_token": lifecycle_action_token,
"node_id": node_id,
"resource_id": resource_id,
"lifecycle_transition_type": lifecycle_transition_type
}
}]
@ -99,9 +102,11 @@ class TestMessage(base.SenlinTestCase):
lifecycle_action_token = 'ACTION_ID'
node_id = 'NODE_ID'
resource_id = 'RESOURCE_ID'
lifecycle_transition_type = 'TYPE'
message.post_lifecycle_hook_message(lifecycle_action_token, node_id,
resource_id,
lifecycle_transition_type)
mock_zc.queue_create.assert_called_once_with(**kwargs)
@ -111,6 +116,7 @@ class TestMessage(base.SenlinTestCase):
"body": {
"lifecycle_action_token": lifecycle_action_token,
"node_id": node_id,
"resource_id": resource_id,
"lifecycle_transition_type": lifecycle_transition_type
}
}]