senlin/senlin/engine/actions/cluster_action.py

# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import copy
import eventlet
from oslo_log import log as logging
from oslo_utils import timeutils
from osprofiler import profiler
from senlin.common import consts
from senlin.common import exception
from senlin.common import scaleutils
from senlin.common import utils
from senlin.engine.actions import base
from senlin.engine import cluster as cluster_mod
from senlin.engine import dispatcher
from senlin.engine import node as node_mod
from senlin.engine.notifications import message as msg
from senlin.engine import senlin_lock
from senlin.objects import action as ao
from senlin.objects import cluster as co
from senlin.objects import cluster_policy as cp_obj
from senlin.objects import dependency as dobj
from senlin.objects import node as no
from senlin.objects import receiver as receiver_obj
from senlin.policies import base as policy_mod
LOG = logging.getLogger(__name__)
class ClusterAction(base.Action):
"""An action that can be performed on a cluster."""
def __init__(self, target, action, context, **kwargs):
"""Constructor for cluster action.
:param target: ID of the target cluster.
:param action: Name of the action to be executed.
:param context: Context used when interacting with DB layer.
:param dict kwargs: Other optional arguments for the action.
"""
super(ClusterAction, self).__init__(target, action, context, **kwargs)
try:
self.entity = cluster_mod.Cluster.load(self.context, self.target)
self.timeout = self.entity.timeout
except Exception:
self.entity = None
def _sleep(self, period):
if period:
eventlet.sleep(period)
def _wait_for_dependents(self, lifecycle_hook_timeout=None):
"""Wait for dependent actions to complete.
:returns: A tuple containing the result and the corresponding reason.
"""
status = self.get_status()
while status != self.READY:
if status == self.FAILED:
reason = ('%(action)s [%(id)s] failed' % {
'action': self.action, 'id': self.id[:8]})
LOG.debug(reason)
return self.RES_ERROR, reason
if self.is_cancelled():
# If a cancel request arrives while waiting, cancel this operation
# immediately after signaling the children to cancel, then release
# the cluster lock.
reason = ('%(action)s [%(id)s] cancelled' % {
'action': self.action, 'id': self.id[:8]})
LOG.debug(reason)
return self.RES_CANCEL, reason
# When a child action is cancelled, the parent action updates its
# status to CANCELLED as well; this allows it to exit.
if status == self.CANCELLED:
if self.check_children_complete():
reason = ('%(action)s [%(id)s] cancelled' % {
'action': self.action, 'id': self.id[:8]})
LOG.debug(reason)
return self.RES_CANCEL, reason
if self.is_timeout():
# Action timeout, return
reason = ('%(action)s [%(id)s] timeout' % {
'action': self.action, 'id': self.id[:8]})
LOG.debug(reason)
return self.RES_TIMEOUT, reason
if (lifecycle_hook_timeout is not None and
self.is_timeout(lifecycle_hook_timeout)):
# If a lifecycle hook timeout is specified and has been reached,
# return.
reason = ('%(action)s [%(id)s] lifecycle hook timeout'
'') % {'action': self.action, 'id': self.id[:8]}
LOG.debug(reason)
return self.RES_LIFECYCLE_HOOK_TIMEOUT, reason
# Continue waiting (with reschedule)
LOG.debug('Action %s sleeps for 3 seconds', self.id)
self._sleep(3)
status = self.get_status()
dispatcher.start_action()
return self.RES_OK, 'All dependents ended with success'
def check_children_complete(self):
depended = dobj.Dependency.get_depended(self.context, self.id)
if not depended:
return True
for child in depended:
# Check whether every dependent (child) action has reached a terminal
# state (CANCELLED, SUCCEEDED or FAILED)
action = base.Action.load(self.context, action_id=child)
if action.get_status() not in (action.CANCELLED, action.SUCCEEDED,
action.FAILED):
return False
return True
def _create_nodes(self, count):
"""Utility method for node creation.
:param count: Number of nodes to create.
:returns: A tuple comprised of the result and reason.
"""
if count == 0:
return self.RES_OK, ''
placement = self.data.get('placement', None)
nodes = []
child = []
# count >= 1
for m in range(count):
index = co.Cluster.get_next_index(self.context, self.entity.id)
kwargs = {
'index': index,
'metadata': {},
'user': self.entity.user,
'project': self.entity.project,
'domain': self.entity.domain,
}
if placement is not None:
# We assume placement is a list
kwargs['data'] = {'placement': placement['placements'][m]}
name_format = self.entity.config.get("node.name.format", "")
name = utils.format_node_name(name_format, self.entity, index)
node = node_mod.Node(name, self.entity.profile_id,
self.entity.id, context=self.context,
**kwargs)
node.store(self.context)
nodes.append(node)
kwargs = {
'name': 'node_create_%s' % node.id[:8],
'cluster_id': self.entity.id,
'cause': consts.CAUSE_DERIVED,
}
action_id = base.Action.create(self.context, node.id,
consts.NODE_CREATE, **kwargs)
child.append(action_id)
# Build dependency and make the new action ready
dobj.Dependency.create(self.context, [a for a in child], self.id)
for cid in child:
ao.Action.update(self.context, cid,
{'status': base.Action.READY})
dispatcher.start_action()
# Wait for the node creation actions to complete
res, reason = self._wait_for_dependents()
if res == self.RES_OK:
nodes_added = [n.id for n in nodes]
self.outputs['nodes_added'] = nodes_added
creation = self.data.get('creation', {})
creation['nodes'] = nodes_added
self.data['creation'] = creation
for node in nodes:
self.entity.add_node(node)
else:
reason = 'Failed in creating nodes.'
return res, reason
@profiler.trace('ClusterAction.do_create', hide_args=False)
def do_create(self):
"""Handler for CLUSTER_CREATE action.
:returns: A tuple containing the result and the corresponding reason.
"""
res = self.entity.do_create(self.context)
if not res:
reason = 'Cluster creation failed.'
self.entity.set_status(self.context, consts.CS_ERROR, reason)
return self.RES_ERROR, reason
result, reason = self._create_nodes(self.entity.desired_capacity)
params = {}
if result == self.RES_OK:
reason = 'Cluster creation succeeded.'
params = {'created_at': timeutils.utcnow(True)}
self.entity.eval_status(self.context, consts.CLUSTER_CREATE, **params)
return result, reason
def _update_nodes(self, profile_id, nodes_obj):
# Get batching policy data if any
LOG.info("Updating cluster '%(cluster)s': profile='%(profile)s'.",
{'cluster': self.entity.id, 'profile': profile_id})
plan = []
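# When a batch policy is attached, self.data['update'] carries the
# batching decision, e.g. (illustrative):
#   {'pause_time': 30, 'plan': [{'node-id-1', 'node-id-2'}, {'node-id-3'}]}
# where 'plan' is a list of node-id sets updated one set at a time.
# Without it, all nodes are updated in a single batch.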
pd = self.data.get('update', None)
if pd:
pause_time = pd.get('pause_time')
plan = pd.get('plan')
else:
pause_time = 0
nodes_list = []
for node in self.entity.nodes:
nodes_list.append(node.id)
plan.append(set(nodes_list))
for node_set in plan:
child = []
nodes = list(node_set)
nodes.sort()
for node in nodes:
kwargs = {
'name': 'node_update_%s' % node[:8],
'cluster_id': self.entity.id,
'cause': consts.CAUSE_DERIVED,
'inputs': self.entity.config,
}
kwargs['inputs']['new_profile_id'] = profile_id
action_id = base.Action.create(self.context, node,
consts.NODE_UPDATE, **kwargs)
child.append(action_id)
if child:
dobj.Dependency.create(self.context, [c for c in child],
self.id)
for cid in child:
ao.Action.update(self.context, cid,
{'status': base.Action.READY})
dispatcher.start_action()
# clear the action list
child = []
result, new_reason = self._wait_for_dependents()
if result != self.RES_OK:
self.entity.eval_status(self.context,
consts.CLUSTER_UPDATE)
return result, 'Failed in updating nodes.'
# Pause between batches if a pause time was specified
if pause_time != 0:
self._sleep(pause_time)
self.entity.profile_id = profile_id
self.entity.eval_status(self.context, consts.CLUSTER_UPDATE,
profile_id=profile_id,
updated_at=timeutils.utcnow(True))
return self.RES_OK, 'Cluster update completed.'
@profiler.trace('ClusterAction.do_update', hide_args=False)
def do_update(self):
"""Handler for CLUSTER_UPDATE action.
:returns: A tuple containing the result and the corresponding reason.
"""
res = self.entity.do_update(self.context)
if not res:
reason = 'Cluster update failed.'
self.entity.set_status(self.context, consts.CS_ERROR, reason)
return self.RES_ERROR, reason
config = self.inputs.get('config')
name = self.inputs.get('name')
metadata = self.inputs.get('metadata')
timeout = self.inputs.get('timeout')
profile_id = self.inputs.get('new_profile_id')
profile_only = self.inputs.get('profile_only')
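# Simple property changes (config, name, metadata, timeout) are applied
# to the cluster object directly below. A new profile is either merely
# recorded on the cluster (when profile_only is set) or rolled out to
# every member node via derived NODE_UPDATE actions in _update_nodes().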
if config is not None:
# make sure config values are valid
try:
stop_timeout = config.get('cluster.stop_timeout_before_update')
if stop_timeout:
config['cluster.stop_timeout_before_update'] = int(
stop_timeout)
except Exception as e:
return self.RES_ERROR, str(e)
self.entity.config = config
if name is not None:
self.entity.name = name
if metadata is not None:
self.entity.metadata = metadata
if timeout is not None:
self.entity.timeout = timeout
self.entity.store(self.context)
reason = 'Cluster update completed.'
if profile_id is None:
self.entity.eval_status(self.context, consts.CLUSTER_UPDATE,
updated_at=timeutils.utcnow(True))
return self.RES_OK, reason
# profile_only's type is bool
if profile_only:
self.entity.profile_id = profile_id
self.entity.eval_status(self.context, consts.CLUSTER_UPDATE,
profile_id=profile_id,
updated_at=timeutils.utcnow(True))
return self.RES_OK, reason
# Update nodes with new profile
result, reason = self._update_nodes(profile_id, self.entity.nodes)
return result, reason
def _handle_lifecycle_timeout(self, child):
for action_id, node_id in child:
status = ao.Action.check_status(self.context, action_id, 0)
if (status == consts.ACTION_WAITING_LIFECYCLE_COMPLETION):
# update action status and reset owner back to None
# so that the action will get picked up by dispatcher
ao.Action.update(self.context, action_id,
{'status': base.Action.READY,
'owner': None})
def _remove_nodes_with_hook(self, action_name, node_ids, lifecycle_hook,
inputs=None):
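# The lifecycle_hook argument (typically set by a deletion policy's
# hook configuration) is expected to look like, e.g. (illustrative):
#   {'type': 'zaqar', 'timeout': 120, 'params': {'queue': 'my-queue'}}
# Only the 'zaqar' hook type is implemented here.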
lifecycle_hook_timeout = lifecycle_hook.get('timeout')
lifecycle_hook_type = lifecycle_hook.get('type', None)
lifecycle_hook_params = lifecycle_hook.get('params')
if lifecycle_hook_type == "zaqar":
lifecycle_hook_target = lifecycle_hook_params.get('queue')
else:
# lifecycle_hook_target = lifecycle_hook_params.get('url')
return self.RES_ERROR, ("Lifecycle hook type '%s' is not "
"implemented") % lifecycle_hook_type
child = []
for node_id in node_ids:
kwargs = {
'name': 'node_delete_%s' % node_id[:8],
'cluster_id': self.entity.id,
'cause': consts.CAUSE_DERIVED_LCH,
'inputs': inputs or {},
}
action_id = base.Action.create(self.context, node_id, action_name,
**kwargs)
child.append((action_id, node_id))
if child:
dobj.Dependency.create(self.context, [aid for aid, nid in child],
self.id)
# lifecycle_hook_type has to be "zaqar"
# post message to zaqar
kwargs = {
'user': self.context.user_id,
'project': self.context.project_id,
'domain': self.context.domain_id
}
notifier = msg.Message(lifecycle_hook_target, **kwargs)
child_copy = list(child)
for action_id, node_id in child_copy:
# Wait for lifecycle completion if the node exists and is active
node = no.Node.get(self.context, node_id)
owner = None
if not node:
LOG.warning('Node %s is not found. '
'Skipping wait for lifecycle completion.',
node_id)
status = base.Action.READY
child.remove((action_id, node_id))
elif node.status != consts.NS_ACTIVE or not node.physical_id:
LOG.warning('Node %s is not in ACTIVE status. '
'Skipping wait for lifecycle completion.',
node_id)
status = base.Action.READY
child.remove((action_id, node_id))
else:
status = base.Action.WAITING_LIFECYCLE_COMPLETION
# Set the owner for actions waiting for lifecycle completion so that
# they get cleaned up by the dead-engine GC if the engine dies.
owner = self.owner
ao.Action.update(self.context, action_id,
{'status': status,
'owner': owner})
if status == base.Action.WAITING_LIFECYCLE_COMPLETION:
notifier.post_lifecycle_hook_message(
action_id, node_id, node.physical_id,
consts.LIFECYCLE_NODE_TERMINATION)
dispatcher.start_action()
res, reason = self._wait_for_dependents(lifecycle_hook_timeout)
if res == self.RES_LIFECYCLE_HOOK_TIMEOUT:
self._handle_lifecycle_timeout(child)
if res is None or res == self.RES_LIFECYCLE_HOOK_TIMEOUT:
dispatcher.start_action()
res, reason = self._wait_for_dependents()
return res, reason
return self.RES_OK, ''
def _remove_nodes_normally(self, action_name, node_ids, inputs=None):
child = []
for node_id in node_ids:
kwargs = {
'name': 'node_delete_%s' % node_id[:8],
'cluster_id': self.entity.id,
'cause': consts.CAUSE_DERIVED,
'inputs': inputs or {},
}
action_id = base.Action.create(self.context, node_id, action_name,
**kwargs)
child.append((action_id, node_id))
if child:
dobj.Dependency.create(self.context, [aid for aid, nid in child],
self.id)
for action_id, node_id in child:
ao.Action.update(self.context, action_id,
{'status': base.Action.READY})
dispatcher.start_action()
res, reason = self._wait_for_dependents()
return res, reason
return self.RES_OK, ''
def _delete_nodes(self, node_ids):
action_name = consts.NODE_DELETE
pd = self.data.get('deletion', None)
if pd is not None:
destroy = pd.get('destroy_after_deletion', True)
if destroy is False:
action_name = consts.NODE_LEAVE
stop_node_before_delete = self.entity.config.get(
"cluster.stop_node_before_delete", False)
# get lifecycle hook properties if specified
lifecycle_hook = self.data.get('hooks')
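# Four paths below, depending on whether a lifecycle hook is present and
# whether nodes must be stopped first:
#   * hook + stop:    notify/stop via the hook, then remove normally
#   * hook only:      notify and remove via the hook
#   * stop only:      stop normally, then remove normally
#   * neither:        remove normally
# A failure while stopping only logs a warning; removal still proceeds.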
if lifecycle_hook:
if stop_node_before_delete:
# set update_parent_status to False so that a failure in stop
# operation is ignored and the parent status is not changed
res, reason = self._remove_nodes_with_hook(
consts.NODE_OPERATION, node_ids, lifecycle_hook,
{'operation': 'stop', 'update_parent_status': False})
if res != self.RES_OK:
LOG.warning('Failure while stopping nodes. '
'Proceed to delete nodes.')
res, reason = self._remove_nodes_normally(action_name,
node_ids)
else:
res, reason = self._remove_nodes_with_hook(
action_name, node_ids, lifecycle_hook)
else:
if stop_node_before_delete:
# set update_parent_status to False so that a failure in stop
# operation is ignored and the parent status is not changed
res, reason = self._remove_nodes_normally(
consts.NODE_OPERATION, node_ids,
{'operation': 'stop', 'update_parent_status': False})
if res != self.RES_OK:
LOG.warning('Failure while stopping nodes. '
'Proceed to delete nodes.')
res, reason = self._remove_nodes_normally(action_name, node_ids)
if res == self.RES_OK:
self.outputs['nodes_removed'] = node_ids
for node_id in node_ids:
self.entity.remove_node(node_id)
else:
reason = 'Failed in deleting nodes: %s' % reason
return res, reason
@profiler.trace('ClusterAction.do_delete', hide_args=False)
def do_delete(self):
"""Handler for the CLUSTER_DELETE action.
:returns: A tuple containing the result and the corresponding reason.
"""
# Detach policies before delete
policies = cp_obj.ClusterPolicy.get_all(self.context, self.entity.id)
for policy in policies:
res, reason = self.entity.detach_policy(self.context,
policy.policy_id)
if res:
self.entity.store(self.context)
else:
return self.RES_ERROR, ("Unable to detach policy {} before "
"deletion.".format(policy.id))
# Delete receivers
receivers = receiver_obj.Receiver.get_all(
self.context, filters={'cluster_id': self.entity.id})
for receiver in receivers:
receiver_obj.Receiver.delete(self.context, receiver.id)
reason = 'Deletion in progress.'
self.entity.set_status(self.context, consts.CS_DELETING, reason)
node_ids = [node.id for node in self.entity.nodes]
# For cluster delete, we delete the nodes
data = {
'deletion': {
'destroy_after_deletion': True
}
}
self.data.update(data)
result, reason = self._delete_nodes(node_ids)
if result != self.RES_OK:
self.entity.eval_status(self.context, consts.CLUSTER_DELETE)
return result, reason
res = self.entity.do_delete(self.context)
if not res:
self.entity.eval_status(self.context, consts.CLUSTER_DELETE)
return self.RES_ERROR, 'Cannot delete cluster object.'
return self.RES_OK, reason
@profiler.trace('ClusterAction.do_add_nodes', hide_args=False)
def do_add_nodes(self):
"""Handler for the CLUSTER_ADD_NODES action.
TODO(anyone): handle placement data
:returns: A tuple containing the result and the corresponding reason.
"""
node_ids = self.inputs.get('nodes')
errors = []
nodes = []
for nid in node_ids:
node = no.Node.get(self.context, nid)
if not node:
errors.append('Node %s is not found.' % nid)
continue
if node.cluster_id:
errors.append('Node %(n)s is already owned by cluster %(c)s.'
'' % {'n': nid, 'c': node.cluster_id})
continue
if node.status != consts.NS_ACTIVE:
errors.append('Node %s is not in ACTIVE status.' % nid)
continue
nodes.append(node)
if len(errors) > 0:
return self.RES_ERROR, '\n'.join(errors)
reason = 'Completed adding nodes.'
# check the size constraint
current = no.Node.count_by_cluster(self.context, self.target)
desired = current + len(node_ids)
res = scaleutils.check_size_params(self.entity, desired, None,
None, True)
if res:
return self.RES_ERROR, res
child = []
for node in nodes:
nid = node.id
kwargs = {
'name': 'node_join_%s' % nid[:8],
'cluster_id': self.entity.id,
'cause': consts.CAUSE_DERIVED,
'inputs': {'cluster_id': self.target},
}
action_id = base.Action.create(self.context, nid, consts.NODE_JOIN,
**kwargs)
child.append(action_id)
if child:
dobj.Dependency.create(self.context, [c for c in child], self.id)
for cid in child:
ao.Action.update(self.context, cid,
{'status': base.Action.READY})
dispatcher.start_action()
# Wait for dependent action if any
result, new_reason = self._wait_for_dependents()
if result != self.RES_OK:
reason = new_reason
else:
self.entity.eval_status(self.context, consts.CLUSTER_ADD_NODES,
desired_capacity=desired)
self.outputs['nodes_added'] = node_ids
creation = self.data.get('creation', {})
creation['nodes'] = node_ids
self.data['creation'] = creation
for node in nodes:
obj = node_mod.Node.load(self.context, db_node=node)
self.entity.add_node(obj)
return result, reason
@profiler.trace('ClusterAction.do_del_nodes', hide_args=False)
def do_del_nodes(self):
"""Handler for the CLUSTER_DEL_NODES action.
:returns: A tuple containing the result and the corresponding reason.
"""
# Use policy decision if any, or fall back to defaults
destroy_after_deletion = self.inputs.get('destroy_after_deletion',
False)
grace_period = 0
reduce_desired_capacity = True
pd = self.data.get('deletion', None)
if pd is not None:
destroy_after_deletion = pd.get('destroy_after_deletion', False)
grace_period = pd.get('grace_period', 0)
reduce_desired_capacity = pd.get('reduce_desired_capacity', True)
data = {
'deletion': {
'destroy_after_deletion': destroy_after_deletion,
'grace_period': grace_period,
'reduce_desired_capacity': reduce_desired_capacity,
}
}
self.data.update(data)
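# The consolidated 'deletion' entry above is what _delete_nodes() will
# read to decide whether nodes are destroyed (NODE_DELETE) or merely
# removed from the cluster (NODE_LEAVE).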
nodes = self.inputs.get('candidates', [])
node_ids = copy.deepcopy(nodes)
errors = []
for node_id in node_ids:
node = no.Node.get(self.context, node_id)
# The return value is None if node not found
if not node:
errors.append(node_id)
continue
if ((not node.cluster_id) or (node.cluster_id != self.target)):
nodes.remove(node_id)
if len(errors) > 0:
msg = "Nodes not found: %s." % errors
return self.RES_ERROR, msg
reason = 'Completed deleting nodes.'
if len(nodes) == 0:
return self.RES_OK, reason
# check the size constraint
current = no.Node.count_by_cluster(self.context, self.target)
desired = current - len(nodes)
res = scaleutils.check_size_params(self.entity, desired, None,
None, True)
if res:
return self.RES_ERROR, res
# sleep period
self._sleep(grace_period)
result, new_reason = self._delete_nodes(nodes)
params = {}
if result != self.RES_OK:
reason = new_reason
if reduce_desired_capacity:
params['desired_capacity'] = desired
self.entity.eval_status(self.context,
consts.CLUSTER_DEL_NODES, **params)
return result, reason
@profiler.trace('ClusterAction.do_replace_nodes', hide_args=False)
def do_replace_nodes(self):
"""Handler for the CLUSTER_REPLACE_NODES action.
:returns: A tuple containing the result and the corresponding reason.
"""
node_dict = self.inputs.get('candidates')
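# 'candidates' is expected to be a dict mapping each original member
# node to its replacement, e.g. (illustrative):
#   {'old-node-id': 'new-node-id'}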
if not node_dict:
return (
self.RES_ERROR,
'Candidates must be a non-empty dict.'
' Instead got {}'.format(node_dict))
errors = []
original_nodes = []
replacement_nodes = []
for (original, replacement) in node_dict.items():
original_node = no.Node.get(self.context, original)
replacement_node = no.Node.get(self.context, replacement)
# The return value is None if node not found
if not original_node:
errors.append('Original node %s not found.' % original)
continue
if not replacement_node:
errors.append('Replacement node %s not found.' % replacement)
continue
if original_node.cluster_id != self.target:
errors.append('Node %(o)s is not a member of the '
'cluster %(c)s.' % {'o': original,
'c': self.target})
continue
if replacement_node.cluster_id:
errors.append(('Node %(r)s is already owned by cluster %(c)s.'
) % {'r': replacement,
'c': replacement_node.cluster_id})
continue
if replacement_node.status != consts.NS_ACTIVE:
errors.append('Node %s is not in ACTIVE status.' % replacement)
continue
original_nodes.append(original_node)
replacement_nodes.append(replacement_node)
if len(errors) > 0:
return self.RES_ERROR, '\n'.join(errors)
result = self.RES_OK
reason = 'Completed replacing nodes.'
children = []
for (original, replacement) in node_dict.items():
kwargs = {
'cluster_id': self.entity.id,
'cause': consts.CAUSE_DERIVED,
}
# node_leave action
kwargs['name'] = 'node_leave_%s' % original[:8]
leave_action_id = base.Action.create(self.context, original,
consts.NODE_LEAVE, **kwargs)
# node_join action
kwargs['name'] = 'node_join_%s' % replacement[:8]
kwargs['inputs'] = {'cluster_id': self.target}
join_action_id = base.Action.create(self.context, replacement,
consts.NODE_JOIN, **kwargs)
children.append((join_action_id, leave_action_id))
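# Below, this action is wired to wait on the NODE_JOIN actions, and each
# join/leave pair is additionally linked through its own Dependency
# record so the pair is processed in order rather than concurrently.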
if children:
dobj.Dependency.create(self.context, [c[0] for c in children],
self.id)
for child in children:
join_id = child[0]
leave_id = child[1]
ao.Action.update(self.context, join_id,
{'status': base.Action.READY})
dobj.Dependency.create(self.context, [join_id], leave_id)
ao.Action.update(self.context, leave_id,
{'status': base.Action.READY})
dispatcher.start_action()
result, new_reason = self._wait_for_dependents()
if result != self.RES_OK:
reason = new_reason
else:
for n in range(len(original_nodes)):
self.entity.remove_node(original_nodes[n])
self.entity.add_node(replacement_nodes[n])
self.entity.eval_status(self.context, consts.CLUSTER_REPLACE_NODES)
return result, reason
@profiler.trace('ClusterAction.do_check', hide_args=False)
def do_check(self):
"""Handler for CLUSTER_CHECK action.
:returns: A tuple containing the result and the corresponding reason.
"""
self.entity.do_check(self.context)
child = []
res = self.RES_OK
reason = 'Cluster checking completed.'
for node in self.entity.nodes:
node_id = node.id
need_delete = self.inputs.get('delete_check_action', False)
# Delete old NODE_CHECK action records (succeeded or failed) if requested
if need_delete:
ao.Action.delete_by_target(
self.context, node_id, action=[consts.NODE_CHECK],
status=[consts.ACTION_SUCCEEDED, consts.ACTION_FAILED])
name = 'node_check_%s' % node_id[:8]
action_id = base.Action.create(
self.context, node_id, consts.NODE_CHECK, name=name,
cause=consts.CAUSE_DERIVED,
inputs=self.inputs
)
child.append(action_id)
if child:
dobj.Dependency.create(self.context, [c for c in child], self.id)
for cid in child:
ao.Action.update(self.context, cid,
{'status': base.Action.READY})
dispatcher.start_action()
# Wait for dependent action if any
res, new_reason = self._wait_for_dependents()
if res != self.RES_OK:
reason = new_reason
self.entity.eval_status(self.context, consts.CLUSTER_CHECK)
return res, reason
def _check_capacity(self):
cluster = self.entity
current = len(cluster.nodes)
desired = cluster.desired_capacity
if current < desired:
count = desired - current
self._create_nodes(count)
if current > desired:
count = current - desired
nodes = no.Node.get_all_by_cluster(self.context, cluster.id)
candidates = scaleutils.nodes_by_random(nodes, count)
self._delete_nodes(candidates)
@profiler.trace('ClusterAction.do_recover', hide_args=False)
def do_recover(self):
"""Handler for the CLUSTER_RECOVER action.
:returns: A tuple containing the result and the corresponding reason.
"""
self.entity.do_recover(self.context)
inputs = {}
check = self.inputs.get('check', False)
inputs['operation'] = self.inputs.get('operation', None)
inputs['operation_params'] = self.inputs.get('operation_params', None)
children = []
for node in self.entity.nodes:
node_id = node.id
if check:
node = node_mod.Node.load(self.context, node_id=node_id)
node.do_check(self.context)
if node.status == consts.NS_ACTIVE:
continue
action_id = base.Action.create(
self.context, node_id, consts.NODE_RECOVER,
name='node_recover_%s' % node_id[:8],
cause=consts.CAUSE_DERIVED, inputs=inputs,
)
children.append(action_id)
res = self.RES_OK
reason = 'Cluster recovery succeeded.'
if children:
dobj.Dependency.create(self.context, [c for c in children],
self.id)
for cid in children:
ao.Action.update(self.context, cid,
{'status': consts.ACTION_READY})
dispatcher.start_action()
# Wait for dependent action if any
res, new_reason = self._wait_for_dependents()
if res != self.RES_OK:
reason = new_reason
check_capacity = self.inputs.get('check_capacity', False)
if check_capacity is True:
self._check_capacity()
self.entity.eval_status(self.context, consts.CLUSTER_RECOVER)
return res, reason
def _update_cluster_size(self, desired):
"""Private function for updating cluster properties."""
kwargs = {'desired_capacity': desired}
min_size = self.inputs.get(consts.ADJUSTMENT_MIN_SIZE, None)
max_size = self.inputs.get(consts.ADJUSTMENT_MAX_SIZE, None)
if min_size is not None:
kwargs['min_size'] = min_size
if max_size is not None:
kwargs['max_size'] = max_size
self.entity.set_status(self.context, consts.CS_RESIZING,
'Cluster resize started.', **kwargs)
@profiler.trace('ClusterAction.do_resize', hide_args=False)
def do_resize(self):
"""Handler for the CLUSTER_RESIZE action.
:returns: A tuple containing the result and the corresponding reason.
"""
# If no policy decision(s) are found, use the action inputs directly.
# Note that 'parse_resize_params' is capable of calculating the desired
# capacity and handling best-effort scaling; it also verifies that the
# inputs are valid.
curr_capacity = no.Node.count_by_cluster(self.context, self.entity.id)
if 'creation' not in self.data and 'deletion' not in self.data:
result, reason = scaleutils.parse_resize_params(self, self.entity,
curr_capacity)
if result != self.RES_OK:
return result, reason
# Action inputs have been consolidated into action data by now
reason = 'Cluster resize succeeded.'
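# At this point self.data holds either a 'deletion' entry, e.g.
# (illustrative) {'count': 2, 'candidates': [...], 'grace_period': 0},
# or a 'creation' entry such as {'count': 2}.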
if 'deletion' in self.data:
count = self.data['deletion']['count']
candidates = self.data['deletion'].get('candidates', [])
# Choose victims randomly if not already picked
if not candidates:
node_list = self.entity.nodes
candidates = scaleutils.nodes_by_random(node_list, count)
self._update_cluster_size(curr_capacity - count)
grace_period = self.data['deletion'].get('grace_period', 0)
self._sleep(grace_period)
result, new_reason = self._delete_nodes(candidates)
else:
# 'creation' in self.data:
count = self.data['creation']['count']
self._update_cluster_size(curr_capacity + count)
result, new_reason = self._create_nodes(count)
if result != self.RES_OK:
reason = new_reason
self.entity.eval_status(self.context, consts.CLUSTER_RESIZE)
return result, reason
@profiler.trace('ClusterAction.do_scale_out', hide_args=False)
def do_scale_out(self):
"""Handler for the CLUSTER_SCALE_OUT action.
:returns: A tuple containing the result and the corresponding reason.
"""
# Use the policy output if any; otherwise the count is taken from the
# inputs, defaulting to 1.
pd = self.data.get('creation', None)
if pd is not None:
count = pd.get('count', 1)
else:
# If no scaling policy is attached, use the input count directly
value = self.inputs.get('count', 1)
success, count = utils.get_positive_int(value)
if not success:
reason = 'Invalid count (%s) for scaling out.' % value
return self.RES_ERROR, reason
# check provided params against current properties
# desired is checked when strict is True
curr_size = no.Node.count_by_cluster(self.context, self.target)
new_size = curr_size + count
result = scaleutils.check_size_params(self.entity, new_size,
None, None, True)
if result:
return self.RES_ERROR, result
self.entity.set_status(self.context, consts.CS_RESIZING,
'Cluster scale out started.',
desired_capacity=new_size)
result, reason = self._create_nodes(count)
if result == self.RES_OK:
reason = 'Cluster scaling succeeded.'
self.entity.eval_status(self.context, consts.CLUSTER_SCALE_OUT)
return result, reason
@profiler.trace('ClusterAction.do_scale_in', hide_args=False)
def do_scale_in(self):
"""Handler for the CLUSTER_SCALE_IN action.
:returns: A tuple containing the result and the corresponding reason.
"""
# Use policy data if any; a deletion policy and/or a scaling policy
# might be attached.
pd = self.data.get('deletion', None)
grace_period = 0
if pd:
grace_period = pd.get('grace_period', 0)
candidates = pd.get('candidates', [])
# if scaling policy is attached, get 'count' from action data
count = len(candidates) or pd['count']
else:
# If no scaling policy is attached, use the input count directly
candidates = []
value = self.inputs.get('count', 1)
success, count = utils.get_positive_int(value)
if not success:
reason = 'Invalid count (%s) for scaling in.' % value
return self.RES_ERROR, reason
# check provided params against current properties
# desired is checked when strict is True
curr_size = no.Node.count_by_cluster(self.context, self.target)
if count > curr_size:
LOG.warning("Triming count (%(count)s) to current cluster size "
"(%(curr)s) for scaling in",
{'count': count, 'curr': curr_size})
count = curr_size
new_size = curr_size - count
result = scaleutils.check_size_params(self.entity, new_size,
None, None, True)
if result:
return self.RES_ERROR, result
self.entity.set_status(self.context, consts.CS_RESIZING,
'Cluster scale in started.',
desired_capacity=new_size)
# Choose victims randomly
if len(candidates) == 0:
candidates = scaleutils.nodes_by_random(self.entity.nodes, count)
# Sleep period
self._sleep(grace_period)
result, reason = self._delete_nodes(candidates)
if result == self.RES_OK:
reason = 'Cluster scaling succeeded.'
self.entity.eval_status(self.context, consts.CLUSTER_SCALE_IN)
return result, reason
@profiler.trace('ClusterAction.do_attach_policy', hide_args=False)
def do_attach_policy(self):
"""Handler for the CLUSTER_ATTACH_POLICY action.
:returns: A tuple containing the result and the corresponding reason.
"""
inputs = dict(self.inputs)
policy_id = inputs.pop('policy_id', None)
if not policy_id:
return self.RES_ERROR, 'Policy not specified.'
res, reason = self.entity.attach_policy(self.context, policy_id,
inputs)
result = self.RES_OK if res else self.RES_ERROR
# Store cluster since its data could have been updated
if result == self.RES_OK:
self.entity.store(self.context)
return result, reason
@profiler.trace('ClusterAction.do_detach_policy', hide_args=False)
def do_detach_policy(self):
"""Handler for the CLUSTER_DETACH_POLICY action.
:returns: A tuple containing the result and the corresponding reason.
"""
policy_id = self.inputs.get('policy_id', None)
if not policy_id:
return self.RES_ERROR, 'Policy not specified.'
res, reason = self.entity.detach_policy(self.context, policy_id)
result = self.RES_OK if res else self.RES_ERROR
# Store cluster since its data could have been updated
if result == self.RES_OK:
self.entity.store(self.context)
return result, reason
@profiler.trace('ClusterAction.do_update_policy', hide_args=False)
def do_update_policy(self):
"""Handler for the CLUSTER_UPDATE_POLICY action.
:returns: A tuple containing the result and the corresponding reason.
"""
policy_id = self.inputs.pop('policy_id', None)
if not policy_id:
return self.RES_ERROR, 'Policy not specified.'
res, reason = self.entity.update_policy(self.context, policy_id,
**self.inputs)
result = self.RES_OK if res else self.RES_ERROR
return result, reason
@profiler.trace('ClusterAction.do_operation', hide_args=False)
def do_operation(self):
"""Handler for CLUSTER_OPERATION action.
Note that the inputs for the action should contain the following items:
* ``nodes``: The nodes to operate on;
* ``operation``: The operation to be performed;
* ``params``: The parameters corresponding to the operation.
:returns: A tuple containing the result and the corresponding reason.
"""
inputs = copy.deepcopy(self.inputs)
operation = inputs['operation']
self.entity.do_operation(self.context, operation=operation)
child = []
res = self.RES_OK
reason = "Cluster operation '%s' completed." % operation
nodes = inputs.pop('nodes')
for node_id in nodes:
action_id = base.Action.create(
self.context, node_id, consts.NODE_OPERATION,
name='node_%s_%s' % (operation, node_id[:8]),
cause=consts.CAUSE_DERIVED,
inputs=inputs,
)
child.append(action_id)
if child:
dobj.Dependency.create(self.context, [c for c in child], self.id)
for cid in child:
ao.Action.update(self.context, cid,
{'status': base.Action.READY})
dispatcher.start_action()
# Wait for dependent action if any
res, new_reason = self._wait_for_dependents()
if res != self.RES_OK:
reason = new_reason
self.entity.eval_status(self.context, operation)
return res, reason
def _execute(self, **kwargs):
"""Private method for action execution.
This function searches for the handler method based on the action name
and wraps the action execution with policy checks.
:returns: A tuple containing the result and the corresponding reason.
"""
# do pre-action policy checking
self.policy_check(self.entity.id, 'BEFORE')
if self.data['status'] != policy_mod.CHECK_OK:
reason = 'Policy check failure: %s' % self.data['reason']
return self.RES_ERROR, reason
result = self.RES_OK
action_name = self.action.lower()
method_name = action_name.replace('cluster', 'do')
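# e.g. 'CLUSTER_SCALE_OUT' -> 'cluster_scale_out' -> 'do_scale_out'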
method = getattr(self, method_name, None)
if method is None:
reason = 'Unsupported action: %s.' % self.action
return self.RES_ERROR, reason
result, reason = method()
# do post-action policy checking
self.inputs['action_result'] = result
self.policy_check(self.entity.id, 'AFTER')
if self.data['status'] != policy_mod.CHECK_OK:
reason = 'Policy check failure: %s' % self.data['reason']
return self.RES_ERROR, reason
return result, reason
def execute(self, **kwargs):
"""Wrapper of action execution.
This is mainly a wrapper that executes an action with cluster lock
acquired.
:returns: A tuple (res, reason) that indicates whether the execution
was a success and why if it wasn't a success.
"""
# Try to lock the cluster before doing the real operation
forced = (self.action == consts.CLUSTER_DELETE)
res = senlin_lock.cluster_lock_acquire(self.context, self.target,
self.id, self.owner,
senlin_lock.CLUSTER_SCOPE,
forced)
# Failed to acquire lock, return RES_RETRY
if not res:
return self.RES_RETRY, 'Failed in locking cluster.'
try:
# Refresh entity state to avoid stale data in action.
self.entity = cluster_mod.Cluster.load(self.context, self.target)
res, reason = self._execute(**kwargs)
finally:
senlin_lock.cluster_lock_release(self.target, self.id,
senlin_lock.CLUSTER_SCOPE)
return res, reason
def cancel(self):
"""Handler to cancel the execution of action."""
return self.RES_OK
def release_lock(self):
"""Handler to release the lock."""
senlin_lock.cluster_lock_release(self.target, self.id,
senlin_lock.CLUSTER_SCOPE)
return self.RES_OK
def CompleteLifecycleProc(context, action_id):
"""Complete lifecycle process."""
action = base.Action.load(context, action_id=action_id, project_safe=False)
if action is None:
LOG.error("Action %s could not be found.", action_id)
raise exception.ResourceNotFound(type='action', id=action_id)
if action.get_status() == consts.ACTION_WAITING_LIFECYCLE_COMPLETION:
# update action status and reset owner back to None
# so that the action will get picked up by dispatcher
ao.Action.update(context, action_id,
{'status': consts.ACTION_READY,
'status_reason': 'Lifecycle complete.',
'owner': None})
dispatcher.start_action()
else:
LOG.debug('Action %s status is not WAITING_LIFECYCLE. '
'Skip CompleteLifecycleProc', action_id)
return False
return True