senlin/senlin/engine/actions/cluster_action.py

# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import copy
import eventlet
from oslo_log import log as logging
from oslo_utils import timeutils
from osprofiler import profiler
from senlin.common import consts
from senlin.common import exception
from senlin.common import scaleutils
from senlin.common import utils
from senlin.engine.actions import base
from senlin.engine import cluster as cluster_mod
from senlin.engine import dispatcher
from senlin.engine import node as node_mod
from senlin.engine.notifications import message as msg
from senlin.engine import senlin_lock
from senlin.objects import action as ao
from senlin.objects import cluster as co
from senlin.objects import cluster_policy as cp_obj
from senlin.objects import dependency as dobj
from senlin.objects import node as no
from senlin.objects import receiver as receiver_obj
from senlin.policies import base as policy_mod
LOG = logging.getLogger(__name__)
class ClusterAction(base.Action):
"""An action that can be performed on a cluster."""
def __init__(self, target, action, context, **kwargs):
"""Constructor for cluster action.
:param target: ID of the target cluster.
:param action: Name of the action to be executed.
:param context: Context used when interacting with DB layer.
:param dict kwargs: Other optional arguments for the action.
"""
super(ClusterAction, self).__init__(target, action, context, **kwargs)
try:
self.entity = cluster_mod.Cluster.load(self.context, self.target)
self.timeout = self.entity.timeout
except Exception:
self.entity = None
def _sleep(self, period):
if period:
eventlet.sleep(period)
def _wait_for_dependents(self, lifecycle_hook_timeout=None):
"""Wait for dependent actions to complete.
:returns: A tuple containing the result and the corresponding reason.
"""
status = self.get_status()
while status != self.READY:
if status == self.FAILED:
reason = ('%(action)s [%(id)s] failed' % {
'action': self.action, 'id': self.id[:8]})
LOG.debug(reason)
return self.RES_ERROR, reason
if self.is_cancelled():
# If a cancel request arrives while waiting, cancel this operation
# immediately after signaling the children to cancel, then release
# the cluster lock.
reason = ('%(action)s [%(id)s] cancelled' % {
'action': self.action, 'id': self.id[:8]})
LOG.debug(reason)
return self.RES_CANCEL, reason
# When a child action is cancelled, the parent action updates its
# status to CANCELLED as well; this allows it to exit.
if status == self.CANCELLED:
if self.check_children_complete():
reason = ('%(action)s [%(id)s] cancelled' % {
'action': self.action, 'id': self.id[:8]})
LOG.debug(reason)
return self.RES_CANCEL, reason
if self.is_timeout():
# Action timeout, return
reason = ('%(action)s [%(id)s] timeout' % {
'action': self.action, 'id': self.id[:8]})
LOG.debug(reason)
return self.RES_TIMEOUT, reason
if (lifecycle_hook_timeout is not None and
self.is_timeout(lifecycle_hook_timeout)):
# If a lifecycle hook timeout is specified and has been reached,
# return.
reason = ('%(action)s [%(id)s] lifecycle hook timeout'
'') % {'action': self.action, 'id': self.id[:8]}
LOG.debug(reason)
return self.RES_LIFECYCLE_HOOK_TIMEOUT, reason
# Continue waiting (with reschedule)
LOG.debug('Action %s sleeps for 3 seconds', self.id)
self._sleep(3)
status = self.get_status()
dispatcher.start_action()
return self.RES_OK, 'All dependents ended with success'
def check_children_complete(self):
depended = dobj.Dependency.get_depended(self.context, self.id)
if not depended:
return True
for child in depended:
# Check whether every dependent (child) action has reached a terminal
# state (CANCELLED, SUCCEEDED or FAILED)
action = base.Action.load(self.context, action_id=child)
if action.get_status() not in (action.CANCELLED, action.SUCCEEDED,
action.FAILED):
return False
return True
def _create_nodes(self, count):
"""Utility method for node creation.
:param count: Number of nodes to create.
:returns: A tuple comprised of the result and reason.
"""
if count == 0:
return self.RES_OK, ''
placement = self.data.get('placement', None)
nodes = []
child = []
# count >= 1
for m in range(count):
index = co.Cluster.get_next_index(self.context, self.entity.id)
kwargs = {
'index': index,
'metadata': {},
'user': self.entity.user,
'project': self.entity.project,
'domain': self.entity.domain,
}
if placement is not None:
# We assume placement is a list
kwargs['data'] = {'placement': placement['placements'][m]}
name_format = self.entity.config.get("node.name.format", "")
name = utils.format_node_name(name_format, self.entity, index)
node = node_mod.Node(name, self.entity.profile_id,
self.entity.id, context=self.context,
**kwargs)
node.store(self.context)
nodes.append(node)
kwargs = {
'name': 'node_create_%s' % node.id[:8],
'cluster_id': self.entity.id,
'cause': consts.CAUSE_DERIVED,
}
action_id = base.Action.create(self.context, node.id,
consts.NODE_CREATE, **kwargs)
child.append(action_id)
# Build dependency and make the new action ready
dobj.Dependency.create(self.context, [a for a in child], self.id)
for cid in child:
ao.Action.update(self.context, cid,
{'status': base.Action.READY})
dispatcher.start_action()
# Wait for the node creation actions to complete
res, reason = self._wait_for_dependents()
if res == self.RES_OK:
nodes_added = [n.id for n in nodes]
self.outputs['nodes_added'] = nodes_added
creation = self.data.get('creation', {})
creation['nodes'] = nodes_added
self.data['creation'] = creation
for node in nodes:
self.entity.add_node(node)
else:
reason = 'Failed in creating nodes.'
return res, reason
@profiler.trace('ClusterAction.do_create', hide_args=False)
def do_create(self):
"""Handler for CLUSTER_CREATE action.
:returns: A tuple containing the result and the corresponding reason.
"""
res = self.entity.do_create(self.context)
if not res:
reason = 'Cluster creation failed.'
self.entity.set_status(self.context, consts.CS_ERROR, reason)
return self.RES_ERROR, reason
result, reason = self._create_nodes(self.entity.desired_capacity)
params = {}
if result == self.RES_OK:
reason = 'Cluster creation succeeded.'
params = {'created_at': timeutils.utcnow(True)}
self.entity.eval_status(self.context, consts.CLUSTER_CREATE, **params)
return result, reason
def _update_nodes(self, profile_id, nodes_obj):
# Get batching policy data if any
LOG.info("Updating cluster '%(cluster)s': profile='%(profile)s'.",
{'cluster': self.entity.id, 'profile': profile_id})
plan = []
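# When a batch policy is attached, self.data['update'] carries the
# batching decision, e.g. (illustrative):
#   {'pause_time': 30, 'plan': [{'node-id-1', 'node-id-2'}, {'node-id-3'}]}
# where 'plan' is a list of node-id sets updated one set at a time.
# Without it, all nodes are updated in a single batch.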
pd = self.data.get('update', None)
if pd:
pause_time = pd.get('pause_time')
plan = pd.get('plan')
else:
pause_time = 0
nodes_list = []
for node in self.entity.nodes:
nodes_list.append(node.id)
plan.append(set(nodes_list))
for node_set in plan:
child = []
nodes = list(node_set)
nodes.sort()
for node in nodes:
kwargs = {
'name': 'node_update_%s' % node[:8],
'cluster_id': self.entity.id,
'cause': consts.CAUSE_DERIVED,
'inputs': self.entity.config,
}
kwargs['inputs']['new_profile_id'] = profile_id
action_id = base.Action.create(self.context, node,
consts.NODE_UPDATE, **kwargs)
child.append(action_id)
if child:
dobj.Dependency.create(self.context, [c for c in child],
self.id)
for cid in child:
ao.Action.update(self.context, cid,
{'status': base.Action.READY})
dispatcher.start_action()
# clear the action list
child = []
result, new_reason = self._wait_for_dependents()
if result != self.RES_OK:
self.entity.eval_status(self.context,
consts.CLUSTER_UPDATE)
return result, 'Failed in updating nodes.'
# Pause between batches if a pause time was specified
if pause_time != 0:
self._sleep(pause_time)
self.entity.profile_id = profile_id
self.entity.eval_status(self.context, consts.CLUSTER_UPDATE,
profile_id=profile_id,
updated_at=timeutils.utcnow(True))
return self.RES_OK, 'Cluster update completed.'
@profiler.trace('ClusterAction.do_update', hide_args=False)
def do_update(self):
"""Handler for CLUSTER_UPDATE action.
:returns: A tuple containing the result and the corresponding reason.
"""
res = self.entity.do_update(self.context)
if not res:
reason = 'Cluster update failed.'
self.entity.set_status(self.context, consts.CS_ERROR, reason)
return self.RES_ERROR, reason
config = self.inputs.get('config')
name = self.inputs.get('name')
metadata = self.inputs.get('metadata')
timeout = self.inputs.get('timeout')
profile_id = self.inputs.get('new_profile_id')
profile_only = self.inputs.get('profile_only')
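# Simple property changes (config, name, metadata, timeout) are applied
# to the cluster object directly below. A new profile is either merely
# recorded on the cluster (when profile_only is set) or rolled out to
# every member node via derived NODE_UPDATE actions in _update_nodes().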
if config is not None:
# make sure config values are valid
try:
stop_timeout = config.get('cluster.stop_timeout_before_update')
if stop_timeout:
config['cluster.stop_timeout_before_update'] = int(
stop_timeout)
except Exception as e:
return self.RES_ERROR, str(e)
self.entity.config = config
if name is not None:
self.entity.name = name
if metadata is not None:
self.entity.metadata = metadata
if timeout is not None:
self.entity.timeout = timeout
self.entity.store(self.context)
reason = 'Cluster update completed.'
if profile_id is None:
self.entity.eval_status(self.context, consts.CLUSTER_UPDATE,
updated_at=timeutils.utcnow(True))
return self.RES_OK, reason
# profile_only's type is bool
if profile_only:
self.entity.profile_id = profile_id
self.entity.eval_status(self.context, consts.CLUSTER_UPDATE,
profile_id=profile_id,
updated_at=timeutils.utcnow(True))
return self.RES_OK, reason
# Update nodes with new profile
result, reason = self._update_nodes(profile_id, self.entity.nodes)
return result, reason
def _handle_lifecycle_timeout(self, child):
for action_id, node_id in child:
status = ao.Action.check_status(self.context, action_id, 0)
if (status == consts.ACTION_WAITING_LIFECYCLE_COMPLETION):
# update action status and reset owner back to None
# so that the action will get picked up by dispatcher
ao.Action.update(self.context, action_id,
{'status': base.Action.READY,
'owner': None})
def _remove_nodes_with_hook(self, action_name, node_ids, lifecycle_hook,
inputs=None):
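# The lifecycle_hook argument (typically set by a deletion policy's
# hook configuration) is expected to look like, e.g. (illustrative):
#   {'type': 'zaqar', 'timeout': 120, 'params': {'queue': 'my-queue'}}
# Only the 'zaqar' hook type is implemented here.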
lifecycle_hook_timeout = lifecycle_hook.get('timeout')
lifecycle_hook_type = lifecycle_hook.get('type', None)
lifecycle_hook_params = lifecycle_hook.get('params')
if lifecycle_hook_type == "zaqar":
lifecycle_hook_target = lifecycle_hook_params.get('queue')
else:
# lifecycle_hook_target = lifecycle_hook_params.get('url')
return self.RES_ERROR, ("Lifecycle hook type '%s' is not "
"implemented") % lifecycle_hook_type
child = []
for node_id in node_ids:
kwargs = {
'name': 'node_delete_%s' % node_id[:8],
'cluster_id': self.entity.id,
'cause': consts.CAUSE_DERIVED_LCH,
'inputs': inputs or {},
}
action_id = base.Action.create(self.context, node_id, action_name,
**kwargs)
child.append((action_id, node_id))
if child:
dobj.Dependency.create(self.context, [aid for aid, nid in child],
self.id)
# lifecycle_hook_type has to be "zaqar"
# post message to zaqar
kwargs = {
'user': self.context.user_id,
'project': self.context.project_id,
'domain': self.context.domain_id
}
notifier = msg.Message(lifecycle_hook_target, **kwargs)
child_copy = list(child)
for action_id, node_id in child_copy:
# Wait for lifecycle completion if the node exists and is active
node = no.Node.get(self.context, node_id)
owner = None
if not node:
LOG.warning('Node %s is not found. '
'Skipping wait for lifecycle completion.',
node_id)
status = base.Action.READY
child.remove((action_id, node_id))
elif node.status != consts.NS_ACTIVE or not node.physical_id:
LOG.warning('Node %s is not in ACTIVE status. '
'Skipping wait for lifecycle completion.',
node_id)
status = base.Action.READY
child.remove((action_id, node_id))
else:
status = base.Action.WAITING_LIFECYCLE_COMPLETION
# Set the owner for actions waiting for lifecycle completion so that
# they get cleaned up by the dead-engine GC if the engine dies.
owner = self.owner
ao.Action.update(self.context, action_id,
{'status': status,
'owner': owner})
if status == base.Action.WAITING_LIFECYCLE_COMPLETION:
notifier.post_lifecycle_hook_message(
action_id, node_id, node.physical_id,
consts.LIFECYCLE_NODE_TERMINATION)
dispatcher.start_action()
res, reason = self._wait_for_dependents(lifecycle_hook_timeout)
if res == self.RES_LIFECYCLE_HOOK_TIMEOUT:
self._handle_lifecycle_timeout(child)
if res is None or res == self.RES_LIFECYCLE_HOOK_TIMEOUT:
dispatcher.start_action()
res, reason = self._wait_for_dependents()
return res, reason
return self.RES_OK, ''
def _remove_nodes_normally(self, action_name, node_ids, inputs=None):
child = []
for node_id in node_ids:
kwargs = {
'name': 'node_delete_%s' % node_id[:8],
'cluster_id': self.entity.id,
'cause': consts.CAUSE_DERIVED,
'inputs': inputs or {},
}
action_id = base.Action.create(self.context, node_id, action_name,
**kwargs)
child.append((action_id, node_id))
if child:
dobj.Dependency.create(self.context, [aid for aid, nid in child],
self.id)
for action_id, node_id in child:
ao.Action.update(self.context, action_id,
{'status': base.Action.READY})
dispatcher.start_action()
res, reason = self._wait_for_dependents()
return res, reason
return self.RES_OK, ''
def _delete_nodes(self, node_ids):
action_name = consts.NODE_DELETE
pd = self.data.get('deletion', None)
if pd is not None:
destroy = pd.get('destroy_after_deletion', True)
if destroy is False:
action_name = consts.NODE_LEAVE
stop_node_before_delete = self.entity.config.get(
"cluster.stop_node_before_delete", False)
# get lifecycle hook properties if specified
lifecycle_hook = self.data.get('hooks')
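# Four paths below, depending on whether a lifecycle hook is present and
# whether nodes must be stopped first:
#   * hook + stop:    notify/stop via the hook, then remove normally
#   * hook only:      notify and remove via the hook
#   * stop only:      stop normally, then remove normally
#   * neither:        remove normally
# A failure while stopping only logs a warning; removal still proceeds.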
if lifecycle_hook:
if stop_node_before_delete:
# set update_parent_status to False so that a failure in stop
# operation is ignored and the parent status is not changed
res, reason = self._remove_nodes_with_hook(
consts.NODE_OPERATION, node_ids, lifecycle_hook,
{'operation': 'stop', 'update_parent_status': False})
if res != self.RES_OK:
LOG.warning('Failure while stopping nodes. '
'Proceed to delete nodes.')
res, reason = self._remove_nodes_normally(action_name,
node_ids)
else:
res, reason = self._remove_nodes_with_hook(
action_name, node_ids, lifecycle_hook)
else:
if stop_node_before_delete:
# set update_parent_status to False so that a failure in stop
# operation is ignored and the parent status is not changed
res, reason = self._remove_nodes_normally(
consts.NODE_OPERATION, node_ids,
{'operation': 'stop', 'update_parent_status': False})
if res != self.RES_OK:
LOG.warning('Failure while stopping nodes. '
'Proceed to delete nodes.')
res, reason = self._remove_nodes_normally(action_name, node_ids)
if res == self.RES_OK:
self.outputs['nodes_removed'] = node_ids
for node_id in node_ids:
self.entity.remove_node(node_id)
else:
reason = 'Failed in deleting nodes: %s' % reason
return res, reason
@profiler.trace('ClusterAction.do_delete', hide_args=False)
def do_delete(self):
"""Handler for the CLUSTER_DELETE action.
:returns: A tuple containing the result and the corresponding reason.
"""
# Detach policies before delete
policies = cp_obj.ClusterPolicy.get_all(self.context, self.entity.id)
for policy in policies:
res, reason = self.entity.detach_policy(self.context,
policy.policy_id)
if res:
self.entity.store(self.context)
else:
return self.RES_ERROR, ("Unable to detach policy {} before "
"deletion.".format(policy.id))
# Delete receivers
receivers = receiver_obj.Receiver.get_all(
self.context, filters={'cluster_id': self.entity.id})
for receiver in receivers:
receiver_obj.Receiver.delete(self.context, receiver.id)
reason = 'Deletion in progress.'
self.entity.set_status(self.context, consts.CS_DELETING, reason)
node_ids = [node.id for node in self.entity.nodes]
# For cluster delete, we delete the nodes
data = {
'deletion': {
'destroy_after_deletion': True
}
}
self.data.update(data)
result, reason = self._delete_nodes(node_ids)
if result != self.RES_OK:
self.entity.eval_status(self.context, consts.CLUSTER_DELETE)
return result, reason
res = self.entity.do_delete(self.context)
if not res:
self.entity.eval_status(self.context, consts.CLUSTER_DELETE)
return self.RES_ERROR, 'Cannot delete cluster object.'
return self.RES_OK, reason
@profiler.trace('ClusterAction.do_add_nodes', hide_args=False)
def do_add_nodes(self):
"""Handler for the CLUSTER_ADD_NODES action.
TODO(anyone): handle placement data
:returns: A tuple containing the result and the corresponding reason.
"""
node_ids = self.inputs.get('nodes')
errors = []
nodes = []
for nid in node_ids:
node = no.Node.get(self.context, nid)
if not node:
errors.append('Node %s is not found.' % nid)
continue
if node.cluster_id:
errors.append('Node %(n)s is already owned by cluster %(c)s.'
'' % {'n': nid, 'c': node.cluster_id})
continue
if node.status != consts.NS_ACTIVE:
errors.append('Node %s is not in ACTIVE status.' % nid)
continue
nodes.append(node)
if len(errors) > 0:
return self.RES_ERROR, '\n'.join(errors)
reason = 'Completed adding nodes.'
# check the size constraint
current = no.Node.count_by_cluster(self.context, self.target)
desired = current + len(node_ids)
res = scaleutils.check_size_params(self.entity, desired, None,
None, True)
if res:
return self.RES_ERROR, res
child = []
for node in nodes:
nid = node.id
kwargs = {
'name': 'node_join_%s' % nid[:8],
'cluster_id': self.entity.id,
'cause': consts.CAUSE_DERIVED,
'inputs': {'cluster_id': self.target},
}
action_id = base.Action.create(self.context, nid, consts.NODE_JOIN,
**kwargs)
child.append(action_id)
if child:
dobj.Dependency.create(self.context, [c for c in child], self.id)
for cid in child:
ao.Action.update(self.context, cid,
{'status': base.Action.READY})
dispatcher.start_action()
# Wait for dependent action if any
result, new_reason = self._wait_for_dependents()
if result != self.RES_OK:
reason = new_reason
else:
self.entity.eval_status(self.context, consts.CLUSTER_ADD_NODES,
desired_capacity=desired)
self.outputs['nodes_added'] = node_ids
creation = self.data.get('creation', {})
creation['nodes'] = node_ids
self.data['creation'] = creation
for node in nodes:
obj = node_mod.Node.load(self.context, db_node=node)
self.entity.add_node(obj)
return result, reason
@profiler.trace('ClusterAction.do_del_nodes', hide_args=False)
def do_del_nodes(self):
"""Handler for the CLUSTER_DEL_NODES action.
:returns: A tuple containing the result and the corresponding reason.
"""
# Use policy decision if any, or fall back to defaults
destroy_after_deletion = self.inputs.get('destroy_after_deletion',
False)
grace_period = 0
reduce_desired_capacity = True
pd = self.data.get('deletion', None)
if pd is not None:
destroy_after_deletion = pd.get('destroy_after_deletion', False)
grace_period = pd.get('grace_period', 0)
reduce_desired_capacity = pd.get('reduce_desired_capacity', True)
data = {
'deletion': {
'destroy_after_deletion': destroy_after_deletion,
'grace_period': grace_period,
'reduce_desired_capacity': reduce_desired_capacity,
}
}
self.data.update(data)
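# The consolidated 'deletion' entry above is what _delete_nodes() will
# read to decide whether nodes are destroyed (NODE_DELETE) or merely
# removed from the cluster (NODE_LEAVE).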
nodes = self.inputs.get('candidates', [])
node_ids = copy.deepcopy(nodes)
errors = []
for node_id in node_ids:
node = no.Node.get(self.context, node_id)
# The return value is None if node not found
if not node:
errors.append(node_id)
continue
if ((not node.cluster_id) or (node.cluster_id != self.target)):
nodes.remove(node_id)
if len(errors) > 0:
msg = "Nodes not found: %s." % errors
return self.RES_ERROR, msg
reason = 'Completed deleting nodes.'
if len(nodes) == 0:
return self.RES_OK, reason
# check the size constraint
current = no.Node.count_by_cluster(self.context, self.target)
desired = current - len(nodes)
res = scaleutils.check_size_params(self.entity, desired, None,
None, True)
if res:
return self.RES_ERROR, res
# sleep period
self._sleep(grace_period)
result, new_reason = self._delete_nodes(nodes)
params = {}
if result != self.RES_OK:
reason = new_reason
if reduce_desired_capacity:
params['desired_capacity'] = desired
self.entity.eval_status(self.context,
consts.CLUSTER_DEL_NODES, **params)
return result, reason
@profiler.trace('ClusterAction.do_replace_nodes', hide_args=False)
def do_replace_nodes(self):
"""Handler for the CLUSTER_REPLACE_NODES action.
:returns: A tuple containing the result and the corresponding reason.
"""
node_dict = self.inputs.get('candidates')
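# 'candidates' is expected to be a dict mapping each original member
# node to its replacement, e.g. (illustrative):
#   {'old-node-id': 'new-node-id'}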
if not node_dict:
return (
self.RES_ERROR,
'Candidates must be a non-empty dict.'
' Instead got {}'.format(node_dict))
errors = []
original_nodes = []
replacement_nodes = []
for (original, replacement) in node_dict.items():
original_node = no.Node.get(self.context, original)
replacement_node = no.Node.get(self.context, replacement)
# The return value is None if node not found
if not original_node:
errors.append('Original node %s not found.' % original)
continue
if not replacement_node:
errors.append('Replacement node %s not found.' % replacement)
continue
if original_node.cluster_id != self.target:
errors.append('Node %(o)s is not a member of the '
'cluster %(c)s.' % {'o': original,
'c': self.target})
continue
if replacement_node.cluster_id:
errors.append(('Node %(r)s is already owned by cluster %(c)s.'
) % {'r': replacement,
'c': replacement_node.cluster_id})
continue
if replacement_node.status != consts.NS_ACTIVE:
errors.append('Node %s is not in ACTIVE status.' % replacement)
continue
original_nodes.append(original_node)
replacement_nodes.append(replacement_node)
if len(errors) > 0:
return self.RES_ERROR, '\n'.join(errors)
result = self.RES_OK
reason = 'Completed replacing nodes.'
children = []
for (original, replacement) in node_dict.items():
kwargs = {
'cluster_id': self.entity.id,
'cause': consts.CAUSE_DERIVED,
}
# node_leave action
kwargs['name'] = 'node_leave_%s' % original[:8]
leave_action_id = base.Action.create(self.context, original,
consts.NODE_LEAVE, **kwargs)
# node_join action
kwargs['name'] = 'node_join_%s' % replacement[:8]
kwargs['inputs'] = {'cluster_id': self.target}
join_action_id = base.Action.create(self.context, replacement,
consts.NODE_JOIN, **kwargs)
children.append((join_action_id, leave_action_id))
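# Below, this action is wired to wait on the NODE_JOIN actions, and each
# join/leave pair is additionally linked through its own Dependency
# record so the pair is processed in order rather than concurrently.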
if children:
dobj.Dependency.create(self.context, [c[0] for c in children],
self.id)
for child in children:
join_id = child[0]
leave_id = child[1]
ao.Action.update(self.context, join_id,
{'status': base.Action.READY})
dobj.Dependency.create(self.context, [join_id], leave_id)
ao.Action.update(self.context, leave_id,
{'status': base.Action.READY})
dispatcher.start_action()
result, new_reason = self._wait_for_dependents()
if result != self.RES_OK:
reason = new_reason
else:
for n in range(len(original_nodes)):
self.entity.remove_node(original_nodes[n])
self.entity.add_node(replacement_nodes[n])
self.entity.eval_status(self.context, consts.CLUSTER_REPLACE_NODES)
return result, reason
@profiler.trace('ClusterAction.do_check', hide_args=False)
def do_check(self):
"""Handler for CLUSTER_CHECK action.
:returns: A tuple containing the result and the corresponding reason.
"""
self.entity.do_check(self.context)
child = []
res = self.RES_OK
reason = 'Cluster checking completed.'
for node in self.entity.nodes:
node_id = node.id
need_delete = self.inputs.get('delete_check_action', False)
# Delete old NODE_CHECK action records (succeeded or failed) if requested
if need_delete:
ao.Action.delete_by_target(
self.context, node_id, action=[consts.NODE_CHECK],
status=[consts.ACTION_SUCCEEDED, consts.ACTION_FAILED])
name = 'node_check_%s' % node_id[:8]
action_id = base.Action.create(
self.context, node_id, consts.NODE_CHECK, name=name,
cause=consts.CAUSE_DERIVED,
inputs=self.inputs
)
child.append(action_id)
if child:
dobj.Dependency.create(self.context, [c for c in child], self.id)
for cid in child:
ao.Action.update(self.context, cid,
{'status': base.Action.READY})
dispatcher.start_action()
# Wait for dependent action if any
res, new_reason = self._wait_for_dependents()
if res != self.RES_OK:
reason = new_reason
self.entity.eval_status(self.context, consts.CLUSTER_CHECK)
return res, reason
def _check_capacity(self):
cluster = self.entity
current = len(cluster.nodes)
desired = cluster.desired_capacity
if current < desired:
count = desired - current
self._create_nodes(count)
if current > desired:
count = current - desired
nodes = no.Node.get_all_by_cluster(self.context, cluster.id)
candidates = scaleutils.nodes_by_random(nodes, count)
self._delete_nodes(candidates)
@profiler.trace('ClusterAction.do_recover', hide_args=False)
def do_recover(self):
"""Handler for the CLUSTER_RECOVER action.
:returns: A tuple containing the result and the corresponding reason.
"""
self.entity.do_recover(self.context)
inputs = {}
check = self.inputs.get('check', False)
inputs['operation'] = self.inputs.get('operation', None)
inputs['operation_params'] = self.inputs.get('operation_params', None)
children = []
for node in self.entity.nodes:
node_id = node.id
if check:
node = node_mod.Node.load(self.context, node_id=node_id)
node.do_check(self.context)
if node.status == consts.NS_ACTIVE:
continue
action_id = base.Action.create(
self.context, node_id, consts.NODE_RECOVER,
name='node_recover_%s' % node_id[:8],
cause=consts.CAUSE_DERIVED, inputs=inputs,
)
children.append(action_id)
res = self.RES_OK
reason = 'Cluster recovery succeeded.'
if children:
dobj.Dependency.create(self.context, [c for c in children],
self.id)
for cid in children:
ao.Action.update(self.context, cid,
{'status': consts.ACTION_READY})
dispatcher.start_action()
# Wait for dependent action if any
res, new_reason = self._wait_for_dependents()
if res != self.RES_OK:
reason = new_reason
check_capacity = self.inputs.get('check_capacity', False)
if check_capacity is True:
self._check_capacity()
self.entity.eval_status(self.context, consts.CLUSTER_RECOVER)
return res, reason
def _update_cluster_size(self, desired):
"""Private function for updating cluster properties."""
kwargs = {'desired_capacity': desired}
min_size = self.inputs.get(consts.ADJUSTMENT_MIN_SIZE, None)
max_size = self.inputs.get(consts.ADJUSTMENT_MAX_SIZE, None)
if min_size is not None:
kwargs['min_size'] = min_size
if max_size is not None:
kwargs['max_size'] = max_size
self.entity.set_status(self.context, consts.CS_RESIZING,
'Cluster resize started.', **kwargs)
@profiler.trace('ClusterAction.do_resize', hide_args=False)
def do_resize(self):
"""Handler for the CLUSTER_RESIZE action.
:returns: A tuple containing the result and the corresponding reason.
"""
# If no policy decision(s) are found, use the action inputs directly.
# Note that 'parse_resize_params' is capable of calculating the desired
# capacity and handling best-effort scaling; it also verifies that the
# inputs are valid.
curr_capacity = no.Node.count_by_cluster(self.context, self.entity.id)
if 'creation' not in self.data and 'deletion' not in self.data:
result, reason = scaleutils.parse_resize_params(self, self.entity,
curr_capacity)
if result != self.RES_OK:
return result, reason
# Action inputs have been consolidated into action data by now
reason = 'Cluster resize succeeded.'
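# At this point self.data holds either a 'deletion' entry, e.g.
# (illustrative) {'count': 2, 'candidates': [...], 'grace_period': 0},
# or a 'creation' entry such as {'count': 2}.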
if 'deletion' in self.data:
count = self.data['deletion']['count']
candidates = self.data['deletion'].get('candidates', [])
# Choose victims randomly if not already picked
if not candidates:
node_list = self.entity.nodes
candidates = scaleutils.nodes_by_random(node_list, count)
self._update_cluster_size(curr_capacity - count)
grace_period = self.data['deletion'].get('grace_period', 0)
self._sleep(grace_period)
result, new_reason = self._delete_nodes(candidates)
else:
# 'creation' in self.data:
count = self.data['creation']['count']
self._update_cluster_size(curr_capacity + count)
result, new_reason = self._create_nodes(count)
if result != self.RES_OK:
reason = new_reason
self.entity.eval_status(self.context, consts.CLUSTER_RESIZE)
return result, reason
@profiler.trace('ClusterAction.do_scale_out', hide_args=False)
def do_scale_out(self):
"""Handler for the CLUSTER_SCALE_OUT action.
:returns: A tuple containing the result and the corresponding reason.
"""
# Use the policy output if any; otherwise the count is taken from the
# inputs, defaulting to 1.
pd = self.data.get('creation', None)
if pd is not None:
count = pd.get('count', 1)
else:
# If no scaling policy is attached, use the input count directly
value = self.inputs.get('count', 1)
success, count = utils.get_positive_int(value)
if not success:
reason = 'Invalid count (%s) for scaling out.' % value
return self.RES_ERROR, reason
# check provided params against current properties
# desired is checked when strict is True
curr_size = no.Node.count_by_cluster(self.context, self.target)
new_size = curr_size + count
result = scaleutils.check_size_params(self.entity, new_size,
None, None, True)
if result:
return self.RES_ERROR, result
self.entity.set_status(self.context, consts.CS_RESIZING,
'Cluster scale out started.',
desired_capacity=new_size)
result, reason = self._create_nodes(count)
if result == self.RES_OK:
reason = 'Cluster scaling succeeded.'
self.entity.eval_status(self.context, consts.CLUSTER_SCALE_OUT)
return result, reason
@profiler.trace('ClusterAction.do_scale_in', hide_args=False)
def do_scale_in(self):
"""Handler for the CLUSTER_SCALE_IN action.
:returns: A tuple containing the result and the corresponding reason.
"""
# Use policy data if any; a deletion policy and/or a scaling policy
# might be attached.
pd = self.data.get('deletion', None)
grace_period = 0
if pd:
grace_period = pd.get('grace_period', 0)
candidates = pd.get('candidates', [])
# if scaling policy is attached, get 'count' from action data
count = len(candidates) or pd['count']
else:
# If no scaling policy is attached, use the input count directly
candidates = []
value = self.inputs.get('count', 1)
success, count = utils.get_positive_int(value)
if not success:
reason = 'Invalid count (%s) for scaling in.' % value
return self.RES_ERROR, reason
# check provided params against current properties
# desired is checked when strict is True
curr_size = no.Node.count_by_cluster(self.context, self.target)
if count > curr_size:
LOG.warning("Triming count (%(count)s) to current cluster size "
"(%(curr)s) for scaling in",
{'count': count, 'curr': curr_size})
count = curr_size
new_size = curr_size - count
result = scaleutils.check_size_params(self.entity, new_size,
None, None, True)
if result:
return self.RES_ERROR, result
self.entity.set_status(self.context, consts.CS_RESIZING,
'Cluster scale in started.',
desired_capacity=new_size)
# Choose victims randomly
if len(candidates) == 0:
candidates = scaleutils.nodes_by_random(self.entity.nodes, count)
# Sleep period
self._sleep(grace_period)
result, reason = self._delete_nodes(candidates)
if result == self.RES_OK:
reason = 'Cluster scaling succeeded.'
self.entity.eval_status(self.context, consts.CLUSTER_SCALE_IN)
return result, reason
@profiler.trace('ClusterAction.do_attach_policy', hide_args=False)
def do_attach_policy(self):
"""Handler for the CLUSTER_ATTACH_POLICY action.
:returns: A tuple containing the result and the corresponding reason.
"""
inputs = dict(self.inputs)
policy_id = inputs.pop('policy_id', None)
if not policy_id:
return self.RES_ERROR, 'Policy not specified.'
res, reason = self.entity.attach_policy(self.context, policy_id,
inputs)
result = self.RES_OK if res else self.RES_ERROR
# Store cluster since its data could have been updated
if result == self.RES_OK:
self.entity.store(self.context)
return result, reason
@profiler.trace('ClusterAction.do_detach_policy', hide_args=False)
def do_detach_policy(self):
"""Handler for the CLUSTER_DETACH_POLICY action.
:returns: A tuple containing the result and the corresponding reason.
"""
policy_id = self.inputs.get('policy_id', None)
if not policy_id:
return self.RES_ERROR, 'Policy not specified.'
res, reason = self.entity.detach_policy(self.context, policy_id)
result = self.RES_OK if res else self.RES_ERROR
# Store cluster since its data could have been updated
if result == self.RES_OK:
self.entity.store(self.context)
return result, reason
@profiler.trace('ClusterAction.do_update_policy', hide_args=False)
def do_update_policy(self):
"""Handler for the CLUSTER_UPDATE_POLICY action.
:returns: A tuple containing the result and the corresponding reason.
"""
policy_id = self.inputs.pop('policy_id', None)
if not policy_id:
return self.RES_ERROR, 'Policy not specified.'
res, reason = self.entity.update_policy(self.context, policy_id,
**self.inputs)
result = self.RES_OK if res else self.RES_ERROR
return result, reason
@profiler.trace('ClusterAction.do_operation', hide_args=False)
def do_operation(self):
"""Handler for CLUSTER_OPERATION action.
Note that the inputs for the action should contain the following items:
* ``nodes``: The nodes to operate on;
* ``operation``: The operation to be performed;
* ``params``: The parameters corresponding to the operation.
:returns: A tuple containing the result and the corresponding reason.
"""
inputs = copy.deepcopy(self.inputs)
operation = inputs['operation']
self.entity.do_operation(self.context, operation=operation)
child = []
res = self.RES_OK
reason = "Cluster operation '%s' completed." % operation
nodes = inputs.pop('nodes')
for node_id in nodes:
action_id = base.Action.create(
self.context, node_id, consts.NODE_OPERATION,
name='node_%s_%s' % (operation, node_id[:8]),
cause=consts.CAUSE_DERIVED,
inputs=inputs,
)
child.append(action_id)
if child:
dobj.Dependency.create(self.context, [c for c in child], self.id)
for cid in child:
ao.Action.update(self.context, cid,
{'status': base.Action.READY})
dispatcher.start_action()
# Wait for dependent action if any
res, new_reason = self._wait_for_dependents()
if res != self.RES_OK:
reason = new_reason
self.entity.eval_status(self.context, operation)
return res, reason
def _execute(self, **kwargs):
"""Private method for action execution.
This function searches for the handler method based on the action name
and wraps the action execution with policy checks.
:returns: A tuple containing the result and the corresponding reason.
"""
# do pre-action policy checking
self.policy_check(self.entity.id, 'BEFORE')
if self.data['status'] != policy_mod.CHECK_OK:
reason = 'Policy check failure: %s' % self.data['reason']
return self.RES_ERROR, reason
result = self.RES_OK
action_name = self.action.lower()
method_name = action_name.replace('cluster', 'do')
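# e.g. 'CLUSTER_SCALE_OUT' -> 'cluster_scale_out' -> 'do_scale_out'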
method = getattr(self, method_name, None)
if method is None:
reason = 'Unsupported action: %s.' % self.action
return self.RES_ERROR, reason
result, reason = method()
# do post-action policy checking
self.inputs['action_result'] = result
self.policy_check(self.entity.id, 'AFTER')
if self.data['status'] != policy_mod.CHECK_OK:
reason = 'Policy check failure: %s' % self.data['reason']
return self.RES_ERROR, reason
return result, reason
def execute(self, **kwargs):
"""Wrapper of action execution.
This is mainly a wrapper that executes an action with cluster lock
acquired.
:returns: A tuple (res, reason) that indicates whether the execution
was a success and why if it wasn't a success.
"""
# Try to lock the cluster before doing the real operation
forced = (self.action == consts.CLUSTER_DELETE)
res = senlin_lock.cluster_lock_acquire(self.context, self.target,
self.id, self.owner,
senlin_lock.CLUSTER_SCOPE,
forced)
# Failed to acquire lock, return RES_RETRY
if not res:
return self.RES_RETRY, 'Failed in locking cluster.'
try:
# Refresh entity state to avoid stale data in action.
self.entity = cluster_mod.Cluster.load(self.context, self.target)
res, reason = self._execute(**kwargs)
finally:
senlin_lock.cluster_lock_release(self.target, self.id,
senlin_lock.CLUSTER_SCOPE)
return res, reason
def cancel(self):
"""Handler to cancel the execution of action."""
return self.RES_OK
def release_lock(self):
"""Handler to release the lock."""
senlin_lock.cluster_lock_release(self.target, self.id,
senlin_lock.CLUSTER_SCOPE)
return self.RES_OK
def CompleteLifecycleProc(context, action_id):
"""Complete lifecycle process."""
action = base.Action.load(context, action_id=action_id, project_safe=False)
if action is None:
LOG.error("Action %s could not be found.", action_id)
raise exception.ResourceNotFound(type='action', id=action_id)
if action.get_status() == consts.ACTION_WAITING_LIFECYCLE_COMPLETION:
# update action status and reset owner back to None
# so that the action will get picked up by dispatcher
ao.Action.update(context, action_id,
{'status': consts.ACTION_READY,
'status_reason': 'Lifecycle complete.',
'owner': None})
dispatcher.start_action()
else:
LOG.debug('Action %s status is not WAITING_LIFECYCLE. '
'Skip CompleteLifecycleProc', action_id)
return False
return True