mistral/mistral/engine/engine_server.py

338 lines
12 KiB
Python

# Copyright 2016 - Nokia Networks
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from oslo_log import log as logging
from mistral import config as cfg
from mistral.db.v2 import api as db_api
from mistral.engine import default_engine
from mistral import exceptions as exc
from mistral.rpc import base as rpc
from mistral.scheduler import base as sched_base
from mistral.service import base as service_base
from mistral.services import action_heartbeat_checker
from mistral.services import action_heartbeat_sender
from mistral.services import expiration_policy
from mistral.utils import profiler as profiler_utils
from mistral_lib import utils
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
def _validate_config():
    """Check that the YAQL data conversion options are consistent.

    Input data conversion may only be enabled when output data conversion
    is enabled as well.

    :raises MistralError: If 'yaql.convert_output_data' is falsy while
        'yaql.convert_input_data' is truthy.
    """
    yaql_opts = CONF.yaql

    # Output conversion is switched on: any input setting is acceptable.
    if yaql_opts.convert_output_data:
        return

    # Both conversions are switched off: the combination is consistent.
    if not yaql_opts.convert_input_data:
        return

    message = (
        "The config property 'yaql.convert_output_data' is set to False "
        "so 'yaql.convert_input_data' must also be set to False."
    )

    raise exc.MistralError(message)
class EngineServer(service_base.MistralService):
    """Service wrapper around a workflow engine.

    Controls the life-cycle of the engine and of the components started
    together with it (scheduler, expiration policy, action heartbeat
    checker/sender, RPC server). The instance registers itself as an RPC
    endpoint, so its public methods receive engine specific RPC calls and
    delegate them to the wrapped engine.
    """

    def __init__(self, engine, setup_profiler=True):
        """Create an engine server.

        :param engine: Engine object that RPC calls are delegated to.
        :param setup_profiler: Passed to the base service; when true, the
            profiler is set up on start.
        """
        super().__init__('engine_group', setup_profiler)

        self.engine = engine

        # The following members are populated in start().
        self._rpc_server = None
        self._scheduler = None
        self._expiration_policy_tg = None

    def start(self):
        """Start the engine server and all of its components."""
        super().start()

        _validate_config()

        db_api.setup_db()

        scheduler = sched_base.get_system_scheduler()
        self._scheduler = scheduler
        scheduler.start()

        self._expiration_policy_tg = expiration_policy.setup()

        action_heartbeat_checker.start()

        # With a local action executor this engine instance runs actions
        # itself, so it also has to report heartbeats for them. They are
        # delivered to the engine tier the same way as for a remote
        # executor. If this cluster node crashes while an action is being
        # executed, one of the remaining engine instances will expire the
        # action after a configured period of time.
        if CONF.executor.type == 'local':
            action_heartbeat_sender.start()

        if self._setup_profiler:
            profiler_utils.setup('mistral-engine', CONF.engine.host)

        # Create the RPC server, register this object as its endpoint
        # and run it.
        rpc_server_cls = rpc.get_rpc_server_driver()
        rpc_server = rpc_server_cls(CONF.engine)
        self._rpc_server = rpc_server

        rpc_server.register_endpoint(self)
        rpc_server.run(executor=CONF.oslo_rpc_executor)

        self._notify_started('Engine server started.')

    def stop(self, graceful=False):
        """Stop the engine server and all of its components.

        :param graceful: Ignored. The effective value is derived from the
            'graceful_shutdown_timeout' configuration option (see the
            note in the method body).
        """
        # NOTE(rakhmerov): oslo.service doesn't pass a correct value of
        # the 'graceful' parameter, it gets lost in the call chain that
        # leads to a concrete service. As a workaround, the
        # 'graceful_shutdown_timeout' configuration option is checked
        # instead: a non-empty value (not None and not 0) is treated as
        # a request for a graceful shutdown.
        shutdown_timeout = CONF.graceful_shutdown_timeout
        graceful = bool(shutdown_timeout)

        LOG.info(
            'Stopping an engine server [graceful=%s, timeout=%s]',
            graceful, shutdown_timeout
        )

        super().stop(graceful)

        # The RPC server is shut down ahead of the other engine components
        # so that the engine server stops receiving new RPC calls. In case
        # of a graceful shutdown under load this may take a long time:
        # messages already polled from the queue may still be waiting for
        # processing and the underlying RPC server has to wait for them.
        rpc_server = self._rpc_server

        if rpc_server:
            rpc_server.stop(graceful)

        action_heartbeat_checker.stop(graceful)

        if CONF.executor.type == 'local':
            action_heartbeat_sender.stop(graceful)

        scheduler = self._scheduler

        if scheduler:
            scheduler.stop(graceful)

            sched_base.destroy_system_scheduler()

        expiration_tg = self._expiration_policy_tg

        if expiration_tg:
            expiration_tg.stop(graceful)

    def wait(self):
        """Log that the engine server is waiting to exit."""
        LOG.info("Waiting for an engine server to exit...")

    def start_workflow(self, rpc_ctx, wf_identifier, wf_namespace,
                       wf_ex_id, wf_input, description, params):
        """RPC endpoint: start a workflow on the engine.

        :param rpc_ctx: RPC request context.
        :param wf_identifier: Workflow definition identifier.
        :param wf_namespace: Workflow namespace.
        :param wf_ex_id: Workflow execution id. If passed, it will be set
            in the new execution object.
        :param wf_input: Workflow input.
        :param description: Workflow execution description.
        :param params: Additional workflow type specific parameters,
            passed to the engine as keyword arguments.
        :return: Workflow execution.
        """
        short_input = utils.cut(wf_input)

        LOG.info(
            "Received RPC request 'start_workflow'"
            "[workflow_identifier=%s, workflow_input=%s, "
            "description=%s, params=%s]",
            wf_identifier, short_input, description, params
        )

        wf_ex = self.engine.start_workflow(
            wf_identifier,
            wf_namespace,
            wf_ex_id,
            wf_input,
            description,
            **params
        )

        return wf_ex

    def start_action(self, rpc_ctx, action_name,
                     action_input, description, namespace, params):
        """RPC endpoint: start an action on the engine.

        :param rpc_ctx: RPC request context.
        :param action_name: Name of the action.
        :param action_input: Input dictionary for the action.
        :param description: Description of the new action execution.
        :param namespace: The namespace of the action.
        :param params: Extra parameters to run the action, passed to the
            engine as keyword arguments.
        :return: Action execution.
        """
        short_input = utils.cut(action_input)

        LOG.info(
            "Received RPC request 'start_action'"
            "[name=%s, input=%s, description=%s, "
            "namespace=%s params=%s]",
            action_name, short_input, description, namespace, params
        )

        action_ex = self.engine.start_action(
            action_name,
            action_input,
            description,
            namespace=namespace,
            **params
        )

        return action_ex

    def on_action_complete(self, rpc_ctx, action_ex_id, result, wf_action):
        """RPC endpoint: deliver an action result to the engine.

        :param rpc_ctx: RPC request context.
        :param action_ex_id: Action execution id.
        :param result: Action result data.
        :param wf_action: True if given id points to a workflow execution.
        :return: Action execution.
        """
        # A falsy result has nothing to render, so log a placeholder.
        if result:
            result_repr = result.cut_repr()
        else:
            result_repr = '<unknown>'

        LOG.info(
            "Received RPC request 'on_action_complete'"
            "[action_ex_id=%s, result=%s]",
            action_ex_id, result_repr
        )

        action_ex = self.engine.on_action_complete(
            action_ex_id,
            result,
            wf_action
        )

        return action_ex

    def on_action_update(self, rpc_ctx, action_ex_id, state, wf_action):
        """RPC endpoint: deliver an action execution state to the engine.

        :param rpc_ctx: RPC request context.
        :param action_ex_id: Action execution id.
        :param state: Action execution state.
        :param wf_action: True if given id points to a workflow execution.
        :return: Action execution.
        """
        LOG.info(
            "Received RPC request "
            "'on_action_update'[action_ex_id=%s, state=%s]",
            action_ex_id, state
        )

        action_ex = self.engine.on_action_update(
            action_ex_id,
            state,
            wf_action
        )

        return action_ex

    def pause_workflow(self, rpc_ctx, wf_ex_id):
        """RPC endpoint: pause a workflow on the engine.

        :param rpc_ctx: Request context.
        :param wf_ex_id: Workflow execution id.
        :return: Workflow execution.
        """
        LOG.info(
            "Received RPC request 'pause_workflow'[execution_id=%s]", wf_ex_id
        )

        wf_ex = self.engine.pause_workflow(wf_ex_id)

        return wf_ex

    def rerun_workflow(self, rpc_ctx, task_ex_id, reset=True, env=None):
        """RPC endpoint: rerun a workflow on the engine.

        :param rpc_ctx: RPC request context.
        :param task_ex_id: Task execution id.
        :param reset: If true, then purge action execution for the task.
        :param env: Environment variables to update.
        :return: Workflow execution.
        """
        LOG.info(
            "Received RPC request 'rerun_workflow'[task_ex_id=%s]", task_ex_id
        )

        wf_ex = self.engine.rerun_workflow(task_ex_id, reset, env)

        return wf_ex

    def resume_workflow(self, rpc_ctx, wf_ex_id, env=None):
        """RPC endpoint: resume a workflow on the engine.

        :param rpc_ctx: RPC request context.
        :param wf_ex_id: Workflow execution id.
        :param env: Environment variables to update.
        :return: Workflow execution.
        """
        LOG.info(
            "Received RPC request 'resume_workflow'[wf_ex_id=%s]", wf_ex_id
        )

        wf_ex = self.engine.resume_workflow(wf_ex_id, env)

        return wf_ex

    def stop_workflow(self, rpc_ctx, wf_ex_id, state, message=None):
        """RPC endpoint: stop a workflow on the engine.

        Sets execution state to SUCCESS or ERROR. No more tasks will be
        scheduled. Running tasks won't be killed, but their results
        will be ignored.

        :param rpc_ctx: RPC request context.
        :param wf_ex_id: Workflow execution id.
        :param state: State assigned to the workflow. Permitted states are
            SUCCESS or ERROR.
        :param message: Optional information string.
        :return: Workflow execution.
        """
        LOG.info(
            "Received RPC request 'stop_workflow'"
            "[execution_id=%s, state=%s, message=%s]",
            wf_ex_id, state, message
        )

        wf_ex = self.engine.stop_workflow(wf_ex_id, state, message)

        return wf_ex

    def rollback_workflow(self, rpc_ctx, wf_ex_id):
        """RPC endpoint: roll back a workflow on the engine.

        :param rpc_ctx: RPC request context.
        :param wf_ex_id: Workflow execution id.
        :return: Workflow execution.
        """
        LOG.info(
            "Received RPC request 'rollback_workflow'[execution_id=%s]",
            wf_ex_id
        )

        wf_ex = self.engine.rollback_workflow(wf_ex_id)

        return wf_ex

    def report_running_actions(self, rpc_ctx, action_ex_ids):
        """RPC endpoint: receive action execution heartbeats.

        :param rpc_ctx: RPC request context.
        :param action_ex_ids: Action execution ids.
        :return: Whatever the engine returns from processing the
            heartbeats.
        """
        LOG.info(
            "Received RPC request 'report_running_actions'[action_ex_ids=%s]",
            action_ex_ids
        )

        outcome = self.engine.process_action_heartbeats(action_ex_ids)

        return outcome
def get_oslo_service(setup_profiler=True):
    """Build an engine server backed by the default engine.

    :param setup_profiler: Forwarded to the EngineServer constructor.
    :return: A new EngineServer instance.
    """
    engine = default_engine.DefaultEngine()

    return EngineServer(engine, setup_profiler=setup_profiler)