Merge "Make action heartbeats work for all executor types"
This commit is contained in:
commit
a9a7a99237
|
@ -134,7 +134,7 @@ class Engine(object):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
@abc.abstractmethod
|
@abc.abstractmethod
|
||||||
def report_running_actions(self, action_ex_ids):
|
def process_action_heartbeats(self, action_ex_ids):
|
||||||
"""Receives the heartbeat about the running actions.
|
"""Receives the heartbeat about the running actions.
|
||||||
|
|
||||||
:param action_ex_ids: The action execution ids.
|
:param action_ex_ids: The action execution ids.
|
||||||
|
|
|
@ -223,14 +223,17 @@ class DefaultEngine(base.Engine):
|
||||||
|
|
||||||
@db_utils.retry_on_db_error
|
@db_utils.retry_on_db_error
|
||||||
@post_tx_queue.run
|
@post_tx_queue.run
|
||||||
def report_running_actions(self, action_ex_ids):
|
def process_action_heartbeats(self, action_ex_ids):
|
||||||
with db_api.transaction():
|
with db_api.transaction():
|
||||||
for exec_id in action_ex_ids:
|
for exec_id in action_ex_ids:
|
||||||
try:
|
try:
|
||||||
db_api.update_action_execution_heartbeat(exec_id)
|
db_api.update_action_execution_heartbeat(exec_id)
|
||||||
except exceptions.DBEntityNotFoundError:
|
except exceptions.DBEntityNotFoundError:
|
||||||
LOG.debug("Action execution heartbeat update failed. {}"
|
LOG.debug(
|
||||||
.format(exec_id), exc_info=True)
|
"Action execution heartbeat update failed. {}"
|
||||||
|
.format(exec_id),
|
||||||
|
exc_info=True
|
||||||
|
)
|
||||||
# Ignore this error and continue with the
|
# Ignore this error and continue with the
|
||||||
# remaining ids.
|
# remaining ids.
|
||||||
pass
|
pass
|
||||||
|
|
|
@ -20,7 +20,8 @@ from mistral.engine import default_engine
|
||||||
from mistral.rpc import base as rpc
|
from mistral.rpc import base as rpc
|
||||||
from mistral.scheduler import base as sched_base
|
from mistral.scheduler import base as sched_base
|
||||||
from mistral.service import base as service_base
|
from mistral.service import base as service_base
|
||||||
from mistral.services import action_execution_checker
|
from mistral.services import action_heartbeat_checker
|
||||||
|
from mistral.services import action_heartbeat_sender
|
||||||
from mistral.services import expiration_policy
|
from mistral.services import expiration_policy
|
||||||
from mistral.utils import profiler as profiler_utils
|
from mistral.utils import profiler as profiler_utils
|
||||||
from mistral_lib import utils
|
from mistral_lib import utils
|
||||||
|
@ -54,7 +55,17 @@ class EngineServer(service_base.MistralService):
|
||||||
|
|
||||||
self._expiration_policy_tg = expiration_policy.setup()
|
self._expiration_policy_tg = expiration_policy.setup()
|
||||||
|
|
||||||
action_execution_checker.start()
|
action_heartbeat_checker.start()
|
||||||
|
|
||||||
|
# If the current engine instance uses a local action executor
|
||||||
|
# then we also need to initialize a heartbeat reporter for it.
|
||||||
|
# Heartbeats will be sent to the engine tier in the same way as
|
||||||
|
# with a remote executor. So if the current cluster node crashes
|
||||||
|
# in the middle of executing an action then one of the remaining
|
||||||
|
# engine instances will expire the action in a configured period
|
||||||
|
# of time.
|
||||||
|
if cfg.CONF.executor.type == 'local':
|
||||||
|
action_heartbeat_sender.start()
|
||||||
|
|
||||||
if self._setup_profiler:
|
if self._setup_profiler:
|
||||||
profiler_utils.setup('mistral-engine', cfg.CONF.engine.host)
|
profiler_utils.setup('mistral-engine', cfg.CONF.engine.host)
|
||||||
|
@ -71,7 +82,10 @@ class EngineServer(service_base.MistralService):
|
||||||
def stop(self, graceful=False):
|
def stop(self, graceful=False):
|
||||||
super(EngineServer, self).stop(graceful)
|
super(EngineServer, self).stop(graceful)
|
||||||
|
|
||||||
action_execution_checker.stop(graceful)
|
action_heartbeat_checker.stop(graceful)
|
||||||
|
|
||||||
|
if cfg.CONF.executor.type == 'local':
|
||||||
|
action_heartbeat_sender.stop(graceful)
|
||||||
|
|
||||||
if self._scheduler:
|
if self._scheduler:
|
||||||
self._scheduler.stop(graceful)
|
self._scheduler.stop(graceful)
|
||||||
|
@ -275,7 +289,7 @@ class EngineServer(service_base.MistralService):
|
||||||
action_ex_ids
|
action_ex_ids
|
||||||
)
|
)
|
||||||
|
|
||||||
return self.engine.report_running_actions(action_ex_ids)
|
return self.engine.process_action_heartbeats(action_ex_ids)
|
||||||
|
|
||||||
|
|
||||||
def get_oslo_service(setup_profiler=True):
|
def get_oslo_service(setup_profiler=True):
|
||||||
|
|
|
@ -23,9 +23,9 @@ from mistral import context
|
||||||
from mistral import exceptions as exc
|
from mistral import exceptions as exc
|
||||||
from mistral.executors import base
|
from mistral.executors import base
|
||||||
from mistral.rpc import clients as rpc
|
from mistral.rpc import clients as rpc
|
||||||
|
from mistral.services import action_heartbeat_sender
|
||||||
from mistral.utils import inspect_utils as i_u
|
from mistral.utils import inspect_utils as i_u
|
||||||
|
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@ -57,6 +57,25 @@ class DefaultExecutor(base.Executor):
|
||||||
:return: Action result.
|
:return: Action result.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
try:
|
||||||
|
action_heartbeat_sender.add_action(action_ex_id)
|
||||||
|
|
||||||
|
return self._do_run_action(
|
||||||
|
action_cls_attrs,
|
||||||
|
action_cls_str,
|
||||||
|
action_ex_id,
|
||||||
|
execution_context,
|
||||||
|
params,
|
||||||
|
redelivered,
|
||||||
|
safe_rerun,
|
||||||
|
timeout
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
action_heartbeat_sender.remove_action(action_ex_id)
|
||||||
|
|
||||||
|
def _do_run_action(self, action_cls_attrs, action_cls_str, action_ex_id,
|
||||||
|
execution_context, params, redelivered, safe_rerun,
|
||||||
|
timeout):
|
||||||
def send_error_back(error_msg):
|
def send_error_back(error_msg):
|
||||||
error_result = mistral_lib.Result(error=error_msg)
|
error_result = mistral_lib.Result(error=error_msg)
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,7 @@ from mistral import config as cfg
|
||||||
from mistral.executors import default_executor as exe
|
from mistral.executors import default_executor as exe
|
||||||
from mistral.rpc import base as rpc
|
from mistral.rpc import base as rpc
|
||||||
from mistral.service import base as service_base
|
from mistral.service import base as service_base
|
||||||
from mistral.services import action_execution_reporter
|
from mistral.services import action_heartbeat_sender
|
||||||
from mistral.utils import profiler as profiler_utils
|
from mistral.utils import profiler as profiler_utils
|
||||||
from mistral_lib import utils
|
from mistral_lib import utils
|
||||||
|
|
||||||
|
@ -43,7 +43,7 @@ class ExecutorServer(service_base.MistralService):
|
||||||
def start(self):
|
def start(self):
|
||||||
super(ExecutorServer, self).start()
|
super(ExecutorServer, self).start()
|
||||||
|
|
||||||
action_execution_reporter.start()
|
action_heartbeat_sender.start()
|
||||||
|
|
||||||
if self._setup_profiler:
|
if self._setup_profiler:
|
||||||
profiler_utils.setup('mistral-executor', cfg.CONF.executor.host)
|
profiler_utils.setup('mistral-executor', cfg.CONF.executor.host)
|
||||||
|
@ -60,7 +60,7 @@ class ExecutorServer(service_base.MistralService):
|
||||||
def stop(self, graceful=False):
|
def stop(self, graceful=False):
|
||||||
super(ExecutorServer, self).stop(graceful)
|
super(ExecutorServer, self).stop(graceful)
|
||||||
|
|
||||||
action_execution_reporter.stop()
|
action_heartbeat_sender.stop()
|
||||||
|
|
||||||
if self._rpc_server:
|
if self._rpc_server:
|
||||||
self._rpc_server.stop(graceful)
|
self._rpc_server.stop(graceful)
|
||||||
|
@ -96,30 +96,25 @@ class ExecutorServer(service_base.MistralService):
|
||||||
|
|
||||||
redelivered = rpc_ctx.redelivered or False
|
redelivered = rpc_ctx.redelivered or False
|
||||||
|
|
||||||
try:
|
res = self.executor.run_action(
|
||||||
action_execution_reporter.add_action_ex_id(action_ex_id)
|
action_ex_id,
|
||||||
|
action_cls_str,
|
||||||
|
action_cls_attrs,
|
||||||
|
params,
|
||||||
|
safe_rerun,
|
||||||
|
execution_context,
|
||||||
|
redelivered,
|
||||||
|
timeout=timeout
|
||||||
|
)
|
||||||
|
|
||||||
res = self.executor.run_action(
|
LOG.debug(
|
||||||
action_ex_id,
|
"Sending action result to engine"
|
||||||
action_cls_str,
|
" [action_ex_id=%s, action_cls=%s]",
|
||||||
action_cls_attrs,
|
action_ex_id,
|
||||||
params,
|
action_cls_str
|
||||||
safe_rerun,
|
)
|
||||||
execution_context,
|
|
||||||
redelivered,
|
|
||||||
timeout=timeout
|
|
||||||
)
|
|
||||||
|
|
||||||
LOG.debug(
|
return res
|
||||||
"Sending action result to engine"
|
|
||||||
" [action_ex_id=%s, action_cls=%s]",
|
|
||||||
action_ex_id,
|
|
||||||
action_cls_str
|
|
||||||
)
|
|
||||||
|
|
||||||
return res
|
|
||||||
finally:
|
|
||||||
action_execution_reporter.remove_action_ex_id(action_ex_id)
|
|
||||||
|
|
||||||
|
|
||||||
def get_oslo_service(setup_profiler=True):
|
def get_oslo_service(setup_profiler=True):
|
||||||
|
|
|
@ -328,7 +328,7 @@ class EngineClient(eng.Engine):
|
||||||
)
|
)
|
||||||
|
|
||||||
@base.wrap_messaging_exception
|
@base.wrap_messaging_exception
|
||||||
def report_running_actions(self, action_ex_ids):
|
def process_action_heartbeats(self, action_ex_ids):
|
||||||
"""Receives action execution heartbeats.
|
"""Receives action execution heartbeats.
|
||||||
|
|
||||||
:param action_ex_ids: Action execution ids.
|
:param action_ex_ids: Action execution ids.
|
||||||
|
|
|
@ -32,32 +32,32 @@ _stopped = True
|
||||||
_running_actions = set()
|
_running_actions = set()
|
||||||
|
|
||||||
|
|
||||||
def add_action_ex_id(action_ex_id):
|
def add_action(action_ex_id):
|
||||||
global _enabled
|
global _enabled
|
||||||
|
|
||||||
# With run-action there is no actions_ex_id assigned.
|
# With run-action there is no actions_ex_id assigned.
|
||||||
if action_ex_id and _enabled:
|
if action_ex_id and _enabled:
|
||||||
rpc.get_engine_client().report_running_actions([action_ex_id])
|
rpc.get_engine_client().process_action_heartbeats([action_ex_id])
|
||||||
|
|
||||||
_running_actions.add(action_ex_id)
|
_running_actions.add(action_ex_id)
|
||||||
|
|
||||||
|
|
||||||
def remove_action_ex_id(action_ex_id):
|
def remove_action(action_ex_id):
|
||||||
global _enabled
|
global _enabled
|
||||||
|
|
||||||
if action_ex_id and _enabled:
|
if action_ex_id and _enabled:
|
||||||
_running_actions.discard(action_ex_id)
|
_running_actions.discard(action_ex_id)
|
||||||
|
|
||||||
|
|
||||||
def report_running_actions():
|
def send_action_heartbeats():
|
||||||
LOG.debug("Running heartbeat reporter...")
|
LOG.debug('Running heartbeat reporter...')
|
||||||
|
|
||||||
global _running_actions
|
global _running_actions
|
||||||
|
|
||||||
if not _running_actions:
|
if not _running_actions:
|
||||||
return
|
return
|
||||||
|
|
||||||
rpc.get_engine_client().report_running_actions(_running_actions)
|
rpc.get_engine_client().process_action_heartbeats(_running_actions)
|
||||||
|
|
||||||
|
|
||||||
def _loop():
|
def _loop():
|
||||||
|
@ -76,10 +76,10 @@ def _loop():
|
||||||
|
|
||||||
while not _stopped:
|
while not _stopped:
|
||||||
try:
|
try:
|
||||||
report_running_actions()
|
send_action_heartbeats()
|
||||||
except Exception:
|
except Exception:
|
||||||
LOG.exception(
|
LOG.exception(
|
||||||
'Action execution reporter iteration failed'
|
'Action heartbeat sender iteration failed'
|
||||||
' due to an unexpected exception.'
|
' due to an unexpected exception.'
|
||||||
)
|
)
|
||||||
|
|
|
@ -27,14 +27,14 @@ from mistral.workflow import states
|
||||||
cfg.CONF.set_default('auth_enable', False, group='pecan')
|
cfg.CONF.set_default('auth_enable', False, group='pecan')
|
||||||
|
|
||||||
|
|
||||||
class ActionHeartbeatTest(base.EngineTestCase):
|
class ActionHeartbeatCheckerTest(base.EngineTestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
# We need to override configuration values before starting engine.
|
# We need to override configuration values before starting engine.
|
||||||
self.override_config('check_interval', 1, 'action_heartbeat')
|
self.override_config('check_interval', 1, 'action_heartbeat')
|
||||||
self.override_config('max_missed_heartbeats', 1, 'action_heartbeat')
|
self.override_config('max_missed_heartbeats', 1, 'action_heartbeat')
|
||||||
self.override_config('first_heartbeat_timeout', 0, 'action_heartbeat')
|
self.override_config('first_heartbeat_timeout', 0, 'action_heartbeat')
|
||||||
|
|
||||||
super(ActionHeartbeatTest, self).setUp()
|
super(ActionHeartbeatCheckerTest, self).setUp()
|
||||||
|
|
||||||
# Make sure actions are not sent to an executor.
|
# Make sure actions are not sent to an executor.
|
||||||
@mock.patch.object(
|
@mock.patch.object(
|
|
@ -0,0 +1,148 @@
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import mock
|
||||||
|
|
||||||
|
from oslo_config import cfg
|
||||||
|
|
||||||
|
from mistral.db.v2 import api as db_api
|
||||||
|
from mistral.rpc import clients as rpc_clients
|
||||||
|
from mistral.services import workflows as wf_service
|
||||||
|
from mistral.tests.unit.engine import base
|
||||||
|
from mistral.workflow import states
|
||||||
|
|
||||||
|
|
||||||
|
# Use the set_default method to set value otherwise in certain test cases
|
||||||
|
# the change in value is not permanent.
|
||||||
|
cfg.CONF.set_default('auth_enable', False, group='pecan')
|
||||||
|
|
||||||
|
|
||||||
|
class ActionHeartbeatSenderBaseTest(base.EngineTestCase):
|
||||||
|
def setUp(self):
|
||||||
|
# We need to set all required configuration values before starting
|
||||||
|
# an engine and an executor.
|
||||||
|
self.get_configuration()
|
||||||
|
|
||||||
|
super(ActionHeartbeatSenderBaseTest, self).setUp()
|
||||||
|
|
||||||
|
def get_configuration(self):
|
||||||
|
# We need to override configuration values before starting engine.
|
||||||
|
# Subclasses can override this method and add/change their own
|
||||||
|
# config options.
|
||||||
|
self.override_config('check_interval', 1, 'action_heartbeat')
|
||||||
|
self.override_config('max_missed_heartbeats', 1, 'action_heartbeat')
|
||||||
|
self.override_config('first_heartbeat_timeout', 0, 'action_heartbeat')
|
||||||
|
|
||||||
|
def _do_long_action_success_test(self):
|
||||||
|
wf_text = """---
|
||||||
|
version: '2.0'
|
||||||
|
|
||||||
|
wf:
|
||||||
|
tasks:
|
||||||
|
task1:
|
||||||
|
action: std.sleep seconds=4
|
||||||
|
"""
|
||||||
|
|
||||||
|
wf_service.create_workflows(wf_text)
|
||||||
|
|
||||||
|
wf_ex = self.engine.start_workflow('wf')
|
||||||
|
|
||||||
|
self.await_workflow_success(wf_ex.id)
|
||||||
|
|
||||||
|
with db_api.transaction():
|
||||||
|
wf_ex = db_api.get_workflow_execution(wf_ex.id)
|
||||||
|
|
||||||
|
t_execs = wf_ex.task_executions
|
||||||
|
|
||||||
|
t_ex = self._assert_single_item(
|
||||||
|
t_execs,
|
||||||
|
name='task1',
|
||||||
|
state=states.SUCCESS
|
||||||
|
)
|
||||||
|
|
||||||
|
a_execs = db_api.get_action_executions(task_execution_id=t_ex.id)
|
||||||
|
|
||||||
|
self._assert_single_item(
|
||||||
|
a_execs,
|
||||||
|
name='std.sleep',
|
||||||
|
state=states.SUCCESS
|
||||||
|
)
|
||||||
|
|
||||||
|
# Disable the ability to send action heartbeats.
|
||||||
|
@mock.patch.object(
|
||||||
|
rpc_clients.EngineClient,
|
||||||
|
'process_action_heartbeats',
|
||||||
|
mock.MagicMock()
|
||||||
|
)
|
||||||
|
def _do_long_action_failure_test_with_disabled_sender(self):
|
||||||
|
wf_text = """---
|
||||||
|
version: '2.0'
|
||||||
|
|
||||||
|
wf:
|
||||||
|
tasks:
|
||||||
|
task1:
|
||||||
|
action: std.sleep seconds=4
|
||||||
|
"""
|
||||||
|
|
||||||
|
wf_service.create_workflows(wf_text)
|
||||||
|
|
||||||
|
wf_ex = self.engine.start_workflow('wf')
|
||||||
|
|
||||||
|
self.await_workflow_error(wf_ex.id)
|
||||||
|
|
||||||
|
with db_api.transaction():
|
||||||
|
wf_ex = db_api.get_workflow_execution(wf_ex.id)
|
||||||
|
|
||||||
|
t_execs = wf_ex.task_executions
|
||||||
|
|
||||||
|
t_ex = self._assert_single_item(
|
||||||
|
t_execs,
|
||||||
|
name='task1',
|
||||||
|
state=states.ERROR
|
||||||
|
)
|
||||||
|
|
||||||
|
a_execs = db_api.get_action_executions(task_execution_id=t_ex.id)
|
||||||
|
|
||||||
|
self._assert_single_item(
|
||||||
|
a_execs,
|
||||||
|
name='std.sleep',
|
||||||
|
state=states.ERROR
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class ActionHeartbeatSenderLocalExecutorTest(ActionHeartbeatSenderBaseTest):
|
||||||
|
def get_configuration(self):
|
||||||
|
super(ActionHeartbeatSenderLocalExecutorTest, self).get_configuration()
|
||||||
|
|
||||||
|
self.override_config('type', 'local', 'executor')
|
||||||
|
|
||||||
|
def test_long_action_success(self):
|
||||||
|
self._do_long_action_success_test()
|
||||||
|
|
||||||
|
def test_long_action_failure_with_disabled_sender(self):
|
||||||
|
self._do_long_action_failure_test_with_disabled_sender()
|
||||||
|
|
||||||
|
|
||||||
|
class ActionHeartbeatSenderRemoteExecutorTest(ActionHeartbeatSenderBaseTest):
|
||||||
|
def get_configuration(self):
|
||||||
|
super(
|
||||||
|
ActionHeartbeatSenderRemoteExecutorTest,
|
||||||
|
self
|
||||||
|
).get_configuration()
|
||||||
|
|
||||||
|
self.override_config('type', 'remote', 'executor')
|
||||||
|
|
||||||
|
def test_long_action_success(self):
|
||||||
|
self._do_long_action_success_test()
|
||||||
|
|
||||||
|
def test_long_action_failure_with_disabled_sender(self):
|
||||||
|
self._do_long_action_failure_test_with_disabled_sender()
|
|
@ -656,9 +656,9 @@ class DefaultEngineTest(base.DbTestCase):
|
||||||
|
|
||||||
task_action_ex = action_execs[0]
|
task_action_ex = action_execs[0]
|
||||||
|
|
||||||
self.engine.report_running_actions([])
|
self.engine.process_action_heartbeats([])
|
||||||
self.engine.report_running_actions([None, None])
|
self.engine.process_action_heartbeats([None, None])
|
||||||
self.engine.report_running_actions([None, task_action_ex.id])
|
self.engine.process_action_heartbeats([None, task_action_ex.id])
|
||||||
|
|
||||||
task_action_ex = db_api.get_action_execution(task_action_ex.id)
|
task_action_ex = db_api.get_action_execution(task_action_ex.id)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue