mistral/mistral/services/action_execution_checker.py

135 lines
3.6 KiB
Python

# Copyright 2018 Nokia Networks.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import eventlet
import sys
from mistral import context as auth_ctx
from mistral.db import utils as db_utils
from mistral.db.v2 import api as db_api
from mistral.engine import action_handler
from mistral.engine import post_tx_queue
from mistral_lib import actions as mistral_lib
from mistral_lib import utils
from oslo_config import cfg
from oslo_log import log as logging
# Module-level logger used by every function in this file.
LOG = logging.getLogger(__name__)

# Global oslo.config object; this module reads its "action_heartbeat" group
# (check_interval, max_missed_heartbeats, batch_size).
CONF = cfg.CONF

# Stop flag for the checker loop: start() sets it to False, stop() sets it
# back to True, and _loop() keeps iterating only while it is False.
_stopped = True
@db_utils.retry_on_db_error
@post_tx_queue.run
def handle_expired_actions():
    """Complete expired running action executions with an error result.

    The expiration date is ``utils.utc_now_sec()`` minus
    ``max_missed_heartbeats * check_interval`` seconds, both read from the
    ``action_heartbeat`` config group. Inside a single DB transaction the
    executions returned by
    ``db_api.get_running_expired_sync_action_executions`` (limited by the
    configured ``batch_size``) are each passed to
    ``action_handler.on_action_complete`` together with an error result
    saying the heartbeat wasn't received.
    """
    LOG.debug("Running heartbeat checker...")

    interval = CONF.action_heartbeat.check_interval
    max_missed = CONF.action_heartbeat.max_missed_heartbeats

    exp_date = utils.utc_now_sec() - datetime.timedelta(
        seconds=max_missed * interval
    )

    with db_api.transaction():
        action_exs = db_api.get_running_expired_sync_action_executions(
            exp_date,
            CONF.action_heartbeat.batch_size
        )

        # Hand the values to the logger as arguments instead of formatting
        # the message eagerly: the string is then only built when the
        # record is actually emitted at the configured log level.
        LOG.debug(
            "Found %s running and expired actions.",
            len(action_exs)
        )

        if action_exs:
            LOG.info(
                "Actions executions to transit to error, because "
                "heartbeat wasn't received: %s",
                action_exs
            )

            for action_ex in action_exs:
                # A fresh result object is created for every execution.
                result = mistral_lib.Result(
                    error="Heartbeat wasn't received."
                )

                action_handler.on_action_complete(action_ex, result)
def _loop():
    """Run handle_expired_actions() periodically until the stop flag is set.

    An admin security context is installed first. Each iteration calls
    handle_expired_actions(), logs (rather than propagates) any exception
    it raises, and then sleeps for ``action_heartbeat.check_interval``
    seconds via eventlet.
    """
    global _stopped

    # This is an administrative thread so we need to set an admin
    # security context.
    admin_ctx = auth_ctx.MistralContext(
        user=None,
        tenant=None,
        auth_token=None,
        is_admin=True
    )
    auth_ctx.set_ctx(admin_ctx)

    while True:
        if _stopped:
            break

        try:
            handle_expired_actions()
        except Exception:
            LOG.exception(
                'Action execution checker iteration failed'
                ' due to an unexpected exception.'
            )

            # For some mysterious reason (probably eventlet related)
            # the exception is not cleared from the context automatically.
            # This results in subsequent log.warning calls to show invalid
            # info. sys.exc_clear() is only called on Python 2.
            running_py2 = sys.version_info < (3,)

            if running_py2:
                sys.exc_clear()

        eventlet.sleep(CONF.action_heartbeat.check_interval)
def start():
    """Schedule the action execution checker loop, if heartbeats are enabled.

    Heartbeat checking is treated as disabled when either
    ``action_heartbeat.check_interval`` or
    ``action_heartbeat.max_missed_heartbeats`` is falsy; in that case only
    an info message is logged. Otherwise the stop flag is cleared and
    ``_loop`` is spawned through eventlet after
    ``check_interval * max_missed_heartbeats`` seconds.
    """
    global _stopped

    heartbeat_conf = CONF.action_heartbeat
    interval = heartbeat_conf.check_interval
    max_missed = heartbeat_conf.max_missed_heartbeats

    if not (interval and max_missed):
        LOG.info("Action heartbeat reporting is disabled.")

        return

    initial_delay = interval * max_missed

    LOG.debug(
        "First run of action execution checker, wait before "
        "checking to make sure executors have time to send "
        "heartbeats. ({} seconds)".format(initial_delay)
    )

    _stopped = False

    eventlet.spawn_after(initial_delay, _loop)
def stop(graceful=False):
    """Set the module-level stop flag so the checker loop stops iterating.

    :param graceful: Accepted for interface compatibility; this function
        does not read it.
    """
    global _stopped

    _stopped = True