334 lines
15 KiB
Python
334 lines
15 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Copyright (C) 2012 Yahoo! Inc. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import weakref
|
|
|
|
from automaton import machines
|
|
from oslo_utils import timeutils
|
|
|
|
from taskflow import logging
|
|
from taskflow import states as st
|
|
from taskflow.types import failure
|
|
from taskflow.utils import iter_utils
|
|
|
|
# Default waiting state timeout (in seconds).
|
|
WAITING_TIMEOUT = 60
|
|
|
|
# Meta states the state machine uses.
|
|
UNDEFINED = 'UNDEFINED'
|
|
GAME_OVER = 'GAME_OVER'
|
|
META_STATES = (GAME_OVER, UNDEFINED)
|
|
|
|
# Event name constants the state machine uses.
|
|
SCHEDULE = 'schedule_next'
|
|
WAIT = 'wait_finished'
|
|
ANALYZE = 'examine_finished'
|
|
FINISH = 'completed'
|
|
FAILED = 'failed'
|
|
SUSPENDED = 'suspended'
|
|
SUCCESS = 'success'
|
|
REVERTED = 'reverted'
|
|
START = 'start'
|
|
|
|
# For these states we will gather how long (in seconds) the
|
|
# state was in-progress (cumulatively if the state is entered multiple
|
|
# times)
|
|
TIMED_STATES = (st.ANALYZING, st.RESUMING, st.SCHEDULING, st.WAITING)
|
|
|
|
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
class MachineMemory(object):
|
|
"""State machine memory."""
|
|
|
|
def __init__(self):
|
|
self.next_up = set()
|
|
self.not_done = set()
|
|
self.failures = []
|
|
self.done = set()
|
|
|
|
|
|
class MachineBuilder(object):
|
|
"""State machine *builder* that powers the engine components.
|
|
|
|
NOTE(harlowja): the machine (states and events that will trigger
|
|
transitions) that this builds is represented by the following
|
|
table::
|
|
|
|
+--------------+------------------+------------+----------+---------+
|
|
| Start | Event | End | On Enter | On Exit |
|
|
+--------------+------------------+------------+----------+---------+
|
|
| ANALYZING | completed | GAME_OVER | . | . |
|
|
| ANALYZING | schedule_next | SCHEDULING | . | . |
|
|
| ANALYZING | wait_finished | WAITING | . | . |
|
|
| FAILURE[$] | . | . | . | . |
|
|
| GAME_OVER | failed | FAILURE | . | . |
|
|
| GAME_OVER | reverted | REVERTED | . | . |
|
|
| GAME_OVER | success | SUCCESS | . | . |
|
|
| GAME_OVER | suspended | SUSPENDED | . | . |
|
|
| RESUMING | schedule_next | SCHEDULING | . | . |
|
|
| REVERTED[$] | . | . | . | . |
|
|
| SCHEDULING | wait_finished | WAITING | . | . |
|
|
| SUCCESS[$] | . | . | . | . |
|
|
| SUSPENDED[$] | . | . | . | . |
|
|
| UNDEFINED[^] | start | RESUMING | . | . |
|
|
| WAITING | examine_finished | ANALYZING | . | . |
|
|
+--------------+------------------+------------+----------+---------+
|
|
|
|
Between any of these yielded states (minus ``GAME_OVER`` and ``UNDEFINED``)
|
|
if the engine has been suspended or the engine has failed (due to a
|
|
non-resolveable task failure or scheduling failure) the machine will stop
|
|
executing new tasks (currently running tasks will be allowed to complete)
|
|
and this machines run loop will be broken.
|
|
|
|
NOTE(harlowja): If the runtimes scheduler component is able to schedule
|
|
tasks in parallel, this enables parallel running and/or reversion.
|
|
"""
|
|
|
|
def __init__(self, runtime, waiter):
|
|
self._runtime = weakref.proxy(runtime)
|
|
self._analyzer = runtime.analyzer
|
|
self._completer = runtime.completer
|
|
self._scheduler = runtime.scheduler
|
|
self._storage = runtime.storage
|
|
self._waiter = waiter
|
|
|
|
def build(self, statistics, timeout=None, gather_statistics=True):
|
|
"""Builds a state-machine (that is used during running)."""
|
|
if gather_statistics:
|
|
watches = {}
|
|
state_statistics = {}
|
|
statistics['seconds_per_state'] = state_statistics
|
|
watches = {}
|
|
for timed_state in TIMED_STATES:
|
|
state_statistics[timed_state.lower()] = 0.0
|
|
watches[timed_state] = timeutils.StopWatch()
|
|
statistics['discarded_failures'] = 0
|
|
statistics['awaiting'] = 0
|
|
statistics['completed'] = 0
|
|
statistics['incomplete'] = 0
|
|
|
|
memory = MachineMemory()
|
|
if timeout is None:
|
|
timeout = WAITING_TIMEOUT
|
|
|
|
# Cache some local functions/methods...
|
|
do_complete = self._completer.complete
|
|
do_complete_failure = self._completer.complete_failure
|
|
get_atom_intention = self._storage.get_atom_intention
|
|
|
|
def do_schedule(next_nodes):
|
|
return self._scheduler.schedule(
|
|
sorted(next_nodes,
|
|
key=lambda node: getattr(node, 'priority', 0),
|
|
reverse=True))
|
|
|
|
def is_runnable():
|
|
# Checks if the storage says the flow is still runnable...
|
|
return self._storage.get_flow_state() == st.RUNNING
|
|
|
|
def iter_next_atoms(atom=None, apply_deciders=True):
|
|
# Yields and filters and tweaks the next atoms to run...
|
|
maybe_atoms_it = self._analyzer.iter_next_atoms(atom=atom)
|
|
for atom, late_decider in maybe_atoms_it:
|
|
if apply_deciders:
|
|
proceed = late_decider.check_and_affect(self._runtime)
|
|
if proceed:
|
|
yield atom
|
|
else:
|
|
yield atom
|
|
|
|
def resume(old_state, new_state, event):
|
|
# This reaction function just updates the state machines memory
|
|
# to include any nodes that need to be executed (from a previous
|
|
# attempt, which may be empty if never ran before) and any nodes
|
|
# that are now ready to be ran.
|
|
memory.next_up.update(
|
|
iter_utils.unique_seen((self._completer.resume(),
|
|
iter_next_atoms())))
|
|
return SCHEDULE
|
|
|
|
def game_over(old_state, new_state, event):
|
|
# This reaction function is mainly a intermediary delegation
|
|
# function that analyzes the current memory and transitions to
|
|
# the appropriate handler that will deal with the memory values,
|
|
# it is *always* called before the final state is entered.
|
|
if memory.failures:
|
|
return FAILED
|
|
leftover_atoms = iter_utils.count(
|
|
# Avoid activating the deciders, since at this point
|
|
# the engine is finishing and there will be no more further
|
|
# work done anyway...
|
|
iter_next_atoms(apply_deciders=False))
|
|
if leftover_atoms:
|
|
# Ok we didn't finish (either reverting or executing...) so
|
|
# that means we must of been stopped at some point...
|
|
LOG.trace("Suspension determined to have been reacted to"
|
|
" since (at least) %s atoms have been left in an"
|
|
" unfinished state", leftover_atoms)
|
|
return SUSPENDED
|
|
elif self._analyzer.is_success():
|
|
return SUCCESS
|
|
else:
|
|
return REVERTED
|
|
|
|
def schedule(old_state, new_state, event):
|
|
# This reaction function starts to schedule the memory's next
|
|
# nodes (iff the engine is still runnable, which it may not be
|
|
# if the user of this engine has requested the engine/storage
|
|
# that holds this information to stop or suspend); handles failures
|
|
# that occur during this process safely...
|
|
if is_runnable() and memory.next_up:
|
|
not_done, failures = do_schedule(memory.next_up)
|
|
if not_done:
|
|
memory.not_done.update(not_done)
|
|
if failures:
|
|
memory.failures.extend(failures)
|
|
memory.next_up.intersection_update(not_done)
|
|
return WAIT
|
|
|
|
def complete_an_atom(fut):
|
|
# This completes a single atom saving its result in
|
|
# storage and preparing whatever predecessors or successors will
|
|
# now be ready to execute (or revert or retry...); it also
|
|
# handles failures that occur during this process safely...
|
|
atom = fut.atom
|
|
try:
|
|
outcome, result = fut.result()
|
|
do_complete(atom, outcome, result)
|
|
if isinstance(result, failure.Failure):
|
|
retain = do_complete_failure(atom, outcome, result)
|
|
if retain:
|
|
memory.failures.append(result)
|
|
else:
|
|
# NOTE(harlowja): avoid making any intention request
|
|
# to storage unless we are sure we are in DEBUG
|
|
# enabled logging (otherwise we will call this all
|
|
# the time even when DEBUG is not enabled, which
|
|
# would suck...)
|
|
if LOG.isEnabledFor(logging.DEBUG):
|
|
intention = get_atom_intention(atom.name)
|
|
LOG.debug("Discarding failure '%s' (in response"
|
|
" to outcome '%s') under completion"
|
|
" units request during completion of"
|
|
" atom '%s' (intention is to %s)",
|
|
result, outcome, atom, intention)
|
|
if gather_statistics:
|
|
statistics['discarded_failures'] += 1
|
|
if gather_statistics:
|
|
statistics['completed'] += 1
|
|
except Exception:
|
|
memory.failures.append(failure.Failure())
|
|
LOG.exception("Engine '%s' atom post-completion failed", atom)
|
|
|
|
def wait(old_state, new_state, event):
|
|
# TODO(harlowja): maybe we should start doing 'yield from' this
|
|
# call sometime in the future, or equivalent that will work in
|
|
# py2 and py3.
|
|
if memory.not_done:
|
|
done, not_done = self._waiter(memory.not_done, timeout=timeout)
|
|
memory.done.update(done)
|
|
memory.not_done = not_done
|
|
return ANALYZE
|
|
|
|
def analyze(old_state, new_state, event):
|
|
# This reaction function is responsible for analyzing all nodes
|
|
# that have finished executing/reverting and figuring
|
|
# out what nodes are now ready to be ran (and then triggering those
|
|
# nodes to be scheduled in the future); handles failures that
|
|
# occur during this process safely...
|
|
next_up = set()
|
|
while memory.done:
|
|
fut = memory.done.pop()
|
|
# Force it to be completed so that we can ensure that
|
|
# before we iterate over any successors or predecessors
|
|
# that we know it has been completed and saved and so on...
|
|
complete_an_atom(fut)
|
|
if not memory.failures:
|
|
atom = fut.atom
|
|
try:
|
|
more_work = set(iter_next_atoms(atom=atom))
|
|
except Exception:
|
|
memory.failures.append(failure.Failure())
|
|
LOG.exception("Engine '%s' atom post-completion"
|
|
" next atom searching failed", atom)
|
|
else:
|
|
next_up.update(more_work)
|
|
if is_runnable() and next_up and not memory.failures:
|
|
memory.next_up.update(next_up)
|
|
return SCHEDULE
|
|
elif memory.not_done:
|
|
return WAIT
|
|
else:
|
|
return FINISH
|
|
|
|
def on_exit(old_state, event):
|
|
LOG.trace("Exiting old state '%s' in response to event '%s'",
|
|
old_state, event)
|
|
if gather_statistics:
|
|
if old_state in watches:
|
|
w = watches[old_state]
|
|
w.stop()
|
|
state_statistics[old_state.lower()] += w.elapsed()
|
|
if old_state in (st.SCHEDULING, st.WAITING):
|
|
statistics['incomplete'] = len(memory.not_done)
|
|
if old_state in (st.ANALYZING, st.SCHEDULING):
|
|
statistics['awaiting'] = len(memory.next_up)
|
|
|
|
def on_enter(new_state, event):
|
|
LOG.trace("Entering new state '%s' in response to event '%s'",
|
|
new_state, event)
|
|
if gather_statistics and new_state in watches:
|
|
watches[new_state].restart()
|
|
|
|
state_kwargs = {
|
|
'on_exit': on_exit,
|
|
'on_enter': on_enter,
|
|
}
|
|
m = machines.FiniteMachine()
|
|
m.add_state(GAME_OVER, **state_kwargs)
|
|
m.add_state(UNDEFINED, **state_kwargs)
|
|
m.add_state(st.ANALYZING, **state_kwargs)
|
|
m.add_state(st.RESUMING, **state_kwargs)
|
|
m.add_state(st.REVERTED, terminal=True, **state_kwargs)
|
|
m.add_state(st.SCHEDULING, **state_kwargs)
|
|
m.add_state(st.SUCCESS, terminal=True, **state_kwargs)
|
|
m.add_state(st.SUSPENDED, terminal=True, **state_kwargs)
|
|
m.add_state(st.WAITING, **state_kwargs)
|
|
m.add_state(st.FAILURE, terminal=True, **state_kwargs)
|
|
m.default_start_state = UNDEFINED
|
|
|
|
m.add_transition(GAME_OVER, st.REVERTED, REVERTED)
|
|
m.add_transition(GAME_OVER, st.SUCCESS, SUCCESS)
|
|
m.add_transition(GAME_OVER, st.SUSPENDED, SUSPENDED)
|
|
m.add_transition(GAME_OVER, st.FAILURE, FAILED)
|
|
m.add_transition(UNDEFINED, st.RESUMING, START)
|
|
m.add_transition(st.ANALYZING, GAME_OVER, FINISH)
|
|
m.add_transition(st.ANALYZING, st.SCHEDULING, SCHEDULE)
|
|
m.add_transition(st.ANALYZING, st.WAITING, WAIT)
|
|
m.add_transition(st.RESUMING, st.SCHEDULING, SCHEDULE)
|
|
m.add_transition(st.SCHEDULING, st.WAITING, WAIT)
|
|
m.add_transition(st.WAITING, st.ANALYZING, ANALYZE)
|
|
|
|
m.add_reaction(GAME_OVER, FINISH, game_over)
|
|
m.add_reaction(st.ANALYZING, ANALYZE, analyze)
|
|
m.add_reaction(st.RESUMING, START, resume)
|
|
m.add_reaction(st.SCHEDULING, SCHEDULE, schedule)
|
|
m.add_reaction(st.WAITING, WAIT, wait)
|
|
|
|
m.freeze()
|
|
return (m, memory)
|