Files
deb-python-taskflow/taskflow/job.py
Joshua Harlow f56086d067 Don't keep the state/version in the task name.
Instead of having individual entries for the task
that change with the state name + version, we should
just keep the same task name and update the metadata
about the states the task has gone through. Also
store the task version in the same metadata and warn users
when the versions may be incompatible.

This makes it easier to see what a task has done
without having to know all the states it has gone
through (just to find the task details about that
task) as well as being able to detect version issues.

Change-Id: Ia6b9400394212230905341d205d966dfdee5dfdf
2013-06-24 14:39:53 -07:00

341 lines
12 KiB
Python

# -*- coding: utf-8 -*-
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright (C) 2012 Yahoo! Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import abc
import logging
import re
import types
from taskflow import exceptions as exc
from taskflow import states
from taskflow import utils
from taskflow.openstack.common import uuidutils
LOG = logging.getLogger(__name__)
def _get_task_version(task):
    """Returns the *string* version of a task object or task function.

    Plain functions are asked for a ``__version__`` attribute first; when
    that yields nothing (or the task is not a function) a ``version``
    attribute is used if one exists. List/tuple versions are joined with
    dots and anything that is still not a string is passed through str().
    An empty string is returned when no version could be located.
    """
    if isinstance(task, types.FunctionType):
        found = getattr(task, '__version__', '')
    else:
        found = ''
    if not found and hasattr(task, 'version'):
        found = task.version
    if isinstance(found, (list, tuple)):
        # Something like (1, 0) becomes a dotted version via the join helper.
        found = utils.join(found, with_what=".")
    if isinstance(found, basestring):
        return found
    return str(found)
def _get_task_name(task):
    """Returns the *string* name of a task object or task function.

    Non-function tasks are named by their str() form. Functions use their
    ``name`` attribute when they have one, otherwise the non-None values of
    ``__module__`` and ``__name__`` joined together with a dot.
    """
    if not isinstance(task, types.FunctionType):
        return str(task)
    if hasattr(task, 'name'):
        # Expected to have been attached by the task() decorator provided in
        # the decorators file.
        return str(task.name)
    # No explicit name was attached, so fall back to the basic information
    # every function carries (no version here) to form a useful name.
    name_pieces = []
    for piece in utils.get_many_attr(task, '__module__', '__name__'):
        if piece is not None:
            name_pieces.append(piece)
    return utils.join(name_pieces, ".")
def _is_version_compatible(version_1, version_2):
    """Checks for major version compatibility of two *string* versions.

    Versions that compare equal are always compatible. Otherwise both are
    split on dots and turned into integers; they are compatible only when
    both could be parsed and their first (major) numbers match.
    """
    if version_1 == version_2:
        # Exactly equivalent, nothing needs to be parsed.
        return True

    def _parse(version):
        """Turns a dotted version into a list of ints ([] when invalid)."""
        numbers = []
        try:
            for chunk in version.split("."):
                chunk = chunk.strip()
                if len(chunk) == 0:
                    # An empty segment (as in "1..2") is counted as zero.
                    numbers.append(0)
                else:
                    # Entries like 1alpha or 2b are reduced to the digits
                    # that they start with.
                    found = re.match(r"(\d+)([A-Za-z]*)(.*)", chunk)
                    numbers.append(int(found.group(1) if found else chunk))
        except (AttributeError, TypeError, ValueError):
            # Not a string, or a segment that is not numeric; the whole
            # version is then treated as unparseable.
            return []
        return numbers

    parsed_1 = _parse(version_1)
    parsed_2 = _parse(version_2)
    if not parsed_1 or not parsed_2:
        return False
    # Only the major version has to agree.
    return parsed_1[0] == parsed_2[0]
class Claimer(object):
    """Abstract interface for objects that can claim a given job.

    A successful claim is what allows said job to be worked on; the same
    object is also responsible for giving that claim back.
    """

    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def claim(self, job, owner):
        """Attempts to claim the given job for the given owner.

        Implementations must either succeed or throw an exception signaling
        that the job can not be claimed.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def unclaim(self, job, owner):
        """Attempts to unclaim the given job from the given owner.

        Implementations must either succeed or throw an exception signaling
        that the job can not be unclaimed.
        """
        raise NotImplementedError()
class Job(object):
"""A job is connection to some set of work to be done by some agent. Basic
information is provided about said work to be able to attempt to
fullfill said work."""
__metaclass__ = abc.ABCMeta
def __init__(self, name, context, catalog, claimer, jid=None):
self.name = name
self.context = context
self.owner = None
self.posted_on = []
self._catalog = catalog
self._claimer = claimer
self._logbook = None
if not jid:
self._id = uuidutils.generate_uuid()
else:
self._id = str(jid)
self._state = states.UNCLAIMED
def __str__(self):
return "Job (%s, %s): %s" % (self.name, self.tracking_id, self.state)
@property
def state(self):
return self._state
@state.setter
def state(self, new_state):
self._change_state(new_state)
def _change_state(self, new_state):
if self.state != new_state:
self._state = new_state
# TODO(harlowja): add logbook info?
def _workflow_listener(self, _context, flow, _old_state):
"""Ensure that when we receive an event from said workflow that we
make sure a logbook entry exists for that flow."""
if flow.name in self.logbook:
return
self.logbook.add_flow(flow.name)
def _task_listener(self, _context, state, flow, task, result=None):
"""Store the result of the task under the given flow in the log
book so that it can be retrieved later."""
metadata = {}
flow_details = self.logbook[flow.name]
if state in (states.SUCCESS, states.FAILURE):
metadata['result'] = result
name = _get_task_name(task)
if name not in flow_details:
metadata['states'] = [state]
metadata['version'] = _get_task_version(task)
flow_details.add_task(name, metadata)
else:
details = flow_details[name]
# Warn about task versions possibly being incompatible
my_version = _get_task_version(task)
prev_version = details.metadata.get('version')
if not _is_version_compatible(my_version, prev_version):
LOG.warn("Updating a task with a different version than the"
" one being listened to (%s != %s)",
prev_version, my_version)
past_states = details.metadata.get('states', [])
past_states.append(state)
details.metadata['states'] = past_states
details.metadata.update(metadata)
def _task_result_fetcher(self, _context, flow, task):
flow_details = self.logbook[flow.name]
# See if it completed before (or failed before) so that we can use its
# results instead of having to recompute it.
not_found = (False, False, None)
name = _get_task_name(task)
if name not in flow_details:
return not_found
details = flow_details[name]
has_completed = False
was_failure = False
task_states = details.metadata.get('states', [])
for state in task_states:
if state in (states.SUCCESS, states.FAILURE):
if state == states.FAILURE:
was_failure = True
has_completed = True
break
# Warn about task versions possibly being incompatible
my_version = _get_task_version(task)
prev_version = details.metadata.get('version')
if not _is_version_compatible(my_version, prev_version):
LOG.warn("Fetching task results from a task with a different"
" version from the one being requested (%s != %s)",
prev_version, my_version)
if has_completed:
return (True, was_failure, details.metadata.get('result'))
return not_found
def associate(self, flow, parents=True):
"""Attachs the needed resumption and state change tracking listeners
to the given workflow so that the workflow can be resumed/tracked
using the jobs components."""
if self._task_listener not in flow.task_listeners:
flow.task_listeners.append(self._task_listener)
if self._workflow_listener not in flow.listeners:
flow.listeners.append(self._workflow_listener)
flow.result_fetcher = self._task_result_fetcher
# Associate the parents as well (if desired)
if parents and flow.parents:
for p in flow.parents:
self.associate(p, parents)
def disassociate(self, flow, parents=True):
"""Detaches the needed resumption and state change tracking listeners
from the given workflow."""
if self._task_listener in flow.task_listeners:
flow.task_listeners.remove(self._task_listener)
if self._workflow_listener in flow.listeners:
flow.listeners.remove(self._workflow_listener)
if flow.result_fetcher is self._task_result_fetcher:
flow.result_fetcher = None
# Disassociate from the flows parents (if desired)
if parents and flow.parents:
for p in flow.parents:
self.disassociate(p, parents)
@property
def logbook(self):
"""Fetches (or creates) a logbook entry for this job."""
if self._logbook is None:
self._logbook = self._catalog.create_or_fetch(self)
return self._logbook
def claim(self, owner):
"""This can be used to attempt transition this job from unclaimed
to claimed.
This must be done in a way that likely uses some type of locking or
ownership transfer so that only a single entity gets this job to work
on. This will avoid multi-job ownership, which can lead to
inconsistent state."""
if self.state != states.UNCLAIMED:
raise exc.UnclaimableJobException("Unable to claim job when job is"
" in state %s" % (self.state))
self._claimer.claim(self, owner)
self._change_state(states.CLAIMED)
def run(self, flow, *args, **kwargs):
already_associated = []
def associate_all(a_flow):
if a_flow in already_associated:
return
# Associate with the flow.
self.associate(a_flow)
already_associated.append(a_flow)
# Ensure we are associated with all the flows parents.
if a_flow.parents:
for p in a_flow.parents:
associate_all(p)
if flow.state != states.PENDING:
raise exc.InvalidStateException("Unable to run %s when in"
" state %s" % (flow, flow.state))
associate_all(flow)
return flow.run(self.context, *args, **kwargs)
def unclaim(self):
"""Atomically transitions this job from claimed to unclaimed."""
if self.state == states.UNCLAIMED:
return
self._claimer.unclaim(self, self.owner)
self._change_state(states.UNCLAIMED)
def erase(self):
"""Erases any traces of this job from its associated resources."""
for b in self.posted_on:
b.erase(self)
self._catalog.erase(self)
if self._logbook is not None:
self._logbook.close()
self._logbook = None
if self.state != states.UNCLAIMED:
self._claimer.unclaim(self, self.owner)
def await(self, timeout=None):
"""Awaits until either the job fails or succeeds or the provided
timeout is reached."""
def check_functor():
if self.state not in (states.FAILURE, states.SUCCESS):
return False
return True
return utils.await(check_functor, timeout)
@property
def tracking_id(self):
"""Returns a tracking *unique* identifier that can be used to identify
this job among other jobs."""
return "j-%s-%s" % (self.name, self._id)