deb-python-taskflow/taskflow/utils/misc.py
Joshua Harlow 27badfc314 Avoid usage of six.moves in local functions
Currently it appears that using six.moves in threaded code isn't
working as expected (something in six does not appear to be
thread safe), so until this is fixed avoid using those moves
inside functions in the examples and in the utility code (and
instead import the moved function at the top of the module to
avoid any threaded usage problems).

Upstream bug filed at:

https://bitbucket.org/gutworth/six/issue/98/

Fixes bug 1377514

Change-Id: I3fc1819df8fb42d0c3d394bbc7d047b09152af68
2014-10-04 21:41:05 -07:00
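
The workaround pattern the commit describes looks roughly like this (a minimal,
hypothetical sketch; the ``delays`` function is made up):

    # Resolve the move once at import time, at the top of the module...
    from six.moves import range as compat_range

    def delays(count):
        # ...instead of calling six.moves.range(count) inside the function,
        # which may be invoked concurrently from multiple threads.
        return [2 ** i for i in compat_range(count)]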


# -*- coding: utf-8 -*-
# Copyright (C) 2012 Yahoo! Inc. All Rights Reserved.
# Copyright (C) 2013 Rackspace Hosting All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import collections
import contextlib
import copy
import datetime
import errno
import inspect
import keyword
import logging
import os
import re
import string
import sys
import threading
import time
import traceback
from oslo.serialization import jsonutils
from oslo.utils import netutils
import six
from six.moves import map as compat_map
from six.moves import range as compat_range
from six.moves.urllib import parse as urlparse
from taskflow import exceptions as exc
from taskflow.utils import reflection
LOG = logging.getLogger(__name__)
NUMERIC_TYPES = six.integer_types + (float,)
# NOTE(imelnikov): regular expression to get scheme from URI,
# see RFC 3986 section 3.1
_SCHEME_REGEX = re.compile(r"^([A-Za-z][A-Za-z0-9+.-]*):")
def merge_uri(uri_pieces, conf):
"""Merges a parsed uri into the given configuration dictionary.
Merges the username, password, hostname, and query params of a uri into
the given configuration (it does not overwrite the configuration keys if
they already exist) and returns the adjusted configuration.
NOTE(harlowja): does not merge the path, scheme or fragment.
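For example (an illustrative sketch; the URI and configuration values
are made up)::

    conf = {'timeout': 5}
    parsed = parse_uri("rabbit://guest:secret@localhost:5672?durable=true")
    conf = merge_uri(parsed, conf)
    # conf now also contains 'username', 'password', 'durable' and
    # 'hostname' (set to 'localhost:5672'); 'timeout' is left untouched.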
"""
for k in ('username', 'password'):
if not uri_pieces[k]:
continue
conf.setdefault(k, uri_pieces[k])
hostname = uri_pieces.get('hostname')
if hostname:
port = uri_pieces.get('port')
if port is not None:
hostname += ":%s" % (port)
conf.setdefault('hostname', hostname)
for (k, v) in six.iteritems(uri_pieces['params']):
conf.setdefault(k, v)
return conf
def parse_uri(uri, query_duplicates=False):
"""Parses a uri into its components."""
# Do some basic validation before continuing...
if not isinstance(uri, six.string_types):
raise TypeError("Can only parse string types to uri data, "
"and not an object of type %s"
% reflection.get_class_name(uri))
match = _SCHEME_REGEX.match(uri)
if not match:
raise ValueError("Uri %r does not start with a RFC 3986 compliant"
" scheme" % (uri))
parsed = netutils.urlsplit(uri)
if parsed.query:
query_params = urlparse.parse_qsl(parsed.query)
if not query_duplicates:
query_params = dict(query_params)
else:
# Retain duplicates in a list for keys which have duplicates, but
# for items which are not duplicated, just associate the key with
# the value.
tmp_query_params = {}
for (k, v) in query_params:
if k in tmp_query_params:
p_v = tmp_query_params[k]
if isinstance(p_v, list):
p_v.append(v)
else:
p_v = [p_v, v]
tmp_query_params[k] = p_v
else:
tmp_query_params[k] = v
query_params = tmp_query_params
else:
query_params = {}
return AttrDict(
scheme=parsed.scheme,
username=parsed.username,
password=parsed.password,
fragment=parsed.fragment,
path=parsed.path,
params=query_params,
hostname=parsed.hostname,
port=parsed.port)
def binary_encode(text, encoding='utf-8'):
"""Converts a string of into a binary type using given encoding.
Does nothing if text not unicode string.
"""
if isinstance(text, six.binary_type):
return text
elif isinstance(text, six.text_type):
return text.encode(encoding)
else:
raise TypeError("Expected binary or string type")
def binary_decode(data, encoding='utf-8'):
"""Converts a binary type into a text type using given encoding.
Does nothing if data is already unicode string.
"""
if isinstance(data, six.binary_type):
return data.decode(encoding)
elif isinstance(data, six.text_type):
return data
else:
raise TypeError("Expected binary or string type")
def decode_json(raw_data, root_types=(dict,)):
"""Parse raw data to get JSON object.
Decodes a JSON from a given raw data binary and checks that the root
type of that decoded object is in the allowed set of types (by
default a JSON object/dict should be the root type).
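For example (an illustrative sketch)::

    decode_json(b'{"a": 1}')                      # -> {'a': 1}
    decode_json(b'[1, 2]')                        # raises ValueError
    decode_json(b'[1, 2]', root_types=(list,))    # -> [1, 2]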
"""
try:
data = jsonutils.loads(binary_decode(raw_data))
except UnicodeDecodeError as e:
raise ValueError("Expected UTF-8 decodable data: %s" % e)
except ValueError as e:
raise ValueError("Expected JSON decodable data: %s" % e)
if root_types and not isinstance(data, tuple(root_types)):
ok_types = ", ".join(str(t) for t in root_types)
raise ValueError("Expected (%s) root types not: %s"
% (ok_types, type(data)))
return data
class cachedproperty(object):
"""A *thread-safe* descriptor property that is only evaluated once.
This caching descriptor can be placed on instance methods to translate
those methods into properties that will be cached in the instance (avoiding
repeated attribute checking logic to do the equivalent).
NOTE(harlowja): by default the property that will be saved will be under
the decorated method's name prefixed with an underscore. For example, if we
were to attach this descriptor to an instance method 'get_thing(self)' the
cached property would be stored under '_get_thing' in the self object
after the first call to 'get_thing' occurs.
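For example (an illustrative sketch; the ``Widget`` class is made up)::

    class Widget(object):

        @cachedproperty
        def size(self):
            print("computing...")
            return 42

    w = Widget()
    w.size  # prints 'computing...', stores 42 on the instance as 'w._size'
    w.size  # subsequent accesses return the cached value (no recomputation)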
"""
def __init__(self, fget):
self._lock = threading.RLock()
# If a name is provided (as an argument) then this will be the string
# to place the cached attribute under; if not, then it will be the
# function itself to be wrapped into a property.
if inspect.isfunction(fget):
self._fget = fget
self._attr_name = "_%s" % (fget.__name__)
self.__doc__ = getattr(fget, '__doc__', None)
else:
self._attr_name = fget
self._fget = None
self.__doc__ = None
def __call__(self, fget):
# If __init__ received a string then this will be the function to be
# wrapped as a property (if __init__ got a function then this will not
# be called).
self._fget = fget
self.__doc__ = getattr(fget, '__doc__', None)
return self
def __set__(self, instance, value):
raise AttributeError("can't set attribute")
def __delete__(self, instance):
raise AttributeError("can't delete attribute")
def __get__(self, instance, owner):
if instance is None:
return self
# Quick check to see if this already has been made (before acquiring
# the lock). This is safe to do since we don't allow deletion after
# being created.
if hasattr(instance, self._attr_name):
return getattr(instance, self._attr_name)
else:
with self._lock:
try:
return getattr(instance, self._attr_name)
except AttributeError:
value = self._fget(instance)
setattr(instance, self._attr_name, value)
return value
def wallclock():
# NOTE(harlowja): made into a function so that this can be easily mocked
# out if we want to alter time related functionality (for testing
# purposes).
return time.time()
def millis_to_datetime(milliseconds):
"""Converts number of milliseconds (from epoch) into a datetime object."""
return datetime.datetime.fromtimestamp(float(milliseconds) / 1000)
def get_version_string(obj):
"""Gets a object's version as a string.
Returns string representation of object's version taken from
its 'version' attribute, or None if object does not have such
attribute or its version is None.
"""
obj_version = getattr(obj, 'version', None)
if isinstance(obj_version, (list, tuple)):
obj_version = '.'.join(str(item) for item in obj_version)
if obj_version is not None and not isinstance(obj_version,
six.string_types):
obj_version = str(obj_version)
return obj_version
def sequence_minus(seq1, seq2):
"""Calculate difference of two sequences.
Result contains the elements from the first sequence that are not
present in the second sequence, in their original order. Works even
if sequence elements are not hashable.
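For example (each item in the second sequence removes at most one
matching item from the result)::

    sequence_minus([1, 2, 2, 3], [2])   # -> [1, 2, 3]
    sequence_minus(['a', 'b'], ['c'])   # -> ['a', 'b']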
"""
result = list(seq1)
for item in seq2:
try:
result.remove(item)
except ValueError:
pass
return result
def item_from(container, index, name=None):
"""Attempts to fetch a index/key from a given container."""
if index is None:
return container
try:
return container[index]
except (IndexError, KeyError, ValueError, TypeError):
# NOTE(harlowja): Perhaps the container is a dictionary-like object
# and that key does not exist (key error), or the container is a
# tuple/list and a non-numeric key is being requested (index error),
# or there was no container and an attempt to index into none/other
# unsubscriptable type is being requested (type error).
if name is None:
name = index
raise exc.NotFound("Unable to find %r in container %s"
% (name, container))
def get_duplicate_keys(iterable, key=None):
if key is not None:
iterable = compat_map(key, iterable)
keys = set()
duplicates = set()
for item in iterable:
if item in keys:
duplicates.add(item)
keys.add(item)
return duplicates
# NOTE(imelnikov): we should not use str.isalpha or str.isdigit
# as they are locale-dependent
_ASCII_WORD_SYMBOLS = frozenset(string.ascii_letters + string.digits + '_')
def is_valid_attribute_name(name, allow_self=False, allow_hidden=False):
"""Checks that a string is a valid/invalid python attribute name."""
return all((
isinstance(name, six.string_types),
len(name) > 0,
(allow_self or not name.lower().startswith('self')),
(allow_hidden or not name.lower().startswith('_')),
# NOTE(imelnikov): keywords should be forbidden.
not keyword.iskeyword(name),
# See: http://docs.python.org/release/2.5.2/ref/grammar.txt
not (name[0] in string.digits),
all(symbol in _ASCII_WORD_SYMBOLS for symbol in name)
))
class AttrDict(dict):
"""Dictionary subclass that allows for attribute based access.
This subclass allows for accessing a dictionary's keys and values by
accessing those keys as regular attributes. Keys that are not valid python
attribute names can not, of course, be accessed/set (those keys must be
accessed/set by the traditional dictionary indexing operators instead).
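For example (an illustrative sketch)::

    conf = AttrDict(hostname='localhost', port=5672)
    conf.hostname        # -> 'localhost'
    conf.timeout = 30    # equivalent to conf['timeout'] = 30
    conf['port']         # -> 5672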
"""
NO_ATTRS = tuple(reflection.get_member_names(dict))
@classmethod
def _is_valid_attribute_name(cls, name):
if not is_valid_attribute_name(name):
return False
# Also disallow names that collide with the members of the dict type.
if name in cls.NO_ATTRS:
return False
return True
def __init__(self, **kwargs):
for (k, v) in kwargs.items():
if not self._is_valid_attribute_name(k):
raise AttributeError("Invalid attribute name: '%s'" % (k))
self[k] = v
def __getattr__(self, name):
if not self._is_valid_attribute_name(name):
raise AttributeError("Invalid attribute name: '%s'" % (name))
try:
return self[name]
except KeyError:
raise AttributeError("No attributed named: '%s'" % (name))
def __setattr__(self, name, value):
if not self._is_valid_attribute_name(name):
raise AttributeError("Invalid attribute name: '%s'" % (name))
self[name] = value
class ExponentialBackoff(object):
"""An iterable object that will yield back an exponential delay sequence.
This object provides for a configurable exponent, count of numbers
to generate, and a maximum number that will be returned. This object may
also be iterated over multiple times (yielding the same sequence each
time).
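For example (an illustrative sketch)::

    list(ExponentialBackoff(4))                  # -> [1, 2, 4, 8]
    list(ExponentialBackoff(4, exponent=3))      # -> [1, 3, 9, 27]
    list(ExponentialBackoff(4, max_backoff=5))   # -> [1, 2, 4, 5]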
"""
def __init__(self, count, exponent=2, max_backoff=3600):
self.count = max(0, int(count))
self.exponent = exponent
self.max_backoff = max(0, int(max_backoff))
def __iter__(self):
if self.count <= 0:
return
for i in compat_range(0, self.count):
yield min(self.exponent ** i, self.max_backoff)
def __str__(self):
return "ExponentialBackoff: %s" % ([str(v) for v in self])
def as_int(obj, quiet=False):
"""Converts an arbitrary value into a integer."""
# Try "2" -> 2
try:
return int(obj)
except (ValueError, TypeError):
pass
# Try "2.5" -> 2
try:
return int(float(obj))
except (ValueError, TypeError):
pass
# Eck, not sure what this is then.
if not quiet:
raise TypeError("Can not translate %s to an integer." % (obj))
return obj
# Taken from oslo-incubator file-utils but since that module pulls in a large
# amount of other files it does not seem so useful to include that full
# module just for this function.
def ensure_tree(path):
"""Create a directory (and any ancestor directories required).
:param path: Directory to create
"""
try:
os.makedirs(path)
except OSError as exc:
if exc.errno == errno.EEXIST:
if not os.path.isdir(path):
raise
else:
raise
class Notifier(object):
"""A notification helper class.
It is intended to be used to subscribe to notifications of events
occurring as well as allow an entity to post said notifications to any
associated subscribers without having either entity care about how this
notification occurs.
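For example (a small usage sketch; the callback and event name are
made up)::

    def on_event(event_type, details):
        print("Received %s: %s" % (event_type, details))

    notifier = Notifier()
    notifier.register(Notifier.ANY, on_event)
    notifier.notify('task_complete', {'result': 42})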
"""
#: Keys that can not be used in callbacks arguments
RESERVED_KEYS = ('details',)
#: Kleene star constant that is used to receive all notifications
ANY = '*'
def __init__(self):
self._listeners = collections.defaultdict(list)
def __len__(self):
"""Returns how many callbacks are registered."""
count = 0
for (_event_type, callbacks) in six.iteritems(self._listeners):
count += len(callbacks)
return count
def is_registered(self, event_type, callback):
"""Check if a callback is registered."""
listeners = list(self._listeners.get(event_type, []))
for (cb, _args, _kwargs) in listeners:
if reflection.is_same_callback(cb, callback):
return True
return False
def reset(self):
"""Forget all previously registered callbacks."""
self._listeners.clear()
def notify(self, event_type, details):
"""Notify about event occurrence.
All callbacks registered to receive notifications about given
event type will be called.
:param event_type: event type that occurred
:param details: additional event details
"""
listeners = list(self._listeners.get(self.ANY, []))
for i in self._listeners[event_type]:
if i not in listeners:
listeners.append(i)
if not listeners:
return
for (callback, args, kwargs) in listeners:
if args is None:
args = []
if kwargs is None:
kwargs = {}
kwargs['details'] = details
try:
callback(event_type, *args, **kwargs)
except Exception:
LOG.warn("Failure calling callback %s to notify about event"
" %s, details: %s", callback, event_type,
details, exc_info=True)
def register(self, event_type, callback, args=None, kwargs=None):
"""Register a callback to be called when event of a given type occurs.
Callback will be called with provided ``args`` and ``kwargs`` and
when event type occurs (or on any event if ``event_type`` equals to
:attr:`.ANY`). It will also get additional keyword argument,
``details``, that will hold event details provided to the
:meth:`.notify` method.
"""
assert six.callable(callback), "Callback must be callable"
if self.is_registered(event_type, callback):
raise ValueError("Callback %s already registered" % (callback))
if kwargs:
for k in self.RESERVED_KEYS:
if k in kwargs:
raise KeyError(("Reserved key '%s' not allowed in "
"kwargs") % k)
kwargs = copy.copy(kwargs)
if args:
args = copy.copy(args)
self._listeners[event_type].append((callback, args, kwargs))
def deregister(self, event_type, callback):
"""Remove a single callback from listening to event ``event_type``."""
if event_type not in self._listeners:
return
for i, (cb, args, kwargs) in enumerate(self._listeners[event_type]):
if reflection.is_same_callback(cb, callback):
self._listeners[event_type].pop(i)
break
def copy_exc_info(exc_info):
"""Make copy of exception info tuple, as deep as possible."""
if exc_info is None:
return None
exc_type, exc_value, tb = exc_info
# NOTE(imelnikov): there is no need to copy type, and
# we can't copy traceback.
return (exc_type, copy.deepcopy(exc_value), tb)
def are_equal_exc_info_tuples(ei1, ei2):
if ei1 == ei2:
return True
if ei1 is None or ei2 is None:
return False # if both are None, we returned True above
# NOTE(imelnikov): we can't compare exceptions with '=='
# because we want exc_info to be equal to its copy made with
# copy_exc_info above.
if ei1[0] is not ei2[0]:
return False
if not all((type(ei1[1]) == type(ei2[1]),
exc.exception_message(ei1[1]) == exc.exception_message(ei2[1]),
repr(ei1[1]) == repr(ei2[1]))):
return False
if ei1[2] == ei2[2]:
return True
tb1 = traceback.format_tb(ei1[2])
tb2 = traceback.format_tb(ei2[2])
return tb1 == tb2
@contextlib.contextmanager
def capture_failure():
"""Captures the occurring exception and provides a failure object back.
This will save the current exception information and yield back a
failure object for the caller to use (it will raise a runtime error if
no active exception is being handled).
This is useful since in some cases the exception context can be cleared,
resulting in None being saved instead of the exception after an exception
handler has run. This can happen when eventlet switches greenthreads, or when
code inside an exception handler raises and catches another exception. In both
cases the exception context will be cleared.
To work around this, we save the exception state, yield a failure and
then run other code.
For example::

    except Exception:
        with capture_failure() as fail:
            LOG.warn("Activating cleanup")
            cleanup()
            save_failure(fail)
"""
exc_info = sys.exc_info()
if not any(exc_info):
raise RuntimeError("No active exception is being handled")
else:
yield Failure(exc_info=exc_info)
class Failure(object):
"""Object that represents failure.
Failure objects encapsulate exception information so that they can be
re-used later to re-raise, inspect, examine, log, print, serialize,
deserialize...
One example where they are depended upon is in the WBE engine. When a
remote worker throws an exception, the WBE based engine will receive that
exception and desire to reraise it to the user/caller of the WBE based
engine for appropriate handling (this matches the behavior of non-remote
engines). To accomplish this a failure object (or a
:py:meth:`~misc.Failure.to_dict` form) would be sent over the WBE channel
and the WBE based engine would deserialize it and use this objects
:meth:`.reraise` method to cause an exception that contains
similar/equivalent information as the original exception to be reraised,
allowing the user (or the WBE engine itself) to then handle the worker
failure/exception as they desire.
For those who are curious, here are a few reasons why the original
exception itself *may* not be reraised and a wrapped failure exception
object may be raised instead. These explanations are *only*
applicable when a failure object is serialized and deserialized (when it is
retained inside the python process that the exception was created in, the
original exception can be reraised correctly without issue).
* Traceback objects are not serializable/recreatable, since they contain
references to stack frames at the location where the exception was
raised. When a failure object is serialized and sent across a channel
and recreated it is *not* possible to restore the original traceback and
originating stack frames.
* The original exception *type* can not be guaranteed to be found, workers
can run code that is not accessible/available when the failure is being
deserialized. Even if it was possible to use pickle safely it would not
be possible to find the originating exception or associated code in this
situation.
* The original exception *type* can not be guaranteed to be constructed in
a *correct* manner. At the time of failure object creation the exception
has already been created and the failure object can not assume it has
knowledge (or the ability) to recreate the original type of the captured
exception (this is especially hard if the original exception was created
via a complex process via some custom exception constructor).
* The original exception *type* can not be guaranteed to be constructed in
a *safe* manner. Importing *foreign* exception types dynamically can be
problematic when not done correctly and in a safe manner; since failure
objects can capture any exception it would be *unsafe* to try to import
those exception types namespaces and modules on the receiver side
dynamically (this would create similar issues as the ``pickle`` module in
python has, where foreign modules can be imported and have code run when
this happens, causing issues and side-effects that the receiver would not
have intended to cause).
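For example (an illustrative sketch of the serialize/deserialize flow
described above)::

    try:
        raise IOError("disk broke")
    except Exception:
        fail = Failure()

    data = fail.to_dict()
    # ...the dictionary is sent over a channel (for example by a WBE
    # worker) and recreated on the receiving side...
    received = Failure.from_dict(data)
    received.reraise()  # raises a WrappedFailure carrying the IOError info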
"""
DICT_VERSION = 1
def __init__(self, exc_info=None, **kwargs):
if not kwargs:
if exc_info is None:
exc_info = sys.exc_info()
self._exc_info = exc_info
self._exc_type_names = list(
reflection.get_all_class_names(exc_info[0], up_to=Exception))
if not self._exc_type_names:
raise TypeError('Invalid exception type: %r' % exc_info[0])
self._exception_str = exc.exception_message(self._exc_info[1])
self._traceback_str = ''.join(
traceback.format_tb(self._exc_info[2]))
else:
self._exc_info = exc_info # may be None
self._exception_str = kwargs.pop('exception_str')
self._exc_type_names = kwargs.pop('exc_type_names', [])
self._traceback_str = kwargs.pop('traceback_str', None)
if kwargs:
raise TypeError(
'Failure.__init__ got unexpected keyword argument(s): %s'
% ', '.join(six.iterkeys(kwargs)))
@classmethod
def from_exception(cls, exception):
"""Creates a failure object from a exception instance."""
return cls((type(exception), exception, None))
def _matches(self, other):
if self is other:
return True
return (self._exc_type_names == other._exc_type_names
and self.exception_str == other.exception_str
and self.traceback_str == other.traceback_str)
def matches(self, other):
"""Checks if another object is equivalent to this object."""
if not isinstance(other, Failure):
return False
if self.exc_info is None or other.exc_info is None:
return self._matches(other)
else:
return self == other
def __eq__(self, other):
if not isinstance(other, Failure):
return NotImplemented
return (self._matches(other) and
are_equal_exc_info_tuples(self.exc_info, other.exc_info))
def __ne__(self, other):
return not (self == other)
# NOTE(imelnikov): obj.__hash__() should return same values for equal
# objects, so we should redefine __hash__. Failure equality semantics
# is a bit complicated, so for now we just mark Failure objects as
# unhashable. See python docs on object.__hash__ for more info:
# http://docs.python.org/2/reference/datamodel.html#object.__hash__
__hash__ = None
@property
def exception(self):
"""Exception value, or None if exception value is not present.
Exception value may be lost during serialization.
"""
if self._exc_info:
return self._exc_info[1]
else:
return None
@property
def exception_str(self):
"""String representation of exception."""
return self._exception_str
@property
def exc_info(self):
"""Exception info tuple or None."""
return self._exc_info
@property
def traceback_str(self):
"""Exception traceback as string."""
return self._traceback_str
@staticmethod
def reraise_if_any(failures):
"""Re-raise exceptions if argument is not empty.
If the argument is an empty list, this method returns None. If the
argument is a list with a single ``Failure`` object in it,
that failure is reraised. Else, a
:class:`~taskflow.exceptions.WrappedFailure` exception
is raised with a failure list as causes.
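For example (where ``fail``, ``fail_1`` and ``fail_2`` are previously
captured failure objects)::

    Failure.reraise_if_any([])                  # returns None
    Failure.reraise_if_any([fail])              # re-raises that failure
    Failure.reraise_if_any([fail_1, fail_2])    # raises a WrappedFailure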
"""
failures = list(failures)
if len(failures) == 1:
failures[0].reraise()
elif len(failures) > 1:
raise exc.WrappedFailure(failures)
def reraise(self):
"""Re-raise captured exception."""
if self._exc_info:
six.reraise(*self._exc_info)
else:
raise exc.WrappedFailure([self])
def check(self, *exc_classes):
"""Check if any of ``exc_classes`` caused the failure.
Arguments of this method can be exception types or type
names (strings). If the captured exception is an instance of
an exception of a given type, the corresponding argument is
returned. Else, None is returned.
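For example (an illustrative sketch)::

    try:
        raise ValueError("bad value")
    except Exception:
        fail = Failure()

    fail.check(ValueError, IOError)   # -> ValueError
    fail.check(IOError)               # -> None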
"""
for cls in exc_classes:
if isinstance(cls, type):
err = reflection.get_class_name(cls)
else:
err = cls
if err in self._exc_type_names:
return cls
return None
def __str__(self):
return self.pformat()
def pformat(self, traceback=False):
"""Pretty formats the failure object into a string."""
buf = six.StringIO()
buf.write(
'Failure: %s: %s' % (self._exc_type_names[0], self._exception_str))
if traceback:
if self._traceback_str is not None:
traceback_str = self._traceback_str.rstrip()
else:
traceback_str = None
if traceback_str:
buf.write('\nTraceback (most recent call last):\n')
buf.write(traceback_str)
else:
buf.write('\nTraceback not available.')
return buf.getvalue()
def __iter__(self):
"""Iterate over exception type names."""
for et in self._exc_type_names:
yield et
@classmethod
def from_dict(cls, data):
"""Converts this from a dictionary to a object."""
data = dict(data)
version = data.pop('version', None)
if version != cls.DICT_VERSION:
raise ValueError('Invalid dict version of failure object: %r'
% version)
return cls(**data)
def to_dict(self):
"""Converts this object to a dictionary."""
return {
'exception_str': self.exception_str,
'traceback_str': self.traceback_str,
'exc_type_names': list(self),
'version': self.DICT_VERSION,
}
def copy(self):
"""Copies this object."""
return Failure(exc_info=copy_exc_info(self.exc_info),
exception_str=self.exception_str,
traceback_str=self.traceback_str,
exc_type_names=self._exc_type_names[:])