Encode output for fixing UnicodeDecodeError

Current heatclient doesn't handle properly outgoing encode process. For resolving it, this patch implements a process to encode everything going out of the client, for example, prints. This patch used two new modules (gettextutils.py and strutils.py) taken from oslo-incubator in order to use safe_encode() and safe_decode(). Closes-Bug: #1249233 Change-Id: Ia8c955a80d68126cd9c1fd791bda3e331a2aa5c9
2013-11-12 15:58:37 +08:00
parent 56f97606d8
commit 2742b33010
5 changed files with 598 additions and 2 deletions
--- a/heatclient/openstack/common/gettextutils.py
+++ b/heatclient/openstack/common/gettextutils.py
@@ -0,0 +1,373 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2012 Red Hat, Inc.
+# Copyright 2013 IBM Corp.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+"""
+gettext for openstack-common modules.
+
+Usual usage in an openstack.common module:
+
+    from heatclient.openstack.common.gettextutils import _
+"""
+
+import copy
+import gettext
+import logging
+import os
+import re
+try:
+    import UserString as _userString
+except ImportError:
+    import collections as _userString
+
+from babel import localedata
+import six
+
+_localedir = os.environ.get('heatclient'.upper() + '_LOCALEDIR')
+_t = gettext.translation('heatclient', localedir=_localedir, fallback=True)
+
+_AVAILABLE_LANGUAGES = {}
+USE_LAZY = False
+
+
+def enable_lazy():
+    """Convenience function for configuring _() to use lazy gettext
+
+    Call this at the start of execution to enable the gettextutils._
+    function to use lazy gettext functionality. This is useful if
+    your project is importing _ directly instead of using the
+    gettextutils.install() way of importing the _ function.
+    """
+    global USE_LAZY
+    USE_LAZY = True
+
+
+def _(msg):
+    if USE_LAZY:
+        return Message(msg, 'heatclient')
+    else:
+        if six.PY3:
+            return _t.gettext(msg)
+        return _t.ugettext(msg)
+
+
+def install(domain, lazy=False):
+    """Install a _() function using the given translation domain.
+
+    Given a translation domain, install a _() function using gettext's
+    install() function.
+
+    The main difference from gettext.install() is that we allow
+    overriding the default localedir (e.g. /usr/share/locale) using
+    a translation-domain-specific environment variable (e.g.
+    NOVA_LOCALEDIR).
+
+    :param domain: the translation domain
+    :param lazy: indicates whether or not to install the lazy _() function.
+                 The lazy _() introduces a way to do deferred translation
+                 of messages by installing a _ that builds Message objects,
+                 instead of strings, which can then be lazily translated into
+                 any available locale.
+    """
+    if lazy:
+        # NOTE(mrodden): Lazy gettext functionality.
+        #
+        # The following introduces a deferred way to do translations on
+        # messages in OpenStack. We override the standard _() function
+        # and % (format string) operation to build Message objects that can
+        # later be translated when we have more information.
+        #
+        # Also included below is an example LocaleHandler that translates
+        # Messages to an associated locale, effectively allowing many logs,
+        # each with their own locale.
+
+        def _lazy_gettext(msg):
+            """Create and return a Message object.
+
+            Lazy gettext function for a given domain, it is a factory method
+            for a project/module to get a lazy gettext function for its own
+            translation domain (i.e. nova, glance, cinder, etc.)
+
+            Message encapsulates a string so that we can translate
+            it later when needed.
+            """
+            return Message(msg, domain)
+
+        from six import moves
+        moves.builtins.__dict__['_'] = _lazy_gettext
+    else:
+        localedir = '%s_LOCALEDIR' % domain.upper()
+        if six.PY3:
+            gettext.install(domain,
+                            localedir=os.environ.get(localedir))
+        else:
+            gettext.install(domain,
+                            localedir=os.environ.get(localedir),
+                            unicode=True)
+
+
+class Message(_userString.UserString, object):
+    """Class used to encapsulate translatable messages."""
+    def __init__(self, msg, domain):
+        # _msg is the gettext msgid and should never change
+        self._msg = msg
+        self._left_extra_msg = ''
+        self._right_extra_msg = ''
+        self._locale = None
+        self.params = None
+        self.domain = domain
+
+    @property
+    def data(self):
+        # NOTE(mrodden): this should always resolve to a unicode string
+        # that best represents the state of the message currently
+
+        localedir = os.environ.get(self.domain.upper() + '_LOCALEDIR')
+        if self.locale:
+            lang = gettext.translation(self.domain,
+                                       localedir=localedir,
+                                       languages=[self.locale],
+                                       fallback=True)
+        else:
+            # use system locale for translations
+            lang = gettext.translation(self.domain,
+                                       localedir=localedir,
+                                       fallback=True)
+
+        if six.PY3:
+            ugettext = lang.gettext
+        else:
+            ugettext = lang.ugettext
+
+        full_msg = (self._left_extra_msg +
+                    ugettext(self._msg) +
+                    self._right_extra_msg)
+
+        if self.params is not None:
+            full_msg = full_msg % self.params
+
+        return six.text_type(full_msg)
+
+    @property
+    def locale(self):
+        return self._locale
+
+    @locale.setter
+    def locale(self, value):
+        self._locale = value
+        if not self.params:
+            return
+
+        # This Message object may have been constructed with one or more
+        # Message objects as substitution parameters, given as a single
+        # Message, or a tuple or Map containing some, so when setting the
+        # locale for this Message we need to set it for those Messages too.
+        if isinstance(self.params, Message):
+            self.params.locale = value
+            return
+        if isinstance(self.params, tuple):
+            for param in self.params:
+                if isinstance(param, Message):
+                    param.locale = value
+            return
+        if isinstance(self.params, dict):
+            for param in self.params.values():
+                if isinstance(param, Message):
+                    param.locale = value
+
+    def _save_dictionary_parameter(self, dict_param):
+        full_msg = self.data
+        # look for %(blah) fields in string;
+        # ignore %% and deal with the
+        # case where % is first character on the line
+        keys = re.findall('(?:[^%]|^)?%\((\w*)\)[a-z]', full_msg)
+
+        # if we don't find any %(blah) blocks but have a %s
+        if not keys and re.findall('(?:[^%]|^)%[a-z]', full_msg):
+            # apparently the full dictionary is the parameter
+            params = copy.deepcopy(dict_param)
+        else:
+            params = {}
+            for key in keys:
+                try:
+                    params[key] = copy.deepcopy(dict_param[key])
+                except TypeError:
+                    # cast uncopyable thing to unicode string
+                    params[key] = six.text_type(dict_param[key])
+
+        return params
+
+    def _save_parameters(self, other):
+        # we check for None later to see if
+        # we actually have parameters to inject,
+        # so encapsulate if our parameter is actually None
+        if other is None:
+            self.params = (other, )
+        elif isinstance(other, dict):
+            self.params = self._save_dictionary_parameter(other)
+        else:
+            # fallback to casting to unicode,
+            # this will handle the problematic python code-like
+            # objects that cannot be deep-copied
+            try:
+                self.params = copy.deepcopy(other)
+            except TypeError:
+                self.params = six.text_type(other)
+
+        return self
+
+    # overrides to be more string-like
+    def __unicode__(self):
+        return self.data
+
+    def __str__(self):
+        if six.PY3:
+            return self.__unicode__()
+        return self.data.encode('utf-8')
+
+    def __getstate__(self):
+        to_copy = ['_msg', '_right_extra_msg', '_left_extra_msg',
+                   'domain', 'params', '_locale']
+        new_dict = self.__dict__.fromkeys(to_copy)
+        for attr in to_copy:
+            new_dict[attr] = copy.deepcopy(self.__dict__[attr])
+
+        return new_dict
+
+    def __setstate__(self, state):
+        for (k, v) in state.items():
+            setattr(self, k, v)
+
+    # operator overloads
+    def __add__(self, other):
+        copied = copy.deepcopy(self)
+        copied._right_extra_msg += other.__str__()
+        return copied
+
+    def __radd__(self, other):
+        copied = copy.deepcopy(self)
+        copied._left_extra_msg += other.__str__()
+        return copied
+
+    def __mod__(self, other):
+        # do a format string to catch and raise
+        # any possible KeyErrors from missing parameters
+        self.data % other
+        copied = copy.deepcopy(self)
+        return copied._save_parameters(other)
+
+    def __mul__(self, other):
+        return self.data * other
+
+    def __rmul__(self, other):
+        return other * self.data
+
+    def __getitem__(self, key):
+        return self.data[key]
+
+    def __getslice__(self, start, end):
+        return self.data.__getslice__(start, end)
+
+    def __getattribute__(self, name):
+        # NOTE(mrodden): handle lossy operations that we can't deal with yet
+        # These override the UserString implementation, since UserString
+        # uses our __class__ attribute to try and build a new message
+        # after running the inner data string through the operation.
+        # At that point, we have lost the gettext message id and can just
+        # safely resolve to a string instead.
+        ops = ['capitalize', 'center', 'decode', 'encode',
+               'expandtabs', 'ljust', 'lstrip', 'replace', 'rjust', 'rstrip',
+               'strip', 'swapcase', 'title', 'translate', 'upper', 'zfill']
+        if name in ops:
+            return getattr(self.data, name)
+        else:
+            return _userString.UserString.__getattribute__(self, name)
+
+
+def get_available_languages(domain):
+    """Lists the available languages for the given translation domain.
+
+    :param domain: the domain to get languages for
+    """
+    if domain in _AVAILABLE_LANGUAGES:
+        return copy.copy(_AVAILABLE_LANGUAGES[domain])
+
+    localedir = '%s_LOCALEDIR' % domain.upper()
+    find = lambda x: gettext.find(domain,
+                                  localedir=os.environ.get(localedir),
+                                  languages=[x])
+
+    # NOTE(mrodden): en_US should always be available (and first in case
+    # order matters) since our in-line message strings are en_US
+    language_list = ['en_US']
+    # NOTE(luisg): Babel <1.0 used a function called list(), which was
+    # renamed to locale_identifiers() in >=1.0, the requirements master list
+    # requires >=0.9.6, uncapped, so defensively work with both. We can remove
+    # this check when the master list updates to >=1.0, and update all projects
+    list_identifiers = (getattr(localedata, 'list', None) or
+                        getattr(localedata, 'locale_identifiers'))
+    locale_identifiers = list_identifiers()
+    for i in locale_identifiers:
+        if find(i) is not None:
+            language_list.append(i)
+    _AVAILABLE_LANGUAGES[domain] = language_list
+    return copy.copy(language_list)
+
+
+def get_localized_message(message, user_locale):
+    """Gets a localized version of the given message in the given locale.
+
+    If the message is not a Message object the message is returned as-is.
+    If the locale is None the message is translated to the default locale.
+
+    :returns: the translated message in unicode, or the original message if
+              it could not be translated
+    """
+    translated = message
+    if isinstance(message, Message):
+        original_locale = message.locale
+        message.locale = user_locale
+        translated = six.text_type(message)
+        message.locale = original_locale
+    return translated
+
+
+class LocaleHandler(logging.Handler):
+    """Handler that can have a locale associated to translate Messages.
+
+    A quick example of how to utilize the Message class above.
+    LocaleHandler takes a locale and a target logging.Handler object
+    to forward LogRecord objects to after translating the internal Message.
+    """
+
+    def __init__(self, locale, target):
+        """Initialize a LocaleHandler
+
+        :param locale: locale to use for translating messages
+        :param target: logging.Handler object to forward
+                       LogRecord objects to after translation
+        """
+        logging.Handler.__init__(self)
+        self.locale = locale
+        self.target = target
+
+    def emit(self, record):
+        if isinstance(record.msg, Message):
+            # set the locale and resolve to a string
+            record.msg.locale = self.locale
+
+        self.target.emit(record)
--- a/heatclient/openstack/common/strutils.py
+++ b/heatclient/openstack/common/strutils.py
@@ -0,0 +1,218 @@
+# vim: tabstop=4 shiftwidth=4 softtabstop=4
+
+# Copyright 2011 OpenStack Foundation.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+"""
+System-level utilities and helper functions.
+"""
+
+import re
+import sys
+import unicodedata
+
+import six
+
+from heatclient.openstack.common.gettextutils import _  # noqa
+
+
+# Used for looking up extensions of text
+# to their 'multiplied' byte amount
+BYTE_MULTIPLIERS = {
+    '': 1,
+    't': 1024 ** 4,
+    'g': 1024 ** 3,
+    'm': 1024 ** 2,
+    'k': 1024,
+}
+BYTE_REGEX = re.compile(r'(^-?\d+)(\D*)')
+
+TRUE_STRINGS = ('1', 't', 'true', 'on', 'y', 'yes')
+FALSE_STRINGS = ('0', 'f', 'false', 'off', 'n', 'no')
+
+SLUGIFY_STRIP_RE = re.compile(r"[^\w\s-]")
+SLUGIFY_HYPHENATE_RE = re.compile(r"[-\s]+")
+
+
+def int_from_bool_as_string(subject):
+    """Interpret a string as a boolean and return either 1 or 0.
+
+    Any string value in:
+
+        ('True', 'true', 'On', 'on', '1')
+
+    is interpreted as a boolean True.
+
+    Useful for JSON-decoded stuff and config file parsing
+    """
+    return bool_from_string(subject) and 1 or 0
+
+
+def bool_from_string(subject, strict=False):
+    """Interpret a string as a boolean.
+
+    A case-insensitive match is performed such that strings matching 't',
+    'true', 'on', 'y', 'yes', or '1' are considered True and, when
+    `strict=False`, anything else is considered False.
+
+    Useful for JSON-decoded stuff and config file parsing.
+
+    If `strict=True`, unrecognized values, including None, will raise a
+    ValueError which is useful when parsing values passed in from an API call.
+    Strings yielding False are 'f', 'false', 'off', 'n', 'no', or '0'.
+    """
+    if not isinstance(subject, six.string_types):
+        subject = str(subject)
+
+    lowered = subject.strip().lower()
+
+    if lowered in TRUE_STRINGS:
+        return True
+    elif lowered in FALSE_STRINGS:
+        return False
+    elif strict:
+        acceptable = ', '.join(
+            "'%s'" % s for s in sorted(TRUE_STRINGS + FALSE_STRINGS))
+        msg = _("Unrecognized value '%(val)s', acceptable values are:"
+                " %(acceptable)s") % {'val': subject,
+                                      'acceptable': acceptable}
+        raise ValueError(msg)
+    else:
+        return False
+
+
+def safe_decode(text, incoming=None, errors='strict'):
+    """Decodes incoming str using `incoming` if they're not already unicode.
+
+    :param incoming: Text's current encoding
+    :param errors: Errors handling policy. See here for valid
+        values http://docs.python.org/2/library/codecs.html
+    :returns: text or a unicode `incoming` encoded
+                representation of it.
+    :raises TypeError: If text is not an instance of str
+    """
+    if not isinstance(text, six.string_types):
+        raise TypeError("%s can't be decoded" % type(text))
+
+    if isinstance(text, six.text_type):
+        return text
+
+    if not incoming:
+        incoming = (sys.stdin.encoding or
+                    sys.getdefaultencoding())
+
+    try:
+        return text.decode(incoming, errors)
+    except UnicodeDecodeError:
+        # Note(flaper87) If we get here, it means that
+        # sys.stdin.encoding / sys.getdefaultencoding
+        # didn't return a suitable encoding to decode
+        # text. This happens mostly when global LANG
+        # var is not set correctly and there's no
+        # default encoding. In this case, most likely
+        # python will use ASCII or ANSI encoders as
+        # default encodings but they won't be capable
+        # of decoding non-ASCII characters.
+        #
+        # Also, UTF-8 is being used since it's an ASCII
+        # extension.
+        return text.decode('utf-8', errors)
+
+
+def safe_encode(text, incoming=None,
+                encoding='utf-8', errors='strict'):
+    """Encodes incoming str/unicode using `encoding`.
+
+    If incoming is not specified, text is expected to be encoded with
+    current python's default encoding. (`sys.getdefaultencoding`)
+
+    :param incoming: Text's current encoding
+    :param encoding: Expected encoding for text (Default UTF-8)
+    :param errors: Errors handling policy. See here for valid
+        values http://docs.python.org/2/library/codecs.html
+    :returns: text or a bytestring `encoding` encoded
+                representation of it.
+    :raises TypeError: If text is not an instance of str
+    """
+    if not isinstance(text, six.string_types):
+        raise TypeError("%s can't be encoded" % type(text))
+
+    if not incoming:
+        incoming = (sys.stdin.encoding or
+                    sys.getdefaultencoding())
+
+    if isinstance(text, six.text_type):
+        return text.encode(encoding, errors)
+    elif text and encoding != incoming:
+        # Decode text before encoding it with `encoding`
+        text = safe_decode(text, incoming, errors)
+        return text.encode(encoding, errors)
+
+    return text
+
+
+def to_bytes(text, default=0):
+    """Converts a string into an integer of bytes.
+
+    Looks at the last characters of the text to determine
+    what conversion is needed to turn the input text into a byte number.
+    Supports "B, K(B), M(B), G(B), and T(B)". (case insensitive)
+
+    :param text: String input for bytes size conversion.
+    :param default: Default return value when text is blank.
+
+    """
+    match = BYTE_REGEX.search(text)
+    if match:
+        magnitude = int(match.group(1))
+        mult_key_org = match.group(2)
+        if not mult_key_org:
+            return magnitude
+    elif text:
+        msg = _('Invalid string format: %s') % text
+        raise TypeError(msg)
+    else:
+        return default
+    mult_key = mult_key_org.lower().replace('b', '', 1)
+    multiplier = BYTE_MULTIPLIERS.get(mult_key)
+    if multiplier is None:
+        msg = _('Unknown byte multiplier: %s') % mult_key_org
+        raise TypeError(msg)
+    return magnitude * multiplier
+
+
+def to_slug(value, incoming=None, errors="strict"):
+    """Normalize string.
+
+    Convert to lowercase, remove non-word characters, and convert spaces
+    to hyphens.
+
+    Inspired by Django's `slugify` filter.
+
+    :param value: Text to slugify
+    :param incoming: Text's current encoding
+    :param errors: Errors handling policy. See here for valid
+        values http://docs.python.org/2/library/codecs.html
+    :returns: slugified unicode representation of `value`
+    :raises TypeError: If text is not an instance of str
+    """
+    value = safe_decode(value, incoming, errors)
+    # NOTE(aababilov): no need to use safe_(encode|decode) here:
+    # encodings are always "ascii", error handling is always "ignore"
+    # and types are always known (first: unicode; second: str)
+    value = unicodedata.normalize("NFKD", value).encode(
+        "ascii", "ignore").decode("ascii")
+    value = SLUGIFY_STRIP_RE.sub("", value).strip().lower()
+    return SLUGIFY_HYPHENATE_RE.sub("-", value)
--- a/heatclient/shell.py
+++ b/heatclient/shell.py
@@ -14,9 +14,12 @@
 Command-line interface to the Heat API.
 """

+from __future__ import print_function
+
 import argparse
 import httplib2
 import logging
+import six
 import sys

 from keystoneclient.v2_0 import client as ksclient
@@ -25,6 +28,7 @@ import heatclient
 from heatclient import client as heat_client
 from heatclient.common import utils
 from heatclient import exc
+from heatclient.openstack.common import strutils

 logger = logging.getLogger(__name__)

@@ -367,7 +371,7 @@ def main():
        HeatShell().main(sys.argv[1:])

    except Exception as e:
-        print >> sys.stderr, e
+        print(strutils.safe_encode(six.text_type(e)), file=sys.stderr)
        sys.exit(1)

 if __name__ == "__main__":
--- a/openstack-common.conf
+++ b/openstack-common.conf
@@ -1,7 +1,7 @@
 [DEFAULT]

 # The list of modules to copy from openstack-common
-modules=importutils
+modules=importutils,gettextutils,strutils
 module=py3kcompat

 # The base module to hold the copy of openstack.common
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ iso8601>=0.1.4
 PrettyTable>=0.6,<0.8
 python-keystoneclient>=0.4.1
 PyYAML>=3.1.0
+six>=1.4.1