Encode output for fixing UnicodeDecodeError

Current heatclient doesn't handle properly outgoing encode process.
For resolving it, this patch implements a process to encode everything
going out of the client, for example, prints.

This patch used two new modules (gettextutils.py and strutils.py)
taken from oslo-incubator in order to use safe_encode() and
safe_decode().

Closes-Bug: #1249233

Change-Id: Ia8c955a80d68126cd9c1fd791bda3e331a2aa5c9
This commit is contained in:
chenxiao 2013-11-12 15:58:37 +08:00
parent 56f97606d8
commit 2742b33010
5 changed files with 598 additions and 2 deletions

View File

@ -0,0 +1,373 @@
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright 2012 Red Hat, Inc.
# Copyright 2013 IBM Corp.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
gettext for openstack-common modules.
Usual usage in an openstack.common module:
from heatclient.openstack.common.gettextutils import _
"""
import copy
import gettext
import logging
import os
import re
try:
import UserString as _userString
except ImportError:
import collections as _userString
from babel import localedata
import six
_localedir = os.environ.get('heatclient'.upper() + '_LOCALEDIR')
_t = gettext.translation('heatclient', localedir=_localedir, fallback=True)
_AVAILABLE_LANGUAGES = {}
USE_LAZY = False
def enable_lazy():
"""Convenience function for configuring _() to use lazy gettext
Call this at the start of execution to enable the gettextutils._
function to use lazy gettext functionality. This is useful if
your project is importing _ directly instead of using the
gettextutils.install() way of importing the _ function.
"""
global USE_LAZY
USE_LAZY = True
def _(msg):
if USE_LAZY:
return Message(msg, 'heatclient')
else:
if six.PY3:
return _t.gettext(msg)
return _t.ugettext(msg)
def install(domain, lazy=False):
"""Install a _() function using the given translation domain.
Given a translation domain, install a _() function using gettext's
install() function.
The main difference from gettext.install() is that we allow
overriding the default localedir (e.g. /usr/share/locale) using
a translation-domain-specific environment variable (e.g.
NOVA_LOCALEDIR).
:param domain: the translation domain
:param lazy: indicates whether or not to install the lazy _() function.
The lazy _() introduces a way to do deferred translation
of messages by installing a _ that builds Message objects,
instead of strings, which can then be lazily translated into
any available locale.
"""
if lazy:
# NOTE(mrodden): Lazy gettext functionality.
#
# The following introduces a deferred way to do translations on
# messages in OpenStack. We override the standard _() function
# and % (format string) operation to build Message objects that can
# later be translated when we have more information.
#
# Also included below is an example LocaleHandler that translates
# Messages to an associated locale, effectively allowing many logs,
# each with their own locale.
def _lazy_gettext(msg):
"""Create and return a Message object.
Lazy gettext function for a given domain, it is a factory method
for a project/module to get a lazy gettext function for its own
translation domain (i.e. nova, glance, cinder, etc.)
Message encapsulates a string so that we can translate
it later when needed.
"""
return Message(msg, domain)
from six import moves
moves.builtins.__dict__['_'] = _lazy_gettext
else:
localedir = '%s_LOCALEDIR' % domain.upper()
if six.PY3:
gettext.install(domain,
localedir=os.environ.get(localedir))
else:
gettext.install(domain,
localedir=os.environ.get(localedir),
unicode=True)
class Message(_userString.UserString, object):
"""Class used to encapsulate translatable messages."""
def __init__(self, msg, domain):
# _msg is the gettext msgid and should never change
self._msg = msg
self._left_extra_msg = ''
self._right_extra_msg = ''
self._locale = None
self.params = None
self.domain = domain
@property
def data(self):
# NOTE(mrodden): this should always resolve to a unicode string
# that best represents the state of the message currently
localedir = os.environ.get(self.domain.upper() + '_LOCALEDIR')
if self.locale:
lang = gettext.translation(self.domain,
localedir=localedir,
languages=[self.locale],
fallback=True)
else:
# use system locale for translations
lang = gettext.translation(self.domain,
localedir=localedir,
fallback=True)
if six.PY3:
ugettext = lang.gettext
else:
ugettext = lang.ugettext
full_msg = (self._left_extra_msg +
ugettext(self._msg) +
self._right_extra_msg)
if self.params is not None:
full_msg = full_msg % self.params
return six.text_type(full_msg)
@property
def locale(self):
return self._locale
@locale.setter
def locale(self, value):
self._locale = value
if not self.params:
return
# This Message object may have been constructed with one or more
# Message objects as substitution parameters, given as a single
# Message, or a tuple or Map containing some, so when setting the
# locale for this Message we need to set it for those Messages too.
if isinstance(self.params, Message):
self.params.locale = value
return
if isinstance(self.params, tuple):
for param in self.params:
if isinstance(param, Message):
param.locale = value
return
if isinstance(self.params, dict):
for param in self.params.values():
if isinstance(param, Message):
param.locale = value
def _save_dictionary_parameter(self, dict_param):
full_msg = self.data
# look for %(blah) fields in string;
# ignore %% and deal with the
# case where % is first character on the line
keys = re.findall('(?:[^%]|^)?%\((\w*)\)[a-z]', full_msg)
# if we don't find any %(blah) blocks but have a %s
if not keys and re.findall('(?:[^%]|^)%[a-z]', full_msg):
# apparently the full dictionary is the parameter
params = copy.deepcopy(dict_param)
else:
params = {}
for key in keys:
try:
params[key] = copy.deepcopy(dict_param[key])
except TypeError:
# cast uncopyable thing to unicode string
params[key] = six.text_type(dict_param[key])
return params
def _save_parameters(self, other):
# we check for None later to see if
# we actually have parameters to inject,
# so encapsulate if our parameter is actually None
if other is None:
self.params = (other, )
elif isinstance(other, dict):
self.params = self._save_dictionary_parameter(other)
else:
# fallback to casting to unicode,
# this will handle the problematic python code-like
# objects that cannot be deep-copied
try:
self.params = copy.deepcopy(other)
except TypeError:
self.params = six.text_type(other)
return self
# overrides to be more string-like
def __unicode__(self):
return self.data
def __str__(self):
if six.PY3:
return self.__unicode__()
return self.data.encode('utf-8')
def __getstate__(self):
to_copy = ['_msg', '_right_extra_msg', '_left_extra_msg',
'domain', 'params', '_locale']
new_dict = self.__dict__.fromkeys(to_copy)
for attr in to_copy:
new_dict[attr] = copy.deepcopy(self.__dict__[attr])
return new_dict
def __setstate__(self, state):
for (k, v) in state.items():
setattr(self, k, v)
# operator overloads
def __add__(self, other):
copied = copy.deepcopy(self)
copied._right_extra_msg += other.__str__()
return copied
def __radd__(self, other):
copied = copy.deepcopy(self)
copied._left_extra_msg += other.__str__()
return copied
def __mod__(self, other):
# do a format string to catch and raise
# any possible KeyErrors from missing parameters
self.data % other
copied = copy.deepcopy(self)
return copied._save_parameters(other)
def __mul__(self, other):
return self.data * other
def __rmul__(self, other):
return other * self.data
def __getitem__(self, key):
return self.data[key]
def __getslice__(self, start, end):
return self.data.__getslice__(start, end)
def __getattribute__(self, name):
# NOTE(mrodden): handle lossy operations that we can't deal with yet
# These override the UserString implementation, since UserString
# uses our __class__ attribute to try and build a new message
# after running the inner data string through the operation.
# At that point, we have lost the gettext message id and can just
# safely resolve to a string instead.
ops = ['capitalize', 'center', 'decode', 'encode',
'expandtabs', 'ljust', 'lstrip', 'replace', 'rjust', 'rstrip',
'strip', 'swapcase', 'title', 'translate', 'upper', 'zfill']
if name in ops:
return getattr(self.data, name)
else:
return _userString.UserString.__getattribute__(self, name)
def get_available_languages(domain):
"""Lists the available languages for the given translation domain.
:param domain: the domain to get languages for
"""
if domain in _AVAILABLE_LANGUAGES:
return copy.copy(_AVAILABLE_LANGUAGES[domain])
localedir = '%s_LOCALEDIR' % domain.upper()
find = lambda x: gettext.find(domain,
localedir=os.environ.get(localedir),
languages=[x])
# NOTE(mrodden): en_US should always be available (and first in case
# order matters) since our in-line message strings are en_US
language_list = ['en_US']
# NOTE(luisg): Babel <1.0 used a function called list(), which was
# renamed to locale_identifiers() in >=1.0, the requirements master list
# requires >=0.9.6, uncapped, so defensively work with both. We can remove
# this check when the master list updates to >=1.0, and update all projects
list_identifiers = (getattr(localedata, 'list', None) or
getattr(localedata, 'locale_identifiers'))
locale_identifiers = list_identifiers()
for i in locale_identifiers:
if find(i) is not None:
language_list.append(i)
_AVAILABLE_LANGUAGES[domain] = language_list
return copy.copy(language_list)
def get_localized_message(message, user_locale):
"""Gets a localized version of the given message in the given locale.
If the message is not a Message object the message is returned as-is.
If the locale is None the message is translated to the default locale.
:returns: the translated message in unicode, or the original message if
it could not be translated
"""
translated = message
if isinstance(message, Message):
original_locale = message.locale
message.locale = user_locale
translated = six.text_type(message)
message.locale = original_locale
return translated
class LocaleHandler(logging.Handler):
"""Handler that can have a locale associated to translate Messages.
A quick example of how to utilize the Message class above.
LocaleHandler takes a locale and a target logging.Handler object
to forward LogRecord objects to after translating the internal Message.
"""
def __init__(self, locale, target):
"""Initialize a LocaleHandler
:param locale: locale to use for translating messages
:param target: logging.Handler object to forward
LogRecord objects to after translation
"""
logging.Handler.__init__(self)
self.locale = locale
self.target = target
def emit(self, record):
if isinstance(record.msg, Message):
# set the locale and resolve to a string
record.msg.locale = self.locale
self.target.emit(record)

View File

@ -0,0 +1,218 @@
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright 2011 OpenStack Foundation.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
System-level utilities and helper functions.
"""
import re
import sys
import unicodedata
import six
from heatclient.openstack.common.gettextutils import _ # noqa
# Used for looking up extensions of text
# to their 'multiplied' byte amount
BYTE_MULTIPLIERS = {
'': 1,
't': 1024 ** 4,
'g': 1024 ** 3,
'm': 1024 ** 2,
'k': 1024,
}
BYTE_REGEX = re.compile(r'(^-?\d+)(\D*)')
TRUE_STRINGS = ('1', 't', 'true', 'on', 'y', 'yes')
FALSE_STRINGS = ('0', 'f', 'false', 'off', 'n', 'no')
SLUGIFY_STRIP_RE = re.compile(r"[^\w\s-]")
SLUGIFY_HYPHENATE_RE = re.compile(r"[-\s]+")
def int_from_bool_as_string(subject):
"""Interpret a string as a boolean and return either 1 or 0.
Any string value in:
('True', 'true', 'On', 'on', '1')
is interpreted as a boolean True.
Useful for JSON-decoded stuff and config file parsing
"""
return bool_from_string(subject) and 1 or 0
def bool_from_string(subject, strict=False):
"""Interpret a string as a boolean.
A case-insensitive match is performed such that strings matching 't',
'true', 'on', 'y', 'yes', or '1' are considered True and, when
`strict=False`, anything else is considered False.
Useful for JSON-decoded stuff and config file parsing.
If `strict=True`, unrecognized values, including None, will raise a
ValueError which is useful when parsing values passed in from an API call.
Strings yielding False are 'f', 'false', 'off', 'n', 'no', or '0'.
"""
if not isinstance(subject, six.string_types):
subject = str(subject)
lowered = subject.strip().lower()
if lowered in TRUE_STRINGS:
return True
elif lowered in FALSE_STRINGS:
return False
elif strict:
acceptable = ', '.join(
"'%s'" % s for s in sorted(TRUE_STRINGS + FALSE_STRINGS))
msg = _("Unrecognized value '%(val)s', acceptable values are:"
" %(acceptable)s") % {'val': subject,
'acceptable': acceptable}
raise ValueError(msg)
else:
return False
def safe_decode(text, incoming=None, errors='strict'):
"""Decodes incoming str using `incoming` if they're not already unicode.
:param incoming: Text's current encoding
:param errors: Errors handling policy. See here for valid
values http://docs.python.org/2/library/codecs.html
:returns: text or a unicode `incoming` encoded
representation of it.
:raises TypeError: If text is not an instance of str
"""
if not isinstance(text, six.string_types):
raise TypeError("%s can't be decoded" % type(text))
if isinstance(text, six.text_type):
return text
if not incoming:
incoming = (sys.stdin.encoding or
sys.getdefaultencoding())
try:
return text.decode(incoming, errors)
except UnicodeDecodeError:
# Note(flaper87) If we get here, it means that
# sys.stdin.encoding / sys.getdefaultencoding
# didn't return a suitable encoding to decode
# text. This happens mostly when global LANG
# var is not set correctly and there's no
# default encoding. In this case, most likely
# python will use ASCII or ANSI encoders as
# default encodings but they won't be capable
# of decoding non-ASCII characters.
#
# Also, UTF-8 is being used since it's an ASCII
# extension.
return text.decode('utf-8', errors)
def safe_encode(text, incoming=None,
encoding='utf-8', errors='strict'):
"""Encodes incoming str/unicode using `encoding`.
If incoming is not specified, text is expected to be encoded with
current python's default encoding. (`sys.getdefaultencoding`)
:param incoming: Text's current encoding
:param encoding: Expected encoding for text (Default UTF-8)
:param errors: Errors handling policy. See here for valid
values http://docs.python.org/2/library/codecs.html
:returns: text or a bytestring `encoding` encoded
representation of it.
:raises TypeError: If text is not an instance of str
"""
if not isinstance(text, six.string_types):
raise TypeError("%s can't be encoded" % type(text))
if not incoming:
incoming = (sys.stdin.encoding or
sys.getdefaultencoding())
if isinstance(text, six.text_type):
return text.encode(encoding, errors)
elif text and encoding != incoming:
# Decode text before encoding it with `encoding`
text = safe_decode(text, incoming, errors)
return text.encode(encoding, errors)
return text
def to_bytes(text, default=0):
"""Converts a string into an integer of bytes.
Looks at the last characters of the text to determine
what conversion is needed to turn the input text into a byte number.
Supports "B, K(B), M(B), G(B), and T(B)". (case insensitive)
:param text: String input for bytes size conversion.
:param default: Default return value when text is blank.
"""
match = BYTE_REGEX.search(text)
if match:
magnitude = int(match.group(1))
mult_key_org = match.group(2)
if not mult_key_org:
return magnitude
elif text:
msg = _('Invalid string format: %s') % text
raise TypeError(msg)
else:
return default
mult_key = mult_key_org.lower().replace('b', '', 1)
multiplier = BYTE_MULTIPLIERS.get(mult_key)
if multiplier is None:
msg = _('Unknown byte multiplier: %s') % mult_key_org
raise TypeError(msg)
return magnitude * multiplier
def to_slug(value, incoming=None, errors="strict"):
"""Normalize string.
Convert to lowercase, remove non-word characters, and convert spaces
to hyphens.
Inspired by Django's `slugify` filter.
:param value: Text to slugify
:param incoming: Text's current encoding
:param errors: Errors handling policy. See here for valid
values http://docs.python.org/2/library/codecs.html
:returns: slugified unicode representation of `value`
:raises TypeError: If text is not an instance of str
"""
value = safe_decode(value, incoming, errors)
# NOTE(aababilov): no need to use safe_(encode|decode) here:
# encodings are always "ascii", error handling is always "ignore"
# and types are always known (first: unicode; second: str)
value = unicodedata.normalize("NFKD", value).encode(
"ascii", "ignore").decode("ascii")
value = SLUGIFY_STRIP_RE.sub("", value).strip().lower()
return SLUGIFY_HYPHENATE_RE.sub("-", value)

View File

@ -14,9 +14,12 @@
Command-line interface to the Heat API.
"""
from __future__ import print_function
import argparse
import httplib2
import logging
import six
import sys
from keystoneclient.v2_0 import client as ksclient
@ -25,6 +28,7 @@ import heatclient
from heatclient import client as heat_client
from heatclient.common import utils
from heatclient import exc
from heatclient.openstack.common import strutils
logger = logging.getLogger(__name__)
@ -367,7 +371,7 @@ def main():
HeatShell().main(sys.argv[1:])
except Exception as e:
print >> sys.stderr, e
print(strutils.safe_encode(six.text_type(e)), file=sys.stderr)
sys.exit(1)
if __name__ == "__main__":

View File

@ -1,7 +1,7 @@
[DEFAULT]
# The list of modules to copy from openstack-common
modules=importutils
modules=importutils,gettextutils,strutils
module=py3kcompat
# The base module to hold the copy of openstack.common

View File

@ -5,3 +5,4 @@ iso8601>=0.1.4
PrettyTable>=0.6,<0.8
python-keystoneclient>=0.4.1
PyYAML>=3.1.0
six>=1.4.1