From 3190cf9575396e263ca8fb920d46691f651484fd Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Tue, 27 May 2014 10:22:49 +0200 Subject: [PATCH] Synced jsonutils from oslo-incubator The sync includes change that makes sure we get unicode-only dicts from jsonutils no matter which json module implementation is selected. The latest commit in oslo-incubator: - 0f4586c0076183c6356eec682c8a593648125abd The sync adds a new 'strutils' module that is now used in jsonutils. Change-Id: Ic815ca3df94c33edec9104172048b2cd94b92e3f Closes-Bug: 1314129 --- openstack-common.conf | 1 + sahara/openstack/common/gettextutils.py | 238 ++++++++++++----------- sahara/openstack/common/jsonutils.py | 10 +- sahara/openstack/common/strutils.py | 239 ++++++++++++++++++++++++ 4 files changed, 377 insertions(+), 111 deletions(-) create mode 100644 sahara/openstack/common/strutils.py diff --git a/openstack-common.conf b/openstack-common.conf index 30a1fd92..d6312403 100644 --- a/openstack-common.conf +++ b/openstack-common.conf @@ -13,6 +13,7 @@ module=middleware.base module=network_utils module=periodic_task module=processutils +module=strutils module=threadgroup module=timeutils module=uuidutils diff --git a/sahara/openstack/common/gettextutils.py b/sahara/openstack/common/gettextutils.py index 68e1ed0b..dbdb6bbb 100644 --- a/sahara/openstack/common/gettextutils.py +++ b/sahara/openstack/common/gettextutils.py @@ -28,29 +28,117 @@ import gettext import locale from logging import handlers import os -import re from babel import localedata import six -_localedir = os.environ.get('sahara'.upper() + '_LOCALEDIR') -_t = gettext.translation('sahara', localedir=_localedir, fallback=True) - -# We use separate translation catalogs for each log level, so set up a -# mapping between the log level name and the translator. The domain -# for the log level is project_name + "-log-" + log_level so messages -# for each level end up in their own catalog. -_t_log_levels = dict( - (level, gettext.translation('sahara' + '-log-' + level, - localedir=_localedir, - fallback=True)) - for level in ['info', 'warning', 'error', 'critical'] -) - _AVAILABLE_LANGUAGES = {} + +# FIXME(dhellmann): Remove this when moving to oslo.i18n. USE_LAZY = False +class TranslatorFactory(object): + """Create translator functions + """ + + def __init__(self, domain, lazy=False, localedir=None): + """Establish a set of translation functions for the domain. + + :param domain: Name of translation domain, + specifying a message catalog. + :type domain: str + :param lazy: Delays translation until a message is emitted. + Defaults to False. + :type lazy: Boolean + :param localedir: Directory with translation catalogs. + :type localedir: str + """ + self.domain = domain + self.lazy = lazy + if localedir is None: + localedir = os.environ.get(domain.upper() + '_LOCALEDIR') + self.localedir = localedir + + def _make_translation_func(self, domain=None): + """Return a new translation function ready for use. + + Takes into account whether or not lazy translation is being + done. + + The domain can be specified to override the default from the + factory, but the localedir from the factory is always used + because we assume the log-level translation catalogs are + installed in the same directory as the main application + catalog. + + """ + if domain is None: + domain = self.domain + if self.lazy: + return functools.partial(Message, domain=domain) + t = gettext.translation( + domain, + localedir=self.localedir, + fallback=True, + ) + if six.PY3: + return t.gettext + return t.ugettext + + @property + def primary(self): + "The default translation function." + return self._make_translation_func() + + def _make_log_translation_func(self, level): + return self._make_translation_func(self.domain + '-log-' + level) + + @property + def log_info(self): + "Translate info-level log messages." + return self._make_log_translation_func('info') + + @property + def log_warning(self): + "Translate warning-level log messages." + return self._make_log_translation_func('warning') + + @property + def log_error(self): + "Translate error-level log messages." + return self._make_log_translation_func('error') + + @property + def log_critical(self): + "Translate critical-level log messages." + return self._make_log_translation_func('critical') + + +# NOTE(dhellmann): When this module moves out of the incubator into +# oslo.i18n, these global variables can be moved to an integration +# module within each application. + +# Create the global translation functions. +_translators = TranslatorFactory('sahara') + +# The primary translation function using the well-known name "_" +_ = _translators.primary + +# Translators for log levels. +# +# The abbreviated names are meant to reflect the usual use of a short +# name like '_'. The "L" is for "log" and the other letter comes from +# the level. +_LI = _translators.log_info +_LW = _translators.log_warning +_LE = _translators.log_error +_LC = _translators.log_critical + +# NOTE(dhellmann): End of globals that will move to the application's +# integration module. + + def enable_lazy(): """Convenience function for configuring _() to use lazy gettext @@ -59,41 +147,18 @@ def enable_lazy(): your project is importing _ directly instead of using the gettextutils.install() way of importing the _ function. """ - global USE_LAZY + # FIXME(dhellmann): This function will be removed in oslo.i18n, + # because the TranslatorFactory makes it superfluous. + global _, _LI, _LW, _LE, _LC, USE_LAZY + tf = TranslatorFactory('sahara', lazy=True) + _ = tf.primary + _LI = tf.log_info + _LW = tf.log_warning + _LE = tf.log_error + _LC = tf.log_critical USE_LAZY = True -def _(msg): - if USE_LAZY: - return Message(msg, domain='sahara') - else: - if six.PY3: - return _t.gettext(msg) - return _t.ugettext(msg) - - -def _log_translation(msg, level): - """Build a single translation of a log message - """ - if USE_LAZY: - return Message(msg, domain='sahara' + '-log-' + level) - else: - translator = _t_log_levels[level] - if six.PY3: - return translator.gettext(msg) - return translator.ugettext(msg) - -# Translators for log levels. -# -# The abbreviated names are meant to reflect the usual use of a short -# name like '_'. The "L" is for "log" and the other letter comes from -# the level. -_LI = functools.partial(_log_translation, level='info') -_LW = functools.partial(_log_translation, level='warning') -_LE = functools.partial(_log_translation, level='error') -_LC = functools.partial(_log_translation, level='critical') - - def install(domain, lazy=False): """Install a _() function using the given translation domain. @@ -113,26 +178,9 @@ def install(domain, lazy=False): any available locale. """ if lazy: - # NOTE(mrodden): Lazy gettext functionality. - # - # The following introduces a deferred way to do translations on - # messages in OpenStack. We override the standard _() function - # and % (format string) operation to build Message objects that can - # later be translated when we have more information. - def _lazy_gettext(msg): - """Create and return a Message object. - - Lazy gettext function for a given domain, it is a factory method - for a project/module to get a lazy gettext function for its own - translation domain (i.e. nova, glance, cinder, etc.) - - Message encapsulates a string so that we can translate - it later when needed. - """ - return Message(msg, domain=domain) - from six import moves - moves.builtins.__dict__['_'] = _lazy_gettext + tf = TranslatorFactory(domain, lazy=True) + moves.builtins.__dict__['_'] = tf.primary else: localedir = '%s_LOCALEDIR' % domain.upper() if six.PY3: @@ -248,47 +296,22 @@ class Message(six.text_type): if other is None: params = (other,) elif isinstance(other, dict): - params = self._trim_dictionary_parameters(other) + # Merge the dictionaries + # Copy each item in case one does not support deep copy. + params = {} + if isinstance(self.params, dict): + for key, val in self.params.items(): + params[key] = self._copy_param(val) + for key, val in other.items(): + params[key] = self._copy_param(val) else: params = self._copy_param(other) return params - def _trim_dictionary_parameters(self, dict_param): - """Return a dict that only has matching entries in the msgid.""" - # NOTE(luisg): Here we trim down the dictionary passed as parameters - # to avoid carrying a lot of unnecessary weight around in the message - # object, for example if someone passes in Message() % locals() but - # only some params are used, and additionally we prevent errors for - # non-deepcopyable objects by unicoding() them. - - # Look for %(param) keys in msgid; - # Skip %% and deal with the case where % is first character on the line - keys = re.findall('(?:[^%]|^)?%\((\w*)\)[a-z]', self.msgid) - - # If we don't find any %(param) keys but have a %s - if not keys and re.findall('(?:[^%]|^)%[a-z]', self.msgid): - # Apparently the full dictionary is the parameter - params = self._copy_param(dict_param) - else: - params = {} - # Save our existing parameters as defaults to protect - # ourselves from losing values if we are called through an - # (erroneous) chain that builds a valid Message with - # arguments, and then does something like "msg % kwds" - # where kwds is an empty dictionary. - src = {} - if isinstance(self.params, dict): - src.update(self.params) - src.update(dict_param) - for key in keys: - params[key] = self._copy_param(src[key]) - - return params - def _copy_param(self, param): try: return copy.deepcopy(param) - except TypeError: + except Exception: # Fallback to casting to unicode this will handle the # python code-like objects that can't be deep-copied return six.text_type(param) @@ -300,13 +323,14 @@ class Message(six.text_type): def __radd__(self, other): return self.__add__(other) - def __str__(self): - # NOTE(luisg): Logging in python 2.6 tries to str() log records, - # and it expects specifically a UnicodeError in order to proceed. - msg = _('Message objects do not support str() because they may ' - 'contain non-ascii characters. ' - 'Please use unicode() or translate() instead.') - raise UnicodeError(msg) + if six.PY2: + def __str__(self): + # NOTE(luisg): Logging in python 2.6 tries to str() log records, + # and it expects specifically a UnicodeError in order to proceed. + msg = _('Message objects do not support str() because they may ' + 'contain non-ascii characters. ' + 'Please use unicode() or translate() instead.') + raise UnicodeError(msg) def get_available_languages(domain): diff --git a/sahara/openstack/common/jsonutils.py b/sahara/openstack/common/jsonutils.py index 6243a871..9d80aa70 100644 --- a/sahara/openstack/common/jsonutils.py +++ b/sahara/openstack/common/jsonutils.py @@ -31,6 +31,7 @@ This module provides a few things: ''' +import codecs import datetime import functools import inspect @@ -52,6 +53,7 @@ import six.moves.xmlrpc_client as xmlrpclib from sahara.openstack.common import gettextutils from sahara.openstack.common import importutils +from sahara.openstack.common import strutils from sahara.openstack.common import timeutils netaddr = importutils.try_import("netaddr") @@ -166,12 +168,12 @@ def dumps(value, default=to_primitive, **kwargs): return json.dumps(value, default=default, **kwargs) -def loads(s): - return json.loads(s) +def loads(s, encoding='utf-8'): + return json.loads(strutils.safe_decode(s, encoding)) -def load(s): - return json.load(s) +def load(fp, encoding='utf-8'): + return json.load(codecs.getreader(encoding)(fp)) try: diff --git a/sahara/openstack/common/strutils.py b/sahara/openstack/common/strutils.py new file mode 100644 index 00000000..50590e72 --- /dev/null +++ b/sahara/openstack/common/strutils.py @@ -0,0 +1,239 @@ +# Copyright 2011 OpenStack Foundation. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +System-level utilities and helper functions. +""" + +import math +import re +import sys +import unicodedata + +import six + +from sahara.openstack.common.gettextutils import _ + + +UNIT_PREFIX_EXPONENT = { + 'k': 1, + 'K': 1, + 'Ki': 1, + 'M': 2, + 'Mi': 2, + 'G': 3, + 'Gi': 3, + 'T': 4, + 'Ti': 4, +} +UNIT_SYSTEM_INFO = { + 'IEC': (1024, re.compile(r'(^[-+]?\d*\.?\d+)([KMGT]i?)?(b|bit|B)$')), + 'SI': (1000, re.compile(r'(^[-+]?\d*\.?\d+)([kMGT])?(b|bit|B)$')), +} + +TRUE_STRINGS = ('1', 't', 'true', 'on', 'y', 'yes') +FALSE_STRINGS = ('0', 'f', 'false', 'off', 'n', 'no') + +SLUGIFY_STRIP_RE = re.compile(r"[^\w\s-]") +SLUGIFY_HYPHENATE_RE = re.compile(r"[-\s]+") + + +def int_from_bool_as_string(subject): + """Interpret a string as a boolean and return either 1 or 0. + + Any string value in: + + ('True', 'true', 'On', 'on', '1') + + is interpreted as a boolean True. + + Useful for JSON-decoded stuff and config file parsing + """ + return bool_from_string(subject) and 1 or 0 + + +def bool_from_string(subject, strict=False, default=False): + """Interpret a string as a boolean. + + A case-insensitive match is performed such that strings matching 't', + 'true', 'on', 'y', 'yes', or '1' are considered True and, when + `strict=False`, anything else returns the value specified by 'default'. + + Useful for JSON-decoded stuff and config file parsing. + + If `strict=True`, unrecognized values, including None, will raise a + ValueError which is useful when parsing values passed in from an API call. + Strings yielding False are 'f', 'false', 'off', 'n', 'no', or '0'. + """ + if not isinstance(subject, six.string_types): + subject = six.text_type(subject) + + lowered = subject.strip().lower() + + if lowered in TRUE_STRINGS: + return True + elif lowered in FALSE_STRINGS: + return False + elif strict: + acceptable = ', '.join( + "'%s'" % s for s in sorted(TRUE_STRINGS + FALSE_STRINGS)) + msg = _("Unrecognized value '%(val)s', acceptable values are:" + " %(acceptable)s") % {'val': subject, + 'acceptable': acceptable} + raise ValueError(msg) + else: + return default + + +def safe_decode(text, incoming=None, errors='strict'): + """Decodes incoming text/bytes string using `incoming` if they're not + already unicode. + + :param incoming: Text's current encoding + :param errors: Errors handling policy. See here for valid + values http://docs.python.org/2/library/codecs.html + :returns: text or a unicode `incoming` encoded + representation of it. + :raises TypeError: If text is not an instance of str + """ + if not isinstance(text, (six.string_types, six.binary_type)): + raise TypeError("%s can't be decoded" % type(text)) + + if isinstance(text, six.text_type): + return text + + if not incoming: + incoming = (sys.stdin.encoding or + sys.getdefaultencoding()) + + try: + return text.decode(incoming, errors) + except UnicodeDecodeError: + # Note(flaper87) If we get here, it means that + # sys.stdin.encoding / sys.getdefaultencoding + # didn't return a suitable encoding to decode + # text. This happens mostly when global LANG + # var is not set correctly and there's no + # default encoding. In this case, most likely + # python will use ASCII or ANSI encoders as + # default encodings but they won't be capable + # of decoding non-ASCII characters. + # + # Also, UTF-8 is being used since it's an ASCII + # extension. + return text.decode('utf-8', errors) + + +def safe_encode(text, incoming=None, + encoding='utf-8', errors='strict'): + """Encodes incoming text/bytes string using `encoding`. + + If incoming is not specified, text is expected to be encoded with + current python's default encoding. (`sys.getdefaultencoding`) + + :param incoming: Text's current encoding + :param encoding: Expected encoding for text (Default UTF-8) + :param errors: Errors handling policy. See here for valid + values http://docs.python.org/2/library/codecs.html + :returns: text or a bytestring `encoding` encoded + representation of it. + :raises TypeError: If text is not an instance of str + """ + if not isinstance(text, (six.string_types, six.binary_type)): + raise TypeError("%s can't be encoded" % type(text)) + + if not incoming: + incoming = (sys.stdin.encoding or + sys.getdefaultencoding()) + + if isinstance(text, six.text_type): + return text.encode(encoding, errors) + elif text and encoding != incoming: + # Decode text before encoding it with `encoding` + text = safe_decode(text, incoming, errors) + return text.encode(encoding, errors) + else: + return text + + +def string_to_bytes(text, unit_system='IEC', return_int=False): + """Converts a string into an float representation of bytes. + + The units supported for IEC :: + + Kb(it), Kib(it), Mb(it), Mib(it), Gb(it), Gib(it), Tb(it), Tib(it) + KB, KiB, MB, MiB, GB, GiB, TB, TiB + + The units supported for SI :: + + kb(it), Mb(it), Gb(it), Tb(it) + kB, MB, GB, TB + + Note that the SI unit system does not support capital letter 'K' + + :param text: String input for bytes size conversion. + :param unit_system: Unit system for byte size conversion. + :param return_int: If True, returns integer representation of text + in bytes. (default: decimal) + :returns: Numerical representation of text in bytes. + :raises ValueError: If text has an invalid value. + + """ + try: + base, reg_ex = UNIT_SYSTEM_INFO[unit_system] + except KeyError: + msg = _('Invalid unit system: "%s"') % unit_system + raise ValueError(msg) + match = reg_ex.match(text) + if match: + magnitude = float(match.group(1)) + unit_prefix = match.group(2) + if match.group(3) in ['b', 'bit']: + magnitude /= 8 + else: + msg = _('Invalid string format: %s') % text + raise ValueError(msg) + if not unit_prefix: + res = magnitude + else: + res = magnitude * pow(base, UNIT_PREFIX_EXPONENT[unit_prefix]) + if return_int: + return int(math.ceil(res)) + return res + + +def to_slug(value, incoming=None, errors="strict"): + """Normalize string. + + Convert to lowercase, remove non-word characters, and convert spaces + to hyphens. + + Inspired by Django's `slugify` filter. + + :param value: Text to slugify + :param incoming: Text's current encoding + :param errors: Errors handling policy. See here for valid + values http://docs.python.org/2/library/codecs.html + :returns: slugified unicode representation of `value` + :raises TypeError: If text is not an instance of str + """ + value = safe_decode(value, incoming, errors) + # NOTE(aababilov): no need to use safe_(encode|decode) here: + # encodings are always "ascii", error handling is always "ignore" + # and types are always known (first: unicode; second: str) + value = unicodedata.normalize("NFKD", value).encode( + "ascii", "ignore").decode("ascii") + value = SLUGIFY_STRIP_RE.sub("", value).strip().lower() + return SLUGIFY_HYPHENATE_RE.sub("-", value)