From 2dcdd3bdb74b808afccb8278b0cca52c76f99d1a Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Tue, 27 May 2014 10:22:49 +0200 Subject: [PATCH] Synced jsonutils from oslo-incubator The sync includes change that makes sure we get unicode-only dicts from jsonutils no matter which json module implementation is selected. The latest commit in oslo-incubator: - 0f4586c0076183c6356eec682c8a593648125abd The sync adds a new 'strutils' module that is now used in jsonutils. Change-Id: Ic815ca3df94c33edec9104172048b2cd94b92e3f Closes-Bug: 1314129 --- neutron/openstack/common/gettextutils.py | 194 +++++++++++------- neutron/openstack/common/jsonutils.py | 10 +- neutron/openstack/common/strutils.py | 239 +++++++++++++++++++++++ openstack-common.conf | 1 + 4 files changed, 368 insertions(+), 76 deletions(-) create mode 100644 neutron/openstack/common/strutils.py diff --git a/neutron/openstack/common/gettextutils.py b/neutron/openstack/common/gettextutils.py index 1c33bfb83e4..71abc7151fb 100644 --- a/neutron/openstack/common/gettextutils.py +++ b/neutron/openstack/common/gettextutils.py @@ -32,24 +32,113 @@ import os from babel import localedata import six -_localedir = os.environ.get('neutron'.upper() + '_LOCALEDIR') -_t = gettext.translation('neutron', localedir=_localedir, fallback=True) - -# We use separate translation catalogs for each log level, so set up a -# mapping between the log level name and the translator. The domain -# for the log level is project_name + "-log-" + log_level so messages -# for each level end up in their own catalog. -_t_log_levels = dict( - (level, gettext.translation('neutron' + '-log-' + level, - localedir=_localedir, - fallback=True)) - for level in ['info', 'warning', 'error', 'critical'] -) - _AVAILABLE_LANGUAGES = {} + +# FIXME(dhellmann): Remove this when moving to oslo.i18n. USE_LAZY = False +class TranslatorFactory(object): + """Create translator functions + """ + + def __init__(self, domain, lazy=False, localedir=None): + """Establish a set of translation functions for the domain. + + :param domain: Name of translation domain, + specifying a message catalog. + :type domain: str + :param lazy: Delays translation until a message is emitted. + Defaults to False. + :type lazy: Boolean + :param localedir: Directory with translation catalogs. + :type localedir: str + """ + self.domain = domain + self.lazy = lazy + if localedir is None: + localedir = os.environ.get(domain.upper() + '_LOCALEDIR') + self.localedir = localedir + + def _make_translation_func(self, domain=None): + """Return a new translation function ready for use. + + Takes into account whether or not lazy translation is being + done. + + The domain can be specified to override the default from the + factory, but the localedir from the factory is always used + because we assume the log-level translation catalogs are + installed in the same directory as the main application + catalog. + + """ + if domain is None: + domain = self.domain + if self.lazy: + return functools.partial(Message, domain=domain) + t = gettext.translation( + domain, + localedir=self.localedir, + fallback=True, + ) + if six.PY3: + return t.gettext + return t.ugettext + + @property + def primary(self): + "The default translation function." + return self._make_translation_func() + + def _make_log_translation_func(self, level): + return self._make_translation_func(self.domain + '-log-' + level) + + @property + def log_info(self): + "Translate info-level log messages." 
+ return self._make_log_translation_func('info') + + @property + def log_warning(self): + "Translate warning-level log messages." + return self._make_log_translation_func('warning') + + @property + def log_error(self): + "Translate error-level log messages." + return self._make_log_translation_func('error') + + @property + def log_critical(self): + "Translate critical-level log messages." + return self._make_log_translation_func('critical') + + +# NOTE(dhellmann): When this module moves out of the incubator into +# oslo.i18n, these global variables can be moved to an integration +# module within each application. + +# Create the global translation functions. +_translators = TranslatorFactory('neutron') + +# The primary translation function using the well-known name "_" +_ = _translators.primary + +# Translators for log levels. +# +# The abbreviated names are meant to reflect the usual use of a short +# name like '_'. The "L" is for "log" and the other letter comes from +# the level. +_LI = _translators.log_info +_LW = _translators.log_warning +_LE = _translators.log_error +_LC = _translators.log_critical + +# NOTE(dhellmann): End of globals that will move to the application's +# integration module. + + def enable_lazy(): """Convenience function for configuring _() to use lazy gettext @@ -58,41 +147,18 @@ def enable_lazy(): your project is importing _ directly instead of using the gettextutils.install() way of importing the _ function. """ - global USE_LAZY + # FIXME(dhellmann): This function will be removed in oslo.i18n, + # because the TranslatorFactory makes it superfluous. + global _, _LI, _LW, _LE, _LC, USE_LAZY + tf = TranslatorFactory('neutron', lazy=True) + _ = tf.primary + _LI = tf.log_info + _LW = tf.log_warning + _LE = tf.log_error + _LC = tf.log_critical USE_LAZY = True -def _(msg): - if USE_LAZY: - return Message(msg, domain='neutron') - else: - if six.PY3: - return _t.gettext(msg) - return _t.ugettext(msg) - - -def _log_translation(msg, level): - """Build a single translation of a log message - """ - if USE_LAZY: - return Message(msg, domain='neutron' + '-log-' + level) - else: - translator = _t_log_levels[level] - if six.PY3: - return translator.gettext(msg) - return translator.ugettext(msg) - -# Translators for log levels. -# -# The abbreviated names are meant to reflect the usual use of a short -# name like '_'. The "L" is for "log" and the other letter comes from -# the level. -_LI = functools.partial(_log_translation, level='info') -_LW = functools.partial(_log_translation, level='warning') -_LE = functools.partial(_log_translation, level='error') -_LC = functools.partial(_log_translation, level='critical') - - def install(domain, lazy=False): """Install a _() function using the given translation domain. @@ -112,26 +178,9 @@ def install(domain, lazy=False): any available locale. """ if lazy: - # NOTE(mrodden): Lazy gettext functionality. - # - # The following introduces a deferred way to do translations on - # messages in OpenStack. We override the standard _() function - # and % (format string) operation to build Message objects that can - # later be translated when we have more information. - def _lazy_gettext(msg): - """Create and return a Message object. - - Lazy gettext function for a given domain, it is a factory method - for a project/module to get a lazy gettext function for its own - translation domain (i.e. nova, glance, cinder, etc.) - - Message encapsulates a string so that we can translate - it later when needed. 
- """ - return Message(msg, domain=domain) - from six import moves - moves.builtins.__dict__['_'] = _lazy_gettext + tf = TranslatorFactory(domain, lazy=True) + moves.builtins.__dict__['_'] = tf.primary else: localedir = '%s_LOCALEDIR' % domain.upper() if six.PY3: @@ -274,13 +323,14 @@ class Message(six.text_type): def __radd__(self, other): return self.__add__(other) - def __str__(self): - # NOTE(luisg): Logging in python 2.6 tries to str() log records, - # and it expects specifically a UnicodeError in order to proceed. - msg = _('Message objects do not support str() because they may ' - 'contain non-ascii characters. ' - 'Please use unicode() or translate() instead.') - raise UnicodeError(msg) + if six.PY2: + def __str__(self): + # NOTE(luisg): Logging in python 2.6 tries to str() log records, + # and it expects specifically a UnicodeError in order to proceed. + msg = _('Message objects do not support str() because they may ' + 'contain non-ascii characters. ' + 'Please use unicode() or translate() instead.') + raise UnicodeError(msg) def get_available_languages(domain): diff --git a/neutron/openstack/common/jsonutils.py b/neutron/openstack/common/jsonutils.py index ba2437dc26f..581b8c051d9 100644 --- a/neutron/openstack/common/jsonutils.py +++ b/neutron/openstack/common/jsonutils.py @@ -31,6 +31,7 @@ This module provides a few things: ''' +import codecs import datetime import functools import inspect @@ -52,6 +53,7 @@ import six.moves.xmlrpc_client as xmlrpclib from neutron.openstack.common import gettextutils from neutron.openstack.common import importutils +from neutron.openstack.common import strutils from neutron.openstack.common import timeutils netaddr = importutils.try_import("netaddr") @@ -166,12 +168,12 @@ def dumps(value, default=to_primitive, **kwargs): return json.dumps(value, default=default, **kwargs) -def loads(s): - return json.loads(s) +def loads(s, encoding='utf-8'): + return json.loads(strutils.safe_decode(s, encoding)) -def load(s): - return json.load(s) +def load(fp, encoding='utf-8'): + return json.load(codecs.getreader(encoding)(fp)) try: diff --git a/neutron/openstack/common/strutils.py b/neutron/openstack/common/strutils.py new file mode 100644 index 00000000000..8c796d4f79f --- /dev/null +++ b/neutron/openstack/common/strutils.py @@ -0,0 +1,239 @@ +# Copyright 2011 OpenStack Foundation. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +System-level utilities and helper functions. 
+""" + +import math +import re +import sys +import unicodedata + +import six + +from neutron.openstack.common.gettextutils import _ + + +UNIT_PREFIX_EXPONENT = { + 'k': 1, + 'K': 1, + 'Ki': 1, + 'M': 2, + 'Mi': 2, + 'G': 3, + 'Gi': 3, + 'T': 4, + 'Ti': 4, +} +UNIT_SYSTEM_INFO = { + 'IEC': (1024, re.compile(r'(^[-+]?\d*\.?\d+)([KMGT]i?)?(b|bit|B)$')), + 'SI': (1000, re.compile(r'(^[-+]?\d*\.?\d+)([kMGT])?(b|bit|B)$')), +} + +TRUE_STRINGS = ('1', 't', 'true', 'on', 'y', 'yes') +FALSE_STRINGS = ('0', 'f', 'false', 'off', 'n', 'no') + +SLUGIFY_STRIP_RE = re.compile(r"[^\w\s-]") +SLUGIFY_HYPHENATE_RE = re.compile(r"[-\s]+") + + +def int_from_bool_as_string(subject): + """Interpret a string as a boolean and return either 1 or 0. + + Any string value in: + + ('True', 'true', 'On', 'on', '1') + + is interpreted as a boolean True. + + Useful for JSON-decoded stuff and config file parsing + """ + return bool_from_string(subject) and 1 or 0 + + +def bool_from_string(subject, strict=False, default=False): + """Interpret a string as a boolean. + + A case-insensitive match is performed such that strings matching 't', + 'true', 'on', 'y', 'yes', or '1' are considered True and, when + `strict=False`, anything else returns the value specified by 'default'. + + Useful for JSON-decoded stuff and config file parsing. + + If `strict=True`, unrecognized values, including None, will raise a + ValueError which is useful when parsing values passed in from an API call. + Strings yielding False are 'f', 'false', 'off', 'n', 'no', or '0'. + """ + if not isinstance(subject, six.string_types): + subject = six.text_type(subject) + + lowered = subject.strip().lower() + + if lowered in TRUE_STRINGS: + return True + elif lowered in FALSE_STRINGS: + return False + elif strict: + acceptable = ', '.join( + "'%s'" % s for s in sorted(TRUE_STRINGS + FALSE_STRINGS)) + msg = _("Unrecognized value '%(val)s', acceptable values are:" + " %(acceptable)s") % {'val': subject, + 'acceptable': acceptable} + raise ValueError(msg) + else: + return default + + +def safe_decode(text, incoming=None, errors='strict'): + """Decodes incoming text/bytes string using `incoming` if they're not + already unicode. + + :param incoming: Text's current encoding + :param errors: Errors handling policy. See here for valid + values http://docs.python.org/2/library/codecs.html + :returns: text or a unicode `incoming` encoded + representation of it. + :raises TypeError: If text is not an instance of str + """ + if not isinstance(text, (six.string_types, six.binary_type)): + raise TypeError("%s can't be decoded" % type(text)) + + if isinstance(text, six.text_type): + return text + + if not incoming: + incoming = (sys.stdin.encoding or + sys.getdefaultencoding()) + + try: + return text.decode(incoming, errors) + except UnicodeDecodeError: + # Note(flaper87) If we get here, it means that + # sys.stdin.encoding / sys.getdefaultencoding + # didn't return a suitable encoding to decode + # text. This happens mostly when global LANG + # var is not set correctly and there's no + # default encoding. In this case, most likely + # python will use ASCII or ANSI encoders as + # default encodings but they won't be capable + # of decoding non-ASCII characters. + # + # Also, UTF-8 is being used since it's an ASCII + # extension. + return text.decode('utf-8', errors) + + +def safe_encode(text, incoming=None, + encoding='utf-8', errors='strict'): + """Encodes incoming text/bytes string using `encoding`. 
+ + If incoming is not specified, text is expected to be encoded with + current python's default encoding. (`sys.getdefaultencoding`) + + :param incoming: Text's current encoding + :param encoding: Expected encoding for text (Default UTF-8) + :param errors: Errors handling policy. See here for valid + values http://docs.python.org/2/library/codecs.html + :returns: text or a bytestring `encoding` encoded + representation of it. + :raises TypeError: If text is not an instance of str + """ + if not isinstance(text, (six.string_types, six.binary_type)): + raise TypeError("%s can't be encoded" % type(text)) + + if not incoming: + incoming = (sys.stdin.encoding or + sys.getdefaultencoding()) + + if isinstance(text, six.text_type): + return text.encode(encoding, errors) + elif text and encoding != incoming: + # Decode text before encoding it with `encoding` + text = safe_decode(text, incoming, errors) + return text.encode(encoding, errors) + else: + return text + + +def string_to_bytes(text, unit_system='IEC', return_int=False): + """Converts a string into an float representation of bytes. + + The units supported for IEC :: + + Kb(it), Kib(it), Mb(it), Mib(it), Gb(it), Gib(it), Tb(it), Tib(it) + KB, KiB, MB, MiB, GB, GiB, TB, TiB + + The units supported for SI :: + + kb(it), Mb(it), Gb(it), Tb(it) + kB, MB, GB, TB + + Note that the SI unit system does not support capital letter 'K' + + :param text: String input for bytes size conversion. + :param unit_system: Unit system for byte size conversion. + :param return_int: If True, returns integer representation of text + in bytes. (default: decimal) + :returns: Numerical representation of text in bytes. + :raises ValueError: If text has an invalid value. + + """ + try: + base, reg_ex = UNIT_SYSTEM_INFO[unit_system] + except KeyError: + msg = _('Invalid unit system: "%s"') % unit_system + raise ValueError(msg) + match = reg_ex.match(text) + if match: + magnitude = float(match.group(1)) + unit_prefix = match.group(2) + if match.group(3) in ['b', 'bit']: + magnitude /= 8 + else: + msg = _('Invalid string format: %s') % text + raise ValueError(msg) + if not unit_prefix: + res = magnitude + else: + res = magnitude * pow(base, UNIT_PREFIX_EXPONENT[unit_prefix]) + if return_int: + return int(math.ceil(res)) + return res + + +def to_slug(value, incoming=None, errors="strict"): + """Normalize string. + + Convert to lowercase, remove non-word characters, and convert spaces + to hyphens. + + Inspired by Django's `slugify` filter. + + :param value: Text to slugify + :param incoming: Text's current encoding + :param errors: Errors handling policy. See here for valid + values http://docs.python.org/2/library/codecs.html + :returns: slugified unicode representation of `value` + :raises TypeError: If text is not an instance of str + """ + value = safe_decode(value, incoming, errors) + # NOTE(aababilov): no need to use safe_(encode|decode) here: + # encodings are always "ascii", error handling is always "ignore" + # and types are always known (first: unicode; second: str) + value = unicodedata.normalize("NFKD", value).encode( + "ascii", "ignore").decode("ascii") + value = SLUGIFY_STRIP_RE.sub("", value).strip().lower() + return SLUGIFY_HYPHENATE_RE.sub("-", value) diff --git a/openstack-common.conf b/openstack-common.conf index 395576fb921..15001a3591e 100644 --- a/openstack-common.conf +++ b/openstack-common.conf @@ -26,6 +26,7 @@ module=processutils module=rpc module=service module=sslutils +module=strutils module=systemd module=threadgroup module=timeutils
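
A minimal usage sketch of the synced jsonutils API. This is illustrative only: it assumes the modules land under neutron.openstack.common as shown in the diffstat above, and the sample byte strings are made up.

    import io

    from neutron.openstack.common import jsonutils

    # loads() now passes byte strings through strutils.safe_decode()
    # before parsing, so UTF-8 input comes back as unicode-only dicts
    # regardless of which json implementation is selected (per the
    # commit message above).
    data = jsonutils.loads(b'{"name": "caf\xc3\xa9"}')

    # load() gets the same treatment by wrapping the file object in a
    # codecs reader for the requested encoding (UTF-8 by default).
    data = jsonutils.load(io.BytesIO(b'{"name": "caf\xc3\xa9"}'))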
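
A similar sketch for the new strutils helpers that jsonutils now relies on; the values in the comments are worked examples, not output captured from the patch.

    from neutron.openstack.common import strutils

    strutils.bool_from_string('yes')      # True
    strutils.bool_from_string('bogus')    # False (the default)
    # With strict=True an unrecognized value raises ValueError instead.

    # safe_decode() returns unicode text, falling back to UTF-8 when the
    # detected system encoding cannot decode the input; safe_encode()
    # goes the other way and defaults to UTF-8 output.
    text = strutils.safe_decode(b'caf\xc3\xa9')    # u'café'
    data = strutils.safe_encode(text)              # b'caf\xc3\xa9'

    # string_to_bytes() parses human-readable sizes: SI prefixes use
    # powers of 1000, IEC prefixes use powers of 1024.
    strutils.string_to_bytes('1.5GB', unit_system='SI')    # 1500000000.0
    strutils.string_to_bytes('1KiB', unit_system='IEC')    # 1024.0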