Split strutils into 2 different modules
This patch pulls the encoding-related functions out of strutils into their own encodeutils module. We could probably find a better name for strutils now, although the current name seems short and contextualized enough. Partially-implements blueprint: graduate-oslo-utils Change-Id: Ib76065823c8a1b56020f14cea80b6d73e150aa49
This commit is contained in:
89
oslo/utils/encodeutils.py
Normal file
89
oslo/utils/encodeutils.py
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
# Copyright 2014 Red Hat, Inc.
|
||||||
|
# All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
import six
|
||||||
|
|
||||||
|
|
||||||
|
def safe_decode(text, incoming=None, errors='strict'):
    """Decodes incoming text/bytes string using `incoming` if they're not
    already unicode.

    :param text: text/bytes string to decode
    :param incoming: Text's current encoding. When not given, the encoding
                     of ``sys.stdin`` is used, falling back to
                     ``sys.getdefaultencoding()``.
    :param errors: Errors handling policy. See here for valid
        values http://docs.python.org/2/library/codecs.html
    :returns: text or a unicode `incoming` encoded
                representation of it. If decoding with `incoming` fails,
                UTF-8 is tried before giving up.
    :raises TypeError: If text is not an instance of str
    """
    if not isinstance(text, (six.string_types, six.binary_type)):
        raise TypeError("%s can't be decoded" % type(text))

    # Already unicode: nothing to decode.
    if isinstance(text, six.text_type):
        return text

    if not incoming:
        # sys.stdin can be None (e.g. GUI/daemonized processes) or may be
        # replaced by a file-like object without an `encoding` attribute,
        # so read the attribute defensively instead of raising
        # AttributeError.
        incoming = (getattr(sys.stdin, 'encoding', None) or
                    sys.getdefaultencoding())

    try:
        return text.decode(incoming, errors)
    except UnicodeDecodeError:
        # Note(flaper87) If we get here, it means that
        # sys.stdin.encoding / sys.getdefaultencoding
        # didn't return a suitable encoding to decode
        # text. This happens mostly when global LANG
        # var is not set correctly and there's no
        # default encoding. In this case, most likely
        # python will use ASCII or ANSI encoders as
        # default encodings but they won't be capable
        # of decoding non-ASCII characters.
        #
        # Also, UTF-8 is being used since it's an ASCII
        # extension.
        return text.decode('utf-8', errors)
|
||||||
|
|
||||||
|
|
||||||
|
def safe_encode(text, incoming=None,
                encoding='utf-8', errors='strict'):
    """Encodes incoming text/bytes string using `encoding`.

    If incoming is not specified, text is expected to be encoded with
    the encoding of ``sys.stdin`` or, when that is unavailable, with
    current python's default encoding. (`sys.getdefaultencoding`)

    :param text: text/bytes string to encode
    :param incoming: Text's current encoding
    :param encoding: Expected encoding for text (Default UTF-8)
    :param errors: Errors handling policy. See here for valid
        values http://docs.python.org/2/library/codecs.html
    :returns: text or a bytestring `encoding` encoded
                representation of it.
    :raises TypeError: If text is not an instance of str
    """
    if not isinstance(text, (six.string_types, six.binary_type)):
        raise TypeError("%s can't be encoded" % type(text))

    if not incoming:
        # sys.stdin can be None or lack an `encoding` attribute when it
        # has been closed or replaced; do not fail in that case.
        incoming = (getattr(sys.stdin, 'encoding', None) or
                    sys.getdefaultencoding())

    # Codec names are case-insensitive, so normalize both before comparing
    # them; otherwise 'UTF-8' vs 'utf-8' would trigger a pointless
    # decode/encode round trip.
    if hasattr(incoming, 'lower'):
        incoming = incoming.lower()
    if hasattr(encoding, 'lower'):
        encoding = encoding.lower()

    if isinstance(text, six.text_type):
        return text.encode(encoding, errors)
    elif text and encoding != incoming:
        # Decode text before encoding it with `encoding`
        text = safe_decode(text, incoming, errors)
        return text.encode(encoding, errors)
    else:
        # Empty bytes, or bytes already in the target encoding.
        return text
|
@@ -19,11 +19,11 @@ System-level utilities and helper functions.
|
|||||||
|
|
||||||
import math
|
import math
|
||||||
import re
|
import re
|
||||||
import sys
|
|
||||||
import unicodedata
|
import unicodedata
|
||||||
|
|
||||||
import six
|
import six
|
||||||
|
|
||||||
|
from oslo.utils import encodeutils
|
||||||
from oslo.utils.openstack.common.gettextutils import _
|
from oslo.utils.openstack.common.gettextutils import _
|
||||||
|
|
||||||
|
|
||||||
@@ -97,77 +97,6 @@ def bool_from_string(subject, strict=False, default=False):
|
|||||||
return default
|
return default
|
||||||
|
|
||||||
|
|
||||||
def safe_decode(text, incoming=None, errors='strict'):
|
|
||||||
"""Decodes incoming text/bytes string using `incoming` if they're not
|
|
||||||
already unicode.
|
|
||||||
|
|
||||||
:param incoming: Text's current encoding
|
|
||||||
:param errors: Errors handling policy. See here for valid
|
|
||||||
values http://docs.python.org/2/library/codecs.html
|
|
||||||
:returns: text or a unicode `incoming` encoded
|
|
||||||
representation of it.
|
|
||||||
:raises TypeError: If text is not an instance of str
|
|
||||||
"""
|
|
||||||
if not isinstance(text, (six.string_types, six.binary_type)):
|
|
||||||
raise TypeError("%s can't be decoded" % type(text))
|
|
||||||
|
|
||||||
if isinstance(text, six.text_type):
|
|
||||||
return text
|
|
||||||
|
|
||||||
if not incoming:
|
|
||||||
incoming = (sys.stdin.encoding or
|
|
||||||
sys.getdefaultencoding())
|
|
||||||
|
|
||||||
try:
|
|
||||||
return text.decode(incoming, errors)
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
# Note(flaper87) If we get here, it means that
|
|
||||||
# sys.stdin.encoding / sys.getdefaultencoding
|
|
||||||
# didn't return a suitable encoding to decode
|
|
||||||
# text. This happens mostly when global LANG
|
|
||||||
# var is not set correctly and there's no
|
|
||||||
# default encoding. In this case, most likely
|
|
||||||
# python will use ASCII or ANSI encoders as
|
|
||||||
# default encodings but they won't be capable
|
|
||||||
# of decoding non-ASCII characters.
|
|
||||||
#
|
|
||||||
# Also, UTF-8 is being used since it's an ASCII
|
|
||||||
# extension.
|
|
||||||
return text.decode('utf-8', errors)
|
|
||||||
|
|
||||||
|
|
||||||
def safe_encode(text, incoming=None,
|
|
||||||
encoding='utf-8', errors='strict'):
|
|
||||||
"""Encodes incoming text/bytes string using `encoding`.
|
|
||||||
|
|
||||||
If incoming is not specified, text is expected to be encoded with
|
|
||||||
current python's default encoding. (`sys.getdefaultencoding`)
|
|
||||||
|
|
||||||
:param incoming: Text's current encoding
|
|
||||||
:param encoding: Expected encoding for text (Default UTF-8)
|
|
||||||
:param errors: Errors handling policy. See here for valid
|
|
||||||
values http://docs.python.org/2/library/codecs.html
|
|
||||||
:returns: text or a bytestring `encoding` encoded
|
|
||||||
representation of it.
|
|
||||||
:raises TypeError: If text is not an instance of str
|
|
||||||
"""
|
|
||||||
if not isinstance(text, (six.string_types, six.binary_type)):
|
|
||||||
raise TypeError("%s can't be encoded" % type(text))
|
|
||||||
|
|
||||||
if not incoming:
|
|
||||||
incoming = (sys.stdin.encoding or
|
|
||||||
sys.getdefaultencoding())
|
|
||||||
|
|
||||||
if isinstance(text, six.text_type):
|
|
||||||
return text.encode(encoding, errors)
|
|
||||||
elif text and encoding != incoming:
|
|
||||||
# Decode text before encoding it with `encoding`
|
|
||||||
text = safe_decode(text, incoming, errors)
|
|
||||||
return text.encode(encoding, errors)
|
|
||||||
else:
|
|
||||||
return text
|
|
||||||
|
|
||||||
|
|
||||||
def string_to_bytes(text, unit_system='IEC', return_int=False):
|
def string_to_bytes(text, unit_system='IEC', return_int=False):
|
||||||
"""Converts a string into an float representation of bytes.
|
"""Converts a string into an float representation of bytes.
|
||||||
|
|
||||||
@@ -229,7 +158,7 @@ def to_slug(value, incoming=None, errors="strict"):
|
|||||||
:returns: slugified unicode representation of `value`
|
:returns: slugified unicode representation of `value`
|
||||||
:raises TypeError: If text is not an instance of str
|
:raises TypeError: If text is not an instance of str
|
||||||
"""
|
"""
|
||||||
value = safe_decode(value, incoming, errors)
|
value = encodeutils.safe_decode(value, incoming, errors)
|
||||||
# NOTE(aababilov): no need to use safe_(encode|decode) here:
|
# NOTE(aababilov): no need to use safe_(encode|decode) here:
|
||||||
# encodings are always "ascii", error handling is always "ignore"
|
# encodings are always "ascii", error handling is always "ignore"
|
||||||
# and types are always known (first: unicode; second: str)
|
# and types are always known (first: unicode; second: str)
|
||||||
|
@@ -143,48 +143,6 @@ class StrUtilsTest(test_base.BaseTestCase):
|
|||||||
self.assertEqual(1, strutils.int_from_bool_as_string(True))
|
self.assertEqual(1, strutils.int_from_bool_as_string(True))
|
||||||
self.assertEqual(0, strutils.int_from_bool_as_string(False))
|
self.assertEqual(0, strutils.int_from_bool_as_string(False))
|
||||||
|
|
||||||
def test_safe_decode(self):
|
|
||||||
safe_decode = strutils.safe_decode
|
|
||||||
self.assertRaises(TypeError, safe_decode, True)
|
|
||||||
self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b("ni\xc3\xb1o"),
|
|
||||||
incoming="utf-8"))
|
|
||||||
if six.PY2:
|
|
||||||
# In Python 3, bytes.decode() doesn't support anymore
|
|
||||||
# bytes => bytes encodings like base64
|
|
||||||
self.assertEqual(six.u("test"), safe_decode("dGVzdA==",
|
|
||||||
incoming='base64'))
|
|
||||||
|
|
||||||
self.assertEqual(six.u("strange"), safe_decode(six.b('\x80strange'),
|
|
||||||
errors='ignore'))
|
|
||||||
|
|
||||||
self.assertEqual(six.u('\xc0'), safe_decode(six.b('\xc0'),
|
|
||||||
incoming='iso-8859-1'))
|
|
||||||
|
|
||||||
# Forcing incoming to ascii so it falls back to utf-8
|
|
||||||
self.assertEqual(six.u('ni\xf1o'), safe_decode(six.b('ni\xc3\xb1o'),
|
|
||||||
incoming='ascii'))
|
|
||||||
|
|
||||||
self.assertEqual(six.u('foo'), safe_decode(b'foo'))
|
|
||||||
|
|
||||||
def test_safe_encode(self):
|
|
||||||
safe_encode = strutils.safe_encode
|
|
||||||
self.assertRaises(TypeError, safe_encode, True)
|
|
||||||
self.assertEqual(six.b("ni\xc3\xb1o"), safe_encode(six.u('ni\xf1o'),
|
|
||||||
encoding="utf-8"))
|
|
||||||
if six.PY2:
|
|
||||||
# In Python 3, str.encode() doesn't support anymore
|
|
||||||
# text => text encodings like base64
|
|
||||||
self.assertEqual(six.b("dGVzdA==\n"),
|
|
||||||
safe_encode("test", encoding='base64'))
|
|
||||||
self.assertEqual(six.b('ni\xf1o'), safe_encode(six.b("ni\xc3\xb1o"),
|
|
||||||
encoding="iso-8859-1",
|
|
||||||
incoming="utf-8"))
|
|
||||||
|
|
||||||
# Forcing incoming to ascii so it falls back to utf-8
|
|
||||||
self.assertEqual(six.b('ni\xc3\xb1o'),
|
|
||||||
safe_encode(six.b('ni\xc3\xb1o'), incoming='ascii'))
|
|
||||||
self.assertEqual(six.b('foo'), safe_encode(six.u('foo')))
|
|
||||||
|
|
||||||
def test_slugify(self):
|
def test_slugify(self):
|
||||||
to_slug = strutils.to_slug
|
to_slug = strutils.to_slug
|
||||||
self.assertRaises(TypeError, to_slug, True)
|
self.assertRaises(TypeError, to_slug, True)
|
||||||
|
66
tests/tests_encodeutils.py
Normal file
66
tests/tests_encodeutils.py
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
# Copyright 2014 Red Hat, Inc.
|
||||||
|
# All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||||
|
# not use this file except in compliance with the License. You may obtain
|
||||||
|
# a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
# License for the specific language governing permissions and limitations
|
||||||
|
# under the License.
|
||||||
|
|
||||||
|
from oslotest import base as test_base
|
||||||
|
import six
|
||||||
|
|
||||||
|
from oslo.utils import encodeutils
|
||||||
|
|
||||||
|
|
||||||
|
class EncodeUtilsTest(test_base.BaseTestCase):
    """Unit tests for ``encodeutils.safe_decode`` and ``safe_encode``."""

    def test_safe_decode(self):
        decode = encodeutils.safe_decode

        # Anything that is not a text/bytes string must be rejected.
        self.assertRaises(TypeError, decode, True)

        # Explicit utf-8 input.
        from_utf8 = decode(six.b("ni\xc3\xb1o"), incoming="utf-8")
        self.assertEqual(six.u('ni\xf1o'), from_utf8)

        if six.PY2:
            # In Python 3, bytes.decode() doesn't support anymore
            # bytes => bytes encodings like base64
            from_base64 = decode("dGVzdA==", incoming='base64')
            self.assertEqual(six.u("test"), from_base64)

        # With errors='ignore' the leading \x80 byte is expected to be
        # dropped from the result.
        lenient = decode(six.b('\x80strange'), errors='ignore')
        self.assertEqual(six.u("strange"), lenient)

        # Explicit single-byte encoding.
        from_latin1 = decode(six.b('\xc0'), incoming='iso-8859-1')
        self.assertEqual(six.u('\xc0'), from_latin1)

        # Forcing incoming to ascii so it falls back to utf-8
        via_fallback = decode(six.b('ni\xc3\xb1o'), incoming='ascii')
        self.assertEqual(six.u('ni\xf1o'), via_fallback)

        # No incoming encoding given at all.
        self.assertEqual(six.u('foo'), decode(b'foo'))

    def test_safe_encode(self):
        encode = encodeutils.safe_encode

        # Anything that is not a text/bytes string must be rejected.
        self.assertRaises(TypeError, encode, True)

        # Unicode text to utf-8 bytes.
        to_utf8 = encode(six.u('ni\xf1o'), encoding="utf-8")
        self.assertEqual(six.b("ni\xc3\xb1o"), to_utf8)

        if six.PY2:
            # In Python 3, str.encode() doesn't support anymore
            # text => text encodings like base64
            to_base64 = encode("test", encoding='base64')
            self.assertEqual(six.b("dGVzdA==\n"), to_base64)

        # Bytes are re-encoded from utf-8 to iso-8859-1.
        recoded = encode(six.b("ni\xc3\xb1o"),
                         encoding="iso-8859-1",
                         incoming="utf-8")
        self.assertEqual(six.b('ni\xf1o'), recoded)

        # Forcing incoming to ascii so it falls back to utf-8
        via_fallback = encode(six.b('ni\xc3\xb1o'), incoming='ascii')
        self.assertEqual(six.b('ni\xc3\xb1o'), via_fallback)

        # Default target encoding.
        self.assertEqual(six.b('foo'), encode(six.u('foo')))
|
Reference in New Issue
Block a user