Add utilities for working with binary data
These utilities help to mitigate differences between Python 2 and Python 3 when it comes to encoding and decoding binary data. They will be widely useful across taskflow, including the ZooKeeper logbook and storage backend patches. Initially from change I1de1525df0deee612fb14ca36f0415ea7d2f707c by Joshua Harlow, reworked for better handling of non-ASCII characters. Change-Id: I4136fd6d7e55b716b0ba5eab838d17a77095c726
This commit is contained in:
107
taskflow/tests/unit/test_utils_binary.py
Normal file
107
taskflow/tests/unit/test_utils_binary.py
Normal file
@@ -0,0 +1,107 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
||||
|
||||
# Copyright (C) 2014 Yahoo! Inc. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import six
|
||||
|
||||
from taskflow import test
|
||||
from taskflow.utils import misc
|
||||
|
||||
|
||||
def _bytes(data):
    """Return the native string literal ``data`` as UTF-8 encoded binary.

    When ``six.PY3`` is false the value is returned as-is; otherwise it is
    encoded to bytes using UTF-8.
    """
    if not six.PY3:
        return data
    return data.encode(encoding='utf-8')
|
||||
|
||||
|
||||
class BinaryEncodeTest(test.TestCase):
    """Tests for ``misc.binary_encode``."""

    def _check(self, data, expected_result):
        # Whatever kind of string goes in, the result must be a binary
        # value equal to the expected bytes.
        result = misc.binary_encode(data)
        self.assertIsInstance(result, six.binary_type)
        self.assertEqual(result, expected_result)

    def test_simple_binary(self):
        # Binary input is expected to come back unchanged.
        data = _bytes('hello')
        self._check(data, data)

    def test_unicode_binary(self):
        data = _bytes('привет')
        self._check(data, data)

    def test_simple_text(self):
        self._check(u'hello', _bytes('hello'))

    def test_unicode_text(self):
        # Text is encoded with the default (UTF-8) encoding.
        self._check(u'привет', _bytes('привет'))

    def test_unicode_other_encoding(self):
        # An explicitly requested encoding overrides the UTF-8 default.
        result = misc.binary_encode(u'mañana', 'latin-1')
        self.assertIsInstance(result, six.binary_type)
        self.assertEqual(result, u'mañana'.encode('latin-1'))

    def test_wrong_type(self):
        # Values that are neither binary nor text must be rejected instead
        # of being silently passed through.
        self.assertRaises(TypeError, misc.binary_encode, 42)
        self.assertRaises(TypeError, misc.binary_encode, None)
|
||||
|
||||
|
||||
class BinaryDecodeTest(test.TestCase):
    """Tests for ``misc.binary_decode``."""

    def _check(self, data, expected_result):
        # Whatever kind of string goes in, the result must be a text
        # (unicode) value equal to the expected string.
        result = misc.binary_decode(data)
        self.assertIsInstance(result, six.text_type)
        self.assertEqual(result, expected_result)

    def test_simple_text(self):
        # Text input is expected to come back unchanged.
        data = u'hello'
        self._check(data, data)

    def test_unicode_text(self):
        data = u'привет'
        self._check(data, data)

    def test_simple_binary(self):
        self._check(_bytes('hello'), u'hello')

    def test_unicode_binary(self):
        # Binary data is decoded with the default (UTF-8) encoding.
        self._check(_bytes('привет'), u'привет')

    def test_unicode_other_encoding(self):
        # An explicitly requested encoding overrides the UTF-8 default.
        data = u'mañana'.encode('latin-1')
        result = misc.binary_decode(data, 'latin-1')
        self.assertIsInstance(result, six.text_type)
        self.assertEqual(result, u'mañana')

    def test_wrong_type(self):
        # Values that are neither binary nor text must be rejected instead
        # of being silently passed through.
        self.assertRaises(TypeError, misc.binary_decode, 42)
        self.assertRaises(TypeError, misc.binary_decode, None)
|
||||
|
||||
|
||||
class DecodeJsonTest(test.TestCase):
    """Tests for ``misc.decode_json``."""

    def test_it_works(self):
        self.assertEqual(misc.decode_json(_bytes('{"foo": 1}')),
                         {"foo": 1})

    def test_it_works_with_unicode(self):
        data = _bytes('{"foo": "фуу"}')
        self.assertEqual(misc.decode_json(data), {"foo": u'фуу'})

    def test_handles_invalid_unicode(self):
        # The lone 0xf1 byte is not valid UTF-8, so decoding must fail with
        # a ValueError before any JSON parsing happens.
        self.assertRaises(ValueError, misc.decode_json,
                          six.b('{"\xf1": 1}'))

    def test_handles_bad_json(self):
        self.assertRaises(ValueError, misc.decode_json,
                          _bytes('{"foo":'))

    def test_handles_wrong_types(self):
        # Valid JSON whose root is not a dict is rejected by default.
        self.assertRaises(ValueError, misc.decode_json,
                          _bytes('42'))

    def test_custom_root_types(self):
        # Callers may widen or replace the set of accepted root types.
        self.assertEqual(misc.decode_json(_bytes('[1, 2]'),
                                          root_types=(list,)),
                         [1, 2])
        self.assertRaises(ValueError, misc.decode_json,
                          _bytes('{"foo": 1}'), root_types=(list,))

    def test_root_type_check_disabled(self):
        # A false-like root_types value turns the root type check off.
        self.assertEqual(misc.decode_json(_bytes('42'), root_types=None),
                         42)
|
||||
@@ -32,6 +32,7 @@ import traceback
|
||||
import six
|
||||
|
||||
from taskflow import exceptions
|
||||
from taskflow.openstack.common import jsonutils
|
||||
from taskflow.utils import reflection
|
||||
|
||||
|
||||
@@ -39,6 +40,52 @@ LOG = logging.getLogger(__name__)
|
||||
NUMERIC_TYPES = six.integer_types + (float,)
|
||||
|
||||
|
||||
def binary_encode(text, encoding='utf-8'):
    """Return ``text`` as a binary type.

    A text (unicode) string is encoded with the given ``encoding``; a value
    that is already binary is handed back untouched. Any other type is
    rejected with a ``TypeError``.
    """
    if isinstance(text, six.binary_type):
        # Already binary, nothing to convert.
        return text
    if not isinstance(text, six.text_type):
        raise TypeError("Expected binary or string type")
    return text.encode(encoding)
|
||||
|
||||
|
||||
def binary_decode(data, encoding='utf-8'):
    """Return ``data`` as a text (unicode) type.

    A binary value is decoded with the given ``encoding``; a value that is
    already text is handed back untouched. Any other type is rejected with
    a ``TypeError``.
    """
    if isinstance(data, six.binary_type):
        return data.decode(encoding)
    if not isinstance(data, six.text_type):
        raise TypeError("Expected binary or string type")
    # Already text, nothing to convert.
    return data
|
||||
|
||||
|
||||
def decode_json(raw_data, root_types=(dict,)):
    """Decode raw (binary or text) data into a JSON object.

    The data is converted to text with ``binary_decode`` (using its default
    encoding), parsed with ``jsonutils.loads`` and then the type of the
    decoded root object is checked against ``root_types`` (by default the
    root must be a JSON object, i.e. a dict). A false-like ``root_types``
    skips that check.

    Raises ``ValueError`` if the data can not be decoded to text, is not
    parseable JSON, or has a root type that is not allowed.
    """
    try:
        text = binary_decode(raw_data)
        decoded = jsonutils.loads(text)
    # NOTE: UnicodeDecodeError is a subclass of ValueError, so it has to be
    # handled first to get the more specific message.
    except UnicodeDecodeError as e:
        raise ValueError("Expected UTF-8 decodable data: %s" % e)
    except ValueError as e:
        raise ValueError("Expected JSON decodable data: %s" % e)
    if not root_types or isinstance(decoded, tuple(root_types)):
        return decoded
    ok_types = ", ".join(map(str, root_types))
    raise ValueError("Expected (%s) root types not: %s"
                     % (ok_types, type(decoded)))
|
||||
|
||||
|
||||
def wallclock():
|
||||
# NOTE(harlowja): made into a function so that this can be easily mocked
|
||||
# out if we want to alter time related functionality (for testing
|
||||
|
||||
Reference in New Issue
Block a user