Add utilities for working with binary data

These utilities help mitigate differences between Python 2 and
Python 3 when it comes to encoding and decoding binary data. They
will be widely useful across taskflow, including the ZooKeeper logbook
and storage backend patches.

Initially from change I1de1525df0deee612fb14ca36f0415ea7d2f707c by
Joshua Harlow, reworked for better handling of non-ASCII characters.

Change-Id: I4136fd6d7e55b716b0ba5eab838d17a77095c726
This commit is contained in:
Ivan A. Melnikov
2014-01-24 17:19:16 +04:00
parent 0e1e857b9f
commit 5acc013956
2 changed files with 154 additions and 0 deletions

View File

@@ -0,0 +1,107 @@
# -*- coding: utf-8 -*-
# vim: tabstop=4 shiftwidth=4 softtabstop=4
# Copyright (C) 2014 Yahoo! Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import six
from taskflow import test
from taskflow.utils import misc
def _bytes(data):
    """Return the native string ``data`` as a binary string.

    When running on Python 3 the value is encoded as UTF-8; otherwise it
    is handed back unchanged.
    """
    return data.encode(encoding='utf-8') if six.PY3 else data
class BinaryEncodeTest(test.TestCase):
    """Exercises ``misc.binary_encode`` with binary and text inputs."""

    def _check(self, data, expected_result):
        # Shared assertion: the output must be binary and match exactly.
        encoded = misc.binary_encode(data)
        self.assertIsInstance(encoded, six.binary_type)
        self.assertEqual(encoded, expected_result)

    def test_simple_binary(self):
        """ASCII binary input is expected back unchanged."""
        raw = _bytes('hello')
        self._check(raw, raw)

    def test_unicode_binary(self):
        """Non-ASCII binary input is expected back unchanged."""
        raw = _bytes('привет')
        self._check(raw, raw)

    def test_simple_text(self):
        """ASCII text is encoded to its binary form."""
        expected = _bytes('hello')
        self._check(u'hello', expected)

    def test_unicode_text(self):
        """Non-ASCII text is encoded with the default encoding."""
        expected = _bytes('привет')
        self._check(u'привет', expected)

    def test_unicode_other_encoding(self):
        """An explicitly requested encoding is honoured."""
        encoded = misc.binary_encode(u'mañana', 'latin-1')
        expected = u'mañana'.encode('latin-1')
        self.assertIsInstance(encoded, six.binary_type)
        self.assertEqual(encoded, expected)
class BinaryDecodeTest(test.TestCase):
    """Exercises ``misc.binary_decode`` with text and binary inputs."""

    def _check(self, data, expected_result):
        # Shared assertion: the output must be text and match exactly.
        decoded = misc.binary_decode(data)
        self.assertIsInstance(decoded, six.text_type)
        self.assertEqual(decoded, expected_result)

    def test_simple_text(self):
        """ASCII text input is expected back unchanged."""
        text = u'hello'
        self._check(text, text)

    def test_unicode_text(self):
        """Non-ASCII text input is expected back unchanged."""
        text = u'привет'
        self._check(text, text)

    def test_simple_binary(self):
        """ASCII binary input is decoded to text."""
        raw = _bytes('hello')
        self._check(raw, u'hello')

    def test_unicode_binary(self):
        """Non-ASCII binary input is decoded with the default encoding."""
        raw = _bytes('привет')
        self._check(raw, u'привет')

    def test_unicode_other_encoding(self):
        """An explicitly requested encoding is honoured."""
        raw = u'mañana'.encode('latin-1')
        decoded = misc.binary_decode(raw, 'latin-1')
        self.assertIsInstance(decoded, six.text_type)
        self.assertEqual(decoded, u'mañana')
class DecodeJsonTest(test.TestCase):
    """Exercises ``misc.decode_json`` on valid and invalid payloads."""

    def test_it_works(self):
        """A plain JSON object decodes to the matching dict."""
        payload = _bytes('{"foo": 1}')
        self.assertEqual(misc.decode_json(payload), {"foo": 1})

    def test_it_works_with_unicode(self):
        """Non-ASCII string values come back as unicode text."""
        payload = _bytes('{"foo": "фуу"}')
        decoded = misc.decode_json(payload)
        self.assertEqual(decoded, {"foo": u'фуу'})

    def test_handles_invalid_unicode(self):
        """Bytes that cannot be decoded are reported as ValueError."""
        payload = six.b('{"\xf1": 1}')
        self.assertRaises(ValueError, misc.decode_json, payload)

    def test_handles_bad_json(self):
        """Truncated JSON is reported as ValueError."""
        payload = _bytes('{"foo":')
        self.assertRaises(ValueError, misc.decode_json, payload)

    def test_handles_wrong_types(self):
        """A root value that is not a dict is reported as ValueError."""
        payload = _bytes('42')
        self.assertRaises(ValueError, misc.decode_json, payload)

View File

@@ -32,6 +32,7 @@ import traceback
import six
from taskflow import exceptions
from taskflow.openstack.common import jsonutils
from taskflow.utils import reflection
@@ -39,6 +40,52 @@ LOG = logging.getLogger(__name__)
# Tuple of numeric types: six's integer types plus float.
NUMERIC_TYPES = six.integer_types + (float,)
def binary_encode(text, encoding='utf-8'):
    """Converts a text string into a binary type using given encoding.

    Input that is already of the binary type is returned unchanged. Any
    input that is neither binary nor text raises a TypeError.
    """
    if isinstance(text, six.text_type):
        return text.encode(encoding)
    if isinstance(text, six.binary_type):
        return text
    raise TypeError("Expected binary or string type")
def binary_decode(data, encoding='utf-8'):
    """Converts a binary type into a text type using given encoding.

    Input that is already of the text type is returned unchanged. Any
    input that is neither binary nor text raises a TypeError.
    """
    if isinstance(data, six.text_type):
        return data
    if isinstance(data, six.binary_type):
        return data.decode(encoding)
    raise TypeError("Expected binary or string type")
def decode_json(raw_data, root_types=(dict,)):
    """Parse raw data to get JSON object.

    The raw data is converted to text with binary_decode (using its
    default encoding) and then parsed as JSON. When root_types is
    non-empty the decoded root object must be an instance of one of those
    types (by default a JSON object/dict).

    Raises ValueError if the data can not be decoded to text, is not
    valid JSON, or has a root object of a type that is not allowed.
    """
    try:
        text = binary_decode(raw_data)
        parsed = jsonutils.loads(text)
    # UnicodeDecodeError is a ValueError subclass, so it is handled first
    # to get the more specific message.
    except UnicodeDecodeError as e:
        raise ValueError("Expected UTF-8 decodable data: %s" % e)
    except ValueError as e:
        raise ValueError("Expected JSON decodable data: %s" % e)
    # An empty root_types disables the root type check entirely.
    if not root_types or isinstance(parsed, tuple(root_types)):
        return parsed
    allowed = ", ".join([str(t) for t in root_types])
    raise ValueError("Expected (%s) root types not: %s"
                     % (allowed, type(parsed)))
def wallclock():
# NOTE(harlowja): made into a function so that this can be easily mocked
# out if we want to alter time related functionality (for testing