diff --git a/CHANGES.txt b/CHANGES.txt index 11c7c3f..f5058af 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,6 +1,17 @@ Changes ======= +Next release +------------ + +- The default ``encoding`` parameter value to the ``colander.String`` + type is still ``None``, however its meaning has changed. An + encoding of ``None`` now means that no special encoding and decoding + of Unicode values is done by the String type. This differs from the + previous behavior, where ``None`` implied that the encoding was + ``utf-8``. Pass the encoding as ``utf-8`` specifically to get the + older behavior back. + 0.6.1 (2010-05-04) ------------------ diff --git a/colander/__init__.py b/colander/__init__.py index 12520c7..5c207c1 100644 --- a/colander/__init__.py +++ b/colander/__init__.py @@ -615,43 +615,81 @@ class Sequence(Positional): Seq = Sequence -default_encoding = 'utf-8' - class String(object): """ A type representing a Unicode string. This type constructor accepts a number of arguments: ``encoding`` - Represents the encoding which should be applied to object - serialization. It defaults to ``utf-8`` if not provided. + Represents the encoding which should be applied to value + serialization and deserialization, for example ``utf-8``. If + ``encoding`` is passed as ``None``, the ``serialize`` method of + this type will not do any special encoding of the value it is + provided, nor will the ``deserialize`` method of this type do + any special decoding of the value it is provided; inputs and + outputs will be assumed to be Unicode. ``encoding`` defaults + to ``None``. + + If ``encoding`` is ``None``: + + - A Unicode input value to ``serialize`` is returned untouched. + + - A non-Unicode input value to ``serialize`` is run through the + ``unicode()`` function without an ``encoding`` parameter + (``unicode(value)``) and the result is returned. + + - A Unicode input value to ``deserialize`` is returned untouched. 
+ + - A non-Unicode input value to ``deserialize`` is run through the + ``unicode()`` function without an ``encoding`` parameter + (``unicode(value)``) and the result is returned. + + If ``encoding`` is not ``None``: + + - A Unicode input value to ``serialize`` is encoded with the + ``encode`` method using the encoding parameter + (``value.encode(encoding)``) and the result (a ``str`` + object) is returned. + + - A non-Unicode input value to ``serialize`` is converted to a + Unicode using the encoding (``unicode(value, encoding)``); + subsequently the Unicode object is reencoded to a ``str`` + object using the encoding and returned. + + - A Unicode input value to ``deserialize`` is returned + untouched. + + - A non-Unicode input value to ``deserialize`` is converted to + a ``str`` object using ``str(value)``. The resulting str + value is converted to Unicode using the encoding + (``unicode(value, encoding)``) and the result is returned. + + A corollary: If a string (as opposed to a unicode object) is + provided as a value to either the serialize or deserialize + method of this type, and the type also has a non-None + ``encoding``, the string must be encoded with the type's + encoding. If this is not true, an :exc:`colander.Invalid` + error will result. ``allow_empty`` Boolean representing whether an empty string input to deserialize will be accepted even if the enclosing schema node is required (has no default). Default: ``False``. - Input to ``serialize`` is serialized to a Python ``str`` object, - which is encoded in the encoding provided. - - If a string (as opposed to a unicode object) is provided as a - value to either the serialize or deserialize method of this type, - it must be encoded with the type's encoding; an - :exc:`colander.Invalid` error will result if not. - The subnodes of the :class:`colander.SchemaNode` that wraps this type are ignored. 
""" def __init__(self, encoding=None, allow_empty=False): - if encoding is None: - encoding = default_encoding self.encoding = encoding self.allow_empty = allow_empty def deserialize(self, node, value): try: if not isinstance(value, unicode): - value = unicode(str(value), self.encoding) + if self.encoding: + value = unicode(str(value), self.encoding) + else: + value = unicode(value) except Exception, e: raise Invalid(node, _('${val} is not a string: %{err}', @@ -665,14 +703,19 @@ class String(object): def serialize(self, node, value): try: if isinstance(value, unicode): - result = value.encode(self.encoding) + if self.encoding: + result = value.encode(self.encoding) + else: + result = value else: - # do validation here - result = unicode(value, self.encoding).encode(self.encoding) + if self.encoding: + result = unicode(value, self.encoding).encode(self.encoding) + else: + result = unicode(value) return result except Exception, e: raise Invalid(node, - _('"${val} cannot be serialized to str: ${err}', + _('"${val} cannot be serialized: ${err}', mapping={'val':value, 'err':e}) ) diff --git a/colander/tests.py b/colander/tests.py index 192a723..b0705c1 100644 --- a/colander/tests.py +++ b/colander/tests.py @@ -632,7 +632,7 @@ class TestSequence(unittest.TestCase): self.assertEqual(len(e.children), 2) class TestString(unittest.TestCase): - def _makeOne(self, encoding='utf-8', allow_empty=False): + def _makeOne(self, encoding=None, allow_empty=False): from colander import String return String(encoding, allow_empty) @@ -641,6 +641,49 @@ class TestString(unittest.TestCase): from colander import String self.assertEqual(Str, String) + def test_deserialize_emptystring_allow_empty(self): + node = DummySchemaNode(None) + typ = self._makeOne(None, True) + result = typ.deserialize(node, '') + self.assertEqual(result, '') + + def test_deserialize_uncooperative(self): + val = Uncooperative() + node = DummySchemaNode(None) + typ = self._makeOne() + e = invalid_exc(typ.deserialize, 
node, val) + self.failUnless(e.msg) + + def test_deserialize_unicode_from_None(self): + uni = u'\xf8' + node = DummySchemaNode(None) + typ = self._makeOne() + result = typ.deserialize(node, uni) + self.assertEqual(result, uni) + + def test_deserialize_nonunicode_from_None(self): + value = object() + node = DummySchemaNode(None) + typ = self._makeOne() + result = typ.deserialize(node, value) + self.assertEqual(result, unicode(value)) + + def test_deserialize_from_utf8(self): + utf8 = '\xc3\xb8' + uni = u'\xf8' + node = DummySchemaNode(None) + typ = self._makeOne('utf-8') + result = typ.deserialize(node, utf8) + self.assertEqual(result, uni) + + def test_deserialize_from_utf16(self): + utf16 = '\xff\xfe\xf8\x00' + uni = u'\xf8' + node = DummySchemaNode(None) + typ = self._makeOne('utf-16') + result = typ.deserialize(node, utf16) + self.assertEqual(result, uni) + def test_serialize_emptystring_required(self): val = '' node = DummySchemaNode(None) @@ -655,42 +698,6 @@ class TestString(unittest.TestCase): result = typ.deserialize(node, val) self.assertEqual(result, 'default') - def test_deserialize_emptystring_allow_empty(self): - node = DummySchemaNode(None) - typ = self._makeOne(None, True) - result = typ.deserialize(node, '') - self.assertEqual(result, '') - - def test_deserialize_uncooperative(self): - val = Uncooperative() - node = DummySchemaNode(None) - typ = self._makeOne() - e = invalid_exc(typ.deserialize, node, val) - self.failUnless(e.msg) - - def test_deserialize_unicode(self): - uni = u'\xf8' - node = DummySchemaNode(None) - typ = self._makeOne() - result = typ.deserialize(node, uni) - self.assertEqual(result, uni) - - def test_deserialize_from_utf8(self): - utf8 = '\xc3\xb8' - uni = u'\xf8' - node = DummySchemaNode(None) - typ = self._makeOne() - result = typ.deserialize(node, utf8) - self.assertEqual(result, uni) - - def test_deserialize_from_utf16(self): - utf16 = '\xff\xfe\xf8\x00' - uni = u'\xf8' - node = DummySchemaNode(None) - typ = 
self._makeOne('utf-16') - result = typ.deserialize(node, utf16) - self.assertEqual(result, uni) - def test_serialize_uncooperative(self): val = Uncooperative() node = DummySchemaNode(None) @@ -698,11 +705,25 @@ class TestString(unittest.TestCase): e = invalid_exc(typ.serialize, node, val) self.failUnless(e.msg) + def test_serialize_nonunicode_to_None(self): + value = object() + node = DummySchemaNode(None) + typ = self._makeOne() + result = typ.serialize(node, value) + self.assertEqual(result, unicode(value)) + + def test_serialize_unicode_to_None(self): + value = u'abc' + node = DummySchemaNode(None) + typ = self._makeOne() + result = typ.serialize(node, value) + self.assertEqual(result, value) + def test_serialize_to_utf8(self): utf8 = '\xc3\xb8' uni = u'\xf8' node = DummySchemaNode(None) - typ = self._makeOne() + typ = self._makeOne('utf-8') result = typ.serialize(node, uni) self.assertEqual(result, utf8) @@ -719,9 +740,8 @@ class TestString(unittest.TestCase): node = DummySchemaNode(None) typ = self._makeOne('utf-8') e = invalid_exc(typ.serialize, node, not_utf8) - self.failUnless('cannot be serialized to str' in e.msg) + self.failUnless('cannot be serialized' in e.msg) - class TestInteger(unittest.TestCase): def _makeOne(self): from colander import Integer