- The default ``encoding`` parameter value to the ``colander.String``

type is still ``None``; however, its meaning has changed.  An
  encoding of ``None`` now means that no special encoding and decoding
  of Unicode values is done by the String type.  This differs from the
  previous behavior, where ``None`` implied that the encoding was
  ``utf-8``.  Pass the encoding as ``utf-8`` specifically to get the
  older behavior back.
This commit is contained in:
Chris McDonough
2010-05-08 05:34:51 +00:00
parent f61417d1b7
commit 4eefbb5f9b
3 changed files with 133 additions and 59 deletions

View File

@@ -1,6 +1,17 @@
Changes
=======
Next release
------------
- The default ``encoding`` parameter value to the ``colander.String``
type is still ``None``; however, its meaning has changed. An
encoding of ``None`` now means that no special encoding and decoding
of Unicode values is done by the String type. This differs from the
previous behavior, where ``None`` implied that the encoding was
``utf-8``. Pass the encoding as ``utf-8`` specifically to get the
older behavior back.
0.6.1 (2010-05-04)
------------------

View File

@@ -615,43 +615,81 @@ class Sequence(Positional):
Seq = Sequence
default_encoding = 'utf-8'
class String(object):
""" A type representing a Unicode string.
This type constructor accepts a number of arguments:
``encoding``
Represents the encoding which should be applied to object
serialization. It defaults to ``utf-8`` if not provided.
Represents the encoding which should be applied to value
serialization and deserialization, for example ``utf-8``. If
``encoding`` is passed as ``None``, the ``serialize`` method of
this type will not do any special encoding of the value it is
provided, nor will the ``deserialize`` method of this type do
any special decoding of the value it is provided; inputs and
outputs will be assumed to be Unicode. ``encoding`` defaults
to ``None``.
If ``encoding`` is ``None``:
- A Unicode input value to ``serialize`` is returned untouched.
- A non-Unicode input value to ``serialize`` is run through the
``unicode()`` function without an ``encoding`` parameter
(``unicode(value)``) and the result is returned.
- A Unicode input value to ``deserialize`` is returned untouched.
- A non-Unicode input value to ``deserialize`` is run through the
``unicode()`` function without an ``encoding`` parameter
(``unicode(value)``) and the result is returned.
If ``encoding`` is not ``None``:
- A Unicode input value to ``serialize`` is encoded using the
encoding (``value.encode(encoding)``) and the result (a ``str``
object) is returned.
- A non-Unicode input value to ``serialize`` is converted to
Unicode using the encoding (``unicode(value, encoding)``);
subsequently the Unicode object is re-encoded to a ``str``
object using the encoding and returned.
- A Unicode input value to ``deserialize`` is returned
untouched.
- A non-Unicode input value to ``deserialize`` is converted to
a ``str`` object using ``str(value)``. The resulting str
value is converted to Unicode using the encoding
(``unicode(value, encoding)``) and the result is returned.
A corollary: If a string (as opposed to a unicode object) is
provided as a value to either the serialize or deserialize
method of this type, and the type also has a non-None
``encoding``, the string must be encoded with the type's
encoding. If this is not true, a :exc:`colander.Invalid`
error will result.
``allow_empty``
Boolean representing whether an empty string input to
deserialize will be accepted even if the enclosing schema node
is required (has no default). Default: ``False``.
Input to ``serialize`` is serialized to a Python ``str`` object,
which is encoded in the encoding provided.
If a string (as opposed to a unicode object) is provided as a
value to either the serialize or deserialize method of this type,
it must be encoded with the type's encoding; an
:exc:`colander.Invalid` error will result if not.
The subnodes of the :class:`colander.SchemaNode` that wraps
this type are ignored.
"""
def __init__(self, encoding=None, allow_empty=False):
if encoding is None:
encoding = default_encoding
self.encoding = encoding
self.allow_empty = allow_empty
def deserialize(self, node, value):
try:
if not isinstance(value, unicode):
value = unicode(str(value), self.encoding)
if self.encoding:
value = unicode(str(value), self.encoding)
else:
value = unicode(value)
except Exception, e:
raise Invalid(node,
_('${val} is not a string: %{err}',
@@ -665,14 +703,19 @@ class String(object):
def serialize(self, node, value):
try:
if isinstance(value, unicode):
result = value.encode(self.encoding)
if self.encoding:
result = value.encode(self.encoding)
else:
result = value
else:
# do validation here
result = unicode(value, self.encoding).encode(self.encoding)
if self.encoding:
result = unicode(value, self.encoding).encode(self.encoding)
else:
result = unicode(value)
return result
except Exception, e:
raise Invalid(node,
_('"${val} cannot be serialized to str: ${err}',
_('"${val} cannot be serialized: ${err}',
mapping={'val':value, 'err':e})
)

View File

@@ -632,7 +632,7 @@ class TestSequence(unittest.TestCase):
self.assertEqual(len(e.children), 2)
class TestString(unittest.TestCase):
def _makeOne(self, encoding='utf-8', allow_empty=False):
def _makeOne(self, encoding=None, allow_empty=False):
from colander import String
return String(encoding, allow_empty)
@@ -641,6 +641,49 @@ class TestString(unittest.TestCase):
from colander import String
self.assertEqual(Str, String)
def test_deserialize_emptystring_allow_empty(self):
node = DummySchemaNode(None)
typ = self._makeOne(None, True)
result = typ.deserialize(node, '')
self.assertEqual(result, '')
def test_deserialize_uncooperative(self):
val = Uncooperative()
node = DummySchemaNode(None)
typ = self._makeOne()
e = invalid_exc(typ.deserialize, node, val)
self.failUnless(e.msg)
def test_deserialize_unicode_from_None(self):
uni = u'\xf8'
node = DummySchemaNode(None)
typ = self._makeOne()
result = typ.deserialize(node, uni)
self.assertEqual(result, uni)
def test_deserialize_nonunicode_from_None(self):
value = object()
node = DummySchemaNode(None)
typ = self._makeOne()
result = typ.deserialize(node, value)
self.assertEqual(result, unicode(value))
def test_deserialize_from_utf8(self):
utf8 = '\xc3\xb8'
uni = u'\xf8'
node = DummySchemaNode(None)
typ = self._makeOne('utf-8')
result = typ.deserialize(node, utf8)
self.assertEqual(result, uni)
def test_deserialize_from_utf16(self):
utf16 = '\xff\xfe\xf8\x00'
uni = u'\xf8'
node = DummySchemaNode(None)
typ = self._makeOne('utf-16')
result = typ.deserialize(node, utf16)
self.assertEqual(result, uni)
def test_serialize_emptystring_required(self):
val = ''
node = DummySchemaNode(None)
@@ -655,42 +698,6 @@ class TestString(unittest.TestCase):
result = typ.deserialize(node, val)
self.assertEqual(result, 'default')
def test_deserialize_emptystring_allow_empty(self):
node = DummySchemaNode(None)
typ = self._makeOne(None, True)
result = typ.deserialize(node, '')
self.assertEqual(result, '')
def test_deserialize_uncooperative(self):
val = Uncooperative()
node = DummySchemaNode(None)
typ = self._makeOne()
e = invalid_exc(typ.deserialize, node, val)
self.failUnless(e.msg)
def test_deserialize_unicode(self):
uni = u'\xf8'
node = DummySchemaNode(None)
typ = self._makeOne()
result = typ.deserialize(node, uni)
self.assertEqual(result, uni)
def test_deserialize_from_utf8(self):
utf8 = '\xc3\xb8'
uni = u'\xf8'
node = DummySchemaNode(None)
typ = self._makeOne()
result = typ.deserialize(node, utf8)
self.assertEqual(result, uni)
def test_deserialize_from_utf16(self):
utf16 = '\xff\xfe\xf8\x00'
uni = u'\xf8'
node = DummySchemaNode(None)
typ = self._makeOne('utf-16')
result = typ.deserialize(node, utf16)
self.assertEqual(result, uni)
def test_serialize_uncooperative(self):
val = Uncooperative()
node = DummySchemaNode(None)
@@ -698,11 +705,25 @@ class TestString(unittest.TestCase):
e = invalid_exc(typ.serialize, node, val)
self.failUnless(e.msg)
def test_serialize_nonunicode_to_None(self):
value = object()
node = DummySchemaNode(None)
typ = self._makeOne()
result = typ.serialize(node, value)
self.assertEqual(result, unicode(value))
def test_serialize_unicode_to_None(self):
value = u'abc'
node = DummySchemaNode(None)
typ = self._makeOne()
result = typ.serialize(node, value)
self.assertEqual(result, value)
def test_serialize_to_utf8(self):
utf8 = '\xc3\xb8'
uni = u'\xf8'
node = DummySchemaNode(None)
typ = self._makeOne()
typ = self._makeOne('utf-8')
result = typ.serialize(node, uni)
self.assertEqual(result, utf8)
@@ -719,9 +740,8 @@ class TestString(unittest.TestCase):
node = DummySchemaNode(None)
typ = self._makeOne('utf-8')
e = invalid_exc(typ.serialize, node, not_utf8)
self.failUnless('cannot be serialized to str' in e.msg)
self.failUnless('cannot be serialized' in e.msg)
class TestInteger(unittest.TestCase):
def _makeOne(self):
from colander import Integer