Documentation.

This commit is contained in:
Chris McDonough
2010-03-12 05:08:14 +00:00
parent f333a26610
commit c6ec36b5c0
6 changed files with 598 additions and 39 deletions

View File

@@ -1,20 +1,10 @@
import pkg_resources
import itertools
def resolve_dotted(dottedname, package=None):
    """Resolve a dotted name to the Python object it names.

    ``dottedname`` is either an absolute name or a relative one, i.e. a
    name starting with ``.`` or ``:``.  A relative name is resolved
    against ``package``: the bare names ``.`` and ``:`` stand for the
    package itself; any other relative name is appended to
    ``package.__name__``.

    Raises ``ImportError`` when the name is relative and no ``package``
    was supplied.  Errors raised while loading the resulting entry point
    are not caught here.
    """
    if dottedname.startswith(('.', ':')):
        if not package:
            raise ImportError(
                'name "%s" is irresolveable (no package)' % dottedname)
        if dottedname in ('.', ':'):
            dottedname = package.__name__
        else:
            dottedname = package.__name__ + dottedname
    # EntryPoint.parse() wants a "name=target" spec; the "x" name is a
    # throwaway.  NOTE(review): the positional False passed to load() is
    # presumably pkg_resources' ``require`` flag -- confirm against the
    # installed setuptools version.
    return pkg_resources.EntryPoint.parse('x=%s' % dottedname).load(False)
class Invalid(Exception):
"""
An exception raised by data types and validators indicating that
the value for a particular structure was not valid.
"""
pos = None
parent = None
@@ -25,10 +15,12 @@ class Invalid(Exception):
self.subexceptions = []
def add(self, error):
""" Add a subexception to this exception """
error.parent = self
self.subexceptions.append(error)
def paths(self):
""" Return all paths through the exception graph """
def traverse(node, stack):
stack.append(node)
@@ -44,6 +36,8 @@ class Invalid(Exception):
return traverse(self, [])
def asdict(self):
""" Return a dictionary containing an error report for this
exception"""
paths = self.paths()
D = {}
for path in paths:
@@ -61,6 +55,8 @@ class Invalid(Exception):
return D
class All(object):
""" Composite validator which succeeds if none of its
subvalidators raises an Invalid exception """
def __init__(self, *validators):
self.validators = validators
@@ -76,6 +72,8 @@ class All(object):
raise Invalid(struct, msgs)
class Range(object):
""" Validator which succeeds if the value it is passed is greater
than or equal to ``min`` and less than or equal to ``max``."""
def __init__(self, min=None, max=None):
self.min = min
self.max = max
@@ -93,9 +91,18 @@ class Range(object):
struct,
'%r is greater than maximum value %r' % (value, self.max))
class OneOf(object):
    """ Validator which succeeds if the value passed to it is one of
    a fixed set of values """
    def __init__(self, values):
        # ``values`` only needs to support the ``in`` operator; it is
        # stored as given and echoed back in the error message.
        self.values = values

    def __call__(self, struct, value):
        """ Return ``None`` if ``value`` is in ``self.values``;
        otherwise raise ``Invalid`` against ``struct``. """
        if value not in self.values:
            raise Invalid(struct, '%r is not one of %r' % (value, self.values))
class Mapping(object):
""" A type which represents a mapping of names to data
structures. """
""" A type which represents a mapping of names to structures. """
def _validate(self, struct, value):
if not hasattr(value, 'get'):
raise Invalid(struct, '%r is not a mapping type' % value)
@@ -169,9 +176,7 @@ class Positional(object):
"""
class Tuple(Positional):
""" A type which represents a fixed-length sequence of data
structures, each one of which may be different as denoted by the
types of the associated structure's children."""
""" A type which represents a fixed-length sequence of structures. """
def _validate(self, struct, value):
if not hasattr(value, '__iter__'):
raise Invalid(struct, '%r is not an iterable value' % value)
@@ -228,9 +233,9 @@ class Tuple(Positional):
return tuple(result)
class Sequence(Positional):
""" A type which represents a variable-length sequence of values,
all of which must be of the same type as denoted by the type of
the Structure instance ``substruct``"""
""" A type which represents a variable-length sequence of
structures, all of which must be of the same type as denoted by
the type of the Structure instance ``substruct``"""
def __init__(self, substruct):
self.substruct = substruct
@@ -329,15 +334,29 @@ class GlobalObject(object):
raise Invalid(struct, '%r has no __name__' % value)
def deserialize(self, struct, value):
    """ Resolve the dotted-name string ``value`` to the object it names.

    A name starting with ``.`` or ``:`` is relative and is resolved
    against ``self.package``: the bare names ``.`` and ``:`` stand for
    the package itself, any other relative name is appended to
    ``self.package.__name__``.

    Raises ``Invalid`` (against ``struct``) if ``value`` is not a
    string, if it is relative but ``self.package`` is not set, or if
    loading the name raises ``ImportError``.
    """
    import pkg_resources
    if not isinstance(value, basestring):
        # Wrap in a 1-tuple so a tuple ``value`` is reported via %r
        # instead of being unpacked by the ``%`` operator.
        raise Invalid(
            struct, '%r is not a global object specification' % (value,))
    try:
        if value.startswith('.') or value.startswith(':'):
            if not self.package:
                raise ImportError(
                    'name "%s" is irresolveable (no package)' % value)
            if value in ['.', ':']:
                value = self.package.__name__
            else:
                value = self.package.__name__ + value
        # EntryPoint.parse() wants a "name=target" spec; "x" is a
        # throwaway name.
        return pkg_resources.EntryPoint.parse(
            'x=%s' % value).load(False)
    except ImportError:
        # ``value`` may already have been rewritten to the absolute
        # name above, so the message shows the name that was tried.
        raise Invalid(struct,
                      'The dotted name %r cannot be imported' % value)
class Structure(object):
"""
Fundamental building block of schemas.
"""
_counter = itertools.count()
def __new__(cls, *arg, **kw):
@@ -405,6 +424,3 @@ class SequenceSchema(Schema):
class TupleSchema(Schema):
struct_type = Tuple

52
cereal/interfaces.py Normal file
View File

@@ -0,0 +1,52 @@
def Validator(struct, value):
    """
    Interface of a validator: a callable accepting ``struct`` and
    ``value``.

    If ``value`` is not valid, raise a ``cereal.Invalid`` exception.

    ``struct`` is the ``cereal.Structure`` instance which
    contains, among other things, the default value, the name of
    the value, and a ``required`` flag indicating whether this
    value is required.
    """
class Type(object):
    """ Interface of a type: an object providing ``serialize`` and
    ``deserialize`` methods. """
    def serialize(self, struct, value):
        """
        Serialize the object represented by ``value`` to a
        data structure.  The serialization should be composed of one or
        more objects which can be deserialized by the
        :meth:`cereal.interfaces.Type.deserialize` method of this
        type.

        This method should also do type validation of ``value``.

        ``struct`` is the ``cereal.Structure`` instance which
        contains, among other things, the default value, the name of
        the value, and a ``required`` flag indicating whether this
        value is required.

        If the object cannot be serialized, or type validation for
        ``value`` fails, a ``cereal.Invalid`` exception should be
        raised.
        """

    def deserialize(self, struct, value):
        """
        Deserialize the serialization represented by ``value`` to a
        data structure.  The deserialization should be composed of one
        or more objects which can be serialized by the
        :meth:`cereal.interfaces.Type.serialize` method of this
        type.

        This method should also do type validation of ``value``.

        ``struct`` is the ``cereal.Structure`` instance which
        contains, among other things, the default value, the name of
        the value, and a ``required`` flag indicating whether this
        value is required.

        If the object cannot be deserialized, or type validation for
        ``value`` fails, a ``cereal.Invalid`` exception should be
        raised.
        """

View File

@@ -1,4 +1,48 @@
API Documentation
=================
Cereal API
----------
Exceptions
~~~~~~~~~~
.. automodule:: cereal
.. autoclass:: Invalid
Validators
~~~~~~~~~~
.. autoclass:: All
.. autoclass:: Range
.. autoclass:: OneOf
Types
~~~~~
.. autoclass:: Mapping
.. autoclass:: Tuple
.. autoclass:: Sequence
.. autoclass:: String
.. autoclass:: Integer
.. autoclass:: GlobalObject
Other
~~~~~
.. autoclass:: Structure
.. autoclass:: Schema
.. autoclass:: MappingSchema
.. autoclass:: TupleSchema
.. autoclass:: SequenceSchema
XXX

View File

@@ -14,10 +14,19 @@
import sys, os
# If your extensions are in another directory, add it here. If the
# directory is relative to the documentation root, use os.path.abspath to
# make it absolute, like shown here.
#sys.path.append(os.path.abspath('some/directory'))
# If your extensions are in another directory, add it here. If the directory
# is relative to the documentation root, use os.path.abspath to make it
# absolute, like shown here.
# Make the package importable for autodoc.  ``__file__`` may be a relative
# path, so absolutize it *before* taking dirnames; otherwise ``parent``
# can collapse to '' and point at the wrong directory.
parent = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent)

# Run the test suite from the project root.  NOTE(review): presumably this
# is done so that setup.py fetches the package's requirements as ``.egg``
# directories next to setup.py -- confirm.  An argument list plus ``cwd=``
# avoids both the shell (paths with spaces) and changing this process's
# working directory.  The exit status is ignored, as it was before.
import subprocess
subprocess.call([sys.executable, 'setup.py', 'test', '-q'], cwd=parent)

# Put every egg found in the project root on sys.path.
for item in os.listdir(parent):
    if item.endswith('.egg'):
        sys.path.append(os.path.join(parent, item))
# General configuration
# ---------------------
@@ -183,3 +192,5 @@ latex_logo = '.static/logo_hi.gif'
# If false, no module index is generated.
#latex_use_modindex = True
#autoclass_content = 'both'

View File

@@ -1,20 +1,446 @@
cereal
Cereal
======
Cereal is an extensible package which can be used to:
Cereal is useful as a system for validating and deserializing data
obtained via XML, JSON, an HTML form post or any other equally simple
data serialization. Cereal can be used to:
- deserialize and validate a data structure composed of strings,
mappings, and lists.
- Define a data schema
- serialize an arbitrary data structure to a data structure composed
- Serialize an arbitrary Python structure to a data structure composed
of strings, mappings, and lists.
- Deserialize a data structure composed of strings, mappings, and
lists into an arbitrary Python structure after validating the data
structure against a data schema.
Out of the box, Cereal can serialize the following types of objects:
- A mapping object (e.g. dictionary)
- A variable-length sequence of objects (each object is of the same
type).
- A fixed-length tuple of objects (each object is of a different
type).
- A string or Unicode object.
- An integer.
- A dotted Python object path.
Cereal allows additional data structures to be serialized and
deserialized by allowing a developer to define new "types".
Defining A Cereal Schema
------------------------
Imagine you want to deserialize and validate a serialization of data
you've obtained by reading a YAML document. An example of such a data
serialization might look something like this:
.. code-block:: python
:linenos:
{
'name':'keith',
'age':'20',
'friends':[('1', 'jim'),('2', 'bob'), ('3', 'joe'), ('4', 'fred')],
'phones':[{'location':'home', 'number':'555-1212'},
{'location':'work', 'number':'555-8989'},],
}
Let's further imagine you'd like to make sure, on demand, that a
particular serialization of this type read from this YAML document or
another YAML document is "valid".
Notice that all the innermost values in the serialization are strings,
even though some of them (such as age and the position of each friend)
are more naturally integer-like. Let's define a schema which will
attempt to convert a serialization to a data structure that has
different types.
.. code-block:: python
:linenos:
import cereal
class Friend(cereal.TupleSchema):
rank = cereal.Structure(cereal.Int(), validator=cereal.Range(0, 9999))
name = cereal.Structure(cereal.String())
class Phone(cereal.MappingSchema):
location = cereal.Structure(cereal.String(),
validator=cereal.OneOf(['home', 'work']))
number = cereal.Structure(cereal.String())
class PersonSchema(cereal.MappingSchema):
name = cereal.Structure(cereal.String())
age = cereal.Structure(cereal.Int(), validator=cereal.Range(0, 200))
friends = cereal.Structure(cereal.Sequence(Friend()))
phones = cereal.Structure(cereal.Sequence(Phone()))
For ease of reading, we've actually defined *three* schemas above, but
we coalesce them all into a single ``PersonSchema``. As the result of
our definitions, a ``PersonSchema`` represents:
- A ``name``, which must be a string.
- An ``age``, which must be deserializable to an integer; after
deserialization happens, a validator ensures that the integer is
between 0 and 200 inclusive.
- A sequence of ``friend`` structures. Each friend structure is a
two-element tuple. The first element represents an integer rank; it
must be between 0 and 9999 inclusive. The second element represents
a string name.
- A sequence of ``phone`` structures. Each phone structure is a
mapping. Each phone mapping has two keys: ``location`` and
``number``. The ``location`` must be one of ``work`` or ``home``.
The number must be a string.
Structure Objects
~~~~~~~~~~~~~~~~~
A schema is composed of one or more *structure* objects, usually in a
nested arrangement. Each structure object has a required *type*, an
optional *validator*, and a slightly less optional *name*.
The *type* of a structure indicates its data type (such as
``cereal.Int`` or ``cereal.String``).
The *validator* of a structure is called after deserialization; it
makes sure the deserialized value matches a constraint. An example of
such a validator is provided in the schema above:
``validator=cereal.Range(0, 200)``. The *name* of a structure appears
in error reports.
The *name* of a structure that is introduced as a class-level
attribute of a ``cereal.MappingSchema`` or ``cereal.TupleSchema`` is
its class attribute name. For example:
.. code-block:: python
:linenos:
import cereal
class Phone(cereal.MappingSchema):
location = cereal.Structure(cereal.String(),
validator=cereal.OneOf(['home', 'work']))
number = cereal.Structure(cereal.String())
The *name* of the structure defined by ``location =
cereal.Structure(..)`` is ``location``.
Schema Objects
~~~~~~~~~~~~~~
The result of creating an instance of a ``cereal.MappingSchema`` or
``cereal.TupleSchema`` object is also a *structure* object.
Instantiating a ``cereal.MappingSchema`` creates a structure which has
a *type* value of ``cereal.Mapping``. Instantiating a
``cereal.TupleSchema`` creates a structure which has a *type* value of
``cereal.Tuple``.
A structure defined by instantiating a ``cereal.MappingSchema`` or a
``cereal.TupleSchema`` usually has no validator, and has the empty
string as its name.
Deserializing A Data Structure Using a Schema
---------------------------------------------
Earlier we defined a schema:
.. code-block:: python
:linenos:
import cereal
class Friend(cereal.TupleSchema):
rank = cereal.Structure(cereal.Int(), validator=cereal.Range(0, 9999))
name = cereal.Structure(cereal.String())
class Phone(cereal.MappingSchema):
location = cereal.Structure(cereal.String(),
validator=cereal.OneOf(['home', 'work']))
number = cereal.Structure(cereal.String())
class PersonSchema(cereal.MappingSchema):
name = cereal.Structure(cereal.String())
age = cereal.Structure(cereal.Int(), validator=cereal.Range(0, 200))
friends = cereal.Structure(cereal.Sequence(Friend()))
phones = cereal.Structure(cereal.Sequence(Phone()))
Let's now use this schema to try to deserialize some concrete data
structures.
Deserializing A Valid Serialization
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code-block:: python
:linenos:
data = {
'name':'keith',
'age':'20',
'friends':[('1', 'jim'),('2', 'bob'), ('3', 'joe'), ('4', 'fred')],
'phones':[{'location':'home', 'number':'555-1212'},
{'location':'work', 'number':'555-8989'},],
}
schema = PersonSchema()
deserialized = schema.deserialize(data)
When ``schema.deserialize(data)`` is called, because all the data in
the schema is valid, and the structure represented by ``data``
conforms to the schema, ``deserialized`` will be the following:
.. code-block:: python
:linenos:
{
'name':'keith',
'age':20,
'friends':[(1, 'jim'),(2, 'bob'), (3, 'joe'), (4, 'fred')],
'phones':[{'location':'home', 'number':'555-1212'},
{'location':'work', 'number':'555-8989'},],
}
Note that all the friend rankings have been converted to integers,
likewise for the age.
Deserializing An Invalid Serialization
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Below, the ``data`` structure has some problems. The ``age`` is a
negative number. The rank for ``bob`` is ``t`` which is not a valid
integer. The ``location`` of the first phone is ``bar``, which is not
a valid location (it is not one of "work" or "home"). What happens
when a data structure cannot be deserialized due to a data type error
or a validation error?
.. code-block:: python
:linenos:
import cereal
data = {
'name':'keith',
'age':'-1',
'friends':[('1', 'jim'),('t', 'bob'), ('3', 'joe'), ('4', 'fred')],
'phones':[{'location':'bar', 'number':'555-1212'},
{'location':'work', 'number':'555-8989'},],
}
schema = PersonSchema()
try:
schema.deserialize(data)
except cereal.Invalid, e:
print e.asdict()
The ``deserialize`` method will raise an exception, and the ``except``
clause above will be invoked, causing ``e.asdict()`` to be printed.
This will print:
.. code-block:: python
:linenos:
{'age':'-1 is less than minimum value 0',
'friends.1.0':'"t" is not a number',
'phones.0.location':'"bar" is not one of ["home", "work"]'}
The above error dictionary is telling us that:
- The top-level age variable failed validation.
- Bob's rank (the zeroth element of the ``Friend`` tuple for ``bob``) is
not a valid number.
- The zeroth phone number has a bad location: it should be one of
"home" or "work".
Defining A Schema Imperatively
------------------------------
The above schema we defined was defined declaratively via a set of
``class`` statements. It's often useful to create schemas more
dynamically. For this reason, Cereal offers an "imperative" mode of
schema configuration. Here's our previous declarative schema:
.. code-block:: python
:linenos:
import cereal
class Friend(cereal.TupleSchema):
rank = cereal.Structure(cereal.Int(), validator=cereal.Range(0, 9999))
name = cereal.Structure(cereal.String())
class Phone(cereal.MappingSchema):
location = cereal.Structure(cereal.String(),
validator=cereal.OneOf(['home', 'work']))
number = cereal.Structure(cereal.String())
class PersonSchema(cereal.MappingSchema):
name = cereal.Structure(cereal.String())
age = cereal.Structure(cereal.Int(), validator=cereal.Range(0, 200))
friends = cereal.Structure(cereal.Sequence(Friend()))
phones = cereal.Structure(cereal.Sequence(Phone()))
We can imperatively construct a completely equivalent schema like so:
.. code-block:: python
:linenos:
import cereal
friend = cereal.Structure(cereal.Tuple())
friend.add(cereal.Structure(cereal.Int(), validator=cereal.Range(0, 9999),
name='rank'))
friend.add(cereal.Structure(cereal.String(), name='name'))
phone = cereal.Structure(cereal.Mapping())
phone.add(cereal.Structure(cereal.String(),
validator=cereal.OneOf(['home', 'work']),
name='location'))
phone.add(cereal.Structure(cereal.String(), name='number'))
schema = cereal.Structure(cereal.Mapping())
schema.add(cereal.Structure(cereal.String(), name='name'))
schema.add(cereal.Structure(cereal.Int(), name='age',
validator=cereal.Range(0, 200)))
schema.add(cereal.Structure(cereal.Sequence(friend), name='friends'))
schema.add(cereal.Structure(cereal.Sequence(phone), name='phones'))
Defining a schema imperatively is a lot uglier than defining a schema
declaratively, but it's often more useful when you need to define a
schema dynamically. Perhaps in the body of a function or method you
may need to exclude a particular schema field based on a business
condition; when you define a schema imperatively, you have more
opportunity to control the schema composition.
Serializing and deserializing using a schema created imperatively is
done exactly the same way as you would serialize or deserialize using
a schema created declaratively:
.. code-block:: python
:linenos:
data = {
'name':'keith',
'age':'20',
'friends':[('1', 'jim'),('2', 'bob'), ('3', 'joe'), ('4', 'fred')],
'phones':[{'location':'home', 'number':'555-1212'},
{'location':'work', 'number':'555-8989'},],
}
deserialized = schema.deserialize(data)
Defining a New Type
-------------------
A new type is a class with two methods: ``serialize`` and
``deserialize``. ``serialize`` converts a Python data structure to a
serialization. ``deserialize`` converts a value to a Python data
structure.
Here's a type which implements boolean serialization and
deserialization. It serializes a boolean to the string ``true`` or
``false``; it deserializes a string (presumably ``true`` or ``false``,
but allows some wiggle room for ``t``, ``on``, ``yes``, ``y``, and
``1``) to a boolean value.
.. code-block:: python
:linenos:
class Boolean(object):
def deserialize(self, struct, value):
if not isinstance(value, basestring):
raise Invalid(struct, '%r is not a string' % value)
value = value.lower()
if value in ('true', 'yes', 'y', 'on', 't', '1'):
return True
return False
def serialize(self, struct, value):
if not isinstance(value, bool):
raise Invalid(struct, '%r is not a boolean' % value)
return value and 'true' or 'false'
Here's how you would use the resulting class as part of a schema:
.. code-block:: python
:linenos:
import cereal
class Schema(cereal.MappingSchema):
interested = cereal.Structure(Boolean())
The above schema has a member named ``interested`` which will now be
serialized and deserialized as a boolean, according to the logic
defined in the ``Boolean`` type class.
Note that the only real constraint of a type class is that its
``serialize`` method must be able to make sense of a value generated
by its ``deserialize`` method and vice versa.
Defining a New Validator
------------------------
A validator is a callable which accepts two positional arguments:
``struct`` and ``value``. It returns ``None`` if the value is valid.
It raises a ``cereal.Invalid`` exception if the value is not valid.
Here's a validator that checks if the value is a valid credit card number.
.. code-block:: python
:linenos:
def luhnok(struct, value):
""" checks to make sure that the value passes a luhn mod-10 checksum """
sum = 0
num_digits = len(value)
oddeven = num_digits & 1
for count in range(0, num_digits):
digit = int(value[count])
if not (( count & 1 ) ^ oddeven ):
digit = digit * 2
if digit > 9:
digit = digit - 9
sum = sum + digit
if not (sum % 10) == 0:
raise Invalid(struct,
'%r is not a valid credit card number' % value)
Here's how the resulting ``luhnok`` validator might be used in a
schema:
.. code-block:: python
:linenos:
import cereal
class Schema(cereal.MappingSchema):
cc_number = cereal.Structure(cereal.String(), validator=luhnok)
Note that the validator doesn't need to check if the ``value`` is a
string: this has already been done as the result of the type of the
``cc_number`` structure being ``cereal.String``. Validators are always
passed the *deserialized* value when they are invoked.
Interface and API Documentation
-------------------------------
.. toctree::
:maxdepth: 2
interfaces.rst
api.rst
Indices and tables
------------------

10
docs/interfaces.rst Normal file
View File

@@ -0,0 +1,10 @@
Interfaces
----------
.. automodule:: cereal.interfaces
.. autofunction:: Validator
.. autoclass:: Type
:members: