From c6ec36b5c041b2fd0e75f905502dc61d6aee9413 Mon Sep 17 00:00:00 2001 From: Chris McDonough Date: Fri, 12 Mar 2010 05:08:14 +0000 Subject: [PATCH] Documentation. --- cereal/__init__.py | 68 ++++--- cereal/interfaces.py | 52 +++++ docs/api.rst | 50 ++++- docs/conf.py | 19 +- docs/index.rst | 438 ++++++++++++++++++++++++++++++++++++++++++- docs/interfaces.rst | 10 + 6 files changed, 598 insertions(+), 39 deletions(-) create mode 100644 cereal/interfaces.py create mode 100644 docs/interfaces.rst diff --git a/cereal/__init__.py b/cereal/__init__.py index f990d7c..2eba874 100644 --- a/cereal/__init__.py +++ b/cereal/__init__.py @@ -1,20 +1,10 @@ -import pkg_resources import itertools -def resolve_dotted(dottedname, package=None): - if dottedname.startswith('.') or dottedname.startswith(':'): - if not package: - raise ImportError('name "%s" is irresolveable (no package)' % - dottedname) - if dottedname in ['.', ':']: - dottedname = package.__name__ - else: - dottedname = package.__name__ + dottedname - return pkg_resources.EntryPoint.parse( - 'x=%s' % dottedname).load(False) - class Invalid(Exception): - + """ + An exception raised by data types and validators indicating that + the value for a particular structure was not valid. 
+ """ pos = None parent = None @@ -25,10 +15,12 @@ class Invalid(Exception): self.subexceptions = [] def add(self, error): + """ Add a subexception to this exception """ error.parent = self self.subexceptions.append(error) def paths(self): + """ Return all paths through the exception graph """ def traverse(node, stack): stack.append(node) @@ -44,6 +36,8 @@ class Invalid(Exception): return traverse(self, []) def asdict(self): + """ Return a dictionary containing an error report for this + exception""" paths = self.paths() D = {} for path in paths: @@ -61,6 +55,8 @@ class Invalid(Exception): return D class All(object): + """ Composite validator which succeeds if none of its + subvalidators raises an Invalid exception """ def __init__(self, *validators): self.validators = validators @@ -76,6 +72,8 @@ class All(object): raise Invalid(struct, msgs) class Range(object): + """ Validator which succeeds if the value it is passed is greater + or equal to ``min`` and less than or equal to ``max``.""" def __init__(self, min=None, max=None): self.min = min self.max = max @@ -93,9 +91,18 @@ class Range(object): struct, '%r is greater than maximum value %r' % (value, self.max)) +class OneOf(object): + """ Validator which succeeds if the value passed to it is one of + a fixed set of values """ + def __init__(self, values): + self.values = values + + def __call__(self, struct, value): + if not value in self.values: + raise Invalid(struct, '%r is not one of %r' % (value, self.values)) + class Mapping(object): - """ A type which represents a mapping of names to data - structures. """ + """ A type which represents a mapping of names to structures. 
""" def _validate(self, struct, value): if not hasattr(value, 'get'): raise Invalid(struct, '%r is not a mapping type' % value) @@ -169,9 +176,7 @@ class Positional(object): """ class Tuple(Positional): - """ A type which represents a fixed-length sequence of data - structures, each one of which may be different as denoted by the - types of the associated structure's children.""" + """ A type which represents a fixed-length sequence of structures. """ def _validate(self, struct, value): if not hasattr(value, '__iter__'): raise Invalid(struct, '%r is not an iterable value' % value) @@ -228,9 +233,9 @@ class Tuple(Positional): return tuple(result) class Sequence(Positional): - """ A type which represents a variable-length sequence of values, - all of which must be of the same type as denoted by the type of - the Structure instance ``substruct``""" + """ A type which represents a variable-length sequence of + structures, all of which must be of the same type as denoted by + the type of the Structure instance ``substruct``""" def __init__(self, substruct): self.substruct = substruct @@ -329,15 +334,29 @@ class GlobalObject(object): raise Invalid(struct, '%r has no __name__' % value) def deserialize(self, struct, value): + import pkg_resources if not isinstance(value, basestring): raise Invalid(struct, '%r is not a global object specification') try: - return resolve_dotted(value, package=self.package) + if value.startswith('.') or value.startswith(':'): + if not self.package: + raise ImportError( + 'name "%s" is irresolveable (no package)' % value) + if value in ['.', ':']: + value = self.package.__name__ + else: + value = self.package.__name__ + value + return pkg_resources.EntryPoint.parse( + 'x=%s' % value).load(False) except ImportError: raise Invalid(struct, 'The dotted name %r cannot be imported' % value) class Structure(object): + """ + Fundamental building block of schemas. 
+ """ + _counter = itertools.count() def __new__(cls, *arg, **kw): @@ -405,6 +424,3 @@ class SequenceSchema(Schema): class TupleSchema(Schema): struct_type = Tuple - - - diff --git a/cereal/interfaces.py b/cereal/interfaces.py new file mode 100644 index 0000000..efe9c69 --- /dev/null +++ b/cereal/interfaces.py @@ -0,0 +1,52 @@ +def Validator(struct, value): + """ + If ``value`` is not valid, raise a ``cereal.Invalid`` exception. + + ``struct`` is the ``cereal.Structure`` instance which + contains, among other things, the default value, the name of + the value, and a ``required`` flag indicating whether this + value is required. + """ + +class Type(object): + def serialize(self, struct, value): + """ + Serialize the object represented by ``value`` to a + data structure. The serialization should be composed of one or + more objects which can be deserialized by the + :meth:`cereal.interfaces.Type.deserialize` method of this + type. + + This method should also do type validation of ``value``. + + ``struct`` is the ``cereal.Structure`` instance which + contains, among other things, the default value, the name of + the value, and a ``required`` flag indicating whether this + value is required. + + If the object cannot be serialized, or type validation for + ``value`` fails, a ``cereal.Invalid`` exception should be + raised. + """ + + def deserialize(self, struct, value): + """ + + Deserialze the serialization represented by ``value`` to a + data structure. The deserialization should be composed of one + or more objects which can be serialized by the + :meth:`cereal.interfaces.Type.serialize` method of this + type. + + This method should also do type validation of ``value``. + + ``struct`` is the ``cereal.Structure`` instance which + contains, among other things, the default value, the name of + the value, and a ``required`` flag indicating whether this + value is required. 
+ + If the object cannot be deserialized, or type validation for + ``value`` fails, a ``cereal.Invalid`` exception should be + raised. + """ + diff --git a/docs/api.rst b/docs/api.rst index 6f9b395..7f8592b 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -1,4 +1,48 @@ -API Documentation -================= +Cereal API +---------- + +Exceptions +~~~~~~~~~~ + +.. automodule:: cereal + + .. autoclass:: Invalid + +Validators +~~~~~~~~~~ + + .. autoclass:: All + + .. autoclass:: Range + + .. autoclass:: OneOf + +Types +~~~~~ + + .. autoclass:: Mapping + + .. autoclass:: Tuple + + .. autoclass:: Sequence + + .. autoclass:: String + + .. autoclass:: Integer + + .. autoclass:: GlobalObject + +Other +~~~~~ + + .. autoclass:: Structure + + .. autoclass:: Schema + + .. autoclass:: MappingSchema + + .. autoclass:: TupleSchema + + .. autoclass:: SequenceSchema + -XXX diff --git a/docs/conf.py b/docs/conf.py index 25895c9..43249dd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -14,10 +14,19 @@ import sys, os -# If your extensions are in another directory, add it here. If the -# directory is relative to the documentation root, use os.path.abspath to -# make it absolute, like shown here. -#sys.path.append(os.path.abspath('some/directory')) +# If your extensions are in another directory, add it here. If the directory +# is relative to the documentation root, use os.path.abspath to make it +# absolute, like shown here. +parent = os.path.dirname(os.path.dirname(__file__)) +sys.path.append(os.path.abspath(parent)) +wd = os.getcwd() +os.chdir(parent) +os.system('%s setup.py test -q' % sys.executable) +os.chdir(wd) + +for item in os.listdir(parent): + if item.endswith('.egg'): + sys.path.append(os.path.join(parent, item)) # General configuration # --------------------- @@ -183,3 +192,5 @@ latex_logo = '.static/logo_hi.gif' # If false, no module index is generated. 
#latex_use_modindex = True + +#autoclass_content = 'both' diff --git a/docs/index.rst b/docs/index.rst index 0ffd7d1..e756201 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,20 +1,446 @@ -cereal +Cereal ====== -Cereal is an extensible package which can be used to: +Cereal is useful as a system for validating and deserializing data +obtained via XML, JSON, an HTML form post or any other equally simple +data serialization. Cereal can be used to: -- deserialize and validate a data structure composed of strings, - mappings, and lists. +- Define a data schema -- serialize an arbitrary data structure to a data structure composed +- Serialize an arbitrary Python structure to a data structure composed of strings, mappings, and lists. +- Deserialize a data structure composed of strings, mappings, and + lists into an arbitrary Python structure after validating the data + structure against a data schema. + +Out of the box, Cereal can serialize the following types of objects: + +- A mapping object (e.g. dictionary) + +- A variable-length sequence of objects (each object is of the same + type). + +- A fixed-length tuple of objects (each object is of a different + type). + +- A string or Unicode object. + +- An integer. + +- A dotted Python object path. + +Cereal allows additional data structures to be serialized and +deserialized by allowing a developer to define new "types". + +Defining A Cereal Schema +------------------------ + +Imagine you want to deserialize and validate a serialization of data +you've obtained by reading a YAML document. An example of such a data +serialization might look something like this: + +.. 
code-block:: python + :linenos: + + { + 'name':'keith', + 'age':'20', + 'friends':[('1', 'jim'),('2', 'bob'), ('3', 'joe'), ('4', 'fred')], + 'phones':[{'location':'home', 'number':'555-1212'}, + {'location':'work', 'number':'555-8989'},], + } + +Let's further imagine you'd like to make sure, on demand, that a +particular serialization of this type read from this YAML document or +another YAML document is "valid". + +Notice that all the innermost values in the serialization are strings, +even though some of them (such as age and the position of each friend) +are more naturally integer-like. Let's define a schema which will +attempt to convert a serialization to a data structure that has +different types. + +.. code-block:: python + :linenos: + + import cereal + + class Friend(cereal.TupleSchema): + rank = cereal.Structure(cereal.Int(), validator=cereal.Range(0, 9999)) + name = cereal.Structure(cereal.String()) + + class Phone(cereal.MappingSchema): + location = cereal.Structure(cereal.String(), + validator=cereal.OneOf(['home', 'work'])) + number = cereal.Structure(cereal.String()) + + class PersonSchema(cereal.MappingSchema): + name = cereal.Structure(cereal.String()) + age = cereal.Structure(cereal.Int(), validator=cereal.Range(0, 200)) + friends = cereal.Structure(cereal.Sequence(Friend())) + phones = cereal.Structure(cereal.Sequence(Phone())) + +For ease of reading, we've actually defined *three* schemas above, but +we coalesce them all into a single ``PersonSchema``. As the result of +our definitions, a ``PersonSchema`` represents: + +- A ``name``, which must be a string. + +- An ``age``, which must be deserializable to an integer; after + deserialization happens, a validator ensures that the integer is + between 0 and 200 inclusive. + +- A sequence of ``friend`` structures. Each friend structure is a + two-element tuple. The first element represents an integer rank; it + must be between 0 and 9999 inclusive. The second element represents + a string name. 
+ +- A sequence of ``phone`` structures. Each phone structure is a + mapping. Each phone mapping has two keys: ``location`` and + ``number``. The ``location`` must be one of ``work`` or ``home``. + The number must be a string. + +Structure Objects +~~~~~~~~~~~~~~~~~ + +A schema is composed of one or more *structure* objects, usually in a +nested arrangement. Each structure object has a required *type*, an +optional *validator*, and a slightly less optional *name*. + +The *type* of a structure indicates its data type (such as +``cereal.Int`` or ``cereal.String``). + +The *validator* of a structure is called after deserialization; it +makes sure the deserialized value matches a constraint. An example of +such a validator is provided in the schema above: +``validator=cereal.Range(0, 200)``. The *name* of a structure appears +in error reports. + +The *name* of a structure that is introduced as a class-level +attribute of a ``cereal.MappingSchema`` or ``cereal.TupleSchema`` is +its class attribute name. For example: + +.. code-block:: python + :linenos: + + import cereal + + class Phone(cereal.MappingSchema): + location = cereal.Structure(cereal.String(), + validator=cereal.OneOf(['home', 'work'])) + number = cereal.Structure(cereal.String()) + +The *name* of the structure defined by ``location = +cereal.Structure(..)`` is ``location``. + +Schema Objects +~~~~~~~~~~~~~~ + +The result of creating an instance of a ``cereal.MappingSchema`` or +``cereal.TupleSchema`` object is also a *structure* object. + +Instantiating a ``cereal.MappingSchema`` creates a structure which has +a *type* value of ``cereal.Mapping``. Instantiating a +``cereal.TupleSchema`` creates a structure which has a *type* value of +``cereal.Tuple``. + +A structure defined by instantiating a ``cereal.MappingSchema`` or a +``cereal.TupleSchema`` usually has no validator, and has the empty +string as its name. 
+ +Deserializing A Data Structure Using a Schema +--------------------------------------------- + +Earlier we defined a schema: + +.. code-block:: python + :linenos: + + import cereal + + class Friend(cereal.TupleSchema): + rank = cereal.Structure(cereal.Int(), validator=cereal.Range(0, 9999)) + name = cereal.Structure(cereal.String()) + + class Phone(cereal.MappingSchema): + location = cereal.Structure(cereal.String(), + validator=cereal.OneOf(['home', 'work'])) + number = cereal.Structure(cereal.String()) + + class PersonSchema(cereal.MappingSchema): + name = cereal.Structure(cereal.String()) + age = cereal.Structure(cereal.Int(), validator=cereal.Range(0, 200)) + friends = cereal.Structure(cereal.Sequence(Friend())) + phones = cereal.Structure(cereal.Sequence(Phone())) + +Let's now use this schema to try to deserialize some concrete data +structures. + +Deserializing A Valid Serialization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: python + :linenos: + + data = { + 'name':'keith', + 'age':'20', + 'friends':[('1', 'jim'),('2', 'bob'), ('3', 'joe'), ('4', 'fred')], + 'phones':[{'location':'home', 'number':'555-1212'}, + {'location':'work', 'number':'555-8989'},], + } + schema = PersonSchema() + deserialized = schema.deserialize(data) + +When ``schema.deserialize(data)`` is called, because all the data in +the schema is valid, and the structure represented by ``data`` +conforms to the schema, ``deserialized`` will be the following: + +.. code-block:: python + :linenos: + + { + 'name':'keith', + 'age':20, + 'friends':[(1, 'jim'),(2, 'bob'), (3, 'joe'), (4, 'fred')], + 'phones':[{'location':'home', 'number':'555-1212'}, + {'location':'work', 'number':'555-8989'},], + } + +Note that all the friend rankings have been converted to integers, +likewise for the age. + +Deserializing An Invalid Serialization +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Below, the ``data`` structure has some problems. The ``age`` is a +negative number. 
The rank for ``bob`` is ``t`` which is not a valid +integer. The ``location`` of the first phone is ``bar``, which is not +a valid location (it is not one of "work" or "home"). What happens +when a data structure cannot be deserialized due to a data type error +or a validation error? + +.. code-block:: python + :linenos: + + import cereal + + data = { + 'name':'keith', + 'age':'-1', + 'friends':[('1', 'jim'),('t', 'bob'), ('3', 'joe'), ('4', 'fred')], + 'phones':[{'location':'bar', 'number':'555-1212'}, + {'location':'work', 'number':'555-8989'},], + } + schema = PersonSchema() + try: + schema.deserialize(data) + except cereal.Invalid, e: + print e.asdict() + +The ``deserialize`` method will raise an exception, and the ``except`` +clause above will be invoked, causing ``e.asdict()`` to be printed. +This will print: + +.. code-block:: python + :linenos: + + {'age':'-1 is less than minimum value 0', + 'friends.1.0':'"t" is not a number', + 'phones.0.location':'"bar" is not one of ["home", "work"]'} + +The above error dictionary is telling us that: + +- The top-level age variable failed validation. + +- Bob's rank (the Friend tuple named ``bob``'s zeroth element) is not a + valid number. + +- The zeroth phone number has a bad location: it should be one of + "home" or "work". + +Defining A Schema Imperatively +------------------------------ + +The above schema we defined was defined declaratively via a set of +``class`` statements. It's often useful to create schemas more +dynamically. For this reason, Cereal offers an "imperative" mode of +schema configuration. Here's our previous declarative schema: + +.. 
code-block:: python + :linenos: + + import cereal + + class Friend(cereal.TupleSchema): + rank = cereal.Structure(cereal.Int(), validator=cereal.Range(0, 9999)) + name = cereal.Structure(cereal.String()) + + class Phone(cereal.MappingSchema): + location = cereal.Structure(cereal.String(), + validator=cereal.OneOf(['home', 'work'])) + number = cereal.Structure(cereal.String()) + + class PersonSchema(cereal.MappingSchema): + name = cereal.Structure(cereal.String()) + age = cereal.Structure(cereal.Int(), validator=cereal.Range(0, 200)) + friends = cereal.Structure(cereal.Sequence(Friend())) + phones = cereal.Structure(cereal.Sequence(Phone())) + +We can imperatively construct a completely equivalent schema like so: + +.. code-block:: python + :linenos: + + import cereal + + friend = cereal.Structure(cereal.Tuple()) + friend.add(cereal.Structure(cereal.Int(), validator=cereal.Range(0, 9999), + name='rank')) + friend.add(cereal.Structure(cereal.String(), name='name')) + + phone = cereal.Structure(cereal.Mapping()) + phone.add(cereal.Structure(cereal.String(), + validator=cereal.OneOf(['home', 'work']), + name='location')) + phone.add(cereal.Structure(cereal.String(), name='number')) + + schema = cereal.Structure(cereal.Mapping()) + schema.add(cereal.Structure(cereal.String(), name='name')) + schema.add(cereal.Structure(cereal.Int(), name='age', + validator=cereal.Range(0, 200))) + schema.add(cereal.Structure(cereal.Sequence(friend), name='friends')) + schema.add(cereal.Structure(cereal.Sequence(phone), name='phones')) + +Defining a schema imperatively is a lot uglier than defining a schema +declaratively, but it's often more useful when you need to define a +schema dynamically. Perhaps in the body of a function or method you +may need to exclude a particular schema field based on a business +condition; when you define a schema imperatively, you have more +opportunity to control the schema composition. 
+ +Serializing and deserializing using a schema created imperatively is +done exactly the same way as you would serialize or deserialize using +a schema created declaratively: + +.. code-block:: python + :linenos: + + data = { + 'name':'keith', + 'age':'20', + 'friends':[('1', 'jim'),('2', 'bob'), ('3', 'joe'), ('4', 'fred')], + 'phones':[{'location':'home', 'number':'555-1212'}, + {'location':'work', 'number':'555-8989'},], + } + deserialized = schema.deserialize(data) + +Defining a New Type +------------------- + +A new type is a class with two methods: ``serialize`` and +``deserialize``. ``serialize`` converts a Python data structure to a +serialization. ``deserialize`` converts a value to a Python data +structure. + +Here's a type which implements boolean serialization and +deserialization. It serializes a boolean to the string ``true`` or +``false``; it deserializes a string (presumably ``true`` or ``false``, +but allows some wiggle room for ``t``, ``on``, ``yes``, ``y``, and +``1``) to a boolean value. + +.. code-block:: python + :linenos: + + class Boolean(object): + def deserialize(self, struct, value): + if not isinstance(value, basestring): + raise Invalid(struct, '%r is not a string' % value) + value = value.lower() + if value in ('true', 'yes', 'y', 'on', 't', '1'): + return True + return False + + def serialize(self, struct, value): + if not isinstance(value, bool): + raise Invalid(struct, '%r is not a boolean' % value) + return value and 'true' or 'false' + +Here's how you would use the resulting class as part of a schema: + +.. code-block:: python + :linenos: + + import cereal + + class Schema(cereal.MappingSchema): + interested = cereal.Structure(Boolean()) + +The above schema has a member named ``interested`` which will now be +serialized and deserialized as a boolean, according to the logic +defined in the ``Boolean`` type class. 
+ +Note that the only real constraint of a type class is that its +``serialize`` method must be able to make sense of a value generated +by its ``deserialize`` method and vice versa. + +Defining a New Validator +------------------------ + +A validator is a callable which accepts two positional arguments: +``struct`` and ``value``. It returns ``None`` if the value is valid. +It raises a ``cereal.Invalid`` exception if the value is not valid. +Here's a validator that checks if the value is a valid credit card number. + +.. code-block:: python + :linenos: + + def luhnok(struct, value): + """ checks to make sure that the value passes a luhn mod-10 checksum """ + sum = 0 + num_digits = len(value) + oddeven = num_digits & 1 + + for count in range(0, num_digits): + digit = int(value[count]) + + if not (( count & 1 ) ^ oddeven ): + digit = digit * 2 + if digit > 9: + digit = digit - 9 + + sum = sum + digit + + if not (sum % 10) == 0: + raise Invalid(struct, + '%r is not a valid credit card number' % value) + +Here's how the resulting ``luhnok`` validator might be used in a +schema: + +.. code-block:: python + :linenos: + + import cereal + + class Schema(cereal.MappingSchema): + cc_number = cereal.Structure(cereal.String(), validator=luhnok) + +Note that the validator doesn't need to check if the ``value`` is a +string: this has already been done as the result of the type of the +``cc_number`` structure being ``cereal.String``. Validators are always +passed the *deserialized* value when they are invoked. + +Interface and API Documentation +------------------------------- + .. toctree:: :maxdepth: 2 + interfaces.rst api.rst - Indices and tables ------------------ diff --git a/docs/interfaces.rst b/docs/interfaces.rst new file mode 100644 index 0000000..81bd038 --- /dev/null +++ b/docs/interfaces.rst @@ -0,0 +1,10 @@ +Interfaces +---------- + +.. automodule:: cereal.interfaces + + .. autofunction:: Validator + + .. autoclass:: Type + :members: +