oslo.serialization/oslo_serialization/msgpackutils.py
Gevorg Davoian 4fdaeff758 Replace TypeError by ValueError in msgpackutils
There are some differences in the behaviour of json and msgpack
serializers. Msgpack throws TypeError when it doesn't know
how to handle an object. Json throws ValueError instead. Regarding
the fact that serialization should be configurable (in particular,
serializers should have similar behaviour; right now transition from
json to msgpack leads to errors and fails) this patch proposes to
raise ValueError instead of TypeError on failures so that libraries
already using jsonutils would be able to also work with msgpackutils.

Change-Id: I3d21b7d136e5a426a3c4a70a953c82ddcd6ef5af
2016-06-08 08:41:36 +00:00

488 lines
15 KiB
Python

# Copyright (C) 2015 Yahoo! Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
'''
MessagePack related utilities.
This module provides a few things:
#. A handy registry for getting an object down to something that can be
msgpack serialized. See :class:`.HandlerRegistry`.
#. Wrappers around :func:`.loads` and :func:`.dumps`. The :func:`.dumps`
wrapper will automatically use
the :py:attr:`~oslo_serialization.msgpackutils.default_registry` for
you if needed.
.. versionadded:: 1.3
'''
import datetime
import functools
import itertools
import uuid
import msgpack
from oslo_utils import importutils
from pytz import timezone
import six
import six.moves.xmlrpc_client as xmlrpclib
netaddr = importutils.try_import("netaddr")
class Interval(object):
"""Small and/or simple immutable integer/float interval class.
Interval checking is **inclusive** of the min/max boundaries.
"""
def __init__(self, min_value, max_value):
if min_value > max_value:
raise ValueError("Minimum value %s must be less than"
" or equal to maximum value %s" % (min_value,
max_value))
self._min_value = min_value
self._max_value = max_value
@property
def min_value(self):
return self._min_value
@property
def max_value(self):
return self._max_value
def __contains__(self, value):
return value >= self.min_value and value <= self.max_value
def __repr__(self):
return 'Interval(%s, %s)' % (self._min_value, self._max_value)
# Expose these so that users don't have to import msgpack to gain these.
PackException = msgpack.PackException
UnpackException = msgpack.UnpackException
class HandlerRegistry(object):
"""Registry of *type* specific msgpack handlers extensions.
See: https://github.com/msgpack/msgpack/blob/master/spec.md#formats-ext
Do note that due to the current limitations in the msgpack python
library we can not *currently* dump/load a tuple without converting
it to a list.
This may be fixed in: https://github.com/msgpack/msgpack-python/pull/100
.. versionadded:: 1.5
"""
reserved_extension_range = Interval(0, 32)
"""
These ranges are **always** reserved for use by ``oslo.serialization`` and
its own add-ons extensions (these extensions are meant to be generally
applicable to all of python).
"""
non_reserved_extension_range = Interval(33, 127)
"""
These ranges are **always** reserved for use by applications building
their own type specific handlers (the meaning of extensions in this range
will typically vary depending on application).
"""
min_value = 0
"""
Applications can assign 0 to 127 to store application (or library)
specific type handlers; see above ranges for what is reserved by this
library and what is not.
"""
max_value = 127
"""
Applications can assign 0 to 127 to store application (or library)
specific type handlers; see above ranges for what is reserved by this
library and what is not.
"""
def __init__(self):
self._handlers = {}
self._num_handlers = 0
self.frozen = False
def __iter__(self):
"""Iterates over **all** registered handlers."""
for handlers in six.itervalues(self._handlers):
for h in handlers:
yield h
def register(self, handler, reserved=False, override=False):
"""Register a extension handler to handle its associated type."""
if self.frozen:
raise ValueError("Frozen handler registry can't be modified")
if reserved:
ok_interval = self.reserved_extension_range
else:
ok_interval = self.non_reserved_extension_range
ident = handler.identity
if ident < ok_interval.min_value:
raise ValueError("Handler '%s' identity must be greater"
" or equal to %s" % (handler,
ok_interval.min_value))
if ident > ok_interval.max_value:
raise ValueError("Handler '%s' identity must be less than"
" or equal to %s" % (handler,
ok_interval.max_value))
if ident in self._handlers and override:
existing_handlers = self._handlers[ident]
# Insert at the front so that overrides get selected before
# whatever existed before the override...
existing_handlers.insert(0, handler)
self._num_handlers += 1
elif ident in self._handlers and not override:
raise ValueError("Already registered handler(s) with"
" identity %s: %s" % (ident,
self._handlers[ident]))
else:
self._handlers[ident] = [handler]
self._num_handlers += 1
def __len__(self):
"""Return how many extension handlers are registered."""
return self._num_handlers
def __contains__(self, identity):
"""Return if any handler exists for the given identity (number)."""
return identity in self._handlers
def copy(self, unfreeze=False):
"""Deep copy the given registry (and its handlers)."""
c = type(self)()
for ident, handlers in six.iteritems(self._handlers):
cloned_handlers = []
for h in handlers:
if hasattr(h, 'copy'):
h = h.copy(c)
cloned_handlers.append(h)
c._handlers[ident] = cloned_handlers
c._num_handlers += len(cloned_handlers)
if not unfreeze and self.frozen:
c.frozen = True
return c
def get(self, identity):
"""Get the handler for the given numeric identity (or none)."""
maybe_handlers = self._handlers.get(identity)
if maybe_handlers:
# Prefer the first (if there are many) as this is how we
# override built-in extensions (for those that wish to do this).
return maybe_handlers[0]
else:
return None
def match(self, obj):
"""Match the registries handlers to the given object (or none)."""
for possible_handlers in six.itervalues(self._handlers):
for h in possible_handlers:
if isinstance(obj, h.handles):
return h
return None
class UUIDHandler(object):
identity = 0
handles = (uuid.UUID,)
@staticmethod
def serialize(obj):
return six.text_type(obj.hex).encode('ascii')
@staticmethod
def deserialize(data):
return uuid.UUID(hex=six.text_type(data, encoding='ascii'))
class DateTimeHandler(object):
identity = 1
handles = (datetime.datetime,)
def __init__(self, registry):
self._registry = registry
def copy(self, registry):
return type(self)(registry)
def serialize(self, dt):
dct = {
u'day': dt.day,
u'month': dt.month,
u'year': dt.year,
u'hour': dt.hour,
u'minute': dt.minute,
u'second': dt.second,
u'microsecond': dt.microsecond,
}
if dt.tzinfo:
tz = dt.tzinfo.tzname(None)
if six.PY2:
tz = tz.decode("ascii")
dct[u'tz'] = tz
return dumps(dct, registry=self._registry)
def deserialize(self, blob):
dct = loads(blob, registry=self._registry)
if six.PY3 and b"day" in dct:
# NOTE(sileht): oslo.serialization <= 2.4.1 was
# storing thing as unicode for py3 while is was
# bytes for py2
# For python2, we don't care bytes or unicode works
# for dict keys and tz
# But for python3, we have some backward compability
# to take care in case of the payload have been produced
# by python2 and now read by python3
dct = dict((k.decode("ascii"), v) for k, v in dct.items())
if 'tz' in dct:
dct['tz'] = dct['tz'].decode("ascii")
dt = datetime.datetime(day=dct['day'],
month=dct['month'],
year=dct['year'],
hour=dct['hour'],
minute=dct['minute'],
second=dct['second'],
microsecond=dct['microsecond'])
if 'tz' in dct:
tzinfo = timezone(dct['tz'])
dt = tzinfo.localize(dt)
return dt
class CountHandler(object):
identity = 2
handles = (itertools.count,)
@staticmethod
def serialize(obj):
# FIXME(harlowja): figure out a better way to avoid hacking into
# the string representation of count to get at the right numbers...
obj = six.text_type(obj)
start = obj.find("(") + 1
end = obj.rfind(")")
pieces = obj[start:end].split(",")
if len(pieces) == 1:
start = int(pieces[0])
step = 1
else:
start = int(pieces[0])
step = int(pieces[1])
return msgpack.packb([start, step])
@staticmethod
def deserialize(data):
value = msgpack.unpackb(data)
start, step = value
return itertools.count(start, step)
if netaddr is not None:
class NetAddrIPHandler(object):
identity = 3
handles = (netaddr.IPAddress,)
@staticmethod
def serialize(obj):
return msgpack.packb(obj.value)
@staticmethod
def deserialize(data):
return netaddr.IPAddress(msgpack.unpackb(data))
else:
NetAddrIPHandler = None
class SetHandler(object):
identity = 4
handles = (set,)
def __init__(self, registry):
self._registry = registry
def copy(self, registry):
return type(self)(registry)
def serialize(self, obj):
return dumps(list(obj), registry=self._registry)
def deserialize(self, data):
return self.handles[0](loads(data, registry=self._registry))
class FrozenSetHandler(SetHandler):
identity = 5
handles = (frozenset,)
class XMLRPCDateTimeHandler(object):
handles = (xmlrpclib.DateTime,)
identity = 6
def __init__(self, registry):
self._handler = DateTimeHandler(registry)
def copy(self, registry):
return type(self)(registry)
def serialize(self, obj):
dt = datetime.datetime(*tuple(obj.timetuple())[:6])
return self._handler.serialize(dt)
def deserialize(self, blob):
dt = self._handler.deserialize(blob)
return xmlrpclib.DateTime(dt.timetuple())
class DateHandler(object):
identity = 7
handles = (datetime.date,)
def __init__(self, registry):
self._registry = registry
def copy(self, registry):
return type(self)(registry)
def serialize(self, d):
dct = {
u'year': d.year,
u'month': d.month,
u'day': d.day,
}
return dumps(dct, registry=self._registry)
def deserialize(self, blob):
dct = loads(blob, registry=self._registry)
if six.PY3 and b"day" in dct:
# NOTE(sileht): see DateTimeHandler.deserialize()
dct = dict((k.decode("ascii"), v) for k, v in dct.items())
return datetime.date(year=dct['year'],
month=dct['month'],
day=dct['day'])
def _serializer(registry, obj):
handler = registry.match(obj)
if handler is None:
raise ValueError("No serialization handler registered"
" for type '%s'" % (type(obj).__name__))
return msgpack.ExtType(handler.identity, handler.serialize(obj))
def _unserializer(registry, code, data):
handler = registry.get(code)
if not handler:
return msgpack.ExtType(code, data)
else:
return handler.deserialize(data)
def _create_default_registry():
registry = HandlerRegistry()
registry.register(DateTimeHandler(registry), reserved=True)
registry.register(DateHandler(registry), reserved=True)
registry.register(UUIDHandler(), reserved=True)
registry.register(CountHandler(), reserved=True)
registry.register(SetHandler(registry), reserved=True)
registry.register(FrozenSetHandler(registry), reserved=True)
if netaddr is not None:
registry.register(NetAddrIPHandler(), reserved=True)
registry.register(XMLRPCDateTimeHandler(registry), reserved=True)
registry.frozen = True
return registry
default_registry = _create_default_registry()
"""
Default, read-only/frozen registry that will be used when none is provided.
This registry has msgpack extensions for the following:
* ``DateTime`` objects.
* ``Date`` objects.
* ``UUID`` objects.
* ``itertools.count`` objects/iterators.
* ``set`` and ``frozenset`` container(s).
* ``netaddr.IPAddress`` objects (only if ``netaddr`` is importable).
* ``xmlrpclib.DateTime`` datetime objects.
.. versionadded:: 1.5
"""
def load(fp, registry=None):
"""Deserialize ``fp`` into a Python object.
.. versionchanged:: 1.5
Added *registry* parameter.
"""
if registry is None:
registry = default_registry
# NOTE(harlowja): the reason we can't use the more native msgpack functions
# here is that the unpack() function (oddly) doesn't seem to take a
# 'ext_hook' parameter..
ext_hook = functools.partial(_unserializer, registry)
return msgpack.Unpacker(fp, ext_hook=ext_hook, encoding='utf-8').unpack()
def dump(obj, fp, registry=None):
"""Serialize ``obj`` as a messagepack formatted stream to ``fp``.
.. versionchanged:: 1.5
Added *registry* parameter.
"""
if registry is None:
registry = default_registry
return msgpack.pack(obj, fp,
default=functools.partial(_serializer, registry),
use_bin_type=True)
def dumps(obj, registry=None):
"""Serialize ``obj`` to a messagepack formatted ``str``.
.. versionchanged:: 1.5
Added *registry* parameter.
"""
if registry is None:
registry = default_registry
return msgpack.packb(obj,
default=functools.partial(_serializer, registry),
use_bin_type=True)
def loads(s, registry=None):
"""Deserialize ``s`` messagepack ``str`` into a Python object.
.. versionchanged:: 1.5
Added *registry* parameter.
"""
if registry is None:
registry = default_registry
ext_hook = functools.partial(_unserializer, registry)
return msgpack.unpackb(s, ext_hook=ext_hook, encoding='utf-8')