Files
deb-python-taskflow/taskflow/persistence/backends/impl_zookeeper.py
Joshua Harlow e393612054 Use checked commit when committing kazoo transactions
To avoid the case where the transaction appears to have
committed successfully (when it actually has not) create
a new checked_commit utility function and use it in the
zookeeper persistence backend to ensure that the transaction
has completed correctly (when it has not, a new kazoo
exception is raised that exposes the failures to the caller, which can
then reraise that exception, examine the failures, or take some
other action).

Change-Id: Ib169b36abb568a0e1516c4c786fccb30856661e1
2014-07-25 11:01:19 -07:00

422 lines
16 KiB
Python

# -*- coding: utf-8 -*-
# Copyright (C) 2014 AT&T Labs All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import contextlib
import logging
from kazoo import exceptions as k_exc
from kazoo.protocol import paths
from taskflow import exceptions as exc
from taskflow.openstack.common import jsonutils
from taskflow.persistence.backends import base
from taskflow.persistence import logbook
from taskflow.utils import kazoo_utils as k_utils
from taskflow.utils import misc
LOG = logging.getLogger(__name__)
# Transaction support was added in 3.4.0
MIN_ZK_VERSION = (3, 4, 0)
class ZkBackend(base.Backend):
    """A zookeeper backend.

    This backend writes logbooks, flow details, and atom details to a provided
    base path in zookeeper. It will create and store those objects in three
    key directories (one for logbooks, one for flow details and one for atom
    details). It creates those associated directories and then creates files
    inside those directories that represent the contents of those objects for
    later reading and writing.

    Example conf:

    conf = {
        "hosts": "192.168.0.1:2181,192.168.0.2:2181,192.168.0.3:2181",
        "path": "/taskflow",
    }
    """
    def __init__(self, conf, client=None):
        super(ZkBackend, self).__init__(conf)
        base_path = str(conf.get("path", "/taskflow"))
        if not base_path:
            raise ValueError("Empty zookeeper path is disallowed")
        if not paths.isabs(base_path):
            raise ValueError("Zookeeper path must be absolute")
        self._path = base_path
        # A client handed to us is externally managed; only clients that we
        # create ourselves are finalized on close().
        self._owned = client is None
        self._client = k_utils.make_client(conf) if self._owned else client
        self._validated = False

    @property
    def path(self):
        # Absolute zookeeper base path under which all data is stored.
        return self._path

    def get_connection(self):
        """Return a connection, validating server compatibility once."""
        conn = ZkConnection(self, self._client)
        if not self._validated:
            conn.validate()
            self._validated = True
        return conn

    def close(self):
        """Close the backend (finalizing the client only if we own it)."""
        self._validated = False
        if self._owned:
            try:
                k_utils.finalize_client(self._client)
            except (k_exc.KazooException, k_exc.ZookeeperError) as e:
                raise exc.StorageFailure("Unable to finalize client", e)
class ZkConnection(base.Connection):
    """A connection to a zookeeper persistence backend.

    All mutating operations (update, save, destroy, clear) are grouped into
    zookeeper transactions and committed via ``k_utils.checked_commit`` so
    that partial failures surface as exceptions instead of silently leaving
    the stored data half-written. Read-only operations are performed without
    a transaction.
    """

    def __init__(self, backend, client):
        self._backend = backend
        self._client = client
        # Each object kind lives in its own directory under the backend path.
        self._book_path = paths.join(self._backend.path, "books")
        self._flow_path = paths.join(self._backend.path, "flow_details")
        self._atom_path = paths.join(self._backend.path, "atom_details")
        with self._exc_wrapper():
            # NOOP if already started.
            self._client.start()

    def validate(self):
        """Verify the zookeeper server is a compatible version.

        Transactions (which this backend requires) were only added in
        zookeeper 3.4.0 (see MIN_ZK_VERSION).

        :raises exc.StorageFailure: if the server version is too old.
        """
        with self._exc_wrapper():
            try:
                k_utils.check_compatible(self._client, MIN_ZK_VERSION)
            except exc.IncompatibleVersion as e:
                raise exc.StorageFailure("Backend storage is not a"
                                         " compatible version", e)

    @property
    def backend(self):
        return self._backend

    @property
    def book_path(self):
        return self._book_path

    @property
    def flow_path(self):
        return self._flow_path

    @property
    def atom_path(self):
        return self._atom_path

    def close(self):
        # The client lifecycle is owned by the backend, not the connection.
        pass

    def upgrade(self):
        """Creates the initial paths (if they already don't exist)."""
        with self._exc_wrapper():
            for path in (self.book_path, self.flow_path, self.atom_path):
                self._client.ensure_path(path)

    @contextlib.contextmanager
    def _exc_wrapper(self):
        """Exception context-manager which wraps kazoo exceptions.

        This is used to capture and wrap any kazoo specific exceptions and
        then group them into corresponding taskflow exceptions (not doing
        that would expose the underlying kazoo exception model).
        """
        try:
            yield
        except self._client.handler.timeout_exception as e:
            raise exc.StorageFailure("Storage backend timeout", e)
        except k_exc.SessionExpiredError as e:
            raise exc.StorageFailure("Storage backend session"
                                     " has expired", e)
        except k_exc.NoNodeError as e:
            raise exc.NotFound("Storage backend node not found: %s" % e)
        except k_exc.NodeExistsError as e:
            raise exc.Duplicate("Storage backend duplicate node: %s" % e)
        except (k_exc.KazooException, k_exc.ZookeeperError) as e:
            raise exc.StorageFailure("Storage backend internal error", e)

    def update_atom_details(self, ad):
        """Update a atom detail transactionally."""
        with self._exc_wrapper():
            txn = self._client.transaction()
            ad = self._update_atom_details(ad, txn)
            k_utils.checked_commit(txn)
            return ad

    def _update_atom_details(self, ad, txn, create_missing=False):
        """Merge ``ad`` into the stored atom detail inside ``txn``.

        :param ad: the atom detail to merge/store.
        :param txn: the zookeeper transaction to queue operations into.
        :param create_missing: when true, a non-existent atom node is
            created (inside the transaction) instead of raising NotFound.
        """
        # Determine whether the desired data exists or not.
        ad_path = paths.join(self.atom_path, ad.uuid)
        e_ad = None
        try:
            ad_data, _zstat = self._client.get(ad_path)
        except k_exc.NoNodeError:
            # Not-existent: create or raise exception.
            #
            # NOTE(review): previously this always raised, which broke
            # callers (e.g. _update_flow_details) that pass
            # create_missing=True and expect the missing node to be created
            # as part of the transaction (mirroring _update_flow_details).
            if create_missing:
                txn.create(ad_path)
            else:
                raise exc.NotFound("No atom details found with id: %s"
                                   % ad.uuid)
        else:
            # Existent: read it out (tolerating nodes whose payload does
            # not yet carry the expected 'type'/'atom' keys).
            try:
                ad_data = misc.decode_json(ad_data)
                ad_cls = logbook.atom_detail_class(ad_data['type'])
                e_ad = ad_cls.from_dict(ad_data['atom'])
            except KeyError:
                pass
        # Update and write it back
        if e_ad:
            e_ad = e_ad.merge(ad)
        else:
            e_ad = ad
        ad_data = base._format_atom(e_ad)
        txn.set_data(ad_path,
                     misc.binary_encode(jsonutils.dumps(ad_data)))
        return e_ad

    def get_atom_details(self, ad_uuid):
        """Read a atom detail.

        *Read-only*, so no need of zk transaction.
        """
        with self._exc_wrapper():
            return self._get_atom_details(ad_uuid)

    def _get_atom_details(self, ad_uuid):
        ad_path = paths.join(self.atom_path, ad_uuid)
        try:
            ad_data, _zstat = self._client.get(ad_path)
        except k_exc.NoNodeError:
            raise exc.NotFound("No atom details found with id: %s" % ad_uuid)
        else:
            ad_data = misc.decode_json(ad_data)
            ad_cls = logbook.atom_detail_class(ad_data['type'])
            return ad_cls.from_dict(ad_data['atom'])

    def update_flow_details(self, fd):
        """Update a flow detail transactionally."""
        with self._exc_wrapper():
            txn = self._client.transaction()
            fd = self._update_flow_details(fd, txn)
            k_utils.checked_commit(txn)
            return fd

    def _update_flow_details(self, fd, txn, create_missing=False):
        """Merge ``fd`` (and its atom details) into storage inside ``txn``."""
        # Determine whether the desired data exists or not
        fd_path = paths.join(self.flow_path, fd.uuid)
        try:
            fd_data, _zstat = self._client.get(fd_path)
        except k_exc.NoNodeError:
            # Not-existent: create or raise exception
            if create_missing:
                txn.create(fd_path)
                e_fd = logbook.FlowDetail(name=fd.name, uuid=fd.uuid)
            else:
                raise exc.NotFound("No flow details found with id: %s"
                                   % fd.uuid)
        else:
            # Existent: read it out
            e_fd = logbook.FlowDetail.from_dict(misc.decode_json(fd_data))
        # Update and write it back
        e_fd = e_fd.merge(fd)
        fd_data = e_fd.to_dict()
        txn.set_data(fd_path, misc.binary_encode(jsonutils.dumps(fd_data)))
        for ad in fd:
            ad_path = paths.join(fd_path, ad.uuid)
            # NOTE(harlowja): create an entry in the flow detail path
            # for the provided atom detail so that a reference exists
            # from the flow detail to its atom details.
            if not self._client.exists(ad_path):
                txn.create(ad_path)
            e_fd.add(self._update_atom_details(ad, txn, create_missing=True))
        return e_fd

    def get_flow_details(self, fd_uuid):
        """Read a flow detail.

        *Read-only*, so no need of zk transaction.
        """
        with self._exc_wrapper():
            return self._get_flow_details(fd_uuid)

    def _get_flow_details(self, fd_uuid):
        fd_path = paths.join(self.flow_path, fd_uuid)
        try:
            fd_data, _zstat = self._client.get(fd_path)
        except k_exc.NoNodeError:
            raise exc.NotFound("No flow details found with id: %s" % fd_uuid)
        fd = logbook.FlowDetail.from_dict(misc.decode_json(fd_data))
        # Child node names under the flow detail are the uuids of its atoms.
        for ad_uuid in self._client.get_children(fd_path):
            fd.add(self._get_atom_details(ad_uuid))
        return fd

    def save_logbook(self, lb):
        """Save (update) a log_book transactionally."""

        def _create_logbook(lb_path, txn):
            # Freshly create the logbook node plus all of its flow/atom
            # detail nodes (and the reference nodes tying them together).
            lb_data = lb.to_dict(marshal_time=True)
            txn.create(lb_path, misc.binary_encode(jsonutils.dumps(lb_data)))
            for fd in lb:
                # NOTE(harlowja): create an entry in the logbook path
                # for the provided flow detail so that a reference exists
                # from the logbook to its flow details.
                txn.create(paths.join(lb_path, fd.uuid))
                fd_path = paths.join(self.flow_path, fd.uuid)
                fd_data = jsonutils.dumps(fd.to_dict())
                txn.create(fd_path, misc.binary_encode(fd_data))
                for ad in fd:
                    # NOTE(harlowja): create an entry in the flow detail path
                    # for the provided atom detail so that a reference exists
                    # from the flow detail to its atom details.
                    txn.create(paths.join(fd_path, ad.uuid))
                    ad_path = paths.join(self.atom_path, ad.uuid)
                    ad_data = base._format_atom(ad)
                    txn.create(ad_path,
                               misc.binary_encode(jsonutils.dumps(ad_data)))
            return lb

        def _update_logbook(lb_path, lb_data, txn):
            # Merge the provided logbook into the existing stored one.
            e_lb = logbook.LogBook.from_dict(misc.decode_json(lb_data),
                                             unmarshal_time=True)
            e_lb = e_lb.merge(lb)
            lb_data = e_lb.to_dict(marshal_time=True)
            txn.set_data(lb_path, misc.binary_encode(jsonutils.dumps(lb_data)))
            for fd in lb:
                fd_path = paths.join(lb_path, fd.uuid)
                if not self._client.exists(fd_path):
                    # NOTE(harlowja): create an entry in the logbook path
                    # for the provided flow detail so that a reference exists
                    # from the logbook to its flow details.
                    txn.create(fd_path)
                e_fd = self._update_flow_details(fd, txn, create_missing=True)
                e_lb.add(e_fd)
            return e_lb

        with self._exc_wrapper():
            txn = self._client.transaction()
            # Determine whether the desired data exists or not.
            lb_path = paths.join(self.book_path, lb.uuid)
            try:
                lb_data, _zstat = self._client.get(lb_path)
            except k_exc.NoNodeError:
                # Create a new logbook since it doesn't exist.
                e_lb = _create_logbook(lb_path, txn)
            else:
                # Otherwise update the existing logbook instead.
                e_lb = _update_logbook(lb_path, lb_data, txn)
            k_utils.checked_commit(txn)
            return e_lb

    def _get_logbook(self, lb_uuid):
        lb_path = paths.join(self.book_path, lb_uuid)
        try:
            lb_data, _zstat = self._client.get(lb_path)
        except k_exc.NoNodeError:
            raise exc.NotFound("No logbook found with id: %s" % lb_uuid)
        else:
            lb = logbook.LogBook.from_dict(misc.decode_json(lb_data),
                                           unmarshal_time=True)
            # Child node names under the logbook are its flow detail uuids.
            for fd_uuid in self._client.get_children(lb_path):
                lb.add(self._get_flow_details(fd_uuid))
            return lb

    def get_logbook(self, lb_uuid):
        """Read a logbook.

        *Read-only*, so no need of zk transaction.
        """
        with self._exc_wrapper():
            return self._get_logbook(lb_uuid)

    def get_logbooks(self):
        """Read all logbooks.

        *Read-only*, so no need of zk transaction.
        """
        with self._exc_wrapper():
            for lb_uuid in self._client.get_children(self.book_path):
                yield self._get_logbook(lb_uuid)

    def destroy_logbook(self, lb_uuid):
        """Destroy (delete) a log_book transactionally."""

        def _destroy_atom_details(ad_uuid, txn):
            ad_path = paths.join(self.atom_path, ad_uuid)
            if not self._client.exists(ad_path):
                raise exc.NotFound("No atom details found with id: %s"
                                   % ad_uuid)
            txn.delete(ad_path)

        def _destroy_flow_details(fd_uuid, txn):
            fd_path = paths.join(self.flow_path, fd_uuid)
            if not self._client.exists(fd_path):
                raise exc.NotFound("No flow details found with id: %s"
                                   % fd_uuid)
            # Delete the atoms (and the reference nodes) before the flow
            # detail node itself so nothing is left dangling.
            for ad_uuid in self._client.get_children(fd_path):
                _destroy_atom_details(ad_uuid, txn)
                txn.delete(paths.join(fd_path, ad_uuid))
            txn.delete(fd_path)

        def _destroy_logbook(lb_uuid, txn):
            lb_path = paths.join(self.book_path, lb_uuid)
            if not self._client.exists(lb_path):
                raise exc.NotFound("No logbook found with id: %s" % lb_uuid)
            for fd_uuid in self._client.get_children(lb_path):
                _destroy_flow_details(fd_uuid, txn)
                txn.delete(paths.join(lb_path, fd_uuid))
            txn.delete(lb_path)

        with self._exc_wrapper():
            txn = self._client.transaction()
            _destroy_logbook(lb_uuid, txn)
            k_utils.checked_commit(txn)

    def clear_all(self, delete_dirs=True):
        """Delete all data transactionally."""
        with self._exc_wrapper():
            txn = self._client.transaction()
            # Delete all data under logbook path.
            for lb_uuid in self._client.get_children(self.book_path):
                lb_path = paths.join(self.book_path, lb_uuid)
                for fd_uuid in self._client.get_children(lb_path):
                    txn.delete(paths.join(lb_path, fd_uuid))
                txn.delete(lb_path)
            # Delete all data under flow detail path.
            for fd_uuid in self._client.get_children(self.flow_path):
                fd_path = paths.join(self.flow_path, fd_uuid)
                for ad_uuid in self._client.get_children(fd_path):
                    txn.delete(paths.join(fd_path, ad_uuid))
                txn.delete(fd_path)
            # Delete all data under atom detail path.
            for ad_uuid in self._client.get_children(self.atom_path):
                ad_path = paths.join(self.atom_path, ad_uuid)
                txn.delete(ad_path)
            # Delete containing directories.
            if delete_dirs:
                txn.delete(self.book_path)
                txn.delete(self.atom_path)
                txn.delete(self.flow_path)
            k_utils.checked_commit(txn)