A few worker-engine cleanups

- Remove usage of % with logs, just let the underlying
  LOG do the interpolation for us if the log level passes.
- Use LOG.exception instead of LOG.warn and then including
  the exception as an interpolated variable. Just let
  LOG.exception output the exception trace and info.
- Capture the failure object before doing other work using
  a new context manager that can yield back a failure object
  before further code is called (to avoid a context switch
  happening in eventlet or elsewhere that would clear the
  exception state).

Change-Id: Ic1a504ba69c56ba226b1a067b5ade4590c245dd8
This commit is contained in:
Joshua Harlow
2014-02-21 17:35:13 -08:00
parent 3e495e44f3
commit 1cf69b9ec6
6 changed files with 92 additions and 61 deletions

View File

@@ -45,7 +45,7 @@ class WorkerTaskExecutor(executor.TaskExecutorBase):
# TODO(skudriashev): This data should be collected from workers # TODO(skudriashev): This data should be collected from workers
# using broadcast messages directly. # using broadcast messages directly.
self._workers_info = {} self._workers_info = {}
for topic, tasks in workers_info.items(): for topic, tasks in six.iteritems(workers_info):
for task in tasks: for task in tasks:
self._workers_info[task] = topic self._workers_info[task] = topic
@@ -59,12 +59,12 @@ class WorkerTaskExecutor(executor.TaskExecutorBase):
def _on_message(self, response, message): def _on_message(self, response, message):
"""This method is called on incoming response.""" """This method is called on incoming response."""
LOG.debug("Got response: %s" % response) LOG.debug("Got response: %s", response)
try: try:
# acknowledge message # acknowledge message before processing.
message.ack() message.ack()
except kombu_exc.MessageStateError as e: except kombu_exc.MessageStateError:
LOG.warning("Failed to acknowledge AMQP message: %s" % e) LOG.exception("Failed to acknowledge AMQP message")
else: else:
LOG.debug("AMQP message acknowledged.") LOG.debug("AMQP message acknowledged.")
# get task uuid from message correlation id parameter # get task uuid from message correlation id parameter
@@ -73,7 +73,7 @@ class WorkerTaskExecutor(executor.TaskExecutorBase):
except KeyError: except KeyError:
LOG.warning("Got message with no 'correlation_id' property.") LOG.warning("Got message with no 'correlation_id' property.")
else: else:
LOG.debug("Task uuid: '%s'" % task_uuid) LOG.debug("Task uuid: '%s'", task_uuid)
self._process_response(task_uuid, response) self._process_response(task_uuid, response)
def _process_response(self, task_uuid, response): def _process_response(self, task_uuid, response):
@@ -81,7 +81,7 @@ class WorkerTaskExecutor(executor.TaskExecutorBase):
try: try:
task = self._remote_tasks[task_uuid] task = self._remote_tasks[task_uuid]
except KeyError: except KeyError:
LOG.debug("Task with id='%s' not found." % task_uuid) LOG.debug("Task with id='%s' not found.", task_uuid)
else: else:
state = response.pop('state') state = response.pop('state')
if state == pr.RUNNING: if state == pr.RUNNING:
@@ -89,15 +89,14 @@ class WorkerTaskExecutor(executor.TaskExecutorBase):
elif state == pr.PROGRESS: elif state == pr.PROGRESS:
task.on_progress(**response) task.on_progress(**response)
elif state == pr.FAILURE: elif state == pr.FAILURE:
response['result'] = pu.failure_from_dict( response['result'] = pu.failure_from_dict(response['result'])
response['result'])
task.set_result(**response) task.set_result(**response)
self._remove_remote_task(task) self._remove_remote_task(task)
elif state == pr.SUCCESS: elif state == pr.SUCCESS:
task.set_result(**response) task.set_result(**response)
self._remove_remote_task(task) self._remove_remote_task(task)
else: else:
LOG.warning("Unexpected response status: '%s'" % state) LOG.warning("Unexpected response status: '%s'", state)
def _on_wait(self): def _on_wait(self):
"""This function is called cyclically between draining events """This function is called cyclically between draining events
@@ -106,9 +105,9 @@ class WorkerTaskExecutor(executor.TaskExecutorBase):
expired_tasks = [task for task in six.itervalues(self._remote_tasks) expired_tasks = [task for task in six.itervalues(self._remote_tasks)
if task.expired] if task.expired]
for task in expired_tasks: for task in expired_tasks:
LOG.debug("Task request '%s' is expired." % task) LOG.debug("Task request '%s' has expired.", task)
task.set_result(misc.Failure.from_exception( task.set_result(misc.Failure.from_exception(
exc.Timeout("Task request '%s' is expired" % task))) exc.Timeout("Task request '%s' has expired" % task)))
del self._remote_tasks[task.uuid] del self._remote_tasks[task.uuid]
def _store_remote_task(self, task): def _store_remote_task(self, task):
@@ -134,17 +133,18 @@ class WorkerTaskExecutor(executor.TaskExecutorBase):
topic = self._workers_info[remote_task.name] topic = self._workers_info[remote_task.name]
except KeyError: except KeyError:
raise exc.NotFound("Workers topic not found for the '%s'" raise exc.NotFound("Workers topic not found for the '%s'"
"task." % remote_task.name) " task" % remote_task.name)
else: else:
# publish request # publish request
request = remote_task.request request = remote_task.request
LOG.debug("Sending request: %s" % request) LOG.debug("Sending request: %s", request)
self._proxy.publish(request, remote_task.uuid, self._proxy.publish(request, remote_task.uuid,
routing_key=topic, reply_to=self._uuid) routing_key=topic, reply_to=self._uuid)
except Exception as e: except Exception:
LOG.error("Failed to submit the '%s' task: %s" % (remote_task, e)) with misc.capture_failure() as failure:
self._remove_remote_task(remote_task) LOG.exception("Failed to submit the '%s' task", remote_task)
remote_task.set_result(misc.Failure()) self._remove_remote_task(remote_task)
remote_task.set_result(failure)
return remote_task.result return remote_task.result
def execute_task(self, task, task_uuid, arguments, def execute_task(self, task, task_uuid, arguments,

View File

@@ -84,7 +84,7 @@ class Proxy(object):
def start(self): def start(self):
"""Start proxy.""" """Start proxy."""
LOG.info("Starting to consume from the '%s' exchange." % LOG.info("Starting to consume from the '%s' exchange.",
self._exchange_name) self._exchange_name)
with kombu.connections[self._conn].acquire(block=True) as conn: with kombu.connections[self._conn].acquire(block=True) as conn:
queue = self._make_queue(self._topic, self._exchange, channel=conn) queue = self._make_queue(self._topic, self._exchange, channel=conn)
@@ -104,16 +104,16 @@ class Proxy(object):
queue.delete(if_unused=True) queue.delete(if_unused=True)
except (amqp_exc.PreconditionFailed, amqp_exc.NotFound): except (amqp_exc.PreconditionFailed, amqp_exc.NotFound):
pass pass
except Exception as e: except Exception:
LOG.error("Failed to delete the '%s' queue: %s" % LOG.exception("Failed to delete the '%s' queue",
(queue.name, e)) queue.name)
try: try:
self._exchange.delete(if_unused=True) self._exchange.delete(if_unused=True)
except (amqp_exc.PreconditionFailed, amqp_exc.NotFound): except (amqp_exc.PreconditionFailed, amqp_exc.NotFound):
pass pass
except Exception as e: except Exception:
LOG.error("Failed to delete the '%s' exchange: %s" % LOG.exception("Failed to delete the '%s' exchange",
(self._exchange.name, e)) self._exchange.name)
def wait(self): def wait(self):
"""Wait until proxy is started.""" """Wait until proxy is started."""

View File

@@ -38,7 +38,7 @@ class Server(object):
def _on_message(self, request, message): def _on_message(self, request, message):
"""This method is called on incoming request.""" """This method is called on incoming request."""
LOG.debug("Got request: %s" % request) LOG.debug("Got request: %s", request)
# NOTE(skudriashev): Process all incoming requests only if proxy is # NOTE(skudriashev): Process all incoming requests only if proxy is
# running, otherwise reject and requeue them. # running, otherwise reject and requeue them.
if self._proxy.is_running: if self._proxy.is_running:
@@ -47,20 +47,20 @@ class Server(object):
try: try:
# acknowledge message # acknowledge message
message.ack() message.ack()
except kombu_exc.MessageStateError as e: except kombu_exc.MessageStateError:
LOG.warning("Failed to acknowledge AMQP message: %s" % e) LOG.exception("Failed to acknowledge AMQP message")
else: else:
LOG.debug("AMQP message acknowledged.") LOG.debug("AMQP message acknowledged")
# spawn new thread to process request # spawn new thread to process request
self._executor.submit(self._process_request, request, message) self._executor.submit(self._process_request, request, message)
else: else:
try: try:
# reject and requeue message # reject and requeue message
message.reject(requeue=True) message.reject(requeue=True)
except kombu_exc.MessageStateError as e: except kombu_exc.MessageStateError:
LOG.warning("Failed to reject/requeue AMQP message: %s" % e) LOG.exception("Failed to reject/requeue AMQP message")
else: else:
LOG.debug("AMQP message rejected and requeued.") LOG.debug("AMQP message rejected and requeued")
@staticmethod @staticmethod
def _parse_request(task, task_name, action, arguments, result=None, def _parse_request(task, task_name, action, arguments, result=None,
@@ -93,7 +93,7 @@ class Server(object):
try: try:
properties.append(message.properties[prop]) properties.append(message.properties[prop])
except KeyError: except KeyError:
raise ValueError("The '%s' message property is missing." % raise ValueError("The '%s' message property is missing" %
prop) prop)
return properties return properties
@@ -101,11 +101,11 @@ class Server(object):
def _reply(self, reply_to, task_uuid, state=pr.FAILURE, **kwargs): def _reply(self, reply_to, task_uuid, state=pr.FAILURE, **kwargs):
"""Send reply to the `reply_to` queue.""" """Send reply to the `reply_to` queue."""
response = dict(state=state, **kwargs) response = dict(state=state, **kwargs)
LOG.debug("Send reply: %s" % response) LOG.debug("Sending reply: %s", response)
try: try:
self._proxy.publish(response, task_uuid, reply_to) self._proxy.publish(response, task_uuid, reply_to)
except Exception as e: except Exception:
LOG.error("Failed to send reply: %s" % e) LOG.exception("Failed to send reply")
def _on_update_progress(self, reply_to, task_uuid, task, event_data, def _on_update_progress(self, reply_to, task_uuid, task, event_data,
progress): progress):
@@ -115,12 +115,12 @@ class Server(object):
def _process_request(self, request, message): def _process_request(self, request, message):
"""Process request in separate thread and reply back.""" """Process request in separate thread and reply back."""
# parse broker message first to get the `reply_to` and the `task_uuid` # NOTE(skudriashev): parse broker message first to get the `reply_to`
# parameters to have possibility to reply back # and the `task_uuid` parameters to have possibility to reply back.
try: try:
reply_to, task_uuid = self._parse_message(message) reply_to, task_uuid = self._parse_message(message)
except ValueError as e: except ValueError:
LOG.error("Failed to parse broker message: %s" % e) LOG.exception("Failed to parse broker message")
return return
else: else:
# prepare task progress callback # prepare task progress callback
@@ -135,27 +135,30 @@ class Server(object):
task, action, action_args = self._parse_request(**request) task, action, action_args = self._parse_request(**request)
action_args.update(task_uuid=task_uuid, action_args.update(task_uuid=task_uuid,
progress_callback=progress_callback) progress_callback=progress_callback)
except ValueError as e: except ValueError:
LOG.error("Failed to parse request: %s" % e) with misc.capture_failure() as failure:
reply_callback(result=pu.failure_to_dict(misc.Failure())) LOG.exception("Failed to parse request")
return reply_callback(result=pu.failure_to_dict(failure))
return
# get task endpoint # get task endpoint
try: try:
endpoint = self._endpoints[task] endpoint = self._endpoints[task]
except KeyError: except KeyError:
LOG.error("The '%s' task endpoint does not exist." % task) with misc.capture_failure() as failure:
reply_callback(result=pu.failure_to_dict(misc.Failure())) LOG.exception("The '%s' task endpoint does not exist", task)
return reply_callback(result=pu.failure_to_dict(failure))
return
else: else:
reply_callback(state=pr.RUNNING) reply_callback(state=pr.RUNNING)
# perform task action # perform task action
try: try:
result = getattr(endpoint, action)(**action_args) result = getattr(endpoint, action)(**action_args)
except Exception as e: except Exception:
LOG.error("The %s task execution failed: %s" % (endpoint, e)) with misc.capture_failure() as failure:
reply_callback(result=pu.failure_to_dict(misc.Failure())) LOG.exception("The %s task execution failed", endpoint)
reply_callback(result=pu.failure_to_dict(failure))
else: else:
if isinstance(result, misc.Failure): if isinstance(result, misc.Failure):
reply_callback(result=pu.failure_to_dict(result)) reply_callback(result=pu.failure_to_dict(result))

View File

@@ -177,11 +177,11 @@ class TestWorkerTaskExecutor(test.MockTestCase):
self.assertEqual(self.remote_task_mock.mock_calls, []) self.assertEqual(self.remote_task_mock.mock_calls, [])
self.assertEqual(self.message_mock.mock_calls, [mock.call.ack()]) self.assertEqual(self.message_mock.mock_calls, [mock.call.ack()])
@mock.patch('taskflow.engines.worker_based.executor.LOG.warning') @mock.patch('taskflow.engines.worker_based.executor.LOG.exception')
def test_on_message_acknowledge_raises(self, mocked_warning): def test_on_message_acknowledge_raises(self, mocked_exception):
self.message_mock.ack.side_effect = kombu_exc.MessageStateError() self.message_mock.ack.side_effect = kombu_exc.MessageStateError()
self.executor()._on_message({}, self.message_mock) self.executor()._on_message({}, self.message_mock)
self.assertTrue(mocked_warning.called) self.assertTrue(mocked_exception.called)
@mock.patch('taskflow.engines.worker_based.remote_task.misc.wallclock') @mock.patch('taskflow.engines.worker_based.remote_task.misc.wallclock')
def test_on_wait_task_not_expired(self, mock_time): def test_on_wait_task_not_expired(self, mock_time):

View File

@@ -210,8 +210,8 @@ class TestServer(test.MockTestCase):
failures=dict((str(i), utils.FailureMatcher(f)) failures=dict((str(i), utils.FailureMatcher(f))
for i, f in enumerate(failures))))) for i, f in enumerate(failures)))))
@mock.patch("taskflow.engines.worker_based.server.LOG.error") @mock.patch("taskflow.engines.worker_based.server.LOG.exception")
def test_reply_publish_failure(self, mocked_error): def test_reply_publish_failure(self, mocked_exception):
self.proxy_inst_mock.publish.side_effect = RuntimeError('Woot!') self.proxy_inst_mock.publish.side_effect = RuntimeError('Woot!')
# create server and process request # create server and process request
@@ -222,9 +222,7 @@ class TestServer(test.MockTestCase):
mock.call.proxy.publish({'state': 'FAILURE'}, self.task_uuid, mock.call.proxy.publish({'state': 'FAILURE'}, self.task_uuid,
self.reply_to) self.reply_to)
]) ])
self.assertEqual(mocked_error.mock_calls, [ self.assertTrue(mocked_exception.called)
mock.call("Failed to send reply: Woot!")
])
def test_on_update_progress(self): def test_on_update_progress(self):
request = self.request(task='taskflow.tests.utils.ProgressingTask', request = self.request(task='taskflow.tests.utils.ProgressingTask',
@@ -261,15 +259,15 @@ class TestServer(test.MockTestCase):
] ]
self.assertEqual(self.master_mock.mock_calls, master_mock_calls) self.assertEqual(self.master_mock.mock_calls, master_mock_calls)
@mock.patch("taskflow.engines.worker_based.server.LOG.error") @mock.patch("taskflow.engines.worker_based.server.LOG.exception")
def test_process_request_parse_message_failure(self, mocked_error): def test_process_request_parse_message_failure(self, mocked_exception):
self.message_mock.properties = {} self.message_mock.properties = {}
request = self.request() request = self.request()
s = self.server(reset_master_mock=True) s = self.server(reset_master_mock=True)
s._process_request(request, self.message_mock) s._process_request(request, self.message_mock)
self.assertEqual(self.master_mock.mock_calls, []) self.assertEqual(self.master_mock.mock_calls, [])
self.assertTrue(mocked_error.called) self.assertTrue(mocked_exception.called)
@mock.patch('taskflow.engines.worker_based.server.pu') @mock.patch('taskflow.engines.worker_based.server.pu')
def test_process_request_parse_failure(self, pu_mock): def test_process_request_parse_failure(self, pu_mock):

View File

@@ -16,6 +16,7 @@
# under the License. # under the License.
import collections import collections
import contextlib
import copy import copy
import errno import errno
import functools import functools
@@ -452,6 +453,35 @@ def are_equal_exc_info_tuples(ei1, ei2):
return tb1 == tb2 return tb1 == tb2
@contextlib.contextmanager
def capture_failure():
"""Save current exception, and yield back the failure (or raises a
runtime error if no active exception is being handled).
In some cases the exception context can be cleared, resulting in None
being attempted to be saved after an exception handler is run. This
can happen when eventlet switches greenthreads or when running an
exception handler, code raises and catches an exception. In both
cases the exception context will be cleared.
To work around this, we save the exception state, yield a failure and
then run other code.
For example::
except Exception:
with capture_failure() as fail:
LOG.warn("Activating cleanup")
cleanup()
save_failure(fail)
"""
exc_info = sys.exc_info()
if not any(exc_info):
raise RuntimeError("No active exception is being handled")
else:
yield Failure(exc_info=exc_info)
class Failure(object): class Failure(object):
"""Object that represents failure. """Object that represents failure.