Refactor scatter-gather utility to return exception objects
Scatter-gather utility returns a raised_exception_sentinel for all kinds of exceptions that are caught and often times there maybe situations where we may have to handle the different types of exceptions differently. To facilitate that, it might be more useful to return the Exception object itself instead of the dummy raised_exception_sentinel so that based on the result's exception type we can handle them differently. Related to blueprint handling-down-cell Change-Id: I861b223ee46b0f0a31f646a4b45f8a02410253cf
This commit is contained in:
parent
37a5ef5113
commit
031314b6d8
|
@ -54,8 +54,7 @@ class ConsoleAuthTokensController(wsgi.Controller):
|
|||
# loop as soon as we find a result because the token is associated
|
||||
# with one instance, which can only be in one cell.
|
||||
for result in results.values():
|
||||
if result not in (nova_context.did_not_respond_sentinel,
|
||||
nova_context.raised_exception_sentinel):
|
||||
if not nova_context.is_cell_failure_sentinel(result):
|
||||
connect_info = result.to_dict()
|
||||
break
|
||||
|
||||
|
|
|
@ -536,7 +536,7 @@ class ViewBuilder(common.ViewBuilder):
|
|||
objects.BlockDeviceMappingList.bdms_by_instance_uuid,
|
||||
instance_uuids)
|
||||
for cell_uuid, result in results.items():
|
||||
if result is nova_context.raised_exception_sentinel:
|
||||
if isinstance(result, Exception):
|
||||
LOG.warning('Failed to get block device mappings for cell %s',
|
||||
cell_uuid)
|
||||
elif result is nova_context.did_not_respond_sentinel:
|
||||
|
|
|
@ -5046,8 +5046,7 @@ class HostAPI(base.Base):
|
|||
service_dict = nova_context.scatter_gather_all_cells(context,
|
||||
objects.ServiceList.get_all, disabled, set_zones=set_zones)
|
||||
for service in service_dict.values():
|
||||
if service not in (nova_context.did_not_respond_sentinel,
|
||||
nova_context.raised_exception_sentinel):
|
||||
if not nova_context.is_cell_failure_sentinel(service):
|
||||
services.extend(service)
|
||||
else:
|
||||
services = objects.ServiceList.get_all(context, disabled,
|
||||
|
|
|
@ -57,8 +57,7 @@ class MigrationLister(multi_cell_list.CrossCellLister):
|
|||
ctx, db.migration_get_by_uuid, marker)
|
||||
db_migration = None
|
||||
for result_cell_uuid, result in results.items():
|
||||
if result not in (context.did_not_respond_sentinel,
|
||||
context.raised_exception_sentinel):
|
||||
if not context.is_cell_failure_sentinel(result):
|
||||
db_migration = result
|
||||
cell_uuid = result_cell_uuid
|
||||
break
|
||||
|
|
|
@ -25,8 +25,6 @@ from nova import exception
|
|||
from nova.i18n import _
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
CELL_FAIL_SENTINELS = (context.did_not_respond_sentinel,
|
||||
context.raised_exception_sentinel)
|
||||
|
||||
CONF = nova.conf.CONF
|
||||
|
||||
|
@ -77,9 +75,9 @@ class RecordWrapper(object):
|
|||
# $limit results from good cells before we noticed the failed
|
||||
# cells, and would not properly report them as failed for
|
||||
# fix-up in the higher layers.
|
||||
if self._db_record in CELL_FAIL_SENTINELS:
|
||||
if context.is_cell_failure_sentinel(self._db_record):
|
||||
return True
|
||||
elif other._db_record in CELL_FAIL_SENTINELS:
|
||||
elif context.is_cell_failure_sentinel(other._db_record):
|
||||
return False
|
||||
|
||||
r = self._sort_ctx.compare_records(self._db_record,
|
||||
|
@ -108,11 +106,11 @@ def query_wrapper(ctx, fn, *args, **kwargs):
|
|||
# wrapping the sentinel indicating timeout.
|
||||
yield RecordWrapper(ctx, None, context.did_not_respond_sentinel)
|
||||
raise StopIteration
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
# Here, we yield a RecordWrapper (no sort_ctx needed since
|
||||
# we won't call into the implementation's comparison routines)
|
||||
# wrapping the sentinel indicating failure.
|
||||
yield RecordWrapper(ctx, None, context.raised_exception_sentinel)
|
||||
# wrapping the exception object indicating failure.
|
||||
yield RecordWrapper(ctx, None, e.__class__(e.args))
|
||||
raise StopIteration
|
||||
|
||||
|
||||
|
@ -403,7 +401,7 @@ class CrossCellLister(object):
|
|||
except StopIteration:
|
||||
return
|
||||
|
||||
if item._db_record in CELL_FAIL_SENTINELS:
|
||||
if context.is_cell_failure_sentinel(item._db_record):
|
||||
if not CONF.api.list_records_by_skipping_down_cells:
|
||||
raise exception.NovaException(
|
||||
_('Cell %s is not responding but configuration '
|
||||
|
@ -413,7 +411,7 @@ class CrossCellLister(object):
|
|||
item.cell_uuid)
|
||||
if item._db_record == context.did_not_respond_sentinel:
|
||||
self._cells_timed_out.add(item.cell_uuid)
|
||||
elif item._db_record == context.raised_exception_sentinel:
|
||||
elif isinstance(item._db_record, Exception):
|
||||
self._cells_failed.add(item.cell_uuid)
|
||||
# We might have received one batch but timed out or failed
|
||||
# on a later one, so be sure we fix the accounting.
|
||||
|
|
|
@ -45,9 +45,6 @@ CELL_CACHE = {}
|
|||
# NOTE(melwitt): Used for the scatter-gather utility to indicate we timed out
|
||||
# waiting for a result from a cell.
|
||||
did_not_respond_sentinel = object()
|
||||
# NOTE(melwitt): Used for the scatter-gather utility to indicate an exception
|
||||
# was raised gathering a result from a cell.
|
||||
raised_exception_sentinel = object()
|
||||
# FIXME(danms): Keep a global cache of the cells we find the
|
||||
# first time we look. This needs to be refreshed on a timer or
|
||||
# trigger.
|
||||
|
@ -429,7 +426,7 @@ def scatter_gather_cells(context, cell_mappings, timeout, fn, *args, **kwargs):
|
|||
:param kwargs: The kwargs for the function to call for each cell
|
||||
:returns: A dict {cell_uuid: result} containing the joined results. The
|
||||
did_not_respond_sentinel will be returned if a cell did not
|
||||
respond within the timeout. The raised_exception_sentinel will
|
||||
respond within the timeout. The exception object will
|
||||
be returned if the call to a cell raised an exception. The
|
||||
exception will be logged.
|
||||
"""
|
||||
|
@ -442,9 +439,9 @@ def scatter_gather_cells(context, cell_mappings, timeout, fn, *args, **kwargs):
|
|||
try:
|
||||
with target_cell(context, cell_mapping) as cctxt:
|
||||
result = fn(cctxt, *args, **kwargs)
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
LOG.exception('Error gathering result from cell %s', cell_uuid)
|
||||
result = raised_exception_sentinel
|
||||
result = e.__class__(e.args)
|
||||
# The queue is already synchronized.
|
||||
queue.put((cell_uuid, result))
|
||||
|
||||
|
@ -488,6 +485,11 @@ def load_cells():
|
|||
LOG.error('No cells are configured, unable to continue')
|
||||
|
||||
|
||||
def is_cell_failure_sentinel(record):
|
||||
return (record is did_not_respond_sentinel or
|
||||
isinstance(record, Exception))
|
||||
|
||||
|
||||
def scatter_gather_skip_cell0(context, fn, *args, **kwargs):
|
||||
"""Target all cells except cell0 in parallel and return their results.
|
||||
|
||||
|
@ -502,7 +504,7 @@ def scatter_gather_skip_cell0(context, fn, *args, **kwargs):
|
|||
:param kwargs: The kwargs for the function to call for each cell
|
||||
:returns: A dict {cell_uuid: result} containing the joined results. The
|
||||
did_not_respond_sentinel will be returned if a cell did not
|
||||
respond within the timeout. The raised_exception_sentinel will
|
||||
respond within the timeout. The exception object will
|
||||
be returned if the call to a cell raised an exception. The
|
||||
exception will be logged.
|
||||
"""
|
||||
|
@ -527,7 +529,7 @@ def scatter_gather_single_cell(context, cell_mapping, fn, *args, **kwargs):
|
|||
:param kwargs: The kwargs for the function to call for this cell
|
||||
:returns: A dict {cell_uuid: result} containing the joined results. The
|
||||
did_not_respond_sentinel will be returned if the cell did not
|
||||
respond within the timeout. The raised_exception_sentinel will
|
||||
respond within the timeout. The exception object will
|
||||
be returned if the call to the cell raised an exception. The
|
||||
exception will be logged.
|
||||
"""
|
||||
|
@ -549,7 +551,7 @@ def scatter_gather_all_cells(context, fn, *args, **kwargs):
|
|||
:param kwargs: The kwargs for the function to call for each cell
|
||||
:returns: A dict {cell_uuid: result} containing the joined results. The
|
||||
did_not_respond_sentinel will be returned if a cell did not
|
||||
respond within the timeout. The raised_exception_sentinel will
|
||||
respond within the timeout. The exception object will
|
||||
be returned if the call to a cell raised an exception. The
|
||||
exception will be logged.
|
||||
"""
|
||||
|
|
|
@ -519,7 +519,7 @@ def get_minimum_version_all_cells(context, binaries, require_all=False):
|
|||
'service version', cell_uuid)
|
||||
if require_all:
|
||||
raise exception.CellTimeout()
|
||||
elif result is nova_context.raised_exception_sentinel:
|
||||
elif isinstance(result, Exception):
|
||||
LOG.warning('Failed to get minimum service version for cell %s',
|
||||
cell_uuid)
|
||||
if require_all:
|
||||
|
|
|
@ -1338,8 +1338,7 @@ def _instances_cores_ram_count(context, project_id, user_id=None):
|
|||
if user_id:
|
||||
total_counts['user'] = {'instances': 0, 'cores': 0, 'ram': 0}
|
||||
for result in results.values():
|
||||
if result not in (nova_context.did_not_respond_sentinel,
|
||||
nova_context.raised_exception_sentinel):
|
||||
if not nova_context.is_cell_failure_sentinel(result):
|
||||
for resource, count in result['project'].items():
|
||||
total_counts['project'][resource] += count
|
||||
if user_id:
|
||||
|
|
|
@ -630,7 +630,7 @@ class HostManager(object):
|
|||
compute_nodes = collections.defaultdict(list)
|
||||
services = {}
|
||||
for cell_uuid, result in results.items():
|
||||
if result is context_module.raised_exception_sentinel:
|
||||
if isinstance(result, Exception):
|
||||
LOG.warning('Failed to get computes for cell %s', cell_uuid)
|
||||
elif result is context_module.did_not_respond_sentinel:
|
||||
LOG.warning('Timeout getting computes for cell %s', cell_uuid)
|
||||
|
|
|
@ -876,8 +876,7 @@ def _get_instance_group_hosts_all_cells(context, instance_group):
|
|||
# is raised while targeting a cell and when a cell does not respond
|
||||
# as part of the "handling of a down cell" spec:
|
||||
# https://blueprints.launchpad.net/nova/+spec/handling-down-cell
|
||||
if result not in (nova_context.did_not_respond_sentinel,
|
||||
nova_context.raised_exception_sentinel):
|
||||
if not nova_context.is_cell_failure_sentinel(result):
|
||||
hosts.extend(result)
|
||||
return hosts
|
||||
|
||||
|
|
|
@ -6803,7 +6803,7 @@ class ServersViewBuilderTest(test.TestCase):
|
|||
# just faking a nova list scenario
|
||||
mock_sg.return_value = {
|
||||
uuids.cell1: bdms[0],
|
||||
uuids.cell2: context.raised_exception_sentinel
|
||||
uuids.cell2: exception.BDMNotFound(id='fake')
|
||||
}
|
||||
ctxt = context.RequestContext('fake', 'fake')
|
||||
result = self.view_builder._get_instance_bdms_in_multiple_cells(
|
||||
|
|
|
@ -192,7 +192,7 @@ class ComputeHostAPITestCase(test.TestCase):
|
|||
host='host-%s' % uuids.cell1)
|
||||
mock_sg.return_value = {
|
||||
uuids.cell1: [service],
|
||||
uuids.cell2: context.raised_exception_sentinel
|
||||
uuids.cell2: context.did_not_respond_sentinel
|
||||
}
|
||||
services = self.host_api.service_get_all(self.ctxt, all_cells=True)
|
||||
# returns the results from cell1 and ignores cell2.
|
||||
|
|
|
@ -175,7 +175,7 @@ class TestInstanceList(test.NoDBTestCase):
|
|||
# creating one up cell and two down cells
|
||||
ret_val = {}
|
||||
ret_val[uuids.cell0] = instances
|
||||
ret_val[uuids.cell1] = [wrap(nova_context.raised_exception_sentinel)]
|
||||
ret_val[uuids.cell1] = [wrap(exception.BuildRequestNotFound(uuid='f'))]
|
||||
ret_val[uuids.cell2] = [wrap(nova_context.did_not_respond_sentinel)]
|
||||
mock_sg.return_value = ret_val
|
||||
|
||||
|
@ -201,7 +201,7 @@ class TestInstanceList(test.NoDBTestCase):
|
|||
# creating one up cell and two down cells
|
||||
ret_val = {}
|
||||
ret_val[uuids.cell0] = instances
|
||||
ret_val[uuids.cell1] = [wrap(nova_context.raised_exception_sentinel)]
|
||||
ret_val[uuids.cell1] = [wrap(exception.BuildRequestNotFound(uuid='f'))]
|
||||
ret_val[uuids.cell2] = [wrap(nova_context.did_not_respond_sentinel)]
|
||||
mock_sg.return_value = ret_val
|
||||
|
||||
|
|
|
@ -123,7 +123,8 @@ class TestUtils(test.NoDBTestCase):
|
|||
iw2 = multi_cell_list.RecordWrapper(ctx, sort_ctx,
|
||||
context.did_not_respond_sentinel)
|
||||
iw3 = multi_cell_list.RecordWrapper(ctx, sort_ctx,
|
||||
context.raised_exception_sentinel)
|
||||
exception.InstanceNotFound(
|
||||
instance_id='fake'))
|
||||
|
||||
# NOTE(danms): The sentinel wrappers always win
|
||||
self.assertTrue(iw2 < iw1)
|
||||
|
@ -157,13 +158,15 @@ class TestUtils(test.NoDBTestCase):
|
|||
mock.MagicMock(), test)])
|
||||
|
||||
def test_query_wrapper_fail(self):
|
||||
def test(ctx):
|
||||
def tester(ctx):
|
||||
raise test.TestingException
|
||||
|
||||
self.assertEqual([context.raised_exception_sentinel],
|
||||
[x._db_record for x in
|
||||
multi_cell_list.query_wrapper(
|
||||
mock.MagicMock(), test)])
|
||||
self.assertIsInstance(
|
||||
# query_wrapper is a generator so we convert to a list and
|
||||
# check the type on the first and only result
|
||||
[x._db_record for x in multi_cell_list.query_wrapper(
|
||||
mock.MagicMock(), tester)][0],
|
||||
test.TestingException)
|
||||
|
||||
|
||||
class TestListContext(multi_cell_list.RecordSortContext):
|
||||
|
@ -351,7 +354,7 @@ class FailureLister(TestLister):
|
|||
|
||||
if action == context.did_not_respond_sentinel:
|
||||
raise exception.CellTimeout
|
||||
elif action == context.raised_exception_sentinel:
|
||||
elif isinstance(action, Exception):
|
||||
raise test.TestingException
|
||||
else:
|
||||
return super(FailureLister, self).get_by_filters(ctx, *a, **k)
|
||||
|
@ -369,7 +372,11 @@ class TestBaseClass(test.NoDBTestCase):
|
|||
# Two of the cells will fail, one with timeout and one
|
||||
# with an error
|
||||
lister.set_fails(uuids.cell0, [context.did_not_respond_sentinel])
|
||||
lister.set_fails(uuids.cell1, [context.raised_exception_sentinel])
|
||||
# Note that InstanceNotFound exception will never appear during
|
||||
# instance listing, the aim is to only simulate a situation where
|
||||
# there could be some type of exception arising.
|
||||
lister.set_fails(uuids.cell1, exception.InstanceNotFound(
|
||||
instance_id='fake'))
|
||||
ctx = context.RequestContext()
|
||||
result = lister.get_records_sorted(ctx, {}, 50, None, batch_size=10)
|
||||
# We should still have 50 results since there are enough from the
|
||||
|
@ -390,7 +397,11 @@ class TestBaseClass(test.NoDBTestCase):
|
|||
# One cell will succeed and then time out, one will fail immediately,
|
||||
# and the last will always work
|
||||
lister.set_fails(uuids.cell0, [None, context.did_not_respond_sentinel])
|
||||
lister.set_fails(uuids.cell1, [context.raised_exception_sentinel])
|
||||
# Note that BuildAbortException will never appear during instance
|
||||
# listing, the aim is to only simulate a situation where there could
|
||||
# be some type of exception arising.
|
||||
lister.set_fails(uuids.cell1, exception.BuildAbortException(
|
||||
instance_uuid='fake', reason='fake'))
|
||||
ctx = context.RequestContext()
|
||||
result = lister.get_records_sorted(ctx, {}, 50, None,
|
||||
batch_size=5)
|
||||
|
|
|
@ -550,7 +550,7 @@ class TestServiceVersionCells(test.TestCase):
|
|||
def test_version_all_cells_with_fail(self, mock_scatter):
|
||||
mock_scatter.return_value = {
|
||||
'foo': {'nova-compute': 13},
|
||||
'bar': context.raised_exception_sentinel,
|
||||
'bar': exception.ServiceNotFound(service_id='fake'),
|
||||
}
|
||||
self.assertEqual(13, service.get_minimum_version_all_cells(
|
||||
self.context, ['nova-compute']))
|
||||
|
|
|
@ -1039,7 +1039,7 @@ class HostManagerTestCase(test.NoDBTestCase):
|
|||
uuids.cell1: ([mock.MagicMock(host='a'), mock.MagicMock(host='b')],
|
||||
[mock.sentinel.c1n1, mock.sentinel.c1n2]),
|
||||
uuids.cell2: nova_context.did_not_respond_sentinel,
|
||||
uuids.cell3: nova_context.raised_exception_sentinel,
|
||||
uuids.cell3: exception.ComputeHostNotFound(host='c'),
|
||||
}
|
||||
context = nova_context.RequestContext('fake', 'fake')
|
||||
cns, srv = self.host_manager._get_computes_for_cells(context, [])
|
||||
|
|
|
@ -335,6 +335,14 @@ class ContextTestCase(test.NoDBTestCase):
|
|||
mock_create_cm.assert_not_called()
|
||||
mock_create_tport.assert_not_called()
|
||||
|
||||
def test_is_cell_failure_sentinel(self):
|
||||
record = context.did_not_respond_sentinel
|
||||
self.assertTrue(context.is_cell_failure_sentinel(record))
|
||||
record = TypeError()
|
||||
self.assertTrue(context.is_cell_failure_sentinel(record))
|
||||
record = objects.Instance()
|
||||
self.assertFalse(context.is_cell_failure_sentinel(record))
|
||||
|
||||
@mock.patch('nova.context.target_cell')
|
||||
@mock.patch('nova.objects.InstanceList.get_by_filters')
|
||||
def test_scatter_gather_cells(self, mock_get_inst, mock_target_cell):
|
||||
|
@ -422,7 +430,7 @@ class ContextTestCase(test.NoDBTestCase):
|
|||
ctxt, mappings, 30, objects.InstanceList.get_by_filters)
|
||||
self.assertEqual(2, len(results))
|
||||
self.assertIn(mock.sentinel.instances, results.values())
|
||||
self.assertIn(context.raised_exception_sentinel, results.values())
|
||||
isinstance(results.values(), Exception)
|
||||
self.assertTrue(mock_log_exception.called)
|
||||
|
||||
@mock.patch('nova.context.scatter_gather_cells')
|
||||
|
|
Loading…
Reference in New Issue