Refactor scatter-gather utility to return exception objects

Scatter-gather utility returns a raised_exception_sentinel for
all kinds of exceptions that are caught and often times there
maybe situations where we may have to handle the different types
of exceptions differently. To facilitate that, it might be more
useful to return the Exception object itself instead of the dummy
raised_exception_sentinel so that based on the result's exception
type we can handle them differently.

Related to blueprint handling-down-cell

Change-Id: I861b223ee46b0f0a31f646a4b45f8a02410253cf
This commit is contained in:
Surya Seetharaman 2018-10-04 13:41:49 +02:00 committed by Matt Riedemann
parent 37a5ef5113
commit 031314b6d8
17 changed files with 61 additions and 47 deletions

View File

@ -54,8 +54,7 @@ class ConsoleAuthTokensController(wsgi.Controller):
# loop as soon as we find a result because the token is associated
# with one instance, which can only be in one cell.
for result in results.values():
if result not in (nova_context.did_not_respond_sentinel,
nova_context.raised_exception_sentinel):
if not nova_context.is_cell_failure_sentinel(result):
connect_info = result.to_dict()
break

View File

@ -536,7 +536,7 @@ class ViewBuilder(common.ViewBuilder):
objects.BlockDeviceMappingList.bdms_by_instance_uuid,
instance_uuids)
for cell_uuid, result in results.items():
if result is nova_context.raised_exception_sentinel:
if isinstance(result, Exception):
LOG.warning('Failed to get block device mappings for cell %s',
cell_uuid)
elif result is nova_context.did_not_respond_sentinel:

View File

@ -5046,8 +5046,7 @@ class HostAPI(base.Base):
service_dict = nova_context.scatter_gather_all_cells(context,
objects.ServiceList.get_all, disabled, set_zones=set_zones)
for service in service_dict.values():
if service not in (nova_context.did_not_respond_sentinel,
nova_context.raised_exception_sentinel):
if not nova_context.is_cell_failure_sentinel(service):
services.extend(service)
else:
services = objects.ServiceList.get_all(context, disabled,

View File

@ -57,8 +57,7 @@ class MigrationLister(multi_cell_list.CrossCellLister):
ctx, db.migration_get_by_uuid, marker)
db_migration = None
for result_cell_uuid, result in results.items():
if result not in (context.did_not_respond_sentinel,
context.raised_exception_sentinel):
if not context.is_cell_failure_sentinel(result):
db_migration = result
cell_uuid = result_cell_uuid
break

View File

@ -25,8 +25,6 @@ from nova import exception
from nova.i18n import _
LOG = logging.getLogger(__name__)
CELL_FAIL_SENTINELS = (context.did_not_respond_sentinel,
context.raised_exception_sentinel)
CONF = nova.conf.CONF
@ -77,9 +75,9 @@ class RecordWrapper(object):
# $limit results from good cells before we noticed the failed
# cells, and would not properly report them as failed for
# fix-up in the higher layers.
if self._db_record in CELL_FAIL_SENTINELS:
if context.is_cell_failure_sentinel(self._db_record):
return True
elif other._db_record in CELL_FAIL_SENTINELS:
elif context.is_cell_failure_sentinel(other._db_record):
return False
r = self._sort_ctx.compare_records(self._db_record,
@ -108,11 +106,11 @@ def query_wrapper(ctx, fn, *args, **kwargs):
# wrapping the sentinel indicating timeout.
yield RecordWrapper(ctx, None, context.did_not_respond_sentinel)
raise StopIteration
except Exception:
except Exception as e:
# Here, we yield a RecordWrapper (no sort_ctx needed since
# we won't call into the implementation's comparison routines)
# wrapping the sentinel indicating failure.
yield RecordWrapper(ctx, None, context.raised_exception_sentinel)
# wrapping the exception object indicating failure.
yield RecordWrapper(ctx, None, e.__class__(e.args))
raise StopIteration
@ -403,7 +401,7 @@ class CrossCellLister(object):
except StopIteration:
return
if item._db_record in CELL_FAIL_SENTINELS:
if context.is_cell_failure_sentinel(item._db_record):
if not CONF.api.list_records_by_skipping_down_cells:
raise exception.NovaException(
_('Cell %s is not responding but configuration '
@ -413,7 +411,7 @@ class CrossCellLister(object):
item.cell_uuid)
if item._db_record == context.did_not_respond_sentinel:
self._cells_timed_out.add(item.cell_uuid)
elif item._db_record == context.raised_exception_sentinel:
elif isinstance(item._db_record, Exception):
self._cells_failed.add(item.cell_uuid)
# We might have received one batch but timed out or failed
# on a later one, so be sure we fix the accounting.

View File

@ -45,9 +45,6 @@ CELL_CACHE = {}
# NOTE(melwitt): Used for the scatter-gather utility to indicate we timed out
# waiting for a result from a cell.
did_not_respond_sentinel = object()
# NOTE(melwitt): Used for the scatter-gather utility to indicate an exception
# was raised gathering a result from a cell.
raised_exception_sentinel = object()
# FIXME(danms): Keep a global cache of the cells we find the
# first time we look. This needs to be refreshed on a timer or
# trigger.
@ -429,7 +426,7 @@ def scatter_gather_cells(context, cell_mappings, timeout, fn, *args, **kwargs):
:param kwargs: The kwargs for the function to call for each cell
:returns: A dict {cell_uuid: result} containing the joined results. The
did_not_respond_sentinel will be returned if a cell did not
respond within the timeout. The raised_exception_sentinel will
respond within the timeout. The exception object will
be returned if the call to a cell raised an exception. The
exception will be logged.
"""
@ -442,9 +439,9 @@ def scatter_gather_cells(context, cell_mappings, timeout, fn, *args, **kwargs):
try:
with target_cell(context, cell_mapping) as cctxt:
result = fn(cctxt, *args, **kwargs)
except Exception:
except Exception as e:
LOG.exception('Error gathering result from cell %s', cell_uuid)
result = raised_exception_sentinel
result = e.__class__(e.args)
# The queue is already synchronized.
queue.put((cell_uuid, result))
@ -488,6 +485,11 @@ def load_cells():
LOG.error('No cells are configured, unable to continue')
def is_cell_failure_sentinel(record):
return (record is did_not_respond_sentinel or
isinstance(record, Exception))
def scatter_gather_skip_cell0(context, fn, *args, **kwargs):
"""Target all cells except cell0 in parallel and return their results.
@ -502,7 +504,7 @@ def scatter_gather_skip_cell0(context, fn, *args, **kwargs):
:param kwargs: The kwargs for the function to call for each cell
:returns: A dict {cell_uuid: result} containing the joined results. The
did_not_respond_sentinel will be returned if a cell did not
respond within the timeout. The raised_exception_sentinel will
respond within the timeout. The exception object will
be returned if the call to a cell raised an exception. The
exception will be logged.
"""
@ -527,7 +529,7 @@ def scatter_gather_single_cell(context, cell_mapping, fn, *args, **kwargs):
:param kwargs: The kwargs for the function to call for this cell
:returns: A dict {cell_uuid: result} containing the joined results. The
did_not_respond_sentinel will be returned if the cell did not
respond within the timeout. The raised_exception_sentinel will
respond within the timeout. The exception object will
be returned if the call to the cell raised an exception. The
exception will be logged.
"""
@ -549,7 +551,7 @@ def scatter_gather_all_cells(context, fn, *args, **kwargs):
:param kwargs: The kwargs for the function to call for each cell
:returns: A dict {cell_uuid: result} containing the joined results. The
did_not_respond_sentinel will be returned if a cell did not
respond within the timeout. The raised_exception_sentinel will
respond within the timeout. The exception object will
be returned if the call to a cell raised an exception. The
exception will be logged.
"""

View File

@ -519,7 +519,7 @@ def get_minimum_version_all_cells(context, binaries, require_all=False):
'service version', cell_uuid)
if require_all:
raise exception.CellTimeout()
elif result is nova_context.raised_exception_sentinel:
elif isinstance(result, Exception):
LOG.warning('Failed to get minimum service version for cell %s',
cell_uuid)
if require_all:

View File

@ -1338,8 +1338,7 @@ def _instances_cores_ram_count(context, project_id, user_id=None):
if user_id:
total_counts['user'] = {'instances': 0, 'cores': 0, 'ram': 0}
for result in results.values():
if result not in (nova_context.did_not_respond_sentinel,
nova_context.raised_exception_sentinel):
if not nova_context.is_cell_failure_sentinel(result):
for resource, count in result['project'].items():
total_counts['project'][resource] += count
if user_id:

View File

@ -630,7 +630,7 @@ class HostManager(object):
compute_nodes = collections.defaultdict(list)
services = {}
for cell_uuid, result in results.items():
if result is context_module.raised_exception_sentinel:
if isinstance(result, Exception):
LOG.warning('Failed to get computes for cell %s', cell_uuid)
elif result is context_module.did_not_respond_sentinel:
LOG.warning('Timeout getting computes for cell %s', cell_uuid)

View File

@ -876,8 +876,7 @@ def _get_instance_group_hosts_all_cells(context, instance_group):
# is raised while targeting a cell and when a cell does not respond
# as part of the "handling of a down cell" spec:
# https://blueprints.launchpad.net/nova/+spec/handling-down-cell
if result not in (nova_context.did_not_respond_sentinel,
nova_context.raised_exception_sentinel):
if not nova_context.is_cell_failure_sentinel(result):
hosts.extend(result)
return hosts

View File

@ -6803,7 +6803,7 @@ class ServersViewBuilderTest(test.TestCase):
# just faking a nova list scenario
mock_sg.return_value = {
uuids.cell1: bdms[0],
uuids.cell2: context.raised_exception_sentinel
uuids.cell2: exception.BDMNotFound(id='fake')
}
ctxt = context.RequestContext('fake', 'fake')
result = self.view_builder._get_instance_bdms_in_multiple_cells(

View File

@ -192,7 +192,7 @@ class ComputeHostAPITestCase(test.TestCase):
host='host-%s' % uuids.cell1)
mock_sg.return_value = {
uuids.cell1: [service],
uuids.cell2: context.raised_exception_sentinel
uuids.cell2: context.did_not_respond_sentinel
}
services = self.host_api.service_get_all(self.ctxt, all_cells=True)
# returns the results from cell1 and ignores cell2.

View File

@ -175,7 +175,7 @@ class TestInstanceList(test.NoDBTestCase):
# creating one up cell and two down cells
ret_val = {}
ret_val[uuids.cell0] = instances
ret_val[uuids.cell1] = [wrap(nova_context.raised_exception_sentinel)]
ret_val[uuids.cell1] = [wrap(exception.BuildRequestNotFound(uuid='f'))]
ret_val[uuids.cell2] = [wrap(nova_context.did_not_respond_sentinel)]
mock_sg.return_value = ret_val
@ -201,7 +201,7 @@ class TestInstanceList(test.NoDBTestCase):
# creating one up cell and two down cells
ret_val = {}
ret_val[uuids.cell0] = instances
ret_val[uuids.cell1] = [wrap(nova_context.raised_exception_sentinel)]
ret_val[uuids.cell1] = [wrap(exception.BuildRequestNotFound(uuid='f'))]
ret_val[uuids.cell2] = [wrap(nova_context.did_not_respond_sentinel)]
mock_sg.return_value = ret_val

View File

@ -123,7 +123,8 @@ class TestUtils(test.NoDBTestCase):
iw2 = multi_cell_list.RecordWrapper(ctx, sort_ctx,
context.did_not_respond_sentinel)
iw3 = multi_cell_list.RecordWrapper(ctx, sort_ctx,
context.raised_exception_sentinel)
exception.InstanceNotFound(
instance_id='fake'))
# NOTE(danms): The sentinel wrappers always win
self.assertTrue(iw2 < iw1)
@ -157,13 +158,15 @@ class TestUtils(test.NoDBTestCase):
mock.MagicMock(), test)])
def test_query_wrapper_fail(self):
def test(ctx):
def tester(ctx):
raise test.TestingException
self.assertEqual([context.raised_exception_sentinel],
[x._db_record for x in
multi_cell_list.query_wrapper(
mock.MagicMock(), test)])
self.assertIsInstance(
# query_wrapper is a generator so we convert to a list and
# check the type on the first and only result
[x._db_record for x in multi_cell_list.query_wrapper(
mock.MagicMock(), tester)][0],
test.TestingException)
class TestListContext(multi_cell_list.RecordSortContext):
@ -351,7 +354,7 @@ class FailureLister(TestLister):
if action == context.did_not_respond_sentinel:
raise exception.CellTimeout
elif action == context.raised_exception_sentinel:
elif isinstance(action, Exception):
raise test.TestingException
else:
return super(FailureLister, self).get_by_filters(ctx, *a, **k)
@ -369,7 +372,11 @@ class TestBaseClass(test.NoDBTestCase):
# Two of the cells will fail, one with timeout and one
# with an error
lister.set_fails(uuids.cell0, [context.did_not_respond_sentinel])
lister.set_fails(uuids.cell1, [context.raised_exception_sentinel])
# Note that InstanceNotFound exception will never appear during
# instance listing, the aim is to only simulate a situation where
# there could be some type of exception arising.
lister.set_fails(uuids.cell1, exception.InstanceNotFound(
instance_id='fake'))
ctx = context.RequestContext()
result = lister.get_records_sorted(ctx, {}, 50, None, batch_size=10)
# We should still have 50 results since there are enough from the
@ -390,7 +397,11 @@ class TestBaseClass(test.NoDBTestCase):
# One cell will succeed and then time out, one will fail immediately,
# and the last will always work
lister.set_fails(uuids.cell0, [None, context.did_not_respond_sentinel])
lister.set_fails(uuids.cell1, [context.raised_exception_sentinel])
# Note that BuildAbortException will never appear during instance
# listing, the aim is to only simulate a situation where there could
# be some type of exception arising.
lister.set_fails(uuids.cell1, exception.BuildAbortException(
instance_uuid='fake', reason='fake'))
ctx = context.RequestContext()
result = lister.get_records_sorted(ctx, {}, 50, None,
batch_size=5)

View File

@ -550,7 +550,7 @@ class TestServiceVersionCells(test.TestCase):
def test_version_all_cells_with_fail(self, mock_scatter):
mock_scatter.return_value = {
'foo': {'nova-compute': 13},
'bar': context.raised_exception_sentinel,
'bar': exception.ServiceNotFound(service_id='fake'),
}
self.assertEqual(13, service.get_minimum_version_all_cells(
self.context, ['nova-compute']))

View File

@ -1039,7 +1039,7 @@ class HostManagerTestCase(test.NoDBTestCase):
uuids.cell1: ([mock.MagicMock(host='a'), mock.MagicMock(host='b')],
[mock.sentinel.c1n1, mock.sentinel.c1n2]),
uuids.cell2: nova_context.did_not_respond_sentinel,
uuids.cell3: nova_context.raised_exception_sentinel,
uuids.cell3: exception.ComputeHostNotFound(host='c'),
}
context = nova_context.RequestContext('fake', 'fake')
cns, srv = self.host_manager._get_computes_for_cells(context, [])

View File

@ -335,6 +335,14 @@ class ContextTestCase(test.NoDBTestCase):
mock_create_cm.assert_not_called()
mock_create_tport.assert_not_called()
def test_is_cell_failure_sentinel(self):
record = context.did_not_respond_sentinel
self.assertTrue(context.is_cell_failure_sentinel(record))
record = TypeError()
self.assertTrue(context.is_cell_failure_sentinel(record))
record = objects.Instance()
self.assertFalse(context.is_cell_failure_sentinel(record))
@mock.patch('nova.context.target_cell')
@mock.patch('nova.objects.InstanceList.get_by_filters')
def test_scatter_gather_cells(self, mock_get_inst, mock_target_cell):
@ -422,7 +430,7 @@ class ContextTestCase(test.NoDBTestCase):
ctxt, mappings, 30, objects.InstanceList.get_by_filters)
self.assertEqual(2, len(results))
self.assertIn(mock.sentinel.instances, results.values())
self.assertIn(context.raised_exception_sentinel, results.values())
isinstance(results.values(), Exception)
self.assertTrue(mock_log_exception.called)
@mock.patch('nova.context.scatter_gather_cells')