diff --git a/nova/compute/multi_cell_list.py b/nova/compute/multi_cell_list.py index b5c40e352e36..99f29a41c15a 100644 --- a/nova/compute/multi_cell_list.py +++ b/nova/compute/multi_cell_list.py @@ -19,13 +19,17 @@ import six from oslo_log import log as logging +import nova.conf from nova import context from nova import exception +from nova.i18n import _ LOG = logging.getLogger(__name__) CELL_FAIL_SENTINELS = (context.did_not_respond_sentinel, context.raised_exception_sentinel) +CONF = nova.conf.CONF + class RecordSortContext(object): def __init__(self, sort_keys, sort_dirs): @@ -400,6 +404,10 @@ class CrossCellLister(object): return if item._db_record in CELL_FAIL_SENTINELS: + if not CONF.api.list_records_by_skipping_down_cells: + raise exception.NovaException( + _('Cell %s is not responding but configuration ' + 'indicates that we should fail.') % item.cell_uuid) LOG.warning('Cell %s is not responding and hence is ' 'being omitted from the results', item.cell_uuid) diff --git a/nova/conf/api.py b/nova/conf/api.py index e542f2b25d43..1c0395f43b2b 100644 --- a/nova/conf/api.py +++ b/nova/conf/api.py @@ -320,6 +320,14 @@ Related options: * instance_list_cells_batch_strategy * max_limit +"""), + cfg.BoolOpt("list_records_by_skipping_down_cells", + default=True, + help=""" +When set to False, this will cause the API to return a 500 error if there is an +infrastructure failure like non-responsive cells. If you want the API to skip +the down cells and return the results from the up cells set this option to +True. """), ] diff --git a/nova/tests/unit/compute/test_instance_list.py b/nova/tests/unit/compute/test_instance_list.py index c3be17dbb461..644d1e00bdf2 100644 --- a/nova/tests/unit/compute/test_instance_list.py +++ b/nova/tests/unit/compute/test_instance_list.py @@ -12,10 +12,12 @@ import mock from oslo_utils.fixture import uuidsentinel as uuids +import six from nova.compute import instance_list from nova.compute import multi_cell_list from nova import context as nova_context +from nova import exception from nova import objects from nova import test from nova.tests import fixtures @@ -185,6 +187,32 @@ class TestInstanceList(test.NoDBTestCase): # return the results from the up cell, ignoring the down cell. self.assertEqual(uuid_initial, uuid_final) + @mock.patch('nova.context.scatter_gather_cells') + def test_get_instances_by_not_skipping_down_cells(self, mock_sg): + self.flags(list_records_by_skipping_down_cells=False, group='api') + inst_cell0 = self.insts[uuids.cell0] + + def wrap(thing): + return multi_cell_list.RecordWrapper(ctx, self.context, thing) + + ctx = nova_context.RequestContext() + instances = [wrap(inst) for inst in inst_cell0] + + # creating one up cell and two down cells + ret_val = {} + ret_val[uuids.cell0] = instances + ret_val[uuids.cell1] = [wrap(nova_context.raised_exception_sentinel)] + ret_val[uuids.cell2] = [wrap(nova_context.did_not_respond_sentinel)] + mock_sg.return_value = ret_val + + # Raises exception if a cell is down without skipping them + # as CONF.api.list_records_by_skipping_down_cells is set to False. + # This would in turn result in an API 500 internal error. + exp = self.assertRaises(exception.NovaException, + instance_list.get_instance_objects_sorted, self.context, {}, None, + None, [], None, None) + self.assertIn('configuration indicates', six.text_type(exp)) + def test_batch_size_fixed(self): fixed_size = 200 self.flags(instance_list_cells_batch_strategy='fixed', group='api') diff --git a/releasenotes/notes/records-list-skip-down-cells-84d995e75c77c041.yaml b/releasenotes/notes/records-list-skip-down-cells-84d995e75c77c041.yaml new file mode 100644 index 000000000000..8db45ac6931b --- /dev/null +++ b/releasenotes/notes/records-list-skip-down-cells-84d995e75c77c041.yaml @@ -0,0 +1,16 @@ +--- +other: + - | + In case of infrastructure failures like non-responsive cells, prior to + `change e3534d`_ we raised an API 500 error. However currently when + listing instances or migrations, we skip that cell and display results from + the up cells with the aim of increasing availability at the expense of + accuracy. If the old behaviour is desired, a new flag called + ``CONF.api.list_records_by_skipping_down_cells`` has been added which can + be set to False to mimic the old behavior. Both of these potential + behaviors will be unified in an upcoming microversion done through the + `blueprint handling-down-cell`_ where minimal constructs would be returned + for the down cell instances instead of raising 500s or skipping down cells. + + .. _change e3534d : https://review.openstack.org/#/q/I308b494ab07f6936bef94f4c9da45e9473e3534d + .. _blueprint handling-down-cell: https://blueprints.launchpad.net/nova/+spec/handling-down-cell