Make nova list and migration-list ignore down cells

This patch makes InstanceLister and MigrationLister ignore down
cells and list the instances/records from the up cell as opposed to
giving 500 to the users as is the current situation.

Change-Id: I308b494ab07f6936bef94f4c9da45e9473e3534d
Partial-Bug: #1726301
This commit is contained in:
Surya Seetharaman 2018-06-15 15:25:13 +02:00 committed by Matt Riedemann
parent dee92e6876
commit ee461b5bf6
3 changed files with 51 additions and 4 deletions

View File

@ -17,8 +17,12 @@ import itertools
import six
from oslo_log import log as logging
from nova import context
LOG = logging.getLogger(__name__)
class RecordSortContext(object):
def __init__(self, sort_keys, sort_dirs):
@ -243,15 +247,23 @@ class CrossCellLister(object):
return (RecordWrapper(self.sort_ctx, inst) for inst in
itertools.chain(local_marker_prefix, main_query_result))
# FIXME(danms): If we raise or timeout on a cell we need to handle
# that here gracefully. The below routine will provide sentinels
# to indicate that, which will crash the merge below, but we don't
# handle this anywhere yet anyway.
# NOTE(tssurya): When the below routine provides sentinels to indicate
# a timeout on a cell, we ignore that cell to avoid the crash when
# doing the merge below and continue merging the results from the 'up'
# cells.
# TODO(tssurya): Modify this to return the minimal available info from
# the down cells.
if self.cells:
results = context.scatter_gather_cells(ctx, self.cells, 60,
do_query)
else:
results = context.scatter_gather_all_cells(ctx, do_query)
for cell_uuid in list(results):
if results[cell_uuid] in (context.did_not_respond_sentinel,
context.raised_exception_sentinel):
LOG.warning("Cell %s is not responding and hence skipped "
"from the results.", cell_uuid)
results.pop(cell_uuid)
# If a limit was provided, it was passed to the per-cell query
# routines. That means we have NUM_CELLS * limit items across

View File

@ -13,6 +13,7 @@
import mock
from nova.compute import instance_list
from nova.compute import multi_cell_list
from nova import context as nova_context
from nova import objects
from nova import test
@ -137,3 +138,27 @@ class TestInstanceList(test.NoDBTestCase):
None, None,
cell_mappings=None)
mock_cm.assert_not_called()
@mock.patch('nova.context.scatter_gather_cells')
def test_get_instances_with_down_cells(self, mock_sg):
inst_cell0 = self.insts[uuids.cell0]
# storing the uuids of the instances from the up cell
uuid_initial = [inst['uuid'] for inst in inst_cell0]
instances = (multi_cell_list.RecordWrapper(self.context, inst)
for inst in inst_cell0)
# creating one up cell and two down cells
ret_val = {}
ret_val[uuids.cell0] = instances
ret_val[uuids.cell1] = nova_context.raised_exception_sentinel
ret_val[uuids.cell2] = nova_context.did_not_respond_sentinel
mock_sg.return_value = ret_val
res = instance_list.get_instances_sorted(self.context, {}, None, None,
[], None, None)
uuid_final = [inst['uuid'] for inst in res]
# return the results from the up cell, ignoring the down cell.
self.assertEqual(uuid_initial, uuid_final)

View File

@ -0,0 +1,10 @@
---
fixes:
- |
Listing server and migration records used to give a 500 to users
when a cell database was unreachable. Now only records from available
cells are included to avoid the 500 error. The down cells are basically
skipped when forming the results and this solution is planned to be
further enhanced through the `blueprint handling-down-cell`_.
.. _blueprint handling-down-cell: https://blueprints.launchpad.net/nova/+spec/handling-down-cell