Make host_manager use scatter-gather and ignore down cells
This makes the host_manager query for computes in parallel across all the
cells. It also ignores cells that fail or time out so that scheduling can
proceed.
Closes-Bug: #1746561
Change-Id: I48d8b763f475c010fa48ee1db232a6d3ae75f5e6
(cherry picked from commit fdea8b723b)
This commit is contained in:
parent
123d096671
commit
6933c880b8
@ -616,24 +616,29 @@ class HostManager(object):
|
||||
- services is a dict of services indexed by hostname
|
||||
"""
|
||||
|
||||
def targeted_operation(cctxt):
    """Collect the nova-compute services and compute nodes of one cell.

    Runs against the cell-targeted context ``cctxt``. Services are looked
    up first (disabled ones included), then the compute nodes: every node
    in the cell when ``compute_uuids`` from the enclosing scope is None,
    otherwise only the nodes matching those UUIDs.

    :returns: a ``(services, compute_nodes)`` tuple
    """
    service_list = objects.ServiceList.get_by_binary(
        cctxt, 'nova-compute', include_disabled=True)
    if compute_uuids is not None:
        node_list = objects.ComputeNodeList.get_all_by_uuids(
            cctxt, compute_uuids)
    else:
        node_list = objects.ComputeNodeList.get_all(cctxt)
    return service_list, node_list
|
||||
|
||||
results = context_module.scatter_gather_cells(context, cells, 60,
|
||||
targeted_operation)
|
||||
compute_nodes = collections.defaultdict(list)
|
||||
services = {}
|
||||
for cell in cells:
|
||||
LOG.debug('Getting compute nodes and services for cell %(cell)s',
|
||||
{'cell': cell.identity})
|
||||
with context_module.target_cell(context, cell) as cctxt:
|
||||
if compute_uuids is None:
|
||||
compute_nodes[cell.uuid].extend(
|
||||
objects.ComputeNodeList.get_all(cctxt))
|
||||
else:
|
||||
compute_nodes[cell.uuid].extend(
|
||||
objects.ComputeNodeList.get_all_by_uuids(
|
||||
cctxt, compute_uuids))
|
||||
services.update(
|
||||
{service.host: service
|
||||
for service in objects.ServiceList.get_by_binary(
|
||||
cctxt, 'nova-compute',
|
||||
include_disabled=True)})
|
||||
for cell_uuid, result in results.items():
|
||||
if result is context_module.raised_exception_sentinel:
|
||||
LOG.warning('Failed to get computes for cell %s', cell_uuid)
|
||||
elif result is context_module.did_not_respond_sentinel:
|
||||
LOG.warning('Timeout getting computes for cell %s', cell_uuid)
|
||||
else:
|
||||
_services, _compute_nodes = result
|
||||
compute_nodes[cell_uuid].extend(_compute_nodes)
|
||||
services.update({service.host: service
|
||||
for service in _services})
|
||||
return compute_nodes, services
|
||||
|
||||
def _load_cells(self, context):
|
||||
|
@ -987,6 +987,21 @@ class HostManagerTestCase(test.NoDBTestCase):
|
||||
mock_sl.assert_called_once_with(mock.sentinel.cctxt, 'nova-compute',
|
||||
include_disabled=True)
|
||||
|
||||
@mock.patch('nova.context.scatter_gather_cells')
def test_get_computes_for_cells_failures(self, mock_sg):
    """Only cells that returned real results show up in the output.

    scatter_gather_cells is mocked to report one healthy cell, one cell
    that did not respond, and one cell that raised.
    """
    # Healthy cell: two services (hosts 'a' and 'b') and two compute nodes.
    healthy_services = [mock.MagicMock(host='a'), mock.MagicMock(host='b')]
    healthy_nodes = [mock.sentinel.c1n1, mock.sentinel.c1n2]
    mock_sg.return_value = {
        uuids.cell1: (healthy_services, healthy_nodes),
        uuids.cell2: nova_context.did_not_respond_sentinel,
        uuids.cell3: nova_context.raised_exception_sentinel,
    }

    ctxt = nova_context.RequestContext('fake', 'fake')
    cns, srv = self.host_manager._get_computes_for_cells(ctxt, [])

    # cell2 and cell3 must not contribute any compute nodes or services.
    expected_nodes = {
        uuids.cell1: [mock.sentinel.c1n1, mock.sentinel.c1n2],
    }
    self.assertEqual(expected_nodes, cns)
    self.assertEqual(['a', 'b'], sorted(srv.keys()))
|
||||
|
||||
|
||||
class HostManagerChangedNodesTestCase(test.NoDBTestCase):
|
||||
"""Test case for HostManager class."""
|
||||
|
Loading…
Reference in New Issue
Block a user