Make service all-cells min version helper use scatter-gather

This makes the Service get_minimum_version_all_cells() helper use the parallel
scatter-gather utilities to make the determination. It also adds fault handling
for cases where a cell fails to respond, and an option to ignore or raise in
that situation.

Closes-Bug: #1746558
Change-Id: I62dc1033437f91bded28a9fd58759d2dbd7e55fb
This commit is contained in:
Dan Smith 2018-01-31 08:13:36 -08:00
parent 1d335b3332
commit 5fe3a01b60
5 changed files with 119 additions and 9 deletions

View File

@ -492,6 +492,9 @@ class ServersController(wsgi.Controller):
if host or node:
context.can(server_policies.SERVERS % 'create:forced_host', {})
# NOTE(danms): Don't require an answer from all cells here, as
# we assume that if a cell isn't reporting we won't schedule into
# it anyway. A bit of a gamble, but a reasonable one.
min_compute_version = service_obj.get_minimum_version_all_cells(
nova_context.get_admin_context(), ['nova-compute'])
supports_device_tagging = (min_compute_version >=

View File

@ -3817,6 +3817,11 @@ class API(base.Base):
else:
# The instance is being created and we don't know which
# cell it's going to land in, so check all cells.
# NOTE(danms): We don't require all cells to report here since
# we're really concerned about the new-ness of cells that the
# instance may be scheduled into. If a cell doesn't respond here,
# then it won't be a candidate for the instance and thus doesn't
# matter.
min_compute_version = \
objects.service.get_minimum_version_all_cells(
context, ['nova-compute'])

View File

@ -460,17 +460,72 @@ class Service(base.NovaPersistentObject, base.NovaObject,
use_slave=use_slave)
def get_minimum_version_all_cells(context, binaries):
"""Get the minimum service version, checking all cells"""
def get_minimum_version_all_cells(context, binaries, require_all=False):
"""Get the minimum service version, checking all cells.
This attempts to calculate the minimum service version for a set
of binaries across all the cells in the system. If require_all
is False, then any cells that fail to report a version will be
ignored (assuming they won't be candidates for scheduling and thus
excluding them from the minimum version calculation is reasonable).
If require_all is True, then a failing cell will cause this to raise
exception.CellTimeout, as would be appropriate for gating some
data migration until everything is new enough.
Note that services that do not report a positive version are excluded
from this, as it crosses all cells which will naturally not have all
services.
"""
if not all(binary.startswith('nova-') for binary in binaries):
LOG.warning('get_minimum_version_all_cells called with '
'likely-incorrect binaries `%s\'', ','.join(binaries))
raise exception.ObjectActionError(
action='get_minimum_version_all_cells',
reason='Invalid binary prefix')
# NOTE(danms): Instead of using Service.get_minimum_version_multi(), we
# replicate the call directly to the underlying DB method here because
# we want to defeat the caching and we need to filter non-present
# services differently from the single-cell method.
results = nova_context.scatter_gather_all_cells(
context,
Service._db_service_get_minimum_version,
binaries)
cells = objects.CellMappingList.get_all(context)
min_version = None
for cell in cells:
with nova_context.target_cell(context, cell) as cctxt:
version = objects.Service.get_minimum_version_multi(
cctxt, binaries)
min_version = min(min_version, version) if min_version else version
return min_version
for cell_uuid, result in results.items():
if result is nova_context.did_not_respond_sentinel:
LOG.warning('Cell %s did not respond when getting minimum '
'service version', cell_uuid)
if require_all:
raise exception.CellTimeout()
elif result is nova_context.raised_exception_sentinel:
LOG.warning('Failed to get minimum service version for cell %s',
cell_uuid)
if require_all:
# NOTE(danms): Okay, this isn't necessarily a timeout, but
# it's functionally the same from the caller's perspective
# and we logged the fact that it was actually a failure
# for the forensic investigator during the scatter/gather
# routine.
raise exception.CellTimeout()
else:
# NOTE(danms): Don't consider a zero or None result as the minimum
# since we're crossing cells and will likely not have all the
# services being probed.
relevant_versions = [version for version in result.values()
if version]
if relevant_versions:
min_version_cell = min(relevant_versions)
min_version = (min(min_version, min_version_cell)
if min_version else min_version_cell)
# NOTE(danms): If we got no matches at all (such as at first startup)
# then report that as zero to be consistent with the other such
# methods.
return min_version or 0
@base.NovaObjectRegistry.register

View File

@ -1040,6 +1040,9 @@ class AllServicesCurrent(fixtures.Fixture):
self.useFixture(fixtures.MonkeyPatch(
'nova.objects.Service.get_minimum_version_multi',
self._fake_minimum))
self.useFixture(fixtures.MonkeyPatch(
'nova.objects.service.get_minimum_version_all_cells',
lambda *a, **k: service_obj.SERVICE_VERSION))
compute_rpcapi.LAST_VERSION = None
def _fake_minimum(self, *args, **kwargs):

View File

@ -16,6 +16,7 @@ import mock
from oslo_utils import timeutils
from oslo_versionedobjects import base as ovo_base
from oslo_versionedobjects import exception as ovo_exc
import six
from nova.compute import manager as compute_manager
from nova import context
@ -535,3 +536,46 @@ class TestServiceVersionCells(test.TestCase):
self._create_services(16, 16, 13, 16)
self.assertEqual(13, service.get_minimum_version_all_cells(
self.context, ['nova-compute']))
@mock.patch('nova.objects.service.LOG')
def test_get_minimum_version_checks_binary(self, mock_log):
ex = self.assertRaises(exception.ObjectActionError,
service.get_minimum_version_all_cells,
self.context, ['compute'])
self.assertIn('Invalid binary prefix', six.text_type(ex))
self.assertTrue(mock_log.warning.called)
@mock.patch('nova.context.scatter_gather_all_cells')
def test_version_all_cells_with_fail(self, mock_scatter):
mock_scatter.return_value = {
'foo': {'nova-compute': 13},
'bar': context.raised_exception_sentinel,
}
self.assertEqual(13, service.get_minimum_version_all_cells(
self.context, ['nova-compute']))
self.assertRaises(exception.CellTimeout,
service.get_minimum_version_all_cells,
self.context, ['nova-compute'],
require_all=True)
@mock.patch('nova.context.scatter_gather_all_cells')
def test_version_all_cells_with_timeout(self, mock_scatter):
mock_scatter.return_value = {
'foo': {'nova-compute': 13},
'bar': context.did_not_respond_sentinel,
}
self.assertEqual(13, service.get_minimum_version_all_cells(
self.context, ['nova-compute']))
self.assertRaises(exception.CellTimeout,
service.get_minimum_version_all_cells,
self.context, ['nova-compute'],
require_all=True)
@mock.patch('nova.context.scatter_gather_all_cells')
def test_version_all_cells_exclude_zero_service(self, mock_scatter):
mock_scatter.return_value = {
'foo': {'nova-compute': 13},
'bar': {'nova-compute': 0},
}
self.assertEqual(13, service.get_minimum_version_all_cells(
self.context, ['nova-compute']))