Make service all-cells min version helper use scatter-gather
This makes the Service get_minimum_version_all_cells() helper use the parallel
scatter-gather utilities to make the determination. It also adds fault handling
for cases where a cell fails to respond, and an option to ignore or raise in
that situation.
Closes-Bug: #1746558
Change-Id: I62dc1033437f91bded28a9fd58759d2dbd7e55fb
(cherry picked from commit 5fe3a01b60
)
This commit is contained in:
parent
8369a78af0
commit
b7237a9e6c
@ -492,6 +492,9 @@ class ServersController(wsgi.Controller):
|
||||
if host or node:
|
||||
context.can(server_policies.SERVERS % 'create:forced_host', {})
|
||||
|
||||
# NOTE(danms): Don't require an answer from all cells here, as
|
||||
# we assume that if a cell isn't reporting we won't schedule into
|
||||
# it anyway. A bit of a gamble, but a reasonable one.
|
||||
min_compute_version = service_obj.get_minimum_version_all_cells(
|
||||
nova_context.get_admin_context(), ['nova-compute'])
|
||||
supports_device_tagging = (min_compute_version >=
|
||||
|
@ -3921,6 +3921,11 @@ class API(base.Base):
|
||||
else:
|
||||
# The instance is being created and we don't know which
|
||||
# cell it's going to land in, so check all cells.
|
||||
# NOTE(danms): We don't require all cells to report here since
|
||||
# we're really concerned about the new-ness of cells that the
|
||||
# instance may be scheduled into. If a cell doesn't respond here,
|
||||
# then it won't be a candidate for the instance and thus doesn't
|
||||
# matter.
|
||||
min_compute_version = \
|
||||
objects.service.get_minimum_version_all_cells(
|
||||
context, ['nova-compute'])
|
||||
|
@ -460,17 +460,72 @@ class Service(base.NovaPersistentObject, base.NovaObject,
|
||||
use_slave=use_slave)
|
||||
|
||||
|
||||
def get_minimum_version_all_cells(context, binaries):
|
||||
"""Get the minimum service version, checking all cells"""
|
||||
def get_minimum_version_all_cells(context, binaries, require_all=False):
|
||||
"""Get the minimum service version, checking all cells.
|
||||
|
||||
This attempts to calculate the minimum service version for a set
|
||||
of binaries across all the cells in the system. If require_all
|
||||
is False, then any cells that fail to report a version will be
|
||||
ignored (assuming they won't be candidates for scheduling and thus
|
||||
excluding them from the minimum version calculation is reasonable).
|
||||
If require_all is True, then a failing cell will cause this to raise
|
||||
exception.CellTimeout, as would be appropriate for gating some
|
||||
data migration until everything is new enough.
|
||||
|
||||
Note that services that do not report a positive version are excluded
|
||||
from this, as it crosses all cells which will naturally not have all
|
||||
services.
|
||||
"""
|
||||
|
||||
if not all(binary.startswith('nova-') for binary in binaries):
|
||||
LOG.warning('get_minimum_version_all_cells called with '
|
||||
'likely-incorrect binaries `%s\'', ','.join(binaries))
|
||||
raise exception.ObjectActionError(
|
||||
action='get_minimum_version_all_cells',
|
||||
reason='Invalid binary prefix')
|
||||
|
||||
# NOTE(danms): Instead of using Service.get_minimum_version_multi(), we
|
||||
# replicate the call directly to the underlying DB method here because
|
||||
# we want to defeat the caching and we need to filter non-present
|
||||
# services differently from the single-cell method.
|
||||
|
||||
results = nova_context.scatter_gather_all_cells(
|
||||
context,
|
||||
Service._db_service_get_minimum_version,
|
||||
binaries)
|
||||
|
||||
cells = objects.CellMappingList.get_all(context)
|
||||
min_version = None
|
||||
for cell in cells:
|
||||
with nova_context.target_cell(context, cell) as cctxt:
|
||||
version = objects.Service.get_minimum_version_multi(
|
||||
cctxt, binaries)
|
||||
min_version = min(min_version, version) if min_version else version
|
||||
return min_version
|
||||
for cell_uuid, result in results.items():
|
||||
if result is nova_context.did_not_respond_sentinel:
|
||||
LOG.warning('Cell %s did not respond when getting minimum '
|
||||
'service version', cell_uuid)
|
||||
if require_all:
|
||||
raise exception.CellTimeout()
|
||||
elif result is nova_context.raised_exception_sentinel:
|
||||
LOG.warning('Failed to get minimum service version for cell %s',
|
||||
cell_uuid)
|
||||
if require_all:
|
||||
# NOTE(danms): Okay, this isn't necessarily a timeout, but
|
||||
# it's functionally the same from the caller's perspective
|
||||
# and we logged the fact that it was actually a failure
|
||||
# for the forensic investigator during the scatter/gather
|
||||
# routine.
|
||||
raise exception.CellTimeout()
|
||||
else:
|
||||
# NOTE(danms): Don't consider a zero or None result as the minimum
|
||||
# since we're crossing cells and will likely not have all the
|
||||
# services being probed.
|
||||
relevant_versions = [version for version in result.values()
|
||||
if version]
|
||||
if relevant_versions:
|
||||
min_version_cell = min(relevant_versions)
|
||||
min_version = (min(min_version, min_version_cell)
|
||||
if min_version else min_version_cell)
|
||||
|
||||
# NOTE(danms): If we got no matches at all (such as at first startup)
|
||||
# then report that as zero to be consistent with the other such
|
||||
# methods.
|
||||
return min_version or 0
|
||||
|
||||
|
||||
@base.NovaObjectRegistry.register
|
||||
|
@ -1046,6 +1046,9 @@ class AllServicesCurrent(fixtures.Fixture):
|
||||
self.useFixture(fixtures.MonkeyPatch(
|
||||
'nova.objects.Service.get_minimum_version_multi',
|
||||
self._fake_minimum))
|
||||
self.useFixture(fixtures.MonkeyPatch(
|
||||
'nova.objects.service.get_minimum_version_all_cells',
|
||||
lambda *a, **k: service_obj.SERVICE_VERSION))
|
||||
compute_rpcapi.LAST_VERSION = None
|
||||
|
||||
def _fake_minimum(self, *args, **kwargs):
|
||||
|
@ -16,6 +16,7 @@ import mock
|
||||
from oslo_utils import timeutils
|
||||
from oslo_versionedobjects import base as ovo_base
|
||||
from oslo_versionedobjects import exception as ovo_exc
|
||||
import six
|
||||
|
||||
from nova.compute import manager as compute_manager
|
||||
from nova import context
|
||||
@ -535,3 +536,46 @@ class TestServiceVersionCells(test.TestCase):
|
||||
self._create_services(16, 16, 13, 16)
|
||||
self.assertEqual(13, service.get_minimum_version_all_cells(
|
||||
self.context, ['nova-compute']))
|
||||
|
||||
@mock.patch('nova.objects.service.LOG')
|
||||
def test_get_minimum_version_checks_binary(self, mock_log):
|
||||
ex = self.assertRaises(exception.ObjectActionError,
|
||||
service.get_minimum_version_all_cells,
|
||||
self.context, ['compute'])
|
||||
self.assertIn('Invalid binary prefix', six.text_type(ex))
|
||||
self.assertTrue(mock_log.warning.called)
|
||||
|
||||
@mock.patch('nova.context.scatter_gather_all_cells')
|
||||
def test_version_all_cells_with_fail(self, mock_scatter):
|
||||
mock_scatter.return_value = {
|
||||
'foo': {'nova-compute': 13},
|
||||
'bar': context.raised_exception_sentinel,
|
||||
}
|
||||
self.assertEqual(13, service.get_minimum_version_all_cells(
|
||||
self.context, ['nova-compute']))
|
||||
self.assertRaises(exception.CellTimeout,
|
||||
service.get_minimum_version_all_cells,
|
||||
self.context, ['nova-compute'],
|
||||
require_all=True)
|
||||
|
||||
@mock.patch('nova.context.scatter_gather_all_cells')
|
||||
def test_version_all_cells_with_timeout(self, mock_scatter):
|
||||
mock_scatter.return_value = {
|
||||
'foo': {'nova-compute': 13},
|
||||
'bar': context.did_not_respond_sentinel,
|
||||
}
|
||||
self.assertEqual(13, service.get_minimum_version_all_cells(
|
||||
self.context, ['nova-compute']))
|
||||
self.assertRaises(exception.CellTimeout,
|
||||
service.get_minimum_version_all_cells,
|
||||
self.context, ['nova-compute'],
|
||||
require_all=True)
|
||||
|
||||
@mock.patch('nova.context.scatter_gather_all_cells')
|
||||
def test_version_all_cells_exclude_zero_service(self, mock_scatter):
|
||||
mock_scatter.return_value = {
|
||||
'foo': {'nova-compute': 13},
|
||||
'bar': {'nova-compute': 0},
|
||||
}
|
||||
self.assertEqual(13, service.get_minimum_version_all_cells(
|
||||
self.context, ['nova-compute']))
|
||||
|
Loading…
Reference in New Issue
Block a user