diff --git a/nova/api/openstack/compute/servers.py b/nova/api/openstack/compute/servers.py index b7abe976e78e..b056999a4cc2 100644 --- a/nova/api/openstack/compute/servers.py +++ b/nova/api/openstack/compute/servers.py @@ -492,6 +492,9 @@ class ServersController(wsgi.Controller): if host or node: context.can(server_policies.SERVERS % 'create:forced_host', {}) + # NOTE(danms): Don't require an answer from all cells here, as + # we assume that if a cell isn't reporting we won't schedule into + # it anyway. A bit of a gamble, but a reasonable one. min_compute_version = service_obj.get_minimum_version_all_cells( nova_context.get_admin_context(), ['nova-compute']) supports_device_tagging = (min_compute_version >= diff --git a/nova/compute/api.py b/nova/compute/api.py index 1c4520c2c59c..35048dee6535 100644 --- a/nova/compute/api.py +++ b/nova/compute/api.py @@ -3921,6 +3921,11 @@ class API(base.Base): else: # The instance is being created and we don't know which # cell it's going to land in, so check all cells. + # NOTE(danms): We don't require all cells to report here since + # we're really concerned about the new-ness of cells that the + # instance may be scheduled into. If a cell doesn't respond here, + # then it won't be a candidate for the instance and thus doesn't + # matter. min_compute_version = \ objects.service.get_minimum_version_all_cells( context, ['nova-compute']) diff --git a/nova/objects/service.py b/nova/objects/service.py index 6445a1e68d4c..560166992a28 100644 --- a/nova/objects/service.py +++ b/nova/objects/service.py @@ -460,17 +460,72 @@ class Service(base.NovaPersistentObject, base.NovaObject, use_slave=use_slave) -def get_minimum_version_all_cells(context, binaries): - """Get the minimum service version, checking all cells""" +def get_minimum_version_all_cells(context, binaries, require_all=False): + """Get the minimum service version, checking all cells. + + This attempts to calculate the minimum service version for a set + of binaries across all the cells in the system. If require_all + is False, then any cells that fail to report a version will be + ignored (assuming they won't be candidates for scheduling and thus + excluding them from the minimum version calculation is reasonable). + If require_all is True, then a failing cell will cause this to raise + exception.CellTimeout, as would be appropriate for gating some + data migration until everything is new enough. + + Note that services that do not report a positive version are excluded + from this, as it crosses all cells which will naturally not have all + services. + """ + + if not all(binary.startswith('nova-') for binary in binaries): + LOG.warning('get_minimum_version_all_cells called with ' + 'likely-incorrect binaries `%s\'', ','.join(binaries)) + raise exception.ObjectActionError( + action='get_minimum_version_all_cells', + reason='Invalid binary prefix') + + # NOTE(danms): Instead of using Service.get_minimum_version_multi(), we + # replicate the call directly to the underlying DB method here because + # we want to defeat the caching and we need to filter non-present + # services differently from the single-cell method. + + results = nova_context.scatter_gather_all_cells( + context, + Service._db_service_get_minimum_version, + binaries) - cells = objects.CellMappingList.get_all(context) min_version = None - for cell in cells: - with nova_context.target_cell(context, cell) as cctxt: - version = objects.Service.get_minimum_version_multi( - cctxt, binaries) - min_version = min(min_version, version) if min_version else version - return min_version + for cell_uuid, result in results.items(): + if result is nova_context.did_not_respond_sentinel: + LOG.warning('Cell %s did not respond when getting minimum ' + 'service version', cell_uuid) + if require_all: + raise exception.CellTimeout() + elif result is nova_context.raised_exception_sentinel: + LOG.warning('Failed to get minimum service version for cell %s', + cell_uuid) + if require_all: + # NOTE(danms): Okay, this isn't necessarily a timeout, but + # it's functionally the same from the caller's perspective + # and we logged the fact that it was actually a failure + # for the forensic investigator during the scatter/gather + # routine. + raise exception.CellTimeout() + else: + # NOTE(danms): Don't consider a zero or None result as the minimum + # since we're crossing cells and will likely not have all the + # services being probed. + relevant_versions = [version for version in result.values() + if version] + if relevant_versions: + min_version_cell = min(relevant_versions) + min_version = (min(min_version, min_version_cell) + if min_version else min_version_cell) + + # NOTE(danms): If we got no matches at all (such as at first startup) + # then report that as zero to be consistent with the other such + # methods. + return min_version or 0 @base.NovaObjectRegistry.register diff --git a/nova/tests/fixtures.py b/nova/tests/fixtures.py index 570160b0a8a6..331b69eb8747 100644 --- a/nova/tests/fixtures.py +++ b/nova/tests/fixtures.py @@ -1046,6 +1046,9 @@ class AllServicesCurrent(fixtures.Fixture): self.useFixture(fixtures.MonkeyPatch( 'nova.objects.Service.get_minimum_version_multi', self._fake_minimum)) + self.useFixture(fixtures.MonkeyPatch( + 'nova.objects.service.get_minimum_version_all_cells', + lambda *a, **k: service_obj.SERVICE_VERSION)) compute_rpcapi.LAST_VERSION = None def _fake_minimum(self, *args, **kwargs): diff --git a/nova/tests/unit/objects/test_service.py b/nova/tests/unit/objects/test_service.py index 59704bed7182..abf1ded76fa2 100644 --- a/nova/tests/unit/objects/test_service.py +++ b/nova/tests/unit/objects/test_service.py @@ -16,6 +16,7 @@ import mock from oslo_utils import timeutils from oslo_versionedobjects import base as ovo_base from oslo_versionedobjects import exception as ovo_exc +import six from nova.compute import manager as compute_manager from nova import context @@ -535,3 +536,46 @@ class TestServiceVersionCells(test.TestCase): self._create_services(16, 16, 13, 16) self.assertEqual(13, service.get_minimum_version_all_cells( self.context, ['nova-compute'])) + + @mock.patch('nova.objects.service.LOG') + def test_get_minimum_version_checks_binary(self, mock_log): + ex = self.assertRaises(exception.ObjectActionError, + service.get_minimum_version_all_cells, + self.context, ['compute']) + self.assertIn('Invalid binary prefix', six.text_type(ex)) + self.assertTrue(mock_log.warning.called) + + @mock.patch('nova.context.scatter_gather_all_cells') + def test_version_all_cells_with_fail(self, mock_scatter): + mock_scatter.return_value = { + 'foo': {'nova-compute': 13}, + 'bar': context.raised_exception_sentinel, + } + self.assertEqual(13, service.get_minimum_version_all_cells( + self.context, ['nova-compute'])) + self.assertRaises(exception.CellTimeout, + service.get_minimum_version_all_cells, + self.context, ['nova-compute'], + require_all=True) + + @mock.patch('nova.context.scatter_gather_all_cells') + def test_version_all_cells_with_timeout(self, mock_scatter): + mock_scatter.return_value = { + 'foo': {'nova-compute': 13}, + 'bar': context.did_not_respond_sentinel, + } + self.assertEqual(13, service.get_minimum_version_all_cells( + self.context, ['nova-compute'])) + self.assertRaises(exception.CellTimeout, + service.get_minimum_version_all_cells, + self.context, ['nova-compute'], + require_all=True) + + @mock.patch('nova.context.scatter_gather_all_cells') + def test_version_all_cells_exclude_zero_service(self, mock_scatter): + mock_scatter.return_value = { + 'foo': {'nova-compute': 13}, + 'bar': {'nova-compute': 0}, + } + self.assertEqual(13, service.get_minimum_version_all_cells( + self.context, ['nova-compute']))