diff --git a/doc/source/admin/configuration/schedulers.rst b/doc/source/admin/configuration/schedulers.rst
index 538919d70646..1ef22d43fce3 100644
--- a/doc/source/admin/configuration/schedulers.rst
+++ b/doc/source/admin/configuration/schedulers.rst
@@ -822,6 +822,11 @@ Hosts and cells are weighted based on the following options in the
      - Multiplier used for weighing hosts for group soft-anti-affinity. Only a
        positive value is meaningful. Negative means that the behavior will
        change to the opposite, which is soft-affinity.
+   * - [filter_scheduler]
+     - ``build_failure_weight_multiplier``
+     - Multiplier used for weighing hosts which have recent build failures. A
+       positive value increases the significance of build failures recently
+       reported by the host, making that host less likely to be chosen.
    * - [metrics]
      - ``weight_multiplier``
      - Multiplier for weighting meters. Use a floating-point value.
diff --git a/nova/compute/manager.py b/nova/compute/manager.py
index a0c192ebbd6e..077ab3d2635c 100644
--- a/nova/compute/manager.py
+++ b/nova/compute/manager.py
@@ -525,7 +525,6 @@ class ComputeManager(manager.Manager):
                 CONF.max_concurrent_live_migrations)
         else:
             self._live_migration_semaphore = compute_utils.UnlimitedSemaphore()
-        self._failed_builds = 0
 
         super(ComputeManager, self).__init__(service_name="compute",
                                              *args, **kwargs)
@@ -1677,29 +1676,15 @@ class ComputeManager(manager.Manager):
         return block_device_info
 
     def _build_failed(self):
-        self._failed_builds += 1
-        limit = CONF.compute.consecutive_build_service_disable_threshold
-        if limit and self._failed_builds >= limit:
-            # NOTE(danms): If we're doing a bunch of parallel builds,
-            # it is possible (although not likely) that we have already
-            # failed N-1 builds before this and we race with a successful
-            # build and disable ourselves here when we might've otherwise
-            # not.
-            LOG.error('Disabling service due to %(fails)i '
-                      'consecutive build failures',
-                      {'fails': self._failed_builds})
-            ctx = nova.context.get_admin_context()
-            service = objects.Service.get_by_compute_host(ctx, CONF.host)
-            service.disabled = True
-            service.disabled_reason = (
-                'Auto-disabled due to %i build failures' % self._failed_builds)
-            service.save()
-            # NOTE(danms): Reset our counter now so that when the admin
-            # re-enables us we can start fresh
-            self._failed_builds = 0
-        elif self._failed_builds > 1:
-            LOG.warning('%(fails)i consecutive build failures',
-                        {'fails': self._failed_builds})
+        if CONF.compute.consecutive_build_service_disable_threshold:
+            rt = self._get_resource_tracker()
+            # NOTE(danms): Update our counter, but wait for the next
+            # update_available_resource() periodic to flush it to the DB
+            rt.stats.build_failed()
+
+    def _build_succeeded(self):
+        rt = self._get_resource_tracker()
+        rt.stats.build_succeeded()
 
     @wrap_exception()
     @reverts_task_state
@@ -1751,7 +1736,7 @@ class ComputeManager(manager.Manager):
                           build_results.RESCHEDULED):
                 self._build_failed()
             else:
-                self._failed_builds = 0
+                self._build_succeeded()
 
         # NOTE(danms): We spawn here to return the RPC worker thread back to
        # the pool. Since what follows could take a really long time, we don't
diff --git a/nova/compute/resource_tracker.py b/nova/compute/resource_tracker.py
index f2c90addc9d4..dda3b673b648 100644
--- a/nova/compute/resource_tracker.py
+++ b/nova/compute/resource_tracker.py
@@ -604,7 +604,13 @@ class ResourceTracker(object):
     def _copy_resources(self, compute_node, resources):
         """Copy resource values to supplied compute_node."""
         # purge old stats and init with anything passed in by the driver
+        # NOTE(danms): Preserve 'failed_builds' across the stats clearing,
+        # as that is not part of resources
+        # TODO(danms): Stop doing this when we get a column to store this
+        # directly
+        prev_failed_builds = self.stats.get('failed_builds', 0)
         self.stats.clear()
+        self.stats['failed_builds'] = prev_failed_builds
         self.stats.digest_stats(resources.get('stats'))
         compute_node.stats = copy.deepcopy(self.stats)
 
diff --git a/nova/compute/stats.py b/nova/compute/stats.py
index 9509c908ede6..cfbee2e6bc17 100644
--- a/nova/compute/stats.py
+++ b/nova/compute/stats.py
@@ -138,3 +138,11 @@ class Stats(dict):
                           os_type=os_type, project_id=project_id)
 
         return (vm_state, task_state, os_type, project_id)
+
+    def build_failed(self):
+        self['failed_builds'] = self.get('failed_builds', 0) + 1
+
+    def build_succeeded(self):
+        # FIXME(danms): Make this more graceful, either by time-based aging or
+        # a fixed decline upon success
+        self['failed_builds'] = 0
diff --git a/nova/conf/compute.py b/nova/conf/compute.py
index 214b685017b0..f101f0db0279 100644
--- a/nova/conf/compute.py
+++ b/nova/conf/compute.py
@@ -609,20 +609,20 @@ compute_group_opts = [
     cfg.IntOpt('consecutive_build_service_disable_threshold',
         default=10,
         help="""
-Number of consecutive failed builds that result in disabling a compute service.
+Enables reporting of build failures to the scheduler.
 
-This option will cause nova-compute to set itself to a disabled state
-if a certain number of consecutive build failures occur. This will
-prevent the scheduler from continuing to send builds to a compute node that is
-consistently failing. Note that all failures qualify and count towards this
-score, including reschedules that may have been due to racy scheduler behavior.
-Since the failures must be consecutive, it is unlikely that occasional expected
-reschedules will actually disable a compute node.
+Any nonzero value will enable sending build failure statistics to the
+scheduler for use by the BuildFailureWeigher.
 
 Possible values:
 
-* Any positive integer representing a build failure count.
-* Zero to never auto-disable.
+* Any positive integer enables reporting build failures.
+* Zero to disable reporting build failures.
+
+Related options:
+
+* [filter_scheduler]/build_failure_weight_multiplier
+
 """),
     cfg.IntOpt("shutdown_retry_interval",
         default=10,
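The three hunks above define the whole lifecycle of the new ``failed_builds``
stat. A minimal plain-Python sketch of that behavior (using a hypothetical
``FakeStats`` stand-in, not nova's actual classes):

```python
# Sketch (not nova code) of the 'failed_builds' counter lifecycle.
class FakeStats(dict):
    def build_failed(self):
        self['failed_builds'] = self.get('failed_builds', 0) + 1

    def build_succeeded(self):
        self['failed_builds'] = 0


stats = FakeStats()
stats.build_failed()
stats.build_failed()
assert stats['failed_builds'] == 2

# What _copy_resources() does on each update_available_resource() pass:
prev_failed_builds = stats.get('failed_builds', 0)
stats.clear()                                # driver stats are re-digested
stats['failed_builds'] = prev_failed_builds  # ...but the counter survives
assert stats['failed_builds'] == 2

stats.build_succeeded()                      # one successful build resets it
assert stats['failed_builds'] == 0
```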
diff --git a/nova/conf/scheduler.py b/nova/conf/scheduler.py
index d4d2415ee813..54ff66cda620 100644
--- a/nova/conf/scheduler.py
+++ b/nova/conf/scheduler.py
@@ -493,6 +493,34 @@ Possible values:
   for hosts with group soft anti-affinity. Only a positive value are
   meaningful, as negative values would make this behave as a soft affinity
   weigher.
+"""),
+    cfg.FloatOpt(
+        "build_failure_weight_multiplier",
+        default=1000000.0,
+        help="""
+Multiplier used for weighing hosts that have had recent build failures.
+
+This option determines how much weight is placed on a compute node with
+recent build failures. Build failures may indicate a failing, misconfigured,
+or otherwise ailing compute node, and avoiding it during scheduling may be
+beneficial. The weight is inversely proportional to the number of recent
+build failures the compute node has experienced. This value should be
+set to some high value to offset weight given by other enabled weighers
+due to available resources. To disable weighing compute hosts by the
+number of recent failures, set this to zero.
+
+This option is only used by the FilterScheduler and its subclasses; if you use
+a different scheduler, this option has no effect.
+
+Possible values:
+
+* An integer or float value, where the value corresponds to the multiplier
+  ratio for this weigher.
+
+Related options:
+
+* [compute]/consecutive_build_service_disable_threshold - Must be nonzero
+  for a compute to report data considered by this weigher.
 """),
     cfg.BoolOpt(
         "shuffle_best_same_weighed_hosts",
diff --git a/nova/scheduler/host_manager.py b/nova/scheduler/host_manager.py
index aab4b90ac53f..489e1fb051ec 100644
--- a/nova/scheduler/host_manager.py
+++ b/nova/scheduler/host_manager.py
@@ -256,6 +256,9 @@ class HostState(object):
         self.ram_allocation_ratio = compute.ram_allocation_ratio
         self.disk_allocation_ratio = compute.disk_allocation_ratio
 
+        # update failed_builds counter reported by the compute
+        self.failed_builds = int(self.stats.get('failed_builds', 0))
+
     def consume_from_request(self, spec_obj):
         """Incrementally update host state from a RequestSpec object."""
 
diff --git a/nova/scheduler/weights/compute.py b/nova/scheduler/weights/compute.py
new file mode 100644
index 000000000000..39e7813e05ee
--- /dev/null
+++ b/nova/scheduler/weights/compute.py
@@ -0,0 +1,33 @@
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+"""
+BuildFailure Weigher. Weigh hosts by the number of recent failed boot attempts.
+
+"""
+
+import nova.conf
+from nova.scheduler import weights
+
+CONF = nova.conf.CONF
+
+
+class BuildFailureWeigher(weights.BaseHostWeigher):
+    def weight_multiplier(self):
+        """Override the weight multiplier. Note this is negated."""
+        return -1 * CONF.filter_scheduler.build_failure_weight_multiplier
+
+    def _weigh_object(self, host_state, weight_properties):
+        """Higher weights win. Our multiplier is negative, so reduce our
+        weight by number of failed builds.
+        """
+        return host_state.failed_builds
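The weigher returns the raw ``failed_builds`` count and negates the configured
multiplier. Assuming nova's usual max-min normalization of each weigher's
output to the [0, 1] range, the final per-host weights work out as in this
rough sketch, which matches the expectations of the weigher unit test added
later in this patch (hosts with distinct failure counts assumed):

```python
# Worked example of the weigher math under max-min normalization.
multiplier = 1000.0  # [filter_scheduler]/build_failure_weight_multiplier
failed_builds = [0, 1, 10, 100]  # per-host counts, as in the unit test

lo, hi = min(failed_builds), max(failed_builds)
weights = [-1 * multiplier * (count - lo) / (hi - lo)
           for count in failed_builds]
print(weights)  # [-0.0, -10.0, -100.0, -1000.0]: 0 for the healthiest host
```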
diff --git a/nova/test.py b/nova/test.py
index d43782b7a91b..6b225b29c5ac 100644
--- a/nova/test.py
+++ b/nova/test.py
@@ -335,6 +335,11 @@ class TestCase(testtools.TestCase):
         self.useFixture(nova_fixtures.PrivsepNoHelperFixture())
         self.useFixture(mock_fixture.MockAutospecFixture())
 
+        # FIXME(danms): Disable this for all tests by default to avoid breaking
+        # any that depend on default/previous ordering
+        self.flags(build_failure_weight_multiplier=0.0,
+                   group='filter_scheduler')
+
     def _setup_cells(self):
         """Setup a normal cellsv2 environment.
diff --git a/nova/tests/functional/test_servers.py b/nova/tests/functional/test_servers.py
index e292408b54f6..7ea8329164ed 100644
--- a/nova/tests/functional/test_servers.py
+++ b/nova/tests/functional/test_servers.py
@@ -73,6 +73,7 @@ class ServersTestBase(integrated_helpers._IntegratedTestBase):
     _min_count_parameter = 'min_count'
 
     def setUp(self):
+        self.computes = {}
         super(ServersTestBase, self).setUp()
         # The network service is called as part of server creates but no
         # networks have been populated in the db, so stub the methods.
@@ -133,6 +134,30 @@ class ServersTest(ServersTestBase):
         for server in servers:
             LOG.debug("server: %s", server)
 
+    def _get_node_build_failures(self):
+        ctxt = context.get_admin_context()
+        computes = objects.ComputeNodeList.get_all(ctxt)
+        return {
+            node.hypervisor_hostname: int(node.stats.get('failed_builds', 0))
+            for node in computes}
+
+    def _run_periodics(self):
+        """Run the update_available_resource task on every compute manager
+
+        This runs periodics on the computes in an undefined order; some child
+        classes redefine this function to force a specific order.
+        """
+
+        if self.compute.host not in self.computes:
+            self.computes[self.compute.host] = self.compute
+
+        ctx = context.get_admin_context()
+        for compute in self.computes.values():
+            LOG.info('Running periodic for compute (%s)',
+                     compute.manager.host)
+            compute.manager.update_available_resource(ctx)
+        LOG.info('Finished with periodics')
+
     def test_create_server_with_error(self):
         # Create a server which will enter error state.
 
@@ -154,6 +179,12 @@ class ServersTest(ServersTestBase):
         self.assertEqual('ERROR', found_server['status'])
         self._delete_server(created_server_id)
 
+        # We should have no (persisted) build failures until we update
+        # resources, after which we should have one
+        self.assertEqual([0], list(self._get_node_build_failures().values()))
+        self._run_periodics()
+        self.assertEqual([1], list(self._get_node_build_failures().values()))
+
     def _test_create_server_with_error_with_retries(self):
         # Create a server which will enter error state.
 
@@ -161,6 +192,7 @@ class ServersTest(ServersTestBase):
         self.addCleanup(fake.restore_nodes)
         self.flags(host='host2')
         self.compute2 = self.start_service('compute', host='host2')
+        self.computes['compute2'] = self.compute2
 
         fails = []
 
@@ -188,11 +220,17 @@ class ServersTest(ServersTestBase):
         self.flags(max_attempts=2, group='scheduler')
         fails = self._test_create_server_with_error_with_retries()
         self.assertEqual(2, fails)
+        self._run_periodics()
+        self.assertEqual(
+            [1, 1], list(self._get_node_build_failures().values()))
 
     def test_create_server_with_error_with_no_retries(self):
         self.flags(max_attempts=1, group='scheduler')
         fails = self._test_create_server_with_error_with_retries()
         self.assertEqual(1, fails)
+        self._run_periodics()
+        self.assertEqual(
+            [0, 1], list(sorted(self._get_node_build_failures().values())))
 
     def test_create_and_delete_server(self):
         # Creates and deletes a server.
diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py
index 265300a0d14a..505392483aa2 100644
--- a/nova/tests/unit/compute/test_compute_mgr.py
+++ b/nova/tests/unit/compute/test_compute_mgr.py
@@ -5258,24 +5258,19 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
             nil_out_host_and_node=True)
 
     @mock.patch.object(manager.ComputeManager, '_do_build_and_run_instance')
-    @mock.patch('nova.objects.Service.get_by_compute_host')
-    def test_build_failures_disable_service(self, mock_service, mock_dbari):
+    @mock.patch('nova.compute.stats.Stats.build_failed')
+    def test_build_failures_reported(self, mock_failed, mock_dbari):
         mock_dbari.return_value = build_results.FAILED
         instance = objects.Instance(uuid=uuids.instance)
         for i in range(0, 10):
             self.compute.build_and_run_instance(self.context, instance, None,
                                                 None, None)
-        service = mock_service.return_value
-        self.assertTrue(service.disabled)
-        self.assertEqual('Auto-disabled due to 10 build failures',
-                         service.disabled_reason)
-        service.save.assert_called_once_with()
-        self.assertEqual(0, self.compute._failed_builds)
+
+        self.assertEqual(10, mock_failed.call_count)
 
     @mock.patch.object(manager.ComputeManager, '_do_build_and_run_instance')
-    @mock.patch('nova.objects.Service.get_by_compute_host')
-    def test_build_failures_not_disable_service(self, mock_service,
-                                                mock_dbari):
+    @mock.patch('nova.compute.stats.Stats.build_failed')
+    def test_build_failures_not_reported(self, mock_failed, mock_dbari):
         self.flags(consecutive_build_service_disable_threshold=0,
                    group='compute')
         mock_dbari.return_value = build_results.FAILED
@@ -5283,14 +5278,15 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
         for i in range(0, 10):
             self.compute.build_and_run_instance(self.context, instance, None,
                                                 None, None)
-        service = mock_service.return_value
-        self.assertFalse(service.save.called)
-        self.assertEqual(10, self.compute._failed_builds)
+
+        mock_failed.assert_not_called()
 
     @mock.patch.object(manager.ComputeManager, '_do_build_and_run_instance')
-    @mock.patch('nova.objects.Service.get_by_compute_host')
-    def test_transient_build_failures_no_disable_service(self, mock_service,
-                                                         mock_dbari):
+    @mock.patch.object(manager.ComputeManager, '_build_failed')
+    @mock.patch.object(manager.ComputeManager, '_build_succeeded')
+    def test_transient_build_failures_no_report(self, mock_succeeded,
+                                                mock_failed,
+                                                mock_dbari):
         results = [build_results.FAILED,
                    build_results.ACTIVE,
                    build_results.RESCHEDULED]
@@ -5306,31 +5302,34 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
         for i in range(0, 10):
             self.compute.build_and_run_instance(self.context, instance, None,
                                                 None, None)
-        service = mock_service.return_value
-        self.assertFalse(service.save.called)
-        self.assertEqual(0, self.compute._failed_builds)
+
+        self.assertEqual(2, mock_failed.call_count)
+        self.assertEqual(8, mock_succeeded.call_count)
 
     @mock.patch.object(manager.ComputeManager, '_do_build_and_run_instance')
-    @mock.patch('nova.objects.Service.get_by_compute_host')
-    def test_build_reschedules_disable_service(self, mock_service, mock_dbari):
+    @mock.patch.object(manager.ComputeManager, '_build_failed')
+    @mock.patch.object(manager.ComputeManager, '_build_succeeded')
+    def test_build_reschedules_reported(self, mock_succeeded,
+                                        mock_failed,
+                                        mock_dbari):
         mock_dbari.return_value = build_results.RESCHEDULED
         instance = objects.Instance(uuid=uuids.instance)
         for i in range(0, 10):
             self.compute.build_and_run_instance(self.context, instance, None,
                                                 None, None)
-        service = mock_service.return_value
-        self.assertTrue(service.disabled)
-        self.assertEqual('Auto-disabled due to 10 build failures',
-                         service.disabled_reason)
-        service.save.assert_called_once_with()
-        self.assertEqual(0, self.compute._failed_builds)
+
+        self.assertEqual(10, mock_failed.call_count)
+        mock_succeeded.assert_not_called()
 
     @mock.patch.object(manager.ComputeManager, '_do_build_and_run_instance')
-    @mock.patch('nova.objects.Service.get_by_compute_host')
     @mock.patch('nova.exception_wrapper._emit_exception_notification')
    @mock.patch('nova.compute.utils.add_instance_fault_from_exc')
-    def test_build_exceptions_disable_service(self, mock_if, mock_notify,
-                                              mock_service, mock_dbari):
+    @mock.patch.object(manager.ComputeManager, '_build_failed')
+    @mock.patch.object(manager.ComputeManager, '_build_succeeded')
+    def test_build_exceptions_reported(self, mock_succeeded,
+                                       mock_failed,
+                                       mock_if, mock_notify,
+                                       mock_dbari):
         mock_dbari.side_effect = test.TestingException()
         instance = objects.Instance(uuid=uuids.instance,
                                     task_state=None)
@@ -5339,12 +5338,9 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
             self.assertRaises(test.TestingException,
                               self.compute.build_and_run_instance,
                               self.context, instance, None, None, None)
-        service = mock_service.return_value
-        self.assertTrue(service.disabled)
-        self.assertEqual('Auto-disabled due to 10 build failures',
-                         service.disabled_reason)
-        service.save.assert_called_once_with()
-        self.assertEqual(0, self.compute._failed_builds)
+
+        self.assertEqual(10, mock_failed.call_count)
+        mock_succeeded.assert_not_called()
 
     @mock.patch.object(manager.ComputeManager, '_shutdown_instance')
     @mock.patch.object(manager.ComputeManager, '_build_networks_for_instance')
diff --git a/nova/tests/unit/compute/test_resource_tracker.py b/nova/tests/unit/compute/test_resource_tracker.py
index f390c979123c..43ee8b63fe2f 100644
--- a/nova/tests/unit/compute/test_resource_tracker.py
+++ b/nova/tests/unit/compute/test_resource_tracker.py
@@ -1155,7 +1155,7 @@ class TestInitComputeNode(BaseTestCase):
             ram_allocation_ratio=1.0,
             cpu_allocation_ratio=1.0,
             disk_allocation_ratio=1.0,
-            stats={},
+            stats={'failed_builds': 0},
             pci_device_pools=objects.PciDevicePoolList(objects=[]),
             uuid=uuids.compute_node_uuid
         )
diff --git a/nova/tests/unit/compute/test_stats.py b/nova/tests/unit/compute/test_stats.py
index 3010a5d5cbea..f58d8bdad452 100644
--- a/nova/tests/unit/compute/test_stats.py
+++ b/nova/tests/unit/compute/test_stats.py
@@ -238,3 +238,19 @@ class StatsTestCase(test.NoDBTestCase):
 
         self.assertEqual(0, len(self.stats))
         self.assertEqual(0, len(self.stats.states))
+
+    def test_build_failed_succeeded(self):
+        self.assertEqual('not-set', self.stats.get('failed_builds', 'not-set'))
+        self.stats.build_failed()
+        self.assertEqual(1, self.stats['failed_builds'])
+        self.stats.build_failed()
+        self.assertEqual(2, self.stats['failed_builds'])
+        self.stats.build_succeeded()
+        self.assertEqual(0, self.stats['failed_builds'])
+        self.stats.build_succeeded()
+        self.assertEqual(0, self.stats['failed_builds'])
+
+    def test_build_succeeded_first(self):
+        self.assertEqual('not-set', self.stats.get('failed_builds', 'not-set'))
+        self.stats.build_succeeded()
+        self.assertEqual(0, self.stats['failed_builds'])
diff --git a/nova/tests/unit/scheduler/test_caching_scheduler.py b/nova/tests/unit/scheduler/test_caching_scheduler.py
index dd72e73a260f..2760f9262910 100644
--- a/nova/tests/unit/scheduler/test_caching_scheduler.py
+++ b/nova/tests/unit/scheduler/test_caching_scheduler.py
@@ -163,6 +163,7 @@ class CachingSchedulerTestCase(test_scheduler.SchedulerTestCase):
         host_state.ram_allocation_ratio = 1.5
         host_state.disk_allocation_ratio = 1.0
         host_state.metrics = objects.MonitorMetricList(objects=[])
+        host_state.failed_builds = 0
         return host_state
 
     @mock.patch('nova.db.instance_extra_get_by_instance_uuid',
diff --git a/nova/tests/unit/scheduler/weights/test_weights_compute.py b/nova/tests/unit/scheduler/weights/test_weights_compute.py
new file mode 100644
index 000000000000..7e6e3814c948
--- /dev/null
+++ b/nova/tests/unit/scheduler/weights/test_weights_compute.py
@@ -0,0 +1,57 @@
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+"""
+Tests For Scheduler build failure weights.
+"""
+
+from nova.scheduler import weights
+from nova.scheduler.weights import compute
+from nova import test
+from nova.tests.unit.scheduler import fakes
+
+
+class BuildFailureWeigherTestCase(test.NoDBTestCase):
+    def setUp(self):
+        super(BuildFailureWeigherTestCase, self).setUp()
+        self.weight_handler = weights.HostWeightHandler()
+        self.weighers = [compute.BuildFailureWeigher()]
+
+    def _get_weighed_host(self, hosts):
+        return self.weight_handler.get_weighed_objects(self.weighers,
+                                                       hosts, {})
+
+    def _get_all_hosts(self):
+        host_values = [
+            ('host1', 'node1', {'failed_builds': 0}),
+            ('host2', 'node2', {'failed_builds': 1}),
+            ('host3', 'node3', {'failed_builds': 10}),
+            ('host4', 'node4', {'failed_builds': 100})
+        ]
+        return [fakes.FakeHostState(host, node, values)
+                for host, node, values in host_values]
+
+    def test_build_failure_weigher_disabled(self):
+        self.flags(build_failure_weight_multiplier=0.0,
+                   group='filter_scheduler')
+        hosts = self._get_all_hosts()
+        weighed_hosts = self._get_weighed_host(hosts)
+        self.assertTrue(all([wh.weight == 0.0
+                             for wh in weighed_hosts]))
+
+    def test_build_failure_weigher_scaled(self):
+        self.flags(build_failure_weight_multiplier=1000.0,
+                   group='filter_scheduler')
+        hosts = self._get_all_hosts()
+        weighed_hosts = self._get_weighed_host(hosts)
+        self.assertEqual([0, -10, -100, -1000],
+                         [wh.weight for wh in weighed_hosts])
diff --git a/releasenotes/notes/change-consecutive-boot-failure-counter-to-weigher-428de7da0ed2033a.yaml b/releasenotes/notes/change-consecutive-boot-failure-counter-to-weigher-428de7da0ed2033a.yaml
new file mode 100644
index 000000000000..fcf0d5f257d1
--- /dev/null
+++ b/releasenotes/notes/change-consecutive-boot-failure-counter-to-weigher-428de7da0ed2033a.yaml
@@ -0,0 +1,23 @@
+---
+security:
+  - |
+    To mitigate potential issues with compute nodes disabling
+    themselves in response to failures that were either non-fatal or
+    user-generated, the consecutive build failure counter
+    functionality in the compute service has been changed to advise
+    the scheduler of the count instead of self-disabling the service
+    upon exceeding the threshold. The
+    ``[compute]/consecutive_build_service_disable_threshold``
+    configuration option still controls whether the count is tracked,
+    but the action taken on this value has been changed to a scheduler
+    weigher.
+    This allows the scheduler to be configured to weigh hosts
+    with consecutive failures lower than other hosts, configured by the
+    ``[filter_scheduler]/build_failure_weight_multiplier`` option. If
+    the compute threshold option is nonzero, computes will report their
+    failure count for the scheduler to consider. If the threshold
+    value is zero, then computes will not report this value
+    and the scheduler will assume the number of failures for
+    non-reporting compute nodes to be zero. By default, the scheduler
+    weigher is enabled and configured with a very large multiplier to
+    ensure that hosts with consecutive failures are scored low by
+    default.
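Taken together, deployments tune this behavior with the two options the patch
touches: the compute-side reporting threshold and the scheduler-side
multiplier. A sketch of the relevant nova.conf sections, shown with the new
default values:

```ini
[compute]
# Nonzero: this compute reports its build-failure count to the scheduler.
# Zero: reporting is disabled entirely.
consecutive_build_service_disable_threshold = 10

[filter_scheduler]
# Large default so hosts with recent failures are weighted far below
# healthy hosts; set to 0.0 to disable the BuildFailureWeigher's effect.
build_failure_weight_multiplier = 1000000.0
```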