diff --git a/nova/scheduler/driver.py b/nova/scheduler/driver.py
index db6acb88f..c75a5014c 100644
--- a/nova/scheduler/driver.py
+++ b/nova/scheduler/driver.py
@@ -44,6 +44,9 @@ scheduler_driver_opts = [
     cfg.StrOpt('scheduler_host_manager',
                default='nova.scheduler.host_manager.HostManager',
                help='The scheduler host manager class to use'),
+    cfg.IntOpt('scheduler_max_attempts',
+               default=3,
+               help='Maximum number of attempts to schedule an instance'),
     ]
 
 FLAGS = flags.FLAGS
diff --git a/nova/scheduler/filter_scheduler.py b/nova/scheduler/filter_scheduler.py
index dce7f9ae0..69682be45 100644
--- a/nova/scheduler/filter_scheduler.py
+++ b/nova/scheduler/filter_scheduler.py
@@ -59,7 +59,6 @@ class FilterScheduler(driver.Scheduler):
 
         Returns a list of the instances created.
         """
-        elevated = context.elevated()
         num_instances = request_spec.get('num_instances', 1)
 
         LOG.debug(_("Attempting to build %(num_instances)d instance(s)") %
@@ -69,15 +68,16 @@ class FilterScheduler(driver.Scheduler):
         notifier.notify(context, notifier.publisher_id("scheduler"),
                         'scheduler.run_instance.start', notifier.INFO, payload)
 
+        filter_properties = kwargs.pop('filter_properties', {})
         weighted_hosts = self._schedule(context, "compute", request_spec,
-                                        *args, **kwargs)
+                                        filter_properties, *args, **kwargs)
 
         if not weighted_hosts:
             raise exception.NoValidHost(reason="")
 
         # NOTE(comstud): Make sure we do not pass this through.  It
         # contains an instance of RpcContext that cannot be serialized.
-        kwargs.pop('filter_properties', None)
+        filter_properties.pop('context', None)
 
         instances = []
         for num in xrange(num_instances):
@@ -86,9 +86,14 @@ class FilterScheduler(driver.Scheduler):
             weighted_host = weighted_hosts.pop(0)
 
             request_spec['instance_properties']['launch_index'] = num
+
             instance = self._provision_resource(elevated, weighted_host,
                                                 request_spec, reservations,
-                                                kwargs)
+                                                filter_properties, kwargs)
+            # scrub retry host list in case we're scheduling multiple
+            # instances:
+            retry = filter_properties.get('retry', {})
+            retry['hosts'] = []
 
             if instance:
                 instances.append(instance)
@@ -120,11 +125,14 @@ class FilterScheduler(driver.Scheduler):
                 'prep_resize', **kwargs)
 
     def _provision_resource(self, context, weighted_host, request_spec,
-            reservations, kwargs):
+            reservations, filter_properties, kwargs):
         """Create the requested resource in this Zone."""
         instance = self.create_instance_db_entry(context, request_spec,
                                                  reservations)
 
+        # Add a retry entry for the selected compute host:
+        self._add_retry_host(filter_properties, weighted_host.host_state.host)
+
         payload = dict(request_spec=request_spec,
                        weighted_host=weighted_host.to_dict(),
                        instance_id=instance['uuid'])
@@ -133,14 +141,29 @@ class FilterScheduler(driver.Scheduler):
                         payload)
 
         driver.cast_to_compute_host(context, weighted_host.host_state.host,
-                'run_instance', instance_uuid=instance['uuid'], **kwargs)
+                'run_instance', instance_uuid=instance['uuid'],
+                request_spec=request_spec, filter_properties=filter_properties,
+                **kwargs)
         inst = driver.encode_instance(instance, local=True)
+
         # So if another instance is created, create_instance_db_entry will
         # actually create a new entry, instead of assume it's been created
         # already
         del request_spec['instance_properties']['uuid']
+
         return inst
 
+    def _add_retry_host(self, filter_properties, host):
+        """Add a retry entry for the selected compute host. In the event that
+        the request gets re-scheduled, this entry will signal that the given
+        host has already been tried.
+ """ + retry = filter_properties.get('retry', None) + if not retry: + return + hosts = retry['hosts'] + hosts.append(host) + def _get_configuration_options(self): """Fetch options dictionary. Broken out for testing.""" return self.options.get_configuration() @@ -151,7 +174,41 @@ class FilterScheduler(driver.Scheduler): """ pass - def _schedule(self, context, topic, request_spec, *args, **kwargs): + def _max_attempts(self): + max_attempts = FLAGS.scheduler_max_attempts + if max_attempts < 1: + raise exception.NovaException(_("Invalid value for " + "'scheduler_max_attempts', must be >= 1")) + return max_attempts + + def _populate_retry(self, filter_properties, instance_properties): + """Populate filter properties with history of retries for this + request. If maximum retries is exceeded, raise NoValidHost. + """ + max_attempts = self._max_attempts() + retry = filter_properties.pop('retry', {}) + + if max_attempts == 1: + # re-scheduling is disabled. + return + + # retry is enabled, update attempt count: + if retry: + retry['num_attempts'] += 1 + else: + retry = { + 'num_attempts': 1, + 'hosts': [] # list of compute hosts tried + } + filter_properties['retry'] = retry + + if retry['num_attempts'] > max_attempts: + uuid = instance_properties.get('uuid', None) + msg = _("Exceeded max scheduling attempts %d ") % max_attempts + raise exception.NoValidHost(msg, instance_uuid=uuid) + + def _schedule(self, context, topic, request_spec, filter_properties, *args, + **kwargs): """Returns a list of hosts that meet the required specs, ordered by their fitness. """ @@ -166,7 +223,9 @@ class FilterScheduler(driver.Scheduler): cost_functions = self.get_cost_functions() config_options = self._get_configuration_options() - filter_properties = kwargs.get('filter_properties', {}) + # check retry policy: + self._populate_retry(filter_properties, instance_properties) + filter_properties.update({'context': context, 'request_spec': request_spec, 'config_options': config_options, diff --git a/nova/scheduler/filters/retry_filter.py b/nova/scheduler/filters/retry_filter.py new file mode 100644 index 000000000..6740ec099 --- /dev/null +++ b/nova/scheduler/filters/retry_filter.py @@ -0,0 +1,42 @@ +# Copyright (c) 2012 OpenStack, LLC. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from nova.openstack.common import log as logging +from nova.scheduler import filters + +LOG = logging.getLogger(__name__) + + +class RetryFilter(filters.BaseHostFilter): + """Filter out hosts that have already been attempted for scheduling + purposes + """ + + def host_passes(self, host_state, filter_properties): + """Skip hosts that have already been attempted""" + retry = filter_properties.get('retry', None) + if not retry: + # Re-scheduling is disabled + LOG.debug("Re-scheduling is disabled") + return True + + hosts = retry.get('hosts', []) + host = host_state.host + + LOG.debug(_("Previously tried hosts: %(hosts)s. 
+                locals())
+
+        # Host passes if it's not in the list of previously attempted hosts:
+        return host not in hosts
diff --git a/nova/scheduler/host_manager.py b/nova/scheduler/host_manager.py
index 2c1d597b7..6b450c769 100644
--- a/nova/scheduler/host_manager.py
+++ b/nova/scheduler/host_manager.py
@@ -44,6 +44,7 @@ host_manager_opts = [
                 'maps to all filters included with nova.'),
     cfg.ListOpt('scheduler_default_filters',
                 default=[
+                  'RetryFilter',
                   'AvailabilityZoneFilter',
                   'RamFilter',
                   'ComputeFilter'
diff --git a/nova/tests/scheduler/test_filter_scheduler.py b/nova/tests/scheduler/test_filter_scheduler.py
index 1af48dc53..158fc0d83 100644
--- a/nova/tests/scheduler/test_filter_scheduler.py
+++ b/nova/tests/scheduler/test_filter_scheduler.py
@@ -79,7 +79,7 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
         sched = fakes.FakeFilterScheduler()
         fake_context = context.RequestContext('user', 'project')
         self.assertRaises(NotImplementedError, sched._schedule, fake_context,
-                          "foo", {})
+                          "foo", {}, {})
 
     def test_scheduler_includes_launch_index(self):
         ctxt = "fake-context"
@@ -111,18 +111,18 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
         self.mox.StubOutWithMock(self.driver, '_provision_resource')
 
         self.driver._schedule(context_fake, 'compute',
-                              request_spec, **fake_kwargs
+                              request_spec, {}, **fake_kwargs
                               ).AndReturn(['host1', 'host2'])
         # instance 1
         self.driver._provision_resource(
             ctxt, 'host1',
             mox.Func(_has_launch_index(0)), None,
-            fake_kwargs).AndReturn(instance1)
+            {}, fake_kwargs).AndReturn(instance1)
         # instance 2
         self.driver._provision_resource(
             ctxt, 'host2',
             mox.Func(_has_launch_index(1)), None,
-            fake_kwargs).AndReturn(instance2)
+            {}, fake_kwargs).AndReturn(instance2)
         self.mox.ReplayAll()
 
         self.driver.schedule_run_instance(context_fake, request_spec, None,
@@ -160,7 +160,7 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
                          'vcpus': 1}}
         self.mox.ReplayAll()
         weighted_hosts = sched._schedule(fake_context, 'compute',
-                                         request_spec)
+                                         request_spec, {})
         self.assertEquals(len(weighted_hosts), 10)
         for weighted_host in weighted_hosts:
             self.assertTrue(weighted_host.host_state is not None)
@@ -176,3 +176,88 @@ class FilterSchedulerTestCase(test_scheduler.SchedulerTestCase):
         hostinfo.update_from_compute_node(dict(memory_mb=1000, local_gb=0,
                 vcpus=1))
         self.assertEquals(1000 - 128, fn(hostinfo, {}))
+
+    def test_max_attempts(self):
+        self.flags(scheduler_max_attempts=4)
+
+        sched = fakes.FakeFilterScheduler()
+        self.assertEqual(4, sched._max_attempts())
+
+    def test_invalid_max_attempts(self):
+        self.flags(scheduler_max_attempts=0)
+
+        sched = fakes.FakeFilterScheduler()
+        self.assertRaises(exception.NovaException, sched._max_attempts)
+
+    def test_retry_disabled(self):
+        """Retry info should not get populated when re-scheduling is off"""
+        self.flags(scheduler_max_attempts=1)
+        sched = fakes.FakeFilterScheduler()
+
+        instance_properties = {}
+        request_spec = dict(instance_properties=instance_properties)
+        filter_properties = {}
+
+        sched._schedule(self.context, 'compute', request_spec,
+                        filter_properties=filter_properties)
+
+        # should not have retry info in the populated filter properties:
+        self.assertFalse("retry" in filter_properties)
+
+    def test_retry_attempt_one(self):
+        """Test retry logic on initial scheduling attempt"""
+        self.flags(scheduler_max_attempts=2)
+        sched = fakes.FakeFilterScheduler()
+
+        instance_properties = {}
+        request_spec = dict(instance_properties=instance_properties)
+        filter_properties = {}
+
+        sched._schedule(self.context, 'compute', request_spec,
+                        filter_properties=filter_properties)
+
+        num_attempts = filter_properties['retry']['num_attempts']
+        self.assertEqual(1, num_attempts)
+
+    def test_retry_attempt_two(self):
+        """Test retry logic when re-scheduling"""
+        self.flags(scheduler_max_attempts=2)
+        sched = fakes.FakeFilterScheduler()
+
+        instance_properties = {}
+        request_spec = dict(instance_properties=instance_properties)
+
+        retry = dict(num_attempts=1)
+        filter_properties = dict(retry=retry)
+
+        sched._schedule(self.context, 'compute', request_spec,
+                        filter_properties=filter_properties)
+
+        num_attempts = filter_properties['retry']['num_attempts']
+        self.assertEqual(2, num_attempts)
+
+    def test_retry_exceeded_max_attempts(self):
+        """Test for necessary explosion when max retries is exceeded"""
+        self.flags(scheduler_max_attempts=2)
+        sched = fakes.FakeFilterScheduler()
+
+        instance_properties = {}
+        request_spec = dict(instance_properties=instance_properties)
+
+        retry = dict(num_attempts=2)
+        filter_properties = dict(retry=retry)
+
+        self.assertRaises(exception.NoValidHost, sched._schedule, self.context,
+                'compute', request_spec, filter_properties=filter_properties)
+
+    def test_add_retry_host(self):
+        retry = dict(num_attempts=1, hosts=[])
+        filter_properties = dict(retry=retry)
+        host = "fakehost"
+
+        sched = fakes.FakeFilterScheduler()
+        sched._add_retry_host(filter_properties, host)
+
+        hosts = filter_properties['retry']['hosts']
+        self.assertEqual(1, len(hosts))
+        self.assertEqual(host, hosts[0])
diff --git a/nova/tests/scheduler/test_host_filters.py b/nova/tests/scheduler/test_host_filters.py
index c6fabc1b5..759bd70ec 100644
--- a/nova/tests/scheduler/test_host_filters.py
+++ b/nova/tests/scheduler/test_host_filters.py
@@ -885,3 +885,26 @@ class HostFiltersTestCase(test.TestCase):
         host = fakes.FakeHostState('host1', 'compute',
                 {'capabilities': capabilities, 'service': service})
         self.assertFalse(filt_cls.host_passes(host, filter_properties))
+
+    def test_retry_filter_disabled(self):
+        """Test case where retry/re-scheduling is disabled"""
+        filt_cls = self.class_map['RetryFilter']()
+        host = fakes.FakeHostState('host1', 'compute', {})
+        filter_properties = {}
+        self.assertTrue(filt_cls.host_passes(host, filter_properties))
+
+    def test_retry_filter_pass(self):
+        """Host not previously tried"""
+        filt_cls = self.class_map['RetryFilter']()
+        host = fakes.FakeHostState('host1', 'compute', {})
+        retry = dict(num_attempts=1, hosts=['host2', 'host3'])
+        filter_properties = dict(retry=retry)
+        self.assertTrue(filt_cls.host_passes(host, filter_properties))
+
+    def test_retry_filter_fail(self):
+        """Host was already tried"""
+        filt_cls = self.class_map['RetryFilter']()
+        host = fakes.FakeHostState('host1', 'compute', {})
+        retry = dict(num_attempts=1, hosts=['host3', 'host1'])
+        filter_properties = dict(retry=retry)
+        self.assertFalse(filt_cls.host_passes(host, filter_properties))
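
The retry flow introduced by this patch is easier to follow end to end with a small standalone sketch. The snippet below is not part of the patch; it mirrors, under simplified assumptions (plain functions instead of the scheduler classes, no FLAGS or mox), how _populate_retry, _add_retry_host and RetryFilter are intended to cooperate across a re-schedule:

# Illustrative sketch only -- runs outside of nova.
class NoValidHost(Exception):
    pass


def populate_retry(filter_properties, max_attempts):
    """Mirror of _populate_retry: track attempts, bail out when exhausted."""
    retry = filter_properties.pop('retry', {})
    if max_attempts == 1:
        return  # re-scheduling disabled; no retry info is recorded
    if retry:
        retry['num_attempts'] += 1
    else:
        retry = {'num_attempts': 1, 'hosts': []}
    filter_properties['retry'] = retry
    if retry['num_attempts'] > max_attempts:
        raise NoValidHost("Exceeded max scheduling attempts %d" % max_attempts)


def add_retry_host(filter_properties, host):
    """Mirror of _add_retry_host: remember which compute host was picked."""
    retry = filter_properties.get('retry')
    if retry:
        retry['hosts'].append(host)


def host_passes(host, filter_properties):
    """Mirror of RetryFilter.host_passes: skip hosts already tried."""
    retry = filter_properties.get('retry')
    if not retry:
        return True
    return host not in retry.get('hosts', [])


# First attempt: retry info is created and 'host1' is recorded; the build
# then fails on 'host1' and the request comes back for re-scheduling.
props = {}
populate_retry(props, max_attempts=3)
add_retry_host(props, 'host1')

# Second attempt: the counter advances and 'host1' is filtered out.
populate_retry(props, max_attempts=3)
assert props['retry']['num_attempts'] == 2
assert not host_passes('host1', props)
assert host_passes('host2', props)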