From 4abdde7af7f3987e4dc62c9f48892a9bd17687a8 Mon Sep 17 00:00:00 2001 From: Johannes Erdfelt Date: Fri, 30 Sep 2011 15:42:38 +0000 Subject: [PATCH] Restructure host filtering to be easier to use. The original design for host filtering in the scheduler required the entire filtering process be contained in a single class; contrast this with the design for weighting the hosts, which allowed you to specify a list of functions that would apply various weighting factors to the hosts. This commit modifies the filtering process to resemble the way that the weighting process is designed. Filters can now be small, focused classes, and you specify which filters to apply by setting the 'FLAGS.default_host_filters' flag to a list of the filter classes that match your needs. This is a port of the code from Launchpad, where it was orphaned: https://code.launchpad.net/~ed-leafe/nova/scheduler-multifilter/+merge/72478 Change-Id: I5f3eff6f21409a9f0eddda3392e9ff9d03039ebe --- nova/scheduler/abstract_scheduler.py | 4 +- nova/scheduler/base_scheduler.py | 27 +++++-- nova/scheduler/host_filter.py | 41 ++++++---- nova/scheduler/least_cost.py | 9 ++- .../scheduler/test_abstract_scheduler.py | 3 +- nova/tests/scheduler/test_host_filter.py | 74 ++++++++++--------- .../scheduler/test_least_cost_scheduler.py | 40 ++-------- 7 files changed, 102 insertions(+), 96 deletions(-) diff --git a/nova/scheduler/abstract_scheduler.py b/nova/scheduler/abstract_scheduler.py index e8712e5d..725f0b34 100644 --- a/nova/scheduler/abstract_scheduler.py +++ b/nova/scheduler/abstract_scheduler.py @@ -271,7 +271,7 @@ class AbstractScheduler(driver.Scheduler): # weigh the selected hosts. # weighted_hosts = [{weight=weight, hostname=hostname, # capabilities=capabs}, ...] - weighted_hosts = self.weigh_hosts(topic, request_spec, filtered_hosts) + weighted_hosts = self.weigh_hosts(request_spec, filtered_hosts) # Next, tack on the host weights from the child zones json_spec = json.dumps(request_spec) all_zones = db.zone_get_all(context.elevated()) @@ -306,7 +306,7 @@ class AbstractScheduler(driver.Scheduler): return [(host, services) for host, services in host_list if basic_ram_filter(host, services, request_spec)] - def weigh_hosts(self, topic, request_spec, hosts): + def weigh_hosts(self, request_spec, hosts): """This version assigns a weight of 1 to all hosts, making selection of any host basically a random event. Override this method in your subclass to add logic to prefer one potential host over another. diff --git a/nova/scheduler/base_scheduler.py b/nova/scheduler/base_scheduler.py index 132b8a49..77617a19 100644 --- a/nova/scheduler/base_scheduler.py +++ b/nova/scheduler/base_scheduler.py @@ -38,9 +38,20 @@ class BaseScheduler(abstract_scheduler.AbstractScheduler): """ def filter_hosts(self, topic, request_spec, hosts=None): """Filter the full host list (from the ZoneManager)""" - filter_name = request_spec.get('filter', None) - # Make sure that the requested filter is legitimate. - selected_filter = host_filter.choose_host_filter(filter_name) + filters = request_spec.get('filter') + if filters is None: + # Not specified; use the default + filters = FLAGS.default_host_filters + if not isinstance(filters, (list, tuple)): + filters = [filters] + if hosts is None: + # Get the full list (only considering 'compute' services) + all_hosts = self.zone_manager.service_states.iteritems() + hosts = [(host, services["compute"]) + for host, services in all_hosts + if "compute" in services] + # Make sure that the requested filters are legitimate. + selected_filters = host_filter.choose_host_filters(filters) # TODO(sandy): We're only using InstanceType-based specs # currently. Later we'll need to snoop for more detailed @@ -48,11 +59,13 @@ class BaseScheduler(abstract_scheduler.AbstractScheduler): instance_type = request_spec.get("instance_type", None) if instance_type is None: # No way to select; return the specified hosts - return hosts or [] - name, query = selected_filter.instance_type_to_filter(instance_type) - return selected_filter.filter_hosts(self.zone_manager, query) + return hosts + for selected_filter in selected_filters: + query = selected_filter.instance_type_to_filter(instance_type) + hosts = selected_filter.filter_hosts(hosts, query) + return hosts - def weigh_hosts(self, topic, request_spec, hosts): + def weigh_hosts(self, request_spec, hosts): """Derived classes may override this to provide more sophisticated scheduling objectives """ diff --git a/nova/scheduler/host_filter.py b/nova/scheduler/host_filter.py index 9f7d34ea..cb77f156 100644 --- a/nova/scheduler/host_filter.py +++ b/nova/scheduler/host_filter.py @@ -32,17 +32,16 @@ from nova import exception from nova import flags import nova.scheduler -# NOTE(Vek): Even though we don't use filters in here anywhere, we -# depend on default_host_filter being available in FLAGS, -# and that happens only when filters/abstract_filter.py is -# imported. from nova.scheduler import filters FLAGS = flags.FLAGS +flags.DEFINE_list('default_host_filters', ['AllHostsFilter'], + 'Which filters to use for filtering hosts when not specified ' + 'in the request.') -def _get_filters(): +def _get_filter_classes(): # Imported here to avoid circular imports from nova.scheduler import filters @@ -55,15 +54,29 @@ def _get_filters(): and get_itm(itm) is not filters.AbstractHostFilter] -def choose_host_filter(filter_name=None): - """Since the caller may specify which filter to use we need +def choose_host_filters(filters=None): + """Since the caller may specify which filters to use we need to have an authoritative list of what is permissible. This - function checks the filter name against a predefined set + function checks the filter names against a predefined set of acceptable filters. """ - if not filter_name: - filter_name = FLAGS.default_host_filter - for filter_class in _get_filters(): - if filter_class.__name__ == filter_name: - return filter_class() - raise exception.SchedulerHostFilterNotFound(filter_name=filter_name) + if not filters: + filters = FLAGS.default_host_filters + if not isinstance(filters, (list, tuple)): + filters = [filters] + good_filters = [] + bad_filters = [] + filter_classes = _get_filter_classes() + for filter_name in filters: + found_class = False + for cls in filter_classes: + if cls.__name__ == filter_name: + good_filters.append(cls()) + found_class = True + break + if not found_class: + bad_filters.append(filter_name) + if bad_filters: + msg = ", ".join(bad_filters) + raise exception.SchedulerHostFilterNotFound(filter_name=msg) + return good_filters diff --git a/nova/scheduler/least_cost.py b/nova/scheduler/least_cost.py index 1a6ef990..b35e3160 100644 --- a/nova/scheduler/least_cost.py +++ b/nova/scheduler/least_cost.py @@ -114,10 +114,13 @@ class LeastCostScheduler(base_scheduler.BaseScheduler): self.cost_fns_cache = {} super(LeastCostScheduler, self).__init__(*args, **kwargs) - def get_cost_fns(self, topic): + def get_cost_fns(self, topic=None): """Returns a list of tuples containing weights and cost functions to use for weighing hosts """ + if topic is None: + # Schedulers only support compute right now. + topic = "compute" if topic in self.cost_fns_cache: return self.cost_fns_cache[topic] cost_fns = [] @@ -151,11 +154,11 @@ class LeastCostScheduler(base_scheduler.BaseScheduler): self.cost_fns_cache[topic] = cost_fns return cost_fns - def weigh_hosts(self, topic, request_spec, hosts): + def weigh_hosts(self, request_spec, hosts): """Returns a list of dictionaries of form: [ {weight: weight, hostname: hostname, capabilities: capabs} ] """ - cost_fns = self.get_cost_fns(topic) + cost_fns = self.get_cost_fns() costs = weighted_sum(domain=hosts, weighted_fns=cost_fns) weighted = [] diff --git a/nova/tests/scheduler/test_abstract_scheduler.py b/nova/tests/scheduler/test_abstract_scheduler.py index da25f154..08b0b9cd 100644 --- a/nova/tests/scheduler/test_abstract_scheduler.py +++ b/nova/tests/scheduler/test_abstract_scheduler.py @@ -455,8 +455,7 @@ class BaseSchedulerTestCase(test.TestCase): # Call weigh_hosts() num_instances = len(hostlist) * 2 + len(hostlist) / 2 - instlist = sched.weigh_hosts('compute', - dict(num_instances=num_instances), + instlist = sched.weigh_hosts(dict(num_instances=num_instances), hostlist) # Should be enough entries to cover all instances diff --git a/nova/tests/scheduler/test_host_filter.py b/nova/tests/scheduler/test_host_filter.py index 17431fc7..a21f4c38 100644 --- a/nova/tests/scheduler/test_host_filter.py +++ b/nova/tests/scheduler/test_host_filter.py @@ -18,10 +18,10 @@ Tests For Scheduler Host Filters. import json +import nova from nova import exception from nova import test from nova.scheduler import host_filter -from nova.scheduler import filters class FakeZoneManager: @@ -52,12 +52,13 @@ class HostFilterTestCase(test.TestCase): 'disk_total': 1000, 'disk_used': 0, 'host_uuid': 'xxx-%d' % multiplier, - 'host_name-label': 'xs-%s' % multiplier} + 'host_name-label': 'xs-%s' % multiplier, + 'enabled': True} def setUp(self): super(HostFilterTestCase, self).setUp() - default_host_filter = 'AllHostsFilter' - self.flags(default_host_filter=default_host_filter) + default_host_filters = ['AllHostsFilter'] + self.flags(default_host_filters=default_host_filters) self.instance_type = dict(name='tiny', memory_mb=50, vcpus=10, @@ -96,34 +97,41 @@ class HostFilterTestCase(test.TestCase): host09['xpu_arch'] = 'fermi' host09['xpu_info'] = 'Tesla 2150' + def _get_all_hosts(self): + return self.zone_manager.service_states.items() + def test_choose_filter(self): # Test default filter ... - hf = host_filter.choose_host_filter() + hfs = host_filter.choose_host_filters() + hf = hfs[0] self.assertEquals(hf._full_name().split(".")[-1], 'AllHostsFilter') # Test valid filter ... - hf = host_filter.choose_host_filter('InstanceTypeFilter') + hfs = host_filter.choose_host_filters('InstanceTypeFilter') + hf = hfs[0] self.assertEquals(hf._full_name().split(".")[-1], 'InstanceTypeFilter') # Test invalid filter ... try: - host_filter.choose_host_filter('does not exist') + host_filter.choose_host_filters('does not exist') self.fail("Should not find host filter.") except exception.SchedulerHostFilterNotFound: pass def test_all_host_filter(self): - hf = filters.AllHostsFilter() + hfs = host_filter.choose_host_filters('AllHostsFilter') + hf = hfs[0] + all_hosts = self._get_all_hosts() cooked = hf.instance_type_to_filter(self.instance_type) - hosts = hf.filter_hosts(self.zone_manager, cooked) + hosts = hf.filter_hosts(all_hosts, cooked) self.assertEquals(10, len(hosts)) for host, capabilities in hosts: self.assertTrue(host.startswith('host')) def test_instance_type_filter(self): - hf = filters.InstanceTypeFilter() + hf = nova.scheduler.filters.InstanceTypeFilter() # filter all hosts that can support 50 ram and 500 disk - name, cooked = hf.instance_type_to_filter(self.instance_type) - self.assertEquals(name.split(".")[-1], 'InstanceTypeFilter') - hosts = hf.filter_hosts(self.zone_manager, cooked) + cooked = hf.instance_type_to_filter(self.instance_type) + all_hosts = self._get_all_hosts() + hosts = hf.filter_hosts(all_hosts, cooked) self.assertEquals(6, len(hosts)) just_hosts = [host for host, caps in hosts] just_hosts.sort() @@ -131,21 +139,21 @@ class HostFilterTestCase(test.TestCase): self.assertEquals('host10', just_hosts[5]) def test_instance_type_filter_extra_specs(self): - hf = filters.InstanceTypeFilter() + hf = nova.scheduler.filters.InstanceTypeFilter() # filter all hosts that can support 50 ram and 500 disk - name, cooked = hf.instance_type_to_filter(self.gpu_instance_type) - self.assertEquals(name.split(".")[-1], 'InstanceTypeFilter') - hosts = hf.filter_hosts(self.zone_manager, cooked) + cooked = hf.instance_type_to_filter(self.gpu_instance_type) + all_hosts = self._get_all_hosts() + hosts = hf.filter_hosts(all_hosts, cooked) self.assertEquals(1, len(hosts)) just_hosts = [host for host, caps in hosts] self.assertEquals('host07', just_hosts[0]) def test_json_filter(self): - hf = filters.JsonFilter() + hf = nova.scheduler.filters.JsonFilter() # filter all hosts that can support 50 ram and 500 disk - name, cooked = hf.instance_type_to_filter(self.instance_type) - self.assertEquals(name.split(".")[-1], 'JsonFilter') - hosts = hf.filter_hosts(self.zone_manager, cooked) + cooked = hf.instance_type_to_filter(self.instance_type) + all_hosts = self._get_all_hosts() + hosts = hf.filter_hosts(all_hosts, cooked) self.assertEquals(6, len(hosts)) just_hosts = [host for host, caps in hosts] just_hosts.sort() @@ -165,7 +173,7 @@ class HostFilterTestCase(test.TestCase): ] ] cooked = json.dumps(raw) - hosts = hf.filter_hosts(self.zone_manager, cooked) + hosts = hf.filter_hosts(all_hosts, cooked) self.assertEquals(5, len(hosts)) just_hosts = [host for host, caps in hosts] @@ -177,7 +185,7 @@ class HostFilterTestCase(test.TestCase): ['=', '$compute.host_memory_free', 30], ] cooked = json.dumps(raw) - hosts = hf.filter_hosts(self.zone_manager, cooked) + hosts = hf.filter_hosts(all_hosts, cooked) self.assertEquals(9, len(hosts)) just_hosts = [host for host, caps in hosts] @@ -187,7 +195,7 @@ class HostFilterTestCase(test.TestCase): raw = ['in', '$compute.host_memory_free', 20, 40, 60, 80, 100] cooked = json.dumps(raw) - hosts = hf.filter_hosts(self.zone_manager, cooked) + hosts = hf.filter_hosts(all_hosts, cooked) self.assertEquals(5, len(hosts)) just_hosts = [host for host, caps in hosts] just_hosts.sort() @@ -198,32 +206,32 @@ class HostFilterTestCase(test.TestCase): raw = ['unknown command', ] cooked = json.dumps(raw) try: - hf.filter_hosts(self.zone_manager, cooked) + hf.filter_hosts(all_hosts, cooked) self.fail("Should give KeyError") except KeyError, e: pass - self.assertTrue(hf.filter_hosts(self.zone_manager, json.dumps([]))) - self.assertTrue(hf.filter_hosts(self.zone_manager, json.dumps({}))) - self.assertTrue(hf.filter_hosts(self.zone_manager, json.dumps( + self.assertTrue(hf.filter_hosts(all_hosts, json.dumps([]))) + self.assertTrue(hf.filter_hosts(all_hosts, json.dumps({}))) + self.assertTrue(hf.filter_hosts(all_hosts, json.dumps( ['not', True, False, True, False], ))) try: - hf.filter_hosts(self.zone_manager, json.dumps( + hf.filter_hosts(all_hosts, json.dumps( 'not', True, False, True, False, )) self.fail("Should give KeyError") except KeyError, e: pass - self.assertFalse(hf.filter_hosts(self.zone_manager, + self.assertFalse(hf.filter_hosts(all_hosts, json.dumps(['=', '$foo', 100]))) - self.assertFalse(hf.filter_hosts(self.zone_manager, + self.assertFalse(hf.filter_hosts(all_hosts, json.dumps(['=', '$.....', 100]))) - self.assertFalse(hf.filter_hosts(self.zone_manager, + self.assertFalse(hf.filter_hosts(all_hosts, json.dumps( ['>', ['and', ['or', ['not', ['<', ['>=', ['<=', ['in', ]]]]]]]]))) - self.assertFalse(hf.filter_hosts(self.zone_manager, + self.assertFalse(hf.filter_hosts(all_hosts, json.dumps(['=', {}, ['>', '$missing....foo']]))) diff --git a/nova/tests/scheduler/test_least_cost_scheduler.py b/nova/tests/scheduler/test_least_cost_scheduler.py index b8847a2b..589308e3 100644 --- a/nova/tests/scheduler/test_least_cost_scheduler.py +++ b/nova/tests/scheduler/test_least_cost_scheduler.py @@ -82,7 +82,7 @@ class LeastCostSchedulerTestCase(test.TestCase): super(LeastCostSchedulerTestCase, self).tearDown() def assertWeights(self, expected, num, request_spec, hosts): - weighted = self.sched.weigh_hosts("compute", request_spec, hosts) + weighted = self.sched.weigh_hosts(request_spec, hosts) self.assertDictListMatch(weighted, expected, approx_equal=True) def test_no_hosts(self): @@ -97,50 +97,20 @@ class LeastCostSchedulerTestCase(test.TestCase): self.flags(least_cost_scheduler_cost_functions=[ 'nova.scheduler.least_cost.noop_cost_fn'], noop_cost_fn_weight=1) - num = 1 request_spec = {} hosts = self.sched.filter_hosts(num, request_spec) - - expected = [dict(weight=1, hostname=hostname) - for hostname, caps in hosts] + expected = [{"hostname": hostname, "weight": 1, "capabilities": caps} + for hostname, caps in hosts] self.assertWeights(expected, num, request_spec, hosts) def test_cost_fn_weights(self): self.flags(least_cost_scheduler_cost_functions=[ 'nova.scheduler.least_cost.noop_cost_fn'], noop_cost_fn_weight=2) - num = 1 request_spec = {} hosts = self.sched.filter_hosts(num, request_spec) - - expected = [dict(weight=2, hostname=hostname) - for hostname, caps in hosts] - self.assertWeights(expected, num, request_spec, hosts) - - def test_compute_fill_first_cost_fn(self): - self.flags(least_cost_scheduler_cost_functions=[ - 'nova.scheduler.least_cost.compute_fill_first_cost_fn'], - compute_fill_first_cost_fn_weight=1) - num = 1 - instance_type = {'memory_mb': 1024} - request_spec = {'instance_type': instance_type} - svc_states = self.sched.zone_manager.service_states.iteritems() - all_hosts = [(host, services["compute"]) - for host, services in svc_states - if "compute" in services] - hosts = self.sched.filter_hosts('compute', request_spec, all_hosts) - - expected = [] - for idx, (hostname, services) in enumerate(hosts): - caps = copy.deepcopy(services) - # Costs are normalized so over 10 hosts, each host with increasing - # free ram will cost 1/N more. Since the lowest cost host has some - # free ram, we add in the 1/N for the base_cost - weight = 0.1 + (0.1 * idx) - wtd_dict = dict(hostname=hostname, weight=weight, - capabilities=caps) - expected.append(wtd_dict) - + expected = [{"hostname": hostname, "weight": 2, "capabilities": caps} + for hostname, caps in hosts] self.assertWeights(expected, num, request_spec, hosts)