Restructure host filtering to be easier to use.

The original design for host filtering in the scheduler required that
the entire filtering process be contained in a single class. By
contrast, the design for weighting the hosts allowed you to specify a
list of functions that would apply various weighting factors to the
hosts.

This commit modifies the filtering process to resemble the way that
the weighting process is designed. Filters can now be small, focused
classes, and you specify which filters to apply by setting the
'FLAGS.default_host_filters' flag to a list of the names of the
filter classes that match your needs (e.g. ['AllHostsFilter']).

This is a port of the code from Launchpad, where it was orphaned:

https://code.launchpad.net/~ed-leafe/nova/scheduler-multifilter/+merge/72478

Change-Id: I5f3eff6f21409a9f0eddda3392e9ff9d03039ebe
This commit is contained in:
Johannes Erdfelt
2011-09-30 15:42:38 +00:00
parent 342fb9feee
commit 4abdde7af7
7 changed files with 102 additions and 96 deletions

View File

@@ -271,7 +271,7 @@ class AbstractScheduler(driver.Scheduler):
# weigh the selected hosts.
# weighted_hosts = [{weight=weight, hostname=hostname,
# capabilities=capabs}, ...]
weighted_hosts = self.weigh_hosts(topic, request_spec, filtered_hosts)
weighted_hosts = self.weigh_hosts(request_spec, filtered_hosts)
# Next, tack on the host weights from the child zones
json_spec = json.dumps(request_spec)
all_zones = db.zone_get_all(context.elevated())
@@ -306,7 +306,7 @@ class AbstractScheduler(driver.Scheduler):
return [(host, services) for host, services in host_list
if basic_ram_filter(host, services, request_spec)]
def weigh_hosts(self, topic, request_spec, hosts):
def weigh_hosts(self, request_spec, hosts):
"""This version assigns a weight of 1 to all hosts, making selection
of any host basically a random event. Override this method in your
subclass to add logic to prefer one potential host over another.

View File

@@ -38,9 +38,20 @@ class BaseScheduler(abstract_scheduler.AbstractScheduler):
"""
def filter_hosts(self, topic, request_spec, hosts=None):
"""Filter the full host list (from the ZoneManager)"""
filter_name = request_spec.get('filter', None)
# Make sure that the requested filter is legitimate.
selected_filter = host_filter.choose_host_filter(filter_name)
filters = request_spec.get('filter')
if filters is None:
# Not specified; use the default
filters = FLAGS.default_host_filters
if not isinstance(filters, (list, tuple)):
filters = [filters]
if hosts is None:
# Get the full list (only considering 'compute' services)
all_hosts = self.zone_manager.service_states.iteritems()
hosts = [(host, services["compute"])
for host, services in all_hosts
if "compute" in services]
# Make sure that the requested filters are legitimate.
selected_filters = host_filter.choose_host_filters(filters)
# TODO(sandy): We're only using InstanceType-based specs
# currently. Later we'll need to snoop for more detailed
@@ -48,11 +59,13 @@ class BaseScheduler(abstract_scheduler.AbstractScheduler):
instance_type = request_spec.get("instance_type", None)
if instance_type is None:
# No way to select; return the specified hosts
return hosts or []
name, query = selected_filter.instance_type_to_filter(instance_type)
return selected_filter.filter_hosts(self.zone_manager, query)
return hosts
for selected_filter in selected_filters:
query = selected_filter.instance_type_to_filter(instance_type)
hosts = selected_filter.filter_hosts(hosts, query)
return hosts
def weigh_hosts(self, topic, request_spec, hosts):
def weigh_hosts(self, request_spec, hosts):
"""Derived classes may override this to provide more sophisticated
scheduling objectives
"""

View File

@@ -32,17 +32,16 @@ from nova import exception
from nova import flags
import nova.scheduler
# NOTE(Vek): Even though we don't use filters in here anywhere, we
# depend on default_host_filter being available in FLAGS,
# and that happens only when filters/abstract_filter.py is
# imported.
from nova.scheduler import filters
FLAGS = flags.FLAGS
flags.DEFINE_list('default_host_filters', ['AllHostsFilter'],
'Which filters to use for filtering hosts when not specified '
'in the request.')
def _get_filters():
def _get_filter_classes():
# Imported here to avoid circular imports
from nova.scheduler import filters
@@ -55,15 +54,29 @@ def _get_filters():
and get_itm(itm) is not filters.AbstractHostFilter]
def choose_host_filter(filter_name=None):
"""Since the caller may specify which filter to use we need
def choose_host_filters(filters=None):
"""Since the caller may specify which filters to use we need
to have an authoritative list of what is permissible. This
function checks the filter name against a predefined set
function checks the filter names against a predefined set
of acceptable filters.
"""
if not filter_name:
filter_name = FLAGS.default_host_filter
for filter_class in _get_filters():
if filter_class.__name__ == filter_name:
return filter_class()
raise exception.SchedulerHostFilterNotFound(filter_name=filter_name)
if not filters:
filters = FLAGS.default_host_filters
if not isinstance(filters, (list, tuple)):
filters = [filters]
good_filters = []
bad_filters = []
filter_classes = _get_filter_classes()
for filter_name in filters:
found_class = False
for cls in filter_classes:
if cls.__name__ == filter_name:
good_filters.append(cls())
found_class = True
break
if not found_class:
bad_filters.append(filter_name)
if bad_filters:
msg = ", ".join(bad_filters)
raise exception.SchedulerHostFilterNotFound(filter_name=msg)
return good_filters

View File

@@ -114,10 +114,13 @@ class LeastCostScheduler(base_scheduler.BaseScheduler):
self.cost_fns_cache = {}
super(LeastCostScheduler, self).__init__(*args, **kwargs)
def get_cost_fns(self, topic):
def get_cost_fns(self, topic=None):
"""Returns a list of tuples containing weights and cost functions to
use for weighing hosts
"""
if topic is None:
# Schedulers only support compute right now.
topic = "compute"
if topic in self.cost_fns_cache:
return self.cost_fns_cache[topic]
cost_fns = []
@@ -151,11 +154,11 @@ class LeastCostScheduler(base_scheduler.BaseScheduler):
self.cost_fns_cache[topic] = cost_fns
return cost_fns
def weigh_hosts(self, topic, request_spec, hosts):
def weigh_hosts(self, request_spec, hosts):
"""Returns a list of dictionaries of form:
[ {weight: weight, hostname: hostname, capabilities: capabs} ]
"""
cost_fns = self.get_cost_fns(topic)
cost_fns = self.get_cost_fns()
costs = weighted_sum(domain=hosts, weighted_fns=cost_fns)
weighted = []

View File

@@ -455,8 +455,7 @@ class BaseSchedulerTestCase(test.TestCase):
# Call weigh_hosts()
num_instances = len(hostlist) * 2 + len(hostlist) / 2
instlist = sched.weigh_hosts('compute',
dict(num_instances=num_instances),
instlist = sched.weigh_hosts(dict(num_instances=num_instances),
hostlist)
# Should be enough entries to cover all instances

View File

@@ -18,10 +18,10 @@ Tests For Scheduler Host Filters.
import json
import nova
from nova import exception
from nova import test
from nova.scheduler import host_filter
from nova.scheduler import filters
class FakeZoneManager:
@@ -52,12 +52,13 @@ class HostFilterTestCase(test.TestCase):
'disk_total': 1000,
'disk_used': 0,
'host_uuid': 'xxx-%d' % multiplier,
'host_name-label': 'xs-%s' % multiplier}
'host_name-label': 'xs-%s' % multiplier,
'enabled': True}
def setUp(self):
super(HostFilterTestCase, self).setUp()
default_host_filter = 'AllHostsFilter'
self.flags(default_host_filter=default_host_filter)
default_host_filters = ['AllHostsFilter']
self.flags(default_host_filters=default_host_filters)
self.instance_type = dict(name='tiny',
memory_mb=50,
vcpus=10,
@@ -96,34 +97,41 @@ class HostFilterTestCase(test.TestCase):
host09['xpu_arch'] = 'fermi'
host09['xpu_info'] = 'Tesla 2150'
def _get_all_hosts(self):
return self.zone_manager.service_states.items()
def test_choose_filter(self):
# Test default filter ...
hf = host_filter.choose_host_filter()
hfs = host_filter.choose_host_filters()
hf = hfs[0]
self.assertEquals(hf._full_name().split(".")[-1], 'AllHostsFilter')
# Test valid filter ...
hf = host_filter.choose_host_filter('InstanceTypeFilter')
hfs = host_filter.choose_host_filters('InstanceTypeFilter')
hf = hfs[0]
self.assertEquals(hf._full_name().split(".")[-1], 'InstanceTypeFilter')
# Test invalid filter ...
try:
host_filter.choose_host_filter('does not exist')
host_filter.choose_host_filters('does not exist')
self.fail("Should not find host filter.")
except exception.SchedulerHostFilterNotFound:
pass
def test_all_host_filter(self):
hf = filters.AllHostsFilter()
hfs = host_filter.choose_host_filters('AllHostsFilter')
hf = hfs[0]
all_hosts = self._get_all_hosts()
cooked = hf.instance_type_to_filter(self.instance_type)
hosts = hf.filter_hosts(self.zone_manager, cooked)
hosts = hf.filter_hosts(all_hosts, cooked)
self.assertEquals(10, len(hosts))
for host, capabilities in hosts:
self.assertTrue(host.startswith('host'))
def test_instance_type_filter(self):
hf = filters.InstanceTypeFilter()
hf = nova.scheduler.filters.InstanceTypeFilter()
# filter all hosts that can support 50 ram and 500 disk
name, cooked = hf.instance_type_to_filter(self.instance_type)
self.assertEquals(name.split(".")[-1], 'InstanceTypeFilter')
hosts = hf.filter_hosts(self.zone_manager, cooked)
cooked = hf.instance_type_to_filter(self.instance_type)
all_hosts = self._get_all_hosts()
hosts = hf.filter_hosts(all_hosts, cooked)
self.assertEquals(6, len(hosts))
just_hosts = [host for host, caps in hosts]
just_hosts.sort()
@@ -131,21 +139,21 @@ class HostFilterTestCase(test.TestCase):
self.assertEquals('host10', just_hosts[5])
def test_instance_type_filter_extra_specs(self):
hf = filters.InstanceTypeFilter()
hf = nova.scheduler.filters.InstanceTypeFilter()
# filter all hosts that can support 50 ram and 500 disk
name, cooked = hf.instance_type_to_filter(self.gpu_instance_type)
self.assertEquals(name.split(".")[-1], 'InstanceTypeFilter')
hosts = hf.filter_hosts(self.zone_manager, cooked)
cooked = hf.instance_type_to_filter(self.gpu_instance_type)
all_hosts = self._get_all_hosts()
hosts = hf.filter_hosts(all_hosts, cooked)
self.assertEquals(1, len(hosts))
just_hosts = [host for host, caps in hosts]
self.assertEquals('host07', just_hosts[0])
def test_json_filter(self):
hf = filters.JsonFilter()
hf = nova.scheduler.filters.JsonFilter()
# filter all hosts that can support 50 ram and 500 disk
name, cooked = hf.instance_type_to_filter(self.instance_type)
self.assertEquals(name.split(".")[-1], 'JsonFilter')
hosts = hf.filter_hosts(self.zone_manager, cooked)
cooked = hf.instance_type_to_filter(self.instance_type)
all_hosts = self._get_all_hosts()
hosts = hf.filter_hosts(all_hosts, cooked)
self.assertEquals(6, len(hosts))
just_hosts = [host for host, caps in hosts]
just_hosts.sort()
@@ -165,7 +173,7 @@ class HostFilterTestCase(test.TestCase):
]
]
cooked = json.dumps(raw)
hosts = hf.filter_hosts(self.zone_manager, cooked)
hosts = hf.filter_hosts(all_hosts, cooked)
self.assertEquals(5, len(hosts))
just_hosts = [host for host, caps in hosts]
@@ -177,7 +185,7 @@ class HostFilterTestCase(test.TestCase):
['=', '$compute.host_memory_free', 30],
]
cooked = json.dumps(raw)
hosts = hf.filter_hosts(self.zone_manager, cooked)
hosts = hf.filter_hosts(all_hosts, cooked)
self.assertEquals(9, len(hosts))
just_hosts = [host for host, caps in hosts]
@@ -187,7 +195,7 @@ class HostFilterTestCase(test.TestCase):
raw = ['in', '$compute.host_memory_free', 20, 40, 60, 80, 100]
cooked = json.dumps(raw)
hosts = hf.filter_hosts(self.zone_manager, cooked)
hosts = hf.filter_hosts(all_hosts, cooked)
self.assertEquals(5, len(hosts))
just_hosts = [host for host, caps in hosts]
just_hosts.sort()
@@ -198,32 +206,32 @@ class HostFilterTestCase(test.TestCase):
raw = ['unknown command', ]
cooked = json.dumps(raw)
try:
hf.filter_hosts(self.zone_manager, cooked)
hf.filter_hosts(all_hosts, cooked)
self.fail("Should give KeyError")
except KeyError, e:
pass
self.assertTrue(hf.filter_hosts(self.zone_manager, json.dumps([])))
self.assertTrue(hf.filter_hosts(self.zone_manager, json.dumps({})))
self.assertTrue(hf.filter_hosts(self.zone_manager, json.dumps(
self.assertTrue(hf.filter_hosts(all_hosts, json.dumps([])))
self.assertTrue(hf.filter_hosts(all_hosts, json.dumps({})))
self.assertTrue(hf.filter_hosts(all_hosts, json.dumps(
['not', True, False, True, False],
)))
try:
hf.filter_hosts(self.zone_manager, json.dumps(
hf.filter_hosts(all_hosts, json.dumps(
'not', True, False, True, False,
))
self.fail("Should give KeyError")
except KeyError, e:
pass
self.assertFalse(hf.filter_hosts(self.zone_manager,
self.assertFalse(hf.filter_hosts(all_hosts,
json.dumps(['=', '$foo', 100])))
self.assertFalse(hf.filter_hosts(self.zone_manager,
self.assertFalse(hf.filter_hosts(all_hosts,
json.dumps(['=', '$.....', 100])))
self.assertFalse(hf.filter_hosts(self.zone_manager,
self.assertFalse(hf.filter_hosts(all_hosts,
json.dumps(
['>', ['and', ['or', ['not', ['<', ['>=', ['<=', ['in', ]]]]]]]])))
self.assertFalse(hf.filter_hosts(self.zone_manager,
self.assertFalse(hf.filter_hosts(all_hosts,
json.dumps(['=', {}, ['>', '$missing....foo']])))

View File

@@ -82,7 +82,7 @@ class LeastCostSchedulerTestCase(test.TestCase):
super(LeastCostSchedulerTestCase, self).tearDown()
def assertWeights(self, expected, num, request_spec, hosts):
weighted = self.sched.weigh_hosts("compute", request_spec, hosts)
weighted = self.sched.weigh_hosts(request_spec, hosts)
self.assertDictListMatch(weighted, expected, approx_equal=True)
def test_no_hosts(self):
@@ -97,12 +97,10 @@ class LeastCostSchedulerTestCase(test.TestCase):
self.flags(least_cost_scheduler_cost_functions=[
'nova.scheduler.least_cost.noop_cost_fn'],
noop_cost_fn_weight=1)
num = 1
request_spec = {}
hosts = self.sched.filter_hosts(num, request_spec)
expected = [dict(weight=1, hostname=hostname)
expected = [{"hostname": hostname, "weight": 1, "capabilities": caps}
for hostname, caps in hosts]
self.assertWeights(expected, num, request_spec, hosts)
@@ -110,37 +108,9 @@ class LeastCostSchedulerTestCase(test.TestCase):
self.flags(least_cost_scheduler_cost_functions=[
'nova.scheduler.least_cost.noop_cost_fn'],
noop_cost_fn_weight=2)
num = 1
request_spec = {}
hosts = self.sched.filter_hosts(num, request_spec)
expected = [dict(weight=2, hostname=hostname)
expected = [{"hostname": hostname, "weight": 2, "capabilities": caps}
for hostname, caps in hosts]
self.assertWeights(expected, num, request_spec, hosts)
def test_compute_fill_first_cost_fn(self):
self.flags(least_cost_scheduler_cost_functions=[
'nova.scheduler.least_cost.compute_fill_first_cost_fn'],
compute_fill_first_cost_fn_weight=1)
num = 1
instance_type = {'memory_mb': 1024}
request_spec = {'instance_type': instance_type}
svc_states = self.sched.zone_manager.service_states.iteritems()
all_hosts = [(host, services["compute"])
for host, services in svc_states
if "compute" in services]
hosts = self.sched.filter_hosts('compute', request_spec, all_hosts)
expected = []
for idx, (hostname, services) in enumerate(hosts):
caps = copy.deepcopy(services)
# Costs are normalized so over 10 hosts, each host with increasing
# free ram will cost 1/N more. Since the lowest cost host has some
# free ram, we add in the 1/N for the base_cost
weight = 0.1 + (0.1 * idx)
wtd_dict = dict(hostname=hostname, weight=weight,
capabilities=caps)
expected.append(wtd_dict)
self.assertWeights(expected, num, request_spec, hosts)