Adds LeastCostScheduler which uses a series of cost functions and associated weights to determine which host to provision to.

Left for future work:

* Handle scheduling of many instances (currently assumes n=1)
* Handle scheduling of arbitrary resources (currently weigh_hosts only handles instances)
* Add more cost functions (currently just noop and fill-first)
* Simulator so we can determine sensible values for cost-function-weights

NOTE: This patch depends on Sandy's dist-scheduler-2a patch.
This commit is contained in:
Rick Harris 2011-06-03 14:25:59 +00:00 committed by Tarmac
commit 56c3418d53
10 changed files with 577 additions and 10 deletions

View File

@ -486,6 +486,15 @@ class SchedulerHostFilterNotFound(NotFound):
message = _("Scheduler Host Filter %(filter_name)s could not be found.")
class SchedulerCostFunctionNotFound(NotFound):
    """Raised when a configured scheduler cost function cannot be imported."""
    message = _("Scheduler cost function %(cost_fn_str)s could not be found.")
class SchedulerWeightFlagNotFound(NotFound):
    """Raised when the weight flag named by ``flag_name`` is not defined."""
    message = _("Scheduler weight flag not found: %(flag_name)s")
class InstanceMetadataNotFound(NotFound):
    """Raised when an instance has no metadata under the requested key."""
    message = _("Instance %(instance_id)s has no metadata "
                "with key %(metadata_key)s.")

View File

@ -41,6 +41,7 @@ import json
from nova import exception
from nova import flags
from nova import log as logging
from nova.scheduler import zone_aware_scheduler
from nova import utils
from nova.scheduler import zone_aware_scheduler

View File

@ -0,0 +1,156 @@
# Copyright (c) 2011 Openstack, LLC.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Least Cost Scheduler is a mechanism for choosing which host machines to
provision a set of resources to. The input of the least-cost-scheduler is a
set of objective-functions, called the 'cost-functions', a weight for each
cost-function, and a list of candidate hosts (gathered via FilterHosts).
The cost-function and weights are tabulated, and the host with the least cost
is then selected for provisioning.
"""
import collections

from nova import exception
from nova import flags
from nova import log as logging
from nova import utils
from nova.scheduler import zone_aware_scheduler
# Module-level logger for the least-cost scheduler.
LOG = logging.getLogger('nova.scheduler.least_cost')

# Shared flag registry; the flags defined below are read back through it.
FLAGS = flags.FLAGS

# Dotted paths of the cost functions to apply. Each listed function needs a
# companion flag named '<function name>_weight' (see the DEFINE_integer calls).
flags.DEFINE_list('least_cost_scheduler_cost_functions',
                  ['nova.scheduler.least_cost.noop_cost_fn'],
                  'Which cost functions the LeastCostScheduler should use.')

# TODO(sirp): Once we have enough of these rules, we can break them out into a
# cost_functions.py file (perhaps in a least_cost_scheduler directory)

# Weight flag for noop_cost_fn; the name follows '<function name>_weight'.
flags.DEFINE_integer('noop_cost_fn_weight', 1,
                     'How much weight to give the noop cost function')
def noop_cost_fn(host):
    """Assign every host the same pre-weight cost of 1.

    ``host`` is accepted so the signature matches the other cost functions;
    it is not inspected.
    """
    constant_cost = 1
    return constant_cost
# Weight flag for fill_first_cost_fn; the name follows '<function name>_weight'.
flags.DEFINE_integer('fill_first_cost_fn_weight', 1,
                     'How much weight to give the fill-first cost function')
def fill_first_cost_fn(host):
    """Cost a host by its free RAM, so hosts with less free RAM cost less.

    ``host`` is a ``(hostname, capabilities)`` pair. Hosts that don't have
    enough ram are expected to be excluded earlier by filter_hosts, so this
    function does no capacity check of its own.
    """
    _name, capabilities = host
    compute_caps = capabilities['compute']
    return compute_caps['host_memory_free']
class LeastCostScheduler(zone_aware_scheduler.ZoneAwareScheduler):
    """Scheduler that weighs candidate hosts by a weighted sum of costs."""

    def get_cost_fns(self):
        """Return the configured cost functions paired with their weights.

        Each dotted path in FLAGS.least_cost_scheduler_cost_functions is
        imported, and its weight is read from the flag named
        ``<function name>_weight``.

        Returns a list of ``(weight, cost_fn)`` tuples for weighing hosts.

        Raises SchedulerCostFunctionNotFound when a path cannot be imported,
        and SchedulerWeightFlagNotFound when the weight flag is not defined.
        """
        cost_fns = []
        for cost_fn_str in FLAGS.least_cost_scheduler_cost_functions:
            try:
                # NOTE(sirp): import_class is somewhat misnamed since it can
                # import any callable from a module
                cost_fn = utils.import_class(cost_fn_str)
            except exception.ClassNotFound:
                raise exception.SchedulerCostFunctionNotFound(
                    cost_fn_str=cost_fn_str)

            # Build the flag name up front so the error path can report it.
            flag_name = "%s_weight" % cost_fn.__name__
            try:
                weight = getattr(FLAGS, flag_name)
            except AttributeError:
                raise exception.SchedulerWeightFlagNotFound(
                    flag_name=flag_name)

            cost_fns.append((weight, cost_fn))

        return cost_fns

    def weigh_hosts(self, num, request_spec, hosts):
        """Compute a weighted cost for each candidate host.

        ``hosts`` is a list of ``(hostname, capabilities)`` tuples. ``num``
        and ``request_spec`` are part of the scheduler interface but are not
        consulted here.

        Returns a list of dictionaries of form:
            [ {weight: weight, hostname: hostname} ]
        in the same order as ``hosts``.
        """
        # FIXME(sirp): weigh_hosts should handle more than just instances
        hostnames = [hostname for hostname, caps in hosts]

        cost_fns = self.get_cost_fns()
        costs = weighted_sum(domain=hosts, weighted_fns=cost_fns)

        weighted = []
        weight_log = []
        for cost, hostname in zip(costs, hostnames):
            weight_log.append("%s: %s" % (hostname, "%.2f" % cost))
            weight_dict = dict(weight=cost, hostname=hostname)
            weighted.append(weight_dict)

        LOG.debug(_("Weighted Costs => %s") % weight_log)
        return weighted
def normalize_list(L):
    """Scale a list of numbers by its largest element.

    When the maximum is positive, every element is divided by it as a float,
    so the largest element becomes 1.0. An empty list, or a list whose
    maximum is not positive, is returned unchanged.
    """
    if not L:
        return L

    peak = max(L)
    if not peak > 0:
        return L

    return [float(item) / peak for item in L]
def weighted_sum(domain, weighted_fns, normalize=True):
    """Use the weighted-sum method to compute a score for an array of objects.

    Normalize the results of the objective-functions so that the weights are
    meaningful regardless of objective-function's range.

    domain - input to be scored (a sequence; its length fixes the length of
             the result)
    weighted_fns - list of weights and functions like:
        [(weight, objective-function)]
    normalize - when true, each function's scores are passed through
                normalize_list before the weight is applied

    Returns an unsorted list of scores, one per element of domain and in the
    same order, so they can be paired with: zip(scores, hosts). When
    weighted_fns is empty every element scores 0.
    """
    # One running total per element of domain. Starting from 0 keeps the
    # result aligned with domain even when no functions are supplied.
    totals = [0] * len(domain)

    for weight, fn in weighted_fns:
        scores = [fn(elem) for elem in domain]
        if normalize:
            scores = normalize_list(scores)

        for idx, score in enumerate(scores):
            totals[idx] += score * weight

    return totals

View File

@ -116,6 +116,9 @@ class ZoneAwareScheduler(driver.Scheduler):
# Filter local hosts based on requirements ...
host_list = self.filter_hosts(num_instances, request_spec)
# TODO(sirp): weigh_hosts should also be a function of 'topic' or
# resources, so that we can apply different objective functions to it
# then weigh the selected hosts.
# weighted = [{weight: weight, hostname: hostname}, ...]
weighted = self.weigh_hosts(num_instances, request_spec, host_list)
@ -141,10 +144,14 @@ class ZoneAwareScheduler(driver.Scheduler):
"""Derived classes must override this method and return
a list of hosts in [(hostname, capability_dict)] format.
"""
raise NotImplemented()
# NOTE(sirp): The default logic is the equivalent to AllHostsFilter
service_states = self.zone_manager.service_states
return [(host, services)
for host, services in service_states.iteritems()]
def weigh_hosts(self, num, request_spec, hosts):
"""Derived classes must override this method and return
a lists of hosts in [{weight, hostname}] format.
"""Derived classes may override this to provide more sophisticated
scheduling objectives
"""
raise NotImplemented()
# NOTE(sirp): The default logic is the same as the NoopCostFunction
return [dict(weight=1, hostname=host) for host, caps in hosts]

View File

@ -184,7 +184,7 @@ class TestCase(unittest.TestCase):
wsgi.Server.start = _wrapped_start
# Useful assertions
def assertDictMatch(self, d1, d2):
def assertDictMatch(self, d1, d2, approx_equal=False, tolerance=0.001):
"""Assert two dicts are equivalent.
This is a 'deep' match in the sense that it handles nested
@ -215,15 +215,26 @@ class TestCase(unittest.TestCase):
for key in d1keys:
d1value = d1[key]
d2value = d2[key]
try:
error = abs(float(d1value) - float(d2value))
within_tolerance = error <= tolerance
except (ValueError, TypeError):
# If both values aren't convertable to float, just ignore
# ValueError if arg is a str, TypeError if it's something else
# (like None)
within_tolerance = False
if hasattr(d1value, 'keys') and hasattr(d2value, 'keys'):
self.assertDictMatch(d1value, d2value)
elif 'DONTCARE' in (d1value, d2value):
continue
elif approx_equal and within_tolerance:
continue
elif d1value != d2value:
raise_assertion("d1['%(key)s']=%(d1value)s != "
"d2['%(key)s']=%(d2value)s" % locals())
def assertDictListMatch(self, L1, L2):
def assertDictListMatch(self, L1, L2, approx_equal=False, tolerance=0.001):
"""Assert a list of dicts are equivalent."""
def raise_assertion(msg):
L1str = str(L1)
@ -239,4 +250,5 @@ class TestCase(unittest.TestCase):
'len(L2)=%(L2count)d' % locals())
for d1, d2 in zip(L1, L2):
self.assertDictMatch(d1, d2)
self.assertDictMatch(d1, d2, approx_equal=approx_equal,
tolerance=tolerance)

View File

View File

@ -0,0 +1,206 @@
# Copyright 2011 OpenStack LLC.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Tests For Scheduler Host Filters.
"""
import json
from nova import exception
from nova import flags
from nova import test
from nova.scheduler import host_filter
FLAGS = flags.FLAGS
class FakeZoneManager:
    """Bare stand-in for a zone manager; tests attach service_states to it."""
class HostFilterTestCase(test.TestCase):
    """Test case for host filters."""

    def _host_caps(self, multiplier):
        """Return fake 'compute' capabilities for one host.

        Free memory is ``10 + 10 * multiplier`` (of 100 total) and available
        disk is ``100 + 100 * multiplier`` (of 1000 total), so a larger
        multiplier means a host with more resources. Multipliers above 9
        would exceed the totals, which is why the fixture stops at 10 hosts.
        """
        return {'host_name-description': 'XenServer %s' % multiplier,
                'host_hostname': 'xs-%s' % multiplier,
                'host_memory_total': 100,
                'host_memory_overhead': 10,
                'host_memory_free': 10 + multiplier * 10,
                'host_memory_free-computed': 10 + multiplier * 10,
                'host_other-config': {},
                'host_ip_address': '192.168.1.%d' % (100 + multiplier),
                'host_cpu_info': {},
                'disk_available': 100 + multiplier * 100,
                'disk_total': 1000,
                'disk_used': 0,
                'host_uuid': 'xxx-%d' % multiplier,
                'host_name-label': 'xs-%s' % multiplier}

    def _sorted_names(self, hosts):
        """Return the host names from [(hostname, caps)] in sorted order."""
        return sorted([host for host, caps in hosts])

    def _assert_host_indexes(self, indexes, hosts):
        """Assert ``hosts`` holds exactly 'hostNN' for each NN in indexes."""
        expected = ['host%02d' % index for index in indexes]
        self.assertEquals(expected, self._sorted_names(hosts))

    def setUp(self):
        """Select AllHostsFilter by default and build a 10-host fake zone."""
        super(HostFilterTestCase, self).setUp()
        self.old_flag = FLAGS.default_host_filter
        FLAGS.default_host_filter = \
            'nova.scheduler.host_filter.AllHostsFilter'
        self.instance_type = dict(name='tiny',
                                  memory_mb=50,
                                  vcpus=10,
                                  local_gb=500,
                                  flavorid=1,
                                  swap=500,
                                  rxtx_quota=30000,
                                  rxtx_cap=200)

        self.zone_manager = FakeZoneManager()
        states = {}
        for x in xrange(10):
            states['host%02d' % (x + 1)] = {'compute': self._host_caps(x)}
        self.zone_manager.service_states = states

    def tearDown(self):
        """Restore the default host filter flag saved in setUp."""
        FLAGS.default_host_filter = self.old_flag
        super(HostFilterTestCase, self).tearDown()

    def test_choose_filter(self):
        # Test default filter ...
        hf = host_filter.choose_host_filter()
        self.assertEquals(hf._full_name(),
                          'nova.scheduler.host_filter.AllHostsFilter')
        # Test valid filter ...
        hf = host_filter.choose_host_filter(
            'nova.scheduler.host_filter.InstanceTypeFilter')
        self.assertEquals(hf._full_name(),
                          'nova.scheduler.host_filter.InstanceTypeFilter')
        # Test invalid filter ...
        self.assertRaises(exception.SchedulerHostFilterNotFound,
                          host_filter.choose_host_filter, 'does not exist')

    def test_all_host_filter(self):
        hf = host_filter.AllHostsFilter()
        cooked = hf.instance_type_to_filter(self.instance_type)
        hosts = hf.filter_hosts(self.zone_manager, cooked)
        self.assertEquals(10, len(hosts))
        for host, capabilities in hosts:
            self.assertTrue(host.startswith('host'))

    def test_instance_type_filter(self):
        hf = host_filter.InstanceTypeFilter()
        # filter all hosts that can support 50 ram and 500 disk
        name, cooked = hf.instance_type_to_filter(self.instance_type)
        self.assertEquals('nova.scheduler.host_filter.InstanceTypeFilter',
                          name)
        hosts = hf.filter_hosts(self.zone_manager, cooked)
        self.assertEquals(6, len(hosts))
        just_hosts = self._sorted_names(hosts)
        self.assertEquals('host05', just_hosts[0])
        self.assertEquals('host10', just_hosts[5])

    def test_json_filter(self):
        hf = host_filter.JsonFilter()
        # filter all hosts that can support 50 ram and 500 disk
        name, cooked = hf.instance_type_to_filter(self.instance_type)
        self.assertEquals('nova.scheduler.host_filter.JsonFilter', name)
        hosts = hf.filter_hosts(self.zone_manager, cooked)
        self.assertEquals(6, len(hosts))
        just_hosts = self._sorted_names(hosts)
        self.assertEquals('host05', just_hosts[0])
        self.assertEquals('host10', just_hosts[5])

        # Try some custom queries
        raw = ['or',
                  ['and',
                      ['<', '$compute.host_memory_free', 30],
                      ['<', '$compute.disk_available', 300]
                  ],
                  ['and',
                      ['>', '$compute.host_memory_free', 70],
                      ['>', '$compute.disk_available', 700]
                  ]
              ]
        cooked = json.dumps(raw)
        hosts = hf.filter_hosts(self.zone_manager, cooked)
        self._assert_host_indexes([1, 2, 8, 9, 10], hosts)

        raw = ['not',
                  ['=', '$compute.host_memory_free', 30],
              ]
        cooked = json.dumps(raw)
        hosts = hf.filter_hosts(self.zone_manager, cooked)
        self._assert_host_indexes([1, 2, 4, 5, 6, 7, 8, 9, 10], hosts)

        raw = ['in', '$compute.host_memory_free', 20, 40, 60, 80, 100]
        cooked = json.dumps(raw)
        hosts = hf.filter_hosts(self.zone_manager, cooked)
        self._assert_host_indexes([2, 4, 6, 8, 10], hosts)

        # Try some bogus input ...
        raw = ['unknown command', ]
        cooked = json.dumps(raw)
        self.assertRaises(KeyError,
                          hf.filter_hosts, self.zone_manager, cooked)

        self.assertTrue(hf.filter_hosts(self.zone_manager, json.dumps([])))
        self.assertTrue(hf.filter_hosts(self.zone_manager, json.dumps({})))
        self.assertTrue(hf.filter_hosts(self.zone_manager, json.dumps(
                ['not', True, False, True, False]
            )))

        # NOTE(review): the extra positional arguments are consumed by
        # json.dumps as option values rather than data, so presumably only
        # the bare string 'not' is serialized -- confirm this is intended.
        bogus = json.dumps(
                'not', True, False, True, False
            )
        self.assertRaises(KeyError,
                          hf.filter_hosts, self.zone_manager, bogus)

        self.assertFalse(hf.filter_hosts(self.zone_manager,
                json.dumps(['=', '$foo', 100])))
        self.assertFalse(hf.filter_hosts(self.zone_manager,
                json.dumps(['=', '$.....', 100])))
        self.assertFalse(hf.filter_hosts(self.zone_manager,
                json.dumps(
            ['>', ['and', ['or', ['not', ['<', ['>=', ['<=', ['in', ]]]]]]]])))

        self.assertFalse(hf.filter_hosts(self.zone_manager,
                json.dumps(['=', {}, ['>', '$missing....foo']])))

View File

@ -0,0 +1,144 @@
# Copyright 2011 OpenStack LLC.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""
Tests For Least Cost Scheduler
"""
from nova import flags
from nova import test
from nova.scheduler import least_cost
from nova.tests.scheduler import test_zone_aware_scheduler
MB = 1024 * 1024
FLAGS = flags.FLAGS
class FakeHost(object):
    """Minimal host record exposing the attributes the cost lambdas read."""

    def __init__(self, host_id, free_ram, io):
        self.id, self.free_ram, self.io = host_id, free_ram, io
class WeightedSumTestCase(test.TestCase):
    """Checks for least_cost.weighted_sum using plain fake hosts."""

    def test_empty_domain(self):
        """An empty domain with no functions yields no scores."""
        scores = least_cost.weighted_sum([], [])
        self.assertEqual([], scores)

    def test_basic_costing(self):
        """Two weighted, normalized objectives are summed per host."""
        ram_and_io = [(512 * MB, 100), (256 * MB, 400), (512 * MB, 100)]
        hosts = [FakeHost(host_id, free_ram, io)
                 for host_id, (free_ram, io) in enumerate(ram_and_io, 1)]

        def free_ram_cost(host):
            # Fill-first, free_ram is a *cost*
            return host.free_ram

        def io_cost(host):
            # Avoid high I/O
            return host.io

        costs = least_cost.weighted_sum(
            domain=hosts,
            weighted_fns=[(1, free_ram_cost), (2, io_cost)])

        # Each 256 MB unit of free-ram contributes 0.5 points by way of:
        #   cost = weight * (score/max_score) = 1 * (256/512) = 0.5
        # Each 100 iops of IO adds 0.5 points by way of:
        #   cost = 2 * (100/400) = 2 * 0.25 = 0.5
        self.assertEqual([1.5, 2.5, 1.5], costs)
class LeastCostSchedulerTestCase(test.TestCase):
    """Tests for LeastCostScheduler.weigh_hosts with the bundled cost fns."""

    def setUp(self):
        """Build a scheduler backed by a fake zone manager with 10 hosts."""
        super(LeastCostSchedulerTestCase, self).setUp()

        class FakeZoneManager:
            pass

        zone_manager = FakeZoneManager()

        states = test_zone_aware_scheduler.fake_zone_manager_service_states(
            num_hosts=10)
        zone_manager.service_states = states

        self.sched = least_cost.LeastCostScheduler()
        self.sched.zone_manager = zone_manager

    def tearDown(self):
        super(LeastCostSchedulerTestCase, self).tearDown()

    def assertWeights(self, expected, num, request_spec, hosts):
        """Assert weigh_hosts returns ``expected`` within float tolerance."""
        weighted = self.sched.weigh_hosts(num, request_spec, hosts)
        self.assertDictListMatch(weighted, expected, approx_equal=True)

    def test_no_hosts(self):
        num = 1
        request_spec = {}
        hosts = []

        expected = []
        self.assertWeights(expected, num, request_spec, hosts)

    def test_noop_cost_fn(self):
        # Use self.flags rather than assigning to FLAGS directly so the
        # overrides do not leak into other tests.
        self.flags(
            least_cost_scheduler_cost_functions=[
                'nova.scheduler.least_cost.noop_cost_fn'],
            noop_cost_fn_weight=1)

        num = 1
        request_spec = {}
        hosts = self.sched.filter_hosts(num, request_spec)

        expected = [dict(weight=1, hostname=hostname)
                    for hostname, caps in hosts]
        self.assertWeights(expected, num, request_spec, hosts)

    def test_cost_fn_weights(self):
        self.flags(
            least_cost_scheduler_cost_functions=[
                'nova.scheduler.least_cost.noop_cost_fn'],
            noop_cost_fn_weight=2)

        num = 1
        request_spec = {}
        hosts = self.sched.filter_hosts(num, request_spec)

        expected = [dict(weight=2, hostname=hostname)
                    for hostname, caps in hosts]
        self.assertWeights(expected, num, request_spec, hosts)

    def test_fill_first_cost_fn(self):
        self.flags(
            least_cost_scheduler_cost_functions=[
                'nova.scheduler.least_cost.fill_first_cost_fn'],
            fill_first_cost_fn_weight=1)

        num = 1
        request_spec = {}
        hosts = self.sched.filter_hosts(num, request_spec)

        # Costs are normalized against the host with the most free ram, so
        # with weight 1 each host should cost free_ram / max_free_ram.
        # Derive that from each host's own capabilities instead of its
        # position in the list: filter_hosts builds the list from a dict, so
        # the order is not guaranteed to follow free ram.
        max_free = float(max(caps['compute']['host_memory_free']
                             for hostname, caps in hosts))
        expected = []
        for hostname, caps in hosts:
            weight = caps['compute']['host_memory_free'] / max_free
            weight_dict = dict(weight=weight, hostname=hostname)
            expected.append(weight_dict)

        self.assertWeights(expected, num, request_spec, hosts)

View File

@ -61,7 +61,8 @@ class SchedulerTestCase(test.TestCase):
"""Test case for scheduler"""
def setUp(self):
super(SchedulerTestCase, self).setUp()
self.flags(scheduler_driver='nova.tests.test_scheduler.TestDriver')
driver = 'nova.tests.scheduler.test_scheduler.TestDriver'
self.flags(scheduler_driver=driver)
def _create_compute_service(self):
"""Create compute-manager(ComputeNode and Service record)."""

View File

@ -22,6 +22,37 @@ from nova.scheduler import zone_aware_scheduler
from nova.scheduler import zone_manager
def _host_caps(multiplier):
    """Return fake compute capabilities scaled by ``multiplier``.

    Free memory is ``10 + 10 * multiplier`` (of 100 total) and available disk
    is ``100 + 100 * multiplier`` (of 1000 total), so a larger multiplier
    means a host with more resources. Multipliers above 9 would exceed the
    totals, which is why callers should not go above 10 hosts.
    """
    label = 'xs-%s' % multiplier
    free_memory = 10 + multiplier * 10
    free_disk = 100 + multiplier * 100

    caps = {'host_name-description': 'XenServer %s' % multiplier,
            'host_hostname': label,
            'host_memory_total': 100,
            'host_memory_overhead': 10,
            'host_memory_free': free_memory,
            'host_memory_free-computed': free_memory,
            'host_other-config': {},
            'host_ip_address': '192.168.1.%d' % (100 + multiplier),
            'host_cpu_info': {},
            'disk_available': free_disk,
            'disk_total': 1000,
            'disk_used': 0,
            'host_uuid': 'xxx-%d' % multiplier,
            'host_name-label': label}
    return caps
def fake_zone_manager_service_states(num_hosts):
    """Return fake service states for ``num_hosts`` hosts.

    Hosts are named 'host01', 'host02', ... and each maps to a dict holding
    the 'compute' capabilities produced by _host_caps for its zero-based
    index.
    """
    return dict(('host%02d' % (index + 1), {'compute': _host_caps(index)})
                for index in xrange(num_hosts))
class FakeZoneAwareScheduler(zone_aware_scheduler.ZoneAwareScheduler):
def filter_hosts(self, num, specs):
# NOTE(sirp): this is returning [(hostname, services)]