Make scheduler remove dead nodes from its cache.

Scheduler HostManager maintains and updates a cache of all compute
nodes in its host_state_map. However, entries were never deleted from
this mapping, even when the compute nodes were marked as deleted.

This patch removes an entry from host_state_map when its compute node
is no longer returned by db.compute_node_get_all, and logs an info-level
message about each removal.
It also adds unit tests which check for this behaviour.

Change-Id: Ibe6c98dd6c2eb02983db47f263d86cb1b76e1c98
This commit is contained in:
Devananda van der Veen
2013-02-10 23:30:13 -08:00
parent 1bb726c839
commit 4779a976ea
2 changed files with 69 additions and 4 deletions

View File

@@ -369,6 +369,7 @@ class HostManager(object):
# Get resource usage across the available compute nodes:
compute_nodes = db.compute_node_get_all(context)
seen_nodes = set()
for compute in compute_nodes:
service = compute['service']
if not service:
@@ -388,5 +389,14 @@ class HostManager(object):
service=dict(service.iteritems()))
self.host_state_map[state_key] = host_state
host_state.update_from_compute_node(compute)
seen_nodes.add(state_key)
# remove compute nodes from host_state_map if they are not active
dead_nodes = set(self.host_state_map.keys()) - seen_nodes
for state_key in dead_nodes:
host, node = state_key
LOG.info(_("Removing dead compute node %(host)s:%(node)s "
"from scheduler") % locals())
del self.host_state_map[state_key]
return self.host_state_map.itervalues()

View File

@@ -45,10 +45,7 @@ class HostManagerTestCase(test.TestCase):
self.host_manager = host_manager.HostManager()
self.fake_hosts = [host_manager.HostState('fake_host%s' % x,
'fake-node') for x in xrange(1, 5)]
def tearDown(self):
timeutils.clear_time_override()
super(HostManagerTestCase, self).tearDown()
self.addCleanup(timeutils.clear_time_override)
def test_choose_host_filters_not_found(self):
self.flags(scheduler_default_filters='FakeFilterClass3')
@@ -268,6 +265,64 @@ class HostManagerTestCase(test.TestCase):
8388608)
class HostManagerChangedNodesTestCase(test.TestCase):
    """Check how HostManager.host_state_map tracks the DB's compute nodes.

    Each test stubs db.compute_node_get_all, calls get_all_host_states one
    or more times, and then checks the number of entries left in the
    manager's host_state_map.
    """

    def setUp(self):
        super(HostManagerChangedNodesTestCase, self).setUp()
        self.host_manager = host_manager.HostManager()
        # Four host states: host1/node1 through host4/node4.
        self.fake_hosts = [
            host_manager.HostState('host%d' % idx, 'node%d' % idx)
            for idx in (1, 2, 3, 4)
        ]
        self.addCleanup(timeutils.clear_time_override)

    def _expect_node_lists(self, ctxt, *node_lists):
        """Record one db.compute_node_get_all call per list, then replay.

        :param ctxt: the context argument each recorded call must receive
        :param node_lists: the return value for each successive call
        """
        self.mox.StubOutWithMock(db, 'compute_node_get_all')
        for nodes in node_lists:
            db.compute_node_get_all(ctxt).AndReturn(nodes)
        self.mox.ReplayAll()

    def test_get_all_host_states(self):
        ctxt = 'fake_context'
        self._expect_node_lists(ctxt, fakes.COMPUTE_NODES)

        self.host_manager.get_all_host_states(ctxt)

        # The expected count relies on fakes.COMPUTE_NODES holding four
        # usable nodes (defined outside this class).
        self.assertEqual(len(self.host_manager.host_state_map), 4)

    def test_get_all_host_states_after_delete_one(self):
        ctxt = 'fake_context'

        # The second DB call returns everything except node4.
        survivors = []
        for node in fakes.COMPUTE_NODES:
            if node.get('hypervisor_hostname') == 'node4':
                continue
            survivors.append(node)

        # First call sees all nodes, second call sees the reduced list.
        self._expect_node_lists(ctxt, fakes.COMPUTE_NODES, survivors)

        for _ in (1, 2):
            self.host_manager.get_all_host_states(ctxt)

        self.assertEqual(len(self.host_manager.host_state_map), 3)

    def test_get_all_host_states_after_delete_all(self):
        ctxt = 'fake_context'

        # First call sees all nodes, second call sees no nodes at all.
        self._expect_node_lists(ctxt, fakes.COMPUTE_NODES, [])

        for _ in (1, 2):
            self.host_manager.get_all_host_states(ctxt)

        self.assertEqual(len(self.host_manager.host_state_map), 0)
class HostStateTestCase(test.TestCase):
"""Test case for HostState class."""