Make scheduler remove dead nodes from its cache.
The scheduler's HostManager maintains and updates a cache of all compute nodes in its host_state_map. However, entries were never deleted from this mapping, even when the compute nodes were marked as deleted. This patch removes a state_key from host_state_map when it is not returned by db.compute_node_get_all, and logs a notice about it. It also adds unit tests that check for this behaviour.

Change-Id: Ibe6c98dd6c2eb02983db47f263d86cb1b76e1c98
This commit is contained in:
@@ -369,6 +369,7 @@ class HostManager(object):
|
||||
|
||||
# Get resource usage across the available compute nodes:
|
||||
compute_nodes = db.compute_node_get_all(context)
|
||||
seen_nodes = set()
|
||||
for compute in compute_nodes:
|
||||
service = compute['service']
|
||||
if not service:
|
||||
@@ -388,5 +389,14 @@ class HostManager(object):
|
||||
service=dict(service.iteritems()))
|
||||
self.host_state_map[state_key] = host_state
|
||||
host_state.update_from_compute_node(compute)
|
||||
seen_nodes.add(state_key)
|
||||
|
||||
# remove compute nodes from host_state_map if they are not active
|
||||
dead_nodes = set(self.host_state_map.keys()) - seen_nodes
|
||||
for state_key in dead_nodes:
|
||||
host, node = state_key
|
||||
LOG.info(_("Removing dead compute node %(host)s:%(node)s "
|
||||
"from scheduler") % locals())
|
||||
del self.host_state_map[state_key]
|
||||
|
||||
return self.host_state_map.itervalues()
|
||||
|
||||
@@ -45,10 +45,7 @@ class HostManagerTestCase(test.TestCase):
|
||||
self.host_manager = host_manager.HostManager()
|
||||
self.fake_hosts = [host_manager.HostState('fake_host%s' % x,
|
||||
'fake-node') for x in xrange(1, 5)]
|
||||
|
||||
def tearDown(self):
|
||||
timeutils.clear_time_override()
|
||||
super(HostManagerTestCase, self).tearDown()
|
||||
self.addCleanup(timeutils.clear_time_override)
|
||||
|
||||
def test_choose_host_filters_not_found(self):
|
||||
self.flags(scheduler_default_filters='FakeFilterClass3')
|
||||
@@ -268,6 +265,64 @@ class HostManagerTestCase(test.TestCase):
|
||||
8388608)
|
||||
|
||||
|
||||
class HostManagerChangedNodesTestCase(test.TestCase):
    """Check that host_state_map follows changes in the compute node list.

    Every test stubs out db.compute_node_get_all with mox, calls
    HostManager.get_all_host_states() one or more times and then inspects
    the resulting host_manager.host_state_map.
    """

    def setUp(self):
        """Create a fresh HostManager and a set of fake host states."""
        super(HostManagerChangedNodesTestCase, self).setUp()
        self.host_manager = host_manager.HostManager()
        self.fake_hosts = [
            host_manager.HostState('host1', 'node1'),
            host_manager.HostState('host2', 'node2'),
            host_manager.HostState('host3', 'node3'),
            host_manager.HostState('host4', 'node4'),
        ]
        self.addCleanup(timeutils.clear_time_override)

    def test_get_all_host_states(self):
        """A single refresh caches one entry per returned compute node."""
        context = 'fake_context'

        self.mox.StubOutWithMock(db, 'compute_node_get_all')
        db.compute_node_get_all(context).AndReturn(fakes.COMPUTE_NODES)
        self.mox.ReplayAll()

        self.host_manager.get_all_host_states(context)
        host_states_map = self.host_manager.host_state_map
        self.assertEqual(len(host_states_map), 4)

    def test_get_all_host_states_after_delete_one(self):
        """A node missing from the second refresh is dropped from the map."""
        context = 'fake_context'

        self.mox.StubOutWithMock(db, 'compute_node_get_all')
        # all nodes active for first call
        db.compute_node_get_all(context).AndReturn(fakes.COMPUTE_NODES)
        # remove node4 for second call
        running_nodes = [n for n in fakes.COMPUTE_NODES
                         if n.get('hypervisor_hostname') != 'node4']
        db.compute_node_get_all(context).AndReturn(running_nodes)
        self.mox.ReplayAll()

        self.host_manager.get_all_host_states(context)
        self.host_manager.get_all_host_states(context)
        host_states_map = self.host_manager.host_state_map
        self.assertEqual(len(host_states_map), 3)
        # The count alone would also pass if the wrong entry were evicted.
        # host_state_map keys are (host, node) pairs, so verify that it is
        # the removed node that is gone.
        # NOTE(review): assumes the node part of the key is the
        # hypervisor_hostname filtered on above -- confirm.
        remaining_nodes = [node for (_host, node) in host_states_map]
        self.assertNotIn('node4', remaining_nodes)

    def test_get_all_host_states_after_delete_all(self):
        """An empty second refresh leaves the map empty."""
        context = 'fake_context'

        self.mox.StubOutWithMock(db, 'compute_node_get_all')
        # all nodes active for first call
        db.compute_node_get_all(context).AndReturn(fakes.COMPUTE_NODES)
        # remove all nodes for second call
        db.compute_node_get_all(context).AndReturn([])
        self.mox.ReplayAll()

        self.host_manager.get_all_host_states(context)
        self.host_manager.get_all_host_states(context)
        host_states_map = self.host_manager.host_state_map
        self.assertEqual(len(host_states_map), 0)
|
||||
|
||||
|
||||
class HostStateTestCase(test.TestCase):
|
||||
"""Test case for HostState class."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user