Merge "Add periodic agents health check."
This commit is contained in:
commit
254651ae84
@@ -27,6 +27,7 @@ from sqlalchemy import sql
|
|||||||
|
|
||||||
from neutron.api.v2 import attributes
|
from neutron.api.v2 import attributes
|
||||||
from neutron.common import constants
|
from neutron.common import constants
|
||||||
|
from neutron import context
|
||||||
from neutron.db import model_base
|
from neutron.db import model_base
|
||||||
from neutron.db import models_v2
|
from neutron.db import models_v2
|
||||||
from neutron.extensions import agent as ext_agent
|
from neutron.extensions import agent as ext_agent
|
||||||
@@ -248,6 +249,26 @@ class AgentDbMixin(ext_agent.AgentPluginBase, AgentAvailabilityZoneMixin):
|
|||||||
agents = [agent for agent in agents if agent['alive'] == alive]
|
agents = [agent for agent in agents if agent['alive'] == alive]
|
||||||
return agents
|
return agents
|
||||||
|
|
||||||
|
def agent_health_check(self):
|
||||||
|
"""Scan agents and log if some are considered dead."""
|
||||||
|
agents = self.get_agents(context.get_admin_context(),
|
||||||
|
filters={'admin_state_up': [True]})
|
||||||
|
dead_agents = [agent for agent in agents if not agent['alive']]
|
||||||
|
if dead_agents:
|
||||||
|
data = '%20s %20s %s\n' % ('Type', 'Last heartbeat', "host")
|
||||||
|
data += '\n'.join(['%20s %20s %s' %
|
||||||
|
(agent['agent_type'],
|
||||||
|
agent['heartbeat_timestamp'],
|
||||||
|
agent['host']) for agent in dead_agents])
|
||||||
|
LOG.warn(_LW("Agent healthcheck: found %(count)s dead agents "
|
||||||
|
"out of %(total)s:\n%(data)s"),
|
||||||
|
{'count': len(dead_agents),
|
||||||
|
'total': len(agents),
|
||||||
|
'data': data})
|
||||||
|
else:
|
||||||
|
LOG.debug("Agent healthcheck: found %s active agents",
|
||||||
|
len(agents))
|
||||||
|
|
||||||
def _get_agent_by_type_and_host(self, context, agent_type, host):
|
def _get_agent_by_type_and_host(self, context, agent_type, host):
|
||||||
query = self._model_query(context, Agent)
|
query = self._model_query(context, Agent)
|
||||||
try:
|
try:
|
||||||
|
@@ -118,16 +118,19 @@ class AgentSchedulerDbMixin(agents_db.AgentDbMixin):
|
|||||||
original_agent['host'])
|
original_agent['host'])
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def setup_agent_status_check(self, function):
|
def add_agent_status_check(self, function):
|
||||||
self.periodic_agent_loop = loopingcall.FixedIntervalLoopingCall(
|
loop = loopingcall.FixedIntervalLoopingCall(function)
|
||||||
function)
|
|
||||||
# TODO(enikanorov): make interval configurable rather than computed
|
# TODO(enikanorov): make interval configurable rather than computed
|
||||||
interval = max(cfg.CONF.agent_down_time // 2, 1)
|
interval = max(cfg.CONF.agent_down_time // 2, 1)
|
||||||
# add random initial delay to allow agents to check in after the
|
# add random initial delay to allow agents to check in after the
|
||||||
# neutron server first starts. random to offset multiple servers
|
# neutron server first starts. random to offset multiple servers
|
||||||
initial_delay = random.randint(interval, interval * 2)
|
initial_delay = random.randint(interval, interval * 2)
|
||||||
self.periodic_agent_loop.start(interval=interval,
|
loop.start(interval=interval, initial_delay=initial_delay)
|
||||||
initial_delay=initial_delay)
|
|
||||||
|
if hasattr(self, 'periodic_agent_loops'):
|
||||||
|
self.periodic_agent_loops.append(loop)
|
||||||
|
else:
|
||||||
|
self.periodic_agent_loops = [loop]
|
||||||
|
|
||||||
def agent_dead_limit_seconds(self):
|
def agent_dead_limit_seconds(self):
|
||||||
return cfg.CONF.agent_down_time * 2
|
return cfg.CONF.agent_down_time * 2
|
||||||
@@ -166,7 +169,7 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
|
|||||||
"automatic network rescheduling is disabled."))
|
"automatic network rescheduling is disabled."))
|
||||||
return
|
return
|
||||||
|
|
||||||
self.setup_agent_status_check(self.remove_networks_from_down_agents)
|
self.add_agent_status_check(self.remove_networks_from_down_agents)
|
||||||
|
|
||||||
def is_eligible_agent(self, context, active, agent):
|
def is_eligible_agent(self, context, active, agent):
|
||||||
# eligible agent is active or starting up
|
# eligible agent is active or starting up
|
||||||
|
@@ -82,7 +82,7 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
|
|||||||
"automatic router rescheduling is disabled."))
|
"automatic router rescheduling is disabled."))
|
||||||
return
|
return
|
||||||
|
|
||||||
self.setup_agent_status_check(
|
self.add_agent_status_check(
|
||||||
self.reschedule_routers_from_down_agents)
|
self.reschedule_routers_from_down_agents)
|
||||||
|
|
||||||
def reschedule_routers_from_down_agents(self):
|
def reschedule_routers_from_down_agents(self):
|
||||||
|
@@ -149,6 +149,7 @@ class Ml2Plugin(db_base_plugin_v2.NeutronDbPluginV2,
|
|||||||
self.mechanism_manager.initialize()
|
self.mechanism_manager.initialize()
|
||||||
self._setup_dhcp()
|
self._setup_dhcp()
|
||||||
self._start_rpc_notifiers()
|
self._start_rpc_notifiers()
|
||||||
|
self.add_agent_status_check(self.agent_health_check)
|
||||||
LOG.info(_LI("Modular L2 Plugin initialization complete"))
|
LOG.info(_LI("Modular L2 Plugin initialization complete"))
|
||||||
|
|
||||||
def _setup_rpc(self):
|
def _setup_rpc(self):
|
||||||
|
@@ -409,6 +409,10 @@ class PluginFixture(fixtures.Fixture):
|
|||||||
'neutron.db.agentschedulers_db.DhcpAgentSchedulerDbMixin.'
|
'neutron.db.agentschedulers_db.DhcpAgentSchedulerDbMixin.'
|
||||||
'start_periodic_dhcp_agent_status_check')
|
'start_periodic_dhcp_agent_status_check')
|
||||||
self.patched_dhcp_periodic = self.dhcp_periodic_p.start()
|
self.patched_dhcp_periodic = self.dhcp_periodic_p.start()
|
||||||
|
self.agent_health_check_p = mock.patch(
|
||||||
|
'neutron.db.agentschedulers_db.DhcpAgentSchedulerDbMixin.'
|
||||||
|
'add_agent_status_check')
|
||||||
|
self.agent_health_check = self.agent_health_check_p.start()
|
||||||
# Plugin cleanup should be triggered last so that
|
# Plugin cleanup should be triggered last so that
|
||||||
# test-specific cleanup has a chance to release references.
|
# test-specific cleanup has a chance to release references.
|
||||||
self.addCleanup(self.cleanup_core_plugin)
|
self.addCleanup(self.cleanup_core_plugin)
|
||||||
|
@@ -161,6 +161,27 @@ class TestAgentsDbMixin(TestAgentsDbBase):
|
|||||||
agent = self.plugin.get_agents(self.context)[0]
|
agent = self.plugin.get_agents(self.context)[0]
|
||||||
self.assertFalse(agent['admin_state_up'])
|
self.assertFalse(agent['admin_state_up'])
|
||||||
|
|
||||||
|
def test_agent_health_check(self):
|
||||||
|
agents = [{'agent_type': "DHCP Agent",
|
||||||
|
'heartbeat_timestamp': '2015-05-06 22:40:40.432295',
|
||||||
|
'host': 'some.node',
|
||||||
|
'alive': True}]
|
||||||
|
with mock.patch.object(self.plugin, 'get_agents',
|
||||||
|
return_value=agents),\
|
||||||
|
mock.patch.object(agents_db.LOG, 'warn') as warn,\
|
||||||
|
mock.patch.object(agents_db.LOG, 'debug') as debug:
|
||||||
|
self.plugin.agent_health_check()
|
||||||
|
self.assertTrue(debug.called)
|
||||||
|
self.assertFalse(warn.called)
|
||||||
|
agents[0]['alive'] = False
|
||||||
|
self.plugin.agent_health_check()
|
||||||
|
warn.assert_called_once_with(
|
||||||
|
mock.ANY,
|
||||||
|
{'count': 1, 'total': 1,
|
||||||
|
'data': " Type Last heartbeat host\n"
|
||||||
|
" DHCP Agent 2015-05-06 22:40:40.432295 some.node"}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestAgentsDbGetAgents(TestAgentsDbBase):
|
class TestAgentsDbGetAgents(TestAgentsDbBase):
|
||||||
scenarios = [
|
scenarios = [
|
||||||
|
Loading…
x
Reference in New Issue
Block a user