Move db query to fetch down bindings under try/except

In case of intermittent DB failures, the router and network auto-rescheduling
tasks may fail due to an error while fetching down bindings from the DB.
These queries need to be put under try/except to prevent an unexpected exit.

Closes-Bug: #1546110
Change-Id: Id48e899a5b3d906c6d1da4d03923bdda2681cd92
(cherry picked from commit b6ec40cbf7)
This commit is contained in:
Oleg Bondarev 2016-02-16 18:03:52 +03:00 committed by Ihar Hrachyshka
parent e9622b0bb0
commit 48a6196718
4 changed files with 37 additions and 17 deletions

View File

@ -263,14 +263,13 @@ class DhcpAgentSchedulerDbMixin(dhcpagentscheduler
cutoff = self.get_cutoff_time(agent_dead_limit)
context = ncontext.get_admin_context()
try:
down_bindings = (
context.session.query(NetworkDhcpAgentBinding).
join(agents_db.Agent).
filter(agents_db.Agent.heartbeat_timestamp < cutoff,
agents_db.Agent.admin_state_up))
dhcp_notifier = self.agent_notifiers.get(constants.AGENT_TYPE_DHCP)
try:
dead_bindings = [b for b in
self._filter_bindings(context, down_bindings)]
agents = self.get_agents_db(

View File

@ -98,6 +98,7 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
cutoff = self.get_cutoff_time(agent_dead_limit)
context = n_ctx.get_admin_context()
try:
down_bindings = (
context.session.query(RouterL3AgentBinding).
join(agents_db.Agent).
@ -106,9 +107,11 @@ class L3AgentSchedulerDbMixin(l3agentscheduler.L3AgentSchedulerPluginBase,
outerjoin(l3_attrs_db.RouterExtraAttributes,
l3_attrs_db.RouterExtraAttributes.router_id ==
RouterL3AgentBinding.router_id).
filter(sa.or_(l3_attrs_db.RouterExtraAttributes.ha == sql.false(),
l3_attrs_db.RouterExtraAttributes.ha == sql.null())))
try:
filter(sa.or_(l3_attrs_db.RouterExtraAttributes.ha ==
sql.false(),
l3_attrs_db.RouterExtraAttributes.ha ==
sql.null())))
agents_back_online = set()
for binding in down_bindings:
if binding.l3_agent_id in agents_back_online:

View File

@ -683,6 +683,16 @@ class OvsAgentSchedulerTestCase(OvsAgentSchedulerTestCaseBase):
self._take_down_agent_and_run_reschedule(L3_HOSTA) # Value error
self._take_down_agent_and_run_reschedule(L3_HOSTA) # Exception
def test_router_rescheduler_catches_exceptions_on_fetching_bindings(self):
# Regression test: a DB error raised while querying for bindings must not
# propagate out of reschedule_routers_from_down_agents().
# Replace the admin-context factory so the test controls the session object.
with mock.patch('neutron.context.get_admin_context') as get_ctx:
mock_ctx = mock.Mock()
get_ctx.return_value = mock_ctx
# Any session.query(...) call on the mocked context raises DBError.
mock_ctx.session.query.side_effect = db_exc.DBError()
plugin = manager.NeutronManager.get_service_plugins().get(
service_constants.L3_ROUTER_NAT)
# check that no exception is raised
plugin.reschedule_routers_from_down_agents()
def test_router_rescheduler_iterates_after_reschedule_failure(self):
plugin = manager.NeutronManager.get_service_plugins().get(
service_constants.L3_ROUTER_NAT)

View File

@ -332,6 +332,14 @@ class TestNetworksFailover(TestDhcpSchedulerBaseTestCase,
# just make sure that no exception is raised
self.remove_networks_from_down_agents()
def test_reschedule_network_catches_exceptions_on_fetching_bindings(self):
# Regression test: an exception raised while querying for bindings must not
# propagate out of remove_networks_from_down_agents().
# Replace the admin-context factory so the test controls the session object.
with mock.patch('neutron.context.get_admin_context') as get_ctx:
mock_ctx = mock.Mock()
get_ctx.return_value = mock_ctx
# Any session.query(...) call on the mocked context raises a generic Exception.
mock_ctx.session.query.side_effect = Exception()
# just make sure that no exception is raised
self.remove_networks_from_down_agents()
def test_reschedule_doesnt_occur_if_no_agents(self):
agents = self._create_and_set_agents_down(['host-a', 'host-b'], 2)
self._test_schedule_bind_network([agents[0]], self.network_id)