Merge "Router should flip to standby if all L3 nodes down"

This commit is contained in:
Zuul 2018-04-09 22:45:52 +00:00 committed by Gerrit Code Review
commit c9f03df891
7 changed files with 109 additions and 22 deletions

View File

@ -37,7 +37,10 @@ HA_SUBNET_NAME = 'HA subnet tenant %s'
HA_PORT_NAME = 'HA port tenant %s' HA_PORT_NAME = 'HA port tenant %s'
HA_ROUTER_STATE_ACTIVE = 'active' HA_ROUTER_STATE_ACTIVE = 'active'
HA_ROUTER_STATE_STANDBY = 'standby' HA_ROUTER_STATE_STANDBY = 'standby'
VALID_HA_STATES = (HA_ROUTER_STATE_ACTIVE, HA_ROUTER_STATE_STANDBY) HA_ROUTER_STATE_UNKNOWN = 'unknown'
VALID_HA_STATES = (HA_ROUTER_STATE_ACTIVE, HA_ROUTER_STATE_STANDBY,
HA_ROUTER_STATE_UNKNOWN)
PAGINATION_INFINITE = 'infinite' PAGINATION_INFINITE = 'infinite'
SORT_DIRECTION_ASC = 'asc' SORT_DIRECTION_ASC = 'asc'

View File

@ -547,20 +547,36 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
""" """
with context.session.begin(subtransactions=True): with context.session.begin(subtransactions=True):
bindings = self.get_ha_router_port_bindings(context, [router_id]) bindings = self.get_ha_router_port_bindings(context, [router_id])
dead_agents = [] router_active_agents_dead = []
active = [binding for binding in bindings router_standby_agents_dead = []
if binding.state == n_const.HA_ROUTER_STATE_ACTIVE] # List agents where router is active and agent is dead
# Check dead agents only if we have more then one active agent # and agents where router is standby and agent is dead
if len(active) > 1: for binding in bindings:
dead_agents = [binding.agent for binding in active if not (binding.agent.is_active
if not (binding.agent.is_active and and binding.agent.admin_state_up):
binding.agent.admin_state_up)] if binding.state == n_const.HA_ROUTER_STATE_ACTIVE:
for dead_agent in dead_agents: router_active_agents_dead.append(binding.agent)
self.update_routers_states( elif binding.state == n_const.HA_ROUTER_STATE_STANDBY:
context, router_standby_agents_dead.append(binding.agent)
{router_id: n_const.HA_ROUTER_STATE_STANDBY}, if router_active_agents_dead:
dead_agent.host) # Just check if all l3_agents are down
if dead_agents: # then assuming some communication issue
if (len(router_active_agents_dead) +
len(router_standby_agents_dead) == len(bindings)):
# Mark the router status as unknown: there may only be
# an agent communication issue, in which case the router
# may still be active. We do not know the exact status
# of the router.
state = n_const.HA_ROUTER_STATE_UNKNOWN
else:
# Mark the router status as standby on all dead agents;
# since some other agents are alive, the router can
# become active on them after some time.
state = n_const.HA_ROUTER_STATE_STANDBY
for dead_agent in router_active_agents_dead:
self.update_routers_states(context, {router_id: state},
dead_agent.host)
if router_active_agents_dead:
return self.get_ha_router_port_bindings(context, [router_id]) return self.get_ha_router_port_bindings(context, [router_id])
return bindings return bindings

View File

@ -1 +1 @@
594422d373ee 61663558142c

View File

@ -0,0 +1,43 @@
# Copyright 2017 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
"""Add unknown state to HA router
Revision ID: 61663558142c
Revises: 594422d373ee
Create Date: 2017-05-18 14:31:45.725516
"""
# Migration revision identifiers: this revision's ID and the ID of the
# revision it revises (the same values listed in the module docstring).
revision = '61663558142c'
down_revision = '594422d373ee'
import sqlalchemy as sa
from neutron.common import constants
from neutron.db import migration
# Name of the table that upgrade() alters.
ha_port_bindings_table_name = 'ha_router_agent_port_bindings'

# Enum definition handed to upgrade(): the active and standby states plus
# the unknown state, under the type name 'l3_ha_states'.
new_enum = sa.Enum(constants.HA_ROUTER_STATE_ACTIVE,
                   constants.HA_ROUTER_STATE_STANDBY,
                   constants.HA_ROUTER_STATE_UNKNOWN,
                   name='l3_ha_states')
def upgrade():
    """Apply the "Add unknown state to HA router" schema change.

    Delegates to ``migration.alter_enum_add_value`` for the ``state``
    column of the HA router agent port bindings table, passing the enum
    definition that includes the unknown state.
    """
    state_column = 'state'
    # NOTE(review): the positional ``True`` is presumably the helper's
    # nullable flag -- confirm against alter_enum_add_value's signature.
    migration.alter_enum_add_value(
        ha_port_bindings_table_name,
        state_column,
        new_enum,
        True,
        server_default='standby',
    )

View File

@ -53,6 +53,7 @@ class L3HARouterAgentPortBinding(model_base.BASEV2):
state = sa.Column(sa.Enum(n_const.HA_ROUTER_STATE_ACTIVE, state = sa.Column(sa.Enum(n_const.HA_ROUTER_STATE_ACTIVE,
n_const.HA_ROUTER_STATE_STANDBY, n_const.HA_ROUTER_STATE_STANDBY,
n_const.HA_ROUTER_STATE_UNKNOWN,
name='l3_ha_states'), name='l3_ha_states'),
default=n_const.HA_ROUTER_STATE_STANDBY, default=n_const.HA_ROUTER_STATE_STANDBY,
server_default=n_const.HA_ROUTER_STATE_STANDBY) server_default=n_const.HA_ROUTER_STATE_STANDBY)

View File

@ -179,8 +179,7 @@ class L3HATestCase(L3HATestFramework):
self.admin_ctx, router['id']) self.admin_ctx, router['id'])
self.assertEqual([], bindings) self.assertEqual([], bindings)
def _assert_ha_state_for_agent(self, router, agent, def _assert_ha_state_for_agent(self, router, agent, state):
state=n_const.HA_ROUTER_STATE_STANDBY):
bindings = ( bindings = (
self.plugin.get_l3_bindings_hosting_router_with_ha_states( self.plugin.get_l3_bindings_hosting_router_with_ha_states(
self.admin_ctx, router['id'])) self.admin_ctx, router['id']))
@ -198,7 +197,8 @@ class L3HATestCase(L3HATestFramework):
self.agent2['host']) self.agent2['host'])
with mock.patch.object(agent_utils, 'is_agent_down', with mock.patch.object(agent_utils, 'is_agent_down',
return_value=True): return_value=True):
self._assert_ha_state_for_agent(router, self.agent1) self._assert_ha_state_for_agent(router, self.agent1,
n_const.HA_ROUTER_STATE_UNKNOWN)
def test_get_l3_bindings_hosting_router_agents_admin_state_up_is_false( def test_get_l3_bindings_hosting_router_agents_admin_state_up_is_false(
self): self):
@ -209,8 +209,22 @@ class L3HATestCase(L3HATestFramework):
self.plugin.update_routers_states( self.plugin.update_routers_states(
self.admin_ctx, {router['id']: n_const.HA_ROUTER_STATE_ACTIVE}, self.admin_ctx, {router['id']: n_const.HA_ROUTER_STATE_ACTIVE},
self.agent2['host']) self.agent2['host'])
helpers.set_agent_admin_state(self.agent1['id']) helpers.set_agent_admin_state(self.agent1['id'], admin_state_up=False)
self._assert_ha_state_for_agent(router, self.agent1) self._assert_ha_state_for_agent(router, self.agent1,
n_const.HA_ROUTER_STATE_STANDBY)
def test_get_l3_bindings_hosting_router_agents_admin_state_up_is_true(
        self):
    # Report the router as active on agent1 and then on agent2, set
    # agent1's admin_state_up to True, and check that the binding for
    # agent1 is still reported as active.
    router = self._create_router()
    for hosting_agent in (self.agent1, self.agent2):
        # A fresh state mapping is built for every call, exactly as two
        # separate literal calls would do.
        self.plugin.update_routers_states(
            self.admin_ctx,
            {router['id']: n_const.HA_ROUTER_STATE_ACTIVE},
            hosting_agent['host'])
    helpers.set_agent_admin_state(self.agent1['id'], admin_state_up=True)
    expected_state = n_const.HA_ROUTER_STATE_ACTIVE
    self._assert_ha_state_for_agent(router, self.agent1, expected_state)
def test_get_l3_bindings_hosting_router_with_ha_states_one_dead(self): def test_get_l3_bindings_hosting_router_with_ha_states_one_dead(self):
router = self._create_router() router = self._create_router()
@ -222,8 +236,10 @@ class L3HATestCase(L3HATestFramework):
self.agent2['host']) self.agent2['host'])
with mock.patch.object(agent_utils, 'is_agent_down', with mock.patch.object(agent_utils, 'is_agent_down',
return_value=True): return_value=True):
# With above mock all agents are in dead state
# hence router state is Unknown overall.
self._assert_ha_state_for_agent( self._assert_ha_state_for_agent(
router, self.agent1, state=n_const.HA_ROUTER_STATE_ACTIVE) router, self.agent1, n_const.HA_ROUTER_STATE_UNKNOWN)
def test_ha_router_create(self): def test_ha_router_create(self):
router = self._create_router() router = self._create_router()

View File

@ -0,0 +1,8 @@
---
features:
- Added a new ``unknown`` state for HA routers. Sometimes L3 agents may not
  be able to report their health status to the Neutron server due to
  communication issues. During that time the server may not know whether
  the HA routers hosted by those agents are active or standby.
fixes:
- Fixes bug `1682145 <https://launchpad.net/bugs/1682145>`_.