Make L3 HA VIPs ordering consistent in keepalived.conf

Currently the order of VIPs in keepalived.conf is determined
by sorting the VIPs whenever one is added or removed. As it
turns out, keepalived doesn't like it when the primary VIP
changes. One side effect is that virtual routes, in our case
the router's default route, may be removed.

This patch fabricates an IP address on the router's HA interface
and uses it as the primary VIP.

Closes-Bug: #1404945
Change-Id: I993daf594a28918de6fafff465f5f40e7b89305e
This commit is contained in:
Assaf Muller 2014-12-23 13:52:41 +02:00
parent 6e42c4c926
commit ea587e113a
5 changed files with 149 additions and 75 deletions

View File

@ -113,9 +113,11 @@ class AgentMixin(object):
config = ri.keepalived_manager.config
interface_name = self.get_ha_device_name(ri.ha_port['id'])
ha_port_cidr = ri.ha_port['subnet']['cidr']
instance = keepalived.KeepalivedInstance(
'BACKUP', interface_name, ri.ha_vr_id, nopreempt=True,
advert_int=self.conf.ha_vrrp_advert_int, priority=ri.ha_priority)
'BACKUP', interface_name, ri.ha_vr_id, ha_port_cidr,
nopreempt=True, advert_int=self.conf.ha_vrrp_advert_int,
priority=ri.ha_priority)
instance.track_interfaces.append(interface_name)
if self.conf.ha_vrrp_auth_password:

View File

@ -16,6 +16,7 @@ import itertools
import os
import stat
import netaddr
from oslo.config import cfg
from neutron.agent.linux import external_process
@ -28,10 +29,35 @@ VALID_STATES = ['MASTER', 'BACKUP']
VALID_NOTIFY_STATES = ['master', 'backup', 'fault']
VALID_AUTH_TYPES = ['AH', 'PASS']
HA_DEFAULT_PRIORITY = 50
PRIMARY_VIP_RANGE_SIZE = 24
# TODO(amuller): Use L3 agent constant when new constants module is introduced.
FIP_LL_SUBNET = '169.254.30.0/23'
LOG = logging.getLogger(__name__)
def get_free_range(parent_range, excluded_ranges, size=PRIMARY_VIP_RANGE_SIZE):
"""Get a free IP range, from parent_range, of the specified size.
:param parent_range: String representing an IP range. E.g: '169.254.0.0/16'
:param excluded_ranges: A list of strings to be excluded from parent_range
:param size: What should be the size of the range returned?
:return: A string representing an IP range
"""
free_cidrs = netaddr.IPSet([parent_range]) - netaddr.IPSet(excluded_ranges)
for cidr in free_cidrs.iter_cidrs():
if cidr.prefixlen <= size:
return '%s/%s' % (cidr.network, size)
raise ValueError(_('Network of size %(size)s, from IP range '
'%(parent_range)s excluding IP ranges '
'%(excluded_ranges)s was not found.') %
{'size': size,
'parent_range': parent_range,
'excluded_ranges': excluded_ranges})
class InvalidInstanceStateException(exceptions.NeutronException):
message = (_('Invalid instance state: %%(state)s, valid states are: '
'%(valid_states)s') %
@ -106,7 +132,7 @@ class KeepalivedGroup(object):
class KeepalivedInstance(object):
"""Instance section of a keepalived configuration."""
def __init__(self, state, interface, vrouter_id,
def __init__(self, state, interface, vrouter_id, ha_cidr,
priority=HA_DEFAULT_PRIORITY, advert_int=None,
mcast_src_ip=None, nopreempt=False):
self.name = 'VR_%s' % vrouter_id
@ -125,6 +151,13 @@ class KeepalivedInstance(object):
self.vips = []
self.virtual_routes = []
self.authentication = tuple()
metadata_cidr = '169.254.169.254/32'
self.primary_vip_range = get_free_range(
parent_range='169.254.0.0/16',
excluded_ranges=[metadata_cidr,
FIP_LL_SUBNET,
ha_cidr],
size=PRIMARY_VIP_RANGE_SIZE)
def set_authentication(self, auth_type, password):
if auth_type not in VALID_AUTH_TYPES:
@ -152,18 +185,46 @@ class KeepalivedInstance(object):
(' %s' % i for i in self.track_interfaces),
[' }'])
def _build_vips_config(self):
vips_sorted = sorted(self.vips, key=lambda vip: vip.ip_address)
first_address = vips_sorted.pop(0)
def _generate_primary_vip(self):
"""Return an address in the primary_vip_range CIDR, with the router's
VRID in the host section.
For example, if primary_vip_range is 169.254.0.0/24, and this router's
VRID is 5, the result is 169.254.0.5. Using the VRID assures that
the primary VIP is consistent amongst HA router instances on different
nodes.
"""
ip = (netaddr.IPNetwork(self.primary_vip_range).network +
self.vrouter_id)
return netaddr.IPNetwork('%s/%s' % (ip, PRIMARY_VIP_RANGE_SIZE))
def _build_vips_config(self):
# NOTE(amuller): The primary VIP must be consistent in order to avoid
# keepalived bugs. Changing the VIP in the 'virtual_ipaddress' and
# SIGHUP'ing keepalived can remove virtual routers, including the
# router's default gateway.
# We solve this by never changing the VIP in the virtual_ipaddress
# section, herein known as the primary VIP.
# The only interface known to exist for HA routers is the HA interface
# (self.interface). We generate an IP on that device and use it as the
# primary VIP. The other VIPs (Internal interfaces IPs, the external
# interface IP and floating IPs) are placed in the
# virtual_ipaddress_excluded section.
primary = KeepalivedVipAddress(str(self._generate_primary_vip()),
self.interface)
vips_result = [' virtual_ipaddress {',
' %s' % first_address.build_config(),
' %s' % primary.build_config(),
' }']
if vips_sorted:
if self.vips:
vips_result.extend(
itertools.chain([' virtual_ipaddress_excluded {'],
(' %s' % vip.build_config()
for vip in vips_sorted),
for vip in
sorted(self.vips,
key=lambda vip: vip.ip_address)),
[' }']))
return vips_result
@ -201,8 +262,7 @@ class KeepalivedInstance(object):
if self.track_interfaces:
config.extend(self._build_track_interface_config())
if self.vips:
config.extend(self._build_vips_config())
config.extend(self._build_vips_config())
if self.virtual_routes:
config.extend(self._build_virtual_routes_config())

View File

@ -173,9 +173,10 @@ vrrp_instance VR_1 {
%(ha_device_name)s
}
virtual_ipaddress {
%(floating_ip_cidr)s dev %(external_device_name)s
169.254.0.1/24 dev %(ha_device_name)s
}
virtual_ipaddress_excluded {
%(floating_ip_cidr)s dev %(external_device_name)s
%(external_device_cidr)s dev %(external_device_name)s
%(internal_device_cidr)s dev %(internal_device_name)s
}
@ -377,7 +378,7 @@ class L3HATestFramework(L3AgentTestFramework):
router_info_2 = copy.deepcopy(router_info)
router_info_2[l3_constants.HA_INTERFACE_KEY] = (
test_l3_agent.get_ha_interface(ip='169.254.0.3',
test_l3_agent.get_ha_interface(ip='169.254.192.2',
mac='22:22:22:22:22:22'))
router2 = self.manage_router(self.failover_agent, router_info_2)

View File

@ -12,6 +12,8 @@
# License for the specific language governing permissions and limitations
# under the License.
import testtools
from neutron.agent.linux import keepalived
from neutron.tests import base
@ -19,6 +21,40 @@ from neutron.tests import base
# http://www.keepalived.org/pdf/UserGuide.pdf
class KeepalivedGetFreeRangeTestCase(base.BaseTestCase):
def test_get_free_range(self):
free_range = keepalived.get_free_range(
parent_range='169.254.0.0/16',
excluded_ranges=['169.254.0.0/24',
'169.254.1.0/24',
'169.254.2.0/24'],
size=24)
self.assertEqual('169.254.3.0/24', free_range)
def test_get_free_range_without_excluded(self):
free_range = keepalived.get_free_range(
parent_range='169.254.0.0/16',
excluded_ranges=[],
size=20)
self.assertEqual('169.254.0.0/20', free_range)
def test_get_free_range_excluded_out_of_parent(self):
free_range = keepalived.get_free_range(
parent_range='169.254.0.0/16',
excluded_ranges=['255.255.255.0/24'],
size=24)
self.assertEqual('169.254.0.0/24', free_range)
def test_get_free_range_not_found(self):
tiny_parent_range = '192.168.1.0/24'
huge_size = 8
with testtools.ExpectedException(ValueError):
keepalived.get_free_range(
parent_range=tiny_parent_range,
excluded_ranges=[],
size=huge_size)
class KeepalivedConfBaseMixin(object):
def _get_config(self):
@ -30,6 +66,7 @@ class KeepalivedConfBaseMixin(object):
group1.set_notify('master', '/tmp/script.sh')
instance1 = keepalived.KeepalivedInstance('MASTER', 'eth0', 1,
'169.254.192.0/18',
advert_int=5)
instance1.set_authentication('AH', 'pass123')
instance1.track_interfaces.append("eth0")
@ -59,6 +96,7 @@ class KeepalivedConfBaseMixin(object):
group1.add_instance(instance1)
instance2 = keepalived.KeepalivedInstance('MASTER', 'eth4', 2,
'169.254.192.0/18',
mcast_src_ip='224.0.0.1')
instance2.track_interfaces.append("eth4")
@ -107,9 +145,10 @@ vrrp_instance VR_1 {
eth0
}
virtual_ipaddress {
192.168.1.0/24 dev eth1
169.254.0.1/24 dev eth0
}
virtual_ipaddress_excluded {
192.168.1.0/24 dev eth1
192.168.2.0/24 dev eth2
192.168.3.0/24 dev eth2
192.168.55.0/24 dev eth10
@ -128,9 +167,10 @@ vrrp_instance VR_2 {
eth4
}
virtual_ipaddress {
192.168.2.0/24 dev eth2
169.254.0.2/24 dev eth4
}
virtual_ipaddress_excluded {
192.168.2.0/24 dev eth2
192.168.3.0/24 dev eth6
192.168.55.0/24 dev eth10
}
@ -160,10 +200,11 @@ class KeepalivedStateExceptionTestCase(base.BaseTestCase):
invalid_vrrp_state = 'into a club'
self.assertRaises(keepalived.InvalidInstanceStateException,
keepalived.KeepalivedInstance,
invalid_vrrp_state, 'eth0', 33)
invalid_vrrp_state, 'eth0', 33, '169.254.192.0/18')
invalid_auth_type = '[hip, hip]'
instance = keepalived.KeepalivedInstance('MASTER', 'eth0', 1)
instance = keepalived.KeepalivedInstance('MASTER', 'eth0', 1,
'169.254.192.0/18')
self.assertRaises(keepalived.InvalidAuthenticationTypeExecption,
instance.set_authentication,
invalid_auth_type, 'some_password')
@ -171,6 +212,12 @@ class KeepalivedStateExceptionTestCase(base.BaseTestCase):
class KeepalivedInstanceTestCase(base.BaseTestCase,
KeepalivedConfBaseMixin):
def test_generate_primary_vip(self):
instance = keepalived.KeepalivedInstance('MASTER', 'ha0', 42,
'169.254.192.0/18')
self.assertEqual('169.254.0.42/24',
str(instance._generate_primary_vip()))
def test_remove_adresses_by_interface(self):
config = self._get_config()
instance = config.get_instance(1)
@ -202,6 +249,9 @@ vrrp_instance VR_1 {
eth0
}
virtual_ipaddress {
169.254.0.1/24 dev eth0
}
virtual_ipaddress_excluded {
192.168.1.0/24 dev eth1
}
virtual_routes {
@ -218,9 +268,10 @@ vrrp_instance VR_2 {
eth4
}
virtual_ipaddress {
192.168.2.0/24 dev eth2
169.254.0.2/24 dev eth4
}
virtual_ipaddress_excluded {
192.168.2.0/24 dev eth2
192.168.3.0/24 dev eth6
192.168.55.0/24 dev eth10
}
@ -228,6 +279,20 @@ vrrp_instance VR_2 {
self.assertEqual(expected, config.get_config_str())
def test_build_config_no_vips(self):
expected = """vrrp_instance VR_1 {
state MASTER
interface eth0
virtual_router_id 1
priority 50
virtual_ipaddress {
169.254.0.1/24 dev eth0
}
}"""
instance = keepalived.KeepalivedInstance(
'MASTER', 'eth0', 1, '169.254.192.0/18')
self.assertEqual(expected, '\n'.join(instance.build_config()))
class KeepalivedVirtualRouteTestCase(base.BaseTestCase):
def test_virtual_route_with_dev(self):

View File

@ -137,7 +137,7 @@ def _get_subnet_id(port):
#TODO(jschwarz): This is a shared function with both the unit tests
# and the functional tests, and should be moved elsewhere (probably
# neutron/tests/common/).
def get_ha_interface(ip='169.254.0.2', mac='12:34:56:78:2b:5d'):
def get_ha_interface(ip='169.254.192.1', mac='12:34:56:78:2b:5d'):
return {'admin_state_up': True,
'device_id': _uuid(),
'device_owner': 'network:router_ha_interface',
@ -148,8 +148,8 @@ def get_ha_interface(ip='169.254.0.2', mac='12:34:56:78:2b:5d'):
'name': u'L3 HA Admin port 0',
'network_id': _uuid(),
'status': u'ACTIVE',
'subnet': {'cidr': '169.254.0.0/24',
'gateway_ip': '169.254.0.1',
'subnet': {'cidr': '169.254.192.0/18',
'gateway_ip': '169.254.255.254',
'id': _uuid()},
'tenant_id': '',
'agent_id': _uuid(),
@ -889,60 +889,6 @@ class TestBasicRouterOperations(base.BaseTestCase):
self.assertEqual(agent.process_router_floating_ip_nat_rules.called,
distributed)
def test_ha_router_keepalived_config(self):
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
router = prepare_router_data(enable_ha=True)
router['routes'] = [
{'destination': '8.8.8.8/32', 'nexthop': '35.4.0.10'},
{'destination': '8.8.4.4/32', 'nexthop': '35.4.0.11'}]
ri = l3router.RouterInfo(router['id'], self.conf.root_helper,
router=router)
ri.router = router
with contextlib.nested(mock.patch.object(agent,
'_spawn_metadata_proxy'),
mock.patch('neutron.agent.linux.'
'utils.replace_file'),
mock.patch('neutron.agent.linux.'
'utils.execute'),
mock.patch('os.makedirs')):
agent.process_ha_router_added(ri)
agent.process_router(ri)
config = ri.keepalived_manager.config
ha_iface = agent.get_ha_device_name(ri.ha_port['id'])
ex_iface = agent.get_external_device_name(ri.ex_gw_port['id'])
int_iface = agent.get_internal_device_name(
ri.internal_ports[0]['id'])
expected = """vrrp_sync_group VG_1 {
group {
VR_1
}
}
vrrp_instance VR_1 {
state BACKUP
interface %(ha_iface)s
virtual_router_id 1
priority 50
nopreempt
advert_int 2
track_interface {
%(ha_iface)s
}
virtual_ipaddress {
19.4.4.4/24 dev %(ex_iface)s
}
virtual_ipaddress_excluded {
35.4.0.4/24 dev %(int_iface)s
}
virtual_routes {
0.0.0.0/0 via 19.4.4.1 dev %(ex_iface)s
8.8.8.8/32 via 35.4.0.10
8.8.4.4/32 via 35.4.0.11
}
}""" % {'ha_iface': ha_iface, 'ex_iface': ex_iface, 'int_iface': int_iface}
self.assertEqual(expected, config.get_config_str())
@mock.patch('neutron.agent.linux.ip_lib.IPDevice')
def _test_process_router_floating_ip_addresses_add(self, ri,
agent, IPDevice):