Spawn RADVD only in the master HA router

Currently radvd is spawned in all the HA routers irrespective of the
state of the router. This approach has the following issues.

1. While processing the internal router ports (i.e., qr-xxx), ha_router
   removes the link-local address (LLA) of the interface and adds it as a
   VIP to the keepalived conf. The radvd daemon is spawned after this
   operation in the router namespace (if the port is associated with any
   IPv6 subnets). Radvd notices that the qr-xxx interface does not have the
   LLA, so it does not transmit any Router Advertisements (RAs). In this
   state, VMs fail to acquire IPv6 addresses because of the missing RAs.
   Radvd does not recover even after keepalived configures the LLA of the
   interface; the only solution is to restart/reload the radvd daemon.
   Currently the keepalived-state-change monitor does not perform any
   radvd-related operations when a state transition happens, so we end up
   stuck in this state forever.
2. For all routers in the Backup state, the qr-xxx interface does not have
   the LLA, as it is managed by keepalived and configured only on the Master
   HA router. On such agents, syslog is flooded with messages like [1], which
   can cause other useful information to be lost.
   [1] - resetting ipv6-allrouters membership on qr-2e373555-97

This patch implements the following:
1. If the router is already in the Master state, we configure the LLA as a VIP
   in the keepalived conf but do not delete the LLA of the internal interface.
2. We spawn radvd only if the router is in the Master state.
3. The keepalived-state-change monitor takes care of enabling/disabling radvd
   upon state transitions.

Closes-Bug: #1440699
Change-Id: I351c71d058170265bbb8b56e1f7a3430bd8828d5
This commit is contained in:
sridhargaddam 2015-04-08 10:57:19 +00:00 committed by Numan Siddique
parent cc020dc930
commit 2f9b0ce940
4 changed files with 64 additions and 10 deletions

View File

@ -113,10 +113,7 @@ class AgentMixin(object):
LOG.info(_LI('Router %(router_id)s transitioned to %(state)s'),
{'router_id': router_id,
'state': state})
self._update_metadata_proxy(router_id, state)
self.state_change_notifier.queue_event((router_id, state))
def _update_metadata_proxy(self, router_id, state):
try:
ri = self.router_info[router_id]
except AttributeError:
@ -124,6 +121,11 @@ class AgentMixin(object):
'possibly deleted concurrently.'), router_id)
return
self._update_metadata_proxy(ri, router_id, state)
self._update_radvd_daemon(ri, state)
self.state_change_notifier.queue_event((router_id, state))
def _update_metadata_proxy(self, ri, router_id, state):
if state == 'master':
LOG.debug('Spawning metadata proxy for router %s', router_id)
self.metadata_driver.spawn_monitored_metadata_proxy(
@ -134,6 +136,14 @@ class AgentMixin(object):
self.metadata_driver.destroy_monitored_metadata_proxy(
self.process_monitor, ri.router_id, ri.ns_name, self.conf)
# Enable or disable radvd on router `ri` according to the reported HA state.
#   ri:    router object; only its enable_radvd() / disable_radvd() methods
#          are used here.
#   state: HA state string; 'master' enables radvd, any other value disables it.
def _update_radvd_daemon(self, ri, state):
# Radvd has to be spawned only on the Master HA Router. If there are
# any state transitions, we enable/disable radvd accordingly.
if state == 'master':
ri.enable_radvd()
else:
ri.disable_radvd()
def notify_server(self, batched_events):
translation_map = {'master': 'active',
'backup': 'standby',

View File

@ -212,14 +212,17 @@ class HaRouter(router.RouterInfo):
def _should_delete_ipv6_lladdr(self, ipv6_lladdr):
"""Only the master should have any IP addresses configured.
Let keepalived manage IPv6 link local addresses, the same way we let
it manage IPv4 addresses. In order to do that, we must delete
the address first as it is autoconfigured by the kernel.
it manage IPv4 addresses. If the router is not in the master state,
we must delete the address first as it is autoconfigured by the kernel.
"""
manager = self.keepalived_manager
if manager.get_process().active:
conf = manager.get_conf_on_disk()
managed_by_keepalived = conf and ipv6_lladdr in conf
if managed_by_keepalived:
if self.ha_state != 'master':
conf = manager.get_conf_on_disk()
managed_by_keepalived = conf and ipv6_lladdr in conf
if managed_by_keepalived:
return False
else:
return False
return True
@ -353,3 +356,8 @@ class HaRouter(router.RouterInfo):
if self.ha_port:
self.enable_keepalived()
# HA-aware override of enable_radvd: delegates to the parent class
# implementation only when the keepalived process is active AND this
# router's ha_state is 'master'. In every other case the call is a no-op.
#   internal_ports: forwarded unchanged to the parent implementation.
def enable_radvd(self, internal_ports=None):
if (self.keepalived_manager.get_process().active and
self.ha_state == 'master'):
super(HaRouter, self).enable_radvd(internal_ports)

View File

@ -274,7 +274,7 @@ class RouterInfo(object):
self.router[l3_constants.INTERFACE_KEY] = []
self.router[l3_constants.FLOATINGIP_KEY] = []
self.process(agent)
self.radvd.disable()
self.disable_radvd()
if self.router_namespace:
self.router_namespace.delete()
@ -342,6 +342,17 @@ class RouterInfo(object):
if netaddr.IPNetwork(subnet['cidr']).version == 6:
return True
# Enable radvd for this router by calling self.radvd.enable().
#   internal_ports: ports handed to self.radvd.enable(); when falsy
#                   (None or an empty collection) self.internal_ports is
#                   used instead.
def enable_radvd(self, internal_ports=None):
LOG.debug('Spawning radvd daemon in router device: %s', self.router_id)
if not internal_ports:
# No ports supplied by the caller: fall back to self.internal_ports.
internal_ports = self.internal_ports
self.radvd.enable(internal_ports)
# Disable radvd for this router: emits a debug log entry and then calls
# self.radvd.disable(). Takes no arguments and returns nothing explicitly.
def disable_radvd(self):
LOG.debug('Terminating radvd daemon in router device: %s',
self.router_id)
self.radvd.disable()
def _process_internal_ports(self):
existing_port_ids = set(p['id'] for p in self.internal_ports)
@ -380,7 +391,7 @@ class RouterInfo(object):
# Enable RA
if enable_ra:
self.radvd.enable(internal_ports)
self.enable_radvd(internal_ports)
existing_devices = self._get_existing_devices()
current_internal_devs = set(n for n in existing_devices

View File

@ -755,6 +755,31 @@ class L3HATestFramework(L3AgentTestFramework):
utils.wait_until_true(lambda: router2.ha_state == 'master')
utils.wait_until_true(lambda: router1.ha_state == 'backup')
# Test that, for an IPv6 HA router, radvd.enabled and the presence of the
# link-local address on the internal devices follow the router's HA state
# ('master' first, then 'backup' after the HA device link is set down).
def test_ha_router_ipv6_radvd_status(self):
router_info = self.generate_router_info(ip_version=6, enable_ha=True)
router1 = self.manage_router(self.agent, router_info)
# Wait until the router reports 'master' and radvd is enabled.
utils.wait_until_true(lambda: router1.ha_state == 'master')
utils.wait_until_true(lambda: router1.radvd.enabled)
# Helper: for every internal device of `router`, assert that the result of
# ip_lib.device_exists_with_ips_and_mac() for the address returned by
# ip_lib.get_ipv6_lladdr(mac) equals `expected`.
def _check_lla_status(router, expected):
internal_devices = router.router[l3_constants.INTERFACE_KEY]
for device in internal_devices:
lladdr = ip_lib.get_ipv6_lladdr(device['mac_address'])
exists = ip_lib.device_exists_with_ips_and_mac(
router.get_internal_device_name(device['id']), [lladdr],
device['mac_address'], router.ns_name)
self.assertEqual(expected, exists)
# While master, the check is expected to succeed for the internal devices.
_check_lla_status(router1, True)
# Set the HA device's link down, then wait for the router to report
# 'backup' and for radvd to be disabled.
device_name = router1.get_ha_device_name()
ha_device = ip_lib.IPDevice(device_name, namespace=router1.ns_name)
ha_device.link.set_down()
utils.wait_until_true(lambda: router1.ha_state == 'backup')
utils.wait_until_true(lambda: not router1.radvd.enabled, timeout=10)
# In backup state the same check is expected to fail.
_check_lla_status(router1, False)
class MetadataFakeProxyHandler(object):