From 303cbc6b5b745aadf026627c4cf2e90e3862b586 Mon Sep 17 00:00:00 2001 From: Assaf Muller Date: Fri, 15 Jan 2016 16:47:39 -0500 Subject: [PATCH] Fix L3 HA with IPv6 We currently use garp_master_repeat and garp_master_refresh to solve bug 1453855. We need to spawn keepalived only after all of the qr/qg ports have been wired so that the initial GARP will be properly sent. Otherwise you get a routing black hole. In lieu of a proper sync method, we used those two keepalived options to send GARPs repeatedly. That approach has two problems: a) We did not know that it never stops spamming the network. b) It causes VMs to lose their IPv6 default gateway due to a keepalived bug, which has since been fixed, but it would need to be backported to every keepalived version on every distro. Here's the patch: https://github.com/acassen/keepalived/pull/200 The solution this patch proposes is to drop the repeat and refresh keepalived options. This will fix the IPv6 bug but re-introduce bug 1453855. So, this patch uses the garp_master_delay option instead. It turns out keepalived sends a GARP when it transitions to MASTER, and then it waits a number of seconds determined by the delay option, and sends a GARP again. We'll use an aggressive 'delay' setting to make sure that when the node boots and the L3/L2 agents start, we'll give the L2 agent enough time to wire the ports as a stopgap solution. Note that this only affects initial synchronization time, not failover times. Failover times will continue to be fast because the ports are wired ahead of time, so the initial GARP after the state transition to MASTER will be sent properly. 
Change-Id: I7a086472b8742828dae08ffd915c45e94fb4b94e Closes-Bug: #1520517 Related-Bug: #1453855 --- neutron/agent/linux/keepalived.py | 12 ++++-------- neutron/tests/functional/agent/l3/framework.py | 3 +-- neutron/tests/unit/agent/linux/test_keepalived.py | 15 +++++---------- 3 files changed, 10 insertions(+), 20 deletions(-) diff --git a/neutron/agent/linux/keepalived.py b/neutron/agent/linux/keepalived.py index 5950b1bc6e6..3c10f6a885c 100644 --- a/neutron/agent/linux/keepalived.py +++ b/neutron/agent/linux/keepalived.py @@ -32,8 +32,7 @@ PRIMARY_VIP_RANGE_SIZE = 24 # TODO(amuller): Use L3 agent constant when new constants module is introduced. FIP_LL_SUBNET = '169.254.30.0/23' KEEPALIVED_SERVICE_NAME = 'keepalived' -GARP_MASTER_REPEAT = 5 -GARP_MASTER_REFRESH = 10 +GARP_MASTER_DELAY = 60 LOG = logging.getLogger(__name__) @@ -159,8 +158,7 @@ class KeepalivedInstance(object): def __init__(self, state, interface, vrouter_id, ha_cidrs, priority=HA_DEFAULT_PRIORITY, advert_int=None, mcast_src_ip=None, nopreempt=False, - garp_master_repeat=GARP_MASTER_REPEAT, - garp_master_refresh=GARP_MASTER_REFRESH): + garp_master_delay=GARP_MASTER_DELAY): self.name = 'VR_%s' % vrouter_id if state not in VALID_STATES: @@ -173,8 +171,7 @@ class KeepalivedInstance(object): self.nopreempt = nopreempt self.advert_int = advert_int self.mcast_src_ip = mcast_src_ip - self.garp_master_repeat = garp_master_repeat - self.garp_master_refresh = garp_master_refresh + self.garp_master_delay = garp_master_delay self.track_interfaces = [] self.vips = [] self.virtual_routes = KeepalivedInstanceRoutes() @@ -273,8 +270,7 @@ class KeepalivedInstance(object): ' interface %s' % self.interface, ' virtual_router_id %s' % self.vrouter_id, ' priority %s' % self.priority, - ' garp_master_repeat %s' % self.garp_master_repeat, - ' garp_master_refresh %s' % self.garp_master_refresh] + ' garp_master_delay %s' % self.garp_master_delay] if self.nopreempt: config.append(' nopreempt') diff --git 
a/neutron/tests/functional/agent/l3/framework.py b/neutron/tests/functional/agent/l3/framework.py index bbe7cfb5a62..bbcf393a551 100644 --- a/neutron/tests/functional/agent/l3/framework.py +++ b/neutron/tests/functional/agent/l3/framework.py @@ -345,8 +345,7 @@ class L3AgentTestFramework(base.BaseSudoTestCase): interface %(ha_device_name)s virtual_router_id 1 priority 50 - garp_master_repeat 5 - garp_master_refresh 10 + garp_master_delay 60 nopreempt advert_int 2 track_interface { diff --git a/neutron/tests/unit/agent/linux/test_keepalived.py b/neutron/tests/unit/agent/linux/test_keepalived.py index 01eb4698af8..b29b1f64906 100644 --- a/neutron/tests/unit/agent/linux/test_keepalived.py +++ b/neutron/tests/unit/agent/linux/test_keepalived.py @@ -115,8 +115,7 @@ class KeepalivedConfTestCase(base.BaseTestCase, interface eth0 virtual_router_id 1 priority 50 - garp_master_repeat 5 - garp_master_refresh 10 + garp_master_delay 60 advert_int 5 authentication { auth_type AH @@ -143,8 +142,7 @@ vrrp_instance VR_2 { interface eth4 virtual_router_id 2 priority 50 - garp_master_repeat 5 - garp_master_refresh 10 + garp_master_delay 60 mcast_src_ip 224.0.0.1 track_interface { eth4 @@ -253,8 +251,7 @@ class KeepalivedInstanceTestCase(base.BaseTestCase, interface eth0 virtual_router_id 1 priority 50 - garp_master_repeat 5 - garp_master_refresh 10 + garp_master_delay 60 advert_int 5 authentication { auth_type AH @@ -278,8 +275,7 @@ vrrp_instance VR_2 { interface eth4 virtual_router_id 2 priority 50 - garp_master_repeat 5 - garp_master_refresh 10 + garp_master_delay 60 mcast_src_ip 224.0.0.1 track_interface { eth4 @@ -302,8 +298,7 @@ vrrp_instance VR_2 { interface eth0 virtual_router_id 1 priority 50 - garp_master_repeat 5 - garp_master_refresh 10 + garp_master_delay 60 virtual_ipaddress { 169.254.0.1/24 dev eth0 }