Fix L3 HA with IPv6

We currently use garp_master_repeat and garp_master_refresh
to solve bug 1453855. We need to spawn keepalived only after
all of the qr/qg ports have been wired so that the
initial GARP will be properly sent. Otherwise you get a routing
black hole. In lieu of a proper sync method, we used those two keepalived
options to send GARPs repeatedly. That approach has two problems:

a) It never stops spamming the network (we did not know this at the time)
b) It causes VMs to lose their IPv6 default gateway due to a keepalived
   bug, which has since been fixed, but it would need to be backported
   to every keepalived version on every distro. Here's the patch:
   https://github.com/acassen/keepalived/pull/200

The solution this patch proposes is to drop the repeat and refresh
keepalived options. On its own, that would fix the IPv6 bug but
re-introduce bug 1453855, so this patch uses the garp_master_delay
option instead. It turns
out keepalived sends a GARP when it transitions to MASTER, and then
it waits a number of seconds determined by the delay option, and
sends a GARP again. We'll use an aggressive 'delay' setting to make
sure that when the node boots and the L3/L2 agents start, we'll
give the L2 agent enough time to wire the ports as a stopgap solution.
Note that this only affects initial synchronization time, not failover
times. Failover times will continue to be fast: because the ports
are wired ahead of time, the initial GARP after the state transition
to MASTER will be sent properly.

Change-Id: I7a086472b8742828dae08ffd915c45e94fb4b94e
Closes-Bug: #1520517
Related-Bug: #1453855
This commit is contained in:
Assaf Muller 2016-01-15 16:47:39 -05:00
parent 96f7597acb
commit 303cbc6b5b
3 changed files with 10 additions and 20 deletions

View File

@ -32,8 +32,7 @@ PRIMARY_VIP_RANGE_SIZE = 24
# TODO(amuller): Use L3 agent constant when new constants module is introduced.
FIP_LL_SUBNET = '169.254.30.0/23'
KEEPALIVED_SERVICE_NAME = 'keepalived'
GARP_MASTER_REPEAT = 5
GARP_MASTER_REFRESH = 10
GARP_MASTER_DELAY = 60
LOG = logging.getLogger(__name__)
@ -159,8 +158,7 @@ class KeepalivedInstance(object):
def __init__(self, state, interface, vrouter_id, ha_cidrs,
priority=HA_DEFAULT_PRIORITY, advert_int=None,
mcast_src_ip=None, nopreempt=False,
garp_master_repeat=GARP_MASTER_REPEAT,
garp_master_refresh=GARP_MASTER_REFRESH):
garp_master_delay=GARP_MASTER_DELAY):
self.name = 'VR_%s' % vrouter_id
if state not in VALID_STATES:
@ -173,8 +171,7 @@ class KeepalivedInstance(object):
self.nopreempt = nopreempt
self.advert_int = advert_int
self.mcast_src_ip = mcast_src_ip
self.garp_master_repeat = garp_master_repeat
self.garp_master_refresh = garp_master_refresh
self.garp_master_delay = garp_master_delay
self.track_interfaces = []
self.vips = []
self.virtual_routes = KeepalivedInstanceRoutes()
@ -273,8 +270,7 @@ class KeepalivedInstance(object):
' interface %s' % self.interface,
' virtual_router_id %s' % self.vrouter_id,
' priority %s' % self.priority,
' garp_master_repeat %s' % self.garp_master_repeat,
' garp_master_refresh %s' % self.garp_master_refresh]
' garp_master_delay %s' % self.garp_master_delay]
if self.nopreempt:
config.append(' nopreempt')

View File

@ -345,8 +345,7 @@ class L3AgentTestFramework(base.BaseSudoTestCase):
interface %(ha_device_name)s
virtual_router_id 1
priority 50
garp_master_repeat 5
garp_master_refresh 10
garp_master_delay 60
nopreempt
advert_int 2
track_interface {

View File

@ -115,8 +115,7 @@ class KeepalivedConfTestCase(base.BaseTestCase,
interface eth0
virtual_router_id 1
priority 50
garp_master_repeat 5
garp_master_refresh 10
garp_master_delay 60
advert_int 5
authentication {
auth_type AH
@ -143,8 +142,7 @@ vrrp_instance VR_2 {
interface eth4
virtual_router_id 2
priority 50
garp_master_repeat 5
garp_master_refresh 10
garp_master_delay 60
mcast_src_ip 224.0.0.1
track_interface {
eth4
@ -253,8 +251,7 @@ class KeepalivedInstanceTestCase(base.BaseTestCase,
interface eth0
virtual_router_id 1
priority 50
garp_master_repeat 5
garp_master_refresh 10
garp_master_delay 60
advert_int 5
authentication {
auth_type AH
@ -278,8 +275,7 @@ vrrp_instance VR_2 {
interface eth4
virtual_router_id 2
priority 50
garp_master_repeat 5
garp_master_refresh 10
garp_master_delay 60
mcast_src_ip 224.0.0.1
track_interface {
eth4
@ -302,8 +298,7 @@ vrrp_instance VR_2 {
interface eth0
virtual_router_id 1
priority 50
garp_master_repeat 5
garp_master_refresh 10
garp_master_delay 60
virtual_ipaddress {
169.254.0.1/24 dev eth0
}