Merge "l3-ha: Send gratuitous ARP when new floating IP is added"
This commit is contained in:
commit
bdb4c92004
|
@ -138,21 +138,14 @@ class FipNamespace(namespaces.Namespace):
|
|||
# Somewhere in the 3.19 kernel timeframe ip_nonlocal_bind was
|
||||
# changed to be a per-namespace attribute. To be backwards
|
||||
# compatible we need to try both if at first we fail.
|
||||
ip_wrapper = ip_lib.IPWrapper(namespace=self.name)
|
||||
try:
|
||||
ip_wrapper.netns.execute(['sysctl',
|
||||
'-w',
|
||||
'net.ipv4.ip_nonlocal_bind=1'],
|
||||
log_fail_as_error=False,
|
||||
run_as_root=True)
|
||||
ip_lib.set_ip_nonlocal_bind(
|
||||
value=1, namespace=self.name, log_fail_as_error=False)
|
||||
except RuntimeError:
|
||||
LOG.debug('DVR: fip namespace (%s) does not support setting '
|
||||
'net.ipv4.ip_nonlocal_bind, trying in root namespace',
|
||||
self.name)
|
||||
self.ip_wrapper_root.netns.execute(['sysctl',
|
||||
'-w',
|
||||
'net.ipv4.ip_nonlocal_bind=1'],
|
||||
run_as_root=True)
|
||||
ip_lib.set_ip_nonlocal_bind(value=1)
|
||||
|
||||
# no connection tracking needed in fip namespace
|
||||
self._iptables_manager.ipv4['raw'].add_rule('PREROUTING',
|
||||
|
|
|
@ -29,6 +29,12 @@ class SnatNamespace(namespaces.Namespace):
|
|||
super(SnatNamespace, self).__init__(
|
||||
name, agent_conf, driver, use_ipv6)
|
||||
|
||||
def create(self):
|
||||
super(SnatNamespace, self).create()
|
||||
# This might be an HA router namespaces and it should not have
|
||||
# ip_nonlocal_bind enabled
|
||||
ip_lib.set_ip_nonlocal_bind_for_namespace(self.name)
|
||||
|
||||
@classmethod
|
||||
def get_snat_ns_name(cls, router_id):
|
||||
return namespaces.build_ns_name(SNAT_NS_PREFIX, router_id)
|
||||
|
|
|
@ -20,6 +20,7 @@ from neutron_lib import constants as n_consts
|
|||
from oslo_log import log as logging
|
||||
|
||||
from neutron._i18n import _LE
|
||||
from neutron.agent.l3 import namespaces
|
||||
from neutron.agent.l3 import router_info as router
|
||||
from neutron.agent.linux import external_process
|
||||
from neutron.agent.linux import ip_lib
|
||||
|
@ -32,6 +33,19 @@ HA_DEV_PREFIX = 'ha-'
|
|||
IP_MONITOR_PROCESS_SERVICE = 'ip_monitor'
|
||||
|
||||
|
||||
class HaRouterNamespace(namespaces.RouterNamespace):
|
||||
"""Namespace for HA router.
|
||||
|
||||
This namespace sets the ip_nonlocal_bind to 0 for HA router namespaces.
|
||||
It does so to prevent sending gratuitous ARPs for interfaces that got VIP
|
||||
removed in the middle of processing.
|
||||
"""
|
||||
def create(self):
|
||||
super(HaRouterNamespace, self).create()
|
||||
# HA router namespaces should not have ip_nonlocal_bind enabled
|
||||
ip_lib.set_ip_nonlocal_bind_for_namespace(self.name)
|
||||
|
||||
|
||||
class HaRouter(router.RouterInfo):
|
||||
def __init__(self, state_change_callback, *args, **kwargs):
|
||||
super(HaRouter, self).__init__(*args, **kwargs)
|
||||
|
@ -40,6 +54,11 @@ class HaRouter(router.RouterInfo):
|
|||
self.keepalived_manager = None
|
||||
self.state_change_callback = state_change_callback
|
||||
|
||||
def create_router_namespace_object(
|
||||
self, router_id, agent_conf, iface_driver, use_ipv6):
|
||||
return HaRouterNamespace(
|
||||
router_id, agent_conf, iface_driver, use_ipv6)
|
||||
|
||||
@property
|
||||
def ha_priority(self):
|
||||
return self.router.get('priority', keepalived.HA_DEFAULT_PRIORITY)
|
||||
|
|
|
@ -16,6 +16,7 @@ import os
|
|||
import sys
|
||||
|
||||
import httplib2
|
||||
import netaddr
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
import requests
|
||||
|
@ -23,6 +24,7 @@ import requests
|
|||
from neutron._i18n import _, _LE
|
||||
from neutron.agent.l3 import ha
|
||||
from neutron.agent.linux import daemon
|
||||
from neutron.agent.linux import ip_lib
|
||||
from neutron.agent.linux import ip_monitor
|
||||
from neutron.agent.linux import utils as agent_utils
|
||||
from neutron.common import config
|
||||
|
@ -50,20 +52,21 @@ class MonitorDaemon(daemon.Daemon):
|
|||
self.conf_dir = conf_dir
|
||||
self.interface = interface
|
||||
self.cidr = cidr
|
||||
self.monitor = None
|
||||
super(MonitorDaemon, self).__init__(pidfile, uuid=router_id,
|
||||
user=user, group=group)
|
||||
|
||||
def run(self, run_as_root=False):
|
||||
monitor = ip_monitor.IPMonitor(namespace=self.namespace,
|
||||
run_as_root=run_as_root)
|
||||
monitor.start()
|
||||
self.monitor = ip_monitor.IPMonitor(namespace=self.namespace,
|
||||
run_as_root=run_as_root)
|
||||
self.monitor.start()
|
||||
# Only drop privileges if the process is currently running as root
|
||||
# (The run_as_root variable name here is unfortunate - It means to
|
||||
# use a root helper when the running process is NOT already running
|
||||
# as root
|
||||
if not run_as_root:
|
||||
super(MonitorDaemon, self).run()
|
||||
for iterable in monitor:
|
||||
for iterable in self.monitor:
|
||||
self.parse_and_handle_event(iterable)
|
||||
|
||||
def parse_and_handle_event(self, iterable):
|
||||
|
@ -73,6 +76,15 @@ class MonitorDaemon(daemon.Daemon):
|
|||
new_state = 'master' if event.added else 'backup'
|
||||
self.write_state_change(new_state)
|
||||
self.notify_agent(new_state)
|
||||
elif event.interface != self.interface and event.added:
|
||||
# Send GARPs for all new router interfaces.
|
||||
# REVISIT(jlibosva): keepalived versions 1.2.19 and below
|
||||
# contain bug where gratuitous ARPs are not sent on receiving
|
||||
# SIGHUP signal. This is a workaround to this bug. keepalived
|
||||
# has this issue fixed since 1.2.20 but the version is not
|
||||
# packaged in some distributions (RHEL/CentOS/Ubuntu Xenial).
|
||||
# Remove this code once new keepalived versions are available.
|
||||
self.send_garp(event)
|
||||
except Exception:
|
||||
LOG.exception(_LE(
|
||||
'Failed to process or handle event for line %s'), iterable)
|
||||
|
@ -97,6 +109,15 @@ class MonitorDaemon(daemon.Daemon):
|
|||
|
||||
LOG.debug('Notified agent router %s, state %s', self.router_id, state)
|
||||
|
||||
def send_garp(self, event):
|
||||
"""Send gratuitous ARP for given event."""
|
||||
ip_lib.send_ip_addr_adv_notif(
|
||||
self.namespace,
|
||||
event.interface,
|
||||
str(netaddr.IPNetwork(event.cidr).ip),
|
||||
log_exception=False
|
||||
)
|
||||
|
||||
|
||||
def configure(conf):
|
||||
config.init(sys.argv[1:])
|
||||
|
|
|
@ -57,7 +57,7 @@ class RouterInfo(object):
|
|||
# Invoke the setter for establishing initial SNAT action
|
||||
self.router = router
|
||||
self.use_ipv6 = use_ipv6
|
||||
ns = namespaces.RouterNamespace(
|
||||
ns = self.create_router_namespace_object(
|
||||
router_id, agent_conf, interface_driver, use_ipv6)
|
||||
self.router_namespace = ns
|
||||
self.ns_name = ns.name
|
||||
|
@ -94,6 +94,11 @@ class RouterInfo(object):
|
|||
|
||||
self.router_namespace.create()
|
||||
|
||||
def create_router_namespace_object(
|
||||
self, router_id, agent_conf, iface_driver, use_ipv6):
|
||||
return namespaces.RouterNamespace(
|
||||
router_id, agent_conf, iface_driver, use_ipv6)
|
||||
|
||||
@property
|
||||
def router(self):
|
||||
return self._router
|
||||
|
|
|
@ -25,7 +25,7 @@ from oslo_log import log as logging
|
|||
from oslo_utils import excutils
|
||||
import six
|
||||
|
||||
from neutron._i18n import _, _LE
|
||||
from neutron._i18n import _, _LE, _LW
|
||||
from neutron.agent.common import utils
|
||||
from neutron.common import exceptions as n_exc
|
||||
from neutron.common import utils as common_utils
|
||||
|
@ -38,6 +38,7 @@ OPTS = [
|
|||
help=_('Force ip_lib calls to use the root helper')),
|
||||
]
|
||||
|
||||
IP_NONLOCAL_BIND = 'net.ipv4.ip_nonlocal_bind'
|
||||
|
||||
LOOPBACK_DEVNAME = 'lo'
|
||||
GRE_TUNNEL_DEVICE_NAMES = ['gre0', 'gretap0']
|
||||
|
@ -1007,22 +1008,27 @@ def iproute_arg_supported(command, arg):
|
|||
return any(arg in line for line in stderr.split('\n'))
|
||||
|
||||
|
||||
def _arping(ns_name, iface_name, address, count):
|
||||
def _arping(ns_name, iface_name, address, count, log_exception):
|
||||
# Pass -w to set timeout to ensure exit if interface removed while running
|
||||
arping_cmd = ['arping', '-A', '-I', iface_name, '-c', count,
|
||||
'-w', 1.5 * count, address]
|
||||
try:
|
||||
ip_wrapper = IPWrapper(namespace=ns_name)
|
||||
ip_wrapper.netns.execute(arping_cmd, check_exit_code=True)
|
||||
except Exception:
|
||||
msg = _LE("Failed sending gratuitous ARP "
|
||||
"to %(addr)s on %(iface)s in namespace %(ns)s")
|
||||
LOG.exception(msg, {'addr': address,
|
||||
except Exception as exc:
|
||||
msg = _("Failed sending gratuitous ARP "
|
||||
"to %(addr)s on %(iface)s in namespace %(ns)s: %(err)s")
|
||||
logger_method = LOG.exception
|
||||
if not log_exception:
|
||||
logger_method = LOG.warning
|
||||
logger_method(msg, {'addr': address,
|
||||
'iface': iface_name,
|
||||
'ns': ns_name})
|
||||
'ns': ns_name,
|
||||
'err': exc})
|
||||
|
||||
|
||||
def send_ip_addr_adv_notif(ns_name, iface_name, address, count=3):
|
||||
def send_ip_addr_adv_notif(
|
||||
ns_name, iface_name, address, count=3, log_exception=True):
|
||||
"""Send advance notification of an IP address assignment.
|
||||
|
||||
If the address is in the IPv4 family, send gratuitous ARP.
|
||||
|
@ -1037,9 +1043,12 @@ def send_ip_addr_adv_notif(ns_name, iface_name, address, count=3):
|
|||
:param iface_name: Name of interface which GARPs are gonna be sent from.
|
||||
:param address: Advertised IP address.
|
||||
:param count: (Optional) How many GARPs are gonna be sent. Default is 3.
|
||||
:param log_exception: (Optional) True if possible failures should be logged
|
||||
on exception level. Otherwise they are logged on
|
||||
WARNING level. Default is True.
|
||||
"""
|
||||
def arping():
|
||||
_arping(ns_name, iface_name, address, count)
|
||||
_arping(ns_name, iface_name, address, count, log_exception)
|
||||
|
||||
if count > 0 and netaddr.IPAddress(address).version == 4:
|
||||
eventlet.spawn_n(arping)
|
||||
|
@ -1057,3 +1066,36 @@ def get_ip_version(ip_or_cidr):
|
|||
|
||||
def get_ipv6_lladdr(mac_addr):
|
||||
return '%s/64' % netaddr.EUI(mac_addr).ipv6_link_local()
|
||||
|
||||
|
||||
def get_ip_nonlocal_bind(namespace=None):
|
||||
"""Get kernel option value of ip_nonlocal_bind in given namespace."""
|
||||
cmd = ['sysctl', '-bn', IP_NONLOCAL_BIND]
|
||||
ip_wrapper = IPWrapper(namespace)
|
||||
return int(ip_wrapper.netns.execute(cmd, run_as_root=True))
|
||||
|
||||
|
||||
def set_ip_nonlocal_bind(value, namespace=None, log_fail_as_error=True):
|
||||
"""Set sysctl knob of ip_nonlocal_bind to given value."""
|
||||
cmd = ['sysctl', '-w', '%s=%d' % (IP_NONLOCAL_BIND, value)]
|
||||
ip_wrapper = IPWrapper(namespace)
|
||||
ip_wrapper.netns.execute(
|
||||
cmd, run_as_root=True, log_fail_as_error=log_fail_as_error)
|
||||
|
||||
|
||||
def set_ip_nonlocal_bind_for_namespace(namespace):
|
||||
"""Set ip_nonlocal_bind but don't raise exception on failure."""
|
||||
try:
|
||||
set_ip_nonlocal_bind(
|
||||
value=0, namespace=namespace, log_fail_as_error=False)
|
||||
except RuntimeError as rte:
|
||||
LOG.warning(
|
||||
_LW("Setting %(knob)s=0 in namespace %(ns)s failed: %(err)s. It "
|
||||
"will not be set to 0 in the root namespace in order to not "
|
||||
"break DVR, which requires this value be set to 1. This "
|
||||
"may introduce a race between moving a floating IP to a "
|
||||
"different network node, and the peer side getting a "
|
||||
"populated ARP cache for a given floating IP address."),
|
||||
{'knob': IP_NONLOCAL_BIND,
|
||||
'ns': namespace,
|
||||
'err': rte})
|
||||
|
|
|
@ -1078,3 +1078,20 @@ class TestDvrRouter(framework.L3AgentTestFramework):
|
|||
# external networks. SNAT will be used. Direct route will not work
|
||||
# here.
|
||||
src_machine.assert_no_ping(machine_diff_scope.ip)
|
||||
|
||||
def test_dvr_snat_namespace_has_ip_nonlocal_bind_disabled(self):
|
||||
self.agent.conf.agent_mode = 'dvr_snat'
|
||||
router_info = self.generate_dvr_router_info(
|
||||
enable_ha=True, enable_snat=True)
|
||||
router = self.manage_router(self.agent, router_info)
|
||||
try:
|
||||
ip_nonlocal_bind_value = ip_lib.get_ip_nonlocal_bind(
|
||||
router.snat_namespace.name)
|
||||
except RuntimeError as rte:
|
||||
stat_message = 'cannot stat /proc/sys/net/ipv4/ip_nonlocal_bind'
|
||||
if stat_message in str(rte):
|
||||
raise self.skipException(
|
||||
"This kernel doesn't support %s in network namespaces." % (
|
||||
ip_lib.IP_NONLOCAL_BIND))
|
||||
raise
|
||||
self.assertEqual(0, ip_nonlocal_bind_value)
|
||||
|
|
|
@ -286,6 +286,21 @@ class L3HATestCase(framework.L3AgentTestFramework):
|
|||
self.agent._process_updated_router(router1.router)
|
||||
common_utils.wait_until_true(lambda: router1.ha_state == 'master')
|
||||
|
||||
def test_ha_router_namespace_has_ip_nonlocal_bind_disabled(self):
|
||||
router_info = self.generate_router_info(enable_ha=True)
|
||||
router = self.manage_router(self.agent, router_info)
|
||||
try:
|
||||
ip_nonlocal_bind_value = ip_lib.get_ip_nonlocal_bind(
|
||||
router.router_namespace.name)
|
||||
except RuntimeError as rte:
|
||||
stat_message = 'cannot stat /proc/sys/net/ipv4/ip_nonlocal_bind'
|
||||
if stat_message in str(rte):
|
||||
raise self.skipException(
|
||||
"This kernel doesn't support %s in network namespaces." % (
|
||||
ip_lib.IP_NONLOCAL_BIND))
|
||||
raise
|
||||
self.assertEqual(0, ip_nonlocal_bind_value)
|
||||
|
||||
|
||||
class L3HATestFailover(framework.L3AgentTestFramework):
|
||||
|
||||
|
|
|
@ -12,16 +12,46 @@
|
|||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import functools
|
||||
import os
|
||||
import re
|
||||
|
||||
import eventlet
|
||||
import mock
|
||||
import netaddr
|
||||
from oslo_config import fixture as fixture_config
|
||||
from oslo_utils import uuidutils
|
||||
|
||||
from neutron.agent.l3 import keepalived_state_change
|
||||
from neutron.agent.linux import ip_lib
|
||||
from neutron.common import utils
|
||||
from neutron.conf.agent.l3 import config
|
||||
from neutron.conf.agent.l3 import keepalived as kd
|
||||
from neutron.tests.common import machine_fixtures as mf
|
||||
from neutron.tests.common import net_helpers
|
||||
from neutron.tests.functional import base
|
||||
|
||||
IPV4_NEIGH_REGEXP = re.compile(
|
||||
r'(?P<ip>(\d{1,3}\.){3}\d{1,3}) '
|
||||
'.*(?P<mac>([0-9A-Fa-f]{2}:){5}([0-9A-Fa-f]){2}).*')
|
||||
|
||||
|
||||
def get_arp_ip_mac_pairs(device_name, namespace):
|
||||
"""Generate (ip, mac) pairs from device's ip neigh.
|
||||
|
||||
Each neigh entry has following format:
|
||||
192.168.0.1 lladdr fa:16:3e:01:ba:d3 STALE
|
||||
"""
|
||||
device = ip_lib.IPDevice(device_name, namespace)
|
||||
for entry in device.neigh.show(ip_version=4).splitlines():
|
||||
match = IPV4_NEIGH_REGEXP.match(entry)
|
||||
if match:
|
||||
yield match.group('ip'), match.group('mac')
|
||||
|
||||
|
||||
def has_expected_arp_entry(device_name, namespace, ip, mac):
|
||||
return (ip, mac) in get_arp_ip_mac_pairs(device_name, namespace)
|
||||
|
||||
|
||||
class TestKeepalivedStateChange(base.BaseSudoTestCase):
|
||||
def setUp(self):
|
||||
|
@ -68,3 +98,58 @@ class TestKeepalivedStateChange(base.BaseSudoTestCase):
|
|||
with mock.patch.object(
|
||||
self.monitor, 'notify_agent', side_effect=Exception):
|
||||
self.monitor.parse_and_handle_event(self.line)
|
||||
|
||||
|
||||
class TestMonitorDaemon(base.BaseSudoTestCase):
|
||||
def setUp(self):
|
||||
super(TestMonitorDaemon, self).setUp()
|
||||
bridge = self.useFixture(net_helpers.OVSBridgeFixture()).bridge
|
||||
self.machines = self.useFixture(mf.PeerMachines(bridge))
|
||||
self.router, self.peer = self.machines.machines[:2]
|
||||
config.register_l3_agent_config_opts(config.OPTS)
|
||||
|
||||
conf_dir = self.get_default_temp_dir().path
|
||||
monitor = keepalived_state_change.MonitorDaemon(
|
||||
self.get_temp_file_path('monitor.pid'),
|
||||
uuidutils.generate_uuid(),
|
||||
1,
|
||||
2,
|
||||
self.router.namespace,
|
||||
conf_dir,
|
||||
'foo-iface',
|
||||
self.machines.ip_cidr
|
||||
)
|
||||
eventlet.spawn_n(monitor.run, run_as_root=True)
|
||||
monitor_started = functools.partial(
|
||||
lambda mon: mon.monitor is not None, monitor)
|
||||
utils.wait_until_true(monitor_started)
|
||||
self.addCleanup(monitor.monitor.stop)
|
||||
|
||||
def test_new_fip_sends_garp(self):
|
||||
next_ip_cidr = net_helpers.increment_ip_cidr(self.machines.ip_cidr, 2)
|
||||
expected_ip = str(netaddr.IPNetwork(next_ip_cidr).ip)
|
||||
# Create incomplete ARP entry
|
||||
self.peer.assert_no_ping(expected_ip)
|
||||
has_entry = has_expected_arp_entry(
|
||||
self.peer.port.name,
|
||||
self.peer.namespace,
|
||||
expected_ip,
|
||||
self.router.port.link.address)
|
||||
self.assertFalse(has_entry)
|
||||
self.router.port.addr.add(next_ip_cidr)
|
||||
has_arp_entry_predicate = functools.partial(
|
||||
has_expected_arp_entry,
|
||||
self.peer.port.name,
|
||||
self.peer.namespace,
|
||||
expected_ip,
|
||||
self.router.port.link.address,
|
||||
)
|
||||
exc = RuntimeError(
|
||||
"No ARP entry in %s namespace containing IP address %s and MAC "
|
||||
"address %s" % (
|
||||
self.peer.namespace,
|
||||
expected_ip,
|
||||
self.router.port.link.address))
|
||||
utils.wait_until_true(
|
||||
has_arp_entry_predicate,
|
||||
exception=exc)
|
||||
|
|
|
@ -215,3 +215,21 @@ class IpLibTestCase(IpLibTestFramework):
|
|||
self._check_for_device_name(namespace.ip_wrapper, dev_name, True)
|
||||
device.link.delete()
|
||||
self._check_for_device_name(namespace.ip_wrapper, dev_name, False)
|
||||
|
||||
|
||||
class TestSetIpNonlocalBind(functional_base.BaseSudoTestCase):
|
||||
def test_assigned_value(self):
|
||||
namespace = self.useFixture(net_helpers.NamespaceFixture())
|
||||
for expected in (0, 1):
|
||||
try:
|
||||
ip_lib.set_ip_nonlocal_bind(expected, namespace.name)
|
||||
except RuntimeError as rte:
|
||||
stat_message = (
|
||||
'cannot stat /proc/sys/net/ipv4/ip_nonlocal_bind')
|
||||
if stat_message in str(rte):
|
||||
raise self.skipException(
|
||||
"This kernel doesn't support %s in network "
|
||||
"namespaces." % ip_lib.IP_NONLOCAL_BIND)
|
||||
raise
|
||||
observed = ip_lib.get_ip_nonlocal_bind(namespace.name)
|
||||
self.assertEqual(expected, observed)
|
||||
|
|
|
@ -1378,3 +1378,11 @@ class TestAddNamespaceToCmd(base.BaseTestCase):
|
|||
def test_add_namespace_to_cmd_without_namespace(self):
|
||||
cmd = ['ping', '8.8.8.8']
|
||||
self.assertEqual(cmd, ip_lib.add_namespace_to_cmd(cmd, None))
|
||||
|
||||
|
||||
class TestSetIpNonlocalBindForHaNamespace(base.BaseTestCase):
|
||||
def test_setting_failure(self):
|
||||
"""Make sure message is formatted correctly."""
|
||||
with mock.patch.object(
|
||||
ip_lib, 'set_ip_nonlocal_bind', side_effect=RuntimeError):
|
||||
ip_lib.set_ip_nonlocal_bind_for_namespace('foo')
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
---
|
||||
issues:
|
||||
- In kernels < 3.19 net.ipv4.ip_nonlocal_bind was not
|
||||
a per-namespace kernel option. L3 HA sets this option
|
||||
to zero to avoid sending gratuitous ARPs for IP addresses
|
||||
that were removed while processing. If this happens then
|
||||
gratuitous ARPs are going to be sent which might populate
|
||||
ARP caches of peer machines with the wrong MAC address.
|
||||
fixes:
|
||||
- Versions of keepalived < 1.2.20 don't send gratuitous ARPs
|
||||
when keepalived process receives SIGHUP signal. These
|
||||
versions are not packaged in some Linux distributions like
|
||||
RHEL, CentOS or Ubuntu Xenial. Not sending gratuitous ARPs
|
||||
may lead to peer ARP caches containing wrong information
|
||||
about floating IP addresses until the entry is invalidated.
|
||||
Neutron now sends gratuitous ARPs for all new IP addresses
|
||||
that appear on non-HA interfaces in router namespace which
|
||||
simulates behavior of new versions of keepalived.
|
Loading…
Reference in New Issue