Disable IPv6 forwarding by default on HA routers

In case of HA routers IPv6 forwarding is not disabled by default and
then enabled only on master node.
Before this patch it was done in opposite way, so forwarding was
enabled by default and then disabled on backup nodes.
When forwarding was enabled/disabled for qg- port, MLDv2 packets are
sent and that might lead to temportary packets loss as packets to
FIP were sent to this backup node instead of master one.

Related-Bug: #1771841

Change-Id: Ia6b772e91c1f94612ca29d7082eca999372e60d6
This commit is contained in:
Slawek Kaplonski 2018-05-17 12:16:52 +02:00
parent fab6bcbdcd
commit 3e9e2a5b4b
8 changed files with 106 additions and 23 deletions

View File

@ -46,9 +46,13 @@ class HaRouterNamespace(namespaces.RouterNamespace):
This namespace sets the ip_nonlocal_bind to 0 for HA router namespaces.
It does so to prevent sending gratuitous ARPs for interfaces that got VIP
removed in the middle of processing.
It also disables ipv6 forwarding by default. Forwarding will be
enabled during router configuration processing only for the master node.
It has to be disabled on all other nodes to avoid sending MLD packets
which cause lost connectivity to Floating IPs.
"""
def create(self):
super(HaRouterNamespace, self).create()
super(HaRouterNamespace, self).create(ipv6_forwarding=False)
# HA router namespaces should not have ip_nonlocal_bind enabled
ip_lib.set_ip_nonlocal_bind_for_namespace(self.name)

View File

@ -88,7 +88,7 @@ class Namespace(object):
self.driver = driver
self.use_ipv6 = use_ipv6
def create(self):
def create(self, ipv6_forwarding=True):
# See networking (netdev) tree, file
# Documentation/networking/ip-sysctl.txt for an explanation of
# these sysctl values.
@ -103,7 +103,8 @@ class Namespace(object):
cmd = ['sysctl', '-w', 'net.ipv4.conf.all.arp_announce=2']
ip_wrapper.netns.execute(cmd)
if self.use_ipv6:
cmd = ['sysctl', '-w', 'net.ipv6.conf.all.forwarding=1']
cmd = ['sysctl', '-w',
'net.ipv6.conf.all.forwarding=%d' % int(ipv6_forwarding)]
ip_wrapper.netns.execute(cmd)
def delete(self):

View File

@ -1302,3 +1302,10 @@ def set_ip_nonlocal_bind_for_namespace(namespace):
"different network node, and the peer side getting a "
"populated ARP cache for a given floating IP address.",
IP_NONLOCAL_BIND)
def get_ipv6_forwarding(device, namespace=None):
"""Get kernel value of IPv6 forwarding for device in given namespace."""
cmd = ['sysctl', '-b', "net.ipv6.conf.%s.forwarding" % device]
ip_wrapper = IPWrapper(namespace)
return int(ip_wrapper.netns.execute(cmd, run_as_root=True))

View File

@ -12,13 +12,17 @@
# License for the specific language governing permissions and limitations
# under the License.
from concurrent import futures
import os
from oslo_config import cfg
from oslo_log import log as logging
from neutron.common import utils as common_utils
from neutron.conf.agent import common as config
from neutron.tests import base as tests_base
from neutron.tests.common import helpers
from neutron.tests.common import net_helpers
from neutron.tests.fullstack.resources import client as client_resource
from neutron.tests import tools
from neutron.tests.unit import testlib_api
@ -29,6 +33,8 @@ DEFAULT_LOG_DIR = os.path.join(helpers.get_test_log_path(),
'dsvm-fullstack-logs')
ROOTDIR = os.path.dirname(__file__)
LOG = logging.getLogger(__name__)
class BaseFullStackTestCase(testlib_api.MySQLTestCaseMixin,
testlib_api.SqlTestCase):
@ -73,3 +79,27 @@ class BaseFullStackTestCase(testlib_api.MySQLTestCaseMixin,
def get_name(self):
class_name, test_name = self.id().split(".")[-2:]
return "%s.%s" % (class_name, test_name)
def _assert_ping_during_agents_restart(
self, agents, src_namespace, ips, restart_timeout=10,
ping_timeout=1, count=10):
with net_helpers.async_ping(
src_namespace, ips, timeout=ping_timeout,
count=count) as done:
LOG.debug("Restarting agents")
executor = futures.ThreadPoolExecutor(max_workers=len(agents))
restarts = [agent.restart(executor=executor)
for agent in agents]
futures.wait(restarts, timeout=restart_timeout)
self.assertTrue(all([r.done() for r in restarts]))
LOG.debug("Restarting agents - done")
# It is necessary to give agents time to initialize
# because some crucial steps (e.g. setting up bridge flows)
# happen only after RPC is established
common_utils.wait_until_true(
done,
exception=RuntimeError("Could not ping the other VM, L2 agent "
"restart leads to network disruption"))

View File

@ -12,7 +12,6 @@
# License for the specific language governing permissions and limitations
# under the License.
from concurrent import futures
import signal
from neutron_lib import constants
@ -246,22 +245,6 @@ class TestUninterruptedConnectivityOnL2AgentRestart(
# Restart agents on all nodes simultaneously while pinging across
# the hosts. The ping has to cross int and phys bridges and travels
# via central bridge as the vms are on separate hosts.
with net_helpers.async_ping(ns0, [ip1], timeout=2,
count=agent_restart_timeout) as done:
LOG.debug("Restarting agents")
executor = futures.ThreadPoolExecutor(max_workers=len(agents))
restarts = [agent.restart(executor=executor)
for agent in agents]
futures.wait(restarts, timeout=agent_restart_timeout)
self.assertTrue(all([r.done() for r in restarts]))
LOG.debug("Restarting agents - done")
# It is necessary to give agents time to initialize
# because some crucial steps (e.g. setting up bridge flows)
# happen only after RPC is established
common_utils.wait_until_true(
done,
exception=RuntimeError("Could not ping the other VM, L2 agent "
"restart leads to network disruption"))
self._assert_ping_during_agents_restart(
agents, ns0, [ip1], restart_timeout=agent_restart_timeout,
ping_timeout=2, count=agent_restart_timeout)

View File

@ -321,3 +321,33 @@ class TestHAL3Agent(TestL3Agent):
self.assertEqual(
"master",
self._get_keepalived_state(keepalived_state_file))
def test_ha_router_restart_agents_no_packet_lost(self):
tenant_id = uuidutils.generate_uuid()
ext_net, ext_sub = self._create_external_network_and_subnet(tenant_id)
router = self.safe_client.create_router(tenant_id, ha=True,
external_network=ext_net['id'])
external_vm = self.useFixture(
machine_fixtures.FakeMachine(
self.environment.central_external_bridge,
common_utils.ip_to_cidr(ext_sub['gateway_ip'], 24)))
common_utils.wait_until_true(
lambda:
len(self.client.list_l3_agent_hosting_routers(
router['id'])['agents']) == 2,
timeout=90)
common_utils.wait_until_true(
functools.partial(
self._is_ha_router_active_on_one_agent,
router['id']),
timeout=90)
router_ip = router['external_gateway_info'][
'external_fixed_ips'][0]['ip_address']
l3_agents = [host.agents['l3'] for host in self.environment.hosts]
self._assert_ping_during_agents_restart(
l3_agents, external_vm.namespace, [router_ip], count=60)

View File

@ -335,6 +335,28 @@ class L3HATestCase(framework.L3AgentTestFramework):
raise
self.assertEqual(0, ip_nonlocal_bind_value)
def test_ha_router_namespace_has_ipv6_forwarding_disabled(self):
router_info = self.generate_router_info(enable_ha=True)
router_info[constants.HA_INTERFACE_KEY]['status'] = (
constants.PORT_STATUS_DOWN)
router = self.manage_router(self.agent, router_info)
external_port = router.get_ex_gw_port()
external_device_name = router.get_external_device_name(
external_port['id'])
common_utils.wait_until_true(lambda: router.ha_state == 'backup')
self.assertEqual(
0, ip_lib.get_ipv6_forwarding(device=external_device_name,
namespace=router.ns_name))
router.router[constants.HA_INTERFACE_KEY]['status'] = (
constants.PORT_STATUS_ACTIVE)
self.agent._process_updated_router(router.router)
common_utils.wait_until_true(lambda: router.ha_state == 'master')
self.assertEqual(
1, ip_lib.get_ipv6_forwarding(device=external_device_name,
namespace=router.ns_name))
class L3HATestFailover(framework.L3AgentTestFramework):

View File

@ -278,6 +278,11 @@ function _configure_iptables_rules {
}
function _enable_ipv6 {
sudo sysctl -w net.ipv6.conf.all.disable_ipv6=0
}
function configure_host_for_func_testing {
echo_summary "Configuring host for functional testing"
@ -305,6 +310,7 @@ if [[ "$IS_GATE" != "True" ]]; then
fi
if [[ "$VENV" =~ "dsvm-fullstack" ]]; then
_enable_ipv6
_configure_iptables_rules
# This module only exists on older kernels, built-in otherwise
modinfo ip_conntrack_proto_sctp 1> /dev/null 2>&1 && sudo modprobe ip_conntrack_proto_sctp