From 2ed6e4d7ab253f23fd81d84b127139b9deb438fd Mon Sep 17 00:00:00 2001 From: Slawek Kaplonski Date: Thu, 17 May 2018 12:16:52 +0200 Subject: [PATCH] Disable IPv6 forwarding by default on HA routers In case of HA routers IPv6 forwarding is not disabled by default and then enabled only on master node. Before this patch it was done in opposite way, so forwarding was enabled by default and then disabled on backup nodes. When forwarding was enabled/disabled for qg- port, MLDv2 packets are sent and that might lead to temportary packets loss as packets to FIP were sent to this backup node instead of master one. Related-Bug: #1771841 Change-Id: Ia6b772e91c1f94612ca29d7082eca999372e60d6 (cherry picked from commit 3e9e2a5b4b2654864bb0e2a7d9b841eb03240147) --- neutron/agent/l3/ha_router.py | 6 +++- neutron/agent/l3/namespaces.py | 5 ++-- neutron/agent/linux/ip_lib.py | 7 +++++ neutron/tests/fullstack/base.py | 30 +++++++++++++++++++ neutron/tests/fullstack/test_connectivity.py | 23 ++------------ neutron/tests/fullstack/test_l3_agent.py | 30 +++++++++++++++++++ .../functional/agent/l3/test_ha_router.py | 22 ++++++++++++++ tools/configure_for_func_testing.sh | 6 ++++ 8 files changed, 106 insertions(+), 23 deletions(-) diff --git a/neutron/agent/l3/ha_router.py b/neutron/agent/l3/ha_router.py index 20b6c57e7d5..9fa2e05b387 100644 --- a/neutron/agent/l3/ha_router.py +++ b/neutron/agent/l3/ha_router.py @@ -46,9 +46,13 @@ class HaRouterNamespace(namespaces.RouterNamespace): This namespace sets the ip_nonlocal_bind to 0 for HA router namespaces. It does so to prevent sending gratuitous ARPs for interfaces that got VIP removed in the middle of processing. + It also disables ipv6 forwarding by default. Forwarding will be + enabled during router configuration processing only for the master node. + It has to be disabled on all other nodes to avoid sending MLD packets + which cause lost connectivity to Floating IPs. """ def create(self): - super(HaRouterNamespace, self).create() + super(HaRouterNamespace, self).create(ipv6_forwarding=False) # HA router namespaces should not have ip_nonlocal_bind enabled ip_lib.set_ip_nonlocal_bind_for_namespace(self.name) diff --git a/neutron/agent/l3/namespaces.py b/neutron/agent/l3/namespaces.py index 71e8cbcf35b..58b0deb6075 100644 --- a/neutron/agent/l3/namespaces.py +++ b/neutron/agent/l3/namespaces.py @@ -88,7 +88,7 @@ class Namespace(object): self.driver = driver self.use_ipv6 = use_ipv6 - def create(self): + def create(self, ipv6_forwarding=True): # See networking (netdev) tree, file # Documentation/networking/ip-sysctl.txt for an explanation of # these sysctl values. @@ -103,7 +103,8 @@ class Namespace(object): cmd = ['sysctl', '-w', 'net.ipv4.conf.all.arp_announce=2'] ip_wrapper.netns.execute(cmd) if self.use_ipv6: - cmd = ['sysctl', '-w', 'net.ipv6.conf.all.forwarding=1'] + cmd = ['sysctl', '-w', + 'net.ipv6.conf.all.forwarding=%d' % int(ipv6_forwarding)] ip_wrapper.netns.execute(cmd) def delete(self): diff --git a/neutron/agent/linux/ip_lib.py b/neutron/agent/linux/ip_lib.py index aeda696820b..3a5ed8f7484 100644 --- a/neutron/agent/linux/ip_lib.py +++ b/neutron/agent/linux/ip_lib.py @@ -1254,3 +1254,10 @@ def set_ip_nonlocal_bind_for_namespace(namespace): "different network node, and the peer side getting a " "populated ARP cache for a given floating IP address.", IP_NONLOCAL_BIND) + + +def get_ipv6_forwarding(device, namespace=None): + """Get kernel value of IPv6 forwarding for device in given namespace.""" + cmd = ['sysctl', '-b', "net.ipv6.conf.%s.forwarding" % device] + ip_wrapper = IPWrapper(namespace) + return int(ip_wrapper.netns.execute(cmd, run_as_root=True)) diff --git a/neutron/tests/fullstack/base.py b/neutron/tests/fullstack/base.py index cf84b506cc1..8adae386c81 100644 --- a/neutron/tests/fullstack/base.py +++ b/neutron/tests/fullstack/base.py @@ -12,13 +12,17 @@ # License for the specific language governing permissions and limitations # under the License. +from concurrent import futures import os from oslo_config import cfg +from oslo_log import log as logging +from neutron.common import utils as common_utils from neutron.conf.agent import common as config from neutron.tests import base as tests_base from neutron.tests.common import helpers +from neutron.tests.common import net_helpers from neutron.tests.fullstack.resources import client as client_resource from neutron.tests import tools from neutron.tests.unit import testlib_api @@ -29,6 +33,8 @@ DEFAULT_LOG_DIR = os.path.join(helpers.get_test_log_path(), 'dsvm-fullstack-logs') ROOTDIR = os.path.dirname(__file__) +LOG = logging.getLogger(__name__) + class BaseFullStackTestCase(testlib_api.MySQLTestCaseMixin, testlib_api.SqlTestCase): @@ -73,3 +79,27 @@ class BaseFullStackTestCase(testlib_api.MySQLTestCaseMixin, def get_name(self): class_name, test_name = self.id().split(".")[-2:] return "%s.%s" % (class_name, test_name) + + def _assert_ping_during_agents_restart( + self, agents, src_namespace, ips, restart_timeout=10, + ping_timeout=1, count=10): + with net_helpers.async_ping( + src_namespace, ips, timeout=ping_timeout, + count=count) as done: + LOG.debug("Restarting agents") + executor = futures.ThreadPoolExecutor(max_workers=len(agents)) + restarts = [agent.restart(executor=executor) + for agent in agents] + + futures.wait(restarts, timeout=restart_timeout) + + self.assertTrue(all([r.done() for r in restarts])) + LOG.debug("Restarting agents - done") + + # It is necessary to give agents time to initialize + # because some crucial steps (e.g. setting up bridge flows) + # happen only after RPC is established + common_utils.wait_until_true( + done, + exception=RuntimeError("Could not ping the other VM, L2 agent " + "restart leads to network disruption")) diff --git a/neutron/tests/fullstack/test_connectivity.py b/neutron/tests/fullstack/test_connectivity.py index c8650b0053d..3f498e88cfe 100644 --- a/neutron/tests/fullstack/test_connectivity.py +++ b/neutron/tests/fullstack/test_connectivity.py @@ -12,7 +12,6 @@ # License for the specific language governing permissions and limitations # under the License. -from concurrent import futures import signal from neutron_lib import constants @@ -246,22 +245,6 @@ class TestUninterruptedConnectivityOnL2AgentRestart( # Restart agents on all nodes simultaneously while pinging across # the hosts. The ping has to cross int and phys bridges and travels # via central bridge as the vms are on separate hosts. - with net_helpers.async_ping(ns0, [ip1], timeout=2, - count=agent_restart_timeout) as done: - LOG.debug("Restarting agents") - executor = futures.ThreadPoolExecutor(max_workers=len(agents)) - restarts = [agent.restart(executor=executor) - for agent in agents] - - futures.wait(restarts, timeout=agent_restart_timeout) - - self.assertTrue(all([r.done() for r in restarts])) - LOG.debug("Restarting agents - done") - - # It is necessary to give agents time to initialize - # because some crucial steps (e.g. setting up bridge flows) - # happen only after RPC is established - common_utils.wait_until_true( - done, - exception=RuntimeError("Could not ping the other VM, L2 agent " - "restart leads to network disruption")) + self._assert_ping_during_agents_restart( + agents, ns0, [ip1], restart_timeout=agent_restart_timeout, + ping_timeout=2, count=agent_restart_timeout) diff --git a/neutron/tests/fullstack/test_l3_agent.py b/neutron/tests/fullstack/test_l3_agent.py index f67be5953d5..1cc299ad0a7 100644 --- a/neutron/tests/fullstack/test_l3_agent.py +++ b/neutron/tests/fullstack/test_l3_agent.py @@ -321,3 +321,33 @@ class TestHAL3Agent(TestL3Agent): self.assertEqual( "master", self._get_keepalived_state(keepalived_state_file)) + + def test_ha_router_restart_agents_no_packet_lost(self): + tenant_id = uuidutils.generate_uuid() + ext_net, ext_sub = self._create_external_network_and_subnet(tenant_id) + router = self.safe_client.create_router(tenant_id, ha=True, + external_network=ext_net['id']) + + external_vm = self.useFixture( + machine_fixtures.FakeMachine( + self.environment.central_external_bridge, + common_utils.ip_to_cidr(ext_sub['gateway_ip'], 24))) + + common_utils.wait_until_true( + lambda: + len(self.client.list_l3_agent_hosting_routers( + router['id'])['agents']) == 2, + timeout=90) + + common_utils.wait_until_true( + functools.partial( + self._is_ha_router_active_on_one_agent, + router['id']), + timeout=90) + + router_ip = router['external_gateway_info'][ + 'external_fixed_ips'][0]['ip_address'] + l3_agents = [host.agents['l3'] for host in self.environment.hosts] + + self._assert_ping_during_agents_restart( + l3_agents, external_vm.namespace, [router_ip], count=60) diff --git a/neutron/tests/functional/agent/l3/test_ha_router.py b/neutron/tests/functional/agent/l3/test_ha_router.py index 413aea88f7c..fe03766f1e2 100644 --- a/neutron/tests/functional/agent/l3/test_ha_router.py +++ b/neutron/tests/functional/agent/l3/test_ha_router.py @@ -335,6 +335,28 @@ class L3HATestCase(framework.L3AgentTestFramework): raise self.assertEqual(0, ip_nonlocal_bind_value) + def test_ha_router_namespace_has_ipv6_forwarding_disabled(self): + router_info = self.generate_router_info(enable_ha=True) + router_info[constants.HA_INTERFACE_KEY]['status'] = ( + constants.PORT_STATUS_DOWN) + router = self.manage_router(self.agent, router_info) + external_port = router.get_ex_gw_port() + external_device_name = router.get_external_device_name( + external_port['id']) + + common_utils.wait_until_true(lambda: router.ha_state == 'backup') + self.assertEqual( + 0, ip_lib.get_ipv6_forwarding(device=external_device_name, + namespace=router.ns_name)) + + router.router[constants.HA_INTERFACE_KEY]['status'] = ( + constants.PORT_STATUS_ACTIVE) + self.agent._process_updated_router(router.router) + common_utils.wait_until_true(lambda: router.ha_state == 'master') + self.assertEqual( + 1, ip_lib.get_ipv6_forwarding(device=external_device_name, + namespace=router.ns_name)) + class L3HATestFailover(framework.L3AgentTestFramework): diff --git a/tools/configure_for_func_testing.sh b/tools/configure_for_func_testing.sh index bc90630a7b7..7279acee0c8 100755 --- a/tools/configure_for_func_testing.sh +++ b/tools/configure_for_func_testing.sh @@ -270,6 +270,11 @@ function _configure_iptables_rules { } +function _enable_ipv6 { + sudo sysctl -w net.ipv6.conf.all.disable_ipv6=0 +} + + function configure_host_for_func_testing { echo_summary "Configuring host for functional testing" @@ -297,6 +302,7 @@ if [[ "$IS_GATE" != "True" ]]; then fi if [[ "$VENV" =~ "dsvm-fullstack" ]]; then + _enable_ipv6 _configure_iptables_rules sudo modprobe ip_conntrack_proto_sctp fi