Packets getting lost during SNAT with too many connections
We have a problem with SNAT with too many connections using the same source and destination on the network nodes. In addition we can see in the conntrack table that the who "instert_failed" increases. This might be a generic problem with conntrack and linux. We suspect that we encounter the following "limitation / bug" in the kernel. There seems to be a workaround to alleviate this behavior by setting the -random-fully flag in iptables for port consumption. This patch fixes the problem by adding the --random-fully to the SNAT rules. Change-Id: I246c1f56df889bad9c7e140b56c3614124d80a19 Closes-Bug: #1814002
This commit is contained in:
parent
f8b990736b
commit
30f35e08f9
@ -341,12 +341,14 @@ class DvrEdgeRouter(dvr_local_router.DvrLocalRouter):
|
||||
return lib_constants.FLOATINGIP_STATUS_ACTIVE
|
||||
|
||||
def _centralized_floating_forward_rules(self, floating_ip, fixed_ip):
|
||||
to_source = '-s %s/32 -j SNAT --to-source %s' % (fixed_ip, floating_ip)
|
||||
if self.snat_iptables_manager.random_fully:
|
||||
to_source += ' --random-fully'
|
||||
return [('PREROUTING', '-d %s/32 -j DNAT --to-destination %s' %
|
||||
(floating_ip, fixed_ip)),
|
||||
('OUTPUT', '-d %s/32 -j DNAT --to-destination %s' %
|
||||
(floating_ip, fixed_ip)),
|
||||
('float-snat', '-s %s/32 -j SNAT --to-source %s' %
|
||||
(fixed_ip, floating_ip))]
|
||||
('float-snat', to_source)]
|
||||
|
||||
def _set_floating_ip_nat_rules_for_centralized_floatingip(self, fip):
|
||||
if fip.get(lib_constants.DVR_SNAT_BOUND):
|
||||
|
@ -74,9 +74,10 @@ class DvrLocalRouter(dvr_router_base.DvrRouterBase):
|
||||
dnat_from_floatingip_to_fixedip = (
|
||||
'PREROUTING', '-d %s/32 -i %s -j DNAT --to-destination %s' % (
|
||||
floating_ip, rtr_2_fip_name, fixed_ip))
|
||||
snat_from_fixedip_to_floatingip = (
|
||||
'float-snat', '-s %s/32 -j SNAT --to-source %s' % (
|
||||
fixed_ip, floating_ip))
|
||||
to_source = '-s %s/32 -j SNAT --to-source %s' % (fixed_ip, floating_ip)
|
||||
if self.iptables_manager.random_fully:
|
||||
to_source += ' --random-fully'
|
||||
snat_from_fixedip_to_floatingip = ('float-snat', to_source)
|
||||
return [dnat_from_floatingip_to_fixedip,
|
||||
snat_from_fixedip_to_floatingip]
|
||||
|
||||
|
@ -172,12 +172,14 @@ class RouterInfo(object):
|
||||
def floating_forward_rules(self, fip):
|
||||
fixed_ip = fip['fixed_ip_address']
|
||||
floating_ip = fip['floating_ip_address']
|
||||
to_source = '-s %s/32 -j SNAT --to-source %s' % (fixed_ip, floating_ip)
|
||||
if self.iptables_manager.random_fully:
|
||||
to_source += ' --random-fully'
|
||||
return [('PREROUTING', '-d %s/32 -j DNAT --to-destination %s' %
|
||||
(floating_ip, fixed_ip)),
|
||||
('OUTPUT', '-d %s/32 -j DNAT --to-destination %s' %
|
||||
(floating_ip, fixed_ip)),
|
||||
('float-snat', '-s %s/32 -j SNAT --to-source %s' %
|
||||
(fixed_ip, floating_ip))]
|
||||
('float-snat', to_source)]
|
||||
|
||||
def floating_mangle_rules(self, floating_ip, fixed_ip, internal_mark):
|
||||
mark_traffic_to_floating_ip = (
|
||||
@ -854,19 +856,21 @@ class RouterInfo(object):
|
||||
self._prevent_snat_for_internal_traffic_rule(interface_name))
|
||||
# Makes replies come back through the router to reverse DNAT
|
||||
ext_in_mark = self.agent_conf.external_ingress_mark
|
||||
snat_internal_traffic_to_floating_ip = (
|
||||
'snat', '-m mark ! --mark %s/%s '
|
||||
to_source = ('-m mark ! --mark %s/%s '
|
||||
'-m conntrack --ctstate DNAT '
|
||||
'-j SNAT --to-source %s'
|
||||
% (ext_in_mark, lib_constants.ROUTER_MARK_MASK, ex_gw_ip))
|
||||
if self.iptables_manager.random_fully:
|
||||
to_source += ' --random-fully'
|
||||
snat_internal_traffic_to_floating_ip = ('snat', to_source)
|
||||
return [dont_snat_traffic_to_internal_ports_if_not_to_floating_ip,
|
||||
snat_internal_traffic_to_floating_ip]
|
||||
|
||||
def external_gateway_nat_snat_rules(self, ex_gw_ip, interface_name):
|
||||
snat_normal_external_traffic = (
|
||||
'snat', '-o %s -j SNAT --to-source %s' %
|
||||
(interface_name, ex_gw_ip))
|
||||
return [snat_normal_external_traffic]
|
||||
to_source = '-o %s -j SNAT --to-source %s' % (interface_name, ex_gw_ip)
|
||||
if self.iptables_manager.random_fully:
|
||||
to_source += ' --random-fully'
|
||||
return [('snat', to_source)]
|
||||
|
||||
def external_gateway_mangle_rules(self, interface_name):
|
||||
mark = self.agent_conf.external_ingress_mark
|
||||
|
@ -38,6 +38,8 @@ from neutron._i18n import _
|
||||
from neutron.agent.linux import ip_lib
|
||||
from neutron.agent.linux import iptables_comments as ic
|
||||
from neutron.agent.linux import utils as linux_utils
|
||||
from neutron.common import _constants as n_const
|
||||
from neutron.common import utils
|
||||
from neutron.conf.agent import common as config
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
@ -302,6 +304,9 @@ class IptablesManager(object):
|
||||
# run iptables-restore without it.
|
||||
use_table_lock = False
|
||||
|
||||
# Flag to denote iptables supports --random-fully argument
|
||||
_random_fully = None
|
||||
|
||||
def __init__(self, _execute=None, state_less=False, use_ipv6=False,
|
||||
namespace=None, binary_name=binary_name):
|
||||
if _execute:
|
||||
@ -473,6 +478,23 @@ class IptablesManager(object):
|
||||
args = ['ip', 'netns', 'exec', self.namespace] + args
|
||||
return self.execute(args, run_as_root=True).split('\n')
|
||||
|
||||
def _get_version(self):
|
||||
# Output example is "iptables v1.6.2"
|
||||
args = ['iptables', '--version']
|
||||
version = str(self.execute(args, run_as_root=True).split()[1][1:])
|
||||
LOG.debug("IPTables version installed: %s", version)
|
||||
return version
|
||||
|
||||
@property
|
||||
def random_fully(self):
|
||||
if self._random_fully is not None:
|
||||
return self._random_fully
|
||||
|
||||
version = self._get_version()
|
||||
self.__class__._random_fully = utils.is_version_greater_equal(
|
||||
version, n_const.IPTABLES_RANDOM_FULLY_VERSION)
|
||||
return self._random_fully
|
||||
|
||||
@property
|
||||
def xlock_wait_time(self):
|
||||
# give agent some time to report back to server
|
||||
|
@ -56,3 +56,7 @@ AGENT_RES_PROCESSING_STEP = 100
|
||||
# Number of resources for neutron to divide the large RPC
|
||||
# call data sets.
|
||||
RPC_RES_PROCESSING_STEP = 20
|
||||
|
||||
# IPtables version to support --random-fully option.
|
||||
# Do not move this constant to neutron-lib, since it is temporary
|
||||
IPTABLES_RANDOM_FULLY_VERSION = '1.6.2'
|
||||
|
@ -41,6 +41,7 @@ from oslo_config import cfg
|
||||
from oslo_db import exception as db_exc
|
||||
from oslo_log import log as logging
|
||||
from oslo_utils import excutils
|
||||
import pkg_resources
|
||||
import six
|
||||
|
||||
import neutron
|
||||
@ -312,6 +313,12 @@ def get_socket_address_family(ip_version):
|
||||
else socket.AF_INET6))
|
||||
|
||||
|
||||
def is_version_greater_equal(version1, version2):
|
||||
"""Returns True if version1 is greater or equal than version2 else False"""
|
||||
return (pkg_resources.parse_version(version1) >=
|
||||
pkg_resources.parse_version(version2))
|
||||
|
||||
|
||||
class DelayedStringRenderer(object):
|
||||
"""Takes a callable and its args and calls when __str__ is called
|
||||
|
||||
|
@ -18,6 +18,7 @@ from itertools import chain as iter_chain
|
||||
from itertools import combinations as iter_combinations
|
||||
|
||||
import eventlet
|
||||
import fixtures
|
||||
import mock
|
||||
import netaddr
|
||||
from neutron_lib.agent import constants as agent_consts
|
||||
@ -192,10 +193,28 @@ class BasicRouterOperationsFramework(base.BaseTestCase):
|
||||
ri.process()
|
||||
|
||||
|
||||
class IptablesFixture(fixtures.Fixture):
|
||||
def _setUp(self):
|
||||
# We MUST save and restore random_fully because it is a class
|
||||
# attribute and could change state in some tests, which can cause
|
||||
# the other router test cases to randomly fail due to race conditions.
|
||||
self.random_fully = iptables_manager.IptablesManager.random_fully
|
||||
iptables_manager.IptablesManager.random_fully = True
|
||||
self.addCleanup(self._reset)
|
||||
|
||||
def _reset(self):
|
||||
iptables_manager.IptablesManager.random_fully = self.random_fully
|
||||
|
||||
|
||||
class TestBasicRouterOperations(BasicRouterOperationsFramework):
|
||||
def setUp(self):
|
||||
super(TestBasicRouterOperations, self).setUp()
|
||||
self.useFixture(IptablesFixture())
|
||||
|
||||
def test_request_id_changes(self):
|
||||
a = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
||||
self.assertNotEqual(a.context.request_id, a.context.request_id)
|
||||
self.useFixture(IptablesFixture())
|
||||
|
||||
def test_init_ha_conf(self):
|
||||
with mock.patch('os.path.dirname', return_value='/etc/ha/'):
|
||||
@ -1022,7 +1041,7 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
|
||||
self._test_external_gateway_action('remove', router, dual_stack=True)
|
||||
|
||||
def _verify_snat_mangle_rules(self, nat_rules, mangle_rules, router,
|
||||
negate=False):
|
||||
random_fully, negate=False):
|
||||
interfaces = router[lib_constants.INTERFACE_KEY]
|
||||
source_cidrs = []
|
||||
for iface in interfaces:
|
||||
@ -1033,13 +1052,18 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
|
||||
source_cidrs.append(source_cidr)
|
||||
source_nat_ip = router['gw_port']['fixed_ips'][0]['ip_address']
|
||||
interface_name = ('qg-%s' % router['gw_port']['id'])[:14]
|
||||
mask_rule = ('-m mark ! --mark 0x2/%s -m conntrack --ctstate DNAT '
|
||||
'-j SNAT --to-source %s' %
|
||||
(lib_constants.ROUTER_MARK_MASK, source_nat_ip))
|
||||
snat_rule = ('-o %s -j SNAT --to-source %s' %
|
||||
(interface_name, source_nat_ip))
|
||||
if random_fully:
|
||||
mask_rule += ' --random-fully'
|
||||
snat_rule += ' --random-fully'
|
||||
expected_rules = [
|
||||
'! -i %s ! -o %s -m conntrack ! --ctstate DNAT -j ACCEPT' %
|
||||
(interface_name, interface_name),
|
||||
'-o %s -j SNAT --to-source %s' % (interface_name, source_nat_ip),
|
||||
'-m mark ! --mark 0x2/%s -m conntrack --ctstate DNAT '
|
||||
'-j SNAT --to-source %s' %
|
||||
(lib_constants.ROUTER_MARK_MASK, source_nat_ip)]
|
||||
mask_rule, snat_rule]
|
||||
for r in nat_rules:
|
||||
if negate:
|
||||
self.assertNotIn(r.rule, expected_rules)
|
||||
@ -1631,7 +1655,8 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
|
||||
ri.get_external_device_name = mock.Mock(return_value='exgw')
|
||||
self._test_process_floating_ip_addresses_add(ri, agent)
|
||||
|
||||
def test_process_router_snat_disabled(self):
|
||||
def _test_process_router_snat_disabled(self, random_fully):
|
||||
iptables_manager.IptablesManager.random_fully = random_fully
|
||||
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
||||
router = l3_test_common.prepare_router_data(enable_snat=True)
|
||||
ri = l3router.RouterInfo(agent, router['id'], router, **self.ri_kwargs)
|
||||
@ -1655,10 +1680,17 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
|
||||
if r not in ri.iptables_manager.ipv4['mangle'].rules]
|
||||
self.assertEqual(1, len(mangle_rules_delta))
|
||||
self._verify_snat_mangle_rules(nat_rules_delta, mangle_rules_delta,
|
||||
router)
|
||||
router, random_fully)
|
||||
self.assertEqual(1, self.send_adv_notif.call_count)
|
||||
|
||||
def test_process_router_snat_enabled(self):
|
||||
def test_process_router_snat_disabled_random_fully(self):
|
||||
self._test_process_router_snat_disabled(True)
|
||||
|
||||
def test_process_router_snat_disabled_random_fully_false(self):
|
||||
self._test_process_router_snat_disabled(False)
|
||||
|
||||
def _test_process_router_snat_enabled(self, random_fully):
|
||||
iptables_manager.IptablesManager.random_fully = random_fully
|
||||
agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
|
||||
router = l3_test_common.prepare_router_data(enable_snat=False)
|
||||
ri = l3router.RouterInfo(agent, router['id'], router, **self.ri_kwargs)
|
||||
@ -1682,9 +1714,15 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
|
||||
if r not in orig_mangle_rules]
|
||||
self.assertEqual(1, len(mangle_rules_delta))
|
||||
self._verify_snat_mangle_rules(nat_rules_delta, mangle_rules_delta,
|
||||
router)
|
||||
router, random_fully)
|
||||
self.assertEqual(1, self.send_adv_notif.call_count)
|
||||
|
||||
def test_process_router_snat_enabled_random_fully(self):
|
||||
self._test_process_router_snat_enabled(True)
|
||||
|
||||
def test_process_router_snat_enabled_random_fully_false(self):
|
||||
self._test_process_router_snat_enabled(False)
|
||||
|
||||
def _test_update_routing_table(self, is_snat_host=True):
|
||||
router = l3_test_common.prepare_router_data()
|
||||
uuid = router['id']
|
||||
@ -2292,11 +2330,12 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
|
||||
|
||||
jump_float_rule = "-A %s-snat -j %s-float-snat" % (wrap_name,
|
||||
wrap_name)
|
||||
snat_rule1 = ("-A %s-snat -o iface -j SNAT --to-source %s") % (
|
||||
snat_rule1 = ("-A %s-snat -o iface -j SNAT --to-source %s "
|
||||
"--random-fully") % (
|
||||
wrap_name, ex_gw_port['fixed_ips'][0]['ip_address'])
|
||||
snat_rule2 = ("-A %s-snat -m mark ! --mark 0x2/%s "
|
||||
"-m conntrack --ctstate DNAT "
|
||||
"-j SNAT --to-source %s") % (
|
||||
"-j SNAT --to-source %s --random-fully") % (
|
||||
wrap_name, lib_constants.ROUTER_MARK_MASK,
|
||||
ex_gw_port['fixed_ips'][0]['ip_address'])
|
||||
|
||||
|
@ -259,9 +259,11 @@ class TestDvrRouterOperations(base.BaseTestCase):
|
||||
dnat_from_floatingip_to_fixedip = (
|
||||
'PREROUTING', '-d %s/32 -i %s -j DNAT --to-destination %s' % (
|
||||
floating_ip, rtr_2_fip_name, fixed_ip))
|
||||
snat_from_fixedip_to_floatingip = (
|
||||
'float-snat', '-s %s/32 -j SNAT --to-source %s' % (
|
||||
fixed_ip, floating_ip))
|
||||
to_source = '-s %s/32 -j SNAT --to-source %s' % (fixed_ip, floating_ip)
|
||||
|
||||
if ri.iptables_manager.random_fully:
|
||||
to_source += ' --random-fully'
|
||||
snat_from_fixedip_to_floatingip = ('float-snat', to_source)
|
||||
actual = ri.floating_forward_rules(fip)
|
||||
expected = [dnat_from_floatingip_to_fixedip,
|
||||
snat_from_fixedip_to_floatingip]
|
||||
|
@ -281,6 +281,17 @@ class TestIpVersionFromInt(base.BaseTestCase):
|
||||
8)
|
||||
|
||||
|
||||
class TestIsVersionGreaterEqual(base.BaseTestCase):
|
||||
def test_is_version_greater_equal_greater(self):
|
||||
self.assertTrue(utils.is_version_greater_equal('1.6.2', '1.6.0'))
|
||||
|
||||
def test_is_version_greater_equal_equal(self):
|
||||
self.assertTrue(utils.is_version_greater_equal('1.6.2', '1.6.2'))
|
||||
|
||||
def test_is_version_greater_equal_less(self):
|
||||
self.assertFalse(utils.is_version_greater_equal('1.6.0', '1.6.2'))
|
||||
|
||||
|
||||
class TestDelayedStringRenderer(base.BaseTestCase):
|
||||
def test_call_deferred_until_str(self):
|
||||
my_func = mock.MagicMock(return_value='Brie cheese!')
|
||||
|
Loading…
Reference in New Issue
Block a user