diff --git a/neutron/agent/linux/iptables_manager.py b/neutron/agent/linux/iptables_manager.py index 5518607fe82..efea3b3b562 100644 --- a/neutron/agent/linux/iptables_manager.py +++ b/neutron/agent/linux/iptables_manager.py @@ -62,7 +62,6 @@ MAX_CHAIN_LEN_NOWRAP = 28 # a failure during iptables-restore IPTABLES_ERROR_LINES_OF_CONTEXT = 5 - # RESOURCE_PROBLEM in include/xtables.h XTABLES_RESOURCE_PROBLEM_CODE = 4 @@ -298,6 +297,10 @@ class IptablesManager(object): """ + # Flag to denote we've already tried and used -w successfully, so don't + # run iptables-restore without it. + use_table_lock = False + def __init__(self, _execute=None, state_less=False, use_ipv6=False, namespace=None, binary_name=binary_name): if _execute: @@ -466,7 +469,7 @@ class IptablesManager(object): # give agent some time to report back to server return str(int(cfg.CONF.AGENT.report_interval / 3.0)) - def _run_restore(self, args, commands, lock=False): + def _do_run_restore(self, args, commands, lock=False): args = args[:] if lock: args += ['-w', self.xlock_wait_time] @@ -477,6 +480,25 @@ class IptablesManager(object): except RuntimeError as error: return error + def _run_restore(self, args, commands): + # If we've already tried and used -w successfully, don't + # run iptables-restore without it. + if self.use_table_lock: + return self._do_run_restore(args, commands, lock=True) + + err = self._do_run_restore(args, commands) + if (isinstance(err, linux_utils.ProcessExecutionError) and + err.returncode == XTABLES_RESOURCE_PROBLEM_CODE): + # maybe we run on a platform that includes iptables commit + # 999eaa241212d3952ddff39a99d0d55a74e3639e (for example, latest + # RHEL) and failed because of xlock acquired by another + # iptables process running in parallel. Try to use -w to + # acquire xlock. + err = self._do_run_restore(args, commands, lock=True) + if not err: + self.__class__.use_table_lock = True + return err + def _log_restore_err(self, err, commands): try: line_no = int(re.search( @@ -560,14 +582,6 @@ class IptablesManager(object): args = ['ip', 'netns', 'exec', self.namespace] + args err = self._run_restore(args, commands) - if (isinstance(err, linux_utils.ProcessExecutionError) and - err.returncode == XTABLES_RESOURCE_PROBLEM_CODE): - # maybe we run on a platform that includes iptables commit - # 999eaa241212d3952ddff39a99d0d55a74e3639e (for example, latest - # RHEL) and failed because of xlock acquired by another - # iptables process running in parallel. Try to use -w to - # acquire xlock. - err = self._run_restore(args, commands, lock=True) if err: self._log_restore_err(err, commands) raise err diff --git a/neutron/tests/unit/agent/linux/test_iptables_manager.py b/neutron/tests/unit/agent/linux/test_iptables_manager.py index 745a0cf2cf1..3faba2d9036 100644 --- a/neutron/tests/unit/agent/linux/test_iptables_manager.py +++ b/neutron/tests/unit/agent/linux/test_iptables_manager.py @@ -16,6 +16,7 @@ import os import sys +import fixtures import mock from oslo_config import cfg import testtools @@ -368,14 +369,29 @@ MANGLE_RESTORE_DUMP = _generate_mangle_restore_dump(IPTABLES_ARG) RAW_RESTORE_DUMP = _generate_raw_restore_dump(IPTABLES_ARG) +class IptablesFixture(fixtures.Fixture): + def _setUp(self): + # We MUST save and restore use_table_lock because it is a class + # attribute and could change state in some tests, which can cause + # the other iptables_manager test cases to randomly fail due to + # race conditions. + self.use_table_lock = iptables_manager.IptablesManager.use_table_lock + iptables_manager.IptablesManager.use_table_lock = False + self.addCleanup(self._reset) + + def _reset(self): + iptables_manager.IptablesManager.use_table_lock = self.use_table_lock + + class IptablesManagerStateFulTestCase(base.BaseTestCase): def setUp(self): super(IptablesManagerStateFulTestCase, self).setUp() cfg.CONF.set_override('comment_iptables_rules', False, 'AGENT') cfg.CONF.set_override('report_interval', 30, 'AGENT') + self.execute = mock.patch.object(linux_utils, "execute").start() self.iptables = iptables_manager.IptablesManager() - self.execute = mock.patch.object(self.iptables, "execute").start() + self.useFixture(IptablesFixture()) def test_binary_name(self): expected = os.path.basename(sys.argv[0])[:16] @@ -1069,6 +1085,42 @@ class IptablesManagerStateFulTestCase(base.BaseTestCase): '\n'.join(logged) ) + def test_iptables_use_table_lock(self): + # Under normal operation, if we do call iptables-restore with a -w + # and it succeeds, the next call will only use -w. + PE_error = linux_utils.ProcessExecutionError( + "", iptables_manager.XTABLES_RESOURCE_PROBLEM_CODE) + self.execute.side_effect = [FILTER_DUMP, PE_error, None, + FILTER_DUMP, None, + FILTER_DUMP, None] + self.iptables._apply_synchronized() + self.assertEqual(3, self.execute.call_count) + self.execute.assert_has_calls( + [mock.call(['iptables-save'], run_as_root=True), + mock.call(['iptables-restore', '-n'], + process_input=mock.ANY, run_as_root=True, + log_fail_as_error=False), + mock.call(['iptables-restore', '-n', '-w', '10'], + process_input=mock.ANY, run_as_root=True)]) + + self.execute.reset_mock() + self.iptables._apply_synchronized() + self.assertEqual(2, self.execute.call_count) + self.execute.assert_has_calls( + [mock.call(['iptables-save'], run_as_root=True), + mock.call(['iptables-restore', '-n', '-w', '10'], + process_input=mock.ANY, run_as_root=True)]) + + # Another instance of the class should behave similarly now + self.execute.reset_mock() + iptm = iptables_manager.IptablesManager() + iptm._apply_synchronized() + self.assertEqual(2, self.execute.call_count) + self.execute.assert_has_calls( + [mock.call(['iptables-save'], run_as_root=True), + mock.call(['iptables-restore', '-n', '-w', '10'], + process_input=mock.ANY, run_as_root=True)]) + def test_get_traffic_counters_chain_notexists(self): with mock.patch.object(iptables_manager, "LOG") as log: acc = self.iptables.get_traffic_counters('chain1')