From a521bf0393d33d6e69f59900942404c2b5c84d83 Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Mon, 21 Aug 2017 12:15:25 -0700 Subject: [PATCH] Make use of -w argument for iptables calls Upstream iptables added support for -w ('wait') argument to iptables-restore. It makes the command grab a 'xlock' that guarantees that no two iptables calls will mess a table if called in parallel. [This somewhat resembles what we try to achieve with a file lock we grab in iptables manager's _apply_synchronized.] If two processes call to iptables-restore or iptables in parallel, the second call risks failing, returning error code = 4, and also printing the following error: Another app is currently holding the xtables lock. Perhaps you want to use the -w option? If we call to iptables / iptables-restore with -w though, it will wait for the xlock release before proceeding, and won't fail. Though the feature was added in iptables/master only and is not part of an official iptables release, it was already backported to RHEL 7.x iptables package, and so we need to adopt to it. At the same time, we can't expect any underlying platform to support the argument. A solution here is to call iptables-restore with -w when a regular call failed. Also, the patch adds -w to all iptables calls, in the iptables manager as well as in ipset-cleanup. Since we don't want to lock agent in case current xlock owner doesn't release it in reasonable time, we limit the time we wait to ~1/3 of report_interval, to give the agent some time to recover without triggering expensive fullsync. In the future, we may be able to get rid of our custom synchronization lock that we use in iptables manager. But this will require all supported platforms to get the feature in and will take some time. Closes-Bug: #1712185 Change-Id: I94e54935df7c6caa2480eca19e851cb4882c0f8b --- neutron/agent/linux/iptables_manager.py | 90 +++++++++++++------ neutron/cmd/ipset_cleanup.py | 1 + .../unit/agent/linux/test_iptables_manager.py | 45 ++++++---- 3 files changed, 93 insertions(+), 43 deletions(-) diff --git a/neutron/agent/linux/iptables_manager.py b/neutron/agent/linux/iptables_manager.py index e07ca49bf34..78eee4857e3 100644 --- a/neutron/agent/linux/iptables_manager.py +++ b/neutron/agent/linux/iptables_manager.py @@ -63,6 +63,10 @@ MAX_CHAIN_LEN_NOWRAP = 28 IPTABLES_ERROR_LINES_OF_CONTEXT = 5 +# RESOURCE_PROBLEM in include/xtables.h +XTABLES_RESOURCE_PROBLEM_CODE = 4 + + def comment_rule(rule, comment): if not cfg.CONF.AGENT.comment_iptables_rules or not comment: return rule @@ -435,6 +439,9 @@ class IptablesManager(object): if self.namespace: lock_name += '-' + self.namespace + # NOTE(ihrachys) we may get rid of the lock once all supported + # platforms get iptables with 999eaa241212d3952ddff39a99d0d55a74e3639e + # ("iptables-restore: support acquiring the lock.") with lockutils.lock(lock_name, utils.SYNCHRONIZED_PREFIX, True): first = self._apply_synchronized() if not cfg.CONF.AGENT.debug_iptables_rules: @@ -454,6 +461,42 @@ class IptablesManager(object): args = ['ip', 'netns', 'exec', self.namespace] + args return self.execute(args, run_as_root=True).split('\n') + @property + def xlock_wait_time(self): + # give agent some time to report back to server + return str(int(cfg.CONF.AGENT.report_interval / 3.0)) + + def _run_restore(self, args, commands, lock=False): + args = args[:] + if lock: + args += ['-w', self.xlock_wait_time] + try: + self.execute(args, process_input='\n'.join(commands), + run_as_root=True) + except RuntimeError as error: + return error + + def _log_restore_err(self, err, commands): + try: + line_no = int(re.search( + 'iptables-restore: line ([0-9]+?) failed', + str(err)).group(1)) + context = IPTABLES_ERROR_LINES_OF_CONTEXT + log_start = max(0, line_no - context) + log_end = line_no + context + except AttributeError: + # line error wasn't found, print all lines instead + log_start = 0 + log_end = len(commands) + log_lines = ('%7d. %s' % (idx, l) + for idx, l in enumerate( + commands[log_start:log_end], + log_start + 1) + ) + LOG.error("IPTablesManager.apply failed to apply the " + "following set of iptables rules:\n%s", + '\n'.join(log_lines)) + def _apply_synchronized(self): """Apply the current in-memory set of iptables rules. @@ -507,35 +550,27 @@ class IptablesManager(object): if not commands: continue all_commands += commands + + # always end with a new line + commands.append('') + args = ['%s-restore' % (cmd,), '-n'] if self.namespace: args = ['ip', 'netns', 'exec', self.namespace] + args - try: - # always end with a new line - commands.append('') - self.execute(args, process_input='\n'.join(commands), - run_as_root=True) - except RuntimeError as r_error: - with excutils.save_and_reraise_exception(): - try: - line_no = int(re.search( - 'iptables-restore: line ([0-9]+?) failed', - str(r_error)).group(1)) - context = IPTABLES_ERROR_LINES_OF_CONTEXT - log_start = max(0, line_no - context) - log_end = line_no + context - except AttributeError: - # line error wasn't found, print all lines instead - log_start = 0 - log_end = len(commands) - log_lines = ('%7d. %s' % (idx, l) - for idx, l in enumerate( - commands[log_start:log_end], - log_start + 1) - ) - LOG.error("IPTablesManager.apply failed to apply the " - "following set of iptables rules:\n%s", - '\n'.join(log_lines)) + + err = self._run_restore(args, commands) + if (isinstance(err, linux_utils.ProcessExecutionError) and + err.returncode == XTABLES_RESOURCE_PROBLEM_CODE): + # maybe we run on a platform that includes iptables commit + # 999eaa241212d3952ddff39a99d0d55a74e3639e (for example, latest + # RHEL) and failed because of xlock acquired by another + # iptables process running in parallel. Try to use -w to + # acquire xlock. + err = self._run_restore(args, commands, lock=True) + if err: + self._log_restore_err(err, commands) + raise err + LOG.debug("IPTablesManager.apply completed with success. %d iptables " "commands were issued", len(all_commands)) return all_commands @@ -683,7 +718,8 @@ class IptablesManager(object): acc = {'pkts': 0, 'bytes': 0} for cmd, table in cmd_tables: - args = [cmd, '-t', table, '-L', name, '-n', '-v', '-x'] + args = [cmd, '-t', table, '-L', name, '-n', '-v', '-x', + '-w', self.xlock_wait_time] if zero: args.append('-Z') if self.namespace: diff --git a/neutron/cmd/ipset_cleanup.py b/neutron/cmd/ipset_cleanup.py index 914e5e12d7b..c6bd40de6a4 100644 --- a/neutron/cmd/ipset_cleanup.py +++ b/neutron/cmd/ipset_cleanup.py @@ -42,6 +42,7 @@ def remove_iptables_reference(ipset): if ipset in iptables_save: cmd = ['iptables'] if 'IPv4' in ipset else ['ip6tables'] + cmd += ['-w', '10'] # wait for xlock release LOG.info("Removing iptables rule for IPset: %s", ipset) for rule in iptables_save.splitlines(): if '--match-set %s ' % ipset in rule and rule.startswith('-A'): diff --git a/neutron/tests/unit/agent/linux/test_iptables_manager.py b/neutron/tests/unit/agent/linux/test_iptables_manager.py index 2d243f06948..2dbc21eb579 100644 --- a/neutron/tests/unit/agent/linux/test_iptables_manager.py +++ b/neutron/tests/unit/agent/linux/test_iptables_manager.py @@ -23,6 +23,7 @@ import testtools from neutron._i18n import _ from neutron.agent.linux import iptables_comments as ic from neutron.agent.linux import iptables_manager +from neutron.agent.linux import utils as linux_utils from neutron.common import constants from neutron.common import exceptions as n_exc from neutron.tests import base @@ -373,6 +374,7 @@ class IptablesManagerStateFulTestCase(base.BaseTestCase): def setUp(self): super(IptablesManagerStateFulTestCase, self).setUp() cfg.CONF.set_override('comment_iptables_rules', False, 'AGENT') + cfg.CONF.set_override('report_interval', 30, 'AGENT') self.iptables = iptables_manager.IptablesManager() self.execute = mock.patch.object(self.iptables, "execute").start() @@ -1025,7 +1027,7 @@ class IptablesManagerStateFulTestCase(base.BaseTestCase): '\n'.join(logged) ) - def test_iptables_failure_on_specific_line(self): + def test_iptables_failure(self): with mock.patch.object(iptables_manager, "LOG") as log: # generate Runtime errors on iptables-restore calls def iptables_restore_failer(*args, **kwargs): @@ -1034,13 +1036,22 @@ class IptablesManagerStateFulTestCase(base.BaseTestCase): # pretend line 11 failed msg = ("Exit code: 1\nStdout: ''\n" "Stderr: 'iptables-restore: line 11 failed\n'") - raise RuntimeError(msg) + raise linux_utils.ProcessExecutionError( + msg, iptables_manager.XTABLES_RESOURCE_PROBLEM_CODE) return FILTER_DUMP self.execute.side_effect = iptables_restore_failer # _apply_synchronized calls iptables-restore so it should raise # a RuntimeError self.assertRaises(RuntimeError, self.iptables._apply_synchronized) + + # check that we tried with -w when the first attempt failed + self.execute.assert_has_calls( + [mock.call(['iptables-restore', '-n'], + process_input=mock.ANY, run_as_root=True), + mock.call(['iptables-restore', '-n', '-w', '10'], + process_input=mock.ANY, run_as_root=True)]) + # The RuntimeError should have triggered a log of the input to the # process that it failed to execute. Verify by comparing the log # call to the 'process_input' arg given to the failed iptables-restore @@ -1077,35 +1088,35 @@ class IptablesManagerStateFulTestCase(base.BaseTestCase): expected_calls_and_values = [ (mock.call(['iptables', '-t', 'filter', '-L', 'OUTPUT', - '-n', '-v', '-x'], + '-n', '-v', '-x', '-w', '10'], run_as_root=True), TRAFFIC_COUNTERS_DUMP), (mock.call(['iptables', '-t', 'raw', '-L', 'OUTPUT', '-n', - '-v', '-x'], + '-v', '-x', '-w', '10'], run_as_root=True), ''), (mock.call(['iptables', '-t', 'mangle', '-L', 'OUTPUT', '-n', - '-v', '-x'], + '-v', '-x', '-w', '10'], run_as_root=True), ''), (mock.call(['iptables', '-t', 'nat', '-L', 'OUTPUT', '-n', - '-v', '-x'], + '-v', '-x', '-w', '10'], run_as_root=True), ''), ] if use_ipv6: expected_calls_and_values.append( (mock.call(['ip6tables', '-t', 'raw', '-L', 'OUTPUT', - '-n', '-v', '-x'], run_as_root=True), + '-n', '-v', '-x', '-w', '10'], run_as_root=True), '')) expected_calls_and_values.append( (mock.call(['ip6tables', '-t', 'filter', '-L', 'OUTPUT', - '-n', '-v', '-x'], + '-n', '-v', '-x', '-w', '10'], run_as_root=True), TRAFFIC_COUNTERS_DUMP)) expected_calls_and_values.append( (mock.call(['ip6tables', '-t', 'mangle', '-L', 'OUTPUT', - '-n', '-v', '-x'], run_as_root=True), + '-n', '-v', '-x', '-w', '10'], run_as_root=True), '')) exp_packets *= 2 exp_bytes *= 2 @@ -1134,35 +1145,37 @@ class IptablesManagerStateFulTestCase(base.BaseTestCase): expected_calls_and_values = [ (mock.call(['iptables', '-t', 'filter', '-L', 'OUTPUT', - '-n', '-v', '-x', '-Z'], + '-n', '-v', '-x', '-w', '10', '-Z'], run_as_root=True), TRAFFIC_COUNTERS_DUMP), (mock.call(['iptables', '-t', 'raw', '-L', 'OUTPUT', '-n', - '-v', '-x', '-Z'], + '-v', '-x', '-w', '10', '-Z'], run_as_root=True), ''), (mock.call(['iptables', '-t', 'mangle', '-L', 'OUTPUT', '-n', - '-v', '-x', '-Z'], + '-v', '-x', '-w', '10', '-Z'], run_as_root=True), ''), (mock.call(['iptables', '-t', 'nat', '-L', 'OUTPUT', '-n', - '-v', '-x', '-Z'], + '-v', '-x', '-w', '10', '-Z'], run_as_root=True), '') ] if use_ipv6: expected_calls_and_values.append( (mock.call(['ip6tables', '-t', 'raw', '-L', 'OUTPUT', - '-n', '-v', '-x', '-Z'], run_as_root=True), + '-n', '-v', '-x', '-w', '10', '-Z'], + run_as_root=True), '')) expected_calls_and_values.append( (mock.call(['ip6tables', '-t', 'filter', '-L', 'OUTPUT', - '-n', '-v', '-x', '-Z'], + '-n', '-v', '-x', '-w', '10', '-Z'], run_as_root=True), TRAFFIC_COUNTERS_DUMP)) expected_calls_and_values.append( (mock.call(['ip6tables', '-t', 'mangle', '-L', 'OUTPUT', - '-n', '-v', '-x', '-Z'], run_as_root=True), + '-n', '-v', '-x', '-w', '10', '-Z'], + run_as_root=True), '')) exp_packets *= 2 exp_bytes *= 2