Always call iptables-restore with -w if done once

In the case where we called iptables-restore with a
-w argument and it succeeded, we should short-circuit
future calls to always use -w, instead of trying
without it, just to fall-back to using it on failure.

While analyzing some l3-agent log files I have seen
lots of "Perhaps you want to use the -w option?",
followed by a call with -w, followed by not using it
the next time. Changing this can save one failing
call to iptables-restore.

Change-Id: Icac99eb1d43648c64b6beaee0d6201f990eacb51
Related-bug: #1712185
This commit is contained in:
Brian Haley 2017-10-18 15:38:29 -04:00
parent 1b3f982914
commit 6c50ad5858
2 changed files with 77 additions and 11 deletions

View File

@ -62,7 +62,6 @@ MAX_CHAIN_LEN_NOWRAP = 28
# a failure during iptables-restore
IPTABLES_ERROR_LINES_OF_CONTEXT = 5
# RESOURCE_PROBLEM in include/xtables.h
XTABLES_RESOURCE_PROBLEM_CODE = 4
@ -298,6 +297,10 @@ class IptablesManager(object):
"""
# Flag to denote we've already tried and used -w successfully, so don't
# run iptables-restore without it.
use_table_lock = False
def __init__(self, _execute=None, state_less=False, use_ipv6=False,
namespace=None, binary_name=binary_name):
if _execute:
@ -466,7 +469,7 @@ class IptablesManager(object):
# give agent some time to report back to server
return str(int(cfg.CONF.AGENT.report_interval / 3.0))
def _run_restore(self, args, commands, lock=False):
def _do_run_restore(self, args, commands, lock=False):
args = args[:]
if lock:
args += ['-w', self.xlock_wait_time]
@ -477,6 +480,25 @@ class IptablesManager(object):
except RuntimeError as error:
return error
def _run_restore(self, args, commands):
# If we've already tried and used -w successfully, don't
# run iptables-restore without it.
if self.use_table_lock:
return self._do_run_restore(args, commands, lock=True)
err = self._do_run_restore(args, commands)
if (isinstance(err, linux_utils.ProcessExecutionError) and
err.returncode == XTABLES_RESOURCE_PROBLEM_CODE):
# maybe we run on a platform that includes iptables commit
# 999eaa241212d3952ddff39a99d0d55a74e3639e (for example, latest
# RHEL) and failed because of xlock acquired by another
# iptables process running in parallel. Try to use -w to
# acquire xlock.
err = self._do_run_restore(args, commands, lock=True)
if not err:
self.__class__.use_table_lock = True
return err
def _log_restore_err(self, err, commands):
try:
line_no = int(re.search(
@ -560,14 +582,6 @@ class IptablesManager(object):
args = ['ip', 'netns', 'exec', self.namespace] + args
err = self._run_restore(args, commands)
if (isinstance(err, linux_utils.ProcessExecutionError) and
err.returncode == XTABLES_RESOURCE_PROBLEM_CODE):
# maybe we run on a platform that includes iptables commit
# 999eaa241212d3952ddff39a99d0d55a74e3639e (for example, latest
# RHEL) and failed because of xlock acquired by another
# iptables process running in parallel. Try to use -w to
# acquire xlock.
err = self._run_restore(args, commands, lock=True)
if err:
self._log_restore_err(err, commands)
raise err

View File

@ -16,6 +16,7 @@
import os
import sys
import fixtures
import mock
from oslo_config import cfg
import testtools
@ -368,14 +369,29 @@ MANGLE_RESTORE_DUMP = _generate_mangle_restore_dump(IPTABLES_ARG)
RAW_RESTORE_DUMP = _generate_raw_restore_dump(IPTABLES_ARG)
class IptablesFixture(fixtures.Fixture):
def _setUp(self):
# We MUST save and restore use_table_lock because it is a class
# attribute and could change state in some tests, which can cause
# the other iptables_manager test cases to randomly fail due to
# race conditions.
self.use_table_lock = iptables_manager.IptablesManager.use_table_lock
iptables_manager.IptablesManager.use_table_lock = False
self.addCleanup(self._reset)
def _reset(self):
iptables_manager.IptablesManager.use_table_lock = self.use_table_lock
class IptablesManagerStateFulTestCase(base.BaseTestCase):
def setUp(self):
super(IptablesManagerStateFulTestCase, self).setUp()
cfg.CONF.set_override('comment_iptables_rules', False, 'AGENT')
cfg.CONF.set_override('report_interval', 30, 'AGENT')
self.execute = mock.patch.object(linux_utils, "execute").start()
self.iptables = iptables_manager.IptablesManager()
self.execute = mock.patch.object(self.iptables, "execute").start()
self.useFixture(IptablesFixture())
def test_binary_name(self):
expected = os.path.basename(sys.argv[0])[:16]
@ -1069,6 +1085,42 @@ class IptablesManagerStateFulTestCase(base.BaseTestCase):
'\n'.join(logged)
)
def test_iptables_use_table_lock(self):
# Under normal operation, if we do call iptables-restore with a -w
# and it succeeds, the next call will only use -w.
PE_error = linux_utils.ProcessExecutionError(
"", iptables_manager.XTABLES_RESOURCE_PROBLEM_CODE)
self.execute.side_effect = [FILTER_DUMP, PE_error, None,
FILTER_DUMP, None,
FILTER_DUMP, None]
self.iptables._apply_synchronized()
self.assertEqual(3, self.execute.call_count)
self.execute.assert_has_calls(
[mock.call(['iptables-save'], run_as_root=True),
mock.call(['iptables-restore', '-n'],
process_input=mock.ANY, run_as_root=True,
log_fail_as_error=False),
mock.call(['iptables-restore', '-n', '-w', '10'],
process_input=mock.ANY, run_as_root=True)])
self.execute.reset_mock()
self.iptables._apply_synchronized()
self.assertEqual(2, self.execute.call_count)
self.execute.assert_has_calls(
[mock.call(['iptables-save'], run_as_root=True),
mock.call(['iptables-restore', '-n', '-w', '10'],
process_input=mock.ANY, run_as_root=True)])
# Another instance of the class should behave similarly now
self.execute.reset_mock()
iptm = iptables_manager.IptablesManager()
iptm._apply_synchronized()
self.assertEqual(2, self.execute.call_count)
self.execute.assert_has_calls(
[mock.call(['iptables-save'], run_as_root=True),
mock.call(['iptables-restore', '-n', '-w', '10'],
process_input=mock.ANY, run_as_root=True)])
def test_get_traffic_counters_chain_notexists(self):
with mock.patch.object(iptables_manager, "LOG") as log:
acc = self.iptables.get_traffic_counters('chain1')