From 00b633d284f0f21aa380fa47a270c612ebef0795 Mon Sep 17 00:00:00 2001 From: Brent Eagles Date: Thu, 13 Aug 2015 13:35:36 -0230 Subject: [PATCH] Manage cleanup of .ctl/.pid files for LibreSwan LibreSwan checks for the presence of pid/ctl files when starting up and will error out if they already exist. However, LibreSwan's usage of the capabilities library removes the access required to cleanup .ctl and .pid files on shutdown if any of the directories in the path are missing explicit permissions for root. This is not considered a bug by the LibreSwan maintainers, so the LibreSwan driver must work around it by checking if it is okay to remove the files and removing them on startup. It must also wait for shutdown to complete before restarting the daemon on LibreSwanProcess.restart(). Introduces new configuration for retrying check for process shutdown. DocImpact Change-Id: I5c215d70c348524979b740f882029f74e400e6d7 Closes-Bug: #1331502 --- etc/vpn_agent.ini | 11 ++ .../vpn/device_drivers/libreswan_ipsec.py | 120 +++++++++++ .../services/vpn/device_drivers/test_ipsec.py | 186 +++++++++++++++++- 3 files changed, 315 insertions(+), 2 deletions(-) diff --git a/etc/vpn_agent.ini b/etc/vpn_agent.ini index f06cc2b2e..419081130 100644 --- a/etc/vpn_agent.ini +++ b/etc/vpn_agent.ini @@ -25,3 +25,14 @@ # default_config_area=/usr/share/strongswan/templates/config/strongswan.d # Default is for ubuntu use, /etc/strongswan.d # default_config_area=/etc/strongswan.d + +[libreswan] +# Initial interval in seconds for checking if pluto daemon is shutdown +# shutdown_check_timeout=1 +# +# The maximum number of retries for checking for pluto daemon shutdown +# shutdown_check_retries=5 +# +# A factor to increase the retry interval for each retry +# shutdown_check_back_off=1.5 + diff --git a/neutron_vpnaas/services/vpn/device_drivers/libreswan_ipsec.py b/neutron_vpnaas/services/vpn/device_drivers/libreswan_ipsec.py index 6d2e25abc..d6dda6a4d 100644 --- a/neutron_vpnaas/services/vpn/device_drivers/libreswan_ipsec.py +++ b/neutron_vpnaas/services/vpn/device_drivers/libreswan_ipsec.py @@ -12,8 +12,37 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. +import os +import os.path + +import eventlet + +from neutron.i18n import _LE +from neutron.i18n import _LW +from oslo_config import cfg +from oslo_log import log as logging + from neutron_vpnaas.services.vpn.device_drivers import ipsec +LOG = logging.getLogger(__name__) + +libreswan_opts = [ + cfg.IntOpt('shutdown_check_timeout', + default=1, + help=_('Initial interval in seconds for checking if pluto ' + 'daemon is shutdown')), + cfg.IntOpt('shutdown_check_retries', + default=5, + help=_('The maximum number of retries for checking for ' + 'pluto daemon shutdown')), + cfg.FloatOpt('shutdown_check_back_off', + default=1.5, + help=_('A factor to increase the retry interval for ' + 'each retry')) +] + +cfg.CONF.register_opts(libreswan_opts, 'libreswan') + class LibreSwanProcess(ipsec.OpenSwanProcess): """Libreswan Process manager class. @@ -23,6 +52,7 @@ class LibreSwanProcess(ipsec.OpenSwanProcess): def __init__(self, conf, process_id, vpnservice, namespace): super(LibreSwanProcess, self).__init__(conf, process_id, vpnservice, namespace) + self.pid_file = '%s.pid' % self.pid_path def ensure_configs(self): """Generate config files which are needed for Libreswan. @@ -40,6 +70,96 @@ class LibreSwanProcess(ipsec.OpenSwanProcess): except RuntimeError: self._execute([self.binary, 'initnss', self.etc_dir]) + def _process_running(self): + """Checks if process is still running.""" + + # If no PID file, we assume the process is not running. + if not os.path.exists(self.pid_file): + return False + + try: + # We take an ask-forgiveness-not-permission approach and rely + # on throwing to tell us something. If the pid file exists, + # delve into the process information and check if it matches + # our expected command line. + with open(self.pid_file, 'r') as f: + pid = f.readline().strip() + with open('/proc/%s/cmdline' % pid) as cmd_line_file: + cmd_line = cmd_line_file.readline() + if self.pid_path in cmd_line and 'pluto' in cmd_line: + # Okay the process is probably a libreswan process + # and it contains the pid_path in the command + # line... could be a race. Log to error and return + # that it is *NOT* okay to clean up files. We are + # logging to error instead of debug because it + # indicates something bad has happened and this is + # valuable information for figuring it out. + LOG.error(_LE('Process %(pid)s exists with command ' + 'line %(cmd_line)s.') % + {'pid': pid, 'cmd_line': cmd_line}) + return True + + except IOError as e: + LOG.error(_LE('Unable to check control files on startup for ' + 'router %(router)s: %(msg)s'), + {'router': self.id, 'msg': e}) + return False + + def _cleanup_control_files(self): + try: + ctl_file = '%s.ctl' % self.pid_path + LOG.debug('Removing %(pidfile)s and %(ctlfile)s', + {'pidfile': self.pid_file, + 'ctlfile': '%s.ctl' % ctl_file}) + + if os.path.exists(self.pid_file): + os.remove(self.pid_file) + + if os.path.exists(ctl_file): + os.remove(ctl_file) + + except OSError as e: + LOG.error(_LE('Unable to remove libreswan control ' + 'files for router %(router)s. %(msg)s'), + {'router': self.id, 'msg': e}) + + def start(self): + # NOTE: The restart operation calls the parent's start() instead of + # this one to avoid having to special case the startup file check. + # If anything is added to this method that needs to run whenever + # a restart occurs, it should be either added to the restart() + # override or things refactored to special-case start() when + # called from restart(). + + # LibreSwan's use of the capablities library may prevent the ctl + # and pid files from being cleaned up, so we check to see if the + # process is running and if not, attempt a cleanup. In either case + # we fall through to allow the LibreSwan process to start or fail + # in the usual way. + if not self._process_running(): + self._cleanup_control_files() + + super(LibreSwanProcess, self).start() + + def restart(self): + # stop() followed immediately by a start() runs the risk that the + # current pluto daemon has not had a chance to shutdown. We check + # the current process information to see if the daemon is still + # running and if so, wait a short interval and retry. + self.stop() + wait_interval = cfg.CONF.libreswan.shutdown_check_timeout + for i in range(cfg.CONF.libreswan.shutdown_check_retries): + if not self._process_running(): + self._cleanup_control_files() + break + eventlet.sleep(wait_interval) + wait_interval *= cfg.CONF.libreswan.shutdown_check_back_off + else: + LOG.warning(_LW('Server appears to still be running, restart ' + 'of router %s may fail'), self.id) + + super(LibreSwanProcess, self).start() + class LibreSwanDriver(ipsec.IPsecDriver): def create_process(self, process_id, vpnservice, namespace): diff --git a/neutron_vpnaas/tests/unit/services/vpn/device_drivers/test_ipsec.py b/neutron_vpnaas/tests/unit/services/vpn/device_drivers/test_ipsec.py index f9dcc42ae..12e6c7880 100644 --- a/neutron_vpnaas/tests/unit/services/vpn/device_drivers/test_ipsec.py +++ b/neutron_vpnaas/tests/unit/services/vpn/device_drivers/test_ipsec.py @@ -14,6 +14,7 @@ # under the License. import copy import difflib +import io import mock import socket @@ -794,14 +795,195 @@ class TestOpenSwanProcess(BaseIPsecDeviceDriver): class TestLibreSwanProcess(base.BaseTestCase): + + _test_timeout = 1 + _test_backoff = 2 + _test_retries = 5 + def setUp(self): super(TestLibreSwanProcess, self).setUp() + # Insulate tests against changes to configuration defaults. + cfg.CONF.register_opts(libreswan_ipsec.libreswan_opts, + 'libreswan') + cfg.CONF.set_override('shutdown_check_timeout', self._test_timeout, + group='libreswan') + cfg.CONF.set_override('shutdown_check_back_off', self._test_backoff, + group='libreswan') + cfg.CONF.set_override('shutdown_check_retries', self._test_retries, + group='libreswan') + self.addCleanup(cfg.CONF.reset) self.vpnservice = copy.deepcopy(FAKE_VPN_SERVICE) + self.parent_start = mock.patch('neutron_vpnaas.services.' + 'vpn.device_drivers.ipsec.' + 'OpenSwanProcess.start').start() + self.parent_stop = mock.patch('neutron_vpnaas.services.' + 'vpn.device_drivers.ipsec.' + 'OpenSwanProcess.stop').start() + self.os_remove = mock.patch('os.remove').start() + self.ipsec_process = libreswan_ipsec.LibreSwanProcess(mock.ANY, 'foo-process-id', self.vpnservice, mock.ANY) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.LibreSwanProcess._cleanup_control_files') + def test_no_cleanups(self, cleanup_mock): + # Not an "awesome test" but more of a check box item. Basically, + # what happens if we didn't need to clean up any files. + with mock.patch.object(self.ipsec_process, + '_process_running', + return_value=True) as query_mock: + self.ipsec_process.start() + self.assertEqual(1, self.parent_start.call_count) + self.assertEqual(1, query_mock.call_count) + + # This is really what is being tested here. If process is + # running, we shouldn't attempt a cleanup. + self.assertFalse(cleanup_mock.called) + + @mock.patch('os.path.exists', return_value=True) + def test_cleanup_files(self, exists_mock): + # Tests the 'bones' of things really and kind of check-box-item-bogus + # test - this really needs exercising through a higher level test. + with mock.patch.object(self.ipsec_process, + '_process_running', + return_value=False) as query_mock: + fake_path = '/fake/path/run' + self.ipsec_process.pid_path = fake_path + self.ipsec_process.pid_file = '%s.pid' % fake_path + self.ipsec_process.start() + self.assertEqual(1, self.parent_start.call_count) + self.assertEqual(1, query_mock.call_count) + self.assertEqual(2, self.os_remove.call_count) + self.os_remove.assert_has_calls([mock.call('%s.pid' % fake_path), + mock.call('%s.ctl' % fake_path)]) + + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.LibreSwanProcess._process_running', + return_value=False) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.LibreSwanProcess._cleanup_control_files') + @mock.patch('eventlet.sleep') + def test_restart_process_not_running(self, sleep_mock, cleanup_mock, + query_mock): + self.ipsec_process.restart() + # Lame checks that are really for sanity + self.assertTrue(self.parent_stop.called) + self.assertTrue(self.parent_start.called) + + # Really what is being tested - retry configuration exists and that + # we do the right things when process check is false. + self.assertTrue(query_mock.called) + self.assertTrue(cleanup_mock.called) + self.assertFalse(sleep_mock.called) + + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.LibreSwanProcess._process_running', + return_value=True) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.LibreSwanProcess._cleanup_control_files') + @mock.patch('eventlet.sleep') + def test_restart_process_doesnt_stop(self, sleep_mock, cleanup_mock, + query_mock): + self.ipsec_process.restart() + # Lame checks that are really for sanity + self.assertTrue(self.parent_stop.called) + self.assertTrue(self.parent_start.called) + + # Really what is being tested - retry configuration exists and that + # we do the right things when process check is True. + self.assertEqual(5, query_mock.call_count) + self.assertFalse(cleanup_mock.called) + self.assertEqual(5, sleep_mock.call_count) + calls = [mock.call(1), mock.call(2), mock.call(4), + mock.call(8), mock.call(16)] + sleep_mock.assert_has_calls(calls) + + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.LibreSwanProcess._process_running', + side_effect=[True, True, False]) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.LibreSwanProcess._cleanup_control_files') + @mock.patch('eventlet.sleep') + def test_restart_process_retry_until_stop(self, sleep_mock, cleanup_mock, + query_mock): + self.ipsec_process.restart() + # Lame checks that are really for sanity + self.assertTrue(self.parent_start.called) + self.assertTrue(self.parent_stop.called) + + # Really what is being tested - retry configuration exists and that + # we do the right things when process check is True a few times and + # then returns False. + self.assertEqual(3, query_mock.call_count) + self.assertTrue(cleanup_mock.called) + self.assertEqual(2, sleep_mock.call_count) + + def test_process_running_no_pid(self): + with mock.patch('os.path.exists', return_value=False): + self.assertFalse( + self.ipsec_process._process_running()) + + # open() is used elsewhere, so we need to inject a mocked open into the + # module to be tested. + @mock.patch('os.path.exists', return_value=True) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.open', + create=True, + side_effect=IOError) + def test_process_running_open_failure(self, mock_open, mock_exists): + self.assertFalse(self.ipsec_process._process_running()) + self.assertTrue(mock_exists.called) + self.assertTrue(mock_open.called) + + @mock.patch('os.path.exists', return_value=True) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.open', + create=True, + side_effect=[io.StringIO(u'invalid'), + IOError]) + def test_process_running_bogus_pid(self, mock_open, mock_exists): + with mock.patch.object(libreswan_ipsec.LOG, 'error'): + self.assertFalse(self.ipsec_process._process_running()) + self.assertTrue(mock_exists.called) + self.assertEqual(2, mock_open.call_count) + + @mock.patch('os.path.exists', return_value=True) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.open', + create=True, + side_effect=[io.StringIO(u'134'), io.StringIO(u'')]) + def test_process_running_no_cmdline(self, mock_open, mock_exists): + with mock.patch.object(libreswan_ipsec.LOG, 'error') as log_mock: + self.assertFalse(self.ipsec_process._process_running()) + self.assertFalse(log_mock.called) + self.assertEqual(2, mock_open.call_count) + + @mock.patch('os.path.exists', return_value=True) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.open', + create=True, + side_effect=[io.StringIO(u'134'), io.StringIO(u'ps ax')]) + def test_process_running_cmdline_mismatch(self, mock_open, mock_exists): + with mock.patch.object(libreswan_ipsec.LOG, 'error') as log_mock: + self.assertFalse(self.ipsec_process._process_running()) + self.assertFalse(log_mock.called) + self.assertEqual(2, mock_open.call_count) + + @mock.patch('os.path.exists', return_value=True) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.open', + create=True, + side_effect=[io.StringIO(u'134'), + io.StringIO(u'/usr/libexec/ipsec/pluto -ctlbase' + '/some/foo/path')]) + def test_process_running_cmdline_match(self, mock_open, mock_exists): + self.ipsec_process.pid_path = '/some/foo/path' + with mock.patch.object(libreswan_ipsec.LOG, 'error') as log_mock: + self.assertTrue(self.ipsec_process._process_running()) + self.assertTrue(log_mock.called) + def test_ensure_configs(self): openswan_ipsec.OpenSwanProcess.ensure_configs = mock.Mock() with mock.patch.object(self.ipsec_process, '_execute') as fake_execute: @@ -810,7 +992,7 @@ class TestLibreSwanProcess(base.BaseTestCase): mock.call(['ipsec', 'checknss', self.ipsec_process.etc_dir])] fake_execute.assert_has_calls(expected) - self.assertEqual(fake_execute.call_count, 2) + self.assertEqual(2, fake_execute.call_count) with mock.patch.object(self.ipsec_process, '_execute') as fake_execute: fake_execute.side_effect = [None, RuntimeError, None] @@ -821,7 +1003,7 @@ class TestLibreSwanProcess(base.BaseTestCase): mock.call(['ipsec', 'initnss', self.ipsec_process.etc_dir])] fake_execute.assert_has_calls(expected) - self.assertEqual(fake_execute.call_count, 3) + self.assertEqual(3, fake_execute.call_count) class IPsecStrongswanDeviceDriverLegacy(IPSecDeviceLegacy):