diff --git a/etc/vpn_agent.ini b/etc/vpn_agent.ini index f06cc2b2e..419081130 100644 --- a/etc/vpn_agent.ini +++ b/etc/vpn_agent.ini @@ -25,3 +25,14 @@ # default_config_area=/usr/share/strongswan/templates/config/strongswan.d # Default is for ubuntu use, /etc/strongswan.d # default_config_area=/etc/strongswan.d + +[libreswan] +# Initial interval in seconds for checking if pluto daemon is shutdown +# shutdown_check_timeout=1 +# +# The maximum number of retries for checking for pluto daemon shutdown +# shutdown_check_retries=5 +# +# A factor to increase the retry interval for each retry +# shutdown_check_back_off=1.5 + diff --git a/neutron_vpnaas/services/vpn/device_drivers/libreswan_ipsec.py b/neutron_vpnaas/services/vpn/device_drivers/libreswan_ipsec.py index 6d2e25abc..d6dda6a4d 100644 --- a/neutron_vpnaas/services/vpn/device_drivers/libreswan_ipsec.py +++ b/neutron_vpnaas/services/vpn/device_drivers/libreswan_ipsec.py @@ -12,8 +12,37 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. +import os +import os.path + +import eventlet + +from neutron.i18n import _LE +from neutron.i18n import _LW +from oslo_config import cfg +from oslo_log import log as logging + from neutron_vpnaas.services.vpn.device_drivers import ipsec +LOG = logging.getLogger(__name__) + +libreswan_opts = [ + cfg.IntOpt('shutdown_check_timeout', + default=1, + help=_('Initial interval in seconds for checking if pluto ' + 'daemon is shutdown')), + cfg.IntOpt('shutdown_check_retries', + default=5, + help=_('The maximum number of retries for checking for ' + 'pluto daemon shutdown')), + cfg.FloatOpt('shutdown_check_back_off', + default=1.5, + help=_('A factor to increase the retry interval for ' + 'each retry')) +] + +cfg.CONF.register_opts(libreswan_opts, 'libreswan') + class LibreSwanProcess(ipsec.OpenSwanProcess): """Libreswan Process manager class. @@ -23,6 +52,7 @@ class LibreSwanProcess(ipsec.OpenSwanProcess): def __init__(self, conf, process_id, vpnservice, namespace): super(LibreSwanProcess, self).__init__(conf, process_id, vpnservice, namespace) + self.pid_file = '%s.pid' % self.pid_path def ensure_configs(self): """Generate config files which are needed for Libreswan. @@ -40,6 +70,96 @@ class LibreSwanProcess(ipsec.OpenSwanProcess): except RuntimeError: self._execute([self.binary, 'initnss', self.etc_dir]) + def _process_running(self): + """Checks if process is still running.""" + + # If no PID file, we assume the process is not running. + if not os.path.exists(self.pid_file): + return False + + try: + # We take an ask-forgiveness-not-permission approach and rely + # on throwing to tell us something. If the pid file exists, + # delve into the process information and check if it matches + # our expected command line. + with open(self.pid_file, 'r') as f: + pid = f.readline().strip() + with open('/proc/%s/cmdline' % pid) as cmd_line_file: + cmd_line = cmd_line_file.readline() + if self.pid_path in cmd_line and 'pluto' in cmd_line: + # Okay the process is probably a libreswan process + # and it contains the pid_path in the command + # line... could be a race. Log to error and return + # that it is *NOT* okay to clean up files. We are + # logging to error instead of debug because it + # indicates something bad has happened and this is + # valuable information for figuring it out. + LOG.error(_LE('Process %(pid)s exists with command ' + 'line %(cmd_line)s.') % + {'pid': pid, 'cmd_line': cmd_line}) + return True + + except IOError as e: + LOG.error(_LE('Unable to check control files on startup for ' + 'router %(router)s: %(msg)s'), + {'router': self.id, 'msg': e}) + return False + + def _cleanup_control_files(self): + try: + ctl_file = '%s.ctl' % self.pid_path + LOG.debug('Removing %(pidfile)s and %(ctlfile)s', + {'pidfile': self.pid_file, + 'ctlfile': '%s.ctl' % ctl_file}) + + if os.path.exists(self.pid_file): + os.remove(self.pid_file) + + if os.path.exists(ctl_file): + os.remove(ctl_file) + + except OSError as e: + LOG.error(_LE('Unable to remove libreswan control ' + 'files for router %(router)s. %(msg)s'), + {'router': self.id, 'msg': e}) + + def start(self): + # NOTE: The restart operation calls the parent's start() instead of + # this one to avoid having to special case the startup file check. + # If anything is added to this method that needs to run whenever + # a restart occurs, it should be either added to the restart() + # override or things refactored to special-case start() when + # called from restart(). + + # LibreSwan's use of the capablities library may prevent the ctl + # and pid files from being cleaned up, so we check to see if the + # process is running and if not, attempt a cleanup. In either case + # we fall through to allow the LibreSwan process to start or fail + # in the usual way. + if not self._process_running(): + self._cleanup_control_files() + + super(LibreSwanProcess, self).start() + + def restart(self): + # stop() followed immediately by a start() runs the risk that the + # current pluto daemon has not had a chance to shutdown. We check + # the current process information to see if the daemon is still + # running and if so, wait a short interval and retry. + self.stop() + wait_interval = cfg.CONF.libreswan.shutdown_check_timeout + for i in range(cfg.CONF.libreswan.shutdown_check_retries): + if not self._process_running(): + self._cleanup_control_files() + break + eventlet.sleep(wait_interval) + wait_interval *= cfg.CONF.libreswan.shutdown_check_back_off + else: + LOG.warning(_LW('Server appears to still be running, restart ' + 'of router %s may fail'), self.id) + + super(LibreSwanProcess, self).start() + class LibreSwanDriver(ipsec.IPsecDriver): def create_process(self, process_id, vpnservice, namespace): diff --git a/neutron_vpnaas/tests/unit/services/vpn/device_drivers/test_ipsec.py b/neutron_vpnaas/tests/unit/services/vpn/device_drivers/test_ipsec.py index f9dcc42ae..12e6c7880 100644 --- a/neutron_vpnaas/tests/unit/services/vpn/device_drivers/test_ipsec.py +++ b/neutron_vpnaas/tests/unit/services/vpn/device_drivers/test_ipsec.py @@ -14,6 +14,7 @@ # under the License. import copy import difflib +import io import mock import socket @@ -794,14 +795,195 @@ class TestOpenSwanProcess(BaseIPsecDeviceDriver): class TestLibreSwanProcess(base.BaseTestCase): + + _test_timeout = 1 + _test_backoff = 2 + _test_retries = 5 + def setUp(self): super(TestLibreSwanProcess, self).setUp() + # Insulate tests against changes to configuration defaults. + cfg.CONF.register_opts(libreswan_ipsec.libreswan_opts, + 'libreswan') + cfg.CONF.set_override('shutdown_check_timeout', self._test_timeout, + group='libreswan') + cfg.CONF.set_override('shutdown_check_back_off', self._test_backoff, + group='libreswan') + cfg.CONF.set_override('shutdown_check_retries', self._test_retries, + group='libreswan') + self.addCleanup(cfg.CONF.reset) self.vpnservice = copy.deepcopy(FAKE_VPN_SERVICE) + self.parent_start = mock.patch('neutron_vpnaas.services.' + 'vpn.device_drivers.ipsec.' + 'OpenSwanProcess.start').start() + self.parent_stop = mock.patch('neutron_vpnaas.services.' + 'vpn.device_drivers.ipsec.' + 'OpenSwanProcess.stop').start() + self.os_remove = mock.patch('os.remove').start() + self.ipsec_process = libreswan_ipsec.LibreSwanProcess(mock.ANY, 'foo-process-id', self.vpnservice, mock.ANY) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.LibreSwanProcess._cleanup_control_files') + def test_no_cleanups(self, cleanup_mock): + # Not an "awesome test" but more of a check box item. Basically, + # what happens if we didn't need to clean up any files. + with mock.patch.object(self.ipsec_process, + '_process_running', + return_value=True) as query_mock: + self.ipsec_process.start() + self.assertEqual(1, self.parent_start.call_count) + self.assertEqual(1, query_mock.call_count) + + # This is really what is being tested here. If process is + # running, we shouldn't attempt a cleanup. + self.assertFalse(cleanup_mock.called) + + @mock.patch('os.path.exists', return_value=True) + def test_cleanup_files(self, exists_mock): + # Tests the 'bones' of things really and kind of check-box-item-bogus + # test - this really needs exercising through a higher level test. + with mock.patch.object(self.ipsec_process, + '_process_running', + return_value=False) as query_mock: + fake_path = '/fake/path/run' + self.ipsec_process.pid_path = fake_path + self.ipsec_process.pid_file = '%s.pid' % fake_path + self.ipsec_process.start() + self.assertEqual(1, self.parent_start.call_count) + self.assertEqual(1, query_mock.call_count) + self.assertEqual(2, self.os_remove.call_count) + self.os_remove.assert_has_calls([mock.call('%s.pid' % fake_path), + mock.call('%s.ctl' % fake_path)]) + + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.LibreSwanProcess._process_running', + return_value=False) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.LibreSwanProcess._cleanup_control_files') + @mock.patch('eventlet.sleep') + def test_restart_process_not_running(self, sleep_mock, cleanup_mock, + query_mock): + self.ipsec_process.restart() + # Lame checks that are really for sanity + self.assertTrue(self.parent_stop.called) + self.assertTrue(self.parent_start.called) + + # Really what is being tested - retry configuration exists and that + # we do the right things when process check is false. + self.assertTrue(query_mock.called) + self.assertTrue(cleanup_mock.called) + self.assertFalse(sleep_mock.called) + + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.LibreSwanProcess._process_running', + return_value=True) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.LibreSwanProcess._cleanup_control_files') + @mock.patch('eventlet.sleep') + def test_restart_process_doesnt_stop(self, sleep_mock, cleanup_mock, + query_mock): + self.ipsec_process.restart() + # Lame checks that are really for sanity + self.assertTrue(self.parent_stop.called) + self.assertTrue(self.parent_start.called) + + # Really what is being tested - retry configuration exists and that + # we do the right things when process check is True. + self.assertEqual(5, query_mock.call_count) + self.assertFalse(cleanup_mock.called) + self.assertEqual(5, sleep_mock.call_count) + calls = [mock.call(1), mock.call(2), mock.call(4), + mock.call(8), mock.call(16)] + sleep_mock.assert_has_calls(calls) + + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.LibreSwanProcess._process_running', + side_effect=[True, True, False]) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.LibreSwanProcess._cleanup_control_files') + @mock.patch('eventlet.sleep') + def test_restart_process_retry_until_stop(self, sleep_mock, cleanup_mock, + query_mock): + self.ipsec_process.restart() + # Lame checks that are really for sanity + self.assertTrue(self.parent_start.called) + self.assertTrue(self.parent_stop.called) + + # Really what is being tested - retry configuration exists and that + # we do the right things when process check is True a few times and + # then returns False. + self.assertEqual(3, query_mock.call_count) + self.assertTrue(cleanup_mock.called) + self.assertEqual(2, sleep_mock.call_count) + + def test_process_running_no_pid(self): + with mock.patch('os.path.exists', return_value=False): + self.assertFalse( + self.ipsec_process._process_running()) + + # open() is used elsewhere, so we need to inject a mocked open into the + # module to be tested. + @mock.patch('os.path.exists', return_value=True) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.open', + create=True, + side_effect=IOError) + def test_process_running_open_failure(self, mock_open, mock_exists): + self.assertFalse(self.ipsec_process._process_running()) + self.assertTrue(mock_exists.called) + self.assertTrue(mock_open.called) + + @mock.patch('os.path.exists', return_value=True) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.open', + create=True, + side_effect=[io.StringIO(u'invalid'), + IOError]) + def test_process_running_bogus_pid(self, mock_open, mock_exists): + with mock.patch.object(libreswan_ipsec.LOG, 'error'): + self.assertFalse(self.ipsec_process._process_running()) + self.assertTrue(mock_exists.called) + self.assertEqual(2, mock_open.call_count) + + @mock.patch('os.path.exists', return_value=True) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.open', + create=True, + side_effect=[io.StringIO(u'134'), io.StringIO(u'')]) + def test_process_running_no_cmdline(self, mock_open, mock_exists): + with mock.patch.object(libreswan_ipsec.LOG, 'error') as log_mock: + self.assertFalse(self.ipsec_process._process_running()) + self.assertFalse(log_mock.called) + self.assertEqual(2, mock_open.call_count) + + @mock.patch('os.path.exists', return_value=True) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.open', + create=True, + side_effect=[io.StringIO(u'134'), io.StringIO(u'ps ax')]) + def test_process_running_cmdline_mismatch(self, mock_open, mock_exists): + with mock.patch.object(libreswan_ipsec.LOG, 'error') as log_mock: + self.assertFalse(self.ipsec_process._process_running()) + self.assertFalse(log_mock.called) + self.assertEqual(2, mock_open.call_count) + + @mock.patch('os.path.exists', return_value=True) + @mock.patch('neutron_vpnaas.services.vpn.device_drivers.' + 'libreswan_ipsec.open', + create=True, + side_effect=[io.StringIO(u'134'), + io.StringIO(u'/usr/libexec/ipsec/pluto -ctlbase' + '/some/foo/path')]) + def test_process_running_cmdline_match(self, mock_open, mock_exists): + self.ipsec_process.pid_path = '/some/foo/path' + with mock.patch.object(libreswan_ipsec.LOG, 'error') as log_mock: + self.assertTrue(self.ipsec_process._process_running()) + self.assertTrue(log_mock.called) + def test_ensure_configs(self): openswan_ipsec.OpenSwanProcess.ensure_configs = mock.Mock() with mock.patch.object(self.ipsec_process, '_execute') as fake_execute: @@ -810,7 +992,7 @@ class TestLibreSwanProcess(base.BaseTestCase): mock.call(['ipsec', 'checknss', self.ipsec_process.etc_dir])] fake_execute.assert_has_calls(expected) - self.assertEqual(fake_execute.call_count, 2) + self.assertEqual(2, fake_execute.call_count) with mock.patch.object(self.ipsec_process, '_execute') as fake_execute: fake_execute.side_effect = [None, RuntimeError, None] @@ -821,7 +1003,7 @@ class TestLibreSwanProcess(base.BaseTestCase): mock.call(['ipsec', 'initnss', self.ipsec_process.etc_dir])] fake_execute.assert_has_calls(expected) - self.assertEqual(fake_execute.call_count, 3) + self.assertEqual(3, fake_execute.call_count) class IPsecStrongswanDeviceDriverLegacy(IPSecDeviceLegacy):