Merge "Manage cleanup of .ctl/.pid files for LibreSwan"

This commit is contained in:
Jenkins 2015-09-08 13:10:10 +00:00 committed by Gerrit Code Review
commit 2baf733818
3 changed files with 315 additions and 2 deletions

View File

@ -32,3 +32,14 @@
# default_config_area=/usr/share/strongswan/templates/config/strongswan.d # default_config_area=/usr/share/strongswan/templates/config/strongswan.d
# Default is for ubuntu use, /etc/strongswan.d # Default is for ubuntu use, /etc/strongswan.d
# default_config_area=/etc/strongswan.d # default_config_area=/etc/strongswan.d
[libreswan]
# Initial interval in seconds for checking if pluto daemon is shutdown
# shutdown_check_timeout=1
#
# The maximum number of retries for checking for pluto daemon shutdown
# shutdown_check_retries=5
#
# A factor to increase the retry interval for each retry
# shutdown_check_back_off=1.5

View File

@ -12,8 +12,37 @@
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import os
import os.path
import eventlet
from neutron.i18n import _LE
from neutron.i18n import _LW
from oslo_config import cfg
from oslo_log import log as logging
from neutron_vpnaas.services.vpn.device_drivers import ipsec from neutron_vpnaas.services.vpn.device_drivers import ipsec
LOG = logging.getLogger(__name__)
libreswan_opts = [
cfg.IntOpt('shutdown_check_timeout',
default=1,
help=_('Initial interval in seconds for checking if pluto '
'daemon is shutdown')),
cfg.IntOpt('shutdown_check_retries',
default=5,
help=_('The maximum number of retries for checking for '
'pluto daemon shutdown')),
cfg.FloatOpt('shutdown_check_back_off',
default=1.5,
help=_('A factor to increase the retry interval for '
'each retry'))
]
cfg.CONF.register_opts(libreswan_opts, 'libreswan')
class LibreSwanProcess(ipsec.OpenSwanProcess): class LibreSwanProcess(ipsec.OpenSwanProcess):
"""Libreswan Process manager class. """Libreswan Process manager class.
@ -23,6 +52,7 @@ class LibreSwanProcess(ipsec.OpenSwanProcess):
def __init__(self, conf, process_id, vpnservice, namespace): def __init__(self, conf, process_id, vpnservice, namespace):
super(LibreSwanProcess, self).__init__(conf, process_id, super(LibreSwanProcess, self).__init__(conf, process_id,
vpnservice, namespace) vpnservice, namespace)
self.pid_file = '%s.pid' % self.pid_path
def ensure_configs(self): def ensure_configs(self):
"""Generate config files which are needed for Libreswan. """Generate config files which are needed for Libreswan.
@ -40,6 +70,96 @@ class LibreSwanProcess(ipsec.OpenSwanProcess):
except RuntimeError: except RuntimeError:
self._execute([self.binary, 'initnss', self.etc_dir]) self._execute([self.binary, 'initnss', self.etc_dir])
def _process_running(self):
"""Checks if process is still running."""
# If no PID file, we assume the process is not running.
if not os.path.exists(self.pid_file):
return False
try:
# We take an ask-forgiveness-not-permission approach and rely
# on throwing to tell us something. If the pid file exists,
# delve into the process information and check if it matches
# our expected command line.
with open(self.pid_file, 'r') as f:
pid = f.readline().strip()
with open('/proc/%s/cmdline' % pid) as cmd_line_file:
cmd_line = cmd_line_file.readline()
if self.pid_path in cmd_line and 'pluto' in cmd_line:
# Okay the process is probably a libreswan process
# and it contains the pid_path in the command
# line... could be a race. Log to error and return
# that it is *NOT* okay to clean up files. We are
# logging to error instead of debug because it
# indicates something bad has happened and this is
# valuable information for figuring it out.
LOG.error(_LE('Process %(pid)s exists with command '
'line %(cmd_line)s.') %
{'pid': pid, 'cmd_line': cmd_line})
return True
except IOError as e:
LOG.error(_LE('Unable to check control files on startup for '
'router %(router)s: %(msg)s'),
{'router': self.id, 'msg': e})
return False
def _cleanup_control_files(self):
try:
ctl_file = '%s.ctl' % self.pid_path
LOG.debug('Removing %(pidfile)s and %(ctlfile)s',
{'pidfile': self.pid_file,
'ctlfile': '%s.ctl' % ctl_file})
if os.path.exists(self.pid_file):
os.remove(self.pid_file)
if os.path.exists(ctl_file):
os.remove(ctl_file)
except OSError as e:
LOG.error(_LE('Unable to remove libreswan control '
'files for router %(router)s. %(msg)s'),
{'router': self.id, 'msg': e})
def start(self):
# NOTE: The restart operation calls the parent's start() instead of
# this one to avoid having to special case the startup file check.
# If anything is added to this method that needs to run whenever
# a restart occurs, it should be either added to the restart()
# override or things refactored to special-case start() when
# called from restart().
# LibreSwan's use of the capablities library may prevent the ctl
# and pid files from being cleaned up, so we check to see if the
# process is running and if not, attempt a cleanup. In either case
# we fall through to allow the LibreSwan process to start or fail
# in the usual way.
if not self._process_running():
self._cleanup_control_files()
super(LibreSwanProcess, self).start()
def restart(self):
# stop() followed immediately by a start() runs the risk that the
# current pluto daemon has not had a chance to shutdown. We check
# the current process information to see if the daemon is still
# running and if so, wait a short interval and retry.
self.stop()
wait_interval = cfg.CONF.libreswan.shutdown_check_timeout
for i in range(cfg.CONF.libreswan.shutdown_check_retries):
if not self._process_running():
self._cleanup_control_files()
break
eventlet.sleep(wait_interval)
wait_interval *= cfg.CONF.libreswan.shutdown_check_back_off
else:
LOG.warning(_LW('Server appears to still be running, restart '
'of router %s may fail'), self.id)
super(LibreSwanProcess, self).start()
class LibreSwanDriver(ipsec.IPsecDriver): class LibreSwanDriver(ipsec.IPsecDriver):
def create_process(self, process_id, vpnservice, namespace): def create_process(self, process_id, vpnservice, namespace):

View File

@ -14,6 +14,7 @@
# under the License. # under the License.
import copy import copy
import difflib import difflib
import io
import mock import mock
import socket import socket
@ -798,14 +799,195 @@ class TestOpenSwanProcess(BaseIPsecDeviceDriver):
class TestLibreSwanProcess(base.BaseTestCase): class TestLibreSwanProcess(base.BaseTestCase):
_test_timeout = 1
_test_backoff = 2
_test_retries = 5
def setUp(self): def setUp(self):
super(TestLibreSwanProcess, self).setUp() super(TestLibreSwanProcess, self).setUp()
# Insulate tests against changes to configuration defaults.
cfg.CONF.register_opts(libreswan_ipsec.libreswan_opts,
'libreswan')
cfg.CONF.set_override('shutdown_check_timeout', self._test_timeout,
group='libreswan')
cfg.CONF.set_override('shutdown_check_back_off', self._test_backoff,
group='libreswan')
cfg.CONF.set_override('shutdown_check_retries', self._test_retries,
group='libreswan')
self.addCleanup(cfg.CONF.reset)
self.vpnservice = copy.deepcopy(FAKE_VPN_SERVICE) self.vpnservice = copy.deepcopy(FAKE_VPN_SERVICE)
self.parent_start = mock.patch('neutron_vpnaas.services.'
'vpn.device_drivers.ipsec.'
'OpenSwanProcess.start').start()
self.parent_stop = mock.patch('neutron_vpnaas.services.'
'vpn.device_drivers.ipsec.'
'OpenSwanProcess.stop').start()
self.os_remove = mock.patch('os.remove').start()
self.ipsec_process = libreswan_ipsec.LibreSwanProcess(mock.ANY, self.ipsec_process = libreswan_ipsec.LibreSwanProcess(mock.ANY,
'foo-process-id', 'foo-process-id',
self.vpnservice, self.vpnservice,
mock.ANY) mock.ANY)
@mock.patch('neutron_vpnaas.services.vpn.device_drivers.'
'libreswan_ipsec.LibreSwanProcess._cleanup_control_files')
def test_no_cleanups(self, cleanup_mock):
# Not an "awesome test" but more of a check box item. Basically,
# what happens if we didn't need to clean up any files.
with mock.patch.object(self.ipsec_process,
'_process_running',
return_value=True) as query_mock:
self.ipsec_process.start()
self.assertEqual(1, self.parent_start.call_count)
self.assertEqual(1, query_mock.call_count)
# This is really what is being tested here. If process is
# running, we shouldn't attempt a cleanup.
self.assertFalse(cleanup_mock.called)
@mock.patch('os.path.exists', return_value=True)
def test_cleanup_files(self, exists_mock):
# Tests the 'bones' of things really and kind of check-box-item-bogus
# test - this really needs exercising through a higher level test.
with mock.patch.object(self.ipsec_process,
'_process_running',
return_value=False) as query_mock:
fake_path = '/fake/path/run'
self.ipsec_process.pid_path = fake_path
self.ipsec_process.pid_file = '%s.pid' % fake_path
self.ipsec_process.start()
self.assertEqual(1, self.parent_start.call_count)
self.assertEqual(1, query_mock.call_count)
self.assertEqual(2, self.os_remove.call_count)
self.os_remove.assert_has_calls([mock.call('%s.pid' % fake_path),
mock.call('%s.ctl' % fake_path)])
@mock.patch('neutron_vpnaas.services.vpn.device_drivers.'
'libreswan_ipsec.LibreSwanProcess._process_running',
return_value=False)
@mock.patch('neutron_vpnaas.services.vpn.device_drivers.'
'libreswan_ipsec.LibreSwanProcess._cleanup_control_files')
@mock.patch('eventlet.sleep')
def test_restart_process_not_running(self, sleep_mock, cleanup_mock,
query_mock):
self.ipsec_process.restart()
# Lame checks that are really for sanity
self.assertTrue(self.parent_stop.called)
self.assertTrue(self.parent_start.called)
# Really what is being tested - retry configuration exists and that
# we do the right things when process check is false.
self.assertTrue(query_mock.called)
self.assertTrue(cleanup_mock.called)
self.assertFalse(sleep_mock.called)
@mock.patch('neutron_vpnaas.services.vpn.device_drivers.'
'libreswan_ipsec.LibreSwanProcess._process_running',
return_value=True)
@mock.patch('neutron_vpnaas.services.vpn.device_drivers.'
'libreswan_ipsec.LibreSwanProcess._cleanup_control_files')
@mock.patch('eventlet.sleep')
def test_restart_process_doesnt_stop(self, sleep_mock, cleanup_mock,
query_mock):
self.ipsec_process.restart()
# Lame checks that are really for sanity
self.assertTrue(self.parent_stop.called)
self.assertTrue(self.parent_start.called)
# Really what is being tested - retry configuration exists and that
# we do the right things when process check is True.
self.assertEqual(5, query_mock.call_count)
self.assertFalse(cleanup_mock.called)
self.assertEqual(5, sleep_mock.call_count)
calls = [mock.call(1), mock.call(2), mock.call(4),
mock.call(8), mock.call(16)]
sleep_mock.assert_has_calls(calls)
@mock.patch('neutron_vpnaas.services.vpn.device_drivers.'
'libreswan_ipsec.LibreSwanProcess._process_running',
side_effect=[True, True, False])
@mock.patch('neutron_vpnaas.services.vpn.device_drivers.'
'libreswan_ipsec.LibreSwanProcess._cleanup_control_files')
@mock.patch('eventlet.sleep')
def test_restart_process_retry_until_stop(self, sleep_mock, cleanup_mock,
query_mock):
self.ipsec_process.restart()
# Lame checks that are really for sanity
self.assertTrue(self.parent_start.called)
self.assertTrue(self.parent_stop.called)
# Really what is being tested - retry configuration exists and that
# we do the right things when process check is True a few times and
# then returns False.
self.assertEqual(3, query_mock.call_count)
self.assertTrue(cleanup_mock.called)
self.assertEqual(2, sleep_mock.call_count)
def test_process_running_no_pid(self):
with mock.patch('os.path.exists', return_value=False):
self.assertFalse(
self.ipsec_process._process_running())
# open() is used elsewhere, so we need to inject a mocked open into the
# module to be tested.
@mock.patch('os.path.exists', return_value=True)
@mock.patch('neutron_vpnaas.services.vpn.device_drivers.'
'libreswan_ipsec.open',
create=True,
side_effect=IOError)
def test_process_running_open_failure(self, mock_open, mock_exists):
self.assertFalse(self.ipsec_process._process_running())
self.assertTrue(mock_exists.called)
self.assertTrue(mock_open.called)
@mock.patch('os.path.exists', return_value=True)
@mock.patch('neutron_vpnaas.services.vpn.device_drivers.'
'libreswan_ipsec.open',
create=True,
side_effect=[io.StringIO(u'invalid'),
IOError])
def test_process_running_bogus_pid(self, mock_open, mock_exists):
with mock.patch.object(libreswan_ipsec.LOG, 'error'):
self.assertFalse(self.ipsec_process._process_running())
self.assertTrue(mock_exists.called)
self.assertEqual(2, mock_open.call_count)
@mock.patch('os.path.exists', return_value=True)
@mock.patch('neutron_vpnaas.services.vpn.device_drivers.'
'libreswan_ipsec.open',
create=True,
side_effect=[io.StringIO(u'134'), io.StringIO(u'')])
def test_process_running_no_cmdline(self, mock_open, mock_exists):
with mock.patch.object(libreswan_ipsec.LOG, 'error') as log_mock:
self.assertFalse(self.ipsec_process._process_running())
self.assertFalse(log_mock.called)
self.assertEqual(2, mock_open.call_count)
@mock.patch('os.path.exists', return_value=True)
@mock.patch('neutron_vpnaas.services.vpn.device_drivers.'
'libreswan_ipsec.open',
create=True,
side_effect=[io.StringIO(u'134'), io.StringIO(u'ps ax')])
def test_process_running_cmdline_mismatch(self, mock_open, mock_exists):
with mock.patch.object(libreswan_ipsec.LOG, 'error') as log_mock:
self.assertFalse(self.ipsec_process._process_running())
self.assertFalse(log_mock.called)
self.assertEqual(2, mock_open.call_count)
@mock.patch('os.path.exists', return_value=True)
@mock.patch('neutron_vpnaas.services.vpn.device_drivers.'
'libreswan_ipsec.open',
create=True,
side_effect=[io.StringIO(u'134'),
io.StringIO(u'/usr/libexec/ipsec/pluto -ctlbase'
'/some/foo/path')])
def test_process_running_cmdline_match(self, mock_open, mock_exists):
self.ipsec_process.pid_path = '/some/foo/path'
with mock.patch.object(libreswan_ipsec.LOG, 'error') as log_mock:
self.assertTrue(self.ipsec_process._process_running())
self.assertTrue(log_mock.called)
def test_ensure_configs(self): def test_ensure_configs(self):
openswan_ipsec.OpenSwanProcess.ensure_configs = mock.Mock() openswan_ipsec.OpenSwanProcess.ensure_configs = mock.Mock()
with mock.patch.object(self.ipsec_process, '_execute') as fake_execute: with mock.patch.object(self.ipsec_process, '_execute') as fake_execute:
@ -814,7 +996,7 @@ class TestLibreSwanProcess(base.BaseTestCase):
mock.call(['ipsec', 'checknss', mock.call(['ipsec', 'checknss',
self.ipsec_process.etc_dir])] self.ipsec_process.etc_dir])]
fake_execute.assert_has_calls(expected) fake_execute.assert_has_calls(expected)
self.assertEqual(fake_execute.call_count, 2) self.assertEqual(2, fake_execute.call_count)
with mock.patch.object(self.ipsec_process, '_execute') as fake_execute: with mock.patch.object(self.ipsec_process, '_execute') as fake_execute:
fake_execute.side_effect = [None, RuntimeError, None] fake_execute.side_effect = [None, RuntimeError, None]
@ -825,7 +1007,7 @@ class TestLibreSwanProcess(base.BaseTestCase):
mock.call(['ipsec', 'initnss', mock.call(['ipsec', 'initnss',
self.ipsec_process.etc_dir])] self.ipsec_process.etc_dir])]
fake_execute.assert_has_calls(expected) fake_execute.assert_has_calls(expected)
self.assertEqual(fake_execute.call_count, 3) self.assertEqual(3, fake_execute.call_count)
class IPsecStrongswanDeviceDriverLegacy(IPSecDeviceLegacy): class IPsecStrongswanDeviceDriverLegacy(IPSecDeviceLegacy):