Remove obsolete PID files before start

External processes, such as radvd, can refuse to start
and throw an exception such as:

  "Unable to convert value in $pidfile"

because the given pidfile has more than one PID in it.
The situation can happen when the neutron node is reset
and the obsolete PID files are not cleaned before neutron
is started.

This commit adds PID file cleanup before external
process start.

Closes-bug: #2033980
Change-Id: Id62bf18067d0b144c3e8825c7603cc1e51dca052
(cherry picked from commit c3b855a100)
This commit is contained in:
Brian Haley 2023-09-19 13:25:41 -04:00
parent c03d76a41d
commit a7aeec703d
9 changed files with 82 additions and 37 deletions

View File

@ -87,7 +87,19 @@ class ProcessManager(MonitoredProcess):
if not self.active:
if not cmd_callback:
cmd_callback = self.default_cmd_callback
cmd = cmd_callback(self.get_pid_file_name())
# Always try and remove the pid file, as it's existence could
# stop the process from starting
pid_file = self.get_pid_file_name()
try:
utils.delete_if_exists(pid_file, run_as_root=self.run_as_root)
except Exception as e:
LOG.error("Could not delete file %(pid_file)s, %(service)s "
"could fail to start. Exception: %(exc)s",
{'pid_file': pid_file,
'service': self.service,
'exc': e})
cmd = cmd_callback(pid_file)
ip_wrapper = ip_lib.IPWrapper(namespace=self.namespace)
ip_wrapper.netns.execute(cmd, addl_env=self.cmd_addl_env,
@ -99,12 +111,14 @@ class ProcessManager(MonitoredProcess):
def reload_cfg(self):
if self.custom_reload_callback:
self.disable(get_stop_command=self.custom_reload_callback)
self.disable(get_stop_command=self.custom_reload_callback,
delete_pid_file=False)
else:
self.disable('HUP')
self.disable('HUP', delete_pid_file=False)
def disable(self, sig='9', get_stop_command=None):
def disable(self, sig='9', get_stop_command=None, delete_pid_file=True):
pid = self.pid
delete_pid_file = delete_pid_file or sig == '9'
if self.active:
if get_stop_command:
@ -118,10 +132,10 @@ class ProcessManager(MonitoredProcess):
utils.execute(cmd, addl_env=self.cmd_addl_env,
run_as_root=self.run_as_root,
privsep_exec=True)
# In the case of shutting down, remove the pid file
if sig == '9':
utils.delete_if_exists(self.get_pid_file_name(),
run_as_root=self.run_as_root)
if delete_pid_file:
utils.delete_if_exists(self.get_pid_file_name(),
run_as_root=self.run_as_root)
elif pid:
LOG.debug('%(service)s process for %(uuid)s pid %(pid)d is stale, '
'ignoring signal %(signal)s',

View File

@ -427,15 +427,6 @@ class KeepalivedManager(object):
return config_path
@staticmethod
def _safe_remove_pid_file(pid_file):
try:
os.remove(pid_file)
except OSError as e:
if e.errno != errno.ENOENT:
LOG.error("Could not delete file %s, keepalived can "
"refuse to start.", pid_file)
def get_vrrp_pid_file_name(self, base_pid_file):
return '%s-vrrp' % base_pid_file
@ -516,9 +507,6 @@ class KeepalivedManager(object):
if vrrp_pm.active:
vrrp_pm.disable()
self._safe_remove_pid_file(pid_file)
self._safe_remove_pid_file(self.get_vrrp_pid_file_name(pid_file))
cmd = ['keepalived', '-P',
'-f', config_path,
'-p', pid_file,

View File

@ -297,9 +297,8 @@ class MetadataDriver(object):
pm.pid, SIGTERM_TIMEOUT)
pm.disable(sig=str(int(signal.SIGKILL)))
# Delete metadata proxy config and PID files.
# Delete metadata proxy config.
HaproxyConfigurator.cleanup_config_file(uuid, cfg.CONF.state_path)
linux_utils.delete_if_exists(pm.get_pid_file_name(), run_as_root=True)
cls.monitors.pop(uuid, None)

View File

@ -34,6 +34,9 @@ class TestHostMedataHAProxyDaemonMonitor(base.BaseTestCase):
'neutron.agent.linux.utils.execute')
self.utils_exec = self.utils_exec_p.start()
self.delete_if_exists = mock.patch(
'neutron.agent.linux.utils.delete_if_exists').start()
self.utils_replace_file_p = mock.patch(
'neutron_lib.utils.file.replace_file')
self.utils_replace_file = self.utils_replace_file_p.start()
@ -70,6 +73,8 @@ class TestHostMedataHAProxyDaemonMonitor(base.BaseTestCase):
self.assertIn('haproxy', cmd)
self.assertIn(_join('-f', conffile), cmd)
self.assertIn(_join('-p', pidfile), cmd)
self.delete_if_exists.assert_called_once_with(pidfile,
run_as_root=True)
def test_generate_host_metadata_haproxy_config(self):
cfg.CONF.set_override('metadata_proxy_shared_secret',

View File

@ -3309,8 +3309,9 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
self.assertFalse(ri.remove_floating_ip.called)
@mock.patch.object(os, 'geteuid', return_value=mock.ANY)
@mock.patch.object(linux_utils, 'delete_if_exists')
@mock.patch.object(pwd, 'getpwuid')
def test_spawn_radvd(self, mock_getpwuid, *args):
def test_spawn_radvd(self, mock_getpwuid, mock_delete, *args):
router = l3_test_common.prepare_router_data(
ip_version=lib_constants.IP_VERSION_6)
@ -3346,6 +3347,8 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
self.conf.set_override('radvd_user', radvd_user)
mock_getpwuid.return_value = FakeUser(os_user)
radvd.enable(router['_interfaces'])
mock_delete.assert_called_once_with(pidfile, run_as_root=True)
mock_delete.reset_mock()
cmd = execute.call_args[0][0]
_join = lambda *args: ' '.join(args)
cmd = _join(*cmd)

View File

@ -35,6 +35,9 @@ class TestBasicRouterOperations(base.BaseTestCase):
self.device_exists_p = mock.patch(
'neutron.agent.linux.ip_lib.device_exists')
self.device_exists = self.device_exists_p.start()
self.delete_if_exists_p = mock.patch(
'neutron.agent.linux.utils.delete_if_exists')
self.delete_if_exists = self.delete_if_exists_p.start()
def _create_router(self, router=None, **kwargs):
if not router:

View File

@ -121,9 +121,11 @@ class TestProcessManager(base.BaseTestCase):
self.execute_p = mock.patch('neutron.agent.common.utils.execute')
self.execute = self.execute_p.start()
self.delete_if_exists = mock.patch(
'oslo_utils.fileutils.delete_if_exists').start()
'neutron.agent.linux.utils.delete_if_exists').start()
self.ensure_dir = mock.patch.object(
fileutils, 'ensure_tree').start()
self.error_log = mock.patch("neutron.agent.linux.external_process."
"LOG.error").start()
self.conf = mock.Mock()
self.conf.external_pids = '/var/path'
@ -168,6 +170,8 @@ class TestProcessManager(base.BaseTestCase):
with mock.patch.object(ep, 'ip_lib') as ip_lib:
manager.enable(callback)
callback.assert_called_once_with('pidfile')
self.delete_if_exists.assert_called_once_with(
'pidfile', run_as_root=True)
env = {ep.PROCESS_TAG: ep.DEFAULT_SERVICE_NAME + '-uuid'}
ip_lib.assert_has_calls([
mock.call.IPWrapper(namespace='ns'),
@ -204,11 +208,34 @@ class TestProcessManager(base.BaseTestCase):
except common_utils.WaitTimeout:
self.fail('ProcessManager.enable() raised WaitTimeout')
def test_enable_delete_pid_file_raises(self):
callback = mock.Mock()
cmd = ['the', 'cmd']
callback.return_value = cmd
self.delete_if_exists.side_effect = OSError
with mock.patch.object(ep.ProcessManager, 'get_pid_file_name') as name:
name.return_value = 'pidfile'
with mock.patch.object(ep.ProcessManager, 'active') as active:
active.__get__ = mock.Mock(return_value=False)
manager = ep.ProcessManager(self.conf, 'uuid')
manager.enable(callback)
callback.assert_called_once_with('pidfile')
cmd = ['env', DEFAULT_ENVVAR + '-uuid'] + cmd
self.execute.assert_called_once_with(cmd,
check_exit_code=True,
extra_ok_codes=None,
run_as_root=False,
log_fail_as_error=True,
privsep_exec=False)
self.error_log.assert_called_once()
def test_reload_cfg_without_custom_reload_callback(self):
with mock.patch.object(ep.ProcessManager, 'disable') as disable:
manager = ep.ProcessManager(self.conf, 'uuid', namespace='ns')
manager.reload_cfg()
disable.assert_called_once_with('HUP')
disable.assert_called_once_with('HUP', delete_pid_file=False)
def test_reload_cfg_with_custom_reload_callback(self):
reload_callback = mock.sentinel.callback
@ -217,7 +244,8 @@ class TestProcessManager(base.BaseTestCase):
self.conf, 'uuid', namespace='ns',
custom_reload_callback=reload_callback)
manager.reload_cfg()
disable.assert_called_once_with(get_stop_command=reload_callback)
disable.assert_called_once_with(get_stop_command=reload_callback,
delete_pid_file=False)
def test_disable_get_stop_command(self):
cmd = ['the', 'cmd']

View File

@ -99,7 +99,7 @@ class TestMetadataDriverProcess(base.BaseTestCase):
mock.patch('neutron.agent.l3.ha.AgentMixin'
'._init_ha_conf_path').start()
self.delete_if_exists = mock.patch.object(linux_utils,
'delete_if_exists')
'delete_if_exists').start()
self.mock_get_process = mock.patch.object(
metadata_driver.MetadataDriver,
'_get_metadata_proxy_process_manager')
@ -264,6 +264,9 @@ class TestMetadataDriverProcess(base.BaseTestCase):
router_id, metadata_driver.METADATA_SERVICE_NAME,
mock.ANY)
self.delete_if_exists.assert_called_once_with(
mock.ANY, run_as_root=True)
def test_spawn_metadata_proxy(self):
self._test_spawn_metadata_proxy()
@ -311,9 +314,7 @@ class TestMetadataDriverProcess(base.BaseTestCase):
config.create_config_file)
def test_destroy_monitored_metadata_proxy(self):
delete_if_exists = self.delete_if_exists.start()
mproxy_process = mock.Mock(
active=False, get_pid_file_name=mock.Mock(return_value='pid_file'))
mproxy_process = mock.Mock(active=False)
mock_get_process = self.mock_get_process.start()
mock_get_process.return_value = mproxy_process
driver = metadata_driver.MetadataDriver(FakeL3NATAgent())
@ -321,13 +322,11 @@ class TestMetadataDriverProcess(base.BaseTestCase):
'ns_name')
mproxy_process.disable.assert_called_once_with(
sig=str(int(signal.SIGTERM)))
delete_if_exists.assert_has_calls([
mock.call('pid_file', run_as_root=True)])
self.delete_if_exists.assert_called_once_with(
mock.ANY, run_as_root=True)
def test_destroy_monitored_metadata_proxy_force(self):
delete_if_exists = self.delete_if_exists.start()
mproxy_process = mock.Mock(
active=True, get_pid_file_name=mock.Mock(return_value='pid_file'))
mproxy_process = mock.Mock(active=True)
mock_get_process = self.mock_get_process.start()
mock_get_process.return_value = mproxy_process
driver = metadata_driver.MetadataDriver(FakeL3NATAgent())
@ -337,5 +336,5 @@ class TestMetadataDriverProcess(base.BaseTestCase):
mproxy_process.disable.assert_has_calls([
mock.call(sig=str(int(signal.SIGTERM))),
mock.call(sig=str(int(signal.SIGKILL)))])
delete_if_exists.assert_has_calls([
mock.call('pid_file', run_as_root=True)])
self.delete_if_exists.assert_called_once_with(
mock.ANY, run_as_root=True)

View File

@ -21,6 +21,7 @@ from oslo_config import cfg
from oslo_utils import uuidutils
from neutron.agent.linux import external_process as ep
from neutron.agent.linux import utils as linux_utils
from neutron.agent.ovn.metadata import agent as metadata_agent
from neutron.agent.ovn.metadata import driver as metadata_driver
from neutron.common import metadata as comm_meta
@ -51,6 +52,8 @@ class TestMetadataDriverProcess(base.BaseTestCase):
def setUp(self):
super(TestMetadataDriverProcess, self).setUp()
mock.patch('eventlet.spawn').start()
self.delete_if_exists = mock.patch.object(linux_utils,
'delete_if_exists').start()
ovn_meta_conf.register_meta_conf_opts(meta_conf.SHARED_OPTS, cfg.CONF)
ovn_meta_conf.register_meta_conf_opts(
@ -153,6 +156,9 @@ class TestMetadataDriverProcess(base.BaseTestCase):
run_as_root=True)
])
self.delete_if_exists.assert_called_once_with(
mock.ANY, run_as_root=True)
def test_create_config_file_wrong_user(self):
with mock.patch('pwd.getpwnam', side_effect=KeyError):
config = metadata_driver.HaproxyConfigurator(mock.ANY, mock.ANY,