Add kill hooks for external processes

This patch adds the possibility to configure kill hooks used to kill
external processes, like dnsmasq or keepalived.

Change-Id: I29dfbedfb7167982323dcff1c4554ee780cc48db
Closes-Bug: #1825943
This commit is contained in:
Slawek Kaplonski 2019-05-27 13:17:28 +02:00
parent 3f837836f6
commit 93015527f0
16 changed files with 129 additions and 5 deletions

View File

@ -43,3 +43,36 @@ vary between hosts in a neutron deployment such as the ``local_ip`` for an L2
agent. If any agent requires access to additional external services beyond the
neutron RPC, those endpoints should be defined in the agent-specific
configuration file (for example, nova metadata for metadata agent).
External processes run by agents
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Some neutron agents, like DHCP, Metadata or L3, often run external
processes to provide some of their functionalities. It may be keepalived,
dnsmasq, haproxy or some other process.
Neutron agents are responsible for spawning and killing such processes when
necessary. By default, to kill such processes, agents use a simple ``kill``
command, but in some cases, like for example when those additional services
are running inside containers, it may not be a good solution.
To address this problem, operators should use the ``AGENT`` config group option
``kill_scripts_path`` to configure a path to where ``kill scripts`` for such
processes live. By default, it is set to ``/etc/neutron/kill_scripts/``.
If the ``kill_scripts_path`` option is changed in the config to a different
location, ``exec_dirs`` in ``/etc/rootwrap.conf`` should be changed accordingly.
If ``kill_scripts_path`` is set, every time neutron has to kill a process,
for example ``dnsmasq``, it will look in this directory for a file with the name
``<process_name>-kill``. So for ``dnsmasq`` process it will look for a
``dnsmasq-kill`` script. If such a file exists there, it will be called
instead of using the ``kill`` command.
Kill scripts are called with two parameters:
.. code-block::
<process>-kill <sig> <pid>
where: ``<sig>`` is the signal, same as with the ``kill`` command, for example
``9`` or ``SIGKILL``; and ``<pid>`` is the PID of the process to kill.
This external script should then handle killing of the given process as neutron
will not call the ``kill`` command for it anymore.

View File

@ -15,6 +15,8 @@ dnsmasq: CommandFilter, dnsmasq, root
# neutron/agent/linux/dhcp.py
kill_dnsmasq: KillFilter, root, /sbin/dnsmasq, -9, -HUP, -15
kill_dnsmasq_usr: KillFilter, root, /usr/sbin/dnsmasq, -9, -HUP, -15
# dnsmasq kill script filter
kill_dnsmasq_script: CommandFilter, dnsmasq-kill, root
ovs-vsctl: CommandFilter, ovs-vsctl, root
mm-ctl: CommandFilter, mm-ctl, root

View File

@ -15,3 +15,7 @@
# prefix_delegation_agent
dibbler-client: CommandFilter, dibbler-client, root
kill_dibbler-client: KillFilter, root, dibbler-client, -9
# dibbler kill script filter
kill_dibbler_script: CommandFilter, dibbler-kill, root
# dibbler-client kill script filter
kill_dibbler-client_script: CommandFilter, dibbler-client-kill, root

View File

@ -19,6 +19,8 @@ radvd: CommandFilter, radvd, root
# haproxy
haproxy: RegExpFilter, haproxy, root, haproxy, -f, .*
kill_haproxy: KillFilter, root, haproxy, -15, -9, -HUP
# haproxy kill script filter
kill_haproxy_script: CommandFilter, haproxy-kill, root
kill_radvd_usr: KillFilter, root, /usr/sbin/radvd, -15, -9, -HUP
kill_radvd: KillFilter, root, /sbin/radvd, -15, -9, -HUP
@ -52,6 +54,8 @@ ip6tables-restore: CommandFilter, ip6tables-restore, root
# Keepalived
keepalived: CommandFilter, keepalived, root
kill_keepalived: KillFilter, root, keepalived, -HUP, -15, -9
# keepalived kill script filter
kill_keepalived_script: CommandFilter, keepalived-kill, root
# l3 agent to delete floatingip's conntrack state
conntrack: CommandFilter, conntrack, root
@ -75,3 +79,5 @@ kill_keepalived_monitor_py37: KillFilter, root, python3.7, -15
# absolute path
kill_keepalived_monitor_platform_py: KillFilter, root, /usr/libexec/platform-python, -15
kill_keepalived_monitor_platform_py36: KillFilter, root, /usr/libexec/platform-python3.6, -15
# neutron-keepalived-state-change-monitor kill script filter
kill_neutron-keepalived-state-change-monitor_script: CommandFilter, neutron-keepalived-state-change-monitor-kill, root

View File

@ -10,7 +10,7 @@ filters_path=/etc/neutron/rootwrap.d,/usr/share/neutron/rootwrap
# explicitly specify a full path (separated by ',')
# If not specified, defaults to system PATH environment variable.
# These directories MUST all be only writeable by root !
exec_dirs=/sbin,/usr/sbin,/bin,/usr/bin,/usr/local/bin,/usr/local/sbin
exec_dirs=/sbin,/usr/sbin,/bin,/usr/bin,/usr/local/bin,/usr/local/sbin,/etc/neutron/kill_scripts
# Enable logging to syslog
# Default value is False

View File

@ -35,6 +35,8 @@ LOG = logging.getLogger(__name__)
HA_DEV_PREFIX = 'ha-'
IP_MONITOR_PROCESS_SERVICE = 'ip_monitor'
SIGTERM_TIMEOUT = 10
KEEPALIVED_STATE_CHANGE_MONITOR_SERVICE_NAME = (
"neutron-keepalived-state-change-monitor")
# The multiplier is used to compensate execution time of function sending
# SIGHUP to keepalived process. The constant multiplies ha_vrrp_advert_int
@ -357,6 +359,7 @@ class HaRouter(router.RouterInfo):
self.agent_conf,
'%s.monitor' % self.router_id,
self.ha_namespace,
service=KEEPALIVED_STATE_CHANGE_MONITOR_SERVICE_NAME,
default_cmd_callback=self._get_state_change_monitor_callback())
def _get_state_change_monitor_callback(self):

View File

@ -242,6 +242,7 @@ class DhcpLocalProcess(DhcpBase):
conf=self.conf,
uuid=self.network.id,
namespace=self.network.namespace,
service=DNSMASQ_SERVICE_NAME,
default_cmd_callback=cmd_callback,
pid_file=self.get_conf_file_name('pid'),
run_as_root=True)

View File

@ -66,6 +66,7 @@ class ProcessManager(MonitoredProcess):
self.pid_file = pid_file
self.run_as_root = run_as_root or self.namespace is not None
self.custom_reload_callback = custom_reload_callback
self.kill_scripts_path = cfg.CONF.AGENT.kill_scripts_path
if service:
self.service_pid_fname = 'pid.' + service
@ -105,7 +106,7 @@ class ProcessManager(MonitoredProcess):
ip_wrapper.netns.execute(cmd, addl_env=self.cmd_addl_env,
run_as_root=self.run_as_root)
else:
cmd = ['kill', '-%s' % (sig), pid]
cmd = self.get_kill_cmd(sig, pid)
utils.execute(cmd, run_as_root=self.run_as_root)
# In the case of shutting down, remove the pid file
if sig == '9':
@ -117,6 +118,13 @@ class ProcessManager(MonitoredProcess):
else:
LOG.debug('No process started for %s', self.uuid)
def get_kill_cmd(self, sig, pid):
    """Build the command used to stop the managed process.

    When ``kill_scripts_path`` is set and that directory contains a regular
    file named ``<service>-kill``, the script is used and invoked as
    ``<service>-kill <sig> <pid>``. Otherwise the plain
    ``kill -<sig> <pid>`` command is returned.

    :param sig: signal to deliver, e.g. ``'9'``; handed to a kill script
        without the leading dash.
    :param pid: PID of the process to stop; placed in the command as-is.
    :returns: the command as a list of arguments.
    """
    # Without a service name the script name would be the bogus
    # "None-kill"; only look for a script when both values are set.
    if self.kill_scripts_path and self.service:
        kill_file = "%s-kill" % self.service
        script_path = os.path.join(self.kill_scripts_path, kill_file)
        if os.path.isfile(script_path):
            # The bare script name is returned, not its full path.
            # NOTE(review): presumably it is resolved through rootwrap's
            # exec_dirs, which must then list kill_scripts_path - confirm.
            return [kill_file, sig, pid]
    return ['kill', '-%s' % sig, pid]
def get_pid_file_name(self):
"""Returns the file name for a given kind of config file."""
if self.pid_file:

View File

@ -457,6 +457,7 @@ class KeepalivedManager(object):
cfg.CONF,
self.resource_id,
self.namespace,
service=KEEPALIVED_SERVICE_NAME,
pids_path=self.conf_path)
def _get_vrrp_process(self, pid_file):

View File

@ -35,6 +35,7 @@ from neutron.agent.linux import external_process
LOG = logging.getLogger(__name__)
METADATA_SERVICE_NAME = 'metadata-proxy'
HAPROXY_SERVICE = 'haproxy'
PROXY_CONFIG_DIR = "ns-metadata-proxy"
_HAPROXY_CONFIG_TEMPLATE = """
@ -220,7 +221,7 @@ class MetadataDriver(object):
conf.state_path,
pid_file)
haproxy.create_config_file()
proxy_cmd = ['haproxy',
proxy_cmd = [HAPROXY_SERVICE,
'-f', haproxy.cfg_path]
return proxy_cmd
@ -260,6 +261,7 @@ class MetadataDriver(object):
conf=conf,
uuid=router_id,
namespace=ns_name,
service=HAPROXY_SERVICE,
default_cmd_callback=callback)

View File

@ -145,6 +145,15 @@ PROCESS_MONITOR_OPTS = [
cfg.IntOpt('check_child_processes_interval', default=60,
help=_('Interval between checks of child process liveness '
'(seconds), use 0 to disable')),
cfg.StrOpt('kill_scripts_path', default='/etc/neutron/kill_scripts/',
help=_('Location of scripts used to kill external processes. '
'Names of scripts here must follow the pattern: '
'"<process-name>-kill" where <process-name> is name of '
'the process which should be killed using this script. '
'For example, kill script for dnsmasq process should be '
'named "dnsmasq-kill". '
'If path is set to None, then default "kill" command '
'will be used to stop processes.')),
]
AVAILABILITY_ZONE_OPTS = [

View File

@ -34,6 +34,7 @@ from neutron.agent.linux import external_process
from neutron.agent.linux import interface
from neutron.agent.linux import ip_lib
from neutron.agent.linux import keepalived
from neutron.agent.metadata import driver as metadata_driver
from neutron.common import utils as common_utils
from neutron.conf.agent import common as agent_config
from neutron.conf import common as common_config
@ -398,7 +399,8 @@ class L3AgentTestFramework(base.BaseSudoTestCase):
pm = external_process.ProcessManager(
conf,
router.router_id,
router.ns_name)
router.ns_name,
service=metadata_driver.HAPROXY_SERVICE)
return pm.active
def device_exists_with_ips_and_mac(self, expected_device, name_getter,

View File

@ -32,6 +32,7 @@ from neutron.agent.linux import external_process
from neutron.agent.linux import interface
from neutron.agent.linux import ip_lib
from neutron.agent.linux import utils
from neutron.agent.metadata import driver as metadata_driver
from neutron.common import utils as common_utils
from neutron.conf.agent import common as config
from neutron.tests.common import net_helpers
@ -256,7 +257,8 @@ class DHCPAgentOVSTestFramework(base.BaseSudoTestCase):
return external_process.ProcessManager(
self.conf,
network.id,
network.namespace)
network.namespace,
service=metadata_driver.HAPROXY_SERVICE)
class DHCPAgentOVSTestCase(DHCPAgentOVSTestFramework):

View File

@ -693,6 +693,7 @@ class TestDhcpAgentEventHandler(base.BaseTestCase):
return mock.call(conf=cfg.CONF,
uuid=FAKE_NETWORK_UUID,
namespace=ns,
service='haproxy',
default_cmd_callback=mock.ANY)
def _enable_dhcp_helper(self, network, enable_isolated_metadata=False,

View File

@ -15,6 +15,7 @@
import os.path
import mock
from oslo_config import cfg
from oslo_utils import fileutils
import psutil
@ -244,6 +245,48 @@ class TestProcessManager(base.BaseTestCase):
manager.disable()
debug.assert_called_once_with(mock.ANY, mock.ANY)
def _test_disable_custom_kill_script(self, kill_script_exists, namespace,
kill_scripts_path='test-path/'):
# Shared driver for the custom kill-script tests. It overrides the AGENT
# option "kill_scripts_path", patches ProcessManager.pid (4) and
# ProcessManager.active (True), fakes os.path.isfile() with
# ``kill_script_exists``, calls disable() and checks the command passed to
# utils.execute.
# NOTE(review): indentation was lost in this view; the nesting of the
# ``with`` blocks below is assumed rather than visible - confirm.
cfg.CONF.set_override("kill_scripts_path", kill_scripts_path, "AGENT")
# A kill script is expected to be called by its bare name with the signal
# un-prefixed; the fallback is the plain "kill -9 <pid>" command.
if kill_script_exists:
expected_cmd = ['test-service-kill', '9', 4]
else:
expected_cmd = ['kill', '-9', 4]
with mock.patch.object(ep.ProcessManager, 'pid') as pid:
pid.__get__ = mock.Mock(return_value=4)
with mock.patch.object(ep.ProcessManager, 'active') as active:
active.__get__ = mock.Mock(return_value=True)
manager = ep.ProcessManager(
self.conf, 'uuid', namespace=namespace,
service='test-service')
with mock.patch.object(ep, 'utils') as utils, \
mock.patch.object(os.path, 'isfile',
return_value=kill_script_exists):
manager.disable()
# run_as_root is asserted to be True only when a namespace is given.
utils.execute.assert_called_with(
expected_cmd, run_as_root=bool(namespace))
def test_disable_custom_kill_script_no_namespace(self):
# Kill script present, no namespace: the "<service>-kill" command is
# expected.
self._test_disable_custom_kill_script(
kill_script_exists=True, namespace=None)
def test_disable_custom_kill_script_namespace(self):
# Kill script present, namespace "ns": the "<service>-kill" command is
# expected.
self._test_disable_custom_kill_script(
kill_script_exists=True, namespace="ns")
def test_disable_custom_kill_script_no_kill_script_no_namespace(self):
# No kill script, no namespace: the plain "kill -9" command is expected.
self._test_disable_custom_kill_script(
kill_script_exists=False, namespace=None)
def test_disable_custom_kill_script_no_kill_script_namespace(self):
# No kill script, namespace "ns": the plain "kill -9" command is expected.
self._test_disable_custom_kill_script(
kill_script_exists=False, namespace="ns")
def test_disable_custom_kill_script_namespace_no_path(self):
# kill_scripts_path overridden to None, namespace "ns": the plain "kill -9"
# command is expected.
self._test_disable_custom_kill_script(
kill_script_exists=False, namespace="ns", kill_scripts_path=None)
def test_get_pid_file_name_default(self):
manager = ep.ProcessManager(self.conf, 'uuid')
retval = manager.get_pid_file_name()

View File

@ -0,0 +1,7 @@
---
features:
- |
Added support for custom scripts used to kill external processes managed by
neutron agents, such as ``dnsmasq`` or ``keepalived``. Such custom scripts,
if defined, will be used instead of the default ``kill`` command to kill such
external processes.