Kill neutron-keepalived-state-change gracefully
When HA routers are deleted, neutron-keepalived-state-change was killed by l3/ha_router using SIGKILL and orphaning its child process 'ip -o monitor'. This patch implements a way to kill it gracefully through SIGTERM, and also sending a SIGKILL if keepalived-state-change didn't die after a 10 seconds timeout. The SIGTERM handler in keepalived-state-change kills the ip monitor process using SIGKILL. It kills ip_monitor PID using kill instead of through IPMonitor.stop() because the monitor runs as root and keepalived-state-change doesn't (it drops its privileges after launching the monitor). Doing so, would lead to a "Permission denied". We can safely kill the monitor this way since it was started with respawn_interval set to None which means that it won't be automatically respawned. Closes-Bug: #1632099 Change-Id: I7385172a007fdd252ea3a1c03c58064160d07e9e
This commit is contained in:
parent
4f8a106cb5
commit
17c2b45d91
@ -14,7 +14,9 @@
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import signal
|
||||
|
||||
import eventlet
|
||||
import netaddr
|
||||
from neutron_lib import constants as n_consts
|
||||
from oslo_log import log as logging
|
||||
@ -31,6 +33,7 @@ from neutron.extensions import portbindings
|
||||
LOG = logging.getLogger(__name__)
|
||||
HA_DEV_PREFIX = 'ha-'
|
||||
IP_MONITOR_PROCESS_SERVICE = 'ip_monitor'
|
||||
SIGTERM_TIMEOUT = 10
|
||||
|
||||
|
||||
class HaRouterNamespace(namespaces.RouterNamespace):
|
||||
@ -342,7 +345,12 @@ class HaRouter(router.RouterInfo):
|
||||
pm = self._get_state_change_monitor_process_manager()
|
||||
process_monitor.unregister(
|
||||
self.router_id, IP_MONITOR_PROCESS_SERVICE)
|
||||
pm.disable()
|
||||
pm.disable(sig=str(int(signal.SIGTERM)))
|
||||
try:
|
||||
common_utils.wait_until_true(lambda: not pm.active,
|
||||
timeout=SIGTERM_TIMEOUT)
|
||||
except eventlet.timeout.Timeout:
|
||||
pm.disable(sig=str(int(signal.SIGKILL)))
|
||||
|
||||
def update_initial_state(self, callback):
|
||||
ha_device = ip_lib.IPDevice(
|
||||
|
@ -13,6 +13,7 @@
|
||||
# under the License.
|
||||
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
|
||||
import httplib2
|
||||
@ -118,6 +119,21 @@ class MonitorDaemon(daemon.Daemon):
|
||||
log_exception=False
|
||||
)
|
||||
|
||||
def _kill_monitor(self):
|
||||
if self.monitor:
|
||||
# Kill PID instead of calling self.monitor.stop() because the ip
|
||||
# monitor is running as root while keepalived-state-change is not
|
||||
# (dropped privileges after launching the ip monitor) and will fail
|
||||
# with "Permission denied". Also, we can safely do this because the
|
||||
# monitor was launched with respawn_interval=None so it won't be
|
||||
# automatically respawned
|
||||
agent_utils.kill_process(self.monitor.pid, signal.SIGKILL,
|
||||
run_as_root=True)
|
||||
|
||||
def handle_sigterm(self, signum, frame):
|
||||
self._kill_monitor()
|
||||
super(MonitorDaemon, self).handle_sigterm(signum, frame)
|
||||
|
||||
|
||||
def configure(conf):
|
||||
config.init(sys.argv[1:])
|
||||
|
@ -12,6 +12,8 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import signal
|
||||
|
||||
import mock
|
||||
from oslo_utils import uuidutils
|
||||
|
||||
@ -84,3 +86,29 @@ class TestBasicRouterOperations(base.BaseTestCase):
|
||||
|
||||
ri.remove_floating_ip(device, fip_cidr)
|
||||
self.assertTrue(super_remove_floating_ip.called)
|
||||
|
||||
def test_destroy_state_change_monitor_ok(self):
|
||||
ri = self._create_router(mock.MagicMock())
|
||||
with mock.patch.object(ri,
|
||||
'_get_state_change_monitor_process_manager')\
|
||||
as m_get_state:
|
||||
mock_pm = m_get_state.return_value
|
||||
mock_pm.active = False
|
||||
ri.destroy_state_change_monitor(mock_pm)
|
||||
|
||||
mock_pm.disable.assert_called_once_with(
|
||||
sig=str(int(signal.SIGTERM)))
|
||||
|
||||
def test_destroy_state_change_monitor_force(self):
|
||||
ri = self._create_router(mock.MagicMock())
|
||||
with mock.patch.object(ri,
|
||||
'_get_state_change_monitor_process_manager')\
|
||||
as m_get_state:
|
||||
mock_pm = m_get_state.return_value
|
||||
mock_pm.active = False
|
||||
with mock.patch.object(ha_router, 'SIGTERM_TIMEOUT', 0):
|
||||
ri.destroy_state_change_monitor(mock_pm)
|
||||
|
||||
calls = ["sig='str(%d)'" % signal.SIGTERM,
|
||||
"sig='str(%d)'" % signal.SIGKILL]
|
||||
mock_pm.disable.has_calls(calls)
|
||||
|
Loading…
Reference in New Issue
Block a user