Set initial ha router state in neutron-keepalived-state-change

Sometimes, for HA routers, it may happen that keepalived
sets the status of a router to MASTER before the
neutron-keepalived-state-change daemon spawns "ip monitor"
to monitor changes of IPs in the router's namespace.

In such a case the neutron-keepalived-state-change process will never
notice that keepalived set the router to MASTER, and the L3 agent will
not be notified about it, so the router will not be configured properly.

To avoid this race condition, neutron-keepalived-state-change
now checks whether the VIP address is already configured on the HA
interface before it spawns "ip monitor". If the address has already
been configured by keepalived, it notifies the L3 agent that the
router is set to MASTER.

Change-Id: Ie3fe825d65408fc969c478767b411fe0156e9fbc
Closes-Bug: #1818614
(cherry picked from commit 8fec1ffc83)
This commit is contained in:
Slawek Kaplonski 2019-03-10 22:45:15 +01:00
parent 4aaab7100f
commit 5bcca13f4a
4 changed files with 60 additions and 3 deletions

View File

@ -371,7 +371,10 @@ class HaRouter(router.RouterInfo):
'--pid_file=%s' % pid_file, '--pid_file=%s' % pid_file,
'--state_path=%s' % self.agent_conf.state_path, '--state_path=%s' % self.agent_conf.state_path,
'--user=%s' % os.geteuid(), '--user=%s' % os.geteuid(),
'--group=%s' % os.getegid()] '--group=%s' % os.getegid(),
'--AGENT-root_helper=%s' % self.agent_conf.AGENT.root_helper,
'--AGENT-root_helper_daemon=%s' %
self.agent_conf.AGENT.root_helper_daemon]
return cmd return cmd
return callback return callback

View File

@ -28,6 +28,7 @@ from neutron.agent.linux import ip_lib
from neutron.agent.linux import ip_monitor from neutron.agent.linux import ip_monitor
from neutron.agent.linux import utils as agent_utils from neutron.agent.linux import utils as agent_utils
from neutron.common import config from neutron.common import config
from neutron.conf.agent import common as agent_config
from neutron.conf.agent.l3 import keepalived from neutron.conf.agent.l3 import keepalived
@ -66,6 +67,7 @@ class MonitorDaemon(daemon.Daemon):
# as root # as root
if not run_as_root: if not run_as_root:
super(MonitorDaemon, self).run() super(MonitorDaemon, self).run()
self.handle_initial_state()
for iterable in self.monitor: for iterable in self.monitor:
self.parse_and_handle_event(iterable) self.parse_and_handle_event(iterable)
@ -89,6 +91,23 @@ class MonitorDaemon(daemon.Daemon):
LOG.exception('Failed to process or handle event for line %s', LOG.exception('Failed to process or handle event for line %s',
iterable) iterable)
def handle_initial_state(self):
    """Report the router's state as it is before event monitoring begins.

    Lists the addresses on ``self.interface`` inside ``self.namespace``.
    If one of them has a ``cidr`` equal to ``self.cidr`` the state is
    'master': it is persisted with ``write_state_change`` and passed to
    ``notify_agent``. Otherwise the state is 'backup' and nothing is
    written or sent. The resulting state is logged at debug level.

    Any exception raised along the way is logged and swallowed, so a
    failure here never propagates to the caller.
    """
    try:
        device = ip_lib.IPDevice(self.interface, self.namespace)
        # any() stops at the first matching address, like a loop + break.
        vip_configured = any(
            entry.get('cidr') == self.cidr for entry in device.addr.list())
        state = 'master' if vip_configured else 'backup'
        if vip_configured:
            # Only the master state is recorded and announced.
            self.write_state_change(state)
            self.notify_agent(state)
        LOG.debug('Initial status of router %s is %s',
                  self.router_id, state)
    except Exception:
        LOG.exception('Failed to get initial status of router %s',
                      self.router_id)
def write_state_change(self, state): def write_state_change(self, state):
with open(os.path.join( with open(os.path.join(
self.conf_dir, 'state'), 'w') as state_file: self.conf_dir, 'state'), 'w') as state_file:
@ -140,9 +159,12 @@ def configure(conf):
conf.set_override('debug', True) conf.set_override('debug', True)
conf.set_override('use_syslog', True) conf.set_override('use_syslog', True)
config.setup_logging() config.setup_logging()
agent_config.setup_privsep()
def main(): def main():
agent_config.register_root_helper(cfg.CONF)
cfg.CONF.register_cli_opts(agent_config.ROOT_HELPER_OPTS, 'AGENT')
keepalived.register_cli_l3_agent_keepalived_opts() keepalived.register_cli_l3_agent_keepalived_opts()
keepalived.register_l3_agent_keepalived_opts() keepalived.register_l3_agent_keepalived_opts()
configure(cfg.CONF) configure(cfg.CONF)

View File

@ -71,6 +71,7 @@ class L3AgentTestFramework(base.BaseSudoTestCase):
config.register_opts(common_config.core_cli_opts) config.register_opts(common_config.core_cli_opts)
logging.register_options(config) logging.register_options(config)
agent_config.register_process_monitor_opts(config) agent_config.register_process_monitor_opts(config)
agent_config.register_root_helper(config)
return config return config
def _configure_agent(self, host, agent_mode='dvr_snat'): def _configure_agent(self, host, agent_mode='dvr_snat'):
@ -97,6 +98,11 @@ class L3AgentTestFramework(base.BaseSudoTestCase):
get_temp_file_path('external/pids')) get_temp_file_path('external/pids'))
conf.set_override('host', host) conf.set_override('host', host)
conf.set_override('agent_mode', agent_mode) conf.set_override('agent_mode', agent_mode)
conf.set_override(
'root_helper', cfg.CONF.AGENT.root_helper, group='AGENT')
conf.set_override(
'root_helper_daemon', cfg.CONF.AGENT.root_helper_daemon,
group='AGENT')
return conf return conf

View File

@ -45,13 +45,13 @@ class TestKeepalivedStateChange(base.BaseSudoTestCase):
self.router_id = uuidutils.generate_uuid() self.router_id = uuidutils.generate_uuid()
self.conf_dir = self.get_default_temp_dir().path self.conf_dir = self.get_default_temp_dir().path
self.cidr = '169.254.128.1/24' self.cidr = '169.254.128.1/24'
self.interface_name = 'interface' self.interface_name = utils.get_rand_name()
self.monitor = keepalived_state_change.MonitorDaemon( self.monitor = keepalived_state_change.MonitorDaemon(
self.get_temp_file_path('monitor.pid'), self.get_temp_file_path('monitor.pid'),
self.router_id, self.router_id,
1, 1,
2, 2,
'namespace', utils.get_rand_name(),
self.conf_dir, self.conf_dir,
self.interface_name, self.interface_name,
self.cidr) self.cidr)
@ -83,6 +83,32 @@ class TestKeepalivedStateChange(base.BaseSudoTestCase):
self.monitor, 'notify_agent', side_effect=Exception): self.monitor, 'notify_agent', side_effect=Exception):
self.monitor.parse_and_handle_event(self.line) self.monitor.parse_and_handle_event(self.line)
def test_handle_initial_state_backup(self):
    """No state is written or announced when no address was added.

    Creates the monitor's namespace with a dummy interface that has no
    address assigned, then checks that ``handle_initial_state`` calls
    neither ``write_state_change`` nor ``notify_agent``.
    """
    namespace = self.monitor.namespace
    wrapper = ip_lib.IPWrapper(namespace=namespace)
    wrapper.netns.add(namespace)
    self.addCleanup(wrapper.netns.delete, namespace)
    wrapper.add_dummy(self.interface_name)

    with mock.patch.object(self.monitor, 'write_state_change') as mock_write:
        with mock.patch.object(self.monitor, 'notify_agent') as mock_notify:
            self.monitor.handle_initial_state()
            mock_write.assert_not_called()
            mock_notify.assert_not_called()
def test_handle_initial_state_master(self):
    """State 'master' is recorded when the interface carries self.cidr.

    Creates the monitor's namespace with a dummy interface, assigns
    ``self.cidr`` to it, runs ``handle_initial_state`` and checks that
    ``_get_state`` returns 'master'.
    """
    namespace = self.monitor.namespace
    wrapper = ip_lib.IPWrapper(namespace=namespace)
    wrapper.netns.add(namespace)
    self.addCleanup(wrapper.netns.delete, namespace)

    # Put the monitored address on the interface before the check runs.
    dummy_device = wrapper.add_dummy(self.interface_name)
    dummy_device.addr.add(self.cidr)

    self.monitor.handle_initial_state()

    self.assertEqual('master', self._get_state())
class TestMonitorDaemon(base.BaseSudoTestCase): class TestMonitorDaemon(base.BaseSudoTestCase):
def setUp(self): def setUp(self):