Set initial ha router state in neutron-keepalived-state-change

Sometimes in case of HA routers it may happend that
keepalived will set status of router to MASTER before
neutron-keepalived-state-change daemon will spawn "ip monitor"
to monitor changes of IPs in router's namespace.

In such case neutron-keepalived-state-change process will never
notice that keepalived set router to be MASTER and L3 agent will
not be notified about that so router will not be configured properly.

To avoid such race condition neutron-keepalived-state-change will
now check if VIP address is already configured on ha interface
before it will spawn "ip monitor". If it is already configured
by keepalived, it will notify L3 agent that router is set to
MASTER.

Change-Id: Ie3fe825d65408fc969c478767b411fe0156e9fbc
Closes-Bug: #1818614
This commit is contained in:
Slawek Kaplonski 2019-03-10 22:45:15 +01:00
parent c3b5301fd7
commit 8fec1ffc83
4 changed files with 60 additions and 3 deletions

View File

@ -374,7 +374,10 @@ class HaRouter(router.RouterInfo):
'--pid_file=%s' % pid_file,
'--state_path=%s' % self.agent_conf.state_path,
'--user=%s' % os.geteuid(),
'--group=%s' % os.getegid()]
'--group=%s' % os.getegid(),
'--AGENT-root_helper=%s' % self.agent_conf.AGENT.root_helper,
'--AGENT-root_helper_daemon=%s' %
self.agent_conf.AGENT.root_helper_daemon]
return cmd
return callback

View File

@ -28,6 +28,7 @@ from neutron.agent.linux import ip_lib
from neutron.agent.linux import ip_monitor
from neutron.agent.linux import utils as agent_utils
from neutron.common import config
from neutron.conf.agent import common as agent_config
from neutron.conf.agent.l3 import keepalived
@ -66,6 +67,7 @@ class MonitorDaemon(daemon.Daemon):
# as root
if not run_as_root:
super(MonitorDaemon, self).run()
self.handle_initial_state()
for iterable in self.monitor:
self.parse_and_handle_event(iterable)
@ -89,6 +91,23 @@ class MonitorDaemon(daemon.Daemon):
LOG.exception('Failed to process or handle event for line %s',
iterable)
def handle_initial_state(self):
try:
state = 'backup'
ip = ip_lib.IPDevice(self.interface, self.namespace)
for address in ip.addr.list():
if address.get('cidr') == self.cidr:
state = 'master'
self.write_state_change(state)
self.notify_agent(state)
break
LOG.debug('Initial status of router %s is %s',
self.router_id, state)
except Exception:
LOG.exception('Failed to get initial status of router %s',
self.router_id)
def write_state_change(self, state):
with open(os.path.join(
self.conf_dir, 'state'), 'w') as state_file:
@ -140,9 +159,12 @@ def configure(conf):
conf.set_override('debug', True)
conf.set_override('use_syslog', True)
config.setup_logging()
agent_config.setup_privsep()
def main():
agent_config.register_root_helper(cfg.CONF)
cfg.CONF.register_cli_opts(agent_config.ROOT_HELPER_OPTS, 'AGENT')
keepalived.register_cli_l3_agent_keepalived_opts()
keepalived.register_l3_agent_keepalived_opts()
configure(cfg.CONF)

View File

@ -71,6 +71,7 @@ class L3AgentTestFramework(base.BaseSudoTestCase):
config.register_opts(common_config.core_cli_opts)
logging.register_options(config)
agent_config.register_process_monitor_opts(config)
agent_config.register_root_helper(config)
return config
def _configure_agent(self, host, agent_mode='dvr_snat'):
@ -97,6 +98,11 @@ class L3AgentTestFramework(base.BaseSudoTestCase):
get_temp_file_path('external/pids'))
conf.set_override('host', host)
conf.set_override('agent_mode', agent_mode)
conf.set_override(
'root_helper', cfg.CONF.AGENT.root_helper, group='AGENT')
conf.set_override(
'root_helper_daemon', cfg.CONF.AGENT.root_helper_daemon,
group='AGENT')
return conf

View File

@ -45,13 +45,13 @@ class TestKeepalivedStateChange(base.BaseSudoTestCase):
self.router_id = uuidutils.generate_uuid()
self.conf_dir = self.get_default_temp_dir().path
self.cidr = '169.254.128.1/24'
self.interface_name = 'interface'
self.interface_name = utils.get_rand_name()
self.monitor = keepalived_state_change.MonitorDaemon(
self.get_temp_file_path('monitor.pid'),
self.router_id,
1,
2,
'namespace',
utils.get_rand_name(),
self.conf_dir,
self.interface_name,
self.cidr)
@ -83,6 +83,32 @@ class TestKeepalivedStateChange(base.BaseSudoTestCase):
self.monitor, 'notify_agent', side_effect=Exception):
self.monitor.parse_and_handle_event(self.line)
def test_handle_initial_state_backup(self):
ip = ip_lib.IPWrapper(namespace=self.monitor.namespace)
ip.netns.add(self.monitor.namespace)
self.addCleanup(ip.netns.delete, self.monitor.namespace)
ip.add_dummy(self.interface_name)
with mock.patch.object(
self.monitor, 'write_state_change') as write_state_change,\
mock.patch.object(
self.monitor, 'notify_agent') as notify_agent:
self.monitor.handle_initial_state()
write_state_change.assert_not_called()
notify_agent.assert_not_called()
def test_handle_initial_state_master(self):
ip = ip_lib.IPWrapper(namespace=self.monitor.namespace)
ip.netns.add(self.monitor.namespace)
self.addCleanup(ip.netns.delete, self.monitor.namespace)
ha_interface = ip.add_dummy(self.interface_name)
ha_interface.addr.add(self.cidr)
self.monitor.handle_initial_state()
self.assertEqual('master', self._get_state())
class TestMonitorDaemon(base.BaseSudoTestCase):
def setUp(self):