Browse Source

Set initial ha router state in neutron-keepalived-state-change

Sometimes, in the case of HA routers, it may happen that
keepalived sets the status of the router to MASTER before the
neutron-keepalived-state-change daemon spawns "ip monitor"
to monitor changes of IPs in the router's namespace.

In such a case the neutron-keepalived-state-change process will never
notice that keepalived set the router to MASTER, and the L3 agent will
not be notified about it, so the router will not be configured properly.

To avoid this race condition, neutron-keepalived-state-change will
now check whether the VIP address is already configured on the HA
interface before it spawns "ip monitor". If it is already configured
by keepalived, it will notify the L3 agent that the router is set to
MASTER.

Change-Id: Ie3fe825d65408fc969c478767b411fe0156e9fbc
Closes-Bug: #1818614
Slawek Kaplonski 1 month ago
parent
commit
8fec1ffc83

+ 4
- 1
neutron/agent/l3/ha_router.py View File

@@ -374,7 +374,10 @@ class HaRouter(router.RouterInfo):
374 374
                 '--pid_file=%s' % pid_file,
375 375
                 '--state_path=%s' % self.agent_conf.state_path,
376 376
                 '--user=%s' % os.geteuid(),
377
-                '--group=%s' % os.getegid()]
377
+                '--group=%s' % os.getegid(),
378
+                '--AGENT-root_helper=%s' % self.agent_conf.AGENT.root_helper,
379
+                '--AGENT-root_helper_daemon=%s' %
380
+                self.agent_conf.AGENT.root_helper_daemon]
378 381
             return cmd
379 382
 
380 383
         return callback

+ 22
- 0
neutron/agent/l3/keepalived_state_change.py View File

@@ -28,6 +28,7 @@ from neutron.agent.linux import ip_lib
28 28
 from neutron.agent.linux import ip_monitor
29 29
 from neutron.agent.linux import utils as agent_utils
30 30
 from neutron.common import config
31
+from neutron.conf.agent import common as agent_config
31 32
 from neutron.conf.agent.l3 import keepalived
32 33
 
33 34
 
@@ -66,6 +67,7 @@ class MonitorDaemon(daemon.Daemon):
66 67
         # as root
67 68
         if not run_as_root:
68 69
             super(MonitorDaemon, self).run()
70
+        self.handle_initial_state()
69 71
         for iterable in self.monitor:
70 72
             self.parse_and_handle_event(iterable)
71 73
 
@@ -89,6 +91,23 @@ class MonitorDaemon(daemon.Daemon):
89 91
             LOG.exception('Failed to process or handle event for line %s',
90 92
                           iterable)
91 93
 
94
+    def handle_initial_state(self):
95
+        try:
96
+            state = 'backup'
97
+            ip = ip_lib.IPDevice(self.interface, self.namespace)
98
+            for address in ip.addr.list():
99
+                if address.get('cidr') == self.cidr:
100
+                    state = 'master'
101
+                    self.write_state_change(state)
102
+                    self.notify_agent(state)
103
+                    break
104
+
105
+            LOG.debug('Initial status of router %s is %s',
106
+                      self.router_id, state)
107
+        except Exception:
108
+            LOG.exception('Failed to get initial status of router %s',
109
+                          self.router_id)
110
+
92 111
     def write_state_change(self, state):
93 112
         with open(os.path.join(
94 113
                 self.conf_dir, 'state'), 'w') as state_file:
@@ -140,9 +159,12 @@ def configure(conf):
140 159
     conf.set_override('debug', True)
141 160
     conf.set_override('use_syslog', True)
142 161
     config.setup_logging()
162
+    agent_config.setup_privsep()
143 163
 
144 164
 
145 165
 def main():
166
+    agent_config.register_root_helper(cfg.CONF)
167
+    cfg.CONF.register_cli_opts(agent_config.ROOT_HELPER_OPTS, 'AGENT')
146 168
     keepalived.register_cli_l3_agent_keepalived_opts()
147 169
     keepalived.register_l3_agent_keepalived_opts()
148 170
     configure(cfg.CONF)

+ 6
- 0
neutron/tests/functional/agent/l3/framework.py View File

@@ -71,6 +71,7 @@ class L3AgentTestFramework(base.BaseSudoTestCase):
71 71
         config.register_opts(common_config.core_cli_opts)
72 72
         logging.register_options(config)
73 73
         agent_config.register_process_monitor_opts(config)
74
+        agent_config.register_root_helper(config)
74 75
         return config
75 76
 
76 77
     def _configure_agent(self, host, agent_mode='dvr_snat'):
@@ -97,6 +98,11 @@ class L3AgentTestFramework(base.BaseSudoTestCase):
97 98
                           get_temp_file_path('external/pids'))
98 99
         conf.set_override('host', host)
99 100
         conf.set_override('agent_mode', agent_mode)
101
+        conf.set_override(
102
+            'root_helper', cfg.CONF.AGENT.root_helper, group='AGENT')
103
+        conf.set_override(
104
+            'root_helper_daemon', cfg.CONF.AGENT.root_helper_daemon,
105
+            group='AGENT')
100 106
 
101 107
         return conf
102 108
 

+ 28
- 2
neutron/tests/functional/agent/l3/test_keepalived_state_change.py View File

@@ -45,13 +45,13 @@ class TestKeepalivedStateChange(base.BaseSudoTestCase):
45 45
         self.router_id = uuidutils.generate_uuid()
46 46
         self.conf_dir = self.get_default_temp_dir().path
47 47
         self.cidr = '169.254.128.1/24'
48
-        self.interface_name = 'interface'
48
+        self.interface_name = utils.get_rand_name()
49 49
         self.monitor = keepalived_state_change.MonitorDaemon(
50 50
             self.get_temp_file_path('monitor.pid'),
51 51
             self.router_id,
52 52
             1,
53 53
             2,
54
-            'namespace',
54
+            utils.get_rand_name(),
55 55
             self.conf_dir,
56 56
             self.interface_name,
57 57
             self.cidr)
@@ -83,6 +83,32 @@ class TestKeepalivedStateChange(base.BaseSudoTestCase):
83 83
                 self.monitor, 'notify_agent', side_effect=Exception):
84 84
             self.monitor.parse_and_handle_event(self.line)
85 85
 
86
+    def test_handle_initial_state_backup(self):
87
+        ip = ip_lib.IPWrapper(namespace=self.monitor.namespace)
88
+        ip.netns.add(self.monitor.namespace)
89
+        self.addCleanup(ip.netns.delete, self.monitor.namespace)
90
+        ip.add_dummy(self.interface_name)
91
+
92
+        with mock.patch.object(
93
+                self.monitor, 'write_state_change') as write_state_change,\
94
+                mock.patch.object(
95
+                    self.monitor, 'notify_agent') as notify_agent:
96
+
97
+            self.monitor.handle_initial_state()
98
+            write_state_change.assert_not_called()
99
+            notify_agent.assert_not_called()
100
+
101
+    def test_handle_initial_state_master(self):
102
+        ip = ip_lib.IPWrapper(namespace=self.monitor.namespace)
103
+        ip.netns.add(self.monitor.namespace)
104
+        self.addCleanup(ip.netns.delete, self.monitor.namespace)
105
+        ha_interface = ip.add_dummy(self.interface_name)
106
+
107
+        ha_interface.addr.add(self.cidr)
108
+
109
+        self.monitor.handle_initial_state()
110
+        self.assertEqual('master', self._get_state())
111
+
86 112
 
87 113
 class TestMonitorDaemon(base.BaseSudoTestCase):
88 114
     def setUp(self):

Loading…
Cancel
Save