Browse Source

Make restarting the agent unnecessary

Now the agent detects when lagopus goes down, waits for lagopus to
restart, and initializes its caches again. So it is no longer necessary
to restart the agent when lagopus goes down.

Change-Id: I46c4ae1895c498f4b598270a17af881ca6f2a939
Itsuro Oda 1 year ago
parent
commit
ad6fcb7937

+ 67
- 5
networking_lagopus/ml2/agent/lagopus_agent.py View File

@@ -11,6 +11,7 @@
11 11
 #    under the License.
12 12
 
13 13
 import eventlet
14
+import os
14 15
 import socket
15 16
 import sys
16 17
 
@@ -43,6 +44,8 @@ LAGOPUS_AGENT_BINARY = 'neutron-lagopus-agent'
43 44
 AGENT_TYPE_LAGOPUS = 'Lagopus agent'
44 45
 MAX_WAIT_LAGOPUS_RETRY = 5
45 46
 
47
+lagopus_dead_handler = None
48
+
46 49
 
47 50
 class LagopusCache(dict):
48 51
 
@@ -87,9 +90,12 @@ class LagopusManager(object):
87 90
         self.serializer = eventlet.semaphore.Semaphore()
88 91
 
89 92
         self._wait_lagopus_initialized()
90
-
91 93
         lg_lib.register_config_change_callback(self._rebuild_dsl)
94
+        self.build_cache()
95
+        register_dead_handler(self.lagopus_dead_handler)
96
+        self.lagopus_alive = True
92 97
 
98
+    def build_cache(self):
93 99
         # initialize device caches
94 100
         # channel
95 101
         self.channels = LagopusCache(lg_lib.LagopusChannel)
@@ -127,13 +133,13 @@ class LagopusManager(object):
127 133
         self.bridges = LagopusCache(lg_lib.LagopusBridge)
128 134
         raw_data = self.bridges.show()
129 135
         LOG.debug("bridges: %s", raw_data)
130
-        phys_bridge_names = bridge_mappings.values()
136
+        phys_bridge_names = self.bridge_mappings.values()
131 137
         for item in raw_data:
132 138
             b_name = item["name"]
133 139
             controller = item["controllers"][0][1:]  # remove ":"
134 140
             b_type = (lg_lib.BRIDGE_TYPE_PHYS if b_name in phys_bridge_names
135 141
                       else lg_lib.BRIDGE_TYPE_VLAN)
136
-            bridge = self.bridges.add(b_name, ryu_app, controller,
142
+            bridge = self.bridges.add(b_name, self.ryu_app, controller,
137 143
                                       item["dpid"], b_type, item["is-enabled"])
138 144
             for p_name, ofport in item["ports"].items():
139 145
                 port = self.ports[p_name[1:]]  # remove ":"
@@ -141,7 +147,7 @@ class LagopusManager(object):
141 147
 
142 148
         # check physical bridge existence
143 149
         self.phys_to_bridge = {}
144
-        for phys_net, name in bridge_mappings.items():
150
+        for phys_net, name in self.bridge_mappings.items():
145 151
             if name not in self.bridges:
146 152
                 LOG.error("Bridge %s not found.", name)
147 153
                 sys.exit(1)
@@ -151,7 +157,7 @@ class LagopusManager(object):
151 157
         self.max_pipe_pairs = cfg.CONF.lagopus.max_vlan_networks
152 158
         self.max_vhosts = (cfg.CONF.lagopus.max_eth_ports
153 159
                            - self.max_pipe_pairs * 2
154
-                           - len(bridge_mappings))
160
+                           - len(self.bridge_mappings))
155 161
 
156 162
         self.free_vhost_interfaces = []
157 163
         for vhost_id in range(self.max_vhosts):
@@ -336,6 +342,7 @@ class LagopusManager(object):
336 342
 
337 343
     @log_helpers.log_method_call
338 344
     def plug_vhost(self, context, **kwargs):
345
+        self.check_active()
339 346
         p_name = self.ports.mk_name(lg_lib.INTERFACE_TYPE_VHOST,
340 347
                                     kwargs['port_id'])
341 348
         segment = kwargs['segment']
@@ -349,6 +356,7 @@ class LagopusManager(object):
349 356
 
350 357
     @log_helpers.log_method_call
351 358
     def unplug_vhost(self, context, **kwargs):
359
+        self.check_active()
352 360
         p_name = self.ports.mk_name(lg_lib.INTERFACE_TYPE_VHOST,
353 361
                                     kwargs['port_id'])
354 362
 
@@ -362,6 +370,7 @@ class LagopusManager(object):
362 370
 
363 371
     @log_helpers.log_method_call
364 372
     def plug_rawsock(self, context, **kwargs):
373
+        self.check_active()
365 374
         device = kwargs['device']
366 375
         segment = kwargs['segment']
367 376
         i_name = self.interfaces.mk_name(lg_lib.INTERFACE_TYPE_RAWSOCK, device)
@@ -379,6 +388,7 @@ class LagopusManager(object):
379 388
 
380 389
     @log_helpers.log_method_call
381 390
     def unplug_rawsock(self, context, **kwargs):
391
+        self.check_active()
382 392
         device = kwargs['device']
383 393
         i_name = self.interfaces.mk_name(lg_lib.INTERFACE_TYPE_RAWSOCK, device)
384 394
         p_name = self.ports.mk_name(lg_lib.INTERFACE_TYPE_RAWSOCK, device)
@@ -391,6 +401,44 @@ class LagopusManager(object):
391 401
             self.interfaces.destroy(i_name)
392 402
             self.put_bridge(bridge)
393 403
 
404
+    def lagopus_dead_handler(self, dpid):
405
+        for bridge in self.phys_to_bridge.values():
406
+            if dpid == bridge.dpid:
407
+                if self.lagopus_alive:
408
+                    # call once at first detected
409
+                    self.lagopus_alive = False
410
+                    eventlet.spawn_n(self.wait_lagopus_restart)
411
+
412
+    def wait_lagopus_restart(self):
413
+        LOG.warning("Detect lagopus down. wait lagopus restart.")
414
+
415
+        while True:
416
+            eventlet.sleep(10)
417
+            try:
418
+                lg_lib.LagopusChannel.show()
419
+                break
420
+            except Exception:
421
+                pass
422
+
423
+        LOG.info("lagopus restarted. initialize again...")
424
+        try:
425
+            self.build_cache()
426
+        except Exception:
427
+            LOG.error("Re-initialization failed.")
428
+            eventlet.sleep(0)  # to output log
429
+            # give up
430
+            os._exit(1)
431
+
432
+        LOG.info("Re-initialization done. now operate normaly.")
433
+        self.lagopus_alive = True
434
+
435
+    def is_active(self):
436
+        return self.lagopus_alive
437
+
438
+    def check_active(self):
439
+        if not self.lagopus_alive:
440
+            raise RuntimeError("lagopus is down.")
441
+
394 442
 
395 443
 class LagopusAgent(service.Service):
396 444
 
@@ -408,6 +456,7 @@ class LagopusAgent(service.Service):
408 456
     def start(self):
409 457
         self.context = context.get_admin_context_without_session()
410 458
         self.manager = LagopusManager(self.ryu_app, self.bridge_mappings)
459
+
411 460
         self.connection = n_rpc.create_connection()
412 461
         self.connection.create_consumer("q-lagopus", [self.manager])
413 462
 
@@ -433,6 +482,8 @@ class LagopusAgent(service.Service):
433 482
 
434 483
     def _report_state(self):
435 484
         try:
485
+            if not self.manager.is_active():
486
+                return
436 487
             devices = len(self.manager.ports)
437 488
             self.agent_state['configurations']['devices'] = devices
438 489
             self.state_rpc.report_state(self.context, self.agent_state, True)
@@ -464,6 +515,17 @@ def parse_bridge_mappings():
464 515
         sys.exit(1)
465 516
 
466 517
 
518
+def register_dead_handler(handler):
519
+    global lagopus_dead_handler
520
+    lagopus_dead_handler = handler
521
+
522
+
523
+def handle_dead(dpid):
524
+    global lagopus_dead_handler
525
+    if lagopus_dead_handler:
526
+        lagopus_dead_handler(dpid)
527
+
528
+
467 529
 def main(ryu_app):
468 530
     bridge_mappings = parse_bridge_mappings()
469 531
     report_interval = cfg.CONF.AGENT.report_interval

+ 10
- 0
networking_lagopus/ml2/agent/lagopus_ryuapp.py View File

@@ -14,6 +14,9 @@ from oslo_log import log as logging
14 14
 from oslo_utils import excutils
15 15
 import ryu.app.ofctl.api  # noqa
16 16
 from ryu.base import app_manager
17
+from ryu.controller.handler import DEAD_DISPATCHER
18
+from ryu.controller.handler import set_ev_cls
19
+from ryu.controller import ofp_event
17 20
 from ryu.lib import hub
18 21
 from ryu.ofproto import ofproto_v1_3
19 22
 
@@ -45,3 +48,10 @@ class LagopusAgentRyuApp(app_manager.RyuApp):
45 48
         super(LagopusAgentRyuApp, self).start()
46 49
         self.threads.append(hub.spawn(agent_main_wrapper, self,
47 50
                                       raise_error=True))
51
+
52
+    @set_ev_cls(ofp_event.EventOFPStateChange, DEAD_DISPATCHER)
53
+    def _handle_dead(self, ev):
54
+        dpid = ev.datapath.id
55
+        LOG.debug('del dpid %s', dpid)
56
+        if dpid is not None:
57
+            lagopus_agent.handle_dead(dpid)

Loading…
Cancel
Save