From 566351761318aa1f33650ba4d78b55cc6a4f8f7b Mon Sep 17 00:00:00 2001 From: Oleg Bondarev Date: Wed, 6 Nov 2019 11:43:57 +0400 Subject: [PATCH] Support L3 agent cleanup on shutdown Add an option to delete all routers on agent shutdown. Closes-Bug: #1851609 Change-Id: I7a4056680d8453b2ef2dcc853437a0ec4b3e8044 --- neutron/agent/l3/agent.py | 20 ++++++++++++++++++- neutron/conf/agent/l3/config.py | 6 ++++++ neutron/manager.py | 7 +++++++ neutron/service.py | 1 + neutron/tests/unit/agent/l3/test_agent.py | 17 ++++++++++++++++ ...nt_graceful_shutdown-87bf3304e6fab8a5.yaml | 10 ++++++++++ 6 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 releasenotes/notes/l3_agent_graceful_shutdown-87bf3304e6fab8a5.yaml diff --git a/neutron/agent/l3/agent.py b/neutron/agent/l3/agent.py index 5de4184ed70..070c59176d4 100644 --- a/neutron/agent/l3/agent.py +++ b/neutron/agent/l3/agent.py @@ -274,6 +274,7 @@ class L3NATAgent(ha.AgentMixin, self.conf = conf else: self.conf = cfg.CONF + self.check_config() self.router_info = {} self.router_factory = RouterFactory() self._register_router_cls(self.router_factory) @@ -294,6 +295,7 @@ class L3NATAgent(ha.AgentMixin, self.fullsync = True self.sync_routers_chunk_size = SYNC_ROUTERS_MAX_CHUNK_SIZE + self._exiting = False # Get the HA router count from Neutron Server # This is the first place where we contact neutron-server on startup @@ -344,6 +346,12 @@ class L3NATAgent(ha.AgentMixin, self._check_ha_router_process_status() + def check_config(self): + if self.conf.cleanup_on_shutdown: + LOG.warning("cleanup_on_shutdown is set to True, so L3 agent will " + "cleanup all its routers when exiting, " + "data-plane will be affected.") + def _check_ha_router_process_status(self): """Check HA router VRRP process status in network node. 
@@ -668,6 +676,9 @@ class L3NATAgent(ha.AgentMixin, self._queue.add(router_update) def _process_router_update(self): + if self._exiting: + return + for rp, update in self._queue.each_update_to_next_resource(): LOG.info("Starting router update for %s, action %s, priority %s, " "update_id %s. Wait time elapsed: %.3f", @@ -777,7 +788,7 @@ class L3NATAgent(ha.AgentMixin, def _process_routers_loop(self): LOG.debug("Starting _process_routers_loop") - while True: + while not self._exiting: self._pool.spawn_n(self._process_router_update) # NOTE(kevinbenton): this is set to 1 second because the actual interval @@ -892,6 +903,13 @@ class L3NATAgent(ha.AgentMixin, eventlet.spawn_n(self._process_routers_loop) LOG.info("L3 agent started") + def stop(self): + LOG.info("Stopping L3 agent") + if self.conf.cleanup_on_shutdown: + self._exiting = True + for router in self.router_info.values(): + router.delete() + def create_pd_router_update(self): router_id = None update = queue.ResourceUpdate(router_id, diff --git a/neutron/conf/agent/l3/config.py b/neutron/conf/agent/l3/config.py index f9cc09cd3fa..060423e11a4 100644 --- a/neutron/conf/agent/l3/config.py +++ b/neutron/conf/agent/l3/config.py @@ -100,6 +100,12 @@ OPTS = [ '(by default), the user executing the L3 agent will be ' 'passed. If "root" specified, because radvd is spawned ' 'as root, no "username" parameter will be passed.')), + cfg.BoolOpt('cleanup_on_shutdown', default=False, + help=_('Delete all routers on L3 agent shutdown. For L3 HA ' + 'routers it includes a shutdown of keepalived and ' + 'the state change monitor. NOTE: Setting to True ' + 'could affect the data plane when stopping or ' + 'restarting the L3 agent.')), ] diff --git a/neutron/manager.py b/neutron/manager.py index de86b777871..15df5f0bfec 100644 --- a/neutron/manager.py +++ b/neutron/manager.py @@ -72,6 +72,13 @@ class Manager(periodic_task.PeriodicTasks): """ pass + def stop(self): + """Handle stop. + + Child classes can override this method. 
+ """ + pass + def validate_pre_plugin_load(): """Checks if the configuration variables are valid. diff --git a/neutron/service.py b/neutron/service.py index 06bf4cdc7b2..6fc8a54af93 100644 --- a/neutron/service.py +++ b/neutron/service.py @@ -427,6 +427,7 @@ class Service(n_rpc.Service): except Exception: LOG.exception("Exception occurs when timer stops") self.timers = [] + self.manager.stop() def wait(self): super(Service, self).wait() diff --git a/neutron/tests/unit/agent/l3/test_agent.py b/neutron/tests/unit/agent/l3/test_agent.py index 2ea7de2422f..fa7a347976c 100644 --- a/neutron/tests/unit/agent/l3/test_agent.py +++ b/neutron/tests/unit/agent/l3/test_agent.py @@ -3982,3 +3982,20 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework): self.conf, get_networks_callback=mock.ANY) funct_partial_mock.assert_called_once_with( self.plugin_api.get_networks, agent.context) + + def test_stop_no_cleanup(self): + agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) + router = mock.Mock() + agent.router_info[1] = router + agent.stop() + self.assertFalse(router.delete.called) + + def test_stop_cleanup(self): + self.conf.set_override('cleanup_on_shutdown', True) + agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) + router = mock.Mock() + agent.router_info[1] = router + self.assertFalse(agent._exiting) + agent.stop() + self.assertTrue(router.delete.called) + self.assertTrue(agent._exiting) diff --git a/releasenotes/notes/l3_agent_graceful_shutdown-87bf3304e6fab8a5.yaml b/releasenotes/notes/l3_agent_graceful_shutdown-87bf3304e6fab8a5.yaml new file mode 100644 index 00000000000..98c2d25f60c --- /dev/null +++ b/releasenotes/notes/l3_agent_graceful_shutdown-87bf3304e6fab8a5.yaml @@ -0,0 +1,10 @@ +--- +features: + - | + A new configuration option, ``cleanup_on_shutdown``, was added to the + L3 agent. If set to True the L3 agent will explicitly delete all routers + on shutdown. 
For L3 HA routers it includes a graceful shutdown of + keepalived and the state change monitor, which will allow a faster failover + in certain conditions. The default value of ``cleanup_on_shutdown`` is + False to maintain backward compatibility. Setting it to True could affect + the data plane when stopping or restarting the L3 agent.