Support L3 agent cleanup on shutdown

Add an option to delete all routers on agent shutdown.

Closes-Bug: #1851609
Change-Id: I7a4056680d8453b2ef2dcc853437a0ec4b3e8044
This commit is contained in:
Oleg Bondarev 2019-11-06 11:43:57 +04:00 committed by Brian Haley
parent 418be00155
commit 5663517613
6 changed files with 60 additions and 1 deletions

View File

@ -274,6 +274,7 @@ class L3NATAgent(ha.AgentMixin,
self.conf = conf self.conf = conf
else: else:
self.conf = cfg.CONF self.conf = cfg.CONF
self.check_config()
self.router_info = {} self.router_info = {}
self.router_factory = RouterFactory() self.router_factory = RouterFactory()
self._register_router_cls(self.router_factory) self._register_router_cls(self.router_factory)
@ -294,6 +295,7 @@ class L3NATAgent(ha.AgentMixin,
self.fullsync = True self.fullsync = True
self.sync_routers_chunk_size = SYNC_ROUTERS_MAX_CHUNK_SIZE self.sync_routers_chunk_size = SYNC_ROUTERS_MAX_CHUNK_SIZE
self._exiting = False
# Get the HA router count from Neutron Server # Get the HA router count from Neutron Server
# This is the first place where we contact neutron-server on startup # This is the first place where we contact neutron-server on startup
@ -344,6 +346,12 @@ class L3NATAgent(ha.AgentMixin,
self._check_ha_router_process_status() self._check_ha_router_process_status()
def check_config(self):
    """Warn the operator when router cleanup on shutdown is enabled.

    Reads ``self.conf.cleanup_on_shutdown``; when it is truthy a warning
    is logged, otherwise nothing happens.
    """
    if not self.conf.cleanup_on_shutdown:
        return
    LOG.warning("cleanup_on_shutdown is set to True, so L3 agent will "
                "cleanup all its routers when exiting, "
                "data-plane will be affected.")
def _check_ha_router_process_status(self): def _check_ha_router_process_status(self):
"""Check HA router VRRP process status in network node. """Check HA router VRRP process status in network node.
@ -668,6 +676,9 @@ class L3NATAgent(ha.AgentMixin,
self._queue.add(router_update) self._queue.add(router_update)
def _process_router_update(self): def _process_router_update(self):
if self._exiting:
return
for rp, update in self._queue.each_update_to_next_resource(): for rp, update in self._queue.each_update_to_next_resource():
LOG.info("Starting router update for %s, action %s, priority %s, " LOG.info("Starting router update for %s, action %s, priority %s, "
"update_id %s. Wait time elapsed: %.3f", "update_id %s. Wait time elapsed: %.3f",
@ -777,7 +788,7 @@ class L3NATAgent(ha.AgentMixin,
def _process_routers_loop(self): def _process_routers_loop(self):
LOG.debug("Starting _process_routers_loop") LOG.debug("Starting _process_routers_loop")
while True: while not self._exiting:
self._pool.spawn_n(self._process_router_update) self._pool.spawn_n(self._process_router_update)
# NOTE(kevinbenton): this is set to 1 second because the actual interval # NOTE(kevinbenton): this is set to 1 second because the actual interval
@ -892,6 +903,13 @@ class L3NATAgent(ha.AgentMixin,
eventlet.spawn_n(self._process_routers_loop) eventlet.spawn_n(self._process_routers_loop)
LOG.info("L3 agent started") LOG.info("L3 agent started")
def stop(self):
    """Stop the agent, deleting every known router if so configured.

    When ``self.conf.cleanup_on_shutdown`` is set, ``self._exiting`` is
    switched to True first, so that ``_process_routers_loop`` and
    ``_process_router_update`` stop picking up new work, and then
    ``delete()`` is called on every entry of ``self.router_info``.
    A failure to delete one router is logged and does not prevent the
    remaining routers from being cleaned up.
    """
    LOG.info("Stopping L3 agent")
    if not self.conf.cleanup_on_shutdown:
        return
    self._exiting = True
    # Iterate over a snapshot: router.delete() may yield to other
    # greenthreads that add or remove entries, and mutating a dict while
    # iterating over it raises RuntimeError.
    for router_id, router in list(self.router_info.items()):
        try:
            router.delete()
        except Exception:
            # Best effort on shutdown: keep going so that one broken
            # router does not leave all the others behind.
            LOG.exception("Failed to delete router %s during L3 agent "
                          "shutdown", router_id)
def create_pd_router_update(self): def create_pd_router_update(self):
router_id = None router_id = None
update = queue.ResourceUpdate(router_id, update = queue.ResourceUpdate(router_id,

View File

@ -100,6 +100,12 @@ OPTS = [
'(by default), the user executing the L3 agent will be ' '(by default), the user executing the L3 agent will be '
'passed. If "root" specified, because radvd is spawned ' 'passed. If "root" specified, because radvd is spawned '
'as root, no "username" parameter will be passed.')), 'as root, no "username" parameter will be passed.')),
cfg.BoolOpt('cleanup_on_shutdown', default=False,
help=_('Delete all routers on L3 agent shutdown. For L3 HA '
'routers it includes a shutdown of keepalived and '
'the state change monitor. NOTE: Setting to True '
'could affect the data plane when stopping or '
'restarting the L3 agent.')),
] ]

View File

@ -72,6 +72,13 @@ class Manager(periodic_task.PeriodicTasks):
""" """
pass pass
def stop(self):
    """Handle stop.

    The base implementation is a no-op; child classes can override
    this method to run their own shutdown logic.
    """
def validate_pre_plugin_load(): def validate_pre_plugin_load():
"""Checks if the configuration variables are valid. """Checks if the configuration variables are valid.

View File

@ -427,6 +427,7 @@ class Service(n_rpc.Service):
except Exception: except Exception:
LOG.exception("Exception occurs when timer stops") LOG.exception("Exception occurs when timer stops")
self.timers = [] self.timers = []
self.manager.stop()
def wait(self): def wait(self):
super(Service, self).wait() super(Service, self).wait()

View File

@ -3982,3 +3982,20 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework):
self.conf, get_networks_callback=mock.ANY) self.conf, get_networks_callback=mock.ANY)
funct_partial_mock.assert_called_once_with( funct_partial_mock.assert_called_once_with(
self.plugin_api.get_networks, agent.context) self.plugin_api.get_networks, agent.context)
def test_stop_no_cleanup(self):
    """stop() must not delete routers when cleanup is left disabled."""
    l3_nat_agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
    fake_router = mock.Mock()
    l3_nat_agent.router_info[1] = fake_router
    l3_nat_agent.stop()
    fake_router.delete.assert_not_called()
def test_stop_cleanup(self):
    """stop() must delete routers and mark the agent as exiting."""
    self.conf.set_override('cleanup_on_shutdown', True)
    l3_nat_agent = l3_agent.L3NATAgent(HOSTNAME, self.conf)
    fake_router = mock.Mock()
    l3_nat_agent.router_info[1] = fake_router
    # The flag must only flip as a result of stop().
    self.assertFalse(l3_nat_agent._exiting)
    l3_nat_agent.stop()
    fake_router.delete.assert_called()
    self.assertTrue(l3_nat_agent._exiting)

View File

@ -0,0 +1,10 @@
---
features:
- |
A new configuration option, ``cleanup_on_shutdown``, was added to the
L3 agent. If set to True, the L3 agent will explicitly delete all routers
on shutdown. For L3 HA routers this includes a graceful shutdown of
keepalived and the state change monitor, which allows a faster failover
in certain conditions. The default value of ``cleanup_on_shutdown`` is
False to maintain backward compatibility. Setting it to True could affect
the data plane when stopping or restarting the L3 agent.