From 822ad5f06bcef8f95f36032d4fd4709975cecc31 Mon Sep 17 00:00:00 2001 From: Assaf Muller Date: Sat, 19 Dec 2015 14:13:43 -0500 Subject: [PATCH] Force L3 agent to resync router it could not configure If the L3 agent fails to configure a router, commit: 4957b5b43521a61873a041fe3e8989ed399903d9 changed it so that instead of performing an expensive full sync, only that router is reconfigured. However, it tries to reconfigure the cached router. This is a change of behavior from the fullsync days. The retry is more likely to succeed if the router is retrieved from the server, instead of using the locally cached version, in case the user or operator fixed bad input, or if the router was retrieved in a bad state due to a server-side race condition. Note that this is only relevant to full syncs, as those retrieve routers from the server and queue updates with the router object. Incremental updates queue up updates without router objects, so if one of those fails it would always be resynced on a second attempt. Related-Bug: #1494682 Change-Id: Id0565e11b3023a639589f2734488029f194e2f9d --- neutron/agent/l3/agent.py | 1 + neutron/tests/unit/agent/l3/test_agent.py | 25 +++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/neutron/agent/l3/agent.py b/neutron/agent/l3/agent.py index e71e51d8720..06d48028c02 100644 --- a/neutron/agent/l3/agent.py +++ b/neutron/agent/l3/agent.py @@ -441,6 +441,7 @@ class L3NATAgent(firewall_l3_agent.FWaaSL3AgentRpcCallback, priority=queue.PRIORITY_SYNC_ROUTERS_TASK): router_update.timestamp = timeutils.utcnow() router_update.priority = priority + router_update.router = None # Force the agent to resync the router self._queue.add(router_update) def _process_router_update(self): diff --git a/neutron/tests/unit/agent/l3/test_agent.py b/neutron/tests/unit/agent/l3/test_agent.py index 22576d70914..42df743c694 100644 --- a/neutron/tests/unit/agent/l3/test_agent.py +++ b/neutron/tests/unit/agent/l3/test_agent.py @@ -22,6 +22,7 @@ import mock import netaddr from oslo_log import log import oslo_messaging +from oslo_utils import timeutils from oslo_utils import uuidutils import six from testtools import matchers @@ -36,6 +37,7 @@ from neutron.agent.l3 import legacy_router from neutron.agent.l3 import link_local_allocator as lla from neutron.agent.l3 import namespaces from neutron.agent.l3 import router_info as l3router +from neutron.agent.l3 import router_processing_queue from neutron.agent.linux import dibbler from neutron.agent.linux import external_process from neutron.agent.linux import interface @@ -1818,6 +1820,29 @@ class TestBasicRouterOperations(BasicRouterOperationsFramework): oslo_messaging.MessagingTimeout) self._test_process_routers_update_rpc_timeout() + def test_process_routers_update_resyncs_failed_router(self): + agent = l3_agent.L3NATAgent(HOSTNAME, self.conf) + + # Attempting to configure the router will fail + agent._process_router_if_compatible = mock.MagicMock() + agent._process_router_if_compatible.side_effect = RuntimeError() + + # Queue an update from a full sync + update = router_processing_queue.RouterUpdate( + 42, + router_processing_queue.PRIORITY_SYNC_ROUTERS_TASK, + router=mock.Mock(), + timestamp=timeutils.utcnow()) + agent._queue.add(update) + agent._process_router_update() + + # The update contained the router object, get_routers won't be called + self.assertFalse(agent.plugin_rpc.get_routers.called) + + # The update failed, assert that get_routers was called + agent._process_router_update() + self.assertTrue(agent.plugin_rpc.get_routers.called) + def test_process_routers_update_rpc_timeout_on_get_ext_net(self): self._test_process_routers_update_rpc_timeout(ext_net_call=True, ext_net_call_failed=True)