From 0bd4472ef7bdb9d94f988669f34f7eaa53ca0a89 Mon Sep 17 00:00:00 2001 From: Assaf Muller Date: Mon, 15 Sep 2014 18:11:17 +0300 Subject: [PATCH] HA routers master state now distributed amongst agents We're currently running with no pre-emption, meaning that the first router in a cluster to go up will be the master, regardless of priority. Since the order in which we sent notifications was constant, the same agent hosted the master instances of all HA routers, defeating the idea of load sharing. Closes-Bug: #1365429 Change-Id: Ia6fe2bd0317c241bf7eb55915df7650dfdc68210 --- .../api/rpc/agentnotifiers/l3_rpc_agent_api.py | 17 ++++++++++++----- neutron/db/l3_hamode_db.py | 3 ++- neutron/plugins/nec/nec_router.py | 5 +++-- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py b/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py index d06074e78..0b8f7e5ef 100644 --- a/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py +++ b/neutron/api/rpc/agentnotifiers/l3_rpc_agent_api.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random + from neutron.common import constants from neutron.common import rpc as n_rpc from neutron.common import topics @@ -43,7 +45,8 @@ class L3AgentNotifyAPI(n_rpc.RpcProxy): payload=payload), topic='%s.%s' % (topics.L3_AGENT, host)) - def _agent_notification(self, context, method, router_ids, operation): + def _agent_notification(self, context, method, router_ids, operation, + shuffle_agents): """Notify changed routers to hosting l3 agents.""" adminContext = context.is_admin and context or context.elevated() plugin = manager.NeutronManager.get_service_plugins().get( @@ -53,6 +56,8 @@ class L3AgentNotifyAPI(n_rpc.RpcProxy): adminContext, [router_id], admin_state_up=True, active=True) + if shuffle_agents: + random.shuffle(l3_agents) for l3_agent in l3_agents: LOG.debug(_('Notify agent at %(topic)s.%(host)s the message ' '%(method)s'), @@ -91,7 +96,8 @@ class L3AgentNotifyAPI(n_rpc.RpcProxy): self.make_msg(method, payload=dvr_arptable), topic=topic, version='1.2') - def _notification(self, context, method, router_ids, operation): + def _notification(self, context, method, router_ids, operation, + shuffle_agents): """Notify all the agents that are hosting the routers.""" plugin = manager.NeutronManager.get_service_plugins().get( service_constants.L3_ROUTER_NAT) @@ -105,7 +111,7 @@ class L3AgentNotifyAPI(n_rpc.RpcProxy): context or context.elevated()) plugin.schedule_routers(adminContext, router_ids) self._agent_notification( - context, method, router_ids, operation) + context, method, router_ids, operation, shuffle_agents) else: self.fanout_cast( context, self.make_msg(method, @@ -132,10 +138,11 @@ class L3AgentNotifyAPI(n_rpc.RpcProxy): def router_deleted(self, context, router_id): self._notification_fanout(context, 'router_deleted', router_id) - def routers_updated(self, context, router_ids, operation=None, data=None): + def routers_updated(self, context, router_ids, operation=None, data=None, + shuffle_agents=False): if router_ids: self._notification(context, 'routers_updated', router_ids, - operation) + operation, shuffle_agents) def add_arp_entry(self, context, router_id, arp_table, operation=None): self._agent_notification_arp(context, 'add_arp_entry', router_id, diff --git a/neutron/db/l3_hamode_db.py b/neutron/db/l3_hamode_db.py index 19ecf3cc9..a0ed58085 100644 --- a/neutron/db/l3_hamode_db.py +++ b/neutron/db/l3_hamode_db.py @@ -323,7 +323,8 @@ class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin): l3_port_check=False) def _notify_ha_interfaces_updated(self, context, router_id): - self.l3_rpc_notifier.routers_updated(context, [router_id]) + self.l3_rpc_notifier.routers_updated( + context, [router_id], shuffle_agents=True) @classmethod def _is_ha(cls, router): diff --git a/neutron/plugins/nec/nec_router.py b/neutron/plugins/nec/nec_router.py index 9af75cbed..f47bcd6ad 100644 --- a/neutron/plugins/nec/nec_router.py +++ b/neutron/plugins/nec/nec_router.py @@ -289,7 +289,8 @@ class L3AgentSchedulerDbMixin(l3_agentschedulers_db.L3AgentSchedulerDbMixin): class L3AgentNotifyAPI(l3_rpc_agent_api.L3AgentNotifyAPI): - def _notification(self, context, method, router_ids, operation): + def _notification(self, context, method, router_ids, operation, + shuffle_agents): """Notify all the agents that are hosting the routers. _notification() is called in L3 db plugin for all routers regardless @@ -300,7 +301,7 @@ class L3AgentNotifyAPI(l3_rpc_agent_api.L3AgentNotifyAPI): router_ids = rdb.get_routers_by_provider( context.session, nconst.ROUTER_PROVIDER_L3AGENT, router_ids) super(L3AgentNotifyAPI, self)._notification( - context, method, router_ids, operation) + context, method, router_ids, operation, shuffle_agents) def load_driver(plugin, ofc_manager):