# Copyright (C) 2014 eNovance SAS
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#

import functools
import random

import netaddr
from neutron_lib.api.definitions import l3 as l3_apidef
from neutron_lib.api.definitions import l3_ext_ha_mode as l3_ext_ha_apidef
from neutron_lib.api.definitions import port as port_def
from neutron_lib.api.definitions import portbindings
from neutron_lib.api.definitions import provider_net as providernet
from neutron_lib.api import extensions
from neutron_lib.api import validators
from neutron_lib.callbacks import events
from neutron_lib.callbacks import priority_group
from neutron_lib.callbacks import registry
from neutron_lib.callbacks import resources
from neutron_lib import constants
from neutron_lib.db import api as db_api
from neutron_lib import exceptions as n_exc
from neutron_lib.exceptions import l3 as l3_exc
from neutron_lib.exceptions import l3_ext_ha_mode as l3ha_exc
from neutron_lib.objects import exceptions as obj_base
from neutron_lib.plugins import utils as p_utils
from oslo_config import cfg
from oslo_db import exception as db_exc
from oslo_log import helpers as log_helpers
from oslo_log import log as logging
from oslo_utils import excutils
import six
import sqlalchemy as sa
from sqlalchemy import exc as sql_exc
from sqlalchemy import orm

from neutron._i18n import _
from neutron.common import utils as n_utils
from neutron.conf.db import l3_hamode_db
from neutron.db import _utils as db_utils
from neutron.db.availability_zone import router as router_az_db
from neutron.db import l3_dvr_db
from neutron.objects import base
from neutron.objects import l3_hamode
from neutron.objects import router as l3_obj


VR_ID_RANGE = set(range(1, 255))
MAX_ALLOCATION_TRIES = 10
UNLIMITED_AGENTS_PER_ROUTER = 0

LOG = logging.getLogger(__name__)

l3_hamode_db.register_db_l3_hamode_opts()


@registry.has_registry_receivers
class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
                         router_az_db.RouterAvailabilityZoneMixin):
    """Mixin class to add high availability capability to routers."""

    def _verify_configuration(self):
        self.ha_cidr = cfg.CONF.l3_ha_net_cidr
        try:
            net = netaddr.IPNetwork(self.ha_cidr)
        except netaddr.AddrFormatError:
            raise l3ha_exc.HANetworkCIDRNotValid(cidr=self.ha_cidr)
        if ('/' not in self.ha_cidr or net.network != net.ip):
            raise l3ha_exc.HANetworkCIDRNotValid(cidr=self.ha_cidr)
        self._check_num_agents_per_router()

    def _check_num_agents_per_router(self):
        max_agents = cfg.CONF.max_l3_agents_per_router

        if max_agents != UNLIMITED_AGENTS_PER_ROUTER and max_agents < 1:
            raise l3ha_exc.HAMaximumAgentsNumberNotValid(
                max_agents=max_agents)

    def __new__(cls, *args, **kwargs):
        inst = super(L3_HA_NAT_db_mixin, cls).__new__(cls, *args, **kwargs)
        inst._verify_configuration()
        return inst

    def get_ha_network(self, context, tenant_id):
        pager = base.Pager(limit=1)
        results = l3_hamode.L3HARouterNetwork.get_objects(
            context, _pager=pager, project_id=tenant_id)
        return results.pop() if results else None

    def _get_allocated_vr_id(self, context, network_id):
        vr_id_objs = l3_hamode.L3HARouterVRIdAllocation.get_objects(
            context, network_id=network_id)

        allocated_vr_ids = set(a.vr_id for a in vr_id_objs) - set([0])

        return allocated_vr_ids

    def _get_vr_id(self, context, network_id):
        allocated_vr_ids = self._get_allocated_vr_id(context, network_id)
        available_vr_ids = VR_ID_RANGE - allocated_vr_ids

        if not available_vr_ids:
            return None

        return random.choice(list(available_vr_ids))

    @db_api.retry_if_session_inactive()
    def _ensure_vr_id(self, context, router_db, ha_network):
        router_id = router_db.id
        network_id = ha_network.network_id

        # TODO(kevinbenton): let decorator handle duplicate retry
        # like in review.opendev.org/#/c/367179/1/neutron/db/l3_hamode_db.py
        for count in range(MAX_ALLOCATION_TRIES):
            try:
                # NOTE(kevinbenton): we disallow subtransactions because the
                # retry logic will bust any parent transactions
                with db_api.CONTEXT_WRITER.using(context):
                    if router_db.extra_attributes.ha_vr_id:
                        LOG.debug(
                            "Router %(router_id)s has already been "
                            "allocated a ha_vr_id %(ha_vr_id)d!",
                            {'router_id': router_id,
                             'ha_vr_id':
                                 router_db.extra_attributes.ha_vr_id})
                        return

                    old_router = self._make_router_dict(router_db)
                    vr_id = self._get_vr_id(context, network_id)
                    if vr_id is None:
                        raise l3ha_exc.NoVRIDAvailable(router_id=router_id)

                    allocation = l3_hamode.L3HARouterVRIdAllocation(
                        context, network_id=network_id, vr_id=vr_id)
                    allocation.create()

                    router_db.extra_attributes.ha_vr_id = allocation.vr_id
                    LOG.debug(
                        "Router %(router_id)s has been allocated a ha_vr_id "
                        "%(ha_vr_id)d.",
                        {'router_id': router_id,
                         'ha_vr_id': allocation.vr_id})
                    router_body = {l3_apidef.ROUTER:
                                   {l3_ext_ha_apidef.HA_INFO: True,
                                    'ha_vr_id': allocation.vr_id}}
                    registry.publish(resources.ROUTER,
                                     events.PRECOMMIT_UPDATE, self,
                                     payload=events.DBEventPayload(
                                         context, request_body=router_body,
                                         states=(old_router,),
                                         resource_id=router_id,
                                         desired_state=router_db))

                    return allocation.vr_id
            except obj_base.NeutronDbObjectDuplicateEntry:
                LOG.info("Attempt %(count)s to allocate a VRID in the "
                         "network %(network)s for the router %(router)s",
                         {'count': count, 'network': network_id,
                          'router': router_id})

        raise l3ha_exc.MaxVRIDAllocationTriesReached(
            network_id=network_id, router_id=router_id,
            max_tries=MAX_ALLOCATION_TRIES)

    @db_api.retry_if_session_inactive()
    def _delete_vr_id_allocation(self, context, ha_network, vr_id):
        l3_hamode.L3HARouterVRIdAllocation.delete_objects(
            context, network_id=ha_network.network_id, vr_id=vr_id)

    def _create_ha_subnet(self, context, network_id, tenant_id):
        args = {'network_id': network_id,
                'tenant_id': '',
                'name': constants.HA_SUBNET_NAME % tenant_id,
                'ip_version': 4,
                'cidr': cfg.CONF.l3_ha_net_cidr,
                'enable_dhcp': False,
                'gateway_ip': None}
        return p_utils.create_subnet(self._core_plugin, context,
                                     {'subnet': args})

    def _create_ha_network_tenant_binding(self, context, tenant_id,
                                          network_id):
        ha_network = l3_hamode.L3HARouterNetwork(
            context, project_id=tenant_id, network_id=network_id)
        ha_network.create()
        # we need to check if someone else just inserted at exactly the
        # same time as us because there is no constraint in L3HARouterNetwork
        # that prevents multiple networks per tenant
        if l3_hamode.L3HARouterNetwork.count(
                context, project_id=tenant_id) > 1:
            # we need to throw an error so our network is deleted
            # and the process is started over where the existing
            # network will be selected.
            raise db_exc.DBDuplicateEntry(columns=['tenant_id'])
        return ha_network

    def _add_ha_network_settings(self, network):
        if cfg.CONF.l3_ha_network_type:
            network[providernet.NETWORK_TYPE] = cfg.CONF.l3_ha_network_type

        if cfg.CONF.l3_ha_network_physical_name:
            network[providernet.PHYSICAL_NETWORK] = (
                cfg.CONF.l3_ha_network_physical_name)

    def _create_ha_network(self, context, tenant_id):
        admin_ctx = context.elevated()

        args = {'network':
                {'name': constants.HA_NETWORK_NAME % tenant_id,
                 'tenant_id': '',
                 'shared': False,
                 'admin_state_up': True}}
        self._add_ha_network_settings(args['network'])
        creation = functools.partial(p_utils.create_network,
                                     self._core_plugin, admin_ctx, args)
        content = functools.partial(self._create_ha_network_tenant_binding,
                                    admin_ctx, tenant_id)
        deletion = functools.partial(self._core_plugin.delete_network,
                                     admin_ctx)

        network, ha_network = db_utils.safe_creation(
            context, creation, deletion, content, transaction=False)
        try:
            self._create_ha_subnet(admin_ctx, network['id'], tenant_id)
        except Exception:
            with excutils.save_and_reraise_exception():
                self._core_plugin.delete_network(admin_ctx, network['id'])

        return ha_network

    def get_number_of_agents_for_scheduling(self, context):
        """Return number of agents on which the router will be scheduled."""

        num_agents = len(
            self.get_l3_agents(
                context, active=True,
                filters={'agent_modes': [constants.L3_AGENT_MODE_LEGACY,
                                         constants.L3_AGENT_MODE_DVR_SNAT]}))
        max_agents = cfg.CONF.max_l3_agents_per_router
        if max_agents:
            if max_agents > num_agents:
                LOG.info("Number of active agents lower than "
                         "max_l3_agents_per_router. L3 agents "
                         "available: %s", num_agents)
            else:
                num_agents = max_agents

        return num_agents

    @db_api.retry_if_session_inactive()
    def _create_ha_port_binding(self, context, router_id, port_id):
        try:
            l3_obj.RouterPort(
                context, port_id=port_id, router_id=router_id,
                port_type=constants.DEVICE_OWNER_ROUTER_HA_INTF).create()
            portbinding = l3_hamode.L3HARouterAgentPortBinding(
                context, port_id=port_id, router_id=router_id)
            portbinding.create()

            return portbinding
        except db_exc.DBReferenceError as e:
            with excutils.save_and_reraise_exception() as ctxt:
                if isinstance(e.inner_exception, sql_exc.IntegrityError):
                    ctxt.reraise = False
                    LOG.debug(
                        'Failed to create HA router agent PortBinding, '
                        'Router %s has already been removed '
                        'by concurrent operation', router_id)
                    raise l3_exc.RouterNotFound(router_id=router_id)

    def add_ha_port(self, context, router_id, network_id, tenant_id):
        # NOTE(kevinbenton): we have to block any ongoing transactions because
        # our exception handling will try to delete the port using the normal
        # core plugin API. If this function is called inside of a transaction
        # the exception will mangle the state, cause the delete call to fail,
        # and end up relying on the DB rollback to remove the port instead of
        # proper delete_port call.
        if context.session.is_active:
            raise RuntimeError(_('add_ha_port cannot be called inside of a '
                                 'transaction.'))
        args = {'tenant_id': '',
                'network_id': network_id,
                'admin_state_up': True,
                'device_id': router_id,
                'device_owner': constants.DEVICE_OWNER_ROUTER_HA_INTF,
                'name': constants.HA_PORT_NAME % tenant_id}
        creation = functools.partial(p_utils.create_port, self._core_plugin,
                                     context, {'port': args})
        content = functools.partial(self._create_ha_port_binding, context,
                                    router_id)
        deletion = functools.partial(self._core_plugin.delete_port, context,
                                     l3_port_check=False)
        port, binding = db_utils.safe_creation(context, creation,
                                               deletion, content,
                                               transaction=False)
        # _create_ha_port_binding returns the binding object now and
        # to populate agent relation db_obj is used.
        return binding.db_obj

    def _delete_ha_interfaces(self, context, router_id):
        admin_ctx = context.elevated()
        device_filter = {'device_id': [router_id],
                         'device_owner':
                         [constants.DEVICE_OWNER_ROUTER_HA_INTF]}
        ports = self._core_plugin.get_ports(admin_ctx, filters=device_filter)

        for port in ports:
            self._core_plugin.delete_port(admin_ctx, port['id'],
                                          l3_port_check=False)

    def delete_ha_interfaces_on_host(self, context, router_id, host):
        admin_ctx = context.elevated()
        port_ids = (binding.port_id for binding
                    in self.get_ha_router_port_bindings(admin_ctx,
                                                        [router_id], host))
        for port_id in port_ids:
            self._core_plugin.delete_port(admin_ctx, port_id,
                                          l3_port_check=False)

    def _notify_router_updated(self, context, router_id):
        self.l3_rpc_notifier.routers_updated(
            context, [router_id], shuffle_agents=True)

    @classmethod
    def _is_ha(cls, router):
        ha = router.get('ha')
        if not validators.is_attr_set(ha):
            ha = cfg.CONF.l3_ha
        return ha

    def _get_device_owner(self, context, router=None):
        """Get device_owner for the specified router."""
        router_is_uuid = isinstance(router, six.string_types)
        if router_is_uuid:
            router = self._get_router(context, router)
        if (is_ha_router(router) and
                not l3_dvr_db.is_distributed_router(router)):
            return constants.DEVICE_OWNER_HA_REPLICATED_INT
        return super(L3_HA_NAT_db_mixin,
                     self)._get_device_owner(context, router)

    @n_utils.transaction_guard
    def _ensure_vr_id_and_network(self, context, router_db):
        """Attach vr_id to router while tolerating network deletes."""
        creator = functools.partial(self._ensure_vr_id, context, router_db)
        dep_getter = functools.partial(self.get_ha_network, context,
                                       router_db.tenant_id)
        dep_creator = functools.partial(self._create_ha_network, context,
                                        router_db.tenant_id)
        dep_deleter = functools.partial(self._delete_ha_network, context)
        dep_id_attr = 'network_id'
        return n_utils.create_object_with_dependency(
            creator, dep_getter, dep_creator, dep_id_attr, dep_deleter)[1]

    @registry.receives(resources.ROUTER, [events.BEFORE_CREATE],
                       priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
    @db_api.retry_if_session_inactive()
    def _before_router_create(self, resource, event, trigger,
                              context, router, **kwargs):
        """Event handler to create HA resources before router creation."""
        if not self._is_ha(router):
            return
        # ensure the HA network exists before we start router creation so
        # we can provide meaningful errors back to the user if no network
        # can be allocated
        if not self.get_ha_network(context, router['tenant_id']):
            self._create_ha_network(context, router['tenant_id'])

    @registry.receives(resources.ROUTER, [events.PRECOMMIT_CREATE],
                       priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
    def _precommit_router_create(self, resource, event, trigger, context,
                                 router, router_db, **kwargs):
        """Event handler to set ha flag and status on creation."""
        is_ha = self._is_ha(router)
        router['ha'] = is_ha
        self.set_extra_attr_value(context, router_db, 'ha', is_ha)
        if not is_ha:
            return
        # This will throw an exception if there aren't enough agents to
        # handle this HA router
        self.get_number_of_agents_for_scheduling(context)
        ha_net = self.get_ha_network(context, router['tenant_id'])
        if not ha_net:
            # net was deleted, throw a retry to start over to create another
            raise db_exc.RetryRequest(
                l3ha_exc.HANetworkConcurrentDeletion(
                    tenant_id=router['tenant_id']))

    @registry.receives(resources.ROUTER, [events.AFTER_CREATE],
                       priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
    def _after_router_create(self, resource, event, trigger, context,
                             router_id, router, router_db, **kwargs):
        if not router['ha']:
            return
        try:
            self.schedule_router(context, router_id)
            router['ha_vr_id'] = router_db.extra_attributes.ha_vr_id
            self._notify_router_updated(context, router_id)
        except Exception as e:
            with excutils.save_and_reraise_exception() as ctx:
                if isinstance(e, l3ha_exc.NoVRIDAvailable):
                    ctx.reraise = False
                    LOG.warning("No more VRIDs for router: %s", e)
                else:
                    LOG.exception("Failed to schedule HA router %s.",
                                  router_id)
                router['status'] = self._update_router_db(
                    context, router_id,
                    {'status': constants.ERROR})['status']

    @registry.receives(resources.ROUTER, [events.PRECOMMIT_UPDATE],
                       priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
    def _validate_migration(self, resource, event, trigger, payload=None):
        """Event handler on precommit update to validate migration."""
        original_ha_state = payload.states[0]['ha']
        requested_ha_state = payload.request_body.get('ha')

        ha_changed = (requested_ha_state is not None and
                      requested_ha_state != original_ha_state)
        if not ha_changed:
            return

        if payload.desired_state.admin_state_up:
            msg = _('Cannot change HA attribute of active routers. Please '
                    'set router admin_state_up to False prior to upgrade')
            raise n_exc.BadRequest(resource='router', msg=msg)

        if requested_ha_state:
            # This will throw HANotEnoughAvailableAgents if there aren't
            # enough l3 agents to handle this router.
            self.get_number_of_agents_for_scheduling(payload.context)
            old_owner = constants.DEVICE_OWNER_ROUTER_INTF
            new_owner = constants.DEVICE_OWNER_HA_REPLICATED_INT
        else:
            old_owner = constants.DEVICE_OWNER_HA_REPLICATED_INT
            new_owner = constants.DEVICE_OWNER_ROUTER_INTF
            ha_network = self.get_ha_network(payload.context,
                                             payload.desired_state.tenant_id)
            self._delete_vr_id_allocation(
                payload.context, ha_network,
                payload.desired_state.extra_attributes.ha_vr_id)
            payload.desired_state.extra_attributes.ha_vr_id = None

        if (payload.request_body.get('distributed') or
                payload.states[0]['distributed']):
            self.set_extra_attr_value(payload.context, payload.desired_state,
                                      'ha', requested_ha_state)
            return

        self._migrate_router_ports(
            payload.context, payload.desired_state,
            old_owner=old_owner, new_owner=new_owner)
        self.set_extra_attr_value(
            payload.context, payload.desired_state, 'ha', requested_ha_state)

    @registry.receives(resources.ROUTER, [events.AFTER_UPDATE],
                       priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
    def _reconfigure_ha_resources(self, resource, event, trigger, context,
                                  router_id, old_router, router, router_db,
                                  **kwargs):
        """Event handler to react to changes after HA flag has been updated.
        """
        ha_changed = old_router['ha'] != router['ha']
        if not ha_changed:
            return

        requested_ha_state = router['ha']
        # The HA attribute has changed. First unbind the router from agents
        # to force a proper re-scheduling to agents.
        # TODO(jschwarz): This will have to be more selective to get HA + DVR
        # working (Only unbind from dvr_snat nodes).
        self._unbind_ha_router(context, router_id)

        if not requested_ha_state:
            self._delete_ha_interfaces(context, router_db.id)
            # always attempt to cleanup the network as the router is
            # deleted. the core plugin will stop us if it's in use
            ha_network = self.get_ha_network(context,
                                             router_db.tenant_id)
            if ha_network:
                self.safe_delete_ha_network(context, ha_network,
                                            router_db.tenant_id)

        self.schedule_router(context, router_id)
        self._notify_router_updated(context, router_db.id)

    def _delete_ha_network(self, context, net):
        admin_ctx = context.elevated()
        self._core_plugin.delete_network(admin_ctx, net.network_id)

    def safe_delete_ha_network(self, context, ha_network, tenant_id):
        try:
            # reference the attr inside the try block before we attempt
            # to delete the network and potentially invalidate the
            # relationship
            net_id = ha_network.network_id
            self._delete_ha_network(context, ha_network)
        except (n_exc.NetworkNotFound, orm.exc.ObjectDeletedError):
            LOG.debug(
                "HA network for tenant %s was already deleted.", tenant_id)
        except sa.exc.InvalidRequestError:
            LOG.info("HA network %s can not be deleted.", net_id)
        except n_exc.NetworkInUse:
            # network is still in use, this is normal so we don't
            # log anything
            pass
        else:
            LOG.info("HA network %(network)s was deleted as "
                     "no HA routers are present in tenant "
                     "%(tenant)s.",
                     {'network': net_id, 'tenant': tenant_id})

    @registry.receives(resources.ROUTER, [events.PRECOMMIT_DELETE],
                       priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
    def _release_router_vr_id(self, resource, event, trigger, context,
                              router_db, **kwargs):
        """Event handler for removal of VRID during router delete."""
        if router_db.extra_attributes.ha:
            ha_network = self.get_ha_network(context,
                                             router_db.tenant_id)
            if ha_network:
                self._delete_vr_id_allocation(
                    context, ha_network,
                    router_db.extra_attributes.ha_vr_id)

    @registry.receives(resources.ROUTER, [events.AFTER_DELETE],
                       priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
    @db_api.retry_if_session_inactive()
    def _cleanup_ha_network(self, resource, event, trigger, context,
                            router_id, original, **kwargs):
        """Event handler to attempt HA network deletion after router delete.
        """
        if not original['ha']:
            return
        ha_network = self.get_ha_network(context, original['tenant_id'])
        if not ha_network:
            return
        # always attempt to cleanup the network as the router is
        # deleted. the core plugin will stop us if it's in use
        self.safe_delete_ha_network(context, ha_network,
                                    original['tenant_id'])

    def _unbind_ha_router(self, context, router_id):
        for agent in self.get_l3_agents_hosting_routers(context,
                                                        [router_id]):
            self.remove_router_from_l3_agent(context, agent['id'], router_id)

    def get_ha_router_port_bindings(self, context, router_ids, host=None):
        if not router_ids:
            return []
        return (
            l3_hamode.L3HARouterAgentPortBinding.get_l3ha_filter_host_router(
                context, router_ids, host))

    @staticmethod
    def _check_router_agent_ha_binding(context, router_id, agent_id):
        return l3_hamode.L3HARouterAgentPortBinding.objects_exist(
            context, router_id=router_id, l3_agent_id=agent_id)

    def _get_bindings_and_update_router_state_for_dead_agents(self, context,
                                                              router_id):
        """Return bindings. If dead agents were detected, update the router
        states on those agents.
""" with db_api.CONTEXT_WRITER.using(context): bindings = self.get_ha_router_port_bindings(context, [router_id]) router_active_agents_dead = [] router_standby_agents_dead = [] # List agents where router is active and agent is dead # and agents where router is standby and agent is dead for binding in bindings: if not (binding.agent.is_active and binding.agent.admin_state_up): if binding.state == constants.HA_ROUTER_STATE_ACTIVE: router_active_agents_dead.append(binding.agent) elif binding.state == constants.HA_ROUTER_STATE_STANDBY: router_standby_agents_dead.append(binding.agent) if router_active_agents_dead: # Just check if all l3_agents are down # then assuming some communication issue if (len(router_active_agents_dead) + len(router_standby_agents_dead) == len(bindings)): # Make router status as unknown because # agent communication may be issue but router # may still be active. We do not know the # exact status of router. state = constants.HA_ROUTER_STATE_UNKNOWN else: # Make router status as standby on all dead agents # as some other agents are alive , router can become # active on them after some time state = constants.HA_ROUTER_STATE_STANDBY for dead_agent in router_active_agents_dead: self.update_routers_states(context, {router_id: state}, dead_agent.host) if router_active_agents_dead: return self.get_ha_router_port_bindings(context, [router_id]) return bindings def get_l3_bindings_hosting_router_with_ha_states( self, context, router_id): """Return a list of [(agent, ha_state), ...].""" bindings = self._get_bindings_and_update_router_state_for_dead_agents( context, router_id) return [(binding.agent, binding.state) for binding in bindings if binding.agent is not None] def get_active_host_for_ha_router(self, context, router_id): bindings = self.get_l3_bindings_hosting_router_with_ha_states( context, router_id) # TODO(amuller): In case we have two or more actives, this method # needs to return the last agent to become active. This requires # timestamps for state changes. Otherwise, if a host goes down # and another takes over, we'll have two actives. In this case, # if an interface is added to a router, its binding might be wrong # and l2pop would not work correctly. return next( (agent.host for agent, state in bindings if state == constants.HA_ROUTER_STATE_ACTIVE), None) @log_helpers.log_method_call def _process_sync_ha_data(self, context, routers, host, is_any_dvr_agent): routers_dict = dict((router['id'], router) for router in routers) bindings = self.get_ha_router_port_bindings(context, routers_dict.keys(), host) for binding in bindings: port = binding.port if not port: # Filter the HA router has no ha port here LOG.info("HA router %s is missing HA router port " "bindings. Skipping it.", binding.router_id) routers_dict.pop(binding.router_id) continue port_dict = self._core_plugin._make_port_dict(port) router = routers_dict.get(binding.router_id) router[constants.HA_INTERFACE_KEY] = port_dict router[constants.HA_ROUTER_STATE_KEY] = binding.state interfaces = [] for router in routers_dict.values(): interface = router.get(constants.HA_INTERFACE_KEY) if interface: interfaces.append(interface) self._populate_mtu_and_subnets_for_ports(context, interfaces) # If this is a DVR+HA router, then we want to always return it even # though it's missing the '_ha_interface' key. The agent will have # to figure out what kind of router setup is needed. 
        return [r for r in list(routers_dict.values())
                if (is_any_dvr_agent or
                    not r.get('ha') or
                    r.get(constants.HA_INTERFACE_KEY))]

    @log_helpers.log_method_call
    def get_ha_sync_data_for_host(self, context, host, agent,
                                  router_ids=None, active=None):
        agent_mode = self._get_agent_mode(agent)
        dvr_agent_mode = (
            agent_mode in [constants.L3_AGENT_MODE_DVR_SNAT,
                           constants.L3_AGENT_MODE_DVR,
                           constants.L3_AGENT_MODE_DVR_NO_EXTERNAL])
        if (dvr_agent_mode and extensions.is_extension_supported(
                self, constants.L3_DISTRIBUTED_EXT_ALIAS)):
            # DVR has to be handled differently
            sync_data = self._get_dvr_sync_data(context, host, agent,
                                                router_ids, active)
        else:
            sync_data = super(L3_HA_NAT_db_mixin, self).get_sync_data(
                context, router_ids, active)
        return self._process_sync_ha_data(
            context, sync_data, host, dvr_agent_mode)

    @classmethod
    def _set_router_states(cls, context, bindings, states):
        for binding in bindings:
            try:
                # NOTE(ralonsoh): to be migrated to the new facade.
                with context.session.begin(subtransactions=True):
                    binding.state = states[binding.router_id]
            except (orm.exc.StaleDataError, orm.exc.ObjectDeletedError):
                # Take concurrently deleted routers into account
                pass

    @db_api.retry_if_session_inactive()
    def update_routers_states(self, context, states, host):
        """Receive dict of router ID to state and update them all."""
        bindings = self.get_ha_router_port_bindings(
            context, router_ids=states.keys(), host=host)
        self._set_router_states(context, bindings, states)
        self._update_router_port_bindings(context, states, host)

    def _update_router_port_bindings(self, context, states, host):
        admin_ctx = context.elevated()
        device_filter = {'device_id': list(states.keys()),
                         'device_owner':
                         [constants.DEVICE_OWNER_HA_REPLICATED_INT,
                          constants.DEVICE_OWNER_ROUTER_SNAT,
                          constants.DEVICE_OWNER_ROUTER_GW]}
        ports = self._core_plugin.get_ports(admin_ctx, filters=device_filter)
        active_ports = (
            port for port in ports
            if states[port['device_id']] == constants.HA_ROUTER_STATE_ACTIVE)

        for port in active_ports:
            try:
                self._core_plugin.update_port(
                    admin_ctx, port['id'],
                    {port_def.RESOURCE_NAME:
                     {portbindings.HOST_ID: host}})
            except (orm.exc.StaleDataError, orm.exc.ObjectDeletedError,
                    n_exc.PortNotFound):
                # Take concurrently deleted interfaces into account
                pass

    def _get_gateway_port_host(self, context, router, gw_ports):
        if not router.get('ha'):
            return super(L3_HA_NAT_db_mixin, self)._get_gateway_port_host(
                context, router, gw_ports)

        gw_port_id = router['gw_port_id']
        gateway_port = gw_ports.get(gw_port_id)
        if not gw_port_id or not gateway_port:
            return
        gateway_port_status = gateway_port['status']
        gateway_port_binding_host = gateway_port[portbindings.HOST_ID]

        admin_ctx = context.elevated()
        router_id = router['id']
        ha_bindings = self.get_l3_bindings_hosting_router_with_ha_states(
            admin_ctx, router_id)
        LOG.debug("HA router %(router_id)s gateway port %(gw_port_id)s "
                  "binding host: %(host)s, status: %(status)s",
                  {"router_id": router_id,
                   "gw_port_id": gateway_port['id'],
                   "host": gateway_port_binding_host,
                   "status": gateway_port_status})
        for ha_binding_agent, ha_binding_state in ha_bindings:
            if ha_binding_state != constants.HA_ROUTER_STATE_ACTIVE:
                continue
            # For create router gateway, the gateway port may not be ACTIVE
            # yet, so we return the 'master' host directly.
            if gateway_port_status != constants.PORT_STATUS_ACTIVE:
                return ha_binding_agent.host
            # Do not let the original 'master' host (which is now a backup)
            # override the gateway port binding host.
            if (gateway_port_status == constants.PORT_STATUS_ACTIVE and
                    ha_binding_agent.host == gateway_port_binding_host):
                return ha_binding_agent.host

        LOG.debug("No gateway port host retrieved. HA router %(router_id)s "
                  "gateway port %(gw_port_id)s "
                  "binding host: %(host)s, status: %(status)s, "
                  "router HA bindings: %(ha_bindings)s",
                  {"router_id": router_id,
                   "gw_port_id": gateway_port['id'],
                   "host": gateway_port_binding_host,
                   "status": gateway_port_status,
                   "ha_bindings": ha_bindings})


def is_ha_router(router):
    """Return True if router to be handled is ha."""
    try:
        # See if router is a DB object first
        requested_router_type = router.extra_attributes.ha
    except AttributeError:
        # if not, try to see if it is a request body
        requested_router_type = router.get('ha')
    if validators.is_attr_set(requested_router_type):
        return requested_router_type
    return cfg.CONF.l3_ha


def is_ha_router_port(context, device_owner, router_id):
    if device_owner == constants.DEVICE_OWNER_HA_REPLICATED_INT:
        return True
    elif device_owner == constants.DEVICE_OWNER_ROUTER_SNAT:
        return l3_obj.RouterExtraAttributes.objects_exist(
            context, router_id=router_id, ha=True)
    else:
        return False