neutron/neutron/db/l3_hamode_db.py

# Copyright (C) 2014 eNovance SAS <licensing@enovance.com>
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
import functools
import random

import netaddr
from neutron_lib.api.definitions import l3 as l3_apidef
from neutron_lib.api.definitions import l3_ext_ha_mode as l3_ext_ha_apidef
from neutron_lib.api.definitions import port as port_def
from neutron_lib.api.definitions import portbindings
from neutron_lib.api.definitions import provider_net as providernet
from neutron_lib.api import extensions
from neutron_lib.api import validators
from neutron_lib.callbacks import events
from neutron_lib.callbacks import priority_group
from neutron_lib.callbacks import registry
from neutron_lib.callbacks import resources
from neutron_lib import constants
from neutron_lib.db import api as db_api
from neutron_lib import exceptions as n_exc
from neutron_lib.exceptions import l3 as l3_exc
from neutron_lib.exceptions import l3_ext_ha_mode as l3ha_exc
from neutron_lib.objects import exceptions as obj_base
from neutron_lib.plugins import utils as p_utils
from oslo_config import cfg
from oslo_db import exception as db_exc
from oslo_log import helpers as log_helpers
from oslo_log import log as logging
from oslo_utils import excutils
import sqlalchemy as sa
from sqlalchemy import exc as sql_exc
from sqlalchemy import orm

from neutron._i18n import _
from neutron.common import utils as n_utils
from neutron.conf.db import l3_hamode_db
from neutron.db import _utils as db_utils
from neutron.db.availability_zone import router as router_az_db
from neutron.db import l3_dvr_db
from neutron.objects import base
from neutron.objects import l3_hamode
from neutron.objects import router as l3_obj

VR_ID_RANGE = set(range(1, 255))
MAX_ALLOCATION_TRIES = 10
UNLIMITED_AGENTS_PER_ROUTER = 0

LOG = logging.getLogger(__name__)

l3_hamode_db.register_db_l3_hamode_opts()


@registry.has_registry_receivers
class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
router_az_db.RouterAvailabilityZoneMixin):
"""Mixin class to add high availability capability to routers."""
def _verify_configuration(self):
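        """Validate the HA-related configuration options.

        Ensures l3_ha_net_cidr is a valid network CIDR and that
        max_l3_agents_per_router is either 0 (unlimited) or at least 1.
        """
        # Illustrative neutron.conf values these checks apply to (example
        # only, not authoritative):
        #   [DEFAULT]
        #   l3_ha = True
        #   l3_ha_net_cidr = 169.254.192.0/18
        #   max_l3_agents_per_router = 3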
self.ha_cidr = cfg.CONF.l3_ha_net_cidr
try:
net = netaddr.IPNetwork(self.ha_cidr)
except netaddr.AddrFormatError:
raise l3ha_exc.HANetworkCIDRNotValid(cidr=self.ha_cidr)
if ('/' not in self.ha_cidr or net.network != net.ip):
raise l3ha_exc.HANetworkCIDRNotValid(cidr=self.ha_cidr)
self._check_num_agents_per_router()
def _check_num_agents_per_router(self):
max_agents = cfg.CONF.max_l3_agents_per_router
if max_agents != UNLIMITED_AGENTS_PER_ROUTER and max_agents < 1:
raise l3ha_exc.HAMaximumAgentsNumberNotValid(max_agents=max_agents)
def __new__(cls, *args, **kwargs):
inst = super(L3_HA_NAT_db_mixin, cls).__new__(cls, *args, **kwargs)
inst._verify_configuration()
return inst
def get_ha_network(self, context, tenant_id):
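        """Return the HA network binding for the project, if one exists."""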
pager = base.Pager(limit=1)
results = l3_hamode.L3HARouterNetwork.get_objects(
context, _pager=pager, project_id=tenant_id)
return results.pop() if results else None
def _get_allocated_vr_id(self, context, network_id):
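        """Return the set of VRIDs already allocated on the network."""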
vr_id_objs = l3_hamode.L3HARouterVRIdAllocation.get_objects(
context, network_id=network_id)
allocated_vr_ids = set(a.vr_id for a in vr_id_objs) - set([0])
return allocated_vr_ids
def _get_vr_id(self, context, network_id):
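        """Return a random unallocated VRID for the network, or None."""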
allocated_vr_ids = self._get_allocated_vr_id(context,
network_id)
available_vr_ids = VR_ID_RANGE - allocated_vr_ids
if not available_vr_ids:
return None
return random.choice(list(available_vr_ids))
@db_api.retry_if_session_inactive()
def _ensure_vr_id(self, context, router_db, ha_network):
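        """Allocate a VRID for the router on its HA network.

        Retries up to MAX_ALLOCATION_TRIES times if another router
        concurrently grabs the same VRID.
        """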
router_id = router_db.id
network_id = ha_network.network_id
# TODO(kevinbenton): let decorator handle duplicate retry
# like in review.opendev.org/#/c/367179/1/neutron/db/l3_hamode_db.py
for count in range(MAX_ALLOCATION_TRIES):
try:
# NOTE(kevinbenton): we disallow subtransactions because the
# retry logic will bust any parent transactions
with db_api.CONTEXT_WRITER.using(context):
if router_db.extra_attributes.ha_vr_id:
LOG.debug(
"Router %(router_id)s has already been "
"allocated a ha_vr_id %(ha_vr_id)d!",
{'router_id': router_id,
'ha_vr_id': router_db.extra_attributes.ha_vr_id})
return
old_router = self._make_router_dict(router_db)
vr_id = self._get_vr_id(context, network_id)
if vr_id is None:
raise l3ha_exc.NoVRIDAvailable(router_id=router_id)
allocation = l3_hamode.L3HARouterVRIdAllocation(
context, network_id=network_id, vr_id=vr_id)
allocation.create()
router_db.extra_attributes.ha_vr_id = allocation.vr_id
LOG.debug(
"Router %(router_id)s has been allocated a ha_vr_id "
"%(ha_vr_id)d.",
{'router_id': router_id, 'ha_vr_id': allocation.vr_id})
router_body = {l3_apidef.ROUTER:
{l3_ext_ha_apidef.HA_INFO: True,
'ha_vr_id': allocation.vr_id}}
registry.publish(resources.ROUTER, events.PRECOMMIT_UPDATE,
self, payload=events.DBEventPayload(
context, request_body=router_body,
states=(old_router,),
resource_id=router_id,
desired_state=router_db))
return allocation.vr_id
except obj_base.NeutronDbObjectDuplicateEntry:
LOG.info("Attempt %(count)s to allocate a VRID in the "
"network %(network)s for the router %(router)s",
{'count': count, 'network': network_id,
'router': router_id})
raise l3ha_exc.MaxVRIDAllocationTriesReached(
network_id=network_id, router_id=router_id,
max_tries=MAX_ALLOCATION_TRIES)
@db_api.retry_if_session_inactive()
def _delete_vr_id_allocation(self, context, ha_network, vr_id):
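        """Release the VRID allocated to the router on its HA network."""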
l3_hamode.L3HARouterVRIdAllocation.delete_objects(
context, network_id=ha_network.network_id, vr_id=vr_id)
def _create_ha_subnet(self, context, network_id, tenant_id):
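        """Create the IPv4 HA subnet (l3_ha_net_cidr) on the HA network."""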
args = {'network_id': network_id,
'tenant_id': '',
'name': constants.HA_SUBNET_NAME % tenant_id,
'ip_version': 4,
'cidr': cfg.CONF.l3_ha_net_cidr,
'enable_dhcp': False,
'gateway_ip': None}
return p_utils.create_subnet(self._core_plugin, context,
{'subnet': args})
def _create_ha_network_tenant_binding(self, context, tenant_id,
network_id):
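        """Bind the HA network to the project, enforcing one per project."""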
ha_network = l3_hamode.L3HARouterNetwork(
context, project_id=tenant_id, network_id=network_id)
ha_network.create()
        # we need to check if someone else just inserted at exactly the
        # same time as us because there is no constraint in L3HARouterNetwork
        # that prevents multiple networks per tenant
if l3_hamode.L3HARouterNetwork.count(
context, project_id=tenant_id) > 1:
# we need to throw an error so our network is deleted
# and the process is started over where the existing
# network will be selected.
raise db_exc.DBDuplicateEntry(columns=['tenant_id'])
return ha_network
def _add_ha_network_settings(self, network):
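        """Apply the configured provider network type and physnet, if set."""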
if cfg.CONF.l3_ha_network_type:
network[providernet.NETWORK_TYPE] = cfg.CONF.l3_ha_network_type
if cfg.CONF.l3_ha_network_physical_name:
network[providernet.PHYSICAL_NETWORK] = (
cfg.CONF.l3_ha_network_physical_name)
def _create_ha_network(self, context, tenant_id):
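        """Create the HA network, its project binding and the HA subnet."""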
admin_ctx = context.elevated()
args = {'network':
{'name': constants.HA_NETWORK_NAME % tenant_id,
'tenant_id': '',
'shared': False,
'admin_state_up': True}}
self._add_ha_network_settings(args['network'])
creation = functools.partial(p_utils.create_network,
self._core_plugin, admin_ctx, args)
content = functools.partial(self._create_ha_network_tenant_binding,
admin_ctx, tenant_id)
deletion = functools.partial(self._core_plugin.delete_network,
admin_ctx)
network, ha_network = db_utils.safe_creation(
context, creation, deletion, content, transaction=False)
try:
self._create_ha_subnet(admin_ctx, network['id'], tenant_id)
except Exception:
with excutils.save_and_reraise_exception():
self._core_plugin.delete_network(admin_ctx, network['id'])
return ha_network
def get_number_of_agents_for_scheduling(self, context):
"""Return number of agents on which the router will be scheduled."""
num_agents = len(
self.get_l3_agents(
context, active=True,
filters={'agent_modes': [constants.L3_AGENT_MODE_LEGACY,
constants.L3_AGENT_MODE_DVR_SNAT]}))
max_agents = cfg.CONF.max_l3_agents_per_router
if max_agents:
if max_agents > num_agents:
LOG.info("Number of active agents lower than "
"max_l3_agents_per_router. L3 agents "
"available: %s", num_agents)
else:
num_agents = max_agents
return num_agents
@db_api.retry_if_session_inactive()
def _create_ha_port_binding(self, context, router_id, port_id):
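        """Create the RouterPort and HA agent port binding for the port."""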
try:
l3_obj.RouterPort(
context,
port_id=port_id,
router_id=router_id,
port_type=constants.DEVICE_OWNER_ROUTER_HA_INTF).create()
portbinding = l3_hamode.L3HARouterAgentPortBinding(
context, port_id=port_id, router_id=router_id)
portbinding.create()
return portbinding
except db_exc.DBReferenceError as e:
with excutils.save_and_reraise_exception() as ctxt:
if isinstance(e.inner_exception, sql_exc.IntegrityError):
ctxt.reraise = False
LOG.debug(
                        'Failed to create HA router agent PortBinding; '
                        'router %s has already been removed '
                        'by a concurrent operation', router_id)
raise l3_exc.RouterNotFound(router_id=router_id)
def add_ha_port(self, context, router_id, network_id, tenant_id):
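        """Create an HA port for the router on the project HA network."""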
# NOTE(kevinbenton): we have to block any ongoing transactions because
# our exception handling will try to delete the port using the normal
# core plugin API. If this function is called inside of a transaction
# the exception will mangle the state, cause the delete call to fail,
# and end up relying on the DB rollback to remove the port instead of
        # a proper delete_port call.
if context.session.is_active:
raise RuntimeError(_('add_ha_port cannot be called inside of a '
'transaction.'))
args = {'tenant_id': '',
'network_id': network_id,
'admin_state_up': True,
'device_id': router_id,
'device_owner': constants.DEVICE_OWNER_ROUTER_HA_INTF,
'name': constants.HA_PORT_NAME % tenant_id}
creation = functools.partial(p_utils.create_port, self._core_plugin,
context, {'port': args})
content = functools.partial(self._create_ha_port_binding, context,
router_id)
deletion = functools.partial(self._core_plugin.delete_port, context,
l3_port_check=False)
port, binding = db_utils.safe_creation(context, creation,
deletion, content,
transaction=False)
        # _create_ha_port_binding now returns the binding OVO; its db_obj is
        # returned so that the agent relationship can be populated.
return binding.db_obj
def _delete_ha_interfaces(self, context, router_id):
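        """Delete all HA ports belonging to the given router."""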
admin_ctx = context.elevated()
device_filter = {'device_id': [router_id],
'device_owner':
[constants.DEVICE_OWNER_ROUTER_HA_INTF]}
ports = self._core_plugin.get_ports(admin_ctx, filters=device_filter)
for port in ports:
self._core_plugin.delete_port(admin_ctx, port['id'],
l3_port_check=False)
def delete_ha_interfaces_on_host(self, context, router_id, host):
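        """Delete the router's HA ports bound to the given host."""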
admin_ctx = context.elevated()
port_ids = (binding.port_id for binding
in self.get_ha_router_port_bindings(admin_ctx,
[router_id], host))
for port_id in port_ids:
self._core_plugin.delete_port(admin_ctx, port_id,
l3_port_check=False)
def _notify_router_updated(self, context, router_id):
self.l3_rpc_notifier.routers_updated(
context, [router_id], shuffle_agents=True)
@classmethod
def _is_ha(cls, router):
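        """Return the requested 'ha' value, defaulting to cfg.CONF.l3_ha."""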
ha = router.get('ha')
if not validators.is_attr_set(ha):
ha = cfg.CONF.l3_ha
return ha
def _get_device_owner(self, context, router=None):
"""Get device_owner for the specified router."""
router_is_uuid = isinstance(router, str)
if router_is_uuid:
router = self._get_router(context, router)
if (is_ha_router(router) and not
l3_dvr_db.is_distributed_router(router)):
return constants.DEVICE_OWNER_HA_REPLICATED_INT
return super(L3_HA_NAT_db_mixin,
self)._get_device_owner(context, router)
@n_utils.transaction_guard
def _ensure_vr_id_and_network(self, context, router_db):
"""Attach vr_id to router while tolerating network deletes."""
creator = functools.partial(self._ensure_vr_id,
context, router_db)
dep_getter = functools.partial(self.get_ha_network,
context, router_db.tenant_id)
dep_creator = functools.partial(self._create_ha_network,
context, router_db.tenant_id)
dep_deleter = functools.partial(self._delete_ha_network, context)
dep_id_attr = 'network_id'
return n_utils.create_object_with_dependency(
creator, dep_getter, dep_creator, dep_id_attr, dep_deleter)[1]
@registry.receives(resources.ROUTER, [events.BEFORE_CREATE],
priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
@db_api.retry_if_session_inactive()
def _before_router_create(self, resource, event, trigger,
context, router, **kwargs):
"""Event handler to create HA resources before router creation."""
if not self._is_ha(router):
return
# ensure the HA network exists before we start router creation so
# we can provide meaningful errors back to the user if no network
# can be allocated
if not self.get_ha_network(context, router['tenant_id']):
self._create_ha_network(context, router['tenant_id'])
@registry.receives(resources.ROUTER, [events.PRECOMMIT_CREATE],
priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
def _precommit_router_create(self, resource, event, trigger, context,
router, router_db, **kwargs):
"""Event handler to set ha flag and status on creation."""
is_ha = self._is_ha(router)
router['ha'] = is_ha
self.set_extra_attr_value(context, router_db, 'ha', is_ha)
if not is_ha:
return
# This will throw an exception if there aren't enough agents to
# handle this HA router
self.get_number_of_agents_for_scheduling(context)
ha_net = self.get_ha_network(context, router['tenant_id'])
if not ha_net:
# net was deleted, throw a retry to start over to create another
raise db_exc.RetryRequest(
l3ha_exc.HANetworkConcurrentDeletion(
tenant_id=router['tenant_id']))
@registry.receives(resources.ROUTER, [events.AFTER_CREATE],
priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
def _after_router_create(self, resource, event, trigger, context,
router_id, router, router_db, **kwargs):
if not router['ha']:
return
try:
self.schedule_router(context, router_id)
router['ha_vr_id'] = router_db.extra_attributes.ha_vr_id
self._notify_router_updated(context, router_id)
except Exception as e:
with excutils.save_and_reraise_exception() as ctx:
if isinstance(e, l3ha_exc.NoVRIDAvailable):
ctx.reraise = False
LOG.warning("No more VRIDs for router: %s", e)
else:
LOG.exception("Failed to schedule HA router %s.",
router_id)
router['status'] = self._update_router_db(
context, router_id,
{'status': constants.ERROR})['status']
@registry.receives(resources.ROUTER, [events.PRECOMMIT_UPDATE],
priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
def _validate_migration(self, resource, event, trigger, payload=None):
"""Event handler on precommit update to validate migration."""
original_ha_state = payload.states[0]['ha']
requested_ha_state = payload.request_body.get('ha')
ha_changed = (requested_ha_state is not None and
requested_ha_state != original_ha_state)
if not ha_changed:
return
if payload.desired_state.admin_state_up:
msg = _('Cannot change HA attribute of active routers. Please '
'set router admin_state_up to False prior to upgrade')
raise n_exc.BadRequest(resource='router', msg=msg)
if requested_ha_state:
# This will throw HANotEnoughAvailableAgents if there aren't
# enough l3 agents to handle this router.
self.get_number_of_agents_for_scheduling(payload.context)
old_owner = constants.DEVICE_OWNER_ROUTER_INTF
new_owner = constants.DEVICE_OWNER_HA_REPLICATED_INT
else:
old_owner = constants.DEVICE_OWNER_HA_REPLICATED_INT
new_owner = constants.DEVICE_OWNER_ROUTER_INTF
ha_network = self.get_ha_network(payload.context,
payload.desired_state.tenant_id)
self._delete_vr_id_allocation(
payload.context, ha_network,
payload.desired_state.extra_attributes.ha_vr_id)
payload.desired_state.extra_attributes.ha_vr_id = None
if (payload.request_body.get('distributed') or
payload.states[0]['distributed']):
self.set_extra_attr_value(payload.context, payload.desired_state,
'ha', requested_ha_state)
return
self._migrate_router_ports(
payload.context, payload.desired_state,
old_owner=old_owner, new_owner=new_owner)
self.set_extra_attr_value(
payload.context, payload.desired_state, 'ha', requested_ha_state)
@registry.receives(resources.ROUTER, [events.AFTER_UPDATE],
priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
def _reconfigure_ha_resources(self, resource, event, trigger, context,
router_id, old_router, router, router_db,
**kwargs):
"""Event handler to react to changes after HA flag has been updated."""
ha_changed = old_router['ha'] != router['ha']
if not ha_changed:
return
requested_ha_state = router['ha']
# The HA attribute has changed. First unbind the router from agents
# to force a proper re-scheduling to agents.
# TODO(jschwarz): This will have to be more selective to get HA + DVR
# working (Only unbind from dvr_snat nodes).
self._unbind_ha_router(context, router_id)
if not requested_ha_state:
self._delete_ha_interfaces(context, router_db.id)
            # always attempt to clean up the HA network now that this router
            # no longer needs it; the core plugin will stop us if it's in use
ha_network = self.get_ha_network(context,
router_db.tenant_id)
if ha_network:
self.safe_delete_ha_network(context, ha_network,
router_db.tenant_id)
self.schedule_router(context, router_id)
self._notify_router_updated(context, router_db.id)
def _delete_ha_network(self, context, net):
admin_ctx = context.elevated()
self._core_plugin.delete_network(admin_ctx, net.network_id)
def safe_delete_ha_network(self, context, ha_network, tenant_id):
try:
# reference the attr inside the try block before we attempt
# to delete the network and potentially invalidate the
# relationship
net_id = ha_network.network_id
self._delete_ha_network(context, ha_network)
except (n_exc.NetworkNotFound,
orm.exc.ObjectDeletedError):
LOG.debug(
"HA network for tenant %s was already deleted.", tenant_id)
except sa.exc.InvalidRequestError:
LOG.info("HA network %s can not be deleted.", net_id)
except n_exc.NetworkInUse:
            # the network is still in use; this is normal, so we don't
            # log anything
pass
else:
LOG.info("HA network %(network)s was deleted as "
"no HA routers are present in tenant "
"%(tenant)s.",
{'network': net_id, 'tenant': tenant_id})
@registry.receives(resources.ROUTER, [events.PRECOMMIT_DELETE],
priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
def _release_router_vr_id(self, resource, event, trigger, context,
router_db, **kwargs):
"""Event handler for removal of VRID during router delete."""
if router_db.extra_attributes.ha:
ha_network = self.get_ha_network(context,
router_db.tenant_id)
if ha_network:
self._delete_vr_id_allocation(
context, ha_network, router_db.extra_attributes.ha_vr_id)
@registry.receives(resources.ROUTER, [events.AFTER_DELETE],
priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
@db_api.retry_if_session_inactive()
def _cleanup_ha_network(self, resource, event, trigger, context,
router_id, original, **kwargs):
"""Event handler to attempt HA network deletion after router delete."""
if not original['ha']:
return
ha_network = self.get_ha_network(context, original['tenant_id'])
if not ha_network:
return
        # always attempt to clean up the network as the router is deleted;
        # the core plugin will stop us if it's in use
self.safe_delete_ha_network(context, ha_network, original['tenant_id'])
def _unbind_ha_router(self, context, router_id):
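        """Remove the router from every l3 agent currently hosting it."""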
for agent in self.get_l3_agents_hosting_routers(context, [router_id]):
self.remove_router_from_l3_agent(context, agent['id'], router_id)
def get_ha_router_port_bindings(self, context, router_ids, host=None):
if not router_ids:
return []
return (
l3_hamode.L3HARouterAgentPortBinding.get_l3ha_filter_host_router(
context, router_ids, host))
@staticmethod
def _check_router_agent_ha_binding(context, router_id, agent_id):
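        """Return True if the router has an HA port binding to the agent."""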
return l3_hamode.L3HARouterAgentPortBinding.objects_exist(
context, router_id=router_id, l3_agent_id=agent_id)
def _get_bindings_and_update_router_state_for_dead_agents(self, context,
router_id):
"""Return bindings. In case if dead agents were detected update router
states on this agent.
"""
with db_api.CONTEXT_WRITER.using(context):
bindings = self.get_ha_router_port_bindings(context, [router_id])
router_active_agents_dead = []
router_standby_agents_dead = []
# List agents where router is active and agent is dead
# and agents where router is standby and agent is dead
for binding in bindings:
if not (binding.agent.is_active and
binding.agent.admin_state_up):
if binding.state == constants.HA_ROUTER_STATE_ACTIVE:
router_active_agents_dead.append(binding.agent)
elif binding.state == constants.HA_ROUTER_STATE_STANDBY:
router_standby_agents_dead.append(binding.agent)
            if router_active_agents_dead:
                # If all l3 agents hosting this router are down, assume a
                # communication issue rather than a real failover.
                if (len(router_active_agents_dead) +
                        len(router_standby_agents_dead) == len(bindings)):
                    # Mark the router status as unknown: the agents may just
                    # be unreachable while the router is still active, so the
                    # exact status cannot be determined.
                    state = constants.HA_ROUTER_STATE_UNKNOWN
                else:
                    # Mark the router status as standby on all dead agents;
                    # other agents are still alive, so the router can become
                    # active on one of them after some time.
                    state = constants.HA_ROUTER_STATE_STANDBY
for dead_agent in router_active_agents_dead:
self.update_routers_states(context, {router_id: state},
dead_agent.host)
if router_active_agents_dead:
return self.get_ha_router_port_bindings(context, [router_id])
return bindings
def get_l3_bindings_hosting_router_with_ha_states(
self, context, router_id):
"""Return a list of [(agent, ha_state), ...]."""
bindings = self._get_bindings_and_update_router_state_for_dead_agents(
context, router_id)
return [(binding.agent, binding.state) for binding in bindings
if binding.agent is not None]
def get_active_host_for_ha_router(self, context, router_id):
bindings = self.get_l3_bindings_hosting_router_with_ha_states(
context, router_id)
# TODO(amuller): In case we have two or more actives, this method
# needs to return the last agent to become active. This requires
# timestamps for state changes. Otherwise, if a host goes down
# and another takes over, we'll have two actives. In this case,
# if an interface is added to a router, its binding might be wrong
# and l2pop would not work correctly.
return next(
(agent.host for agent, state in bindings
if state == constants.HA_ROUTER_STATE_ACTIVE),
None)
@log_helpers.log_method_call
def _process_sync_ha_data(self, context, routers, host, is_any_dvr_agent):
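        """Add HA interface and HA state information to the router dicts."""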
routers_dict = dict((router['id'], router) for router in routers)
bindings = self.get_ha_router_port_bindings(context,
routers_dict.keys(),
host)
for binding in bindings:
port = binding.port
if not port:
                # Skip HA routers that are missing their HA port here
LOG.info("HA router %s is missing HA router port "
"bindings. Skipping it.",
binding.router_id)
routers_dict.pop(binding.router_id)
continue
port_dict = self._core_plugin._make_port_dict(port)
router = routers_dict.get(binding.router_id)
router[constants.HA_INTERFACE_KEY] = port_dict
router[constants.HA_ROUTER_STATE_KEY] = binding.state
interfaces = []
for router in routers_dict.values():
interface = router.get(constants.HA_INTERFACE_KEY)
if interface:
interfaces.append(interface)
self._populate_mtu_and_subnets_for_ports(context, interfaces)
# If this is a DVR+HA router, then we want to always return it even
# though it's missing the '_ha_interface' key. The agent will have
# to figure out what kind of router setup is needed.
return [r for r in list(routers_dict.values())
if (is_any_dvr_agent or
not r.get('ha') or r.get(constants.HA_INTERFACE_KEY))]
@log_helpers.log_method_call
def get_ha_sync_data_for_host(self, context, host, agent,
router_ids=None, active=None):
agent_mode = self._get_agent_mode(agent)
dvr_agent_mode = (
agent_mode in [constants.L3_AGENT_MODE_DVR_SNAT,
constants.L3_AGENT_MODE_DVR,
constants.L3_AGENT_MODE_DVR_NO_EXTERNAL])
if (dvr_agent_mode and extensions.is_extension_supported(
self, constants.L3_DISTRIBUTED_EXT_ALIAS)):
# DVR has to be handled differently
sync_data = self._get_dvr_sync_data(context, host, agent,
router_ids, active)
else:
sync_data = super(L3_HA_NAT_db_mixin, self).get_sync_data(
context, router_ids, active)
return self._process_sync_ha_data(
context, sync_data, host, dvr_agent_mode)
@classmethod
def _set_router_states(cls, context, bindings, states):
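        """Persist the reported HA state on each router port binding."""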
for binding in bindings:
try:
# NOTE(ralonsoh): to be migrated to the new facade.
with context.session.begin(subtransactions=True):
binding.state = states[binding.router_id]
except (orm.exc.StaleDataError, orm.exc.ObjectDeletedError):
                # Take concurrently deleted routers into account
pass
@db_api.retry_if_session_inactive()
def update_routers_states(self, context, states, host):
"""Receive dict of router ID to state and update them all."""
bindings = self.get_ha_router_port_bindings(
context, router_ids=states.keys(), host=host)
self._set_router_states(context, bindings, states)
self._update_router_port_bindings(context, states, host)
def _update_router_port_bindings(self, context, states, host):
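        """Rebind replicated, SNAT and gateway ports to the active host."""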
admin_ctx = context.elevated()
device_filter = {'device_id': list(states.keys()),
'device_owner':
[constants.DEVICE_OWNER_HA_REPLICATED_INT,
constants.DEVICE_OWNER_ROUTER_SNAT,
constants.DEVICE_OWNER_ROUTER_GW]}
ports = self._core_plugin.get_ports(admin_ctx, filters=device_filter)
active_ports = (
port for port in ports
if states[port['device_id']] == constants.HA_ROUTER_STATE_ACTIVE)
for port in active_ports:
try:
self._core_plugin.update_port(
admin_ctx, port['id'],
{port_def.RESOURCE_NAME: {portbindings.HOST_ID: host}})
except (orm.exc.StaleDataError, orm.exc.ObjectDeletedError,
n_exc.PortNotFound):
                # Take concurrently deleted interfaces into account
pass
def _get_gateway_port_host(self, context, router, gw_ports):
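        """Return the host the HA router gateway port should be bound to."""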
if not router.get('ha'):
return super(L3_HA_NAT_db_mixin, self)._get_gateway_port_host(
context, router, gw_ports)
gw_port_id = router['gw_port_id']
gateway_port = gw_ports.get(gw_port_id)
if not gw_port_id or not gateway_port:
return
gateway_port_status = gateway_port['status']
gateway_port_binding_host = gateway_port[portbindings.HOST_ID]
admin_ctx = context.elevated()
router_id = router['id']
ha_bindings = self.get_l3_bindings_hosting_router_with_ha_states(
admin_ctx, router_id)
LOG.debug("HA router %(router_id)s gateway port %(gw_port_id)s "
"binding host: %(host)s, status: %(status)s",
{"router_id": router_id,
"gw_port_id": gateway_port['id'],
"host": gateway_port_binding_host,
"status": gateway_port_status})
for ha_binding_agent, ha_binding_state in ha_bindings:
if ha_binding_state != constants.HA_ROUTER_STATE_ACTIVE:
continue
            # When the router gateway is being created, the gateway port may
            # not be ACTIVE yet, so return the 'primary' (active) host
            # directly.
if gateway_port_status != constants.PORT_STATUS_ACTIVE:
return ha_binding_agent.host
            # Do not let the original 'primary' host (which is now a backup)
            # override the gateway port binding host.
if (gateway_port_status == constants.PORT_STATUS_ACTIVE and
ha_binding_agent.host == gateway_port_binding_host):
return ha_binding_agent.host
LOG.debug("No gateway port host retrieved. HA router %(router_id)s "
"gateway port %(gw_port_id)s "
"binding host: %(host)s, status: %(status)s, "
"router HA bindings: %(ha_bindings)s",
{"router_id": router_id,
"gw_port_id": gateway_port['id'],
"host": gateway_port_binding_host,
"status": gateway_port_status,
"ha_bindings": ha_bindings})
def is_ha_router(router):
"""Return True if router to be handled is ha."""
try:
# See if router is a DB object first
requested_router_type = router.extra_attributes.ha
except AttributeError:
# if not, try to see if it is a request body
requested_router_type = router.get('ha')
if validators.is_attr_set(requested_router_type):
return requested_router_type
    return cfg.CONF.l3_ha


def is_ha_router_port(context, device_owner, router_id):
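    """Return True if the given port is an interface of an HA router."""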
if device_owner == constants.DEVICE_OWNER_HA_REPLICATED_INT:
return True
elif device_owner == constants.DEVICE_OWNER_ROUTER_SNAT:
return l3_obj.RouterExtraAttributes.objects_exist(
context, router_id=router_id, ha=True)
else:
return False