neutron/neutron/db/l3_hamode_db.py

784 lines
36 KiB
Python

# Copyright (C) 2014 eNovance SAS <licensing@enovance.com>
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
import functools
import random
import netaddr
from neutron_lib.api.definitions import l3 as l3_apidef
from neutron_lib.api.definitions import l3_ext_ha_mode as l3_ext_ha_apidef
from neutron_lib.api.definitions import port as port_def
from neutron_lib.api.definitions import portbindings
from neutron_lib.api.definitions import provider_net as providernet
from neutron_lib.api import extensions
from neutron_lib.api import validators
from neutron_lib.callbacks import events
from neutron_lib.callbacks import priority_group
from neutron_lib.callbacks import registry
from neutron_lib.callbacks import resources
from neutron_lib import constants
from neutron_lib.db import api as db_api
from neutron_lib import exceptions as n_exc
from neutron_lib.exceptions import l3 as l3_exc
from neutron_lib.exceptions import l3_ext_ha_mode as l3ha_exc
from neutron_lib.objects import exceptions as obj_base
from neutron_lib.plugins import utils as p_utils
from oslo_config import cfg
from oslo_db import exception as db_exc
from oslo_log import helpers as log_helpers
from oslo_log import log as logging
from oslo_utils import excutils
import sqlalchemy as sa
from sqlalchemy import exc as sql_exc
from sqlalchemy import orm
from neutron._i18n import _
from neutron.common import utils as n_utils
from neutron.conf.db import l3_hamode_db
from neutron.db import _utils as db_utils
from neutron.db.availability_zone import router as router_az_db
from neutron.db import l3_dvr_db
from neutron.objects import base
from neutron.objects import l3_hamode
from neutron.objects import router as l3_obj
VR_ID_RANGE = set(range(1, 255))
MAX_ALLOCATION_TRIES = 10
UNLIMITED_AGENTS_PER_ROUTER = 0
LOG = logging.getLogger(__name__)
l3_hamode_db.register_db_l3_hamode_opts()
@registry.has_registry_receivers
class L3_HA_NAT_db_mixin(l3_dvr_db.L3_NAT_with_dvr_db_mixin,
router_az_db.RouterAvailabilityZoneMixin):
"""Mixin class to add high availability capability to routers."""
def _verify_configuration(self):
self.ha_cidr = cfg.CONF.l3_ha_net_cidr
try:
net = netaddr.IPNetwork(self.ha_cidr)
except netaddr.AddrFormatError:
raise l3ha_exc.HANetworkCIDRNotValid(cidr=self.ha_cidr)
if ('/' not in self.ha_cidr or net.network != net.ip):
raise l3ha_exc.HANetworkCIDRNotValid(cidr=self.ha_cidr)
self._check_num_agents_per_router()
def _check_num_agents_per_router(self):
max_agents = cfg.CONF.max_l3_agents_per_router
if max_agents != UNLIMITED_AGENTS_PER_ROUTER and max_agents < 1:
raise l3ha_exc.HAMaximumAgentsNumberNotValid(max_agents=max_agents)
def __new__(cls, *args, **kwargs):
inst = super(L3_HA_NAT_db_mixin, cls).__new__(cls, *args, **kwargs)
inst._verify_configuration()
return inst
def get_ha_network(self, context, tenant_id):
pager = base.Pager(limit=1)
results = l3_hamode.L3HARouterNetwork.get_objects(
context, _pager=pager, project_id=tenant_id)
return results.pop() if results else None
def _get_allocated_vr_id(self, context, network_id):
vr_id_objs = l3_hamode.L3HARouterVRIdAllocation.get_objects(
context, network_id=network_id)
allocated_vr_ids = set(a.vr_id for a in vr_id_objs) - set([0])
return allocated_vr_ids
def _get_vr_id(self, context, network_id):
allocated_vr_ids = self._get_allocated_vr_id(context,
network_id)
available_vr_ids = VR_ID_RANGE - allocated_vr_ids
if not available_vr_ids:
return None
return random.choice(list(available_vr_ids))
@db_api.retry_if_session_inactive()
def _ensure_vr_id(self, context, router_db, ha_network):
router_id = router_db.id
network_id = ha_network.network_id
# TODO(kevinbenton): let decorator handle duplicate retry
# like in review.opendev.org/#/c/367179/1/neutron/db/l3_hamode_db.py
for count in range(MAX_ALLOCATION_TRIES):
try:
# NOTE(kevinbenton): we disallow subtransactions because the
# retry logic will bust any parent transactions
with db_api.CONTEXT_WRITER.using(context):
if router_db.extra_attributes.ha_vr_id:
LOG.debug(
"Router %(router_id)s has already been "
"allocated a ha_vr_id %(ha_vr_id)d!",
{'router_id': router_id,
'ha_vr_id': router_db.extra_attributes.ha_vr_id})
return
old_router = self._make_router_dict(router_db)
vr_id = self._get_vr_id(context, network_id)
if vr_id is None:
raise l3ha_exc.NoVRIDAvailable(router_id=router_id)
allocation = l3_hamode.L3HARouterVRIdAllocation(
context, network_id=network_id, vr_id=vr_id)
allocation.create()
router_db.extra_attributes.update(
{'ha_vr_id': allocation.vr_id})
context.session.add(router_db.extra_attributes)
LOG.debug(
"Router %(router_id)s has been allocated a ha_vr_id "
"%(ha_vr_id)d.",
{'router_id': router_id, 'ha_vr_id': allocation.vr_id})
router_body = {l3_apidef.ROUTER:
{l3_ext_ha_apidef.HA_INFO: True,
'ha_vr_id': allocation.vr_id}}
registry.publish(resources.ROUTER, events.PRECOMMIT_UPDATE,
self, payload=events.DBEventPayload(
context, request_body=router_body,
states=(old_router,),
resource_id=router_id,
desired_state=router_db))
return allocation.vr_id
except obj_base.NeutronDbObjectDuplicateEntry:
LOG.info("Attempt %(count)s to allocate a VRID in the "
"network %(network)s for the router %(router)s",
{'count': count, 'network': network_id,
'router': router_id})
raise l3ha_exc.MaxVRIDAllocationTriesReached(
network_id=network_id, router_id=router_id,
max_tries=MAX_ALLOCATION_TRIES)
@db_api.retry_if_session_inactive()
def _delete_vr_id_allocation(self, context, ha_network, vr_id):
l3_hamode.L3HARouterVRIdAllocation.delete_objects(
context, network_id=ha_network.network_id, vr_id=vr_id)
def _create_ha_subnet(self, context, network_id, tenant_id):
args = {'network_id': network_id,
'tenant_id': '',
'name': constants.HA_SUBNET_NAME % tenant_id,
'ip_version': 4,
'cidr': cfg.CONF.l3_ha_net_cidr,
'enable_dhcp': False,
'gateway_ip': None}
return p_utils.create_subnet(self._core_plugin, context,
{'subnet': args})
def _create_ha_network_tenant_binding(self, context, tenant_id,
network_id):
ha_network = l3_hamode.L3HARouterNetwork(
context, project_id=tenant_id, network_id=network_id)
ha_network.create()
# we need to check if someone else just inserted at exactly the
# same time as us because there is no constrain in L3HARouterNetwork
# that prevents multiple networks per tenant
if l3_hamode.L3HARouterNetwork.count(
context, project_id=tenant_id) > 1:
# we need to throw an error so our network is deleted
# and the process is started over where the existing
# network will be selected.
raise db_exc.DBDuplicateEntry(columns=['tenant_id'])
return None, ha_network
def _add_ha_network_settings(self, network):
if cfg.CONF.l3_ha_network_type:
network[providernet.NETWORK_TYPE] = cfg.CONF.l3_ha_network_type
if cfg.CONF.l3_ha_network_physical_name:
network[providernet.PHYSICAL_NETWORK] = (
cfg.CONF.l3_ha_network_physical_name)
def _create_ha_network(self, context, tenant_id):
admin_ctx = n_utils.get_elevated_context(context)
args = {'network':
{'name': constants.HA_NETWORK_NAME % tenant_id,
'tenant_id': '',
'shared': False,
'admin_state_up': True}}
self._add_ha_network_settings(args['network'])
creation = functools.partial(p_utils.create_network,
self._core_plugin, admin_ctx, args)
content = functools.partial(self._create_ha_network_tenant_binding,
admin_ctx, tenant_id)
deletion = functools.partial(self._core_plugin.delete_network,
admin_ctx)
network, ha_network = db_utils.safe_creation(
context, creation, deletion, content, transaction=False)
try:
self._create_ha_subnet(admin_ctx, network['id'], tenant_id)
except Exception:
with excutils.save_and_reraise_exception():
self._core_plugin.delete_network(admin_ctx, network['id'])
return ha_network
def get_number_of_agents_for_scheduling(self, context):
"""Return number of agents on which the router will be scheduled."""
num_agents = len(
self.get_l3_agents(
context, active=True,
filters={'agent_modes': [constants.L3_AGENT_MODE_LEGACY,
constants.L3_AGENT_MODE_DVR_SNAT]}))
max_agents = cfg.CONF.max_l3_agents_per_router
if max_agents:
if max_agents > num_agents:
LOG.info("Number of active agents lower than "
"max_l3_agents_per_router. L3 agents "
"available: %s", num_agents)
else:
num_agents = max_agents
return num_agents
@db_api.retry_if_session_inactive()
def _create_ha_port_binding(self, context, router_id, port_id):
try:
l3_obj.RouterPort(
context,
port_id=port_id,
router_id=router_id,
port_type=constants.DEVICE_OWNER_ROUTER_HA_INTF).create()
portbinding = l3_hamode.L3HARouterAgentPortBinding(
context, port_id=port_id, router_id=router_id)
portbinding.create()
return None, portbinding
except db_exc.DBReferenceError as e:
with excutils.save_and_reraise_exception() as ctxt:
if isinstance(e.inner_exception, sql_exc.IntegrityError):
ctxt.reraise = False
LOG.debug(
'Failed to create HA router agent PortBinding, '
'Router %s has already been removed '
'by concurrent operation', router_id)
raise l3_exc.RouterNotFound(router_id=router_id)
def add_ha_port(self, context, router_id, network_id, tenant_id):
# NOTE(kevinbenton): we have to block any ongoing transactions because
# our exception handling will try to delete the port using the normal
# core plugin API. If this function is called inside of a transaction
# the exception will mangle the state, cause the delete call to fail,
# and end up relying on the DB rollback to remove the port instead of
# proper delete_port call.
if context.session.is_active:
raise RuntimeError(_('add_ha_port cannot be called inside of a '
'transaction.'))
args = {'tenant_id': '',
'network_id': network_id,
'admin_state_up': True,
'device_id': router_id,
'device_owner': constants.DEVICE_OWNER_ROUTER_HA_INTF,
'name': constants.HA_PORT_NAME % tenant_id}
creation = functools.partial(p_utils.create_port, self._core_plugin,
context, {'port': args})
content = functools.partial(self._create_ha_port_binding, context,
router_id)
deletion = functools.partial(self._core_plugin.delete_port, context,
l3_port_check=False)
port, binding = db_utils.safe_creation(context, creation,
deletion, content,
transaction=False)
# _create_ha_port_binding returns the binding object now and
# to populate agent relation db_obj is used.
return binding.db_obj
def _delete_ha_interfaces(self, context, router_id):
admin_ctx = n_utils.get_elevated_context(context)
device_filter = {'device_id': [router_id],
'device_owner':
[constants.DEVICE_OWNER_ROUTER_HA_INTF]}
ports = self._core_plugin.get_ports(admin_ctx, filters=device_filter)
for port in ports:
self._core_plugin.delete_port(admin_ctx, port['id'],
l3_port_check=False)
def delete_ha_interfaces_on_host(self, context, router_id, host):
admin_ctx = n_utils.get_elevated_context(context)
port_ids = (binding.port_id for binding
in self.get_ha_router_port_bindings(admin_ctx,
[router_id], host))
for port_id in port_ids:
self._core_plugin.delete_port(admin_ctx, port_id,
l3_port_check=False)
def _notify_router_updated(self, context, router_id):
self.l3_rpc_notifier.routers_updated(
context, [router_id], shuffle_agents=True)
@classmethod
def _is_ha(cls, router):
ha = router.get('ha')
if not validators.is_attr_set(ha):
ha = cfg.CONF.l3_ha
return ha
def _get_device_owner(self, context, router=None):
"""Get device_owner for the specified router."""
router_is_uuid = isinstance(router, str)
if router_is_uuid:
router = self._get_router(context, router)
if (is_ha_router(router) and not
l3_dvr_db.is_distributed_router(router)):
return constants.DEVICE_OWNER_HA_REPLICATED_INT
return super(L3_HA_NAT_db_mixin,
self)._get_device_owner(context, router)
@registry.receives(resources.ROUTER, [events.BEFORE_CREATE],
priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
@db_api.retry_if_session_inactive()
def _before_router_create(self, resource, event, trigger,
context, router, **kwargs):
"""Event handler to create HA resources before router creation."""
if not self._is_ha(router):
return
# ensure the HA network exists before we start router creation so
# we can provide meaningful errors back to the user if no network
# can be allocated
if not self.get_ha_network(context, router['tenant_id']):
self._create_ha_network(context, router['tenant_id'])
@registry.receives(resources.ROUTER, [events.PRECOMMIT_CREATE],
priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
def _precommit_router_create(self, resource, event, trigger, context,
router, router_db, **kwargs):
"""Event handler to set ha flag and status on creation."""
is_ha = self._is_ha(router)
router['ha'] = is_ha
self.set_extra_attr_value(context, router_db, 'ha', is_ha)
if not is_ha:
return
# This will throw an exception if there aren't enough agents to
# handle this HA router
self.get_number_of_agents_for_scheduling(context)
ha_net = self.get_ha_network(context, router['tenant_id'])
if not ha_net:
# net was deleted, throw a retry to start over to create another
raise db_exc.RetryRequest(
l3ha_exc.HANetworkConcurrentDeletion(
tenant_id=router['tenant_id']))
@registry.receives(resources.ROUTER, [events.AFTER_CREATE],
priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
def _after_router_create(self, resource, event, trigger, context,
router_id, router, router_db, **kwargs):
if not router['ha']:
return
try:
self.schedule_router(context, router_id)
router['ha_vr_id'] = router_db.extra_attributes.ha_vr_id
self._notify_router_updated(context, router_id)
except Exception as e:
with excutils.save_and_reraise_exception() as ctx:
if isinstance(e, l3ha_exc.NoVRIDAvailable):
ctx.reraise = False
LOG.warning("No more VRIDs for router: %s", e)
else:
LOG.exception("Failed to schedule HA router %s.",
router_id)
router['status'] = self._update_router_db(
context, router_id,
{'status': constants.ERROR})['status']
@registry.receives(resources.ROUTER, [events.PRECOMMIT_UPDATE],
priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
def _validate_migration(self, resource, event, trigger, payload=None):
"""Event handler on precommit update to validate migration."""
original_ha_state = payload.states[0]['ha']
requested_ha_state = payload.request_body.get('ha')
ha_changed = (requested_ha_state is not None and
requested_ha_state != original_ha_state)
if not ha_changed:
return
if payload.desired_state.admin_state_up:
msg = _('Cannot change HA attribute of active routers. Please '
'set router admin_state_up to False prior to upgrade')
raise n_exc.BadRequest(resource='router', msg=msg)
if requested_ha_state:
# This will throw HANotEnoughAvailableAgents if there aren't
# enough l3 agents to handle this router.
self.get_number_of_agents_for_scheduling(payload.context)
old_owner = constants.DEVICE_OWNER_ROUTER_INTF
new_owner = constants.DEVICE_OWNER_HA_REPLICATED_INT
else:
old_owner = constants.DEVICE_OWNER_HA_REPLICATED_INT
new_owner = constants.DEVICE_OWNER_ROUTER_INTF
ha_network = self.get_ha_network(payload.context,
payload.desired_state.tenant_id)
self._delete_vr_id_allocation(
payload.context, ha_network,
payload.desired_state.extra_attributes.ha_vr_id)
payload.desired_state.extra_attributes.ha_vr_id = None
if (payload.request_body.get('distributed') or
payload.states[0]['distributed']):
self.set_extra_attr_value(payload.context, payload.desired_state,
'ha', requested_ha_state)
return
self._migrate_router_ports(
payload.context, payload.desired_state,
old_owner=old_owner, new_owner=new_owner)
self.set_extra_attr_value(
payload.context, payload.desired_state, 'ha', requested_ha_state)
@registry.receives(resources.ROUTER, [events.AFTER_UPDATE],
priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
def _reconfigure_ha_resources(self, resource, event, trigger, context,
router_id, old_router, router, router_db,
**kwargs):
"""Event handler to react to changes after HA flag has been updated."""
ha_changed = old_router['ha'] != router['ha']
if not ha_changed:
return
requested_ha_state = router['ha']
# The HA attribute has changed. First unbind the router from agents
# to force a proper re-scheduling to agents.
# TODO(jschwarz): This will have to be more selective to get HA + DVR
# working (Only unbind from dvr_snat nodes).
self._unbind_ha_router(context, router_id)
if not requested_ha_state:
self._delete_ha_interfaces(context, router_db.id)
# always attempt to cleanup the network as the router is
# deleted. the core plugin will stop us if its in use
ha_network = self.get_ha_network(context,
router_db.tenant_id)
if ha_network:
self.safe_delete_ha_network(context, ha_network,
router_db.tenant_id)
self.schedule_router(context, router_id)
self._notify_router_updated(context, router_db.id)
def _delete_ha_network(self, context, net):
admin_ctx = n_utils.get_elevated_context(context)
self._core_plugin.delete_network(admin_ctx, net.network_id)
def safe_delete_ha_network(self, context, ha_network, tenant_id):
try:
# reference the attr inside the try block before we attempt
# to delete the network and potentially invalidate the
# relationship
net_id = ha_network.network_id
self._delete_ha_network(context, ha_network)
except (n_exc.NetworkNotFound,
orm.exc.ObjectDeletedError):
LOG.debug(
"HA network for tenant %s was already deleted.", tenant_id)
except sa.exc.InvalidRequestError:
LOG.info("HA network %s can not be deleted.", net_id)
except n_exc.NetworkInUse:
# network is still in use, this is normal so we don't
# log anything
pass
else:
LOG.info("HA network %(network)s was deleted as "
"no HA routers are present in tenant "
"%(tenant)s.",
{'network': net_id, 'tenant': tenant_id})
@registry.receives(resources.ROUTER, [events.PRECOMMIT_DELETE],
priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
def _release_router_vr_id(self, resource, event, trigger, context,
router_db, **kwargs):
"""Event handler for removal of VRID during router delete."""
if router_db.extra_attributes.ha:
ha_network = self.get_ha_network(context,
router_db.tenant_id)
if ha_network:
self._delete_vr_id_allocation(
context, ha_network, router_db.extra_attributes.ha_vr_id)
@registry.receives(resources.ROUTER, [events.AFTER_DELETE],
priority_group.PRIORITY_ROUTER_EXTENDED_ATTRIBUTE)
@db_api.retry_if_session_inactive()
def _cleanup_ha_network(self, resource, event, trigger, context,
router_id, original, **kwargs):
"""Event handler to attempt HA network deletion after router delete."""
if not original['ha']:
return
ha_network = self.get_ha_network(context, original['tenant_id'])
if not ha_network:
return
# always attempt to cleanup the network as the router is
# deleted. the core plugin will stop us if its in use
self.safe_delete_ha_network(context, ha_network, original['tenant_id'])
def _unbind_ha_router(self, context, router_id):
for agent in self.get_l3_agents_hosting_routers(context, [router_id]):
self.remove_router_from_l3_agent(context, agent['id'], router_id)
def get_ha_router_port_bindings(self, context, router_ids, host=None):
if not router_ids:
return []
return (
l3_hamode.L3HARouterAgentPortBinding.get_l3ha_filter_host_router(
context, router_ids, host))
@staticmethod
def _check_router_agent_ha_binding(context, router_id, agent_id):
return l3_hamode.L3HARouterAgentPortBinding.objects_exist(
context, router_id=router_id, l3_agent_id=agent_id)
def _get_bindings_and_update_router_state_for_dead_agents(self, context,
router_id):
"""Return bindings. In case if dead agents were detected update router
states on this agent.
"""
with db_api.CONTEXT_WRITER.using(context):
bindings = self.get_ha_router_port_bindings(context, [router_id])
router_active_agents_dead = []
router_standby_agents_dead = []
# List agents where router is active and agent is dead
# and agents where router is standby and agent is dead
for binding in bindings:
if not (binding.agent.is_active and
binding.agent.admin_state_up):
if binding.state == constants.HA_ROUTER_STATE_ACTIVE:
router_active_agents_dead.append(binding.agent)
elif binding.state == constants.HA_ROUTER_STATE_STANDBY:
router_standby_agents_dead.append(binding.agent)
if router_active_agents_dead:
# Just check if all l3_agents are down
# then assuming some communication issue
if (len(router_active_agents_dead) +
len(router_standby_agents_dead) == len(bindings)):
# Make router status as unknown because
# agent communication may be issue but router
# may still be active. We do not know the
# exact status of router.
state = constants.HA_ROUTER_STATE_UNKNOWN
else:
# Make router status as standby on all dead agents
# as some other agents are alive , router can become
# active on them after some time
state = constants.HA_ROUTER_STATE_STANDBY
for dead_agent in router_active_agents_dead:
self.update_routers_states(context, {router_id: state},
dead_agent.host)
if router_active_agents_dead:
return self.get_ha_router_port_bindings(context, [router_id])
return bindings
def get_l3_bindings_hosting_router_with_ha_states(
self, context, router_id):
"""Return a list of [(agent, ha_state), ...]."""
bindings = self._get_bindings_and_update_router_state_for_dead_agents(
context, router_id)
return [(binding.agent, binding.state) for binding in bindings
if binding.agent is not None]
def get_active_host_for_ha_router(self, context, router_id):
bindings = self.get_l3_bindings_hosting_router_with_ha_states(
context, router_id)
# TODO(amuller): In case we have two or more actives, this method
# needs to return the last agent to become active. This requires
# timestamps for state changes. Otherwise, if a host goes down
# and another takes over, we'll have two actives. In this case,
# if an interface is added to a router, its binding might be wrong
# and l2pop would not work correctly.
return next(
(agent.host for agent, state in bindings
if state == constants.HA_ROUTER_STATE_ACTIVE),
None)
@log_helpers.log_method_call
def _process_sync_ha_data(self, context, routers, host, is_any_dvr_agent):
routers_dict = dict((router['id'], router) for router in routers)
bindings = self.get_ha_router_port_bindings(context,
routers_dict.keys(),
host)
for binding in bindings:
port = binding.port
if not port:
# Filter the HA router has no ha port here
LOG.info("HA router %s is missing HA router port "
"bindings. Skipping it.",
binding.router_id)
routers_dict.pop(binding.router_id)
continue
port_dict = self._core_plugin._make_port_dict(port)
router = routers_dict.get(binding.router_id)
router[constants.HA_INTERFACE_KEY] = port_dict
router[constants.HA_ROUTER_STATE_KEY] = binding.state
interfaces = []
for router in routers_dict.values():
interface = router.get(constants.HA_INTERFACE_KEY)
if interface:
interfaces.append(interface)
self._populate_mtu_and_subnets_for_ports(context, interfaces)
# If this is a DVR+HA router, then we want to always return it even
# though it's missing the '_ha_interface' key. The agent will have
# to figure out what kind of router setup is needed.
return [r for r in list(routers_dict.values())
if (is_any_dvr_agent or
not r.get('ha') or r.get(constants.HA_INTERFACE_KEY))]
@log_helpers.log_method_call
def get_ha_sync_data_for_host(self, context, host, agent,
router_ids=None, active=None):
agent_mode = self._get_agent_mode(agent)
dvr_agent_mode = (
agent_mode in [constants.L3_AGENT_MODE_DVR_SNAT,
constants.L3_AGENT_MODE_DVR,
constants.L3_AGENT_MODE_DVR_NO_EXTERNAL])
if (dvr_agent_mode and extensions.is_extension_supported(
self, constants.L3_DISTRIBUTED_EXT_ALIAS)):
# DVR has to be handled differently
sync_data = self._get_dvr_sync_data(context, host, agent,
router_ids, active)
else:
sync_data = super(L3_HA_NAT_db_mixin, self).get_sync_data(
context, router_ids, active)
return self._process_sync_ha_data(
context, sync_data, host, dvr_agent_mode)
@classmethod
def _set_router_states(cls, context, bindings, states):
for binding in bindings:
try:
with db_api.CONTEXT_WRITER.using(context):
context.session.add(binding)
binding.state = states[binding.router_id]
except (orm.exc.StaleDataError, orm.exc.ObjectDeletedError):
# Take concurrently deleted routers in to account
pass
@db_api.retry_if_session_inactive()
def update_routers_states(self, context, states, host):
"""Receive dict of router ID to state and update them all."""
with db_api.CONTEXT_READER.using(context):
bindings = self.get_ha_router_port_bindings(
context, router_ids=states.keys(), host=host)
self._set_router_states(context, bindings, states)
self._update_router_port_bindings(context, states, host)
def _update_router_port_bindings(self, context, states, host):
admin_ctx = n_utils.get_elevated_context(context)
device_filter = {'device_id': list(states.keys()),
'device_owner':
[constants.DEVICE_OWNER_HA_REPLICATED_INT,
constants.DEVICE_OWNER_ROUTER_SNAT,
constants.DEVICE_OWNER_ROUTER_GW]}
ports = self._core_plugin.get_ports(admin_ctx, filters=device_filter)
active_ports = (
port for port in ports
if states[port['device_id']] == constants.HA_ROUTER_STATE_ACTIVE)
for port in active_ports:
try:
self._core_plugin.update_port(
admin_ctx, port['id'],
{port_def.RESOURCE_NAME: {portbindings.HOST_ID: host}})
except (orm.exc.StaleDataError, orm.exc.ObjectDeletedError,
n_exc.PortNotFound):
# Take concurrently deleted interfaces in to account
pass
def _get_gateway_port_host(self, context, router, gw_ports):
if not router.get('ha'):
return super(L3_HA_NAT_db_mixin, self)._get_gateway_port_host(
context, router, gw_ports)
gw_port_id = router['gw_port_id']
gateway_port = gw_ports.get(gw_port_id)
if not gw_port_id or not gateway_port:
return
gateway_port_status = gateway_port['status']
gateway_port_binding_host = gateway_port[portbindings.HOST_ID]
admin_ctx = n_utils.get_elevated_context(context)
router_id = router['id']
ha_bindings = self.get_l3_bindings_hosting_router_with_ha_states(
admin_ctx, router_id)
LOG.debug("HA router %(router_id)s gateway port %(gw_port_id)s "
"binding host: %(host)s, status: %(status)s",
{"router_id": router_id,
"gw_port_id": gateway_port['id'],
"host": gateway_port_binding_host,
"status": gateway_port_status})
for ha_binding_agent, ha_binding_state in ha_bindings:
if ha_binding_state != constants.HA_ROUTER_STATE_ACTIVE:
continue
# For create router gateway, the gateway port may not be ACTIVE
# yet, so we return 'primary' host directly.
if gateway_port_status != constants.PORT_STATUS_ACTIVE:
return ha_binding_agent.host
# Do not let the original 'primary' (current is backup) host,
# override the gateway port binding host.
if (gateway_port_status == constants.PORT_STATUS_ACTIVE and
ha_binding_agent.host == gateway_port_binding_host):
return ha_binding_agent.host
LOG.debug("No gateway port host retrieved. HA router %(router_id)s "
"gateway port %(gw_port_id)s "
"binding host: %(host)s, status: %(status)s, "
"router HA bindings: %(ha_bindings)s",
{"router_id": router_id,
"gw_port_id": gateway_port['id'],
"host": gateway_port_binding_host,
"status": gateway_port_status,
"ha_bindings": ha_bindings})
def is_ha_router(router):
"""Return True if router to be handled is ha."""
try:
# See if router is a DB object first
requested_router_type = router.extra_attributes.ha
except AttributeError:
# if not, try to see if it is a request body
requested_router_type = router.get('ha')
if validators.is_attr_set(requested_router_type):
return requested_router_type
return cfg.CONF.l3_ha
def is_ha_router_port(context, device_owner, router_id):
if device_owner == constants.DEVICE_OWNER_HA_REPLICATED_INT:
return True
elif device_owner == constants.DEVICE_OWNER_ROUTER_SNAT:
return l3_obj.RouterExtraAttributes.objects_exist(
context, router_id=router_id, ha=True)
else:
return False