neutron/neutron/agent/l3/ha.py

251 lines
10 KiB
Python

# Copyright (c) 2014 OpenStack Foundation.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import os
import threading
import eventlet
from neutron_lib import constants
from oslo_log import log as logging
from oslo_utils import fileutils
from oslo_utils import netutils
import webob
from neutron.agent.linux import utils as agent_utils
from neutron.notifiers import batch_notifier
LOG = logging.getLogger(__name__)
KEEPALIVED_STATE_CHANGE_SERVER_BACKLOG = 4096
TRANSLATION_MAP = {'primary': constants.HA_ROUTER_STATE_ACTIVE,
'backup': constants.HA_ROUTER_STATE_STANDBY,
'fault': constants.HA_ROUTER_STATE_STANDBY,
'unknown': constants.HA_ROUTER_STATE_UNKNOWN}
class KeepalivedStateChangeHandler(object):
def __init__(self, agent):
self.agent = agent
@webob.dec.wsgify(RequestClass=webob.Request)
def __call__(self, req):
router_id = req.headers['X-Neutron-Router-Id']
state = req.headers['X-Neutron-State']
self.enqueue(router_id, state)
def enqueue(self, router_id, state):
LOG.debug('Handling notification for router '
'%(router_id)s, state %(state)s', {'router_id': router_id,
'state': state})
self.agent.enqueue_state_change(router_id, state)
class L3AgentKeepalivedStateChangeServer(object):
def __init__(self, agent, conf):
self.agent = agent
self.conf = conf
agent_utils.ensure_directory_exists_without_file(
self.get_keepalived_state_change_socket_path(self.conf))
@classmethod
def get_keepalived_state_change_socket_path(cls, conf):
return os.path.join(conf.state_path, 'keepalived-state-change')
def run(self):
server = agent_utils.UnixDomainWSGIServer(
'neutron-keepalived-state-change',
num_threads=self.conf.ha_keepalived_state_change_server_threads)
server.start(KeepalivedStateChangeHandler(self.agent),
self.get_keepalived_state_change_socket_path(self.conf),
workers=0,
backlog=KEEPALIVED_STATE_CHANGE_SERVER_BACKLOG)
server.wait()
class AgentMixin(object):
def __init__(self, host):
self._init_ha_conf_path()
super(AgentMixin, self).__init__(host)
# BatchNotifier queue is needed to ensure that the HA router
# state change sequence is under the proper order.
self.state_change_notifier = batch_notifier.BatchNotifier(
self._calculate_batch_duration(), self.notify_server)
eventlet.spawn(self._start_keepalived_notifications_server)
self._transition_states = {}
self._transition_state_mutex = threading.Lock()
def _get_router_info(self, router_id):
try:
return self.router_info[router_id]
except KeyError:
LOG.info('Router %s is not managed by this agent. It was '
'possibly deleted concurrently.', router_id)
def check_ha_state_for_router(self, router_id, current_state):
ri = self._get_router_info(router_id)
if not ri:
return
ha_state = ri.ha_state
if current_state != TRANSLATION_MAP[ha_state]:
LOG.debug("Updating server with state %(state)s for router "
"%(router_id)s", {'router_id': router_id,
'state': ha_state})
self.state_change_notifier.queue_event((router_id, ha_state))
def _start_keepalived_notifications_server(self):
state_change_server = (
L3AgentKeepalivedStateChangeServer(self, self.conf))
state_change_server.run()
def _calculate_batch_duration(self):
# Set the BatchNotifier interval to ha_vrrp_advert_int,
# default 2 seconds.
return self.conf.ha_vrrp_advert_int
def _update_transition_state(self, router_id, new_state=None):
with self._transition_state_mutex:
transition_state = self._transition_states.get(router_id)
if new_state:
self._transition_states[router_id] = new_state
else:
self._transition_states.pop(router_id, None)
return transition_state
def enqueue_state_change(self, router_id, state):
"""Inform the server about the new router state
This function will also update the metadata proxy, the radvd daemon,
process the prefix delegation and inform to the L3 extensions. If the
HA router changes to "primary", this transition will be delayed for at
least "ha_vrrp_advert_int" seconds. When the "primary" router
transitions to "backup", "keepalived" will set the rest of HA routers
to "primary" until it decides which one should be the only "primary".
The transition from "backup" to "primary" and then to "backup" again,
should not be registered in the Neutron server.
:param router_id: router ID
:param state: ['primary', 'backup']
"""
if not self._update_transition_state(router_id, state):
eventlet.spawn_n(self._enqueue_state_change, router_id, state)
eventlet.sleep(0)
def _enqueue_state_change(self, router_id, state):
# NOTE(ralonsoh): move 'primary' and 'backup' constants to n-lib
if state == 'primary':
eventlet.sleep(self.conf.ha_vrrp_advert_int)
if self._update_transition_state(router_id) != state:
# If the current "transition state" is not the initial "state" sent
# to update the router, that means the actual router state is the
# same as the "transition state" (e.g.: backup-->primary-->backup).
return
ri = self._get_router_info(router_id)
if ri is None:
return
state_change_data = {"router_id": router_id, "state": state,
"host": ri.agent.host}
LOG.info('Router %(router_id)s transitioned to %(state)s on '
'agent %(host)s',
state_change_data)
# Set external gateway port link up or down according to state
if state == 'primary':
ri.set_external_gw_port_link_status(link_up=True, set_gw=True)
elif state == 'backup':
ri.set_external_gw_port_link_status(link_up=False)
else:
LOG.warning('Router %s has status %s, '
'no action to router gateway device.',
router_id, state)
# TODO(dalvarez): Fix bug 1677279 by moving the IPv6 parameters
# configuration to keepalived-state-change in order to remove the
# dependency that currently exists on l3-agent running for the IPv6
# failover.
ri.ha_state = state
self._configure_ipv6_params(ri, state)
if self.conf.enable_metadata_proxy:
self._update_metadata_proxy(ri, router_id, state)
self._update_radvd_daemon(ri, state)
self.pd.process_ha_state(router_id, state == 'primary')
self.state_change_notifier.queue_event((router_id, state))
self.l3_ext_manager.ha_state_change(self.context, state_change_data)
def _configure_ipv6_params(self, ri, state):
if not self.use_ipv6:
return
ipv6_forwarding_enable = state == 'primary'
if ri.router.get('distributed', False):
namespace = ri.ha_namespace
else:
namespace = ri.ns_name
if ipv6_forwarding_enable:
ri.driver.configure_ipv6_forwarding(
namespace, 'all', ipv6_forwarding_enable)
# If ipv6 is enabled on the platform, ipv6_gateway config flag is
# not set and external_network associated to the router does not
# include any IPv6 subnet, enable the gateway interface to accept
# Router Advts from upstream router for default route on primary
# instances as well as ipv6 forwarding. Otherwise, disable them.
ex_gw_port_id = ri.ex_gw_port and ri.ex_gw_port['id']
if ex_gw_port_id:
interface_name = ri.get_external_device_name(ex_gw_port_id)
ri._configure_ipv6_params_on_gw(
ri.ex_gw_port, namespace, interface_name,
ipv6_forwarding_enable)
def _update_metadata_proxy(self, ri, router_id, state):
# NOTE(slaweq): Since the metadata proxy is spawned in the qrouter
# namespace and not in the snat namespace, even standby DVR-HA
# routers needs to serve metadata requests to local ports.
if state == 'primary' or ri.router.get('distributed', False):
LOG.debug('Spawning metadata proxy for router %s', router_id)
spawn_kwargs = {}
if netutils.is_ipv6_enabled():
spawn_kwargs['bind_address'] = '::'
self.metadata_driver.spawn_monitored_metadata_proxy(
self.process_monitor, ri.ns_name, self.conf.metadata_port,
self.conf, router_id=ri.router_id, **spawn_kwargs)
else:
LOG.debug('Closing metadata proxy for router %s', router_id)
self.metadata_driver.destroy_monitored_metadata_proxy(
self.process_monitor, ri.router_id, self.conf, ri.ns_name)
def _update_radvd_daemon(self, ri, state):
# Radvd has to be spawned only on the primary HA Router. If there are
# any state transitions, we enable/disable radvd accordingly.
if state == 'primary':
ri.enable_radvd()
else:
ri.disable_radvd()
def notify_server(self, batched_events):
translated_states = dict((router_id, TRANSLATION_MAP[state]) for
router_id, state in batched_events)
LOG.debug('Updating server with HA routers states %s',
translated_states)
self.plugin_rpc.update_ha_routers_states(
self.context, translated_states)
def _init_ha_conf_path(self):
ha_full_path = os.path.dirname("/%s/" % self.conf.ha_confs_path)
fileutils.ensure_tree(ha_full_path, mode=0o755)