# Copyright (C) 2014 eNovance SAS # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. import errno import itertools import os import signal import netaddr from neutron_lib import constants from neutron_lib import exceptions from neutron_lib.utils import file as file_utils from oslo_config import cfg from oslo_log import log as logging from oslo_utils import fileutils from neutron._i18n import _ from neutron.agent.linux import external_process from neutron.common import utils VALID_STATES = ['MASTER', 'BACKUP'] VALID_AUTH_TYPES = ['AH', 'PASS'] HA_DEFAULT_PRIORITY = 50 PRIMARY_VIP_RANGE_SIZE = 24 KEEPALIVED_SERVICE_NAME = 'keepalived' KEEPALIVED_EMAIL_FROM = 'neutron@openstack.local' KEEPALIVED_ROUTER_ID = 'neutron' GARP_MASTER_DELAY = 60 HEALTH_CHECK_NAME = 'ha_health_check' LOG = logging.getLogger(__name__) SIGTERM_TIMEOUT = 5 def get_free_range(parent_range, excluded_ranges, size=PRIMARY_VIP_RANGE_SIZE): """Get a free IP range, from parent_range, of the specified size. :param parent_range: String representing an IP range. E.g: '169.254.0.0/16' :param excluded_ranges: A list of strings to be excluded from parent_range :param size: What should be the size of the range returned? :return: A string representing an IP range """ free_cidrs = netaddr.IPSet([parent_range]) - netaddr.IPSet(excluded_ranges) for cidr in free_cidrs.iter_cidrs(): if cidr.prefixlen <= size: return '%s/%s' % (cidr.network, size) raise ValueError(_('Network of size %(size)s, from IP range ' '%(parent_range)s excluding IP ranges ' '%(excluded_ranges)s was not found.') % {'size': size, 'parent_range': parent_range, 'excluded_ranges': excluded_ranges}) class InvalidInstanceStateException(exceptions.NeutronException): message = _('Invalid instance state: %(state)s, valid states are: ' '%(valid_states)s') def __init__(self, **kwargs): if 'valid_states' not in kwargs: kwargs['valid_states'] = ', '.join(VALID_STATES) super(InvalidInstanceStateException, self).__init__(**kwargs) class InvalidAuthenticationTypeException(exceptions.NeutronException): message = _('Invalid authentication type: %(auth_type)s, ' 'valid types are: %(valid_auth_types)s') def __init__(self, **kwargs): if 'valid_auth_types' not in kwargs: kwargs['valid_auth_types'] = ', '.join(VALID_AUTH_TYPES) super(InvalidAuthenticationTypeException, self).__init__(**kwargs) class KeepalivedVipAddress(object): """A virtual address entry of a keepalived configuration.""" def __init__(self, ip_address, interface_name, scope=None, track=True): self.ip_address = ip_address self.interface_name = interface_name self.scope = scope self.track = track def __eq__(self, other): return (isinstance(other, KeepalivedVipAddress) and self.ip_address == other.ip_address) def __str__(self): return '[%s, %s, %s, %s]' % (self.ip_address, self.interface_name, self.scope, self.track) def build_config(self): result = '%s dev %s' % (self.ip_address, self.interface_name) if self.scope: result += ' scope %s' % self.scope if cfg.CONF.keepalived_use_no_track and not self.track: result += ' no_track' return result class KeepalivedVirtualRoute(object): """A virtual route entry of a keepalived configuration.""" def __init__(self, destination, nexthop, interface_name=None, scope=None): self.destination = destination self.nexthop = nexthop self.interface_name = interface_name self.scope = scope def build_config(self): output = self.destination if self.nexthop: output += ' via %s' % self.nexthop if self.interface_name: output += ' dev %s' % self.interface_name if self.scope: output += ' scope %s' % self.scope if cfg.CONF.keepalived_use_no_track: output += ' no_track' return output class KeepalivedInstanceRoutes(object): def __init__(self): self.gateway_routes = [] self.extra_routes = [] self.extra_subnets = [] def remove_routes_on_interface(self, interface_name): self.gateway_routes = [gw_rt for gw_rt in self.gateway_routes if gw_rt.interface_name != interface_name] # NOTE(amuller): extra_routes are initialized from the router's # 'routes' attribute. These routes do not have an interface # parameter and so cannot be removed via an interface_name lookup. self.extra_subnets = [route for route in self.extra_subnets if route.interface_name != interface_name] @property def routes(self): return self.gateway_routes + self.extra_routes + self.extra_subnets def __len__(self): return len(self.routes) def build_config(self): return itertools.chain([' virtual_routes {'], (' %s' % route.build_config() for route in self.routes), [' }']) class KeepalivedInstance(object): """Instance section of a keepalived configuration.""" def __init__(self, state, interface, vrouter_id, ha_cidrs, priority=HA_DEFAULT_PRIORITY, advert_int=None, mcast_src_ip=None, nopreempt=False, garp_master_delay=GARP_MASTER_DELAY, vrrp_health_check_interval=0, ha_conf_dir=None): self.name = 'VR_%s' % vrouter_id if state not in VALID_STATES: raise InvalidInstanceStateException(state=state) self.state = state self.interface = interface self.vrouter_id = vrouter_id self.priority = priority self.nopreempt = nopreempt self.advert_int = advert_int self.mcast_src_ip = mcast_src_ip self.garp_master_delay = garp_master_delay self.track_interfaces = [] self.vips = [] self.virtual_routes = KeepalivedInstanceRoutes() self.authentication = None self.track_script = None self.primary_vip_range = get_free_range( parent_range=constants.PRIVATE_CIDR_RANGE, excluded_ranges=[constants.METADATA_CIDR, constants.DVR_FIP_LL_CIDR] + ha_cidrs, size=PRIMARY_VIP_RANGE_SIZE) if vrrp_health_check_interval > 0: self.track_script = KeepalivedTrackScript( vrrp_health_check_interval, ha_conf_dir, self.vrouter_id) def set_authentication(self, auth_type, password): if auth_type not in VALID_AUTH_TYPES: raise InvalidAuthenticationTypeException(auth_type=auth_type) self.authentication = (auth_type, password) def add_vip(self, ip_cidr, interface_name, scope): track = interface_name in self.track_interfaces vip = KeepalivedVipAddress(ip_cidr, interface_name, scope, track=track) if vip not in self.vips: self.vips.append(vip) else: LOG.debug('VIP %s already present in %s', vip, self.vips) def remove_vips_vroutes_by_interface(self, interface_name): self.vips = [vip for vip in self.vips if vip.interface_name != interface_name] self.virtual_routes.remove_routes_on_interface(interface_name) def remove_vip_by_ip_address(self, ip_address): self.vips = [vip for vip in self.vips if vip.ip_address != ip_address] def get_existing_vip_ip_addresses(self, interface_name): return [vip.ip_address for vip in self.vips if vip.interface_name == interface_name] def _build_track_interface_config(self): return itertools.chain( [' track_interface {'], (' %s' % i for i in self.track_interfaces), [' }']) def get_primary_vip(self): """Return an address in the primary_vip_range CIDR, with the router's VRID in the host section. For example, if primary_vip_range is 169.254.0.0/24, and this router's VRID is 5, the result is 169.254.0.5. Using the VRID assures that the primary VIP is consistent amongst HA router instances on different nodes. """ ip = (netaddr.IPNetwork(self.primary_vip_range).network + self.vrouter_id) return str(netaddr.IPNetwork('%s/%s' % (ip, PRIMARY_VIP_RANGE_SIZE))) def _build_vips_config(self): # NOTE(amuller): The primary VIP must be consistent in order to avoid # keepalived bugs. Changing the VIP in the 'virtual_ipaddress' and # SIGHUP'ing keepalived can remove virtual routers, including the # router's default gateway. # We solve this by never changing the VIP in the virtual_ipaddress # section, herein known as the primary VIP. # The only interface known to exist for HA routers is the HA interface # (self.interface). We generate an IP on that device and use it as the # primary VIP. The other VIPs (Internal interfaces IPs, the external # interface IP and floating IPs) are placed in the # virtual_ipaddress_excluded section. primary = KeepalivedVipAddress(self.get_primary_vip(), self.interface) vips_result = [' virtual_ipaddress {', ' %s' % primary.build_config(), ' }'] if self.vips: vips_result.extend( itertools.chain([' virtual_ipaddress_excluded {'], (' %s' % vip.build_config() for vip in sorted(self.vips, key=lambda vip: vip.ip_address)), [' }'])) return vips_result def _build_virtual_routes_config(self): return itertools.chain([' virtual_routes {'], (' %s' % route.build_config() for route in self.virtual_routes), [' }']) def build_config(self): if self.track_script: config = self.track_script.build_config_preamble() self.track_script.routes = self.virtual_routes.gateway_routes self.track_script.vips = self.vips else: config = [] config.extend(['vrrp_instance %s {' % self.name, ' state %s' % self.state, ' interface %s' % self.interface, ' virtual_router_id %s' % self.vrouter_id, ' priority %s' % self.priority, ' garp_master_delay %s' % self.garp_master_delay]) if self.nopreempt: config.append(' nopreempt') if self.advert_int: config.append(' advert_int %s' % self.advert_int) if self.authentication: auth_type, password = self.authentication authentication = [' authentication {', ' auth_type %s' % auth_type, ' auth_pass %s' % password, ' }'] config.extend(authentication) if self.mcast_src_ip: config.append(' mcast_src_ip %s' % self.mcast_src_ip) if self.track_interfaces: config.extend(self._build_track_interface_config()) config.extend(self._build_vips_config()) if len(self.virtual_routes): config.extend(self.virtual_routes.build_config()) if self.track_script: config.extend(self.track_script.build_config()) config.append('}') return config class KeepalivedConf(object): """A keepalived configuration.""" def __init__(self): self.reset() def reset(self): self.instances = {} def add_instance(self, instance): self.instances[instance.vrouter_id] = instance def get_instance(self, vrouter_id): return self.instances.get(vrouter_id) def build_config(self): config = ['global_defs {', ' notification_email_from %s' % KEEPALIVED_EMAIL_FROM, ' router_id %s' % KEEPALIVED_ROUTER_ID, '}' ] for instance in self.instances.values(): config.extend(instance.build_config()) return config def get_config_str(self): """Generates and returns the keepalived configuration. :return: Keepalived configuration string. """ return '\n'.join(self.build_config()) class KeepalivedManager(object): """Wrapper for keepalived. This wrapper permits to write keepalived config files, to start/restart keepalived process. """ def __init__(self, resource_id, config, process_monitor, conf_path, namespace=None, throttle_restart_value=None): self.resource_id = resource_id self.config = config self.namespace = namespace self.process_monitor = process_monitor self.conf_path = conf_path # configure throttler for spawn to introduce delay between SIGHUPs, # otherwise keepalived master may unnecessarily flip to slave if throttle_restart_value is not None: self._throttle_spawn(throttle_restart_value) # pylint: disable=method-hidden def _throttle_spawn(self, threshold): self.spawn = utils.throttler(threshold)(self.spawn) def get_conf_dir(self): confs_dir = os.path.abspath(os.path.normpath(self.conf_path)) conf_dir = os.path.join(confs_dir, self.resource_id) return conf_dir def get_full_config_file_path(self, filename, ensure_conf_dir=True): conf_dir = self.get_conf_dir() if ensure_conf_dir: fileutils.ensure_tree(conf_dir, mode=0o755) return os.path.join(conf_dir, filename) def _output_config_file(self): config_str = self.config.get_config_str() LOG.debug("Router %s keepalived config: %s", self.resource_id, config_str) config_path = self.get_full_config_file_path('keepalived.conf') file_utils.replace_file(config_path, config_str) return config_path @staticmethod def _safe_remove_pid_file(pid_file): try: os.remove(pid_file) except OSError as e: if e.errno != errno.ENOENT: LOG.error("Could not delete file %s, keepalived can " "refuse to start.", pid_file) def get_vrrp_pid_file_name(self, base_pid_file): return '%s-vrrp' % base_pid_file def get_conf_on_disk(self): config_path = self.get_full_config_file_path('keepalived.conf') try: with open(config_path) as conf: return conf.read() except (OSError, IOError) as e: if e.errno != errno.ENOENT: raise def spawn(self): config_path = self._output_config_file() for key, instance in self.config.instances.items(): if instance.track_script: instance.track_script.write_check_script() keepalived_pm = self.get_process() vrrp_pm = self._get_vrrp_process( self.get_vrrp_pid_file_name(keepalived_pm.get_pid_file_name())) keepalived_pm.default_cmd_callback = ( self._get_keepalived_process_callback(vrrp_pm, config_path)) keepalived_pm.enable(reload_cfg=True) self.process_monitor.register(uuid=self.resource_id, service_name=KEEPALIVED_SERVICE_NAME, monitored_process=keepalived_pm) LOG.debug('Keepalived spawned with config %s', config_path) def disable(self): self.process_monitor.unregister(uuid=self.resource_id, service_name=KEEPALIVED_SERVICE_NAME) pm = self.get_process() pm.disable(sig=str(int(signal.SIGTERM))) try: utils.wait_until_true(lambda: not pm.active, timeout=SIGTERM_TIMEOUT) except utils.WaitTimeout: LOG.warning('Keepalived process %s did not finish after SIGTERM ' 'signal in %s seconds, sending SIGKILL signal', pm.pid, SIGTERM_TIMEOUT) pm.disable(sig=str(int(signal.SIGKILL))) def check_processes(self): keepalived_pm = self.get_process() vrrp_pm = self._get_vrrp_process( self.get_vrrp_pid_file_name(keepalived_pm.get_pid_file_name())) return keepalived_pm.active and vrrp_pm.active def get_process(self): return external_process.ProcessManager( cfg.CONF, self.resource_id, self.namespace, service=KEEPALIVED_SERVICE_NAME, pids_path=self.conf_path) def _get_vrrp_process(self, pid_file): return external_process.ProcessManager( cfg.CONF, self.resource_id, self.namespace, pid_file=pid_file) def _get_keepalived_process_callback(self, vrrp_pm, config_path): def callback(pid_file): # If keepalived process crashed unexpectedly, the vrrp process # will be orphan and prevent keepalived process to be spawned. # A check here will let the l3-agent to kill the orphan process # and spawn keepalived successfully. if vrrp_pm.active: vrrp_pm.disable() self._safe_remove_pid_file(pid_file) self._safe_remove_pid_file(self.get_vrrp_pid_file_name(pid_file)) cmd = ['keepalived', '-P', '-f', config_path, '-p', pid_file, '-r', self.get_vrrp_pid_file_name(pid_file)] if logging.is_debug_enabled(cfg.CONF): cmd.append('-D') return cmd return callback class KeepalivedTrackScript(KeepalivedConf): """Track script generator for Keepalived""" def __init__(self, interval, conf_dir, vr_id): self.interval = interval self.conf_dir = conf_dir self.vr_id = vr_id self.routes = [] self.vips = [] def build_config_preamble(self): config = ['', 'vrrp_script %s_%s {' % (HEALTH_CHECK_NAME, self.vr_id), ' script "%s"' % self._get_script_location(), ' interval %s' % self.interval, ' fall 2', ' rise 2', '}', ''] return config def _is_needed(self): """Check if track script is needed by checking amount of routes. :return: True/False """ return len(self.routes) > 0 def build_config(self): if not self._is_needed(): return '' config = [' track_script {', ' %s_%s' % (HEALTH_CHECK_NAME, self.vr_id), ' }'] return config def build_script(self): return itertools.chain(['#!/bin/bash -eu'], ['%s' % self._check_ip_assigned()], ('%s' % self._add_ip_addr(route.nexthop) for route in self.routes if route.nexthop), ) def _add_ip_addr(self, ip_addr): cmd = { 4: 'ping', 6: 'ping6', }.get(netaddr.IPAddress(ip_addr).version) return '%s -c 1 -w 1 %s 1>/dev/null || exit 1' % (cmd, ip_addr) def _check_ip_assigned(self): cmd = 'ip a | grep %s || exit 0' return cmd % netaddr.IPNetwork(self.vips[0].ip_address).ip if len( self.vips) else '' def _get_script_str(self): """Generates and returns bash script to verify connectivity. :return: Bash script code """ return '\n'.join(self.build_script()) def _get_script_location(self): return os.path.join(self.conf_dir, 'ha_check_script_%s.sh' % self.vr_id) def write_check_script(self): if not self._is_needed(): return file_utils.replace_file( self._get_script_location(), self._get_script_str(), 0o520)