Previously L3 HA generated a bash script and copied it to a per-router configuration directory that was visible to that router's keepalived instance. This patch changes the in-line generated Bash script to a Python script that can be maintained in the repository. The bash script was used as a keepalived notifier script that was invoked by keepalived whenever a state transition occurred. These notifier scripts may be invoked by keepalived out of order in case it transitions quickly twice. For example, if the master fails and two slaves fight for the new master role, one will transition to master, and the other will often transition to master and then immediately back to standby. In this case, the transition scripts were often fired out of order, resulting in the wrong state being reported. The proposed approach is to get rid of the keepalived notifier scripts entirely. Instead, monitor IP changes on the HA device. If the omnipresent IP address was configured on the HA device, it means that we're looking at a master instance. If it was deleted, the router transitioned to standby or fault. In order to keep the L3 agent CPU usage down, it will spawn a process per HA router. That process will start the IP address monitor. Whenever it gets an IP address change event, it will notify the L3 agent via a Unix domain socket. Partially-Implements: blueprint report-ha-router-master Change-Id: I2022bced330d5f108fbedd40548a901225d7ea1c Closes-Bug: #1402010 Closes-Bug: #1367705 changes/84/125384/46
parent
89eef89047
commit
9bae3b1832
@ -0,0 +1,144 @@
|
||||
# Copyright (c) 2015 Red Hat Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import httplib2
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
import requests
|
||||
|
||||
from neutron.agent.l3 import ha
|
||||
from neutron.agent.linux import daemon
|
||||
from neutron.agent.linux import ip_monitor
|
||||
from neutron.agent.linux import utils as agent_utils
|
||||
from neutron.common import config
|
||||
from neutron.i18n import _LE
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class KeepalivedUnixDomainConnection(agent_utils.UnixDomainHTTPConnection):
    """Unix domain HTTP connection aimed at the state change socket.

    After the parent initializer has run, ``socket_path`` is set to the
    value returned by ``ha.L3AgentKeepalivedStateChangeServer.
    get_keepalived_state_change_socket_path`` for the global ``cfg.CONF``.
    """

    def __init__(self, *args, **kwargs):
        # Old style super initialization is required!
        agent_utils.UnixDomainHTTPConnection.__init__(self, *args, **kwargs)
        state_change_server = ha.L3AgentKeepalivedStateChangeServer
        self.socket_path = (
            state_change_server.get_keepalived_state_change_socket_path(
                cfg.CONF))
|
||||
|
||||
|
||||
class MonitorDaemon(daemon.Daemon):
    """Daemon that turns IP address events into router state reports.

    Lines produced by an ``ip_monitor.IPMonitor`` are parsed one by one.
    When an event matches the configured interface and CIDR, the state
    ('master' when the address was added, 'backup' otherwise) is written
    to ``<conf_dir>/state`` and sent to the agent over HTTP.
    """

    def __init__(self, pidfile, router_id, user, group, namespace, conf_dir,
                 interface, cidr):
        """Store the monitoring parameters and initialize the daemon.

        :param pidfile: passed through to the parent daemon
        :param router_id: router ID; also used as the daemon's uuid
        :param user: passed through to the parent daemon
        :param group: passed through to the parent daemon
        :param namespace: namespace handed to the IP monitor
        :param conf_dir: directory in which the 'state' file is written
        :param interface: interface name an event must carry to be handled
        :param cidr: CIDR an event must carry to be handled
        """
        self.interface = interface
        self.cidr = cidr
        self.namespace = namespace
        self.conf_dir = conf_dir
        self.router_id = router_id
        super(MonitorDaemon, self).__init__(
            pidfile, uuid=router_id, user=user, group=group)

    def run(self, run_as_root=False):
        """Start the IP monitor and handle every line it yields.

        :param run_as_root: forwarded to the IP monitor; when false the
                            parent daemon's ``run`` is invoked as well
        """
        address_monitor = ip_monitor.IPMonitor(
            namespace=self.namespace, run_as_root=run_as_root)
        address_monitor.start()

        # Only drop privileges if the process is currently running as root.
        # (The run_as_root variable name here is unfortunate - it means to
        # use a root helper when the running process is NOT already running
        # as root.)
        if not run_as_root:
            super(MonitorDaemon, self).run()

        for line in address_monitor:
            self.parse_and_handle_event(line)

    def parse_and_handle_event(self, iterable):
        """Parse one monitor line and report a state change if it matches.

        Any exception raised while parsing, writing the state file or
        notifying the agent is logged and swallowed.

        :param iterable: a single line of text produced by the IP monitor
        """
        try:
            event = ip_monitor.IPMonitorEvent.from_text(iterable)
            is_watched_address = (event.interface == self.interface and
                                  event.cidr == self.cidr)
            if not is_watched_address:
                return

            if event.added:
                new_state = 'master'
            else:
                new_state = 'backup'

            self.write_state_change(new_state)
            self.notify_agent(new_state)
        except Exception:
            LOG.exception(_LE(
                'Failed to process or handle event for line %s'), iterable)

    def write_state_change(self, state):
        """Overwrite the 'state' file in ``conf_dir`` with ``state``."""
        state_path = os.path.join(self.conf_dir, 'state')
        with open(state_path, 'w') as state_file:
            state_file.write(state)
        LOG.debug('Wrote router %s state %s', self.router_id, state)

    def notify_agent(self, state):
        """Send the router ID and state to the agent as HTTP headers.

        :param state: state string placed in the X-Neutron-State header
        :raises Exception: if the response status is not OK
        """
        headers = {'X-Neutron-Router-Id': self.router_id,
                   'X-Neutron-State': state}
        # Note that the message is sent via a Unix domain socket so that
        # the URL doesn't matter.
        resp, content = httplib2.Http().request(
            'http://127.0.0.1/',
            headers=headers,
            connection_type=KeepalivedUnixDomainConnection)

        if resp.status != requests.codes.ok:
            raise Exception(_('Unexpected response: %s') % resp)

        LOG.debug('Notified agent router %s, state %s', self.router_id, state)
|
||||
|
||||
|
||||
def register_opts(conf):
    """Register the monitor's CLI options and the proxy socket option.

    :param conf: configuration object on which the options are registered
    """
    cli_opts = [
        cfg.StrOpt('router_id', help=_('ID of the router')),
        cfg.StrOpt('namespace', help=_('Namespace of the router')),
        cfg.StrOpt('conf_dir', help=_('Path to the router directory')),
        cfg.StrOpt('monitor_interface', help=_('Interface to monitor')),
        cfg.StrOpt('monitor_cidr', help=_('CIDR to monitor')),
        cfg.StrOpt('pid_file', help=_('Path to PID file for this process')),
        cfg.StrOpt('user', help=_('User (uid or name) running this process '
                                  'after its initialization')),
        cfg.StrOpt('group', help=_('Group (gid or name) running this process '
                                   'after its initialization')),
    ]
    for cli_opt in cli_opts:
        conf.register_cli_opt(cli_opt)

    # This one is a regular (non-CLI) option.
    proxy_socket_opt = cfg.StrOpt(
        'metadata_proxy_socket',
        default='$state_path/metadata_proxy',
        help=_('Location of Metadata Proxy UNIX domain '
               'socket'))
    conf.register_opt(proxy_socket_opt)
|
||||
|
||||
|
||||
def configure(conf):
    """Parse the command line, force verbose debug logging and set it up.

    The log directory is overridden with the router's ``conf_dir``.

    :param conf: configuration object on which the overrides are set
    """
    config.init(sys.argv[1:])

    # Built after config.init() so that conf_dir has been parsed.
    overrides = (
        ('log_dir', cfg.CONF.conf_dir),
        ('debug', True),
        ('verbose', True),
    )
    for option_name, option_value in overrides:
        conf.set_override(option_name, option_value)

    config.setup_logging()
|
||||
|
||||
|
||||
def main():
    """Register options, configure logging and start the monitor daemon."""
    conf = cfg.CONF
    register_opts(conf)
    configure(conf)

    monitor_daemon = MonitorDaemon(pidfile=conf.pid_file,
                                   router_id=conf.router_id,
                                   user=conf.user,
                                   group=conf.group,
                                   namespace=conf.namespace,
                                   conf_dir=conf.conf_dir,
                                   interface=conf.monitor_interface,
                                   cidr=conf.monitor_cidr)
    monitor_daemon.start()
|
@ -0,0 +1,19 @@
|
||||
# Copyright (c) 2015 Red Hat Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from neutron.agent.l3 import keepalived_state_change
|
||||
|
||||
|
||||
def main():
    """Entry point that delegates to ``keepalived_state_change.main``."""
    keepalived_state_change.main()
|
@ -0,0 +1,73 @@
|
||||
# Copyright (c) 2015 Red Hat Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
||||
# not use this file except in compliance with the License. You may obtain
|
||||
# a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import os
|
||||
|
||||
import mock
|
||||
from oslo_config import cfg
|
||||
|
||||
from neutron.agent.l3 import keepalived_state_change
|
||||
from neutron.openstack.common import uuidutils
|
||||
from neutron.tests.functional import base
|
||||
|
||||
|
||||
class TestKeepalivedStateChange(base.BaseSudoTestCase):
    """Tests for ``MonitorDaemon.parse_and_handle_event``.

    ``notify_agent`` is patched on the monitor instance in ``setUp``, so
    these tests only observe the state file written to the temp directory.
    """

    def setUp(self):
        super(TestKeepalivedStateChange, self).setUp()
        proxy_socket_opt = cfg.StrOpt(
            'metadata_proxy_socket',
            default='$state_path/metadata_proxy',
            help=_('Location of Metadata Proxy UNIX domain '
                   'socket'))
        cfg.CONF.register_opt(proxy_socket_opt)

        self.router_id = uuidutils.generate_uuid()
        self.conf_dir = self.get_default_temp_dir().path
        self.cidr = '169.254.128.1/24'
        self.interface_name = 'interface'
        self.monitor = keepalived_state_change.MonitorDaemon(
            pidfile=self.get_temp_file_path('monitor.pid'),
            router_id=self.router_id,
            user=1,
            group=2,
            namespace='namespace',
            conf_dir=self.conf_dir,
            interface=self.interface_name,
            cidr=self.cidr)

        notify_patcher = mock.patch.object(self.monitor, 'notify_agent')
        notify_patcher.start()

        # A monitor line describing the watched address on the interface.
        self.line = '1: %s inet %s' % (self.interface_name, self.cidr)

    def test_parse_and_handle_event_wrong_device_completes_without_error(self):
        unrelated_line = '1: wrong_device inet wrong_cidr'
        self.monitor.parse_and_handle_event(unrelated_line)

    def _get_state(self):
        """Return the contents of the monitor's 'state' file."""
        state_path = os.path.join(self.monitor.conf_dir, 'state')
        with open(state_path) as state_file:
            return state_file.read()

    def test_parse_and_handle_event_writes_to_file(self):
        deleted_line = 'Deleted %s' % self.line
        self.monitor.parse_and_handle_event(deleted_line)
        self.assertEqual('backup', self._get_state())

        self.monitor.parse_and_handle_event(self.line)
        self.assertEqual('master', self._get_state())

    def test_parse_and_handle_event_fails_writing_state(self):
        # The handler is expected to swallow the error.
        failing_write = mock.patch.object(
            self.monitor, 'write_state_change', side_effect=OSError)
        with failing_write:
            self.monitor.parse_and_handle_event(self.line)

    def test_parse_and_handle_event_fails_notifying_agent(self):
        # The handler is expected to swallow the error.
        failing_notify = mock.patch.object(
            self.monitor, 'notify_agent', side_effect=Exception)
        with failing_notify:
            self.monitor.parse_and_handle_event(self.line)
|
Loading…
Reference in new issue