Add L3 HA fullstack failover tests
* Generate unique internal and external router device names using the agent's hostname. This allows multiple HA router replicas to co-exist on the same machine; otherwise they would all use the same device names, which OVS cannot handle. * Add a host.disconnect method that disconnects the host from the central internal and external bridges, simulating pulling the cable from the host's NIC. * Add host.kill and host.shutdown methods to forcefully kill and gracefully shut down a host, simulating a host failure and a host shutdown respectively. This also includes code to clean up leftover router namespaces. * Add L3 HA failover tests for graceful failover, host failure and network disconnect. * Improve systemd service restart handling in the Process fixture to use `systemctl restart` and to not block the rootwrap daemon. Co-Authored-By: Assaf Muller <amuller@redhat.com> Change-Id: Iaaa1c2cab0341a929e368392aa7dc47c9b2399c2 Original-Change-Id: I250fa41d89dfc4f9f3ba4c03a027b52b2e8c4b4b
This commit is contained in:
@@ -275,6 +275,8 @@ def create_patch_ports(source, destination):
|
||||
source.add_patch_port(source_name, destination_name)
|
||||
destination.add_patch_port(destination_name, source_name)
|
||||
|
||||
return source_name, destination_name
|
||||
|
||||
|
||||
def create_vlan_interface(
|
||||
namespace, port_name, mac_address, ip_address, vlan_tag):
|
||||
@@ -406,7 +408,7 @@ class Pinger(object):
|
||||
"""
|
||||
|
||||
stats_pattern = re.compile(
|
||||
r'^(?P<trans>\d+) packets transmitted,.*(?P<recv>\d+) received.*$')
|
||||
r'^(?P<trans>\d+) packets transmitted, +(?P<recv>\d+) received.*$')
|
||||
unreachable_pattern = re.compile(
|
||||
r'.* Destination .* Unreachable')
|
||||
TIMEOUT = 15
|
||||
@@ -430,7 +432,9 @@ class Pinger(object):
|
||||
"Ping command hasn't ended after %d seconds." % self.TIMEOUT))
|
||||
|
||||
def _parse_stats(self):
|
||||
output = ''
|
||||
for line in self.proc.stdout:
|
||||
output += line
|
||||
if (not self.destination_unreachable and
|
||||
self.unreachable_pattern.match(line)):
|
||||
self.destination_unreachable = True
|
||||
@@ -441,7 +445,9 @@ class Pinger(object):
|
||||
self.received = int(result.group('recv'))
|
||||
break
|
||||
else:
|
||||
LOG.error(f"Didn't find ping statistics:\n{output}")
|
||||
raise RuntimeError("Didn't find ping statistics.")
|
||||
LOG.debug(f"ping command output:\n{output}")
|
||||
|
||||
def start(self):
|
||||
if self.proc and self.proc.is_running:
|
||||
|
||||
@@ -120,7 +120,7 @@ class BaseFullStackTestCase(testlib_api.MySQLTestCaseMixin,
|
||||
common_utils.wait_until_true(_agent_down)
|
||||
|
||||
def _assert_ping_during_agents_restart(
|
||||
self, agents, src_namespace, ips, restart_timeout=10,
|
||||
self, agents, src_namespace, ips, restart_timeout=30,
|
||||
ping_timeout=1, count=10):
|
||||
with net_helpers.async_ping(
|
||||
src_namespace, ips, timeout=ping_timeout,
|
||||
@@ -167,11 +167,12 @@ class BaseFullStackTestCase(testlib_api.MySQLTestCaseMixin,
|
||||
available_ips = itertools.islice(valid_ips, initial, initial + num)
|
||||
return [str(available_ip) for available_ip in available_ips]
|
||||
|
||||
def _create_external_vm(self, network, subnet):
|
||||
def _create_external_vm(self, network, subnet, ip=None):
|
||||
ip = ip or subnet['gateway_ip']
|
||||
vm = self.useFixture(
|
||||
machine_fixtures.FakeMachine(
|
||||
self.environment.central_bridge,
|
||||
common_utils.ip_to_cidr(subnet['gateway_ip'], 24)))
|
||||
common_utils.ip_to_cidr(ip, 24)))
|
||||
# NOTE(slaweq): as ext_net is 'vlan' network type external_vm needs to
|
||||
# send packets with proper vlan also
|
||||
vm.bridge.set_db_attribute(
|
||||
|
||||
@@ -444,3 +444,10 @@ class ClientFixture(fixtures.Fixture):
|
||||
|
||||
def update_quota(self, project_id, tracked_resource, quota):
    """Set the quota of one tracked resource for a project.

    Delegates to ``self._update_resource`` with the ``'quota'`` resource
    type and a single-entry body mapping ``tracked_resource`` to ``quota``.
    """
    quota_body = {tracked_resource: quota}
    self._update_resource('quota', project_id, quota_body)
|
||||
|
||||
def add_gateway_router(self, router_id, network_id):
    """Attach a gateway to a router and register its removal as cleanup.

    :param router_id: ID of the router that receives the gateway.
    :param network_id: ID of the network passed as the gateway network.
    """
    gateway_info = {'network_id': network_id}
    self.client.add_gateway_router(router_id, gateway_info)
    # The cleanup is only registered once the gateway call has returned.
    self.addCleanup(self.client.remove_gateway_router, router_id)
|
||||
|
||||
@@ -12,10 +12,13 @@
|
||||
# License for the specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import signal
|
||||
|
||||
import fixtures
|
||||
from neutron_lib import constants
|
||||
from neutronclient.common import exceptions as nc_exc
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
|
||||
from neutron.agent.linux import ip_lib
|
||||
from neutron.common import utils as common_utils
|
||||
@@ -28,6 +31,8 @@ from neutron.tests.common import net_helpers
|
||||
from neutron.tests.fullstack.resources import config
|
||||
from neutron.tests.fullstack.resources import process
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EnvironmentDescription(object):
|
||||
"""A set of characteristics of an environment setup.
|
||||
@@ -110,9 +115,8 @@ class Host(fixtures.Fixture):
|
||||
IP address on the appropriate physical NIC. The Host class does the same
|
||||
with the connect_* methods.
|
||||
|
||||
TODO(amuller): Add start/stop/restart methods that will start/stop/restart
|
||||
all of the agents on this host. Add a kill method that stops all agents
|
||||
and disconnects the host from other hosts.
|
||||
TODO(amuller): Add restart method that will restart all of the agents on
|
||||
this host.
|
||||
"""
|
||||
|
||||
def __init__(self, env_desc, host_desc, test_name,
|
||||
@@ -263,12 +267,14 @@ class Host(fixtures.Fixture):
|
||||
|
||||
veth_1.link.set_up()
|
||||
veth_2.link.set_up()
|
||||
self.tunnel_device = veth_1
|
||||
|
||||
def connect_to_central_network_via_vlans(self, host_data_bridge):
|
||||
# If using VLANs as a segmentation device, it's needed to connect
|
||||
# a provider bridge to a centralized, shared bridge.
|
||||
net_helpers.create_patch_ports(
|
||||
source, destination = net_helpers.create_patch_ports(
|
||||
self.central_bridge, host_data_bridge)
|
||||
self.internal_port = destination
|
||||
|
||||
def allocate_local_ip(self):
|
||||
if not self.env_desc.network_range:
|
||||
@@ -296,6 +302,32 @@ class Host(fixtures.Fixture):
|
||||
self.network_bridges[network_id] = bridge
|
||||
return bridge
|
||||
|
||||
def disconnect(self):
    """Disconnect this host from the network shared with the other hosts.

    When tunneling is enabled the addresses on ``tunnel_device`` are
    flushed (``flush(4)``, presumably IP version 4); otherwise the port
    stored in ``internal_port`` is deleted from ``br_phys``.
    """
    if not self.env_desc.tunneling_enabled:
        self.br_phys.delete_port(self.internal_port)
    else:
        self.tunnel_device.addr.flush(4)

    LOG.info(f'Host {self.hostname} disconnected.')
|
||||
|
||||
def kill(self, parent=None):
    """Kill every agent on this host with SIGKILL, then shut it down.

    :param parent: passed through unchanged to ``shutdown``.
    """
    # The agents are killed first so none of them can shut down gracefully.
    for agent in self.agents.values():
        agent.stop(kill_signal=signal.SIGKILL)
    LOG.info(f'Agents on host {self.hostname} killed.')

    self.shutdown(parent)
|
||||
|
||||
def shutdown(self, parent=None):
    """Run this fixture's cleanups now and unregister them from ``parent``.

    :param parent: optional object holding ``self.cleanUp`` in its cleanup
        list; when given, that entry is removed after the cleanups ran.
    """
    self.cleanUp()

    if parent:
        # cleanUp can't be called twice, so the parent must not run it
        # again during its own cleanup.
        cleanup_entry = (self.cleanUp, (), {})
        parent._cleanups._cleanups.remove(cleanup_entry)

    LOG.info(f'Host {self.hostname} shut down.')
|
||||
|
||||
@property
|
||||
def hostname(self):
|
||||
return self.neutron_config.config.DEFAULT.host
|
||||
@@ -385,6 +417,9 @@ class Environment(fixtures.Fixture):
|
||||
except nc_exc.NeutronClientException:
|
||||
return False
|
||||
|
||||
def get_host_by_name(self, hostname):
    """Return the host of this environment with the given hostname.

    :param hostname: value compared against each host's ``hostname``.
    :returns: the first entry of ``self.hosts`` whose hostname matches.
    :raises LookupError: if no host has that hostname.
    """
    for host in self.hosts:
        if host.hostname == hostname:
            return host
    # A bare next() on a generator would surface here as an unexplained
    # StopIteration; name the missing host instead.
    raise LookupError(f'No host named {hostname!r} in this environment')
|
||||
|
||||
def _create_host(self, host_desc):
|
||||
temp_dir = self.useFixture(fixtures.TempDir()).path
|
||||
neutron_config = config.NeutronConfigFixture(
|
||||
|
||||
@@ -81,7 +81,10 @@ class ProcessFixture(fixtures.Fixture):
|
||||
systemd_run = [
|
||||
'systemd-run',
|
||||
'--service-type', 'exec',
|
||||
'--property', 'TimeoutStopSec=30s',
|
||||
# Timeout and KILL processes 5s before the timeout the restart
|
||||
# tests use.
|
||||
'--property', 'TimeoutStopSec=25s',
|
||||
'--property', 'KillMode=mixed',
|
||||
'--unit', self.unit_name,
|
||||
'--setenv', f'PATH={os.environ["PATH"]}',
|
||||
'--same-dir',
|
||||
@@ -103,6 +106,7 @@ class ProcessFixture(fixtures.Fixture):
|
||||
# run unprivileged if run_as_root is False.
|
||||
run_as_root=True,
|
||||
)
|
||||
common_utils.wait_until_true(self.service_is_active)
|
||||
LOG.debug("Process started: %s", self.process_name)
|
||||
|
||||
def stop(self, kill_signal=None):
|
||||
@@ -120,16 +124,26 @@ class ProcessFixture(fixtures.Fixture):
|
||||
msg = (f'Process killed with signal {kill_signal}: '
|
||||
f'{self.process_name}')
|
||||
else:
|
||||
stop_cmd = ['systemctl', 'stop', self.unit_name]
|
||||
stop_cmd = ['systemctl', 'stop', '--no-block', self.unit_name]
|
||||
msg = f'Process stopped: {self.process_name}'
|
||||
|
||||
utils.execute(stop_cmd, run_as_root=True)
|
||||
common_utils.wait_until_true(self.process_is_not_running)
|
||||
LOG.debug(msg)
|
||||
|
||||
def restart(self, executor=None):
|
||||
def _restart():
|
||||
self.stop()
|
||||
self.start()
|
||||
if self.process_is_running():
|
||||
restart_cmd = [
|
||||
'systemctl',
|
||||
'restart',
|
||||
'--no-block',
|
||||
self.unit_name,
|
||||
]
|
||||
utils.execute(restart_cmd, run_as_root=True)
|
||||
common_utils.wait_until_true(self.service_is_active)
|
||||
else:
|
||||
self.start()
|
||||
|
||||
LOG.debug("Restarting process: %s", self.process_name)
|
||||
|
||||
@@ -138,14 +152,21 @@ class ProcessFixture(fixtures.Fixture):
|
||||
else:
|
||||
return executor.submit(_restart)
|
||||
|
||||
def process_is_running(self):
|
||||
@property
|
||||
def service_state(self):
|
||||
cmd = ['systemctl', 'is-active', self.unit_name]
|
||||
return utils.execute(
|
||||
cmd,
|
||||
run_as_root=True,
|
||||
log_fail_as_error=False,
|
||||
check_exit_code=False,
|
||||
) == 'active\n'
|
||||
).strip()
|
||||
|
||||
def service_is_active(self):
    """Return True only when ``service_state`` is exactly ``'active'``."""
    state = self.service_state
    return state == 'active'
|
||||
|
||||
def process_is_running(self):
    """Return True while the service is active or changing state.

    The transitional states ``'activating'`` and ``'deactivating'`` count
    as running as well.
    """
    running_states = ('active', 'activating', 'deactivating')
    return self.service_state in running_states
|
||||
|
||||
def process_is_not_running(self):
|
||||
return not self.process_is_running()
|
||||
@@ -347,7 +368,32 @@ class LinuxBridgeAgentFixture(ServiceFixture):
|
||||
)
|
||||
|
||||
|
||||
class L3AgentFixture(ServiceFixture):
|
||||
class NamespaceCleanupFixture(ServiceFixture):
    """Service fixture that deletes leftover network namespaces on cleanup.

    Subclasses provide ``self.namespace_pattern``, a compiled regular
    expression; every existing namespace whose name matches it is deleted
    when the fixture is cleaned up.
    """

    def _setUp(self):
        super(NamespaceCleanupFixture, self)._setUp()
        self.addCleanup(self.clean_namespaces)

    def clean_namespaces(self):
        """Delete all namespaces whose name matches ``namespace_pattern``.

        In some HA tests agents are killed while they are still handling
        their resources. In such a case the namespaces are not deleted by
        the agent itself, so they are found and deleted here using the
        agent's namespace pattern.

        Does nothing when no pattern has been set on the fixture.
        """
        # The cleanup is registered in _setUp, so it can run on a fixture
        # that never got its pattern assigned; don't fail with an
        # AttributeError in that case.
        pattern = getattr(self, 'namespace_pattern', None)
        if not pattern:
            return

        for namespace in ip_lib.list_network_namespaces():
            if not pattern.match(namespace):
                continue
            try:
                ip_lib.delete_network_namespace(namespace)
            except RuntimeError:
                # Best effort: continue cleaning even if a namespace
                # deletion fails.
                pass
|
||||
|
||||
|
||||
class L3AgentFixture(NamespaceCleanupFixture):
|
||||
|
||||
def __init__(self, env_desc, host_desc, test_name,
|
||||
neutron_cfg_fixture, l3_agent_cfg_fixture,
|
||||
@@ -362,6 +408,8 @@ class L3AgentFixture(ServiceFixture):
|
||||
self.hostname = self.neutron_cfg_fixture.config['DEFAULT']['host']
|
||||
|
||||
def _setUp(self):
|
||||
super(L3AgentFixture, self)._setUp()
|
||||
|
||||
self.plugin_config = self.l3_agent_cfg_fixture.config
|
||||
|
||||
config_filenames = [self.neutron_cfg_fixture.filename,
|
||||
@@ -386,12 +434,15 @@ class L3AgentFixture(ServiceFixture):
|
||||
namespace=self.namespace
|
||||
)
|
||||
)
|
||||
self.namespace_pattern = re.compile(
|
||||
r"qrouter-[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}@%s" %
|
||||
self.get_namespace_suffix())
|
||||
|
||||
def get_namespace_suffix(self):
    """Return ``test_namespace_suffix`` from the DEFAULT plugin config."""
    default_section = self.plugin_config.DEFAULT
    return default_section.test_namespace_suffix
|
||||
|
||||
|
||||
class DhcpAgentFixture(ServiceFixture):
|
||||
class DhcpAgentFixture(NamespaceCleanupFixture):
|
||||
|
||||
def __init__(self, env_desc, host_desc, test_name,
|
||||
neutron_cfg_fixture, agent_cfg_fixture, namespace=None):
|
||||
@@ -404,6 +455,8 @@ class DhcpAgentFixture(ServiceFixture):
|
||||
self.namespace = namespace
|
||||
|
||||
def _setUp(self):
|
||||
super(DhcpAgentFixture, self)._setUp()
|
||||
|
||||
self.plugin_config = self.agent_cfg_fixture.config
|
||||
|
||||
config_filenames = [self.neutron_cfg_fixture.filename,
|
||||
@@ -429,10 +482,9 @@ class DhcpAgentFixture(ServiceFixture):
|
||||
namespace=self.namespace
|
||||
)
|
||||
)
|
||||
self.dhcp_namespace_pattern = re.compile(
|
||||
self.namespace_pattern = re.compile(
|
||||
r"qdhcp-[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}%s" %
|
||||
self.get_namespace_suffix())
|
||||
self.addCleanup(self.clean_dhcp_namespaces)
|
||||
|
||||
def get_agent_hostname(self):
|
||||
return self.neutron_cfg_fixture.config['DEFAULT']['host']
|
||||
@@ -442,21 +494,4 @@ class DhcpAgentFixture(ServiceFixture):
|
||||
|
||||
def kill(self):
|
||||
self.process_fixture.stop()
|
||||
self.clean_dhcp_namespaces()
|
||||
|
||||
def clean_dhcp_namespaces(self):
|
||||
"""Delete all DHCP namespaces created by DHCP agent.
|
||||
|
||||
In some tests for DHCP agent HA agents are killed when handling DHCP
|
||||
service for network(s). In such case DHCP namespace is not deleted by
|
||||
DHCP agent and such namespaces are found and deleted using agent's
|
||||
namespace suffix.
|
||||
"""
|
||||
|
||||
for namespace in ip_lib.list_network_namespaces():
|
||||
if self.dhcp_namespace_pattern.match(namespace):
|
||||
try:
|
||||
ip_lib.delete_network_namespace(namespace)
|
||||
except RuntimeError:
|
||||
# Continue cleaning even if namespace deletions fails
|
||||
pass
|
||||
self.clean_namespaces()
|
||||
|
||||
@@ -16,8 +16,11 @@ import functools
|
||||
import os
|
||||
import time
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from neutron_lib import constants
|
||||
from neutronclient.common import exceptions
|
||||
from oslo_log import log as logging
|
||||
from oslo_utils import uuidutils
|
||||
|
||||
from neutron.agent.l3 import ha_router
|
||||
@@ -27,6 +30,7 @@ from neutron.agent.linux import l3_tc_lib
|
||||
from neutron.common import utils as common_utils
|
||||
from neutron.tests import base as tests_base
|
||||
from neutron.tests.common.exclusive_resources import ip_network
|
||||
from neutron.tests.common import net_helpers
|
||||
from neutron.tests.fullstack import base
|
||||
from neutron.tests.fullstack.resources import environment
|
||||
from neutron.tests.fullstack.resources import machine
|
||||
@@ -34,6 +38,8 @@ from neutron.tests.unit import testlib_api
|
||||
|
||||
load_tests = testlib_api.module_load_tests
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TestL3Agent(base.BaseFullStackTestCase):
|
||||
|
||||
@@ -175,10 +181,13 @@ class TestL3Agent(base.BaseFullStackTestCase):
|
||||
return "%s@%s" % (namespace, suffix)
|
||||
|
||||
def _get_l3_agents_with_ha_state(
|
||||
self, l3_agents, router_id, ha_state=None):
|
||||
self, router_id, ha_state=None):
|
||||
l3_agents = [host.agents['l3'] for host in self.environment.hosts
|
||||
if 'l3' in host.agents]
|
||||
found_agents = []
|
||||
agents_hosting_router = self.client.list_l3_agent_hosting_routers(
|
||||
router_id)['agents']
|
||||
|
||||
for agent in l3_agents:
|
||||
agent_host = agent.neutron_cfg_fixture.get_host()
|
||||
for agent_hosting_router in agents_hosting_router:
|
||||
@@ -189,6 +198,13 @@ class TestL3Agent(base.BaseFullStackTestCase):
|
||||
break
|
||||
return found_agents
|
||||
|
||||
def _get_hosts_with_ha_state(
|
||||
self, router_id, ha_state=None):
|
||||
return [
|
||||
self.environment.get_host_by_name(agent.hostname)
|
||||
for agent in self._get_l3_agents_with_ha_state(router_id, ha_state)
|
||||
]
|
||||
|
||||
def _router_fip_qos_after_admin_state_down_up(self, ha=False):
|
||||
def get_router_gw_interface():
|
||||
devices = ip.get_devices()
|
||||
@@ -231,9 +247,7 @@ class TestL3Agent(base.BaseFullStackTestCase):
|
||||
external_vm.block_until_ping(fip['floating_ip_address'])
|
||||
|
||||
if ha:
|
||||
l3_agents = [host.agents['l3'] for host in self.environment.hosts]
|
||||
router_agent = self._get_l3_agents_with_ha_state(
|
||||
l3_agents, router['id'])[0]
|
||||
router_agent = self._get_l3_agents_with_ha_state(router['id'])[0]
|
||||
qrouter_ns = self._get_namespace(
|
||||
router['id'],
|
||||
router_agent)
|
||||
@@ -369,14 +383,23 @@ class TestHAL3Agent(TestL3Agent):
|
||||
use_dhcp = False
|
||||
|
||||
def setUp(self):
|
||||
# Two hosts with L3 agent to host HA routers
|
||||
host_descriptions = [
|
||||
environment.HostDescription(l3_agent=True,
|
||||
dhcp_agent=self.use_dhcp,
|
||||
l3_agent_extensions="fip_qos")
|
||||
for _ in range(2)]
|
||||
|
||||
# Add two hosts for FakeFullstackMachines
|
||||
host_descriptions.extend([
|
||||
environment.HostDescription()
|
||||
for _ in range(2)
|
||||
])
|
||||
|
||||
env = environment.Environment(
|
||||
environment.EnvironmentDescription(
|
||||
network_type='vlan', l2_pop=True,
|
||||
agent_down_time=30,
|
||||
qos=True),
|
||||
host_descriptions)
|
||||
super(TestHAL3Agent, self).setUp(env)
|
||||
@@ -387,9 +410,6 @@ class TestHAL3Agent(TestL3Agent):
|
||||
agents['agents'][0]['ha_state'] != agents['agents'][1]['ha_state'])
|
||||
|
||||
def test_ha_router(self):
|
||||
# TODO(amuller): Test external connectivity before and after a
|
||||
# failover, see: https://review.opendev.org/#/c/196393/
|
||||
|
||||
tenant_id = uuidutils.generate_uuid()
|
||||
router = self.safe_client.create_router(tenant_id, ha=True)
|
||||
|
||||
@@ -405,6 +425,139 @@ class TestHAL3Agent(TestL3Agent):
|
||||
router['id']),
|
||||
timeout=90)
|
||||
|
||||
def _test_ha_router_failover(self, method):
    """Take out the active HA router host and verify the failover.

    A VM on an internal network pings a VM on the external network through
    an HA router while the host with the active router instance is taken
    out. The test asserts that connectivity comes back, that the other L3
    host becomes active and that the number of lost pings stays below a
    threshold.

    :param method: how the active host is taken out: ``'disconnect'``,
        ``'kill'`` or ``'shutdown'``.
    :raises ValueError: if ``method`` is none of the supported values.
    """
    if method not in ('disconnect', 'kill', 'shutdown'):
        # An unknown method would leave the active host untouched and the
        # test would only fail later with a misleading connectivity error.
        raise ValueError(f'Unknown failover method: {method!r}')

    tenant_id = uuidutils.generate_uuid()

    # Create router
    router = self.safe_client.create_router(tenant_id, ha=True)
    router_id = router['id']
    agents = self.client.list_l3_agent_hosting_routers(router_id)
    self.assertEqual(2, len(agents['agents']),
                     'HA router must be scheduled to both nodes')

    # Create internal subnet
    network = self.safe_client.create_network(tenant_id)
    subnet = self.safe_client.create_subnet(
        tenant_id, network['id'], '20.0.0.0/24')
    self.safe_client.add_router_interface(router_id, subnet['id'])

    # Create external network
    external_network = self.safe_client.create_network(
        tenant_id, external=True)
    self.safe_client.create_subnet(
        tenant_id, external_network['id'], '42.0.0.0/24',
        enable_dhcp=False)
    self.safe_client.add_gateway_router(
        router_id,
        external_network['id'])

    # Create internal VM
    vm = self.useFixture(
        machine.FakeFullstackMachine(
            self.environment.hosts[2],
            network['id'],
            tenant_id,
            self.safe_client))
    vm.block_until_boot()

    # Create external VM
    external = self.useFixture(
        machine.FakeFullstackMachine(
            self.environment.hosts[3],
            external_network['id'],
            tenant_id,
            self.safe_client))
    external.block_until_boot()

    common_utils.wait_until_true(
        functools.partial(
            self._is_ha_router_active_on_one_agent,
            router_id),
        timeout=90)

    # Test external connectivity, failover, test again
    pinger = net_helpers.Pinger(vm.namespace, external.ip, interval=0.1)
    pinger.start()

    # Ensure connectivity before disconnect
    vm.block_until_ping(external.ip)

    get_active_hosts = functools.partial(
        self._get_hosts_with_ha_state,
        router_id,
        'active',
    )

    active_hosts = get_active_hosts()

    # Exactly one host should be active
    self.assertEqual(
        1, len(active_hosts),
        f'Expected exactly one active HA router host, '
        f'found {len(active_hosts)}')

    active_host = active_hosts[0]
    # Only hosts running an L3 agent can take over the router; the hosts
    # that merely carry the fake machines must not be picked as backup.
    backup_host = next(
        h for h in self.environment.hosts
        if h != active_host and 'l3' in h.agents)

    start = datetime.now()

    if method == 'disconnect':
        active_host.disconnect()
    elif method == 'kill':
        active_host.kill(parent=self.environment)
    elif method == 'shutdown':
        active_host.shutdown(parent=self.environment)

    if method != 'shutdown':
        # Ensure connectivity is shortly lost if the failover is not
        # graceful
        vm.assert_no_ping(external.ip)

        LOG.debug(f'Connectivity lost after {datetime.now() - start}')

    # Ensure connectivity is restored
    vm.block_until_ping(external.ip)
    LOG.debug(f'Connectivity restored after {datetime.now() - start}')

    # Assert the backup host got active
    timeout = self.environment.env_desc.agent_down_time * 1.2
    common_utils.wait_until_true(
        lambda: backup_host in get_active_hosts(),
        timeout=timeout,
    )
    LOG.debug(f'Active host asserted after {datetime.now() - start}')

    if method in ('kill', 'shutdown'):
        # Assert the previously active host is no longer active if it was
        # killed or shutdown. In the disconnect case both hosts will stay
        # active, but one host is disconnected from the data plane.
        common_utils.wait_until_true(
            lambda: active_host not in get_active_hosts(),
            timeout=timeout,
        )
        LOG.debug(f'Inactive host asserted after {datetime.now() - start}')

    # Stop probing processes
    pinger.stop()

    # With the default advert_int of 2s the keepalived master timeout is
    # about 6s. Assert less than 90 lost packets (9 seconds at the 0.1s
    # ping interval used above).
    threshold = 90

    lost = pinger.sent - pinger.received
    message = (f'Sent {pinger.sent} packets, received {pinger.received} '
               f'packets, lost {lost} packets')

    self.assertLess(lost, threshold, message)
|
||||
|
||||
def test_ha_router_failover_graceful(self):
|
||||
self._test_ha_router_failover('shutdown')
|
||||
|
||||
def test_ha_router_failover_host_failure(self):
|
||||
self._test_ha_router_failover('kill')
|
||||
|
||||
def test_ha_router_failover_disconnect(self):
|
||||
self._test_ha_router_failover('disconnect')
|
||||
|
||||
def _get_keepalived_state(self, keepalived_state_file):
|
||||
with open(keepalived_state_file, "r") as fd:
|
||||
return fd.read()
|
||||
@@ -491,11 +644,10 @@ class TestHAL3Agent(TestL3Agent):
|
||||
router_ip = router['external_gateway_info'][
|
||||
'external_fixed_ips'][0]['ip_address']
|
||||
|
||||
l3_agents = [host.agents['l3'] for host in self.environment.hosts]
|
||||
l3_standby_agents = self._get_l3_agents_with_ha_state(
|
||||
l3_agents, router['id'], 'standby')
|
||||
router['id'], 'standby')
|
||||
l3_active_agents = self._get_l3_agents_with_ha_state(
|
||||
l3_agents, router['id'], 'active')
|
||||
router['id'], 'active')
|
||||
self.assertEqual(1, len(l3_active_agents))
|
||||
|
||||
# Let's check first if connectivity from external_vm to router's
|
||||
|
||||
Reference in New Issue
Block a user