Add L3 HA fullstack failover test
* Generate unique internal and external router device names using the agent's hostname. This allows multiple HA router replicas to co-exist on the same machine; otherwise they would all use the same device names and the names would collide in OVS.
* Add a host.disconnect method that disconnects the host from the central internal and external bridges, simulating pulling the cable from the host's NIC.
* Add an L3 HA failover test.

Co-Authored-By: Assaf Muller <amuller@redhat.com>
Change-Id: Iaaa1c2cab0341a929e368392aa7dc47c9b2399c2
Original-Change-Id: I250fa41d89dfc4f9f3ba4c03a027b52b2e8c4b4b
This commit is contained in:
parent
a69bd0dd60
commit
bd9ba68047
|
@ -275,6 +275,8 @@ def create_patch_ports(source, destination):
|
|||
source.add_patch_port(source_name, destination_name)
|
||||
destination.add_patch_port(destination_name, source_name)
|
||||
|
||||
return source_name, destination_name
|
||||
|
||||
|
||||
def create_vlan_interface(
|
||||
namespace, port_name, mac_address, ip_address, vlan_tag):
|
||||
|
@ -406,7 +408,7 @@ class Pinger(object):
|
|||
"""
|
||||
|
||||
stats_pattern = re.compile(
|
||||
r'^(?P<trans>\d+) packets transmitted,.*(?P<recv>\d+) received.*$')
|
||||
r'^(?P<trans>\d+) packets transmitted, +(?P<recv>\d+) received.*$')
|
||||
unreachable_pattern = re.compile(
|
||||
r'.* Destination .* Unreachable')
|
||||
TIMEOUT = 15
|
||||
|
@ -430,7 +432,9 @@ class Pinger(object):
|
|||
"Ping command hasn't ended after %d seconds." % self.TIMEOUT))
|
||||
|
||||
def _parse_stats(self):
|
||||
output = ''
|
||||
for line in self.proc.stdout:
|
||||
output += line
|
||||
if (not self.destination_unreachable and
|
||||
self.unreachable_pattern.match(line)):
|
||||
self.destination_unreachable = True
|
||||
|
@ -441,7 +445,9 @@ class Pinger(object):
|
|||
self.received = int(result.group('recv'))
|
||||
break
|
||||
else:
|
||||
LOG.error(f"Didn't find ping statistics:\n{output}")
|
||||
raise RuntimeError("Didn't find ping statistics.")
|
||||
LOG.debug(f"ping command output:\n{output}")
|
||||
|
||||
def start(self):
|
||||
if self.proc and self.proc.is_running:
|
||||
|
|
|
@ -165,11 +165,12 @@ class BaseFullStackTestCase(testlib_api.MySQLTestCaseMixin,
|
|||
available_ips = itertools.islice(valid_ips, initial, initial + num)
|
||||
return [str(available_ip) for available_ip in available_ips]
|
||||
|
||||
def _create_external_vm(self, network, subnet):
|
||||
def _create_external_vm(self, network, subnet=None, ip=None):
|
||||
ip = ip or subnet['gateway_ip']
|
||||
vm = self.useFixture(
|
||||
machine_fixtures.FakeMachine(
|
||||
self.environment.central_bridge,
|
||||
common_utils.ip_to_cidr(subnet['gateway_ip'], 24)))
|
||||
common_utils.ip_to_cidr(ip, 24)))
|
||||
# NOTE(slaweq): as ext_net is 'vlan' network type external_vm needs to
|
||||
# send packets with proper vlan also
|
||||
vm.bridge.set_db_attribute(
|
||||
|
|
|
@ -444,3 +444,10 @@ class ClientFixture(fixtures.Fixture):
|
|||
|
||||
def update_quota(self, project_id, tracked_resource, quota):
|
||||
self._update_resource('quota', project_id, {tracked_resource: quota})
|
||||
|
||||
def add_gateway_router(self, router_id, network_id):
|
||||
self.client.add_gateway_router(
|
||||
router_id,
|
||||
{'network_id': network_id})
|
||||
self.addCleanup(
|
||||
self.client.remove_gateway_router, router_id)
|
||||
|
|
|
@ -16,6 +16,7 @@ import fixtures
|
|||
from neutron_lib import constants
|
||||
from neutronclient.common import exceptions as nc_exc
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log as logging
|
||||
|
||||
from neutron.agent.linux import ip_lib
|
||||
from neutron.common import utils as common_utils
|
||||
|
@ -28,6 +29,8 @@ from neutron.tests.common import net_helpers
|
|||
from neutron.tests.fullstack.resources import config
|
||||
from neutron.tests.fullstack.resources import process
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EnvironmentDescription(object):
|
||||
"""A set of characteristics of an environment setup.
|
||||
|
@ -263,12 +266,14 @@ class Host(fixtures.Fixture):
|
|||
|
||||
veth_1.link.set_up()
|
||||
veth_2.link.set_up()
|
||||
self.tunnel_device = veth_1
|
||||
|
||||
def connect_to_central_network_via_vlans(self, host_data_bridge):
|
||||
# If using VLANs as a segmentation device, it's needed to connect
|
||||
# a provider bridge to a centralized, shared bridge.
|
||||
net_helpers.create_patch_ports(
|
||||
source, destination = net_helpers.create_patch_ports(
|
||||
self.central_bridge, host_data_bridge)
|
||||
self.internal_port = destination
|
||||
|
||||
def allocate_local_ip(self):
|
||||
if not self.env_desc.network_range:
|
||||
|
@ -296,6 +301,13 @@ class Host(fixtures.Fixture):
|
|||
self.network_bridges[network_id] = bridge
|
||||
return bridge
|
||||
|
||||
def disconnect(self):
|
||||
if self.env_desc.tunneling_enabled:
|
||||
self.tunnel_device.addr.flush(4)
|
||||
else:
|
||||
self.br_phys.delete_port(self.internal_port)
|
||||
LOG.info(f'Host {self.hostname} disconnected.')
|
||||
|
||||
@property
|
||||
def hostname(self):
|
||||
return self.neutron_config.config.DEFAULT.host
|
||||
|
@ -385,6 +397,9 @@ class Environment(fixtures.Fixture):
|
|||
except nc_exc.NeutronClientException:
|
||||
return False
|
||||
|
||||
def get_host_by_name(self, hostname):
|
||||
return next(host for host in self.hosts if host.hostname == hostname)
|
||||
|
||||
def _create_host(self, host_desc):
|
||||
temp_dir = self.useFixture(fixtures.TempDir()).path
|
||||
neutron_config = config.NeutronConfigFixture(
|
||||
|
|
|
@ -27,6 +27,7 @@ from neutron.agent.linux import l3_tc_lib
|
|||
from neutron.common import utils as common_utils
|
||||
from neutron.tests import base as tests_base
|
||||
from neutron.tests.common.exclusive_resources import ip_network
|
||||
from neutron.tests.common import net_helpers
|
||||
from neutron.tests.fullstack import base
|
||||
from neutron.tests.fullstack.resources import environment
|
||||
from neutron.tests.fullstack.resources import machine
|
||||
|
@ -231,7 +232,8 @@ class TestL3Agent(base.BaseFullStackTestCase):
|
|||
external_vm.block_until_ping(fip['floating_ip_address'])
|
||||
|
||||
if ha:
|
||||
l3_agents = [host.agents['l3'] for host in self.environment.hosts]
|
||||
l3_agents = [host.agents['l3'] for host in self.environment.hosts
|
||||
if 'l3' in host.agents]
|
||||
router_agent = self._get_l3_agents_with_ha_state(
|
||||
l3_agents, router['id'])[0]
|
||||
qrouter_ns = self._get_namespace(
|
||||
|
@ -369,11 +371,19 @@ class TestHAL3Agent(TestL3Agent):
|
|||
use_dhcp = False
|
||||
|
||||
def setUp(self):
|
||||
# Two hosts with L3 agent to host HA routers
|
||||
host_descriptions = [
|
||||
environment.HostDescription(l3_agent=True,
|
||||
dhcp_agent=self.use_dhcp,
|
||||
l3_agent_extensions="fip_qos")
|
||||
for _ in range(2)]
|
||||
|
||||
# Add two hosts for FakeFullstackMachines
|
||||
host_descriptions.extend([
|
||||
environment.HostDescription()
|
||||
for _ in range(2)
|
||||
])
|
||||
|
||||
env = environment.Environment(
|
||||
environment.EnvironmentDescription(
|
||||
network_type='vlan', l2_pop=True,
|
||||
|
@ -387,9 +397,6 @@ class TestHAL3Agent(TestL3Agent):
|
|||
agents['agents'][0]['ha_state'] != agents['agents'][1]['ha_state'])
|
||||
|
||||
def test_ha_router(self):
|
||||
# TODO(amuller): Test external connectivity before and after a
|
||||
# failover, see: https://review.opendev.org/#/c/196393/
|
||||
|
||||
tenant_id = uuidutils.generate_uuid()
|
||||
router = self.safe_client.create_router(tenant_id, ha=True)
|
||||
|
||||
|
@ -405,6 +412,85 @@ class TestHAL3Agent(TestL3Agent):
|
|||
router['id']),
|
||||
timeout=90)
|
||||
|
||||
def _get_host_for_active_ha_router_replica(self, router_id):
|
||||
result = self.client.list_l3_agent_hosting_routers(router_id)
|
||||
hostname = next(
|
||||
agent['host'] for agent in result['agents'] if
|
||||
agent['ha_state'] == 'active')
|
||||
return self.environment.get_host_by_name(hostname)
|
||||
|
||||
def test_ha_router_failover(self):
|
||||
tenant_id = uuidutils.generate_uuid()
|
||||
|
||||
# Create router
|
||||
router = self.safe_client.create_router(tenant_id, ha=True)
|
||||
router_id = router['id']
|
||||
agents = self.client.list_l3_agent_hosting_routers(router_id)
|
||||
self.assertEqual(2, len(agents['agents']),
|
||||
'HA router must be scheduled to both nodes')
|
||||
|
||||
# Create internal subnet
|
||||
network = self.safe_client.create_network(tenant_id)
|
||||
subnet = self.safe_client.create_subnet(
|
||||
tenant_id, network['id'], '20.0.0.0/24')
|
||||
self.safe_client.add_router_interface(router_id, subnet['id'])
|
||||
|
||||
# Create external network
|
||||
external_network = self.safe_client.create_network(
|
||||
tenant_id, external=True)
|
||||
self.safe_client.create_subnet(
|
||||
tenant_id, external_network['id'], '42.0.0.0/24',
|
||||
enable_dhcp=False)
|
||||
self.safe_client.add_gateway_router(
|
||||
router_id,
|
||||
external_network['id'])
|
||||
|
||||
# Create internal VM
|
||||
vm = self.useFixture(
|
||||
machine.FakeFullstackMachine(
|
||||
self.environment.hosts[2],
|
||||
network['id'],
|
||||
tenant_id,
|
||||
self.safe_client))
|
||||
vm.block_until_boot()
|
||||
|
||||
# Create external VM
|
||||
external = self.useFixture(
|
||||
machine.FakeFullstackMachine(
|
||||
self.environment.hosts[3],
|
||||
external_network['id'],
|
||||
tenant_id,
|
||||
self.safe_client))
|
||||
external.block_until_boot()
|
||||
|
||||
common_utils.wait_until_true(
|
||||
functools.partial(
|
||||
self._is_ha_router_active_on_one_agent,
|
||||
router_id),
|
||||
timeout=90)
|
||||
|
||||
# Test external connectivity, failover, test again
|
||||
pinger = net_helpers.Pinger(vm.namespace, external.ip, interval=0.1)
|
||||
pinger.start()
|
||||
|
||||
# Ensure connectivity before disconnect
|
||||
vm.block_until_ping(external.ip)
|
||||
|
||||
active_host = self._get_host_for_active_ha_router_replica(router_id)
|
||||
active_host.disconnect()
|
||||
|
||||
# Ensure connectivity is briefly lost on failover and then recovers
|
||||
vm.assert_no_ping(external.ip)
|
||||
vm.block_until_ping(external.ip)
|
||||
pinger.stop()
|
||||
|
||||
# With the default advert_int of 2s the keepalived master timeout is
|
||||
# about 6s. Assert fewer than 90 lost packets (9 seconds at 0.1s interval)
|
||||
lost = pinger.sent - pinger.received
|
||||
message = (f'Sent {pinger.sent} packets, received {pinger.received} '
|
||||
f'packets, lost {lost} packets')
|
||||
assert lost < 90, message
|
||||
|
||||
def _get_keepalived_state(self, keepalived_state_file):
|
||||
with open(keepalived_state_file, "r") as fd:
|
||||
return fd.read()
|
||||
|
|
Loading…
Reference in New Issue