Add L3 HA fullstack failover test
* Generate unique internal and external router device names using the agent's hostname. This allows multiple HA router replicas to co-exist on the same machine; otherwise they would all use the same device names, which would conflict in OVS. * Add host.disconnect method that disconnects the host from the central internal and external bridges, simulating pulling the cable from the host's NIC. * Add an L3 HA failover test. Co-Authored-By: Assaf Muller <amuller@redhat.com> Change-Id: Iaaa1c2cab0341a929e368392aa7dc47c9b2399c2 Original-Change-Id: I250fa41d89dfc4f9f3ba4c03a027b52b2e8c4b4b
This commit is contained in:
parent
a69bd0dd60
commit
bd9ba68047
|
@ -275,6 +275,8 @@ def create_patch_ports(source, destination):
|
||||||
source.add_patch_port(source_name, destination_name)
|
source.add_patch_port(source_name, destination_name)
|
||||||
destination.add_patch_port(destination_name, source_name)
|
destination.add_patch_port(destination_name, source_name)
|
||||||
|
|
||||||
|
return source_name, destination_name
|
||||||
|
|
||||||
|
|
||||||
def create_vlan_interface(
|
def create_vlan_interface(
|
||||||
namespace, port_name, mac_address, ip_address, vlan_tag):
|
namespace, port_name, mac_address, ip_address, vlan_tag):
|
||||||
|
@ -406,7 +408,7 @@ class Pinger(object):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
stats_pattern = re.compile(
|
stats_pattern = re.compile(
|
||||||
r'^(?P<trans>\d+) packets transmitted,.*(?P<recv>\d+) received.*$')
|
r'^(?P<trans>\d+) packets transmitted, +(?P<recv>\d+) received.*$')
|
||||||
unreachable_pattern = re.compile(
|
unreachable_pattern = re.compile(
|
||||||
r'.* Destination .* Unreachable')
|
r'.* Destination .* Unreachable')
|
||||||
TIMEOUT = 15
|
TIMEOUT = 15
|
||||||
|
@ -430,7 +432,9 @@ class Pinger(object):
|
||||||
"Ping command hasn't ended after %d seconds." % self.TIMEOUT))
|
"Ping command hasn't ended after %d seconds." % self.TIMEOUT))
|
||||||
|
|
||||||
def _parse_stats(self):
|
def _parse_stats(self):
|
||||||
|
output = ''
|
||||||
for line in self.proc.stdout:
|
for line in self.proc.stdout:
|
||||||
|
output += line
|
||||||
if (not self.destination_unreachable and
|
if (not self.destination_unreachable and
|
||||||
self.unreachable_pattern.match(line)):
|
self.unreachable_pattern.match(line)):
|
||||||
self.destination_unreachable = True
|
self.destination_unreachable = True
|
||||||
|
@ -441,7 +445,9 @@ class Pinger(object):
|
||||||
self.received = int(result.group('recv'))
|
self.received = int(result.group('recv'))
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
|
LOG.error(f"Didn't find ping statistics:\n{output}")
|
||||||
raise RuntimeError("Didn't find ping statistics.")
|
raise RuntimeError("Didn't find ping statistics.")
|
||||||
|
LOG.debug(f"ping command output:\n{output}")
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
if self.proc and self.proc.is_running:
|
if self.proc and self.proc.is_running:
|
||||||
|
|
|
@ -165,11 +165,12 @@ class BaseFullStackTestCase(testlib_api.MySQLTestCaseMixin,
|
||||||
available_ips = itertools.islice(valid_ips, initial, initial + num)
|
available_ips = itertools.islice(valid_ips, initial, initial + num)
|
||||||
return [str(available_ip) for available_ip in available_ips]
|
return [str(available_ip) for available_ip in available_ips]
|
||||||
|
|
||||||
def _create_external_vm(self, network, subnet):
|
def _create_external_vm(self, network, subnet=None, ip=None):
|
||||||
|
ip = ip or subnet['gateway_ip']
|
||||||
vm = self.useFixture(
|
vm = self.useFixture(
|
||||||
machine_fixtures.FakeMachine(
|
machine_fixtures.FakeMachine(
|
||||||
self.environment.central_bridge,
|
self.environment.central_bridge,
|
||||||
common_utils.ip_to_cidr(subnet['gateway_ip'], 24)))
|
common_utils.ip_to_cidr(ip, 24)))
|
||||||
# NOTE(slaweq): as ext_net is 'vlan' network type external_vm needs to
|
# NOTE(slaweq): as ext_net is 'vlan' network type external_vm needs to
|
||||||
# send packets with proper vlan also
|
# send packets with proper vlan also
|
||||||
vm.bridge.set_db_attribute(
|
vm.bridge.set_db_attribute(
|
||||||
|
|
|
@ -444,3 +444,10 @@ class ClientFixture(fixtures.Fixture):
|
||||||
|
|
||||||
def update_quota(self, project_id, tracked_resource, quota):
|
def update_quota(self, project_id, tracked_resource, quota):
|
||||||
self._update_resource('quota', project_id, {tracked_resource: quota})
|
self._update_resource('quota', project_id, {tracked_resource: quota})
|
||||||
|
|
||||||
|
def add_gateway_router(self, router_id, network_id):
|
||||||
|
self.client.add_gateway_router(
|
||||||
|
router_id,
|
||||||
|
{'network_id': network_id})
|
||||||
|
self.addCleanup(
|
||||||
|
self.client.remove_gateway_router, router_id)
|
||||||
|
|
|
@ -16,6 +16,7 @@ import fixtures
|
||||||
from neutron_lib import constants
|
from neutron_lib import constants
|
||||||
from neutronclient.common import exceptions as nc_exc
|
from neutronclient.common import exceptions as nc_exc
|
||||||
from oslo_config import cfg
|
from oslo_config import cfg
|
||||||
|
from oslo_log import log as logging
|
||||||
|
|
||||||
from neutron.agent.linux import ip_lib
|
from neutron.agent.linux import ip_lib
|
||||||
from neutron.common import utils as common_utils
|
from neutron.common import utils as common_utils
|
||||||
|
@ -28,6 +29,8 @@ from neutron.tests.common import net_helpers
|
||||||
from neutron.tests.fullstack.resources import config
|
from neutron.tests.fullstack.resources import config
|
||||||
from neutron.tests.fullstack.resources import process
|
from neutron.tests.fullstack.resources import process
|
||||||
|
|
||||||
|
LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class EnvironmentDescription(object):
|
class EnvironmentDescription(object):
|
||||||
"""A set of characteristics of an environment setup.
|
"""A set of characteristics of an environment setup.
|
||||||
|
@ -263,12 +266,14 @@ class Host(fixtures.Fixture):
|
||||||
|
|
||||||
veth_1.link.set_up()
|
veth_1.link.set_up()
|
||||||
veth_2.link.set_up()
|
veth_2.link.set_up()
|
||||||
|
self.tunnel_device = veth_1
|
||||||
|
|
||||||
def connect_to_central_network_via_vlans(self, host_data_bridge):
|
def connect_to_central_network_via_vlans(self, host_data_bridge):
|
||||||
# If using VLANs as a segmentation device, it's needed to connect
|
# If using VLANs as a segmentation device, it's needed to connect
|
||||||
# a provider bridge to a centralized, shared bridge.
|
# a provider bridge to a centralized, shared bridge.
|
||||||
net_helpers.create_patch_ports(
|
source, destination = net_helpers.create_patch_ports(
|
||||||
self.central_bridge, host_data_bridge)
|
self.central_bridge, host_data_bridge)
|
||||||
|
self.internal_port = destination
|
||||||
|
|
||||||
def allocate_local_ip(self):
|
def allocate_local_ip(self):
|
||||||
if not self.env_desc.network_range:
|
if not self.env_desc.network_range:
|
||||||
|
@ -296,6 +301,13 @@ class Host(fixtures.Fixture):
|
||||||
self.network_bridges[network_id] = bridge
|
self.network_bridges[network_id] = bridge
|
||||||
return bridge
|
return bridge
|
||||||
|
|
||||||
|
def disconnect(self):
|
||||||
|
if self.env_desc.tunneling_enabled:
|
||||||
|
self.tunnel_device.addr.flush(4)
|
||||||
|
else:
|
||||||
|
self.br_phys.delete_port(self.internal_port)
|
||||||
|
LOG.info(f'Host {self.hostname} disconnected.')
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def hostname(self):
|
def hostname(self):
|
||||||
return self.neutron_config.config.DEFAULT.host
|
return self.neutron_config.config.DEFAULT.host
|
||||||
|
@ -385,6 +397,9 @@ class Environment(fixtures.Fixture):
|
||||||
except nc_exc.NeutronClientException:
|
except nc_exc.NeutronClientException:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def get_host_by_name(self, hostname):
|
||||||
|
return next(host for host in self.hosts if host.hostname == hostname)
|
||||||
|
|
||||||
def _create_host(self, host_desc):
|
def _create_host(self, host_desc):
|
||||||
temp_dir = self.useFixture(fixtures.TempDir()).path
|
temp_dir = self.useFixture(fixtures.TempDir()).path
|
||||||
neutron_config = config.NeutronConfigFixture(
|
neutron_config = config.NeutronConfigFixture(
|
||||||
|
|
|
@ -27,6 +27,7 @@ from neutron.agent.linux import l3_tc_lib
|
||||||
from neutron.common import utils as common_utils
|
from neutron.common import utils as common_utils
|
||||||
from neutron.tests import base as tests_base
|
from neutron.tests import base as tests_base
|
||||||
from neutron.tests.common.exclusive_resources import ip_network
|
from neutron.tests.common.exclusive_resources import ip_network
|
||||||
|
from neutron.tests.common import net_helpers
|
||||||
from neutron.tests.fullstack import base
|
from neutron.tests.fullstack import base
|
||||||
from neutron.tests.fullstack.resources import environment
|
from neutron.tests.fullstack.resources import environment
|
||||||
from neutron.tests.fullstack.resources import machine
|
from neutron.tests.fullstack.resources import machine
|
||||||
|
@ -231,7 +232,8 @@ class TestL3Agent(base.BaseFullStackTestCase):
|
||||||
external_vm.block_until_ping(fip['floating_ip_address'])
|
external_vm.block_until_ping(fip['floating_ip_address'])
|
||||||
|
|
||||||
if ha:
|
if ha:
|
||||||
l3_agents = [host.agents['l3'] for host in self.environment.hosts]
|
l3_agents = [host.agents['l3'] for host in self.environment.hosts
|
||||||
|
if 'l3' in host.agents]
|
||||||
router_agent = self._get_l3_agents_with_ha_state(
|
router_agent = self._get_l3_agents_with_ha_state(
|
||||||
l3_agents, router['id'])[0]
|
l3_agents, router['id'])[0]
|
||||||
qrouter_ns = self._get_namespace(
|
qrouter_ns = self._get_namespace(
|
||||||
|
@ -369,11 +371,19 @@ class TestHAL3Agent(TestL3Agent):
|
||||||
use_dhcp = False
|
use_dhcp = False
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
# Two hosts with L3 agent to host HA routers
|
||||||
host_descriptions = [
|
host_descriptions = [
|
||||||
environment.HostDescription(l3_agent=True,
|
environment.HostDescription(l3_agent=True,
|
||||||
dhcp_agent=self.use_dhcp,
|
dhcp_agent=self.use_dhcp,
|
||||||
l3_agent_extensions="fip_qos")
|
l3_agent_extensions="fip_qos")
|
||||||
for _ in range(2)]
|
for _ in range(2)]
|
||||||
|
|
||||||
|
# Add two hosts for FakeFullstackMachines
|
||||||
|
host_descriptions.extend([
|
||||||
|
environment.HostDescription()
|
||||||
|
for _ in range(2)
|
||||||
|
])
|
||||||
|
|
||||||
env = environment.Environment(
|
env = environment.Environment(
|
||||||
environment.EnvironmentDescription(
|
environment.EnvironmentDescription(
|
||||||
network_type='vlan', l2_pop=True,
|
network_type='vlan', l2_pop=True,
|
||||||
|
@ -387,9 +397,6 @@ class TestHAL3Agent(TestL3Agent):
|
||||||
agents['agents'][0]['ha_state'] != agents['agents'][1]['ha_state'])
|
agents['agents'][0]['ha_state'] != agents['agents'][1]['ha_state'])
|
||||||
|
|
||||||
def test_ha_router(self):
|
def test_ha_router(self):
|
||||||
# TODO(amuller): Test external connectivity before and after a
|
|
||||||
# failover, see: https://review.opendev.org/#/c/196393/
|
|
||||||
|
|
||||||
tenant_id = uuidutils.generate_uuid()
|
tenant_id = uuidutils.generate_uuid()
|
||||||
router = self.safe_client.create_router(tenant_id, ha=True)
|
router = self.safe_client.create_router(tenant_id, ha=True)
|
||||||
|
|
||||||
|
@ -405,6 +412,85 @@ class TestHAL3Agent(TestL3Agent):
|
||||||
router['id']),
|
router['id']),
|
||||||
timeout=90)
|
timeout=90)
|
||||||
|
|
||||||
|
def _get_host_for_active_ha_router_replica(self, router_id):
|
||||||
|
result = self.client.list_l3_agent_hosting_routers(router_id)
|
||||||
|
hostname = next(
|
||||||
|
agent['host'] for agent in result['agents'] if
|
||||||
|
agent['ha_state'] == 'active')
|
||||||
|
return self.environment.get_host_by_name(hostname)
|
||||||
|
|
||||||
|
def test_ha_router_failover(self):
|
||||||
|
tenant_id = uuidutils.generate_uuid()
|
||||||
|
|
||||||
|
# Create router
|
||||||
|
router = self.safe_client.create_router(tenant_id, ha=True)
|
||||||
|
router_id = router['id']
|
||||||
|
agents = self.client.list_l3_agent_hosting_routers(router_id)
|
||||||
|
self.assertEqual(2, len(agents['agents']),
|
||||||
|
'HA router must be scheduled to both nodes')
|
||||||
|
|
||||||
|
# Create internal subnet
|
||||||
|
network = self.safe_client.create_network(tenant_id)
|
||||||
|
subnet = self.safe_client.create_subnet(
|
||||||
|
tenant_id, network['id'], '20.0.0.0/24')
|
||||||
|
self.safe_client.add_router_interface(router_id, subnet['id'])
|
||||||
|
|
||||||
|
# Create external network
|
||||||
|
external_network = self.safe_client.create_network(
|
||||||
|
tenant_id, external=True)
|
||||||
|
self.safe_client.create_subnet(
|
||||||
|
tenant_id, external_network['id'], '42.0.0.0/24',
|
||||||
|
enable_dhcp=False)
|
||||||
|
self.safe_client.add_gateway_router(
|
||||||
|
router_id,
|
||||||
|
external_network['id'])
|
||||||
|
|
||||||
|
# Create internal VM
|
||||||
|
vm = self.useFixture(
|
||||||
|
machine.FakeFullstackMachine(
|
||||||
|
self.environment.hosts[2],
|
||||||
|
network['id'],
|
||||||
|
tenant_id,
|
||||||
|
self.safe_client))
|
||||||
|
vm.block_until_boot()
|
||||||
|
|
||||||
|
# Create external VM
|
||||||
|
external = self.useFixture(
|
||||||
|
machine.FakeFullstackMachine(
|
||||||
|
self.environment.hosts[3],
|
||||||
|
external_network['id'],
|
||||||
|
tenant_id,
|
||||||
|
self.safe_client))
|
||||||
|
external.block_until_boot()
|
||||||
|
|
||||||
|
common_utils.wait_until_true(
|
||||||
|
functools.partial(
|
||||||
|
self._is_ha_router_active_on_one_agent,
|
||||||
|
router_id),
|
||||||
|
timeout=90)
|
||||||
|
|
||||||
|
# Test external connectivity, failover, test again
|
||||||
|
pinger = net_helpers.Pinger(vm.namespace, external.ip, interval=0.1)
|
||||||
|
pinger.start()
|
||||||
|
|
||||||
|
# Ensure connectivity before disconnect
|
||||||
|
vm.block_until_ping(external.ip)
|
||||||
|
|
||||||
|
active_host = self._get_host_for_active_ha_router_replica(router_id)
|
||||||
|
active_host.disconnect()
|
||||||
|
|
||||||
|
# Ensure connectivity is briefly lost on failover and then recovers
|
||||||
|
vm.assert_no_ping(external.ip)
|
||||||
|
vm.block_until_ping(external.ip)
|
||||||
|
pinger.stop()
|
||||||
|
|
||||||
|
# With the default advert_int of 2s the keepalived master timeout is
|
||||||
|
# about 6s. Assert less than 90 lost packets (9 seconds at a 0.1s interval)
|
||||||
|
lost = pinger.sent - pinger.received
|
||||||
|
message = (f'Sent {pinger.sent} packets, received {pinger.received} '
|
||||||
|
f'packets, lost {lost} packets')
|
||||||
|
assert lost < 90, message
|
||||||
|
|
||||||
def _get_keepalived_state(self, keepalived_state_file):
|
def _get_keepalived_state(self, keepalived_state_file):
|
||||||
with open(keepalived_state_file, "r") as fd:
|
with open(keepalived_state_file, "r") as fd:
|
||||||
return fd.read()
|
return fd.read()
|
||||||
|
|
Loading…
Reference in New Issue