Merge "Add L3HA OVN tests"

This commit is contained in:
Zuul 2024-04-09 16:35:14 +00:00 committed by Gerrit Code Review
commit 3736bb1bc2
4 changed files with 373 additions and 21 deletions

View File

@ -211,3 +211,19 @@ def run_local_cmd(cmd, timeout=10):
command, shell=True, stdout=subprocess.PIPE,
stderr=subprocess.PIPE).communicate()
return output, errors
def interface_state_set(client, interface, state):
    """Change the link state of a network interface on a remote host.

    :param client: remote client object; its ``host`` attribute is logged
        and its ``exec_command`` method runs the command.
    :param interface: name of the interface to modify.
    :param state: value passed verbatim as the last argument of
        ``ip link set <interface>``.
    """
    LOG.debug('Setting interface {} {} on {}'.format(
        interface, state, client.host))
    # /sbin is appended to PATH in the remote shell before ``ip`` is invoked.
    command = "{path}; sudo ip link set {interface} {state}".format(
        path='PATH=$PATH:/sbin', interface=interface, state=state)
    client.exec_command(command)
def remote_service_action(client, service, action):
    """Run ``sudo systemctl <action> <service>`` on a remote host.

    :param client: remote client object; its ``host`` attribute is logged
        and its ``exec_command`` method runs the command.
    :param service: systemd unit name placed after the action.
    :param action: systemctl verb placed before the unit name.
    """
    systemctl_call = "sudo systemctl {action} {service}".format(
        service=service, action=action)
    target_host = client.host
    LOG.debug("Running '{}' on {}".format(systemctl_call, target_host))
    client.exec_command(systemctl_call)

View File

@ -37,6 +37,7 @@ from tempest.common import waiters
from tempest import config
from tempest.lib.common import fixed_network
from tempest.lib.common.utils import data_utils
from tempest.lib.common.utils import test_utils
from whitebox_neutron_tempest_plugin.common import tcpdump_capture as capture
from whitebox_neutron_tempest_plugin.common import utils as local_utils
@ -122,6 +123,16 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
return ssh.Client(host=host, username=username,
key_filename=key_filename)
def find_different_compute_host(self, exclude_hosts):
    """Return the name of the first compute node not in ``exclude_hosts``.

    Nodes are taken from ``self.nodes`` in order; only entries whose
    ``'is_compute'`` value is truthy are considered.

    :param exclude_hosts: collection of node names that must not be
        returned. ``None`` or an empty collection excludes nothing.
    :returns: the ``'name'`` of the first eligible compute node.
    :raises: ``self.skipException`` when every compute node is excluded
        (or there is no compute node at all).
    """
    # Treat a missing exclusion list as "exclude nothing" instead of
    # failing with TypeError on the membership test.
    excluded = exclude_hosts or ()
    for node in self.nodes:
        if node['is_compute'] and node['name'] not in excluded:
            return node['name']
    raise self.skipException(
        "Not able to find a different compute than: {}".format(
            exclude_hosts))
def get_local_ssh_client(self, network):
return ssh.Client(
host=self._get_local_ip_from_network(
@ -135,13 +146,11 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
subnet_id)['subnet']
if subnet['ip_version'] == ip_version:
return subnet['cidr']
return None
def find_node_client(self, node_name):
    """Return the ``'client'`` entry of the node called ``node_name``.

    ``self.nodes`` is scanned in order and the first entry whose ``'name'``
    equals ``node_name`` wins. ``None`` is returned when nothing matches.
    """
    matching_clients = (
        entry['client'] for entry in self.nodes
        if entry['name'] == node_name)
    return next(matching_clients, None)
@staticmethod
def _get_local_ip_from_network(network):
@ -151,7 +160,6 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
for ip_address in host_ip_addresses:
if netaddr.IPAddress(ip_address) in netaddr.IPNetwork(network):
return ip_address
return None
def get_fip_port_details(self, fip):
fip_ports = self.os_admin.network_client.list_ports(
@ -162,7 +170,6 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
fp['fixed_ips'][0]['ip_address'] ==
fip['floating_ip_address']):
return fp
return None
@classmethod
def get_podified_nodes_data(cls):
@ -265,6 +272,11 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
cls.discover_nodes()
return len(cls.nodes) == 1
def get_node_setting(self, node_name, setting):
    """Return ``setting`` of the first node whose name is ``node_name``.

    The lookup walks ``self.nodes`` in order. ``None`` is returned when no
    node carries that name; a matching node that lacks the ``setting`` key
    raises ``KeyError``.
    """
    for candidate in self.nodes:
        if candidate['name'] != node_name:
            continue
        return candidate[setting]
    return None
@classmethod
def get_pod_of_service(cls, service='neutron'):
pods_list = "oc get pods"
@ -403,25 +415,60 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
LOG.debug("Service '%s' active on host '%s'.",
service_glob, host_ip)
# Boot a server on `network` (falling back to self.network) and return a
# dict holding its port, its floating IP (or None) and the server body.
# NOTE(review): two `def _create_server` headers and two variants of several
# statements appear back to back below. This looks like a rendered diff that
# kept both the previous and the current version of the method - confirm
# against the real file before changing any of these lines.
def _create_server(self, create_floating_ip=True,
scheduler_hints=None, network=None):
def _create_server(
self, create_floating_ip=True, exclude_hosts=None,
network=None, **kwargs):
network = network or self.network
# Variant that collects the request in `server_kwargs` and boots through
# self.create_server (presumably the pre-change code - verify).
server_kwargs = {
'flavor_ref': self.flavor_ref,
'image_ref': self.image_ref,
'key_name': self.keypair['name'],
'networks': [{'uuid': network['id']}],
'security_groups': [{'name': self.security_groups[-1]['name']}]
}
if scheduler_hints:
server_kwargs['scheduler_hints'] = scheduler_hints
server = self.create_server(**server_kwargs)
# Variant that builds the request in `kwargs`: caller-supplied name,
# key_name and security_groups are kept; flavorRef, imageRef and networks
# are always overwritten.
kwargs.setdefault('name', data_utils.rand_name('server-test'))
kwargs['flavorRef'] = self.flavor_ref
kwargs['imageRef'] = self.image_ref
kwargs['networks'] = [{'uuid': network['id']}]
if not kwargs.get('key_name'):
kwargs['key_name'] = self.keypair['name']
if not kwargs.get('security_groups'):
kwargs['security_groups'] = [{
'name': self.security_groups[-1]['name']}]
# An explicit 'host' that is itself listed in exclude_hosts wins and the
# exclusion is dropped; otherwise a compute host outside exclude_hosts is
# looked up and used as 'host'.
if exclude_hosts:
exclude_hosts_ignored = False
if kwargs.get('host') and (kwargs['host'] in exclude_hosts):
exclude_hosts_ignored = True
LOG.debug("'exclude_hosts' parameter contains same value as "
"'host' so it will be ignored, i.e. 'host' will be "
"used")
else:
kwargs['host'] = self.find_different_compute_host(
exclude_hosts)
# Admin clients are used whenever a target host is set, primary clients
# otherwise.
if kwargs.get('host'):
servers_client = self.os_admin.servers_client
network_client = self.os_admin.network_client
else:
servers_client = self.os_primary.servers_client
network_client = self.os_primary.network_client
server = servers_client.create_server(**kwargs)['server']
# Two cleanups are registered: wait-for-termination first, delete second.
# Assuming unittest's reverse-order cleanup execution, the delete is issued
# before the wait.
self.addCleanup(test_utils.call_and_ignore_notfound_exc,
waiters.wait_for_server_termination,
servers_client,
server['id'])
self.addCleanup(test_utils.call_and_ignore_notfound_exc,
servers_client.delete_server,
server['id'])
# Fail the test if the server landed on an excluded host. `and`
# short-circuits, so exclude_hosts_ignored is only read when it was set
# above.
if exclude_hosts and not exclude_hosts_ignored:
if self.get_host_for_server(server['id']) in exclude_hosts:
self.fail("Failed to spawn a server on a host other than in "
"this list: '{}'. Can not proceed.".format(
' '.join(exclude_hosts)))
self.wait_for_server_active(server, client=servers_client)
# The first port bound to the server on `network` is used. Two `device_id`
# lines follow: one indexes server['server']['id'], the other server['id'].
port = self.client.list_ports(
network_id=network['id'],
device_id=server['server']['id'])['ports'][0]
device_id=server['id'])['ports'][0]
if create_floating_ip:
# Two `fip =` variants follow: self.create_floatingip(port=port) and a
# direct network_client call with its own cleanup.
fip = self.create_floatingip(port=port)
fip = network_client.create_floatingip(
floating_network_id=CONF.network.public_network_id,
port_id=port['id'])['floatingip']
self.addCleanup(test_utils.call_and_ignore_notfound_exc,
network_client.delete_floatingip,
fip['id'])
else:
fip = None
return {'port': port, 'fip': fip, 'server': server}

View File

@ -0,0 +1,292 @@
# Copyright 2024 Red Hat, Inc.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import testtools
from neutron_lib import constants as lib_constants
from neutron_tempest_plugin.common import ssh
from neutron_tempest_plugin.common import utils as common_utils
from oslo_log import log
from tempest import config
from tempest.lib.common.utils import data_utils
from tempest.lib.common.utils import test_utils
from tempest.lib import decorators
from tempest.lib import exceptions
from whitebox_neutron_tempest_plugin.common import constants
from whitebox_neutron_tempest_plugin.common import utils
from whitebox_neutron_tempest_plugin.tests.scenario import base
CONF = config.CONF
WB_CONF = config.CONF.whitebox_neutron_plugin_options
LOG = log.getLogger(__name__)
class L3haOvnTest(base.TrafficFlowTest, base.BaseTempestTestCaseOvn):
    """Scenario tests for OVN router gateway chassis failover (L3 HA).

    Each test builds a router, a network and two servers, disturbs the node
    hosting the highest priority gateway chassis (reboot, interface down,
    openvswitch stopped) and verifies that north-south traffic moves to the
    next chassis and back once the node is restored.
    """

    credentials = ['primary', 'admin']

    @classmethod
    def resource_setup(cls):
        """Set up class resources and pin the compute API microversion."""
        super(L3haOvnTest, cls).resource_setup()
        # Servers are created with the 'host' option (see _setup), which
        # the compute API accepts on server create from microversion 2.74.
        cls.setup_api_microversion_fixture(
            compute_microversion='2.74')

    def verify_routing_via_chassis(self, chassis_id):
        """Check that the router gateway is served by the given chassis.

        Resolves ``chassis_id`` to the expected chassis, waits until the
        router gateway port reports that chassis and then runs the
        north-south ICMP flow check from the test server.

        :param chassis_id: identifier of the chassis expected to host the
            router gateway.
        """
        self.expected_gateway_chassis = None

        def _get_router_gateway_chassis_by_id():
            try:
                self.expected_gateway_chassis = (
                    self.get_router_gateway_chassis_by_id(chassis_id))
            except exceptions.SSHExecCommandFailed as err:
                LOG.exception(err)
                LOG.warning("Retrying to obtain router gateway chassis in "
                            "case the OVN DBs are not ready yet")
                return False
            return True

        common_utils.wait_until_true(
            _get_router_gateway_chassis_by_id, timeout=60, sleep=5)

        LOG.debug("Waiting until router gateway chassis is updated")
        self.router_gateway_chassis = None

        def _router_gateway_chassis_updated():
            self.router_gateway_chassis = self.get_router_gateway_chassis(
                self.router_port['id'])
            LOG.debug("chassis = '{}', expected = {} ".format(
                self.router_gateway_chassis, self.expected_gateway_chassis))
            return self.router_gateway_chassis == self.expected_gateway_chassis

        try:
            common_utils.wait_until_true(
                _router_gateway_chassis_updated, timeout=60, sleep=5)
        except common_utils.WaitTimeout:
            self.fail("Gateway chassis was not updated as expected")

        self.check_north_south_icmp_flow(
            dst_ip=self.gateway_external_ip,
            expected_routing_nodes=[self.expected_gateway_chassis],
            expected_mac=self.router_port['mac_address'],
            ssh_client=self.test_server_client,
            ignore_outbound=self.ignore_outbound)

    def _validate_gateway_chassis(self, chassis_id):
        """Skip the test when the gateway chassis sits on a controller."""
        node_name = self.get_router_gateway_chassis_by_id(chassis_id)
        if self.get_node_setting(node_name, 'is_controller'):
            raise self.skipException(
                "The test currently does not support the required action "
                "when gateway chassis is on controller.")

    def _setup(self):
        """Create the router, network, servers and SSH clients for a test.

        Populates ``router_port``, ``chassis_list``, ``keypair``,
        ``network``, ``subnet``, ``exclude_hosts``,
        ``ssh_proxy_server_client``, ``test_server_client`` and
        ``ignore_outbound`` on ``self`` and finishes by verifying that
        traffic is routed through the first chassis of ``chassis_list``.
        """
        router = self.create_router_by_client()
        self.router_port = self.os_admin.network_client.list_ports(
            device_id=router['id'],
            device_owner=lib_constants.DEVICE_OWNER_ROUTER_GW)['ports'][0]
        self.chassis_list = self.get_router_gateway_chassis_list(
            self.router_port['id'])
        self._validate_gateway_chassis(self.chassis_list[0])
        chassis_name = self.get_router_gateway_chassis_by_id(
            self.chassis_list[0])
        LOG.debug("router chassis name = {}".format(chassis_name))

        # Since we are going to spawn VMs with 'host' option which
        # is available only for admin user, we create security group
        # and keypair also as admin
        secgroup = self.os_admin.network_client.create_security_group(
            name=data_utils.rand_name('secgroup'))
        # Register the cleanup before creating the rules so that the group
        # is removed even if one of the rule requests fails.
        self.addCleanup(
            test_utils.call_and_ignore_notfound_exc,
            self.os_admin.network_client.delete_security_group,
            secgroup['security_group']['id'])
        self.security_groups.append(secgroup['security_group'])
        self.os_admin.network_client.create_security_group_rule(
            security_group_id=secgroup['security_group']['id'],
            protocol=lib_constants.PROTO_NAME_ICMP,
            direction=lib_constants.INGRESS_DIRECTION)
        self.os_admin.network_client.create_security_group_rule(
            security_group_id=secgroup['security_group']['id'],
            protocol=lib_constants.PROTO_NAME_TCP,
            direction=lib_constants.INGRESS_DIRECTION,
            port_range_min=22,
            port_range_max=22)
        self.keypair = self.os_admin.keypairs_client.create_keypair(
            name=data_utils.rand_name('keypair'))['keypair']
        # The keypair is created with the admin client directly, so nothing
        # else removes it; delete it explicitly when the test ends.
        self.addCleanup(
            test_utils.call_and_ignore_notfound_exc,
            self.os_admin.keypairs_client.delete_keypair,
            self.keypair['name'])
        self.network = self.create_network()
        self.subnet = self.create_subnet(self.network)
        self.create_router_interface(router['id'], self.subnet['id'])

        # We create VMs on compute hosts that are not on the same host
        # as router gateway port, i.e. the test is capable to work even
        # on environments that schedule ovn routers on compute nodes
        self.exclude_hosts = [chassis_name]
        ssh_proxy_server = self._create_server(
            exclude_hosts=self.exclude_hosts)
        test_server = self._create_server(exclude_hosts=self.exclude_hosts,
                                          create_floating_ip=False)
        self.ssh_proxy_server_client = ssh.Client(
            ssh_proxy_server['fip']['floating_ip_address'],
            CONF.validation.image_ssh_user,
            pkey=self.keypair['private_key'])
        # The test server has no floating IP; it is reached through the
        # proxy server using its fixed IP.
        test_server_ip = test_server['port']['fixed_ips'][0]['ip_address']
        self.test_server_client = ssh.Client(
            test_server_ip,
            CONF.validation.image_ssh_user,
            pkey=self.keypair['private_key'],
            proxy_client=self.ssh_proxy_server_client)

        network_details = self.os_admin.network_client.show_network(
            self.network['id'])
        # This helps to avoid false positives with vlan+dvr, see BZ2192633
        self.ignore_outbound = (
            network_details['network']['provider:network_type'] == 'vlan')

        self.verify_routing_via_chassis(self.chassis_list[0])

    @testtools.skipUnless(WB_CONF.run_power_operations_tests,
                          "run_power_operations_tests conf value is not "
                          "enabled.")
    @decorators.idempotent_id('cf47a5e3-35cb-423c-84af-4cc6d389cfbd')
    @decorators.attr(type='slow')
    def test_l3ha_reboot_node(self):
        """Check that traffic from a VM connected to an internal network
        passes through a networker node which is the highest priority
        chassis for a router the internal network is connected to.

        The test is intended for OVN environments.

        Topology: Any topology with at least 2 nodes acting as networker nodes
        (controller nodes with networking services also valid) and at least
        one compute node.

        Scenario:
        1. Create network, subnet, router, pingable and loginable security
           group rules, keypair, run a VM instance (server).
        2. Find which node is the highest priority chassis for the router.
        3. Ping an external address from the VM and make sure that traffic
           is passing through the interface connected to the external network
           on the highest priority chassis.
        4. Shutdown the node where the higher priority chassis was scheduled
           and repeat steps 2-3. Make sure that now traffic is passing through
           the other node(chassis).
        5. Start up the turned off host, wait until it is up and repeat steps
           2-3. Make sure that highest priority chassis is back and traffic
           is passing through it.
        """
        # ensures overcloud nodes are up for next tests
        self.addCleanup(self.ensure_overcloud_nodes_active)
        self._setup()
        # Remember the node now: verify_routing_via_chassis overwrites
        # self.router_gateway_chassis on every call.
        gateway_node = self.router_gateway_chassis
        self.power_off_host(gateway_node)
        self.discover_nodes()
        self.verify_routing_via_chassis(self.chassis_list[1])
        self.power_on_host(gateway_node)
        self.discover_nodes()
        self.verify_routing_via_chassis(self.chassis_list[0])

    @decorators.idempotent_id('f8fe1f69-a87f-41d8-ac6e-ed7905438338')
    @decorators.attr(type='slow')
    def test_l3ha_bring_down_interface(self):
        """Check that traffic from a VM connected to an internal network
        passes through a networker node which is the highest priority
        chassis for a router the internal network is connected to.

        The test is intended for OVN environments.

        Topology: Any topology with at least 2 nodes acting as dedicated
        networker nodes.

        Scenario:
        1. Create network, subnet, router, pingable and loginable security
           group rules, keypair, run a VM instance (server).
        2. Find which node is the highest priority chassis for the router.
        3. Ping an external address from the VM and make sure that traffic
           is passing through the interface connected to the external network
           on the highest priority chassis.
        4. Bring down the interface which is passing tenant traffic
           on the node where the higher priority chassis was scheduled
           and repeat steps 2-3. Make sure that now traffic is passing through
           the other node(chassis).
        5. Bring up the interface, wait until port mappings updated and
           repeat steps 2-3. Make sure that highest priority chassis is back
           and traffic is passing through it.
        """
        self._setup()
        node_client = self.find_node_client(self.router_gateway_chassis)

        # We need to find a physical interface that is passing tenant traffic
        bridge = node_client.exec_command(
            "sudo ovs-vsctl get open . external_ids:ovn-bridge-mappings | "
            r"sed 's/^\".*tenant:\(.*\).*\"$/\1/'").rstrip()
        physical_interfaces = node_client.exec_command(
            "find /sys/class/net -type l -not -lname '*virtual*' "
            "-printf '%f\n'").rstrip().split('\n')
        bridge_interfaces = node_client.exec_command(
            "sudo ovs-vsctl list-ifaces " + bridge).rstrip().split('\n')
        candidates = set(physical_interfaces) & set(bridge_interfaces)
        # Empty command output splits into [''], which would otherwise be
        # picked up as a bogus "interface".
        candidates.discard('')
        if not candidates:
            self.fail("No physical interface found on bridge '{}' of the "
                      "gateway chassis node".format(bridge))
        # Deterministic choice when several interfaces qualify.
        interface = sorted(candidates)[0]

        self.addCleanup(
            utils.interface_state_set, node_client, interface,
            constants.STATE_UP)
        utils.interface_state_set(node_client, interface, constants.STATE_DOWN)
        self.verify_routing_via_chassis(self.chassis_list[1])
        utils.interface_state_set(node_client, interface, constants.STATE_UP)
        self.verify_routing_via_chassis(self.chassis_list[0])

    @decorators.idempotent_id('c662477b-6871-4c19-ae87-a2ece859d7f4')
    @decorators.attr(type='slow')
    def test_l3ha_stop_ovs_service(self):
        """Check that traffic from a VM connected to an internal network
        passes through a networker node which is the highest priority
        chassis for a router the internal network is connected to.

        The test is intended for OVN environments.

        Topology: Any topology with at least 2 nodes acting as dedicated
        networker nodes.

        Scenario:
        1. Create network, subnet, router, pingable and loginable security
           group rules, keypair, run a VM instance (server).
        2. Find which node is the highest priority chassis for the router.
        3. Ping an external address from the VM and make sure that traffic
           is passing through the interface connected to the external network
           on the highest priority chassis.
        4. Stop the openvswitch service on the node where the higher priority
           chassis was scheduled and repeat steps 2-3. Make sure that now
           traffic is passing through the other node(chassis).
        5. Start the openvswitch service, wait until port mappings updated and
           repeat steps 2-3. Make sure that highest priority chassis is back
           and traffic is passing through it.
        """
        self._setup()
        node_client = self.find_node_client(self.router_gateway_chassis)
        remote_service = 'ovs-vswitchd.service'
        # Make sure the service is started again even if a check below fails.
        self.addCleanup(
            utils.remote_service_action, node_client,
            remote_service, constants.ACTION_START)
        utils.remote_service_action(
            node_client, remote_service, constants.ACTION_STOP)
        self.verify_routing_via_chassis(self.chassis_list[1])
        utils.remote_service_action(
            node_client, remote_service, constants.ACTION_START)
        self.verify_routing_via_chassis(self.chassis_list[0])

View File

@ -1353,12 +1353,10 @@ class QosTestOvn(base.BaseTempestTestCaseOvn, QosBaseTest):
# launch server with non policy port, then attach also to policy port
port_no_qos, fip_no_qos, server = self._create_server(
network=network_no_qos).values()
server = server['server']
# other server to validate QoS policy port later
other_fip, other_server = tuple(self._create_server(
network=network_qos).values())[1:]
other_server = other_server['server']
server['ssh_client'] = ssh.Client(
fip_no_qos['floating_ip_address'],
@ -1428,7 +1426,6 @@ class QosTestOvn(base.BaseTempestTestCaseOvn, QosBaseTest):
port, fip, server = tuple(self._create_server(
create_floating_ip=True, network=network).values())
server = server['server']
# attach a qos policy to the fip
fip_policy_id = self._create_qos_policy_bw_and_dscp()