From bf09f02aca933b6c2f81312db9ca17edb893f6dc Mon Sep 17 00:00:00 2001
From: Roman Safronov <rsafrono@redhat.com>
Date: Tue, 19 Mar 2024 22:28:41 +0200
Subject: [PATCH] Add L3HA OVN tests

Moved tests from the downstream plugin with minimal changes.
Setup function changed significantly in order to support
environments where routers can be scheduled on compute nodes.
The base _create_server function was changed to support spawning
a VM on a specific host or on any host not in a given list of hosts.
The tests are currently supported only on environments where OVN
routers are located on compute or standalone networker nodes.
The test that shuts down a node requires
patch [1].
Also:
Since _create_server return value was changed (simplified),
adjusted some qos tests that could be affected.
Removed redundant 'return None' statements from some base functions.

[1] https://review.opendev.org/c/x/whitebox-neutron-tempest-plugin/+/913851

Change-Id: Ic7d46a9a432089f1cd1a30af7639e9824e464c6d
---
 .../common/utils.py                           |  16 +
 whitebox_neutron_tempest_plugin/config.py     |   6 +
 .../tests/scenario/base.py                    |  83 +++--
 .../tests/scenario/test_l3ha_ovn.py           | 292 ++++++++++++++++++
 .../tests/scenario/test_qos.py                |   3 -
 5 files changed, 379 insertions(+), 21 deletions(-)
 create mode 100644 whitebox_neutron_tempest_plugin/tests/scenario/test_l3ha_ovn.py

diff --git a/whitebox_neutron_tempest_plugin/common/utils.py b/whitebox_neutron_tempest_plugin/common/utils.py
index 6cdc83c..a7aa36a 100644
--- a/whitebox_neutron_tempest_plugin/common/utils.py
+++ b/whitebox_neutron_tempest_plugin/common/utils.py
@@ -211,3 +211,19 @@ def run_local_cmd(cmd, timeout=10):
         command, shell=True, stdout=subprocess.PIPE,
         stderr=subprocess.PIPE).communicate()
     return output, errors
+
+
+def interface_state_set(client, interface, state):
+    """Run 'sudo ip link set <interface> <state>' through the given client."""
+    LOG.debug('Setting interface {} {} on {}'.format(
+        interface, state, client.host))
+    command = "{path}; sudo ip link set {interface} {state}".format(
+        path='PATH=$PATH:/sbin', interface=interface, state=state)
+    client.exec_command(command)
+
+
+def remote_service_action(client, service, action):
+    systemctl_cmd = "sudo systemctl {action} {service}".format(
+        service=service, action=action)
+    LOG.debug("Running '{}' on {}".format(systemctl_cmd, client.host))
+    client.exec_command(systemctl_cmd)  # executed through the given client
diff --git a/whitebox_neutron_tempest_plugin/config.py b/whitebox_neutron_tempest_plugin/config.py
index e37210c..d19fd17 100644
--- a/whitebox_neutron_tempest_plugin/config.py
+++ b/whitebox_neutron_tempest_plugin/config.py
@@ -69,6 +69,12 @@ WhiteboxNeutronPluginOptions = [
                 default=False,
                 help='Boolean that specifies if Provider Routed Networks'
                      'are supported or not'),
+    cfg.BoolOpt('run_power_operations_tests',
+                default=False,
+                help='Specify explicitly whether to run tests that perform '
+                     'power operations, like shutdown/startup openstack '
+                     'nodes. These tests can be disruptive and not '
+                     'suitable for some environments.'),
     cfg.IntOpt('broadcast_receivers_count',
                default=2,
                help='How many receivers to use in broadcast tests. Default '
diff --git a/whitebox_neutron_tempest_plugin/tests/scenario/base.py b/whitebox_neutron_tempest_plugin/tests/scenario/base.py
index 4f7d63e..ddaf241 100644
--- a/whitebox_neutron_tempest_plugin/tests/scenario/base.py
+++ b/whitebox_neutron_tempest_plugin/tests/scenario/base.py
@@ -35,6 +35,7 @@ from tempest.common import waiters
 from tempest import config
 from tempest.lib.common import fixed_network
 from tempest.lib.common.utils import data_utils
+from tempest.lib.common.utils import test_utils
 
 from whitebox_neutron_tempest_plugin.common import tcpdump_capture as capture
 from whitebox_neutron_tempest_plugin.common import utils as local_utils
@@ -117,6 +118,16 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
             return ssh.Client(host=host, username=username,
                               key_filename=key_filename)
 
+    def find_different_compute_host(self, exclude_hosts):
+        """Return the first compute node name not in exclude_hosts."""
+        for node in self.nodes:
+            if node['is_compute'] and node['name'] not in exclude_hosts:
+                return node['name']
+        # No suitable compute node was found: raise skipException
+        raise self.skipException(
+            "Not able to find a different compute than: {}".format(
+                exclude_hosts))
+
     def get_local_ssh_client(self, network):
         return ssh.Client(
             host=self._get_local_ip_from_network(
@@ -130,13 +141,11 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
                 subnet_id)['subnet']
             if subnet['ip_version'] == ip_version:
                 return subnet['cidr']
-        return None
 
     def find_node_client(self, node_name):
         for node in self.nodes:
             if node['name'] == node_name:
                 return node['client']
-        return None
 
     @staticmethod
     def _get_local_ip_from_network(network):
@@ -146,7 +155,6 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
         for ip_address in host_ip_addresses:
             if netaddr.IPAddress(ip_address) in netaddr.IPNetwork(network):
                 return ip_address
-        return None
 
     def get_fip_port_details(self, fip):
         fip_ports = self.os_admin.network_client.list_ports(
@@ -157,7 +165,6 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
                     fp['fixed_ips'][0]['ip_address'] ==
                     fip['floating_ip_address']):
                 return fp
-        return None
 
     @classmethod
     def get_podified_nodes_data(cls):
@@ -262,6 +269,11 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
         for host in l3_agent_hosts:
             cls.append_node(host, is_networker=True)
 
+    def get_node_setting(self, node_name, setting):
+        """Return node[setting] of the node named node_name, else None."""
+        matches = (n[setting] for n in self.nodes if n['name'] == node_name)
+        return next(matches, None)
+
     @classmethod
     def get_pod_of_service(cls, service='neutron'):
         # (rsafrono) at this moment only neutron service pod handled
@@ -356,25 +368,60 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
         LOG.debug("Service '%s' active on host '%s'.",
                   service_glob, host_ip)
 
-    def _create_server(self, create_floating_ip=True,
-                       scheduler_hints=None, network=None):
+    def _create_server(
+            self, create_floating_ip=True, exclude_hosts=None,
+            network=None, **kwargs):
         network = network or self.network
-        server_kwargs = {
-            'flavor_ref': self.flavor_ref,
-            'image_ref': self.image_ref,
-            'key_name': self.keypair['name'],
-            'networks': [{'uuid': network['id']}],
-            'security_groups': [{'name': self.security_groups[-1]['name']}]
-        }
-        if scheduler_hints:
-            server_kwargs['scheduler_hints'] = scheduler_hints
-        server = self.create_server(**server_kwargs)
+        kwargs.setdefault('name', data_utils.rand_name('server-test'))
+        kwargs['flavorRef'] = self.flavor_ref
+        kwargs['imageRef'] = self.image_ref
+        kwargs['networks'] = [{'uuid': network['id']}]
+        if not kwargs.get('key_name'):
+            kwargs['key_name'] = self.keypair['name']
+        if not kwargs.get('security_groups'):
+            kwargs['security_groups'] = [{
+                'name': self.security_groups[-1]['name']}]
+        if exclude_hosts:  # may pick kwargs['host'] outside this list
+            exclude_hosts_ignored = False
+            if kwargs.get('host') and (kwargs['host'] in exclude_hosts):
+                exclude_hosts_ignored = True
+                LOG.debug("'exclude_hosts' parameter contains same value as "
+                          "'host' so it will be ignored, i.e. 'host' will be "
+                          "used")
+            else:
+                kwargs['host'] = self.find_different_compute_host(
+                    exclude_hosts)
+        if kwargs.get('host'):  # 'host' is available only for admin user
+            servers_client = self.os_admin.servers_client
+            network_client = self.os_admin.network_client
+        else:
+            servers_client = self.os_primary.servers_client
+            network_client = self.os_primary.network_client
 
+        server = servers_client.create_server(**kwargs)['server']
+        self.addCleanup(test_utils.call_and_ignore_notfound_exc,
+                        waiters.wait_for_server_termination,
+                        servers_client,
+                        server['id'])
+        self.addCleanup(test_utils.call_and_ignore_notfound_exc,
+                        servers_client.delete_server,
+                        server['id'])
+        if exclude_hosts and not exclude_hosts_ignored:
+            if self.get_host_for_server(server['id']) in exclude_hosts:
+                self.fail("Failed to spawn a server on a host other than in "
+                          "this list: '{}'. Can not proceed.".format(
+                              ' '.join(exclude_hosts)))
+        self.wait_for_server_active(server, client=servers_client)
         port = self.client.list_ports(
             network_id=network['id'],
-            device_id=server['server']['id'])['ports'][0]
+            device_id=server['id'])['ports'][0]
         if create_floating_ip:
-            fip = self.create_floatingip(port=port)
+            fip = network_client.create_floatingip(
+                floating_network_id=CONF.network.public_network_id,
+                port_id=port['id'])['floatingip']
+            self.addCleanup(test_utils.call_and_ignore_notfound_exc,
+                            network_client.delete_floatingip,
+                            fip['id'])
         else:
             fip = None
         return {'port': port, 'fip': fip, 'server': server}
diff --git a/whitebox_neutron_tempest_plugin/tests/scenario/test_l3ha_ovn.py b/whitebox_neutron_tempest_plugin/tests/scenario/test_l3ha_ovn.py
new file mode 100644
index 0000000..4c9b7b1
--- /dev/null
+++ b/whitebox_neutron_tempest_plugin/tests/scenario/test_l3ha_ovn.py
@@ -0,0 +1,292 @@
+# Copyright 2024 Red Hat, Inc.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+import testtools
+
+from neutron_lib import constants as lib_constants
+from neutron_tempest_plugin.common import ssh
+from neutron_tempest_plugin.common import utils as common_utils
+from oslo_log import log
+from tempest import config
+from tempest.lib.common.utils import data_utils
+from tempest.lib.common.utils import test_utils
+from tempest.lib import decorators
+from tempest.lib import exceptions
+
+from whitebox_neutron_tempest_plugin.common import constants
+from whitebox_neutron_tempest_plugin.common import utils
+from whitebox_neutron_tempest_plugin.tests.scenario import base
+
+
+CONF = config.CONF
+WB_CONF = config.CONF.whitebox_neutron_plugin_options
+LOG = log.getLogger(__name__)
+
+
+class L3haOvnTest(base.TrafficFlowTest, base.BaseTempestTestCaseOvn):
+    credentials = ['primary', 'admin']
+
+    @classmethod
+    def resource_setup(cls):
+        super(L3haOvnTest, cls).resource_setup()
+        cls.setup_api_microversion_fixture(
+            compute_microversion='2.74')  # 2.74 adds 'host' to server create
+
+    def verify_routing_via_chassis(self, chassis_id):
+        self.expected_gateway_chassis = None
+
+        def _get_router_gateway_chassis_by_id(chassis_id):
+            try:
+                self.expected_gateway_chassis = \
+                    self.get_router_gateway_chassis_by_id(chassis_id)
+            except exceptions.SSHExecCommandFailed as err:
+                LOG.exception(err)
+                LOG.warning("Retrying to obtain router gateway chassis in "
+                            "case the OVN DBs are not ready yet")
+                return False
+            return True
+
+        common_utils.wait_until_true(
+            lambda: _get_router_gateway_chassis_by_id(chassis_id),
+            timeout=60, sleep=5)
+
+        LOG.debug("Waiting until router gateway chassis is updated")
+        self.router_gateway_chassis = None
+
+        def _router_gateway_chassis_updated():
+            self.router_gateway_chassis = self.get_router_gateway_chassis(
+                self.router_port['id'])
+            LOG.debug("chassis = '{}', expected = {} ".format(
+                self.router_gateway_chassis, self.expected_gateway_chassis))
+            return self.router_gateway_chassis == self.expected_gateway_chassis
+
+        try:
+            common_utils.wait_until_true(
+                lambda: _router_gateway_chassis_updated(),
+                timeout=60, sleep=5)
+        except common_utils.WaitTimeout:
+            self.fail("Gateway chassis was not updated as expected")
+
+        self.check_north_south_icmp_flow(
+            dst_ip=self.gateway_external_ip,
+            expected_routing_nodes=[self.expected_gateway_chassis],
+            expected_mac=self.router_port['mac_address'],
+            ssh_client=self.test_server_client,
+            ignore_outbound=self.ignore_outbound)
+
+    def _validate_gateway_chassis(self, chassis_id):
+        node_name = self.get_router_gateway_chassis_by_id(chassis_id)
+        if self.get_node_setting(node_name, 'is_controller'):
+            raise self.skipException(
+                "The test currently does not support the required action "
+                "when gateway chassis is on controller.")
+
+    def _setup(self):
+        router = self.create_router_by_client()
+        self.router_port = self.os_admin.network_client.list_ports(
+            device_id=router['id'],
+            device_owner=lib_constants.DEVICE_OWNER_ROUTER_GW)['ports'][0]
+        self.chassis_list = self.get_router_gateway_chassis_list(
+            self.router_port['id'])
+        self._validate_gateway_chassis(self.chassis_list[0])
+        chassis_name = self.get_router_gateway_chassis_by_id(
+            self.chassis_list[0])
+        LOG.debug("router chassis name = {}".format(chassis_name))
+
+        # Since we are going to spawn VMs with 'host' option which
+        # is available only for admin user, we create security group
+        # and keypair also as admin
+        secgroup = self.os_admin.network_client.create_security_group(
+            name=data_utils.rand_name('secgroup'))
+        self.security_groups.append(secgroup['security_group'])
+        self.os_admin.network_client.create_security_group_rule(
+            security_group_id=secgroup['security_group']['id'],
+            protocol=lib_constants.PROTO_NAME_ICMP,
+            direction=lib_constants.INGRESS_DIRECTION)
+        self.os_admin.network_client.create_security_group_rule(
+            security_group_id=secgroup['security_group']['id'],
+            protocol=lib_constants.PROTO_NAME_TCP,
+            direction=lib_constants.INGRESS_DIRECTION,
+            port_range_min=22,
+            port_range_max=22)
+        self.addCleanup(
+            test_utils.call_and_ignore_notfound_exc,
+            self.os_admin.network_client.delete_security_group,
+            secgroup['security_group']['id'])
+        self.keypair = self.os_admin.keypairs_client.create_keypair(
+            name=data_utils.rand_name('keypair'))['keypair']
+        self.network = self.create_network()
+        self.subnet = self.create_subnet(self.network)
+        self.create_router_interface(router['id'], self.subnet['id'])
+
+        # We create VMs on compute hosts other than the host of the
+        # router gateway port, so the test also works on environments
+        # that schedule OVN routers on compute nodes
+        self.exclude_hosts = [chassis_name]
+        ssh_proxy_server = self._create_server(
+            exclude_hosts=self.exclude_hosts)
+        test_server = self._create_server(exclude_hosts=self.exclude_hosts,
+                                          create_floating_ip=False)
+        self.ssh_proxy_server_client = ssh.Client(
+            ssh_proxy_server['fip']['floating_ip_address'],
+            CONF.validation.image_ssh_user,
+            pkey=self.keypair['private_key'])
+        test_server_ip = test_server['port']['fixed_ips'][0]['ip_address']
+        self.test_server_client = ssh.Client(
+            test_server_ip,
+            CONF.validation.image_ssh_user,
+            pkey=self.keypair['private_key'],
+            proxy_client=self.ssh_proxy_server_client)
+
+        network_details = self.os_admin.network_client.show_network(
+            self.network['id'])
+        if network_details['network']['provider:network_type'] == 'vlan':
+            # This helps to avoid false positives with vlan+dvr, see BZ2192633
+            self.ignore_outbound = True
+        else:
+            self.ignore_outbound = False
+        self.verify_routing_via_chassis(self.chassis_list[0])
+
+    @testtools.skipUnless(WB_CONF.run_power_operations_tests,
+                          "run_power_operations_tests conf value is not "
+                          "enabled.")
+    @decorators.idempotent_id('cf47a5e3-35cb-423c-84af-4cc6d389cfbd')
+    @decorators.attr(type='slow')
+    def test_l3ha_reboot_node(self):
+        """Check that traffic from a VM connected to an internal network
+        passes through a networker node which is the highest priority
+        chassis for a router the internal network is connected to.
+
+        The test is intended for OVN environments.
+
+        Topology: Any topology with at least 2 nodes acting as networker nodes
+        (controller nodes with networking services also valid) and at least
+        one compute node.
+
+        Scenario:
+        1. Create network, subnet, router, pingable and loginable security
+           group rules, keypair, run a VM instance (server).
+        2. Find which node is the highest priority chassis for the router.
+        3. Ping an external address from the VM and make sure that traffic
+           is passing through the interface connected to the external network
+           on the highest priority chassis.
+        4. Shut down the node where the highest priority chassis was scheduled
+           and repeat steps 2-3. Make sure that now traffic is passing through
+           the other node (chassis).
+        5. Start up the turned off host, wait until it is up and repeat steps
+           2-3. Make sure that highest priority chassis is back and traffic
+           is passing through it.
+
+        """
+        # Ensure overcloud nodes are up for the next tests
+        self.addCleanup(self.ensure_overcloud_nodes_active)
+        self._setup()
+        gateway_node = self.router_gateway_chassis
+
+        self.power_off_host(gateway_node)
+        self.discover_nodes()
+        self.verify_routing_via_chassis(self.chassis_list[1])
+
+        self.power_on_host(gateway_node)
+        self.discover_nodes()
+        self.verify_routing_via_chassis(self.chassis_list[0])
+
+    @decorators.idempotent_id('f8fe1f69-a87f-41d8-ac6e-ed7905438338')
+    @decorators.attr(type='slow')
+    def test_l3ha_bring_down_interface(self):
+        """Check that traffic from a VM connected to an internal network
+        passes through a networker node which is the highest priority
+        chassis for a router the internal network is connected to.
+
+        The test is intended for OVN environments.
+
+        Topology: Any topology with at least 2 nodes acting as dedicated
+        networker nodes.
+
+        Scenario:
+        1. Create network, subnet, router, pingable and loginable security
+           group rules, keypair, run a VM instance (server).
+        2. Find which node is the highest priority chassis for the router.
+        3. Ping an external address from the VM and make sure that traffic
+           is passing through the interface connected to the external network
+           on the highest priority chassis.
+        4. Bring down the interface which is passing tenant traffic
+           on the node where the highest priority chassis was scheduled
+           and repeat steps 2-3. Make sure that now traffic is passing through
+           the other node (chassis).
+        5. Bring up the interface, wait until port mappings are updated and
+           repeat steps 2-3. Make sure that highest priority chassis is back
+           and traffic is passing through it.
+
+        """
+        self._setup()
+        node_client = self.find_node_client(self.router_gateway_chassis)
+        # We need to find a physical interface that is passing tenant traffic
+        bridge = node_client.exec_command(
+            "sudo ovs-vsctl get open . external_ids:ovn-bridge-mappings | "
+            r"sed 's/^\".*tenant:\(.*\).*\"$/\1/'").rstrip()
+        physical_interfaces = node_client.exec_command(
+            "find /sys/class/net -type l -not -lname '*virtual*' "
+            "-printf '%f\n'").rstrip().split('\n')
+        bridge_interfaces = node_client.exec_command(
+            "sudo ovs-vsctl list-ifaces " + bridge).rstrip().split('\n')
+        interface = (set(physical_interfaces) & set(bridge_interfaces)).pop()
+        self.addCleanup(
+            utils.interface_state_set, node_client, interface,
+            constants.STATE_UP)
+        utils.interface_state_set(node_client, interface, constants.STATE_DOWN)
+        self.verify_routing_via_chassis(self.chassis_list[1])
+
+        utils.interface_state_set(node_client, interface, constants.STATE_UP)
+        self.verify_routing_via_chassis(self.chassis_list[0])
+
+    @decorators.idempotent_id('c662477b-6871-4c19-ae87-a2ece859d7f4')
+    @decorators.attr(type='slow')
+    def test_l3ha_stop_ovs_service(self):
+        """Check that traffic from a VM connected to an internal network
+        passes through a networker node which is the highest priority
+        chassis for a router the internal network is connected to.
+
+        The test is intended for OVN environments.
+
+        Topology: Any topology with at least 2 nodes acting as dedicated
+        networker nodes.
+
+        Scenario:
+        1. Create network, subnet, router, pingable and loginable security
+           group rules, keypair, run a VM instance (server).
+        2. Find which node is the highest priority chassis for the router.
+        3. Ping an external address from the VM and make sure that traffic
+           is passing through the interface connected to the external network
+           on the highest priority chassis.
+        4. Stop the openvswitch service on the node where the highest priority
+           chassis was scheduled and repeat steps 2-3. Make sure that now
+           traffic is passing through the other node (chassis).
+        5. Start the openvswitch service, wait until port mappings are
+           updated and repeat steps 2-3. Make sure that highest priority
+           chassis is back and traffic is passing through it.
+
+        """
+        self._setup()
+        node_client = self.find_node_client(self.router_gateway_chassis)
+        remote_service = 'ovs-vswitchd.service'
+        self.addCleanup(
+            utils.remote_service_action, node_client,
+            remote_service, constants.ACTION_START)
+        utils.remote_service_action(
+            node_client, remote_service, constants.ACTION_STOP)
+        self.verify_routing_via_chassis(self.chassis_list[1])
+        utils.remote_service_action(
+            node_client, remote_service, constants.ACTION_START)
+        self.verify_routing_via_chassis(self.chassis_list[0])
diff --git a/whitebox_neutron_tempest_plugin/tests/scenario/test_qos.py b/whitebox_neutron_tempest_plugin/tests/scenario/test_qos.py
index 097d278..7dfc7c7 100644
--- a/whitebox_neutron_tempest_plugin/tests/scenario/test_qos.py
+++ b/whitebox_neutron_tempest_plugin/tests/scenario/test_qos.py
@@ -1353,12 +1353,10 @@ class QosTestOvn(base.BaseTempestTestCaseOvn, QosBaseTest):
         # launch server with non policy port, then attach also to policy port
         port_no_qos, fip_no_qos, server = self._create_server(
             network=network_no_qos).values()
-        server = server['server']
 
         # other server to validate QoS policy port later
         other_fip, other_server = tuple(self._create_server(
             network=network_qos).values())[1:]
-        other_server = other_server['server']
 
         server['ssh_client'] = ssh.Client(
             fip_no_qos['floating_ip_address'],
@@ -1428,7 +1426,6 @@ class QosTestOvn(base.BaseTempestTestCaseOvn, QosBaseTest):
 
         port, fip, server = tuple(self._create_server(
             create_floating_ip=True, network=network).values())
-        server = server['server']
 
         # attach a qos policy to the fip
         fip_policy_id = self._create_qos_policy_bw_and_dscp()