diff --git a/whitebox_neutron_tempest_plugin/common/utils.py b/whitebox_neutron_tempest_plugin/common/utils.py index 462d89c..81c2ef5 100644 --- a/whitebox_neutron_tempest_plugin/common/utils.py +++ b/whitebox_neutron_tempest_plugin/common/utils.py @@ -228,6 +228,7 @@ def remote_service_action(client, service, action): action=action, service=service) LOG.debug("Running '{}' on {}".format(cmd, client.host)) client.exec_command(cmd) + time.sleep(5) def retry_on_assert_fail(max_retries): diff --git a/whitebox_neutron_tempest_plugin/config.py b/whitebox_neutron_tempest_plugin/config.py index 287b4e7..b29265c 100644 --- a/whitebox_neutron_tempest_plugin/config.py +++ b/whitebox_neutron_tempest_plugin/config.py @@ -85,6 +85,12 @@ WhiteboxNeutronPluginOptions = [ 'power operations, like shutdown/startup openstack nodes.' 'These tests can be disruptive and not suitable for some ' 'environments.'), + cfg.BoolOpt('avoid_disrupting_controllers', + default=True, + help='Whether to avoid executing disruptive operations on ' + 'OSP controller or OCP master/worker nodes which can be ' + 'hosting neutron routers. With this option test will try ' + 'to use other nodes.'), cfg.IntOpt('broadcast_receivers_count', default=2, help='How many receivers to use in broadcast tests. Default ' @@ -142,8 +148,8 @@ WhiteboxNeutronPluginOptions = [ 'Can be used for tweaking iperf in case default value ' 'does not work for some reason, e.g. MTU issues.'), cfg.StrOpt('node_tenant_interface', - default='eth1', - help='Physical interface of a node that intended to pass tenant' + default='vlan22', + help='Interface of a node that intended to pass tenant' 'network traffic. Note: currently only environments with ' 'the same name of the tenant interface are supported'), cfg.IntOpt('capture_timeout', @@ -154,7 +160,7 @@ WhiteboxNeutronPluginOptions = [ 'remote process in case test or connection was ' 'interrupted unexpectedly.'), cfg.StrOpt('hypervisor_host', - default='hypervisor-1', + default='hypervisor-1.utility', help='Hypervisor host for podified environment based on libvirt' 'virtual machines, typically deployed by ci-framework: ' 'https://github.com/openstack-k8s-operators/ci-framework'), diff --git a/whitebox_neutron_tempest_plugin/tests/scenario/base.py b/whitebox_neutron_tempest_plugin/tests/scenario/base.py index b82bf8a..df97544 100644 --- a/whitebox_neutron_tempest_plugin/tests/scenario/base.py +++ b/whitebox_neutron_tempest_plugin/tests/scenario/base.py @@ -39,6 +39,7 @@ from tempest import config from tempest.lib.common import fixed_network from tempest.lib.common.utils import data_utils from tempest.lib.common.utils import test_utils +from tempest.lib import exceptions as lib_exceptions from whitebox_neutron_tempest_plugin.common import constants as local_constants from whitebox_neutron_tempest_plugin.common import tcpdump_capture as capture @@ -297,7 +298,7 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase): host['hypervisor_hostname'] for host in cls.os_admin.hv_client.list_hypervisors()['hypervisors']] if WB_CONF.openstack_type == 'podified': - cls.nodes = cls.get_podified_nodes_data() + cls.nodes_data = cls.get_podified_nodes_data() with open(WB_CONF.proxy_host_key_file, 'r') as file: id_cifw_key = file.read() cls.keys_data = { @@ -306,17 +307,20 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase): devscripts_key = cls.proxy_host_client.exec_command( 'cat ' + cls.ocp_nodes_key_path) cls.keys_data['devscripts_key'] = devscripts_key - for host in cls.nodes: + for host in cls.nodes_data: client = cls.get_node_client( host=host['ip'], username=host['user'], pkey=f"{cls.keys_data[host['key']]}") host['client'] = client else: - cls.nodes = [] + cls.nodes_data = [] for host in set([*l3_agent_hosts, *compute_hosts]): - cls.nodes.append( + cls.nodes_data.append( {'ip': host, 'client': cls.get_node_client(host)}) - for host in cls.nodes: + cls.nodes = [] + for host in cls.nodes_data: + if not local_utils.host_responds_to_ping(host['ip']): + continue host['name'] = cls.get_full_name( host['client'].exec_command('hostname').strip()) host['full_name'] = cls.get_full_name( @@ -328,6 +332,13 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase): host['is_controller'] = (output.strip() != "") host['is_compute'] = (host['full_name'] in compute_hosts) host['is_networker'] = (host['full_name'] in l3_agent_hosts) + cls.nodes.append(host) + + @classmethod + def get_standalone_networkers(cls): + return [node['name'] for node in cls.nodes + if node['is_networker'] and not + (node['is_controller'] or node['is_compute'])] @classmethod def is_setup_single_node(cls): @@ -959,70 +970,6 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase): LOG.debug('Command for resource creation succeeded') return _id - @classmethod - def find_host_virsh_name(cls, host): - cmd = ("timeout 10 ssh {} sudo virsh list --name | grep -w {}").format( - WB_CONF.hypervisor_host, host) - return cls.proxy_host_client.exec_command(cmd).strip() - - @classmethod - def is_host_state_is_shut_off(cls, host): - cmd = ("timeout 10 ssh {} virsh list --state-shutoff | grep -w {} " - "|| true".format(WB_CONF.hypervisor_host, host)) - output = cls.proxy_host_client.exec_command(cmd) - return True if host in output else False - - @classmethod - def is_host_loginable(cls, host): - cmd = "timeout 10 ssh {} ssh {} hostname || true".format( - WB_CONF.hypervisor_host, host) - output = cls.proxy_host_client.exec_command(cmd) - return True if host in output else False - - @classmethod - def power_off_host(cls, host): - if not WB_CONF.run_power_operations_tests: - raise cls.skipException("Power operations are not allowed") - cmd = "timeout 10 ssh {} sudo virsh destroy {}".format( - WB_CONF.hypervisor_host, cls.find_host_virsh_name()) - cls.proxy_host_client.exec_command(cmd) - common_utils.wait_until_true( - lambda: cls.is_host_state_is_shut_off(host), - timeout=30, sleep=5) - - @classmethod - def power_on_host(cls, host): - if not WB_CONF.run_power_operations_tests: - raise cls.skipException("Power operations are not allowed") - cmd = "timeout 10 ssh {} sudo virsh start {}".format( - WB_CONF.hypervisor_host, cls.find_host_virsh_name()) - cls.proxy_host_client.exec_command(cmd) - # TODO(rsafrono): implement and apply additional health checks - common_utils.wait_until_true( - lambda: cls.is_host_loginable(host), - timeout=120, sleep=5) - - @classmethod - def reboot_host(cls, host): - if not WB_CONF.run_power_operations_tests: - raise cls.skipException("Power operations are not allowed") - cmd = "timeout 10 ssh {} sudo virsh reboot {}".format( - WB_CONF.hypervisor_host, cls.find_host_virsh_name()) - cls.proxy_host_client.exec_command(cmd) - common_utils.wait_until_true( - lambda: cls.is_host_loginable(host), - timeout=120, sleep=5) - - def ensure_overcloud_nodes_active(self): - """Checks all openstack nodes are up, otherwise activates them. - """ - # get overcloud nodes info if it doesn't exist - if not hasattr(self, 'nodes'): - self.discover_nodes() - for node in self.nodes: - if self.is_host_state_is_shut_off(node['name']): - self.power_on_host(node['name']) - class BaseTempestTestCaseAdvanced(BaseTempestWhiteboxTestCase): """Base class skips test suites unless advanced image is available, @@ -1078,15 +1025,19 @@ class TrafficFlowTest(BaseTempestWhiteboxTestCase): "is not configured.") cls.discover_nodes() if WB_CONF.openstack_type == 'podified': - cmd = ("{} get pods --field-selector=status.phase=Running " - "-o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name " - "| grep ovn-controller-ovs".format(cls.OC)) - output = cls.proxy_host_client.exec_command( - cmd).strip().split('\n') - for line in output: - for node in cls.nodes: - if node['name'] == line.split()[0]: - node['ovs_pod'] = line.split()[1] + cls.set_ovs_pods_for_nodes() + + @classmethod + def set_ovs_pods_for_nodes(cls): + cmd = ("{} get pods --field-selector=status.phase=Running " + "-o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name " + "-l service=ovn-controller-ovs".format(cls.OC)) + output = cls.proxy_host_client.exec_command( + cmd).strip().splitlines() + for line in output: + for node in cls.nodes: + if node['name'] == line.split()[0]: + node['ovs_pod'] = line.split()[1] def _start_captures(self, filters, interface=None): def get_interface(client): @@ -1108,9 +1059,9 @@ class TrafficFlowTest(BaseTempestWhiteboxTestCase): return ','.join(interfaces) for node in self.nodes: - if not(node['is_controller'] or - node['is_compute'] or - node['is_networker']): + if not (node['is_controller'] or + node['is_compute'] or + node['is_networker']): LOG.debug('Traffic is not captured on node %s because it is ' 'not: controller, compute, networker', node['name']) continue @@ -1408,6 +1359,85 @@ class BaseTempestTestCaseOvn(BaseTempestWhiteboxTestCase): self.assertEqual(output, '') +class BaseDisruptiveTempestTestCase(BaseTempestWhiteboxTestCase): + @classmethod + def resource_setup(cls): + super(BaseDisruptiveTempestTestCase, cls).resource_setup() + try: + cls.proxy_host_client.exec_command( + "timeout 10 ssh {} virsh list".format(WB_CONF.hypervisor_host)) + except lib_exceptions.SSHExecCommandFailed: + raise cls.skipException( + "No access to virsh tool on hypervisor node. Please make sure " + "that hypervisor_host is configured properly and/or virsh " + "is deployed there.") + + @classmethod + def find_host_virsh_name(cls, host): + cmd = ("timeout 10 ssh {} sudo virsh list --all --name " + "| grep -w {}").format( + WB_CONF.hypervisor_host, host) + return cls.proxy_host_client.exec_command(cmd).strip() + + @classmethod + def is_host_state_is_shut_off(cls, host): + cmd = ("timeout 10 ssh {} virsh list --state-shutoff | grep -w {} " + "|| true".format(WB_CONF.hypervisor_host, host)) + output = cls.proxy_host_client.exec_command(cmd) + return True if host in output else False + + @classmethod + def is_host_loginable(cls, host): + cmd = "timeout 10 ssh {} ssh {} hostname || true".format( + WB_CONF.hypervisor_host, host) + output = cls.proxy_host_client.exec_command(cmd) + return True if host in output else False + + @classmethod + def power_off_host(cls, host): + if not WB_CONF.run_power_operations_tests: + raise cls.skipException("Power operations are not allowed") + cmd = "timeout 10 ssh {} sudo virsh destroy {}".format( + WB_CONF.hypervisor_host, cls.find_host_virsh_name(host)) + cls.proxy_host_client.exec_command(cmd) + common_utils.wait_until_true( + lambda: cls.is_host_state_is_shut_off(host), + timeout=30, sleep=5) + + @classmethod + def power_on_host(cls, host): + if not WB_CONF.run_power_operations_tests: + raise cls.skipException("Power operations are not allowed") + cmd = "timeout 10 ssh {} sudo virsh start {}".format( + WB_CONF.hypervisor_host, cls.find_host_virsh_name(host)) + cls.proxy_host_client.exec_command(cmd) + # TODO(rsafrono): implement and apply additional health checks + common_utils.wait_until_true( + lambda: cls.is_host_loginable(host), + timeout=120, sleep=5) + + @classmethod + def reboot_host(cls, host): + if not WB_CONF.run_power_operations_tests: + raise cls.skipException("Power operations are not allowed") + cmd = "timeout 10 ssh {} sudo virsh reboot {}".format( + WB_CONF.hypervisor_host, cls.find_host_virsh_name(host)) + cls.proxy_host_client.exec_command(cmd) + common_utils.wait_until_true( + lambda: cls.is_host_loginable(host), + timeout=120, sleep=5) + + def ensure_overcloud_nodes_active(self): + """Checks all openstack nodes are up, otherwise activates them. + """ + hosts = self.proxy_host_client.exec_command( + "timeout 10 ssh {} sudo virsh list --all --name".format( + WB_CONF.hypervisor_host)).strip().split() + for host in hosts: + if self.is_host_state_is_shut_off(host): + self.power_on_host(host) + + # user_data_cmd is used to generate a VLAN interface on VM instances with PF # ports user_data_cmd = """ diff --git a/whitebox_neutron_tempest_plugin/tests/scenario/test_internal_dns.py b/whitebox_neutron_tempest_plugin/tests/scenario/test_internal_dns.py index 86152de..a2f1556 100644 --- a/whitebox_neutron_tempest_plugin/tests/scenario/test_internal_dns.py +++ b/whitebox_neutron_tempest_plugin/tests/scenario/test_internal_dns.py @@ -13,7 +13,6 @@ # License for the specific language governing permissions and limitations # under the License. import re -import testtools from neutron_lib import constants as lib_constants from neutron_tempest_plugin.common import ssh @@ -290,7 +289,8 @@ class InternalDNSInterruptionsTestOvn(InternalDNSBaseOvn): class InternalDNSInterruptionsAdvancedTestOvn( InternalDNSBaseOvn, - base.BaseTempestTestCaseAdvanced): + base.BaseTempestTestCaseAdvanced, + base.BaseDisruptiveTempestTestCase): """Tests internal DNS capabilities with interruptions in overcloud, on advanced image only. """ @@ -318,8 +318,6 @@ class InternalDNSInterruptionsAdvancedTestOvn( @decorators.attr(type='slow') @utils.requires_ext(extension="dns-integration", service="network") - @testtools.skipIf(WB_CONF.openstack_type == 'podified', - 'Not yet adapted for podified environment') @decorators.idempotent_id('e6c5dbea-d704-4cda-bb92-a5bfd0aa1bb2') def test_ovn_dns_name_after_networker_reboot(self): """Tests that OpenStack port, guest VM and OVN NB database have correct @@ -355,7 +353,13 @@ class InternalDNSInterruptionsAdvancedTestOvn( vm_1['fip'] = self.create_floatingip(port=dns_port) vm_1['ssh_client'] = self._create_ssh_client( vm_1['fip']['floating_ip_address']) + self._get_router_and_nodes_info() + if self.get_node_setting(self.router_gateway_chassis, 'is_controller'): + raise self.skipException( + "The test currently does not support a required action " + "when gateway chassis is on a node with OSP control plane " + "services rather than on a standalone networker node.") # soft shutdown master networker node self.power_off_host(self.router_gateway_chassis) # validate hostname (dns-name) using API, guest VM, diff --git a/whitebox_neutron_tempest_plugin/tests/scenario/test_l3ha_ovn.py b/whitebox_neutron_tempest_plugin/tests/scenario/test_l3ha_ovn.py index 0cfe243..27ecc82 100644 --- a/whitebox_neutron_tempest_plugin/tests/scenario/test_l3ha_ovn.py +++ b/whitebox_neutron_tempest_plugin/tests/scenario/test_l3ha_ovn.py @@ -34,14 +34,12 @@ WB_CONF = config.CONF.whitebox_neutron_plugin_options LOG = log.getLogger(__name__) -@testtools.skipIf(WB_CONF.openstack_type == 'podified', - 'Not yet adapted for podified environment') -class L3haOvnTest(base.TrafficFlowTest, base.BaseTempestTestCaseOvn): +class L3haOvnCommon(base.TrafficFlowTest, base.BaseTempestTestCaseOvn): credentials = ['primary', 'admin'] @classmethod def resource_setup(cls): - super(L3haOvnTest, cls).resource_setup() + super(L3haOvnCommon, cls).resource_setup() cls.setup_api_microversion_fixture( compute_microversion='2.74') @@ -95,17 +93,34 @@ class L3haOvnTest(base.TrafficFlowTest, base.BaseTempestTestCaseOvn): "when gateway chassis is on controller.") def _setup(self): - router = self.create_router_by_client() - self.router_port = self.os_admin.network_client.list_ports( - device_id=router['id'], - device_owner=lib_constants.DEVICE_OWNER_ROUTER_GW)['ports'][0] - self.chassis_list = self.get_router_gateway_chassis_list( - self.router_port['id']) - self._validate_gateway_chassis(self.chassis_list[0]) - chassis_name = self.get_router_gateway_chassis_by_id( - self.chassis_list[0]) - LOG.debug("router chassis name = {}".format(chassis_name)) + def create_router_candidate(): + router = self.create_router_by_client() + self.router_port = self.os_admin.network_client.list_ports( + device_id=router['id'], + device_owner=lib_constants.DEVICE_OWNER_ROUTER_GW)['ports'][0] + self.chassis_list = self.get_router_gateway_chassis_list( + self.router_port['id']) + chassis_name = self.get_router_gateway_chassis_by_id( + self.chassis_list[0]) + LOG.debug("router chassis name = {}".format(chassis_name)) + return router, chassis_name + if (WB_CONF.avoid_disrupting_controllers and + self.get_standalone_networkers()): + attempts = 5 + controller_nodes = [node['name'] for node in self.nodes + if node['is_controller']] + for i in range(1, attempts): + LOG.debug("Router creation attempt {}".format(i)) + router, chassis_name = create_router_candidate() + if chassis_name in controller_nodes: + continue + else: + break + else: + router, chassis_name = create_router_candidate() + + self._validate_gateway_chassis(self.chassis_list[0]) # Since we are going to spawn VMs with 'host' option which # is available only for admin user, we create security group # and keypair also as admin @@ -160,9 +175,16 @@ class L3haOvnTest(base.TrafficFlowTest, base.BaseTempestTestCaseOvn): self.ignore_outbound = False self.verify_routing_via_chassis(self.chassis_list[0]) - @testtools.skipUnless(WB_CONF.run_power_operations_tests, - "run_power_operations_tests conf value is not " - "enabled.") + def refresh_nodes_data(self): + self.discover_nodes() + if WB_CONF.openstack_type == 'podified': + self.set_ovs_pods_for_nodes() + + +@testtools.skipUnless(WB_CONF.run_power_operations_tests, + "run_power_operations_tests conf value is not " + "enabled.") +class L3haOvnDisruptiveTest(L3haOvnCommon, base.BaseDisruptiveTempestTestCase): @decorators.idempotent_id('cf47a5e3-35cb-423c-84af-4cc6d389cfbd') @decorators.attr(type='slow') def test_l3ha_reboot_node(self): @@ -197,13 +219,15 @@ class L3haOvnTest(base.TrafficFlowTest, base.BaseTempestTestCaseOvn): gateway_node = self.router_gateway_chassis self.power_off_host(gateway_node) - self.discover_nodes() + self.refresh_nodes_data() self.verify_routing_via_chassis(self.chassis_list[1]) self.power_on_host(gateway_node) - self.discover_nodes() + self.refresh_nodes_data() self.verify_routing_via_chassis(self.chassis_list[0]) + +class L3haOvnTest(L3haOvnCommon): @decorators.idempotent_id('f8fe1f69-a87f-41d8-ac6e-ed7905438338') @decorators.attr(type='slow') def test_l3ha_bring_down_interface(self): @@ -234,16 +258,7 @@ class L3haOvnTest(base.TrafficFlowTest, base.BaseTempestTestCaseOvn): """ self._setup() node_client = self.find_node_client(self.router_gateway_chassis) - # We need to find a physical interface that is passing tenant traffic - bridge = node_client.exec_command( - "sudo ovs-vsctl get open . external_ids:ovn-bridge-mappings | " - r"sed 's/^\".*tenant:\(.*\).*\"$/\1/'").rstrip() - physical_interfaces = node_client.exec_command( - "find /sys/class/net -type l -not -lname '*virtual*' " - "-printf '%f\n'").rstrip().split('\n') - bridge_interfaces = node_client.exec_command( - "sudo ovs-vsctl list-ifaces " + bridge).rstrip().split('\n') - interface = (set(physical_interfaces) & set(bridge_interfaces)).pop() + interface = WB_CONF.node_tenant_interface self.addCleanup( utils.interface_state_set, node_client, interface, constants.STATE_UP)