Merge "L3HA tests fixes and improvements"

This commit is contained in:
Zuul 2024-07-25 18:30:45 +00:00 committed by Gerrit Code Review
commit 1067fef73a
5 changed files with 173 additions and 117 deletions

View File

@ -228,6 +228,7 @@ def remote_service_action(client, service, action):
action=action, service=service) action=action, service=service)
LOG.debug("Running '{}' on {}".format(cmd, client.host)) LOG.debug("Running '{}' on {}".format(cmd, client.host))
client.exec_command(cmd) client.exec_command(cmd)
time.sleep(5)
def retry_on_assert_fail(max_retries): def retry_on_assert_fail(max_retries):

View File

@ -85,6 +85,12 @@ WhiteboxNeutronPluginOptions = [
'power operations, like shutdown/startup openstack nodes.' 'power operations, like shutdown/startup openstack nodes.'
'These tests can be disruptive and not suitable for some ' 'These tests can be disruptive and not suitable for some '
'environments.'), 'environments.'),
cfg.BoolOpt('avoid_disrupting_controllers',
default=True,
help='Whether to avoid executing disruptive operations on '
'OSP controller or OCP master/worker nodes which can be '
'hosting neutron routers. With this option test will try '
'to use other nodes.'),
cfg.IntOpt('broadcast_receivers_count', cfg.IntOpt('broadcast_receivers_count',
default=2, default=2,
help='How many receivers to use in broadcast tests. Default ' help='How many receivers to use in broadcast tests. Default '
@ -142,8 +148,8 @@ WhiteboxNeutronPluginOptions = [
'Can be used for tweaking iperf in case default value ' 'Can be used for tweaking iperf in case default value '
'does not work for some reason, e.g. MTU issues.'), 'does not work for some reason, e.g. MTU issues.'),
cfg.StrOpt('node_tenant_interface', cfg.StrOpt('node_tenant_interface',
default='eth1', default='vlan22',
help='Physical interface of a node that intended to pass tenant' help='Interface of a node that intended to pass tenant'
'network traffic. Note: currently only environments with ' 'network traffic. Note: currently only environments with '
'the same name of the tenant interface are supported'), 'the same name of the tenant interface are supported'),
cfg.IntOpt('capture_timeout', cfg.IntOpt('capture_timeout',
@ -154,7 +160,7 @@ WhiteboxNeutronPluginOptions = [
'remote process in case test or connection was ' 'remote process in case test or connection was '
'interrupted unexpectedly.'), 'interrupted unexpectedly.'),
cfg.StrOpt('hypervisor_host', cfg.StrOpt('hypervisor_host',
default='hypervisor-1', default='hypervisor-1.utility',
help='Hypervisor host for podified environment based on libvirt' help='Hypervisor host for podified environment based on libvirt'
'virtual machines, typically deployed by ci-framework: ' 'virtual machines, typically deployed by ci-framework: '
'https://github.com/openstack-k8s-operators/ci-framework'), 'https://github.com/openstack-k8s-operators/ci-framework'),

View File

@ -39,6 +39,7 @@ from tempest import config
from tempest.lib.common import fixed_network from tempest.lib.common import fixed_network
from tempest.lib.common.utils import data_utils from tempest.lib.common.utils import data_utils
from tempest.lib.common.utils import test_utils from tempest.lib.common.utils import test_utils
from tempest.lib import exceptions as lib_exceptions
from whitebox_neutron_tempest_plugin.common import constants as local_constants from whitebox_neutron_tempest_plugin.common import constants as local_constants
from whitebox_neutron_tempest_plugin.common import tcpdump_capture as capture from whitebox_neutron_tempest_plugin.common import tcpdump_capture as capture
@ -297,7 +298,7 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
host['hypervisor_hostname'] for host host['hypervisor_hostname'] for host
in cls.os_admin.hv_client.list_hypervisors()['hypervisors']] in cls.os_admin.hv_client.list_hypervisors()['hypervisors']]
if WB_CONF.openstack_type == 'podified': if WB_CONF.openstack_type == 'podified':
cls.nodes = cls.get_podified_nodes_data() cls.nodes_data = cls.get_podified_nodes_data()
with open(WB_CONF.proxy_host_key_file, 'r') as file: with open(WB_CONF.proxy_host_key_file, 'r') as file:
id_cifw_key = file.read() id_cifw_key = file.read()
cls.keys_data = { cls.keys_data = {
@ -306,17 +307,20 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
devscripts_key = cls.proxy_host_client.exec_command( devscripts_key = cls.proxy_host_client.exec_command(
'cat ' + cls.ocp_nodes_key_path) 'cat ' + cls.ocp_nodes_key_path)
cls.keys_data['devscripts_key'] = devscripts_key cls.keys_data['devscripts_key'] = devscripts_key
for host in cls.nodes: for host in cls.nodes_data:
client = cls.get_node_client( client = cls.get_node_client(
host=host['ip'], username=host['user'], host=host['ip'], username=host['user'],
pkey=f"{cls.keys_data[host['key']]}") pkey=f"{cls.keys_data[host['key']]}")
host['client'] = client host['client'] = client
else: else:
cls.nodes = [] cls.nodes_data = []
for host in set([*l3_agent_hosts, *compute_hosts]): for host in set([*l3_agent_hosts, *compute_hosts]):
cls.nodes.append( cls.nodes_data.append(
{'ip': host, 'client': cls.get_node_client(host)}) {'ip': host, 'client': cls.get_node_client(host)})
for host in cls.nodes: cls.nodes = []
for host in cls.nodes_data:
if not local_utils.host_responds_to_ping(host['ip']):
continue
host['name'] = cls.get_full_name( host['name'] = cls.get_full_name(
host['client'].exec_command('hostname').strip()) host['client'].exec_command('hostname').strip())
host['full_name'] = cls.get_full_name( host['full_name'] = cls.get_full_name(
@ -328,6 +332,13 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
host['is_controller'] = (output.strip() != "") host['is_controller'] = (output.strip() != "")
host['is_compute'] = (host['full_name'] in compute_hosts) host['is_compute'] = (host['full_name'] in compute_hosts)
host['is_networker'] = (host['full_name'] in l3_agent_hosts) host['is_networker'] = (host['full_name'] in l3_agent_hosts)
cls.nodes.append(host)
@classmethod
def get_standalone_networkers(cls):
    """Return the names of nodes that act as networkers only.

    A node from ``cls.nodes`` is included when its ``is_networker`` flag is
    truthy and neither ``is_controller`` nor ``is_compute`` is truthy.
    """
    standalone = []
    for entry in cls.nodes:
        if not entry['is_networker']:
            continue
        if entry['is_controller'] or entry['is_compute']:
            continue
        standalone.append(entry['name'])
    return standalone
@classmethod @classmethod
def is_setup_single_node(cls): def is_setup_single_node(cls):
@ -959,70 +970,6 @@ class BaseTempestWhiteboxTestCase(base.BaseTempestTestCase):
LOG.debug('Command for resource creation succeeded') LOG.debug('Command for resource creation succeeded')
return _id return _id
@classmethod
def find_host_virsh_name(cls, host):
    """Return the virsh domain name on the hypervisor that matches *host*.

    The lookup runs from the proxy host over ssh to
    ``WB_CONF.hypervisor_host`` and filters the domain list with
    ``grep -w``.

    ``--all`` is required: plain ``virsh list --name`` prints only running
    domains, so a powered-off host could not be resolved when it has to be
    started again.

    :param host: node name to search for in the virsh domain list.
    :returns: the stripped output of the remote command.
    """
    cmd = ("timeout 10 ssh {} sudo virsh list --all --name "
           "| grep -w {}").format(WB_CONF.hypervisor_host, host)
    return cls.proxy_host_client.exec_command(cmd).strip()
@classmethod
def is_host_state_is_shut_off(cls, host):
    """Tell whether *host* shows up among the shut-off virsh domains.

    The remote pipeline ends with ``|| true`` so that a ``grep`` miss does
    not make the command itself fail; a miss simply yields output that does
    not contain *host*.

    :param host: name to look for in ``virsh list --state-shutoff`` output.
    :returns: True when *host* occurs in the command output, else False.
    """
    hypervisor = WB_CONF.hypervisor_host
    cmd = ("timeout 10 ssh {} virsh list --state-shutoff | grep -w {} "
           "|| true").format(hypervisor, host)
    return host in cls.proxy_host_client.exec_command(cmd)
@classmethod
def is_host_loginable(cls, host):
    """Tell whether *host* answers ``hostname`` over a nested ssh session.

    The command hops through ``WB_CONF.hypervisor_host`` and ends with
    ``|| true`` so a failed login produces output without *host* rather
    than a failed command.

    :param host: ssh target as seen from the hypervisor.
    :returns: True when *host* occurs in the command output, else False.
    """
    hypervisor = WB_CONF.hypervisor_host
    cmd = "timeout 10 ssh {} ssh {} hostname || true".format(
        hypervisor, host)
    reply = cls.proxy_host_client.exec_command(cmd)
    return host in reply
@classmethod
def power_off_host(cls, host):
    """Power off *host* with ``virsh destroy`` and wait until it is shut off.

    :param host: node name; it is resolved to a virsh domain name through
        ``find_host_virsh_name``.
    :raises cls.skipException: when ``WB_CONF.run_power_operations_tests``
        is not enabled.
    """
    if not WB_CONF.run_power_operations_tests:
        raise cls.skipException("Power operations are not allowed")
    # The host must be passed on: find_host_virsh_name() takes a required
    # ``host`` argument and raises TypeError when called without it.
    cmd = "timeout 10 ssh {} sudo virsh destroy {}".format(
        WB_CONF.hypervisor_host, cls.find_host_virsh_name(host))
    cls.proxy_host_client.exec_command(cmd)
    # Poll every 5 seconds, for up to 30 seconds, until virsh reports the
    # domain as shut off.
    common_utils.wait_until_true(
        lambda: cls.is_host_state_is_shut_off(host),
        timeout=30, sleep=5)
@classmethod
def power_on_host(cls, host):
    """Start *host* with ``virsh start`` and wait until it accepts logins.

    :param host: node name; it is resolved to a virsh domain name through
        ``find_host_virsh_name``.
    :raises cls.skipException: when ``WB_CONF.run_power_operations_tests``
        is not enabled.
    """
    if not WB_CONF.run_power_operations_tests:
        raise cls.skipException("Power operations are not allowed")
    # The host must be passed on: find_host_virsh_name() takes a required
    # ``host`` argument and raises TypeError when called without it.
    cmd = "timeout 10 ssh {} sudo virsh start {}".format(
        WB_CONF.hypervisor_host, cls.find_host_virsh_name(host))
    cls.proxy_host_client.exec_command(cmd)
    # TODO(rsafrono): implement and apply additional health checks
    common_utils.wait_until_true(
        lambda: cls.is_host_loginable(host),
        timeout=120, sleep=5)
@classmethod
def reboot_host(cls, host):
    """Reboot *host* with ``virsh reboot`` and wait until it accepts logins.

    :param host: node name; it is resolved to a virsh domain name through
        ``find_host_virsh_name``.
    :raises cls.skipException: when ``WB_CONF.run_power_operations_tests``
        is not enabled.
    """
    if not WB_CONF.run_power_operations_tests:
        raise cls.skipException("Power operations are not allowed")
    # The host must be passed on: find_host_virsh_name() takes a required
    # ``host`` argument and raises TypeError when called without it.
    cmd = "timeout 10 ssh {} sudo virsh reboot {}".format(
        WB_CONF.hypervisor_host, cls.find_host_virsh_name(host))
    cls.proxy_host_client.exec_command(cmd)
    # Poll every 5 seconds, for up to 120 seconds, until a login succeeds.
    common_utils.wait_until_true(
        lambda: cls.is_host_loginable(host),
        timeout=120, sleep=5)
def ensure_overcloud_nodes_active(self):
    """Power on every known node that is currently reported as shut off.

    Node data is discovered first when this object has no ``nodes``
    attribute yet.
    """
    if not hasattr(self, 'nodes'):
        self.discover_nodes()
    node_names = (entry['name'] for entry in self.nodes)
    for node_name in node_names:
        if not self.is_host_state_is_shut_off(node_name):
            continue
        self.power_on_host(node_name)
class BaseTempestTestCaseAdvanced(BaseTempestWhiteboxTestCase): class BaseTempestTestCaseAdvanced(BaseTempestWhiteboxTestCase):
"""Base class skips test suites unless advanced image is available, """Base class skips test suites unless advanced image is available,
@ -1078,11 +1025,15 @@ class TrafficFlowTest(BaseTempestWhiteboxTestCase):
"is not configured.") "is not configured.")
cls.discover_nodes() cls.discover_nodes()
if WB_CONF.openstack_type == 'podified': if WB_CONF.openstack_type == 'podified':
cls.set_ovs_pods_for_nodes()
@classmethod
def set_ovs_pods_for_nodes(cls):
cmd = ("{} get pods --field-selector=status.phase=Running " cmd = ("{} get pods --field-selector=status.phase=Running "
"-o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name " "-o custom-columns=NODE:.spec.nodeName,NAME:.metadata.name "
"| grep ovn-controller-ovs".format(cls.OC)) "-l service=ovn-controller-ovs".format(cls.OC))
output = cls.proxy_host_client.exec_command( output = cls.proxy_host_client.exec_command(
cmd).strip().split('\n') cmd).strip().splitlines()
for line in output: for line in output:
for node in cls.nodes: for node in cls.nodes:
if node['name'] == line.split()[0]: if node['name'] == line.split()[0]:
@ -1108,7 +1059,7 @@ class TrafficFlowTest(BaseTempestWhiteboxTestCase):
return ','.join(interfaces) return ','.join(interfaces)
for node in self.nodes: for node in self.nodes:
if not(node['is_controller'] or if not (node['is_controller'] or
node['is_compute'] or node['is_compute'] or
node['is_networker']): node['is_networker']):
LOG.debug('Traffic is not captured on node %s because it is ' LOG.debug('Traffic is not captured on node %s because it is '
@ -1408,6 +1359,85 @@ class BaseTempestTestCaseOvn(BaseTempestWhiteboxTestCase):
self.assertEqual(output, '') self.assertEqual(output, '')
class BaseDisruptiveTempestTestCase(BaseTempestWhiteboxTestCase):
    """Base class for tests that power-cycle nodes through ``virsh``.

    Every command is executed by ``cls.proxy_host_client`` and reaches the
    hypervisor named by ``WB_CONF.hypervisor_host`` over ssh.
    """

    @classmethod
    def resource_setup(cls):
        """Skip the whole class when virsh cannot be run on the hypervisor."""
        super(BaseDisruptiveTempestTestCase, cls).resource_setup()
        try:
            cls.proxy_host_client.exec_command(
                "timeout 10 ssh {} virsh list".format(WB_CONF.hypervisor_host))
        except lib_exceptions.SSHExecCommandFailed:
            raise cls.skipException(
                "No access to virsh tool on hypervisor node. Please make sure "
                "that hypervisor_host is configured properly and/or virsh "
                "is deployed there.")

    @classmethod
    def find_host_virsh_name(cls, host):
        """Return the virsh domain name on the hypervisor matching *host*.

        ``--all`` is used so that domains which are not running are listed
        too; the list is filtered with ``grep -w``.

        :param host: node name to search for in the virsh domain list.
        :returns: the stripped output of the remote command.
        """
        cmd = ("timeout 10 ssh {} sudo virsh list --all --name "
               "| grep -w {}").format(
            WB_CONF.hypervisor_host, host)
        return cls.proxy_host_client.exec_command(cmd).strip()

    @classmethod
    def is_host_state_is_shut_off(cls, host):
        """Tell whether *host* shows up among the shut-off virsh domains.

        The pipeline ends with ``|| true`` so a ``grep`` miss yields output
        without *host* instead of a failed command.

        :returns: True when *host* occurs in the command output.
        """
        # NOTE(review): this query runs virsh without sudo while
        # find_host_virsh_name() uses sudo - confirm both see the same
        # libvirt connection.
        cmd = ("timeout 10 ssh {} virsh list --state-shutoff | grep -w {} "
               "|| true".format(WB_CONF.hypervisor_host, host))
        output = cls.proxy_host_client.exec_command(cmd)
        return host in output

    @classmethod
    def is_host_loginable(cls, host):
        """Tell whether *host* answers ``hostname`` over a nested ssh hop.

        :returns: True when *host* occurs in the command output.
        """
        cmd = "timeout 10 ssh {} ssh {} hostname || true".format(
            WB_CONF.hypervisor_host, host)
        output = cls.proxy_host_client.exec_command(cmd)
        return host in output

    @classmethod
    def _run_virsh_power_action(cls, action, host):
        """Run ``sudo virsh <action>`` on the domain that matches *host*.

        Shared by the power_* / reboot entry points so the configuration
        guard and the command layout live in one place.

        :raises cls.skipException: when
            ``WB_CONF.run_power_operations_tests`` is not enabled. The check
            happens before any remote command is issued.
        """
        if not WB_CONF.run_power_operations_tests:
            raise cls.skipException("Power operations are not allowed")
        cmd = "timeout 10 ssh {} sudo virsh {} {}".format(
            WB_CONF.hypervisor_host, action, cls.find_host_virsh_name(host))
        cls.proxy_host_client.exec_command(cmd)

    @classmethod
    def power_off_host(cls, host):
        """Power off *host* with ``virsh destroy`` and wait for shut-off.

        :raises cls.skipException: when power operations are not enabled.
        """
        cls._run_virsh_power_action('destroy', host)
        # Poll every 5 seconds, for up to 30 seconds.
        common_utils.wait_until_true(
            lambda: cls.is_host_state_is_shut_off(host),
            timeout=30, sleep=5)

    @classmethod
    def power_on_host(cls, host):
        """Start *host* with ``virsh start`` and wait until it accepts logins.

        :raises cls.skipException: when power operations are not enabled.
        """
        cls._run_virsh_power_action('start', host)
        # TODO(rsafrono): implement and apply additional health checks
        common_utils.wait_until_true(
            lambda: cls.is_host_loginable(host),
            timeout=120, sleep=5)

    @classmethod
    def reboot_host(cls, host):
        """Reboot *host* with ``virsh reboot`` and wait until it accepts logins.

        :raises cls.skipException: when power operations are not enabled.
        """
        cls._run_virsh_power_action('reboot', host)
        # Poll every 5 seconds, for up to 120 seconds.
        common_utils.wait_until_true(
            lambda: cls.is_host_loginable(host),
            timeout=120, sleep=5)

    def ensure_overcloud_nodes_active(self):
        """Checks all openstack nodes are up, otherwise activates them.

        Iterates over every domain name printed by
        ``virsh list --all --name`` on the hypervisor and powers on those
        reported as shut off.
        """
        # NOTE(review): the names used here are virsh domain names; they are
        # later used as the ssh target in is_host_loginable() - presumably
        # domain names resolve as hostnames from the hypervisor; confirm.
        hosts = self.proxy_host_client.exec_command(
            "timeout 10 ssh {} sudo virsh list --all --name".format(
                WB_CONF.hypervisor_host)).strip().split()
        for host in hosts:
            if self.is_host_state_is_shut_off(host):
                self.power_on_host(host)
# user_data_cmd is used to generate a VLAN interface on VM instances with PF # user_data_cmd is used to generate a VLAN interface on VM instances with PF
# ports # ports
user_data_cmd = """ user_data_cmd = """

View File

@ -13,7 +13,6 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
import re import re
import testtools
from neutron_lib import constants as lib_constants from neutron_lib import constants as lib_constants
from neutron_tempest_plugin.common import ssh from neutron_tempest_plugin.common import ssh
@ -290,7 +289,8 @@ class InternalDNSInterruptionsTestOvn(InternalDNSBaseOvn):
class InternalDNSInterruptionsAdvancedTestOvn( class InternalDNSInterruptionsAdvancedTestOvn(
InternalDNSBaseOvn, InternalDNSBaseOvn,
base.BaseTempestTestCaseAdvanced): base.BaseTempestTestCaseAdvanced,
base.BaseDisruptiveTempestTestCase):
"""Tests internal DNS capabilities with interruptions in overcloud, """Tests internal DNS capabilities with interruptions in overcloud,
on advanced image only. on advanced image only.
""" """
@ -318,8 +318,6 @@ class InternalDNSInterruptionsAdvancedTestOvn(
@decorators.attr(type='slow') @decorators.attr(type='slow')
@utils.requires_ext(extension="dns-integration", service="network") @utils.requires_ext(extension="dns-integration", service="network")
@testtools.skipIf(WB_CONF.openstack_type == 'podified',
'Not yet adapted for podified environment')
@decorators.idempotent_id('e6c5dbea-d704-4cda-bb92-a5bfd0aa1bb2') @decorators.idempotent_id('e6c5dbea-d704-4cda-bb92-a5bfd0aa1bb2')
def test_ovn_dns_name_after_networker_reboot(self): def test_ovn_dns_name_after_networker_reboot(self):
"""Tests that OpenStack port, guest VM and OVN NB database have correct """Tests that OpenStack port, guest VM and OVN NB database have correct
@ -355,7 +353,13 @@ class InternalDNSInterruptionsAdvancedTestOvn(
vm_1['fip'] = self.create_floatingip(port=dns_port) vm_1['fip'] = self.create_floatingip(port=dns_port)
vm_1['ssh_client'] = self._create_ssh_client( vm_1['ssh_client'] = self._create_ssh_client(
vm_1['fip']['floating_ip_address']) vm_1['fip']['floating_ip_address'])
self._get_router_and_nodes_info() self._get_router_and_nodes_info()
if self.get_node_setting(self.router_gateway_chassis, 'is_controller'):
raise self.skipException(
"The test currently does not support a required action "
"when gateway chassis is on a node with OSP control plane "
"services rather than on a standalone networker node.")
# soft shutdown master networker node # soft shutdown master networker node
self.power_off_host(self.router_gateway_chassis) self.power_off_host(self.router_gateway_chassis)
# validate hostname (dns-name) using API, guest VM, # validate hostname (dns-name) using API, guest VM,

View File

@ -34,14 +34,12 @@ WB_CONF = config.CONF.whitebox_neutron_plugin_options
LOG = log.getLogger(__name__) LOG = log.getLogger(__name__)
@testtools.skipIf(WB_CONF.openstack_type == 'podified', class L3haOvnCommon(base.TrafficFlowTest, base.BaseTempestTestCaseOvn):
'Not yet adapted for podified environment')
class L3haOvnTest(base.TrafficFlowTest, base.BaseTempestTestCaseOvn):
credentials = ['primary', 'admin'] credentials = ['primary', 'admin']
@classmethod @classmethod
def resource_setup(cls): def resource_setup(cls):
super(L3haOvnTest, cls).resource_setup() super(L3haOvnCommon, cls).resource_setup()
cls.setup_api_microversion_fixture( cls.setup_api_microversion_fixture(
compute_microversion='2.74') compute_microversion='2.74')
@ -95,17 +93,34 @@ class L3haOvnTest(base.TrafficFlowTest, base.BaseTempestTestCaseOvn):
"when gateway chassis is on controller.") "when gateway chassis is on controller.")
def _setup(self): def _setup(self):
def create_router_candidate():
router = self.create_router_by_client() router = self.create_router_by_client()
self.router_port = self.os_admin.network_client.list_ports( self.router_port = self.os_admin.network_client.list_ports(
device_id=router['id'], device_id=router['id'],
device_owner=lib_constants.DEVICE_OWNER_ROUTER_GW)['ports'][0] device_owner=lib_constants.DEVICE_OWNER_ROUTER_GW)['ports'][0]
self.chassis_list = self.get_router_gateway_chassis_list( self.chassis_list = self.get_router_gateway_chassis_list(
self.router_port['id']) self.router_port['id'])
self._validate_gateway_chassis(self.chassis_list[0])
chassis_name = self.get_router_gateway_chassis_by_id( chassis_name = self.get_router_gateway_chassis_by_id(
self.chassis_list[0]) self.chassis_list[0])
LOG.debug("router chassis name = {}".format(chassis_name)) LOG.debug("router chassis name = {}".format(chassis_name))
return router, chassis_name
if (WB_CONF.avoid_disrupting_controllers and
self.get_standalone_networkers()):
attempts = 5
controller_nodes = [node['name'] for node in self.nodes
if node['is_controller']]
for i in range(1, attempts):
LOG.debug("Router creation attempt {}".format(i))
router, chassis_name = create_router_candidate()
if chassis_name in controller_nodes:
continue
else:
break
else:
router, chassis_name = create_router_candidate()
self._validate_gateway_chassis(self.chassis_list[0])
# Since we are going to spawn VMs with 'host' option which # Since we are going to spawn VMs with 'host' option which
# is available only for admin user, we create security group # is available only for admin user, we create security group
# and keypair also as admin # and keypair also as admin
@ -160,9 +175,16 @@ class L3haOvnTest(base.TrafficFlowTest, base.BaseTempestTestCaseOvn):
self.ignore_outbound = False self.ignore_outbound = False
self.verify_routing_via_chassis(self.chassis_list[0]) self.verify_routing_via_chassis(self.chassis_list[0])
@testtools.skipUnless(WB_CONF.run_power_operations_tests, def refresh_nodes_data(self):
self.discover_nodes()
if WB_CONF.openstack_type == 'podified':
self.set_ovs_pods_for_nodes()
@testtools.skipUnless(WB_CONF.run_power_operations_tests,
"run_power_operations_tests conf value is not " "run_power_operations_tests conf value is not "
"enabled.") "enabled.")
class L3haOvnDisruptiveTest(L3haOvnCommon, base.BaseDisruptiveTempestTestCase):
@decorators.idempotent_id('cf47a5e3-35cb-423c-84af-4cc6d389cfbd') @decorators.idempotent_id('cf47a5e3-35cb-423c-84af-4cc6d389cfbd')
@decorators.attr(type='slow') @decorators.attr(type='slow')
def test_l3ha_reboot_node(self): def test_l3ha_reboot_node(self):
@ -197,13 +219,15 @@ class L3haOvnTest(base.TrafficFlowTest, base.BaseTempestTestCaseOvn):
gateway_node = self.router_gateway_chassis gateway_node = self.router_gateway_chassis
self.power_off_host(gateway_node) self.power_off_host(gateway_node)
self.discover_nodes() self.refresh_nodes_data()
self.verify_routing_via_chassis(self.chassis_list[1]) self.verify_routing_via_chassis(self.chassis_list[1])
self.power_on_host(gateway_node) self.power_on_host(gateway_node)
self.discover_nodes() self.refresh_nodes_data()
self.verify_routing_via_chassis(self.chassis_list[0]) self.verify_routing_via_chassis(self.chassis_list[0])
class L3haOvnTest(L3haOvnCommon):
@decorators.idempotent_id('f8fe1f69-a87f-41d8-ac6e-ed7905438338') @decorators.idempotent_id('f8fe1f69-a87f-41d8-ac6e-ed7905438338')
@decorators.attr(type='slow') @decorators.attr(type='slow')
def test_l3ha_bring_down_interface(self): def test_l3ha_bring_down_interface(self):
@ -234,16 +258,7 @@ class L3haOvnTest(base.TrafficFlowTest, base.BaseTempestTestCaseOvn):
""" """
self._setup() self._setup()
node_client = self.find_node_client(self.router_gateway_chassis) node_client = self.find_node_client(self.router_gateway_chassis)
# We need to find a physical interface that is passing tenant traffic interface = WB_CONF.node_tenant_interface
bridge = node_client.exec_command(
"sudo ovs-vsctl get open . external_ids:ovn-bridge-mappings | "
r"sed 's/^\".*tenant:\(.*\).*\"$/\1/'").rstrip()
physical_interfaces = node_client.exec_command(
"find /sys/class/net -type l -not -lname '*virtual*' "
"-printf '%f\n'").rstrip().split('\n')
bridge_interfaces = node_client.exec_command(
"sudo ovs-vsctl list-ifaces " + bridge).rstrip().split('\n')
interface = (set(physical_interfaces) & set(bridge_interfaces)).pop()
self.addCleanup( self.addCleanup(
utils.interface_state_set, node_client, interface, utils.interface_state_set, node_client, interface,
constants.STATE_UP) constants.STATE_UP)