Add faults ovn_bgp_agent and frr tests

This patch adds two faults tests. They restart the ovn_bgp_agent and
the frr services, respectively, on all the overcloud nodes where these
services run.
When BGP is configured with expose_tenant_networks enabled, the faults
tests also create an extra VM without a FIP to verify connectivity to
its tenant IP addresses.

Change-Id: I118427b41e1c97d075572a5ebee21025ac5cc967
This commit is contained in:
Eduardo Olivares 2023-03-31 17:20:39 +02:00
parent bc07bea7b4
commit a3116bf0e1
14 changed files with 130 additions and 25 deletions

View File

@ -35,6 +35,8 @@ OPENVSWITCH_AGENT = _agent.OPENVSWITCH_AGENT
OVN_CONTROLLER = _agent.OVN_CONTROLLER
OVN_METADATA_AGENT = _agent.OVN_METADATA_AGENT
NEUTRON_OVN_METADATA_AGENT = _agent.NEUTRON_OVN_METADATA_AGENT
OVN_BGP_AGENT = _agent.OVN_BGP_AGENT
FRR = _agent.FRR
DEFAULT_SG_NAME = _security_group.DEFAULT_SG_NAME
STATEFUL_OVN_ACTION = _security_group.STATEFUL_OVN_ACTION
STATELESS_OVN_ACTION = _security_group.STATELESS_OVN_ACTION

View File

@ -38,6 +38,8 @@ OVN_CONTROLLER = 'ovn-controller'
# called 'neutron-ovn-metadata-agent'
OVN_METADATA_AGENT = 'networking-ovn-metadata-agent'
NEUTRON_OVN_METADATA_AGENT = 'neutron-ovn-metadata-agent'
OVN_BGP_AGENT = 'ovn-bgp-agent'
FRR = 'frr'
class AgentNotFoundOnHost(tobiko.TobikoException):

View File

@ -35,6 +35,7 @@ test_evacuable_server_creation = _nova.test_evacuable_server_creation
test_server_creation = _nova.test_server_creation
test_servers_creation = _nova.test_servers_creation
test_server_creation_and_shutoff = _nova.test_server_creation_and_shutoff
test_server_creation_no_fip = _nova.test_server_creation_no_fip
TestServerCreationStack = _nova.TestServerCreationStack
TestEvacuableServerCreationStack = _nova.TestEvacuableServerCreationStack
test_ovsdb_transactions = _neutron.test_ovsdb_transactions

View File

@ -17,7 +17,6 @@ from __future__ import absolute_import
import os
import typing # noqa
import time
import tobiko
from tobiko.shell import ping
@ -25,6 +24,7 @@ from tobiko.shell import sh
from tobiko.openstack import nova
from tobiko.openstack.stacks import _cirros
from tobiko.openstack.stacks import _nova
from tobiko.openstack.stacks import _neutron
class TestServerCreationStack(_cirros.CirrosServerStackFixture):
@ -38,6 +38,27 @@ def test_server_creation(stack=TestServerCreationStack):
number_of_servers=0).first
class NetworkNoFipStackFixture(_neutron.NetworkStackFixture):
    """Network stack used by servers that are created with no floating IP.

    The stack registers its own cleanup while it is being set up, so that
    it gets deleted at the end of the test.
    """

    def setup_fixture(self):
        """Set up the stack and register its cleanup with tobiko."""
        super().setup_fixture()
        # the cleanup is looked up on this class and receives the instance
        # as its explicit argument
        cleanup = NetworkNoFipStackFixture.cleanup_fixture
        tobiko.add_cleanup(cleanup, self)
class TestServerNoFipCreationStack(_cirros.CirrosServerStackFixture):
    """Nova server stack without floating IP, used to test server creation.
    """

    # the server is attached to the dedicated no-FIP network stack
    network_stack = tobiko.required_fixture(NetworkNoFipStackFixture)

    # no floating IP is requested for this server
    has_floating_ip = False
def test_server_creation_no_fip():
    """Run the server creation test using a server without floating IP.

    :returns: the result of test_server_creation when called with the
        TestServerNoFipCreationStack stack
    """
    no_fip_stack = TestServerNoFipCreationStack
    return test_server_creation(stack=no_fip_stack)
class TestEvacuableServerCreationStack(_cirros.EvacuableServerStackFixture):
"""Nova instance intended to be used for testing server creation"""
@ -86,14 +107,16 @@ def test_servers_creation(stack=TestServerCreationStack,
test_case.assertEqual(number_of_servers or 1, len(server_ids))
test_case.assertFalse(server_ids & initial_servers_ids)
# sleep for 20 sec , ensure no race condition with ssh
time.sleep(20)
# Test SSH connectivity to floating IP address
for fixture in fixtures:
# Test pinging to floating IP address (or fixed IP)
if fixture.floating_ip_address is not None:
pingable_ips = [fixture.floating_ip_address]
else:
pingable_ips = [fixed_ip['ip_address']
for fixed_ip in fixture.fixed_ips]
ping.assert_reachable_hosts(pingable_ips)
# Test SSH connectivity to floating IP address (or fixed IP)
test_case.assertTrue(sh.get_hostname(ssh_client=fixture.ssh_client))
# Test pinging to floating IP address
ping.assert_reachable_hosts(fixture.floating_ip_address
for fixture in fixtures)
return fixtures

View File

@ -62,3 +62,4 @@ OpenStackTopologyNode = _topology.OpenStackTopologyNode
set_default_openstack_topology_class = (
_topology.set_default_openstack_topology_class)
verify_osp_version = _topology.verify_osp_version
get_config_setting = _topology.get_config_setting

View File

@ -15,6 +15,7 @@ from __future__ import absolute_import
import collections
from collections import abc
import configparser
import functools
import re
import typing
@ -302,13 +303,16 @@ class OpenStackTopology(tobiko.SharedFixture):
neutron.OVN_METADATA_AGENT: 'devstack@q-ovn-metadata-agent',
neutron.NEUTRON_OVN_METADATA_AGENT: 'devstack@q-ovn-metadata-agent',
neutron.OVN_CONTROLLER: 'ovn-controller'
# TODO(eolivare): ovn_bgp_agent on devstack?
# TODO(eolivare): frr on devstack?
}
agent_to_container_name_mappings: typing.Dict[str, str] = {}
has_containers = False
config_file_mappings = {
'ml2_conf.ini': '/etc/neutron/plugins/ml2/ml2_conf.ini'
'ml2_conf.ini': '/etc/neutron/plugins/ml2/ml2_conf.ini',
'bgp-agent.conf': '/etc/ovn-bgp-agent/bgp-agent.conf'
}
_connections = tobiko.required_fixture(
@ -661,6 +665,25 @@ def get_config_file_path(file_name: str) -> str:
return topology.get_config_file_path(file_name)
def get_config_setting(file_name: str,
                       ssh_client: ssh.SSHClientFixture,
                       param: str,
                       section: typing.Optional[str] = None) \
        -> typing.Optional[str]:
    """Read one option from an INI-style config file on a remote host.

    The file is located with get_config_file_path(), dumped with ``cat``
    (using sudo) through the given SSH client and parsed with configparser.

    :param file_name: key understood by get_config_file_path()
        (e.g. 'bgp-agent.conf')
    :param ssh_client: SSH client of the host holding the file
    :param param: name of the option to look up
    :param section: section to look into; None (or the parser default
        section name, 'DEFAULT') means the default section
    :returns: the raw option value as a string, or None when the section or
        the option is not found
    """
    config_file_path = get_config_file_path(file_name)
    config_file_content = sh.execute(f'cat {config_file_path}',
                                     ssh_client=ssh_client,
                                     sudo=True).stdout
    # strict=False: repeated options/sections must not abort the parsing
    # (the last value wins); interpolation=None: a literal '%' in a value
    # must be returned as is instead of raising an interpolation error
    config = configparser.ConfigParser(strict=False, interpolation=None)
    config.read_string(config_file_content)

    # the default section is never listed by sections()/has_section(), so
    # it has to be handled explicitly, also when requested by name
    if section is None or section == config.default_section:
        return config.defaults().get(param)
    if not config.has_section(section):
        return None
    return config[section].get(param)
def get_rhosp_version():
ssh_client = list_openstack_nodes(group='controller')[0].ssh_client
rhosp_release = sh.execute('cat /etc/rhosp-release',

View File

@ -135,17 +135,15 @@ def list_systemd_units(*units: SystemdUnitType,
def stop_systemd_units(*units: SystemdUnitType,
ssh_client: ssh.SSHClientType = None,
sudo: bool = None):
ssh_client: ssh.SSHClientType = None):
command = systemctl_command('stop', *units)
_execute.execute(command, ssh_client=ssh_client, sudo=sudo)
_execute.execute(command, ssh_client=ssh_client, sudo=True)
def start_systemd_units(*units: SystemdUnitType,
ssh_client: ssh.SSHClientType = None,
sudo: bool = None):
ssh_client: ssh.SSHClientType = None):
command = systemctl_command('start', *units)
_execute.execute(command, ssh_client=ssh_client, sudo=sudo)
_execute.execute(command, ssh_client=ssh_client, sudo=True)
def wait_for_active_systemd_units(*units: SystemdUnitType,

View File

@ -470,6 +470,17 @@ def reset_ovndb_master_container():
container_host=node)
def restart_service_on_all_nodes(service):
    """Stop and then start a systemd service on every node running it.

    The overcloud nodes where the service is running are looked up first.
    The unit is stopped on all of those nodes before it is started again on
    any of them. No health check is performed by this function.

    :param service: name of the systemd unit to restart (intended for the
        OVN BGP agent or the FRR service)
    """
    hostnames = tripleo.get_overcloud_nodes_running_service(service)
    target_nodes = topology.list_openstack_nodes(hostnames=hostnames)
    # first pass stops the unit on all the nodes, second pass starts it
    for action in (sh.stop_systemd_units, sh.start_systemd_units):
        for target in target_nodes:
            action(service, ssh_client=target.ssh_client)
def kill_rabbitmq_service():
"""kill a rabbit process on a random controller,
check in pacemaker it is down"""

View File

@ -70,6 +70,15 @@ def overcloud_health_checks(passive_checks_only=False,
# check vm create with ssh and ping checks
def check_vm_create():
    """Check that servers can be created on the cloud.

    A server is always created with the regular server creation test. When
    the OVN BGP agent is running and its 'expose_tenant_networks' setting
    is 'true' (case-insensitive), a server without floating IP is created
    as well.
    """
    tests.test_server_creation()
    if not overcloud.is_ovn_bgp_agent_running():
        return

    # read the agent configuration from a networker node, falling back to
    # a controller when the topology has no networker group
    try:
        bgp_node = topology.find_openstack_node(group='networker')
    except topology.NoSuchOpenStackTopologyNodeGroup:
        bgp_node = topology.find_openstack_node(group='controller')

    setting = topology.get_config_setting(
        'bgp-agent.conf', bgp_node.ssh_client, 'expose_tenant_networks')
    if (setting or '').lower() == 'true':
        tests.test_server_creation_no_fip()
# check cluster failed statuses
@ -280,6 +289,20 @@ class DisruptTripleoNodesTest(testtools.TestCase):
cloud_disruptions.test_controllers_shutdown()
OvercloudHealthCheck.run_after()
@overcloud.skip_unless_ovn_bgp_agent
def test_restart_ovn_bgp_agents(self):
    """Restart the OVN BGP agent service between two health checks."""
    OvercloudHealthCheck.run_before()
    bgp_agent_service = topology.get_agent_service_name(
        neutron.OVN_BGP_AGENT)
    cloud_disruptions.restart_service_on_all_nodes(bgp_agent_service)
    OvercloudHealthCheck.run_after()
@overcloud.skip_unless_ovn_bgp_agent
def test_restart_frr(self):
    """Restart the FRR service between two health checks."""
    OvercloudHealthCheck.run_before()
    frr_service = topology.get_agent_service_name(neutron.FRR)
    cloud_disruptions.restart_service_on_all_nodes(frr_service)
    OvercloudHealthCheck.run_after()
# [..]
# more tests to follow
# run health checks

View File

@ -201,9 +201,7 @@ class OctaviaServicesFaultTest(testtools.TestCase):
for service, ssh_clients in services_to_stop.items():
for ssh_client in ssh_clients:
sh.stop_systemd_units(service,
ssh_client=ssh_client,
sudo=True)
sh.stop_systemd_units(service, ssh_client=ssh_client)
LOG.debug(f'We stopped {service} on {ssh_client.host}')
self.loadbalancer_stack.wait_for_octavia_service()
@ -238,9 +236,7 @@ class OctaviaServicesFaultTest(testtools.TestCase):
for service, ssh_clients in services_to_stop.items():
for ssh_client in ssh_clients:
sh.start_systemd_units(service,
ssh_client=ssh_client,
sudo=True)
sh.start_systemd_units(service, ssh_client=ssh_client)
LOG.debug(f'We started {service} on {ssh_client.host}')

View File

@ -19,6 +19,7 @@ from tobiko.tripleo import _rhosp
from tobiko.tripleo import _topology as topology
from tobiko.tripleo import _undercloud as undercloud
from tobiko.tripleo import containers
from tobiko.tripleo import services
get_tripleo_ansible_inventory = _ansible.get_tripleo_ansible_inventory
@ -74,3 +75,6 @@ undercloud_keystone_credentials = undercloud.undercloud_keystone_credentials
undercloud_keystone_session = undercloud.undercloud_keystone_session
undercloud_ssh_client = undercloud.undercloud_ssh_client
undercloud_version = undercloud.undercloud_version
get_overcloud_nodes_running_service = \
services.get_overcloud_nodes_running_service

View File

@ -22,6 +22,7 @@ from oslo_log import log
import tobiko
from tobiko import config
from tobiko import tripleo
from tobiko.openstack import keystone
from tobiko.openstack import ironic
from tobiko.openstack import metalsmith
@ -341,6 +342,16 @@ skip_unless_ovn_using_ha = tobiko.skip_unless(
'OVN does not use HA DB model', is_ovn_using_ha)
def is_ovn_bgp_agent_running():
    """Tell whether the OVN BGP agent runs on at least one overcloud node.

    :returns: True when at least one overcloud node is reported as running
        the OVN BGP agent service, False otherwise
    """
    service_name = topology.get_agent_service_name(neutron.OVN_BGP_AGENT)
    running_nodes = tripleo.get_overcloud_nodes_running_service(
        service_name)
    return len(running_nodes) > 0
# Decorator built from tobiko.skip_unless with a skip reason and the
# is_ovn_bgp_agent_running predicate; presumably it skips the decorated
# test when the predicate returns False (confirm against tobiko.skip_unless)
skip_unless_ovn_bgp_agent = tobiko.skip_unless(
'The OVN BGP Agent is not running on the System Under Test',
is_ovn_bgp_agent_running)
@functools.lru_cache()
def are_kexec_tools_installed():
for controller in topology.list_openstack_nodes(group='controller'):

View File

@ -45,7 +45,9 @@ class TripleoTopology(topology.OpenStackTopology):
neutron.METADATA_AGENT: 'tripleo_neutron_metadata_agent',
neutron.OVN_METADATA_AGENT: 'tripleo_ovn_metadata_agent',
neutron.NEUTRON_OVN_METADATA_AGENT: 'tripleo_ovn_metadata_agent',
neutron.OVN_CONTROLLER: 'tripleo_ovn_controller'
neutron.OVN_CONTROLLER: 'tripleo_ovn_controller',
neutron.OVN_BGP_AGENT: 'tripleo_ovn_bgp_agent',
neutron.FRR: 'tripleo_frr'
}
agent_to_container_name_mappings = {
@ -55,14 +57,18 @@ class TripleoTopology(topology.OpenStackTopology):
neutron.METADATA_AGENT: 'neutron_metadata_agent',
neutron.OVN_METADATA_AGENT: 'ovn_metadata_agent',
neutron.NEUTRON_OVN_METADATA_AGENT: 'ovn_metadata_agent',
neutron.OVN_CONTROLLER: 'ovn_controller'
neutron.OVN_CONTROLLER: 'ovn_controller',
neutron.OVN_BGP_AGENT: 'ovn_bgp_agent',
neutron.FRR: 'frr'
}
has_containers = True
config_file_mappings = {
'ml2_conf.ini': '/var/lib/config-data/puppet-generated/neutron'
'/etc/neutron/plugins/ml2/ml2_conf.ini'
'/etc/neutron/plugins/ml2/ml2_conf.ini',
'bgp-agent.conf': '/var/lib/config-data/ansible-generated/'
'ovn-bgp-agent/etc/ovn-bgp-agent/bgp-agent.conf'
}
# TODO: add more known subgrups here

View File

@ -67,9 +67,13 @@ def get_overcloud_nodes_running_service(service):
"""
oc_procs_df = overcloud.get_overcloud_nodes_dataframe(
get_overcloud_node_services_table)
# remove the ".service" suffix
oc_procs_df = oc_procs_df.replace(to_replace={'UNIT': '.service'},
value='',
regex=True)
oc_nodes_running_service = oc_procs_df.query('UNIT=="{}"'.format(service))[
'overcloud_node'].unique()
return oc_nodes_running_service
return oc_nodes_running_service.tolist()
def check_if_process_running_on_overcloud(process):