test_restart_frr not applied on all controllers
If FRR is restarted on all controllers simultaneously, quorum is lost from the pcs cluster's perspective (which is not supported), and recovering from that situation may not always work. Change-Id: I7b8dd4deb348c5e33b50e07ee274437ef25f6c78
This commit is contained in:
parent
9f0d47e870
commit
a6e9824990
@ -584,6 +584,10 @@ def restart_service_on_all_nodes(service):
|
||||
is running and check the cloud is healthy after they are started again"""
|
||||
node_names = tripleo.get_overcloud_nodes_running_service(service)
|
||||
nodes = topology.list_openstack_nodes(hostnames=node_names)
|
||||
restart_service_on_nodes(service, nodes)
|
||||
|
||||
|
||||
def restart_service_on_nodes(service, nodes):
|
||||
for node in nodes:
|
||||
sh.stop_systemd_units(service, ssh_client=node.ssh_client)
|
||||
for node in nodes:
|
||||
|
@ -15,6 +15,7 @@
|
||||
# under the License.
|
||||
from __future__ import absolute_import
|
||||
|
||||
import random
|
||||
import typing
|
||||
|
||||
import pytest
|
||||
@ -29,6 +30,7 @@ from tobiko.openstack import nova as nova_osp
|
||||
from tobiko.openstack import octavia
|
||||
from tobiko.openstack import topology
|
||||
from tobiko.openstack import tests
|
||||
from tobiko.shell import sh
|
||||
from tobiko.tests.faults.ha import cloud_disruptions
|
||||
from tobiko.tripleo import pacemaker
|
||||
from tobiko.tripleo import processes
|
||||
@ -359,8 +361,29 @@ class DisruptTripleoNodesTest(testtools.TestCase):
|
||||
@overcloud.skip_unless_ovn_bgp_agent
|
||||
def test_restart_frr(self):
    """Restart the FRR service without losing pcs cluster quorum.

    FRR is restarted on every compute and (when present) every networker
    node, but on only ONE randomly chosen controller: restarting FRR on
    all controllers simultaneously makes the pcs cluster lose quorum,
    which is not a supported situation and may not be recoverable.

    When pacemaker fencing is deployed, stopping FRR on a controller
    triggers an automatic reboot of that node, so instead of restarting
    the unit the test verifies the node's uptime resets; without
    fencing, the test simply starts the unit again.

    The overcloud health is checked before and after the disruption.
    """
    OvercloudHealthCheck.run_before()
    frr_service = topology.get_agent_service_name(neutron.FRR)

    # restart frr on all computes
    computes = topology.list_openstack_nodes(group='compute')
    cloud_disruptions.restart_service_on_nodes(frr_service, computes)

    # restart frr on all networkers (the group only exists on some
    # deployments)
    if 'networker' in topology.list_openstack_node_groups():
        networkers = topology.list_openstack_nodes(group='networker')
        cloud_disruptions.restart_service_on_nodes(frr_service, networkers)

    # restart frr on one controller (in order to avoid quorum issues)
    controller = random.choice(topology.list_openstack_nodes(
        group='controller'))
    sh.stop_systemd_units(frr_service, ssh_client=controller.ssh_client)
    start_time = tobiko.time()
    if not pacemaker.fencing_deployed():
        # when not fencing, the test just starts frr on the controller
        sh.start_systemd_units(
            frr_service, ssh_client=controller.ssh_client)
    else:
        # when fencing, the controller is rebooted automatically;
        # an uptime lower than the recorded start time proves the reboot
        cloud_disruptions.check_overcloud_node_uptime(
            controller.ssh_client, start_time)

    OvercloudHealthCheck.run_after()
||||
def test_restart_neutron(self):
|
||||
|
Loading…
Reference in New Issue
Block a user