Browse Source

Test quorum number of controller nodes hard shutdown

Test cloud recovery after two controller nodes have been
hard powered off using sysrq.

This patch should verify Bugzilla bug 1942094 [1].

[1] https://bugzilla.redhat.com/show_bug.cgi?id=1942094

Change-Id: I5d8cb0711e9658982786ceb8a33c8fffecdf952d
changes/63/786863/1
Slawek Kaplonski 4 weeks ago
parent
commit
919f5d3eaa
4 changed files with 24 additions and 4 deletions
  1. +1
    -0
      tobiko/shell/sh/__init__.py
  2. +3
    -1
      tobiko/shell/sh/_reboot.py
  3. +6
    -0
      tobiko/tests/faults/ha/cloud_disruptions.py
  4. +14
    -3
      tobiko/tests/faults/ha/test_cloud_recovery.py

+ 1
- 0
tobiko/shell/sh/__init__.py View File

@ -77,6 +77,7 @@ RebootHostMethod = _reboot.RebootHostMethod
crash_method = RebootHostMethod.CRASH
hard_reset_method = RebootHostMethod.HARD
soft_reset_method = RebootHostMethod.SOFT
hard_shutdown_method = RebootHostMethod.HARD_SHUTDOWN
ssh_process = _ssh.ssh_process
ssh_execute = _ssh.ssh_execute


+ 3
- 1
tobiko/shell/sh/_reboot.py View File

@ -32,6 +32,8 @@ class RebootHostMethod(enum.Enum):
SOFT = '/sbin/reboot',
HARD = 'echo 1 > /proc/sys/kernel/sysrq && echo b > /proc/sysrq-trigger',
CRASH = 'echo 1 > /proc/sys/kernel/sysrq && echo c > /proc/sysrq-trigger',
HARD_SHUTDOWN = \
'echo 1 > /proc/sys/kernel/sysrq && echo o > /proc/sysrq-trigger',
def __init__(self, command: str):
self.command = command
@ -53,7 +55,7 @@ def reboot_host(ssh_client: ssh.SSHClientFixture,
timeout=timeout,
method=method)
tobiko.setup_fixture(reboot)
if wait:
if method != RebootHostMethod.HARD_SHUTDOWN and wait:
reboot.wait_for_operation()
return reboot


+ 6
- 0
tobiko/tests/faults/ha/cloud_disruptions.py View File

@ -192,6 +192,12 @@ def reboot_all_controller_nodes(reboot_method=sh.hard_reset_method,
check_overcloud_node_responsive(controller)
def hard_power_off_overcloud_nodes(nodes):
    """Hard power off every node in ``nodes``.

    Each node is handed to ``sh.reboot_host`` through its ``ssh_client``
    with ``sh.hard_shutdown_method`` selected as the method.

    :param nodes: iterable of node objects exposing an ``ssh_client``.
    """
    for overcloud_node in nodes:
        client = overcloud_node.ssh_client
        sh.reboot_host(ssh_client=client, method=sh.hard_shutdown_method)
def get_main_vip():
"""return the ip of the overcloud main vip.
Retrieve an ip address (ipv4/ipv6) from the auth_url."""


+ 14
- 3
tobiko/tests/faults/ha/test_cloud_recovery.py View File

@ -215,7 +215,7 @@ class DisruptTripleoNodesTest(testtools.TestCase):
cloud_disruptions.request_galera_sst()
OvercloudHealthCheck.run_after()
def test_controllers_shutdown(self):
def _test_controllers_shutdown(self, shutdown_method):
all_nodes = topology.list_openstack_nodes(group='controller')
if len(all_nodes) < 3:
self.skipTest('It requires at least three controller nodes')
@ -229,10 +229,10 @@ class DisruptTripleoNodesTest(testtools.TestCase):
quorum_level = math.ceil(0.5 * len(all_nodes))
assert quorum_level >= len(all_nodes) - quorum_level
nodes = random.sample(all_nodes, quorum_level)
LOG.info(f"Power off {quorum_level} random controller nodes: "
f"{[node.name for node in nodes]}")
for node in nodes:
node.power_off_overcloud_node()
shutdown_method(nodes)
random.shuffle(nodes)
LOG.info("Power on controller nodes: "
@ -245,6 +245,17 @@ class DisruptTripleoNodesTest(testtools.TestCase):
LOG.info("Verify can create VMs after controllers power on...")
tests.test_server_creation()
def test_controllers_shutdown(self):
    """Run the controllers shutdown scenario using a plain power off.

    The callable handed to ``_test_controllers_shutdown`` calls
    ``power_off_overcloud_node()`` on every node it receives.
    """
    def power_off_each(selected_nodes):
        for controller in selected_nodes:
            controller.power_off_overcloud_node()

    self._test_controllers_shutdown(power_off_each)
def test_controllers_hard_power_down(self):
    """Run the controllers shutdown scenario using a hard power off.

    Delegates to ``_test_controllers_shutdown`` with
    ``cloud_disruptions.hard_power_off_overcloud_nodes`` as the method.
    """
    hard_power_off = cloud_disruptions.hard_power_off_overcloud_nodes
    self._test_controllers_shutdown(hard_power_off)
# [..]
# more tests to follow


Loading…
Cancel
Save