Call overcloud_health_checks in test_controllers_shutdown

Update test_controllers_shutdown to perform the operations below:

 1) before disruption:
   - assert overcloud is healthy
   - ensure all controllers are running and reachable
   - assert a VM is running and reachable
 2) shutdown some controller nodes
   - ensure the powered-off controllers are not reachable while the
     others are
   - check whether the VM is still running and reachable
 3) power on those controller nodes
   - ensure all controllers are running and reachable
   - assert the VM is running and reachable
   - assert overcloud is healthy

Change-Id: I3837a7a236f764236fd9fe07e67d1265c692b7e0
This commit is contained in:
Federico Ressi 2021-07-22 11:04:39 +02:00
parent 3f569d368e
commit 0abf121cf9
4 changed files with 117 additions and 41 deletions

View File

@ -13,10 +13,13 @@
# under the License. # under the License.
from __future__ import absolute_import from __future__ import absolute_import
from tobiko.openstack.topology import _assert
from tobiko.openstack.topology import _exception from tobiko.openstack.topology import _exception
from tobiko.openstack.topology import _neutron from tobiko.openstack.topology import _neutron
from tobiko.openstack.topology import _topology from tobiko.openstack.topology import _topology
assert_reachable_nodes = _assert.assert_reachable_nodes
assert_unreachable_nodes = _assert.assert_unreachable_nodes
NoSuchOpenStackTopologyNodeGroup = _exception.NoSuchOpenStackTopologyNodeGroup NoSuchOpenStackTopologyNodeGroup = _exception.NoSuchOpenStackTopologyNodeGroup
NoSuchOpenStackTopologyNode = _exception.NoSuchOpenStackTopologyNode NoSuchOpenStackTopologyNode = _exception.NoSuchOpenStackTopologyNode

View File

@ -0,0 +1,43 @@
# Copyright 2021 Red Hat
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
import json
import typing
from oslo_log import log
from tobiko.openstack.topology import _topology
from tobiko.shell import ping
LOG = log.getLogger(__name__)
def assert_reachable_nodes(
        nodes: typing.Iterable[_topology.OpenStackTopologyNode],
        **ping_params):
    """Assert that every given topology node answers to ping.

    The public IP of each node is collected (keyed by node name), the
    resulting mapping is logged at debug level and the addresses are handed
    over to ``ping.assert_reachable_hosts``.

    :param nodes: topology nodes to check; iterated exactly once, so a
        generator is accepted.
    :param ping_params: extra keyword arguments forwarded unchanged to
        ``ping.assert_reachable_hosts``.
    """
    # Build the name -> address mapping first so the log entry and the ping
    # call are guaranteed to refer to the same set of hosts.
    node_ips = {}
    for node in nodes:
        node_ips[node.name] = str(node.public_ip)

    pretty_ips = json.dumps(node_ips, sort_keys=True, indent=4)
    LOG.debug(f"Test nodes are reachable: {pretty_ips}")
    ping.assert_reachable_hosts(node_ips.values(), **ping_params)
def assert_unreachable_nodes(
        nodes: typing.Iterable[_topology.OpenStackTopologyNode],
        **ping_params):
    """Assert that none of the given topology nodes answers to ping.

    The public IP of each node is collected (keyed by node name), the
    resulting mapping is logged at debug level and the addresses are handed
    over to ``ping.assert_unreachable_hosts``.

    :param nodes: topology nodes to check; iterated exactly once, so a
        generator is accepted.
    :param ping_params: extra keyword arguments forwarded unchanged to
        ``ping.assert_unreachable_hosts``.
    """
    # Build the name -> address mapping first so the log entry and the ping
    # call are guaranteed to refer to the same set of hosts.
    node_ips = {}
    for node in nodes:
        node_ips[node.name] = str(node.public_ip)

    pretty_ips = json.dumps(node_ips, sort_keys=True, indent=4)
    LOG.debug(f"Test nodes are unreachable: {pretty_ips}")
    ping.assert_unreachable_hosts(node_ips.values(), **ping_params)

View File

@ -1,25 +1,28 @@
from __future__ import absolute_import from __future__ import absolute_import
import time
import random
import urllib.parse
import re
from datetime import datetime from datetime import datetime
import math
import random
import re
import time
import urllib.parse
from oslo_log import log from oslo_log import log
import tobiko import tobiko
from tobiko.shell import sh
from tobiko.openstack import glance from tobiko.openstack import glance
from tobiko.openstack import keystone
from tobiko.openstack import stacks
from tobiko.openstack import tests from tobiko.openstack import tests
from tobiko.openstack import topology from tobiko.openstack import topology
from tobiko.tripleo import topology as tripleo_topology from tobiko.tests.faults.ha import test_cloud_recovery
from tobiko.openstack import keystone from tobiko.shell import ping
from tobiko.tripleo import pacemaker from tobiko.shell import sh
from tobiko.tripleo import containers from tobiko.tripleo import containers
from tobiko.tripleo import nova from tobiko.tripleo import nova
from tobiko.tests.faults.ha import test_cloud_recovery from tobiko.tripleo import pacemaker
from tobiko.tripleo import topology as tripleo_topology
LOG = log.getLogger(__name__) LOG = log.getLogger(__name__)
@ -514,3 +517,59 @@ def check_iha_evacuation_network_disruption():
def check_iha_evacuation_hard_reset_shutoff_instance(): def check_iha_evacuation_hard_reset_shutoff_instance():
check_iha_evacuation(failover_type=sh.hard_reset_method, vm_type='shutoff') check_iha_evacuation(failover_type=sh.hard_reset_method, vm_type='shutoff')
def test_controllers_shutdown():
    """Power off half (rounded up) of the controllers, then recover them.

    Steps:

    1. Power on every controller node and check they are all reachable,
       then set up a Cirros server and check it is reachable.
    2. Power off a random sample of ``ceil(n / 2)`` controllers, check that
       those nodes are unreachable while the remaining ones still are, and
       log (without asserting) whether the server still answers to ping.
    3. Power the sampled controllers back on in random order and check that
       all controllers and the server are reachable again.

    The test is skipped through ``tobiko.skip_test`` when fewer than three
    controller nodes are found in the topology.
    """
    test_case = tobiko.get_test_case()

    all_nodes = topology.list_openstack_nodes(group='controller')
    if len(all_nodes) < 3:
        tobiko.skip_test('It requires at least three controller nodes')

    all_node_names = [node.name for node in all_nodes]
    LOG.info("Ensure all controller nodes are running: "
             f"{all_node_names}")
    for node in all_nodes:
        node.power_on_overcloud_node()
    topology.assert_reachable_nodes(all_nodes)

    LOG.debug('Check VM is running while all controllers nodes are on')
    nova_server = tobiko.setup_fixture(stacks.CirrosServerStackFixture)
    nova_server_ip = nova_server.ip_address
    ping.assert_reachable_hosts([nova_server_ip])

    # Shut down at least as many controllers as are left running.
    quorum_level = math.ceil(0.5 * len(all_nodes))
    assert quorum_level >= len(all_nodes) - quorum_level
    nodes = random.sample(all_nodes, quorum_level)
    node_names = [node.name for node in nodes]
    LOG.info(f"Power off {quorum_level} random controller nodes: "
             f"{node_names}")
    for node in nodes:
        # Register the cleanup *before* powering off, so the node is powered
        # back on even if the power-off call itself fails half way through.
        test_case.addCleanup(node.power_on_overcloud_node)
        node.power_off_overcloud_node()
    # NOTE(review): retry_count=1 presumably limits the check to a single
    # attempt -- confirm against the ping module.
    topology.assert_unreachable_nodes(nodes, retry_count=1)
    topology.assert_reachable_nodes(node
                                    for node in all_nodes
                                    if node not in nodes)

    LOG.debug('Check whether VM is still running while some '
              'controllers nodes are off')
    reachable, unreachable = ping.ping_hosts([nova_server_ip],
                                             count=1)
    if reachable:
        LOG.debug(f"VM ips are reachable: {reachable}")
    if unreachable:
        LOG.debug(f"VM ips are unreachable: {unreachable}")
    # TODO what do we expect here: VM reachable or unreachable?

    random.shuffle(nodes)
    # Recompute the names after shuffling so the log shows the actual
    # power-on order.
    power_on_names = [node.name for node in nodes]
    LOG.info(f"Power on controller nodes: {power_on_names}")
    for node in nodes:
        node.power_on_overcloud_node()

    LOG.debug("Check all controller nodes are running again: "
              f"{all_node_names}")
    topology.assert_reachable_nodes(all_nodes, retry_timeout=600.)

    LOG.debug('Check VM is running while all controllers nodes are on')
    ping.assert_reachable_hosts([nova_server_ip])

View File

@ -1,7 +1,5 @@
from __future__ import absolute_import from __future__ import absolute_import
import math
import random
import typing import typing
from oslo_log import log from oslo_log import log
@ -10,7 +8,6 @@ import testtools
import tobiko import tobiko
from tobiko.openstack import neutron from tobiko.openstack import neutron
from tobiko.openstack import tests from tobiko.openstack import tests
from tobiko.openstack import topology
from tobiko.tests.faults.ha import cloud_disruptions from tobiko.tests.faults.ha import cloud_disruptions
from tobiko.tripleo import pacemaker from tobiko.tripleo import pacemaker
from tobiko.tripleo import processes from tobiko.tripleo import processes
@ -216,35 +213,9 @@ class DisruptTripleoNodesTest(testtools.TestCase):
OvercloudHealthCheck.run_after() OvercloudHealthCheck.run_after()
def test_controllers_shutdown(self): def test_controllers_shutdown(self):
all_nodes = topology.list_openstack_nodes(group='controller') OvercloudHealthCheck.run_before()
if len(all_nodes) < 3: cloud_disruptions.test_controllers_shutdown()
self.skipTest('It requires at least three controller nodes') OvercloudHealthCheck.run_after()
LOG.info("Ensure all controller nodes are running...")
for node in all_nodes:
node.power_on_overcloud_node()
LOG.info("Verify can create VMs before controllers power off...")
tests.test_server_creation()
quorum_level = math.ceil(0.5 * len(all_nodes))
assert quorum_level >= len(all_nodes) - quorum_level
nodes = random.sample(all_nodes, quorum_level)
LOG.info(f"Power off {quorum_level} random controller nodes: "
f"{[node.name for node in nodes]}")
for node in nodes:
node.power_off_overcloud_node()
random.shuffle(nodes)
LOG.info("Power on controller nodes: "
f"{[node.name for node in nodes]}")
for node in nodes:
node.power_on_overcloud_node()
LOG.info("Wait until pacemaker resources will be healthy again...")
check_pacemaker_resources_health()
LOG.info("Verify can create VMs after controllers power on...")
tests.test_server_creation()
# [..] # [..]
# more tests to follow # more tests to follow