From 0abf121cf9d38fbdac6c53224068d4acf86baa5e Mon Sep 17 00:00:00 2001
From: Federico Ressi
Date: Thu, 22 Jul 2021 11:04:39 +0200
Subject: [PATCH] Call overcloud_health_checks in test_controllers_shutdown

Update test_controllers_shutdown to perform the operations below:

1) before the disruption:
   - assert the overcloud is healthy
   - ensure all controllers are running and reachable
   - assert a VM is running and reachable
2) shut down some of the controller nodes:
   - ensure the powered-off controllers are unreachable while the
     others stay reachable
   - check whether the VM is still running and reachable
3) power those controller nodes back on:
   - ensure all controllers are running and reachable
   - assert the VM is running and reachable
   - assert the overcloud is healthy

Change-Id: I3837a7a236f764236fd9fe07e67d1265c692b7e0
---
 tobiko/openstack/topology/__init__.py         |  3 +
 tobiko/openstack/topology/_assert.py          | 43 +++++++++++
 tobiko/tests/faults/ha/cloud_disruptions.py   | 77 ++++++++++++++++---
 tobiko/tests/faults/ha/test_cloud_recovery.py | 35 +--------
 4 files changed, 117 insertions(+), 41 deletions(-)
 create mode 100644 tobiko/openstack/topology/_assert.py

diff --git a/tobiko/openstack/topology/__init__.py b/tobiko/openstack/topology/__init__.py
index 3b3f87ce0..317cc3837 100644
--- a/tobiko/openstack/topology/__init__.py
+++ b/tobiko/openstack/topology/__init__.py
@@ -13,10 +13,13 @@
 # under the License.
 from __future__ import absolute_import
 
+from tobiko.openstack.topology import _assert
 from tobiko.openstack.topology import _exception
 from tobiko.openstack.topology import _neutron
 from tobiko.openstack.topology import _topology
 
+assert_reachable_nodes = _assert.assert_reachable_nodes
+assert_unreachable_nodes = _assert.assert_unreachable_nodes
 NoSuchOpenStackTopologyNodeGroup = _exception.NoSuchOpenStackTopologyNodeGroup
 NoSuchOpenStackTopologyNode = _exception.NoSuchOpenStackTopologyNode

diff --git a/tobiko/openstack/topology/_assert.py b/tobiko/openstack/topology/_assert.py
new file mode 100644
index 000000000..7438d9181
--- /dev/null
+++ b/tobiko/openstack/topology/_assert.py
@@ -0,0 +1,43 @@
+# Copyright 2021 Red Hat
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License. You may obtain
+# a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+from __future__ import absolute_import
+
+import json
+import typing
+
+from oslo_log import log
+
+from tobiko.openstack.topology import _topology
+from tobiko.shell import ping
+
+
+LOG = log.getLogger(__name__)
+
+
+def assert_reachable_nodes(
+        nodes: typing.Iterable[_topology.OpenStackTopologyNode],
+        **ping_params):
+    node_ips = {node.name: str(node.public_ip) for node in nodes}
+    LOG.debug(f"Test nodes are reachable: "
+              f"{json.dumps(node_ips, sort_keys=True, indent=4)}")
+    ping.assert_reachable_hosts(node_ips.values(), **ping_params)
+
+
+def assert_unreachable_nodes(
+        nodes: typing.Iterable[_topology.OpenStackTopologyNode],
+        **ping_params):
+    node_ips = {node.name: str(node.public_ip) for node in nodes}
+    LOG.debug(f"Test nodes are unreachable: "
+              f"{json.dumps(node_ips, sort_keys=True, indent=4)}")
+    ping.assert_unreachable_hosts(node_ips.values(), **ping_params)

diff --git a/tobiko/tests/faults/ha/cloud_disruptions.py b/tobiko/tests/faults/ha/cloud_disruptions.py
index 431026894..72c1beeeb 100644
--- a/tobiko/tests/faults/ha/cloud_disruptions.py
+++ b/tobiko/tests/faults/ha/cloud_disruptions.py
@@ -1,25 +1,28 @@
 from __future__ import absolute_import
 
-import time
-import random
-import urllib.parse
-import re
 from datetime import datetime
+import math
+import random
+import re
+import time
+import urllib.parse
 
 from oslo_log import log
 
 import tobiko
-from tobiko.shell import sh
 from tobiko.openstack import glance
+from tobiko.openstack import keystone
+from tobiko.openstack import stacks
 from tobiko.openstack import tests
 from tobiko.openstack import topology
-from tobiko.tripleo import topology as tripleo_topology
-from tobiko.openstack import keystone
-from tobiko.tripleo import pacemaker
+from tobiko.shell import ping
+from tobiko.shell import sh
+from tobiko.tests.faults.ha import test_cloud_recovery
 from tobiko.tripleo import containers
 from tobiko.tripleo import nova
-from tobiko.tests.faults.ha import test_cloud_recovery
+from tobiko.tripleo import pacemaker
+from tobiko.tripleo import topology as tripleo_topology
 
 LOG = log.getLogger(__name__)
 
@@ -514,3 +517,59 @@ def check_iha_evacuation_network_disruption():
 def check_iha_evacuation_hard_reset_shutoff_instance():
     check_iha_evacuation(failover_type=sh.hard_reset_method,
                          vm_type='shutoff')
+
+
+def test_controllers_shutdown():
+    test_case = tobiko.get_test_case()
+
+    all_nodes = topology.list_openstack_nodes(group='controller')
+    if len(all_nodes) < 3:
+        tobiko.skip_test('This test requires at least three controller nodes')
+
+    all_node_names = [node.name for node in all_nodes]
+    LOG.info("Ensure all controller nodes are running: "
+             f"{all_node_names}")
+    for node in all_nodes:
+        node.power_on_overcloud_node()
+    topology.assert_reachable_nodes(all_nodes)
+
+    LOG.debug('Check VM is running while all controller nodes are on')
+    nova_server = tobiko.setup_fixture(stacks.CirrosServerStackFixture)
+    nova_server_ip = nova_server.ip_address
+    ping.assert_reachable_hosts([nova_server_ip])
+
+    quorum_level = math.ceil(0.5 * len(all_nodes))
+    assert quorum_level >= len(all_nodes) - quorum_level
+    nodes = random.sample(all_nodes, quorum_level)
+    node_names = [node.name for node in nodes]
+    LOG.info(f"Power off {quorum_level} random controller nodes: "
+             f"{node_names}")
+    for node in nodes:
+        node.power_off_overcloud_node()
+        test_case.addCleanup(node.power_on_overcloud_node)
+    topology.assert_unreachable_nodes(nodes, retry_count=1)
+    topology.assert_reachable_nodes(node
+                                    for node in all_nodes
+                                    if node not in nodes)
+
+    LOG.debug('Check whether the VM is still running while some '
+              'controller nodes are off')
+    reachable, unreachable = ping.ping_hosts([nova_server_ip],
+                                             count=1)
+    if reachable:
+        LOG.debug(f"VM IPs are reachable: {reachable}")
+    if unreachable:
+        LOG.debug(f"VM IPs are unreachable: {unreachable}")
+    # TODO what do we expect here: VM reachable or unreachable?
+
+    random.shuffle(nodes)
+    LOG.info(f"Power on controller nodes: {node_names}")
+    for node in nodes:
+        node.power_on_overcloud_node()
+
+    LOG.debug("Check all controller nodes are running again: "
+              f"{all_node_names}")
+    topology.assert_reachable_nodes(all_nodes, retry_timeout=600.)
+
+    LOG.debug('Check VM is running while all controller nodes are on')
+    ping.assert_reachable_hosts([nova_server_ip])

diff --git a/tobiko/tests/faults/ha/test_cloud_recovery.py b/tobiko/tests/faults/ha/test_cloud_recovery.py
index 8f7e7660d..d12b0c6d8 100644
--- a/tobiko/tests/faults/ha/test_cloud_recovery.py
+++ b/tobiko/tests/faults/ha/test_cloud_recovery.py
@@ -1,7 +1,5 @@
 from __future__ import absolute_import
 
-import math
-import random
 import typing
 
 from oslo_log import log
@@ -10,7 +8,6 @@ import testtools
 import tobiko
 from tobiko.openstack import neutron
 from tobiko.openstack import tests
-from tobiko.openstack import topology
 from tobiko.tests.faults.ha import cloud_disruptions
 from tobiko.tripleo import pacemaker
 from tobiko.tripleo import processes
@@ -216,35 +213,9 @@ class DisruptTripleoNodesTest(testtools.TestCase):
         OvercloudHealthCheck.run_after()
 
     def test_controllers_shutdown(self):
-        all_nodes = topology.list_openstack_nodes(group='controller')
-        if len(all_nodes) < 3:
-            self.skipTest('It requires at least three controller nodes')
-        LOG.info("Ensure all controller nodes are running...")
-        for node in all_nodes:
-            node.power_on_overcloud_node()
-
-        LOG.info("Verify can create VMs before controllers power off...")
-        tests.test_server_creation()
-
-        quorum_level = math.ceil(0.5 * len(all_nodes))
-        assert quorum_level >= len(all_nodes) - quorum_level
-        nodes = random.sample(all_nodes, quorum_level)
-        LOG.info(f"Power off {quorum_level} random controller nodes: "
-                 f"{[node.name for node in nodes]}")
-        for node in nodes:
-            node.power_off_overcloud_node()
-
-        random.shuffle(nodes)
-        LOG.info("Power on controller nodes: "
-                 f"{[node.name for node in nodes]}")
-        for node in nodes:
-            node.power_on_overcloud_node()
-
-        LOG.info("Wait until pacemaker resources will be healthy again...")
-        check_pacemaker_resources_health()
-        LOG.info("Verify can create VMs after controllers power on...")
-        tests.test_server_creation()
-
+        OvercloudHealthCheck.run_before()
+        cloud_disruptions.test_controllers_shutdown()
+        OvercloudHealthCheck.run_after()

# [..]
# more tests to follow
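--
A minimal usage sketch of the new topology assert helpers introduced by
this patch. It only relies on names visible above (list_openstack_nodes,
assert_reachable_nodes, assert_unreachable_nodes, and the retry_count /
retry_timeout ping parameters the test itself passes); the
group='compute' value is an illustrative assumption, not something this
patch touches:

    from tobiko.openstack import topology

    # Pick a node group; the test above uses group='controller'.
    nodes = topology.list_openstack_nodes(group='compute')

    # Fails the current test unless every node answers ping on its
    # public IP address.
    topology.assert_reachable_nodes(nodes)

    # Keyword arguments are forwarded to the tobiko.shell.ping helpers,
    # e.g. keep retrying for up to ten minutes while nodes are booting:
    topology.assert_reachable_nodes(nodes, retry_timeout=600.)

    # Conversely, a single ping round is enough to confirm powered-off
    # nodes are gone:
    topology.assert_unreachable_nodes(nodes, retry_count=1)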