From 16032b26f74106f2f739be856ddd19ba4257cd4f Mon Sep 17 00:00:00 2001 From: pkomarov Date: Mon, 16 Sep 2019 00:22:12 +0300 Subject: [PATCH] Check pacemaker resources Add API to fetch and parse 'pcs status' command result. Change-Id: Iaaa4896a0fb390ba5ae59b0b1b87ed1664a4b4dd --- infrared/tasks/templates/tobiko.conf.j2 | 8 + requirements.txt | 1 + .../functional/tripleo/test_overcloud.py | 18 ++ tobiko/tripleo/pacemaker.py | 193 ++++++++++++++++++ 4 files changed, 220 insertions(+) create mode 100644 tobiko/tripleo/pacemaker.py diff --git a/infrared/tasks/templates/tobiko.conf.j2 b/infrared/tasks/templates/tobiko.conf.j2 index 6c674fc80..fcb32572a 100644 --- a/infrared/tasks/templates/tobiko.conf.j2 +++ b/infrared/tasks/templates/tobiko.conf.j2 @@ -8,3 +8,11 @@ key_file = ~/.ssh/id_rsa [neutron] floating_network = "{{ test.floating_network }}" + +[ssh] +key_file = ~/.ssh/id_rsa +undercloud_ssh_password = stack +undercloud_ssh_username = stack + +[tripleo] +undercloud_ssh_hostname = undercloud-0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 32c858415..8e1851469 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,3 +21,4 @@ python-openstackclient>=3.0.0 # Apache-2.0 stestr>=2.0 # Apache-2.0 six>=1.10.0 # MIT testtools>=2.2.0 # MIT +pandas>=0.24.2 # BSD \ No newline at end of file diff --git a/tobiko/tests/functional/tripleo/test_overcloud.py b/tobiko/tests/functional/tripleo/test_overcloud.py index 4570a640b..b109b12a7 100644 --- a/tobiko/tests/functional/tripleo/test_overcloud.py +++ b/tobiko/tests/functional/tripleo/test_overcloud.py @@ -16,11 +16,13 @@ from __future__ import absolute_import import os import netaddr +import pandas as pd import testtools from tobiko import config from tobiko.openstack import nova from tobiko.tripleo import overcloud +from tobiko.tripleo import pacemaker import tobiko @@ -82,3 +84,19 @@ class OvercloudNovaApiTest(testtools.TestCase): hostname = overcloud.find_overcloud_node().name 
def get_pcs_resources_table(hostname='controller-0'):
    """Fetch ``pcs status`` from an overcloud node and parse its resources.

    Runs ``sudo pcs status | grep ocf`` over SSH on ``hostname`` and parses
    the whitespace-separated output, one row per resource line, e.g.::

        rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started controller-0
        galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
        redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
        ip-192.168.24.6 (ocf::heartbeat:IPaddr2): Started controller-0
        haproxy-bundle-docker-0 (ocf::heartbeat:docker): Started controller-0

    :param hostname: name of the overcloud node to query
    :return: pandas.DataFrame with the columns 'resource', 'resource_type',
        'resource_state' and 'resource location'
    """
    ssh_client = overcloud.overcloud_ssh_client(hostname)
    output = sh.execute("sudo pcs status | grep ocf",
                        ssh_client=ssh_client).stdout
    stream = six.StringIO(output)
    # sep=r'\s+' is the supported spelling of the deprecated
    # delim_whitespace=True and parses the output the same way.
    table = pandas.read_csv(stream, sep=r'\s+', header=None)
    table.columns = ['resource', 'resource_type', 'resource_state',
                     'resource location']
    LOG.debug("Got pcs status :\n%s", table)
    return table


class PacemakerResourcesStatus(object):
    """Health checks over the pacemaker resources table of a controller.

    The table is read once, at construction time, through
    get_pcs_resources_table() and kept in ``self.pcs_df``.
    """

    # Resource type of the OVN database servers as printed by `pcs status`.
    OVN_RESOURCE_TYPE = "(ocf::ovn:ovndb-servers):"

    def __init__(self):
        self.pcs_df = get_pcs_resources_table()

    def _matching(self, resource_type, resource_state=None):
        """Return the rows of a resource type, optionally in a given state."""
        mask = self.pcs_df['resource_type'] == resource_type
        if resource_state is not None:
            mask = mask & (self.pcs_df['resource_state'] == resource_state)
        return self.pcs_df[mask]

    def _all_in_state(self, resource_type, resource_state):
        """Tell whether every resource of the type is in the given state."""
        return (self.resource_count(resource_type) ==
                self.resource_count_in_state(resource_type, resource_state))

    def _single_master(self, resource_type):
        """Tell whether the type has one Master and all the others Slave."""
        nodes_num = self.resource_count(resource_type)
        master_num = self.resource_count_in_state(resource_type, "Master")
        slave_num = self.resource_count_in_state(resource_type, "Slave")
        return (master_num == 1) and (slave_num == nodes_num - master_num)

    @staticmethod
    def _report(healthy, healthy_message, unhealthy_message):
        """Log the message matching the outcome and return the outcome."""
        LOG.info(healthy_message if healthy else unhealthy_message)
        return healthy

    def container_runtime(self):
        """Return 'docker' or 'podman' depending on which resources exist.

        :return: the runtime name, or None when the table has neither
            docker nor podman resources
        """
        for runtime in ('docker', 'podman'):
            resource_type = "(ocf::heartbeat:{}):".format(runtime)
            if not self._matching(resource_type).empty:
                return runtime
        return None

    def resource_count(self, resource_type):
        """Return the number of resources of the given type."""
        return int(self._matching(resource_type)['resource_state'].count())

    def resource_count_in_state(self, resource_type, resource_state):
        """Return the number of resources of the type in the given state."""
        rows = self._matching(resource_type, resource_state)
        return int(rows['resource_state'].count())

    def rabbitmq_resource_healthy(self):
        """Return True when every rabbitmq resource is Started."""
        return self._report(
            self._all_in_state("(ocf::heartbeat:rabbitmq-cluster):",
                               "Started"),
            "pcs status check: resource rabbitmq is in healthy state",
            "pcs status check: resource rabbitmq not in healthy state")

    def galera_resource_healthy(self):
        """Return True when every galera resource is Master."""
        return self._report(
            self._all_in_state("(ocf::heartbeat:galera):", "Master"),
            "pcs status check: resource galera is in healthy state",
            "pcs status check: resource galera not in healthy state")

    def redis_resource_healthy(self):
        """Return True when redis has one Master and the rest are Slave."""
        return self._report(
            self._single_master("(ocf::heartbeat:redis):"),
            "pcs status check: resource redis is in healthy state",
            "pcs status check: resource redis not in healthy state")

    def vips_resource_healthy(self):
        """Return True when every virtual IP resource is Started."""
        return self._report(
            self._all_in_state("(ocf::heartbeat:IPaddr2):", "Started"),
            "pcs status check: resources vips are in healthy state",
            "pcs status check: resources vips are not in healthy state")

    def ha_proxy_cinder_healthy(self):
        """Return True when every container-runtime resource is Started.

        When the table has neither docker nor podman resources there is
        nothing to check and the result is True.
        """
        runtime = self.container_runtime()
        if runtime is None:
            healthy = True
        else:
            healthy = self._all_in_state(
                "(ocf::heartbeat:{}):".format(runtime), "Started")
        return self._report(
            healthy,
            "pcs status check: resources ha_proxy and"
            " cinder are in healthy state",
            "pcs status check: resources ha_proxy and cinder are not in "
            "healthy state")

    def ovn_resource_healthy(self):
        """Return True when OVN is absent or has one Master, rest Slave.

        A table without OVN database resources is treated as a deployment
        without OVN, which is not a failure.
        """
        if self._matching(self.OVN_RESOURCE_TYPE).empty:
            return True
        return self._report(
            self._single_master(self.OVN_RESOURCE_TYPE),
            "pcs status check: resource ovn is in healthy state",
            "pcs status check: resource ovn is in not in healthy state")

    @property
    def all_healthy(self):
        """Run every resource check and report the global health status.

        All checks are evaluated (and logged) before the verdict is taken.

        :return: True when every check passes
        :raises PcsResourceException: when at least one check fails
        """
        if all([
            self.rabbitmq_resource_healthy(),
            self.galera_resource_healthy(),
            self.redis_resource_healthy(),
            self.vips_resource_healthy(),
            self.ha_proxy_cinder_healthy(),
            self.ovn_resource_healthy()
        ]):
            LOG.info("pcs status checks: all resources are in healthy state")
            return True
        else:
            LOG.info("pcs status check: not all resources are in healthy "
                     "state")
            raise PcsResourceException()