Check pacemaker resources

Add API to fetch and parse 'pcs status' command result.

Change-Id: Iaaa4896a0fb390ba5ae59b0b1b87ed1664a4b4dd
This commit is contained in:
pkomarov 2019-09-16 00:22:12 +03:00 committed by Federico Ressi
parent 026dca67bd
commit 16032b26f7
4 changed files with 220 additions and 0 deletions

View File

@ -8,3 +8,11 @@ key_file = ~/.ssh/id_rsa
[neutron] [neutron]
floating_network = "{{ test.floating_network }}" floating_network = "{{ test.floating_network }}"
[ssh]
key_file = ~/.ssh/id_rsa
undercloud_ssh_password = stack
undercloud_ssh_username = stack
[tripleo]
undercloud_ssh_hostname = undercloud-0

View File

@ -21,3 +21,4 @@ python-openstackclient>=3.0.0 # Apache-2.0
stestr>=2.0 # Apache-2.0 stestr>=2.0 # Apache-2.0
six>=1.10.0 # MIT six>=1.10.0 # MIT
testtools>=2.2.0 # MIT testtools>=2.2.0 # MIT
pandas>=0.24.2 # BSD

View File

@ -16,11 +16,13 @@ from __future__ import absolute_import
import os import os
import netaddr import netaddr
import pandas as pd
import testtools import testtools
from tobiko import config from tobiko import config
from tobiko.openstack import nova from tobiko.openstack import nova
from tobiko.tripleo import overcloud from tobiko.tripleo import overcloud
from tobiko.tripleo import pacemaker
import tobiko import tobiko
@ -82,3 +84,19 @@ class OvercloudNovaApiTest(testtools.TestCase):
hostname = overcloud.find_overcloud_node().name hostname = overcloud.find_overcloud_node().name
ssh_client = overcloud.overcloud_ssh_client(hostname=hostname) ssh_client = overcloud.overcloud_ssh_client(hostname=hostname)
ssh_client.connect() ssh_client.connect()
@overcloud.skip_if_missing_overcloud
class OvercloudPacemakerTest(testtools.TestCase):
    """Check the pacemaker resources API and the health it reports."""

    def test_get_pacemaker_resource_table(self):
        # The parsed `pcs status` output must come back as a DataFrame.
        self.assertIsInstance(pacemaker.get_pcs_resources_table(),
                              pd.DataFrame)

    def test_pacemaker_resources_health(self):
        # all_healthy is a property: reading it runs every resource check.
        resources_status = pacemaker.PacemakerResourcesStatus()
        self.assertTrue(resources_status.all_healthy)

193
tobiko/tripleo/pacemaker.py Normal file
View File

@ -0,0 +1,193 @@
from __future__ import absolute_import
from oslo_log import log
import pandas
import six
import tobiko
from tobiko.tripleo import overcloud
from tobiko.shell import sh
# Module-level logger, named after this module.
LOG = log.getLogger(__name__)
class PcsResourceException(tobiko.TobikoException):
    """Error signalling that the pcs cluster is not in a healthy state."""

    message = 'pcs cluster is not in a healthy state'
def get_pcs_resources_table(hostname='controller-0'):
    """Fetch ``pcs status`` from a controller and parse its resource lines.

    Runs ``sudo pcs status | grep ocf`` on the given node through the
    overcloud SSH client and parses the whitespace-separated output into a
    table with one row per line. Example of the lines being parsed::

        galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
        galera-bundle-1 (ocf::heartbeat:galera): Master controller-1
        redis-bundle-0 (ocf::heartbeat:redis): Master controller-0
        redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
        ip-192.168.24.6 (ocf::heartbeat:IPaddr2): Started controller-0
        haproxy-bundle-docker-0 (ocf::heartbeat:docker): Started controller-0

    :param hostname: name of the overcloud node the command is executed on
    :return: pandas.DataFrame with the columns 'resource', 'resource_type',
        'resource_state' and 'resource location'
    """
    ssh_client = overcloud.overcloud_ssh_client(hostname)
    output = sh.execute("sudo pcs status | grep ocf",
                        ssh_client=ssh_client).stdout
    stream = six.StringIO(output)
    # sep=r'\s+' is the documented equivalent of delim_whitespace=True,
    # which is deprecated in recent pandas releases.
    table = pandas.read_csv(stream, sep=r'\s+', header=None)
    # Assigning the names afterwards (instead of passing names=) keeps the
    # failure loud when a line does not split into exactly four fields.
    table.columns = ['resource', 'resource_type', 'resource_state',
                     'resource location']
    LOG.debug("Got pcs status :\n%s", table)
    return table
class PacemakerResourcesStatus(object):
    """Health checks over the pacemaker resources table.

    On construction the table returned by get_pcs_resources_table() is
    stored in ``pcs_df``; the checks below only look at its 'resource_type'
    and 'resource_state' columns.
    """

    # 'resource_type' values exactly as they appear in the parsed table
    # (surrounding parentheses and trailing colon included).
    RABBITMQ_TYPE = "(ocf::heartbeat:rabbitmq-cluster):"
    GALERA_TYPE = "(ocf::heartbeat:galera):"
    REDIS_TYPE = "(ocf::heartbeat:redis):"
    VIP_TYPE = "(ocf::heartbeat:IPaddr2):"
    # NOTE(review): resource agent expected for OVN DB servers; confirm
    # against the `pcs status` output of an OVN based deployment.
    OVN_TYPE = "(ocf::ovn:ovndb-servers):"

    def __init__(self):
        self.pcs_df = get_pcs_resources_table()

    def container_runtime(self):
        """Return the container runtime found in the resources table.

        :return: 'docker' or 'podman' (docker is looked up first), or None
            when the table has no resource of either type
        """
        for runtime in ('docker', 'podman'):
            resource_type = "(ocf::heartbeat:{}):".format(runtime)
            matching = self.pcs_df[
                self.pcs_df['resource_type'] == resource_type]
            if not matching.empty:
                return runtime
        return None

    def resource_count(self, resource_type):
        """Count the rows of the given resource type."""
        of_type = self.pcs_df['resource_type'] == resource_type
        return self.pcs_df[of_type]['resource_state'].count()

    def resource_count_in_state(self, resource_type, resource_state):
        """Count the rows of the given resource type in the given state."""
        of_type = self.pcs_df['resource_type'] == resource_type
        in_state = self.pcs_df['resource_state'] == resource_state
        return self.pcs_df[of_type & in_state]['resource_state'].count()

    def _all_in_state(self, resource_type, resource_state):
        """Tell whether every resource of the type is in the given state.

        The answer is True when the table has no resource of that type.
        """
        total = self.resource_count(resource_type)
        matching = self.resource_count_in_state(resource_type,
                                                resource_state)
        return bool(total == matching)

    def _has_single_master(self, resource_type):
        """Tell whether the type has one Master and all the rest Slave."""
        total = self.resource_count(resource_type)
        master_num = self.resource_count_in_state(resource_type, "Master")
        slave_num = self.resource_count_in_state(resource_type, "Slave")
        return bool((master_num == 1) and (slave_num == total - master_num))

    def rabbitmq_resource_healthy(self):
        """Return True when all rabbitmq-cluster resources are Started."""
        if self._all_in_state(self.RABBITMQ_TYPE, "Started"):
            LOG.info("pcs status check: resource rabbitmq is in healthy state")
            return True
        LOG.info("pcs status check: resource rabbitmq not in healthy "
                 "state")
        return False

    def galera_resource_healthy(self):
        """Return True when all galera resources are Master."""
        if self._all_in_state(self.GALERA_TYPE, "Master"):
            LOG.info("pcs status check: resource galera is in healthy state")
            return True
        LOG.info("pcs status check: resource galera not in healthy state")
        return False

    def redis_resource_healthy(self):
        """Return True when redis has one Master and the rest are Slave."""
        if self._has_single_master(self.REDIS_TYPE):
            LOG.info("pcs status check: resource redis is in healthy state")
            return True
        LOG.info("pcs status check: resource redis not in healthy state")
        return False

    def vips_resource_healthy(self):
        """Return True when all IPaddr2 resources are Started."""
        if self._all_in_state(self.VIP_TYPE, "Started"):
            LOG.info("pcs status check: resources vips are in healthy state")
            return True
        LOG.info(
            "pcs status check: resources vips are not in healthy state")
        return False

    def ha_proxy_cinder_healthy(self):
        """Return True when all container runtime resources are Started.

        The resource type is built from container_runtime(); when no runtime
        is detected no row matches and the check passes.
        """
        resource_type = "(ocf::heartbeat:{}):".format(
            self.container_runtime())
        if self._all_in_state(resource_type, "Started"):
            LOG.info("pcs status check: resources ha_proxy and"
                     " cinder are in healthy state")
            return True
        LOG.info(
            "pcs status check: resources ha_proxy and cinder are not in "
            "healthy state")
        return False

    def ovn_resource_healthy(self):
        """Return True when OVN resources are absent or properly elected.

        A table without any OVN resource is considered healthy; otherwise
        exactly one resource must be Master and all the others Slave.
        """
        if self.resource_count(self.OVN_TYPE) == 0:
            LOG.info("pcs status check: no ovn resources found, skipping "
                     "ovn resource check")
            return True
        if self._has_single_master(self.OVN_TYPE):
            LOG.info(
                "pcs status check: resource ovn is in healthy state")
            return True
        LOG.info(
            "pcs status check: resource ovn is in not in "
            "healthy state")
        return False

    @property
    def all_healthy(self):
        """Run every resource check and report the global health status.

        :return: True when every check passes
        :raises PcsResourceException: when at least one check fails
        """
        # A list (not a generator) is used on purpose: every check runs and
        # logs its own result even after an earlier one has failed.
        if all([
            self.rabbitmq_resource_healthy(),
            self.galera_resource_healthy(),
            self.redis_resource_healthy(),
            self.vips_resource_healthy(),
            self.ha_proxy_cinder_healthy(),
            self.ovn_resource_healthy()
        ]):
            LOG.info("pcs status checks: all resources are in healthy state")
            return True
        LOG.info("pcs status check: not all resources are in healthy "
                 "state")
        raise PcsResourceException()