Check pacemaker resources
Add API to fetch and parse 'pcs status' command result. Change-Id: Iaaa4896a0fb390ba5ae59b0b1b87ed1664a4b4dd
This commit is contained in:
parent
026dca67bd
commit
16032b26f7
@ -8,3 +8,11 @@ key_file = ~/.ssh/id_rsa
|
||||
|
||||
[neutron]
|
||||
floating_network = "{{ test.floating_network }}"
|
||||
|
||||
[ssh]
|
||||
key_file = ~/.ssh/id_rsa
|
||||
undercloud_ssh_password = stack
|
||||
undercloud_ssh_username = stack
|
||||
|
||||
[tripleo]
|
||||
undercloud_ssh_hostname = undercloud-0
|
@ -21,3 +21,4 @@ python-openstackclient>=3.0.0 # Apache-2.0
|
||||
stestr>=2.0 # Apache-2.0
|
||||
six>=1.10.0 # MIT
|
||||
testtools>=2.2.0 # MIT
|
||||
pandas>=0.24.2 # BSD
|
@ -16,11 +16,13 @@ from __future__ import absolute_import
|
||||
import os
|
||||
|
||||
import netaddr
|
||||
import pandas as pd
|
||||
import testtools
|
||||
|
||||
from tobiko import config
|
||||
from tobiko.openstack import nova
|
||||
from tobiko.tripleo import overcloud
|
||||
from tobiko.tripleo import pacemaker
|
||||
import tobiko
|
||||
|
||||
|
||||
@ -82,3 +84,19 @@ class OvercloudNovaApiTest(testtools.TestCase):
|
||||
hostname = overcloud.find_overcloud_node().name
|
||||
ssh_client = overcloud.overcloud_ssh_client(hostname=hostname)
|
||||
ssh_client.connect()
|
||||
|
||||
|
||||
@overcloud.skip_if_missing_overcloud
class OvercloudPacemakerTest(testtools.TestCase):
    """Verify that the overcloud pacemaker resources are healthy.

    Skipped entirely when no overcloud is available.
    """

    def test_get_pacemaker_resource_table(self):
        # The parsed 'pcs status' output must come back as a DataFrame.
        table = pacemaker.get_pcs_resources_table()
        self.assertIsInstance(table, pd.DataFrame)

    def test_pacemaker_resources_health(self):
        # all_healthy raises PcsResourceException on any unhealthy resource,
        # so reaching the assertion already implies success.
        resources_status = pacemaker.PacemakerResourcesStatus()
        self.assertTrue(resources_status.all_healthy)
|
||||
|
193
tobiko/tripleo/pacemaker.py
Normal file
193
tobiko/tripleo/pacemaker.py
Normal file
@ -0,0 +1,193 @@
|
||||
from __future__ import absolute_import
|
||||
|
||||
from oslo_log import log
|
||||
import pandas
|
||||
import six
|
||||
|
||||
import tobiko
|
||||
from tobiko.tripleo import overcloud
|
||||
from tobiko.shell import sh
|
||||
|
||||
|
||||
LOG = log.getLogger(__name__)
|
||||
|
||||
|
||||
class PcsResourceException(tobiko.TobikoException):
    """Raised when the pacemaker cluster reports unhealthy resources."""

    message = "pcs cluster is not in a healthy state"
|
||||
|
||||
|
||||
def get_pcs_resources_table(hostname='controller-0'):
    """Fetch 'pcs status' from a controller and parse its resource states.

    Runs ``sudo pcs status`` over SSH on the given overcloud node and keeps
    only the resource lines (those containing ``ocf``), which look like::

        rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started controller-0
        galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
        redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
        ip-192.168.24.6 (ocf::heartbeat:IPaddr2): Started controller-0
        haproxy-bundle-docker-0 (ocf::heartbeat:docker): Started controller-0
        openstack-cinder-volume-docker-0 (ocf::heartbeat:docker): Started controller-0

    :param hostname: name of the overcloud node to query
        (defaults to 'controller-0')
    :return: pandas.DataFrame of pcs resource states with columns
        'resource', 'resource_type', 'resource_state' and
        'resource location'
    """
    ssh_client = overcloud.overcloud_ssh_client(hostname)
    output = sh.execute("sudo pcs status | grep ocf",
                        ssh_client=ssh_client).stdout
    stream = six.StringIO(output)
    # Columns are whitespace separated; sep=r'\s+' is the non-deprecated
    # equivalent of delim_whitespace=True.
    table = pandas.read_csv(stream, sep=r'\s+', header=None)
    table.columns = ['resource', 'resource_type', 'resource_state',
                     'resource location']
    LOG.debug("Got pcs status :\n%s", table)
    return table
|
||||
|
||||
|
||||
class PacemakerResourcesStatus(object):
    """Handle pcs resources health checks.

    The 'pcs status' resource table is fetched once at construction time
    and each *_healthy method evaluates one resource type against it.
    """

    def __init__(self):
        # DataFrame with columns 'resource', 'resource_type',
        # 'resource_state', 'resource location'
        self.pcs_df = get_pcs_resources_table()

    def container_runtime(self):
        """Return 'docker' or 'podman' depending on which runtime backs the
        container resources, or None when neither type is present."""
        if not self.pcs_df[(self.pcs_df['resource_type'] ==
                            "(ocf::heartbeat:docker):")].empty:
            return 'docker'
        if not self.pcs_df[(self.pcs_df['resource_type'] ==
                            "(ocf::heartbeat:podman):")].empty:
            return 'podman'

    def resource_count(self, resource_type):
        """Count resources of the given type, in any state."""
        return self.pcs_df[(self.pcs_df['resource_type'] == resource_type)][
            'resource_state'].count()

    def resource_count_in_state(self, resource_type, resource_state):
        """Count resources of the given type that are in the given state."""
        return self.pcs_df[(self.pcs_df['resource_type'] ==
                            resource_type) & (self.pcs_df['resource_state'] ==
                                              resource_state)][
            'resource_state'].count()

    def rabbitmq_resource_healthy(self):
        """Healthy when every rabbitmq-cluster resource is 'Started'."""
        nodes_num = self.resource_count("(ocf::heartbeat:rabbitmq-cluster):")
        started_num = self.resource_count_in_state(
            "(ocf::heartbeat:rabbitmq-cluster):", "Started")
        if nodes_num == started_num:
            LOG.info("pcs status check: resource rabbitmq is in healthy state")
            return True
        else:
            LOG.info("pcs status check: resource rabbitmq not in healthy "
                     "state")
            return False

    def galera_resource_healthy(self):
        """Healthy when every galera resource is 'Master' (multi-master)."""
        nodes_num = self.resource_count("(ocf::heartbeat:galera):")
        master_num = self.resource_count_in_state("(ocf::heartbeat:galera):",
                                                  "Master")
        if nodes_num == master_num:
            LOG.info("pcs status check: resource galera is in healthy state")
            return True
        else:
            LOG.info("pcs status check: resource galera not in healthy state")
            return False

    def redis_resource_healthy(self):
        """Healthy when redis has exactly one 'Master' and the remaining
        resources are 'Slave'."""
        nodes_num = self.resource_count("(ocf::heartbeat:redis):")
        master_num = self.resource_count_in_state(
            "(ocf::heartbeat:redis):", "Master")
        slave_num = self.resource_count_in_state(
            "(ocf::heartbeat:redis):", "Slave")
        if (master_num == 1) and (slave_num == nodes_num - master_num):
            LOG.info("pcs status check: resource redis is in healthy state")
            return True
        else:
            LOG.info("pcs status check: resource redis not in healthy state")
            return False

    def vips_resource_healthy(self):
        """Healthy when every virtual-IP (IPaddr2) resource is 'Started'."""
        nodes_num = self.resource_count("(ocf::heartbeat:IPaddr2):")
        started_num = self.resource_count_in_state(
            "(ocf::heartbeat:IPaddr2):", "Started")
        if nodes_num == started_num:
            LOG.info("pcs status check: resources vips are in healthy state")
            return True
        else:
            LOG.info(
                "pcs status check: resources vips are not in healthy state")
            return False

    def ha_proxy_cinder_healthy(self):
        """Healthy when every container resource (haproxy, cinder-volume,
        ...) of the detected runtime is 'Started'.

        NOTE(review): when container_runtime() returns None both counts are
        0 and this reports healthy — confirm that is intended on
        non-containerized deployments.
        """
        nodes_num = self.resource_count("(ocf::heartbeat:{}):".format(
            self.container_runtime()))
        started_num = self.resource_count_in_state(
            "(ocf::heartbeat:{}):".format(self.container_runtime()), "Started")
        if nodes_num == started_num:
            LOG.info("pcs status check: resources ha_proxy and"
                     " cinder are in healthy state")
            return True
        else:
            LOG.info(
                "pcs status check: resources ha_proxy and cinder are not in "
                "healthy state")
            return False

    def ovn_resource_healthy(self):
        """Healthy when OVN is not deployed at all, or when its DB servers
        have exactly one 'Master' and the remaining resources are 'Slave'.

        Fixed: the previous implementation counted redis resources and
        returned healthy whenever any existed, so the OVN check below was
        unreachable.
        """
        nodes_num = self.resource_count("(ocf::ovn:ovndb-servers):")
        if nodes_num == 0:
            # OVN is optional: its absence is not a failure.
            return True
        else:
            master_num = self.resource_count_in_state(
                "(ocf::ovn:ovndb-servers):", "Master")
            slave_num = self.resource_count_in_state(
                "(ocf::ovn:ovndb-servers):", "Slave")
            if (master_num == 1) and (slave_num == nodes_num - master_num):
                LOG.info(
                    "pcs status check: resource ovn is in healthy state")
                return True
            else:
                LOG.info(
                    "pcs status check: resource ovn is not in "
                    "healthy state")
                return False

    @property
    def all_healthy(self):
        """Check each resource type and aggregate a global health status.

        :return: True when every resource check passes
        :raises PcsResourceException: when any resource is unhealthy
        """
        if all([
            self.rabbitmq_resource_healthy(),
            self.galera_resource_healthy(),
            self.redis_resource_healthy(),
            self.vips_resource_healthy(),
            self.ha_proxy_cinder_healthy(),
            self.ovn_resource_healthy()
        ]):
            LOG.info("pcs status checks: all resources are in healthy state")
            return True
        else:
            LOG.info("pcs status check: not all resources are in healthy "
                     "state")
            raise PcsResourceException()
|
Loading…
Reference in New Issue
Block a user