Add a time limit to the pacemaker resources table retrieval

Change-Id: I981aec989926fb11b636f0e0299b8b63c093e126
This commit is contained in:
pinikomarov 2020-07-08 22:38:31 +03:00
parent a07a6d9db5
commit 72743ec8d2
1 changed files with 18 additions and 27 deletions

View File

@ -19,50 +19,29 @@ class PcsResourceException(tobiko.TobikoException):
message = "pcs cluster is not in a healthy state"
def get_pcs_resources_table():
def get_pcs_resources_table(timeout=120, interval=2):
"""
Get the pcs status from a controller node and parse it
to keep track of its resource states.
returns :
rabbitmq-bundle-0 (ocf::heartbeat:rabbitmq-cluster): Started con
troller-0
rabbitmq-bundle-1 (ocf::heartbeat:rabbitmq-cluster): Started con
troller-1
rabbitmq-bundle-2 (ocf::heartbeat:rabbitmq-cluster): Started con
troller-2
galera-bundle-0 (ocf::heartbeat:galera): Master controller-0
galera-bundle-1 (ocf::heartbeat:galera): Master controller-1
galera-bundle-2 (ocf::heartbeat:galera): Master controller-2
redis-bundle-0 (ocf::heartbeat:redis): Master controller-0
redis-bundle-1 (ocf::heartbeat:redis): Slave controller-1
redis-bundle-2 (ocf::heartbeat:redis): Slave controller-2
ip-192.168.24.6 (ocf::heartbeat:IPaddr2): Started controller-
0
ip-10.0.0.101 (ocf::heartbeat:IPaddr2): Started controller-1
ip-172.17.1.12 (ocf::heartbeat:IPaddr2): Started controller-2
ip-172.17.1.22 (ocf::heartbeat:IPaddr2): Started controller-0
ip-172.17.3.22 (ocf::heartbeat:IPaddr2): Started controller-1
ip-172.17.4.30 (ocf::heartbeat:IPaddr2): Started controller-2
haproxy-bundle-docker-0 (ocf::heartbeat:docker): Started con
troller-0
haproxy-bundle-docker-1 (ocf::heartbeat:docker): Started con
troller-1
haproxy-bundle-docker-2 (ocf::heartbeat:docker): Started con
troller-2
openstack-cinder-volume-docker-0 (ocf::heartbeat:docker): Sta
rted controller-0
:return: dataframe of pcs resources stats table
"""
# TODO: make more robust (done here; the other methods need the same)
# TODO: make the table.columns assignment retry without raising
failures = []
start = time.time()
nodes = topology.list_openstack_nodes(group='controller')
controller_node = nodes[0].name
ssh_client = overcloud.overcloud_ssh_client(controller_node)
# prevent pcs table read failure while pacemaker is starting
while True:
while time.time() - start < timeout:
failures = []
try:
output = sh.execute("sudo pcs status | grep 'ocf\\|fence'",
ssh_client=ssh_client,
@ -74,9 +53,21 @@ def get_pcs_resources_table():
table.columns = ['resource', 'resource_type', 'resource_state',
'overcloud_node']
except ValueError:
pass
pcs_status_raw = sh.execute("sudo pcs status ",
ssh_client=ssh_client,
expect_exit_status=None).stdout
failures.append(f'pcs status table import failed : '
f'pcs status stdout:\n {pcs_status_raw}')
LOG.info('Retrying , timeout at: {}'
.format(timeout-(time.time() - start)))
time.sleep(interval)
else:
break
# exhausted all retries
if failures:
tobiko.fail(
'pcs status table import error\n{!s}', '\n'.join(failures))
LOG.debug("Got pcs status :\n%s", table)
return table