From c6126e194cc8575098c1f33aafd580985beb630b Mon Sep 17 00:00:00 2001 From: Eduardo Olivares Date: Wed, 21 Feb 2024 09:57:22 +0100 Subject: [PATCH] Fix retries on get_pcs_resources_table method That method used retries wrongly: - it did not use the tobiko retries mechanism - its try/except did not catch all the relevant exceptions Change-Id: I59c3d6e3db3294f135621e739d7582aa4977305e --- tobiko/tripleo/pacemaker.py | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/tobiko/tripleo/pacemaker.py b/tobiko/tripleo/pacemaker.py index 0885dc9fd..c5be2ac68 100644 --- a/tobiko/tripleo/pacemaker.py +++ b/tobiko/tripleo/pacemaker.py @@ -3,7 +3,6 @@ from __future__ import absolute_import import enum import io import time -import typing from oslo_log import log import pandas @@ -41,12 +40,9 @@ def get_pcs_resources_table(timeout=720, interval=2) -> pandas.DataFrame: :return: dataframe of pcs resources stats table """ - failures: typing.List[str] = [] - start = time.time() - # prevent pcs table read failure while pacemaker is starting - while time.time() - start < timeout: - failures = [] + for attempt in tobiko.retry(timeout=timeout, + interval=interval): try: output = run_pcs_status(options=['resources'], grep_str='ocf') # remove the first column when it only includes '*' characters @@ -56,18 +52,12 @@ def get_pcs_resources_table(timeout=720, interval=2) -> pandas.DataFrame: stream, delim_whitespace=True, header=None) table.columns = ['resource', 'resource_type', 'resource_state', 'overcloud_node'] - except ValueError: - pcs_status_raw = run_pcs_status() - failures.append(f'pcs status table import failed : ' - f'pcs status stdout:\n {pcs_status_raw}') - LOG.info('Retrying , timeout at: {}' - .format(timeout-(time.time() - start))) - time.sleep(interval) + except (ValueError, sh.ShellCommandFailed, sh.ShellTimeoutExpired): + if attempt.is_last: + raise + LOG.exception('Failed to obtain pcs status table - Retrying...') else: break - # exhausted all retries - if failures: - tobiko.fail('pcs status table import error\n' + '\n'.join(failures)) LOG.debug("Got pcs status :\n%s", table) return table