From bca81c9f64a0a84a2db34877dad1c93990e4ed89 Mon Sep 17 00:00:00 2001 From: Gustavo Ornaghi Antunes Date: Thu, 24 Nov 2022 13:25:16 +0000 Subject: [PATCH] Adding retry when getting ceph services in sysinv When the system is initialized, it takes a while for the ceph services to become available. This can cause problems when updating via nfv, for example. To fix this, retry getting the service URL several times. Closes-Bug: 1997758 Signed-off-by: Gustavo Ornaghi Antunes Change-Id: I7d47d9c88da1afd909eb65ce510c0a562e6912b6 --- .../python-cephclient/cephclient/client.py | 36 ++++++++++++------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/ceph/python-cephclient/python-cephclient/cephclient/client.py b/ceph/python-cephclient/python-cephclient/cephclient/client.py index 21903c8d..d2f95f1e 100644 --- a/ceph/python-cephclient/python-cephclient/cephclient/client.py +++ b/ceph/python-cephclient/python-cephclient/cephclient/client.py @@ -24,6 +24,7 @@ from cephclient import exception CEPH_MON_RESTFUL_USER = 'admin' CEPH_MON_RESTFUL_SERVICE = 'restful' CEPH_CLIENT_RETRY_COUNT = 2 +CEPH_GET_SERVICE_RETRY_COUNT = 15 CEPH_CLIENT_RETRY_TIMEOUT_SEC = 5 CEPH_CLI_TIMEOUT_SEC = 15 API_SUPPORTED_RESPONSE_FORMATS = [ @@ -84,20 +85,29 @@ class CephClient(object): raise exception.CephMonRestfulMissingUserCredentials(self.username) def _get_service_url(self): + attempts = 1 + while attempts <= CEPH_GET_SERVICE_RETRY_COUNT: + try: + output = subprocess.check_output( + ('ceph mgr services ' + '--connect-timeout {}').format( + CEPH_CLI_TIMEOUT_SEC), + shell=True) + except subprocess.CalledProcessError as e: + raise exception.CephMgrDumpError(str(e)) + try: + status = json.loads(output) + if not status: + LOG.info("Unable to get service url") + time.sleep(CEPH_CLIENT_RETRY_TIMEOUT_SEC) + attempts+=1 + continue + except (KeyError, ValueError): + raise exception.CephMgrJsonError(output) + LOG.info("Service url retrieved successfully") + break try: - output = subprocess.check_output( 
('ceph mgr dump ' - '--connect-timeout {}').format( - CEPH_CLI_TIMEOUT_SEC), - shell=True) - except subprocess.CalledProcessError as e: - raise exception.CephMgrDumpError(str(e)) - try: - status = json.loads(output) - except (KeyError, ValueError): - raise exception.CephMgrJsonError(output) - try: - self.service_url = status["services"][CEPH_MON_RESTFUL_SERVICE] + self.service_url = status[CEPH_MON_RESTFUL_SERVICE] except (KeyError, TypeError): raise exception.CephMgrMissingRestfulService( status.get('services', ''))