From ba8889bd79fe9c4baf68dda563f93d8429581aab Mon Sep 17 00:00:00 2001 From: Gustavo Ornaghi Antunes Date: Thu, 24 Nov 2022 13:25:16 +0000 Subject: [PATCH] Adding retry when getting ceph services in cephmgr After the system is bootstrapped, it takes a while for the ceph services to become available. This can cause problems, for example when updating via nfv. To fix this, retry attempts were added when getting the service URL. Now the system checks and waits for the ceph services URL to become available before continuing initialization. Test Plan: PASS: Check if the message "Service url retrieved successfully" is displayed after "Unable to get service url" in the ceph-manager logs. Closes-Bug: 1997758 Signed-off-by: Gustavo Ornaghi Antunes Change-Id: I7d47d9c88da1afd909eb65ce510c0a562e6912b6 --- .../python-cephclient/cephclient/client.py | 36 ++++++++++++------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/ceph/python-cephclient/python-cephclient/cephclient/client.py b/ceph/python-cephclient/python-cephclient/cephclient/client.py index 21903c8d..da085db4 100644 --- a/ceph/python-cephclient/python-cephclient/cephclient/client.py +++ b/ceph/python-cephclient/python-cephclient/cephclient/client.py @@ -24,6 +24,7 @@ from cephclient import exception CEPH_MON_RESTFUL_USER = 'admin' CEPH_MON_RESTFUL_SERVICE = 'restful' CEPH_CLIENT_RETRY_COUNT = 2 +CEPH_GET_SERVICE_RETRY_COUNT = 15 CEPH_CLIENT_RETRY_TIMEOUT_SEC = 5 CEPH_CLI_TIMEOUT_SEC = 15 API_SUPPORTED_RESPONSE_FORMATS = [ @@ -84,20 +85,29 @@ class CephClient(object): raise exception.CephMonRestfulMissingUserCredentials(self.username) def _get_service_url(self): + attempts = 1 + while attempts <= CEPH_GET_SERVICE_RETRY_COUNT: + try: + output = subprocess.check_output( + ('ceph mgr services ' + '--connect-timeout {}').format( + CEPH_CLI_TIMEOUT_SEC), + shell=True) + except subprocess.CalledProcessError as e: + raise exception.CephMgrDumpError(str(e)) + try: + status = json.loads(output) + if not status: + LOG.info("Unable to get service url") +
time.sleep(CEPH_CLIENT_RETRY_TIMEOUT_SEC) + attempts += 1 + continue + except (KeyError, ValueError): + raise exception.CephMgrJsonError(output) + LOG.info("Service url retrieved successfully") + break try: - output = subprocess.check_output( - ('ceph mgr dump ' - '--connect-timeout {}').format( - CEPH_CLI_TIMEOUT_SEC), - shell=True) - except subprocess.CalledProcessError as e: - raise exception.CephMgrDumpError(str(e)) - try: - status = json.loads(output) - except (KeyError, ValueError): - raise exception.CephMgrJsonError(output) - try: - self.service_url = status["services"][CEPH_MON_RESTFUL_SERVICE] + self.service_url = status[CEPH_MON_RESTFUL_SERVICE] except (KeyError, TypeError): raise exception.CephMgrMissingRestfulService( status.get('services', ''))