Adding retry when getting ceph services in sysinv

When the system is initialized, it takes a while for the ceph
services to become available. This can cause problems when, for
example, updating via nfv.
To fix this, retry getting the service URL (up to 15 attempts, sleeping
5 seconds between attempts) until the ceph services are reported.

Closes-Bug: 1997758

Signed-off-by: Gustavo Ornaghi Antunes <gustavo.ornaghiantunes@windriver.com>
Change-Id: I7d47d9c88da1afd909eb65ce510c0a562e6912b6
This commit is contained in:
Gustavo Ornaghi Antunes 2022-11-24 13:25:16 +00:00
parent d8e85e4c64
commit bca81c9f64
1 changed files with 23 additions and 13 deletions

View File

@ -24,6 +24,7 @@ from cephclient import exception
CEPH_MON_RESTFUL_USER = 'admin'
CEPH_MON_RESTFUL_SERVICE = 'restful'
CEPH_CLIENT_RETRY_COUNT = 2
CEPH_GET_SERVICE_RETRY_COUNT = 15
CEPH_CLIENT_RETRY_TIMEOUT_SEC = 5
CEPH_CLI_TIMEOUT_SEC = 15
API_SUPPORTED_RESPONSE_FORMATS = [
@ -84,20 +85,29 @@ class CephClient(object):
raise exception.CephMonRestfulMissingUserCredentials(self.username)
def _get_service_url(self):
attempts = 1
while attempts <= CEPH_GET_SERVICE_RETRY_COUNT:
try:
output = subprocess.check_output(
('ceph mgr services '
'--connect-timeout {}').format(
CEPH_CLI_TIMEOUT_SEC),
shell=True)
except subprocess.CalledProcessError as e:
raise exception.CephMgrDumpError(str(e))
try:
status = json.loads(output)
if not status:
LOG.info("Unable to get service url")
time.sleep(CEPH_CLIENT_RETRY_TIMEOUT_SEC)
attempts+=1
continue
except (KeyError, ValueError):
raise exception.CephMgrJsonError(output)
LOG.info("Service url retrieved successfully")
break
try:
output = subprocess.check_output(
('ceph mgr dump '
'--connect-timeout {}').format(
CEPH_CLI_TIMEOUT_SEC),
shell=True)
except subprocess.CalledProcessError as e:
raise exception.CephMgrDumpError(str(e))
try:
status = json.loads(output)
except (KeyError, ValueError):
raise exception.CephMgrJsonError(output)
try:
self.service_url = status["services"][CEPH_MON_RESTFUL_SERVICE]
self.service_url = status[CEPH_MON_RESTFUL_SERVICE]
except (KeyError, TypeError):
raise exception.CephMgrMissingRestfulService(
status.get('services', ''))