Check for connectivity to the tiller postgres backend.
The existing code checks that the pod(s) are 'Running', but that might not be enough, as the service inside the pod (postgres) might not yet be able to accept connections. Closes-Bug: 1923587 Signed-off-by: Andrei Grosu <andrei.grosu@windriver.com> Change-Id: Ide49e4a38b805d5fc41d9f06d94393c69c6ed9d2
This commit is contained in:
parent
a40a3bd892
commit
5edd3bdbe5
|
@ -3193,8 +3193,12 @@ class ArmadaHelper(object):
|
||||||
|
|
||||||
# Wait for armada to be ready for cmd execution.
|
# Wait for armada to be ready for cmd execution.
|
||||||
# NOTE: make_armada_requests() also has retry mechanism
|
# NOTE: make_armada_requests() also has retry mechanism
|
||||||
timeout = 30
|
TIMEOUT_DELTA = 5
|
||||||
while True:
|
TIMEOUT_SLEEP = 5
|
||||||
|
TIMEOUT_START_VALUE = 30
|
||||||
|
|
||||||
|
timeout = TIMEOUT_START_VALUE
|
||||||
|
while timeout > 0:
|
||||||
try:
|
try:
|
||||||
pods = self._kube.kube_get_pods_by_selector(
|
pods = self._kube.kube_get_pods_by_selector(
|
||||||
ARMADA_NAMESPACE,
|
ARMADA_NAMESPACE,
|
||||||
|
@ -3222,20 +3226,42 @@ class ArmadaHelper(object):
|
||||||
LOG.error("Failed to copy %s to %s, error: %s",
|
LOG.error("Failed to copy %s to %s, error: %s",
|
||||||
src, dest_dir, stderr)
|
src, dest_dir, stderr)
|
||||||
raise RuntimeError('armada pod not ready')
|
raise RuntimeError('armada pod not ready')
|
||||||
else:
|
break
|
||||||
return True
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
LOG.info("Could not get Armada service : %s " % e)
|
LOG.info("Could not get Armada service : %s " % e)
|
||||||
|
|
||||||
if timeout <= 0:
|
time.sleep(TIMEOUT_SLEEP)
|
||||||
break
|
timeout -= TIMEOUT_DELTA
|
||||||
time.sleep(5)
|
|
||||||
timeout -= 5
|
|
||||||
|
|
||||||
LOG.error("Failed to get Armada service after 30 seconds.")
|
if timeout <= 0:
|
||||||
return False
|
LOG.error("Failed to get Armada service after {seconds} seconds.".
|
||||||
|
format(seconds=TIMEOUT_START_VALUE))
|
||||||
|
return False
|
||||||
|
|
||||||
|
# We don't need to loop through the code that checks the pod's status
|
||||||
|
# again. Once the previous loop exits with pod 'Running' we can test
|
||||||
|
# the connectivity to the tiller postgres backend:
|
||||||
|
timeout = TIMEOUT_START_VALUE
|
||||||
|
while timeout > 0:
|
||||||
|
try:
|
||||||
|
_ = helm_utils.retrieve_helm_releases()
|
||||||
|
break
|
||||||
|
except exception.HelmTillerFailure:
|
||||||
|
LOG.warn("Could not query Helm/Tiller releases")
|
||||||
|
time.sleep(TIMEOUT_SLEEP)
|
||||||
|
timeout -= TIMEOUT_DELTA
|
||||||
|
continue
|
||||||
|
except Exception as ex:
|
||||||
|
LOG.error("Unhandled exception : {error}".format(error=str(ex)))
|
||||||
|
return False
|
||||||
|
|
||||||
|
if timeout <= 0:
|
||||||
|
LOG.error("Failed to query Helm/Tiller for {seconds} seconds.".
|
||||||
|
format(seconds=TIMEOUT_START_VALUE))
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
def stop_armada_request(self):
|
def stop_armada_request(self):
|
||||||
"""A simple way to cancel an on-going manifest apply/rollback/delete
|
"""A simple way to cancel an on-going manifest apply/rollback/delete
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# sim: tabstop=4 shiftwidth=4 softtabstop=4
|
# sim: tabstop=4 shiftwidth=4 softtabstop=4
|
||||||
#
|
#
|
||||||
# Copyright (c) 2019 Wind River Systems, Inc.
|
# Copyright (c) 2019-2021 Wind River Systems, Inc.
|
||||||
#
|
#
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
#
|
#
|
||||||
|
|
Loading…
Reference in New Issue