From ad8567f06485a10edf3857fbc87ae7d3058a1dfc Mon Sep 17 00:00:00 2001
From: Gustavo Santos
Date: Tue, 13 Apr 2021 16:09:21 -0300
Subject: [PATCH] Restart tiller on openstack pending install check

This is another attempt at fixing the same bug as the merged review
https://review.opendev.org/c/starlingx/config/+/783472 had tried, since
there were reports indicating that the bug would still occur on certain
setups.

This patch explicitly forces a tiller restart when catching the first
HelmTillerFailure exception caused by the broken pipe error, instead of
only trying to rerun the 'helm list' command, which was believed to be a
reliable workaround to the problem, but didn't solve it in every
possible scenario.

Closes-Bug: #1917308
Signed-off-by: Gustavo Santos
Change-Id: I38667609173ca5c6fed028f75742ae99efedf149
---
 sysinv/sysinv/sysinv/sysinv/helm/utils.py | 27 +++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/sysinv/sysinv/sysinv/sysinv/helm/utils.py b/sysinv/sysinv/sysinv/sysinv/helm/utils.py
index 424b203e3d..1294893295 100644
--- a/sysinv/sysinv/sysinv/sysinv/helm/utils.py
+++ b/sysinv/sysinv/sysinv/sysinv/helm/utils.py
@@ -181,9 +181,32 @@ def delete_helm_release(release):
 
 
 def _retry_on_HelmTillerFailure(ex):
-    LOG.info('Caught HelmTillerFailure exception. Retrying... '
+    LOG.info('Caught HelmTillerFailure exception. Resetting tiller and retrying... '
              'Exception: {}'.format(ex))
-    return isinstance(ex, exception.HelmTillerFailure)
+    env = os.environ.copy()
+    env['PATH'] = '/usr/local/sbin:' + env['PATH']
+    env['KUBECONFIG'] = kubernetes.KUBERNETES_ADMIN_CONF
+    helm_reset = subprocess.Popen(
+        ['helmv2-cli', '--',
+         'helm', 'reset', '--force'],
+        env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    timer = threading.Timer(20, kill_process_and_descendants, [helm_reset])
+
+    try:
+        timer.start()
+        out, err = helm_reset.communicate()
+        if helm_reset.returncode == 0:
+            return isinstance(ex, exception.HelmTillerFailure)
+        elif err:
+            raise exception.HelmTillerFailure(reason=err)
+        else:
+            err_msg = "helmv2-cli -- helm reset operation failed."
+            raise exception.HelmTillerFailure(reason=err_msg)
+    except Exception as e:
+        raise exception.HelmTillerFailure(
+            reason="Failed to reset tiller: %s" % e)
+    finally:
+        timer.cancel()
 
 
 @retrying.retry(stop_max_attempt_number=2,