Restart tiller on openstack pending install check

This is another attempt at fixing the bug that the merged review
https://review.opendev.org/c/starlingx/config/+/783472 tried to address,
since reports indicated that the bug still occurs on certain setups.

This patch explicitly forces a tiller restart when catching the first
HelmTillerFailure exception caused by the broken pipe error, instead of
only rerunning the 'helm list' command. Rerunning the command was
believed to be a reliable workaround for the problem, but it did not
solve it in every possible scenario.

Closes-Bug: #1917308
Signed-off-by: Gustavo Santos <gustavofaganello.santos@windriver.com>
Change-Id: I38667609173ca5c6fed028f75742ae99efedf149
This commit is contained in:
Gustavo Santos 2021-04-13 16:09:21 -03:00
parent a40a3bd892
commit ad8567f064
1 changed file with 25 additions and 2 deletions

View File

@ -181,9 +181,32 @@ def delete_helm_release(release):
def _retry_on_HelmTillerFailure(ex):
LOG.info('Caught HelmTillerFailure exception. Retrying... '
LOG.info('Caught HelmTillerFailure exception. Resetting tiller and retrying... '
'Exception: {}'.format(ex))
return isinstance(ex, exception.HelmTillerFailure)
env = os.environ.copy()
env['PATH'] = '/usr/local/sbin:' + env['PATH']
env['KUBECONFIG'] = kubernetes.KUBERNETES_ADMIN_CONF
helm_reset = subprocess.Popen(
['helmv2-cli', '--',
'helm', 'reset', '--force'],
env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
timer = threading.Timer(20, kill_process_and_descendants, [helm_reset])
try:
timer.start()
out, err = helm_reset.communicate()
if helm_reset.returncode == 0:
return isinstance(ex, exception.HelmTillerFailure)
elif err:
raise exception.HelmTillerFailure(reason=err)
else:
err_msg = "helmv2-cli -- helm reset operation failed."
raise exception.HelmTillerFailure(reason=err_msg)
except Exception as e:
raise exception.HelmTillerFailure(
reason="Failed to reset tiller: %s" % e)
finally:
timer.cancel()
@retrying.retry(stop_max_attempt_number=2,