From 6a3f19db6b951fa4d0440bc46d9487cb6c933d2b Mon Sep 17 00:00:00 2001 From: Federico Ressi Date: Sat, 25 Dec 2021 09:38:38 +0100 Subject: [PATCH] Fix concurrency problem in wait_for_cloud_init_status function When the cloud-init status command fails to find the file '/run/cloud-init/status.json' on the target VM, it fails, printing out a Python stack trace as follows: Traceback (most recent call last): File "/usr/bin/cloud-init", line 11, in load_entry_point('cloud-init==21.4', 'console_scripts', 'cloud-init')() File "/usr/lib/python3/dist-packages/cloudinit/cmd/main.py", line 927, in main retval = util.log_time( File "/usr/lib/python3/dist-packages/cloudinit/util.py", line 2472, in log_time ret = func(*args, **kwargs) File "/usr/lib/python3/dist-packages/cloudinit/cmd/status.py", line 55, in handle_status_args status, status_detail, time = _get_status_details(init.paths) File "/usr/lib/python3/dist-packages/cloudinit/cmd/status.py", line 123, in _get_status_details status_v1 = load_json(load_file(status_file)).get('v1', {}) File "/usr/lib/python3/dist-packages/cloudinit/util.py", line 1361, in load_file with open(fname, 'rb') as ifh: In such a case the produced stdout is empty and therefore the get_cloud_init_status function raises a ShellCommandFailed exception. This workaround makes the wait_for_cloud_init_status function tolerate such temporary failures of get_cloud_init_status until the retry timeout expires, so that if this issue occurs before cloud-init is able to create the status file, no error is going to break the retry loop in the wait_for_cloud_init_status function. 
Change-Id: I7bbdf79e20f46bd6134b151db3269fe540a5738a --- tobiko/openstack/nova/_cloud_init.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tobiko/openstack/nova/_cloud_init.py b/tobiko/openstack/nova/_cloud_init.py index 5701e9b00..3048362fa 100644 --- a/tobiko/openstack/nova/_cloud_init.py +++ b/tobiko/openstack/nova/_cloud_init.py @@ -26,7 +26,7 @@ LOG = log.getLogger(__name__) CLOUD_INIT_TRANSIENT_STATES = { - 'done': tuple(['running']) + 'done': ('running',) } CLOUD_INIT_OUTPUT_FILE = '/var/log/cloud-init-output.log' @@ -199,14 +199,21 @@ def wait_for_cloud_init_status( timeout=timeout, tail=tail) + actual_status: typing.Optional[str] + for attempt in tobiko.retry(timeout=timeout, interval=sleep_interval, default_timeout=1200., default_interval=5.): - actual_status = get_cloud_init_status(ssh_client=ssh_client, - timeout=attempt.time_left) - if actual_status in expected_states: - break + try: + actual_status = get_cloud_init_status(ssh_client=ssh_client, + timeout=attempt.time_left) + except sh.ShellCommandFailed: + LOG.exception('Unable to get cloud-init status') + actual_status = None + else: + if actual_status in expected_states: + break if attempt.is_last: raise WaitForCloudInitTimeoutError(