Fix concurrency problem in wait_for_cloud_init_status function
When the cloud-init status command fails to find the file '/run/cloud-init/status.json' on the target VM, it fails, printing out a Python stack trace as follows: Traceback (most recent call last): File "/usr/bin/cloud-init", line 11, in <module> load_entry_point('cloud-init==21.4', 'console_scripts', 'cloud-init')() File "/usr/lib/python3/dist-packages/cloudinit/cmd/main.py", line 927, in main retval = util.log_time( File "/usr/lib/python3/dist-packages/cloudinit/util.py", line 2472, in log_time ret = func(*args, **kwargs) File "/usr/lib/python3/dist-packages/cloudinit/cmd/status.py", line 55, in handle_status_args status, status_detail, time = _get_status_details(init.paths) File "/usr/lib/python3/dist-packages/cloudinit/cmd/status.py", line 123, in _get_status_details status_v1 = load_json(load_file(status_file)).get('v1', {}) File "/usr/lib/python3/dist-packages/cloudinit/util.py", line 1361, in load_file with open(fname, 'rb') as ifh: In such a case the produced stdout is empty, and therefore the get_cloud_init_status function raises a ShellCommandFailed exception. This workaround makes the wait_for_cloud_init_status function tolerate such temporary failures of get_cloud_init_status until the retry timeout expires, so that if this issue occurs before cloud-init is able to create the status file, no error is going to break the retry loop in the wait_for_cloud_init_status function. Change-Id: I7bbdf79e20f46bd6134b151db3269fe540a5738a
This commit is contained in:
parent
49376a1bc2
commit
6a3f19db6b
|
@ -26,7 +26,7 @@ LOG = log.getLogger(__name__)
|
|||
|
||||
|
||||
CLOUD_INIT_TRANSIENT_STATES = {
|
||||
'done': tuple(['running'])
|
||||
'done': ('running',)
|
||||
}
|
||||
|
||||
CLOUD_INIT_OUTPUT_FILE = '/var/log/cloud-init-output.log'
|
||||
|
@ -199,14 +199,21 @@ def wait_for_cloud_init_status(
|
|||
timeout=timeout,
|
||||
tail=tail)
|
||||
|
||||
actual_status: typing.Optional[str]
|
||||
|
||||
for attempt in tobiko.retry(timeout=timeout,
|
||||
interval=sleep_interval,
|
||||
default_timeout=1200.,
|
||||
default_interval=5.):
|
||||
actual_status = get_cloud_init_status(ssh_client=ssh_client,
|
||||
timeout=attempt.time_left)
|
||||
if actual_status in expected_states:
|
||||
break
|
||||
try:
|
||||
actual_status = get_cloud_init_status(ssh_client=ssh_client,
|
||||
timeout=attempt.time_left)
|
||||
except sh.ShellCommandFailed:
|
||||
LOG.exception('Unable to get cloud-init status')
|
||||
actual_status = None
|
||||
else:
|
||||
if actual_status in expected_states:
|
||||
break
|
||||
|
||||
if attempt.is_last:
|
||||
raise WaitForCloudInitTimeoutError(
|
||||
|
|
Loading…
Reference in New Issue