Merge "Improve cloud-init debugging traces"

This commit is contained in:
Zuul 2021-04-28 18:11:02 +00:00 committed by Gerrit Code Review
commit 02486ef1e6
1 changed file with 42 additions and 29 deletions

View File

@@ -27,6 +27,14 @@ from tobiko.shell import ssh
LOG = log.getLogger(__name__) LOG = log.getLogger(__name__)
CLOUD_INIT_TRANSIENT_STATES = {
'done': tuple(['running'])
}
CLOUD_INIT_OUTPUT_FILE = '/var/log/cloud-init-output.log'
CLOUD_INIT_LOG_FILE = '/var/log/cloud-init.log'
def user_data(*args, **kwargs): def user_data(*args, **kwargs):
config = cloud_config(*args, **kwargs) config = cloud_config(*args, **kwargs)
if config: if config:
@@ -82,20 +90,21 @@ class CloudConfig(dict):
class InvalidCloudInitStatusError(tobiko.TobikoException): class InvalidCloudInitStatusError(tobiko.TobikoException):
message = ("cloud-init status of host '{hostname}' is " message = ("cloud-init status of host '{hostname}' is "
"'{actual_status}' while it is expecting to " "'{actual_status}' while it is expecting to "
"be in {expected_states!r}:\n" "be in {expected_states!r}:\n\n"
"{details}") f"--- {CLOUD_INIT_LOG_FILE} ---\n"
"{log_file}\n\n"
f"--- {CLOUD_INIT_OUTPUT_FILE} ---\n"
"{output_file}\n\n")
class WaitForCloudInitTimeoutError(InvalidCloudInitStatusError): class WaitForCloudInitTimeoutError(InvalidCloudInitStatusError):
message = ("after {timeout} seconds cloud-init status of host " message = ("after {timeout} seconds cloud-init status of host "
"'{hostname}' is still '{actual_status}' while it is " "'{hostname}' is still '{actual_status}' while it is "
"expecting to be in {expected_states!r}:\n" "expecting to be in {expected_states!r}:\n\n"
"{details}") f"--- {CLOUD_INIT_LOG_FILE} ---\n"
"{log_file}\n\n"
f"--- {CLOUD_INIT_OUTPUT_FILE} ---\n"
COUD_INIT_TRANSIENT_STATES = { "{output_file}\n\n")
'done': tuple(['running'])
}
def get_cloud_init_status( def get_cloud_init_status(
@@ -142,10 +151,14 @@ def wait_for_cloud_init_status(
if transient_states is None: if transient_states is None:
transient_states = list() transient_states = list()
for status in expected_states: for status in expected_states:
transient_states += COUD_INIT_TRANSIENT_STATES.get(status, []) transient_states += CLOUD_INIT_TRANSIENT_STATES.get(status, [])
with open_cloud_init_ouput(timeout=timeout, with open_output_file(filename=CLOUD_INIT_LOG_FILE,
ssh_client=ssh_client) as output: timeout=timeout,
ssh_client=ssh_client) as log_file, \
open_output_file(filename=CLOUD_INIT_OUTPUT_FILE,
timeout=timeout,
ssh_client=ssh_client) as output_file:
for attempt in tobiko.retry(timeout=timeout, for attempt in tobiko.retry(timeout=timeout,
interval=sleep_interval, interval=sleep_interval,
default_timeout=600., default_timeout=600.,
@@ -155,13 +168,15 @@ def wait_for_cloud_init_status(
if actual_status in expected_states: if actual_status in expected_states:
return actual_status return actual_status
output.readall() log_file.readall()
output_file.readall()
if actual_status not in transient_states: if actual_status not in transient_states:
raise InvalidCloudInitStatusError( raise InvalidCloudInitStatusError(
hostname=hostname, hostname=hostname,
actual_status=actual_status, actual_status=actual_status,
expected_states=expected_states, expected_states=expected_states,
details=str(output)) log_file=str(log_file),
output_file=str(output_file))
try: try:
attempt.check_limits() attempt.check_limits()
@@ -171,27 +186,25 @@ def wait_for_cloud_init_status(
hostname=hostname, hostname=hostname,
actual_status=actual_status, actual_status=actual_status,
expected_states=expected_states, expected_states=expected_states,
details=str(output)) from ex log_file=str(log_file),
output_file=str(output_file)) from ex
# show only the last 10 lines # show only the last log line
details = '\n'.join(str(output).splitlines()[-10:]) last_log_line = str(log_file).splitlines()[-1]
LOG.debug(f"Waiting cloud-init status on host '{hostname}' to " LOG.debug(f"Waiting cloud-init status on host '{hostname}' to "
f"switch from '{actual_status}' to any of expected " f"switch from '{actual_status}' to any of expected "
f"states ({', '.join(expected_states)})\n\n" f"states ({', '.join(expected_states)}):\n\n"
f"{details}\n") f"--- {CLOUD_INIT_LOG_FILE} ---\n"
f"{last_log_line}\n\n")
raise RuntimeError("Retry loop ended himself") raise RuntimeError("Retry loop ended himself")
CLOUD_INIT_OUTPUT_FILE = '/var/log/cloud-init-output.log'
@contextlib.contextmanager @contextlib.contextmanager
def open_cloud_init_ouput( def open_output_file(filename: str = CLOUD_INIT_OUTPUT_FILE,
cloud_init_output_file: str = CLOUD_INIT_OUTPUT_FILE, tail=False,
tail=False, follow=False,
follow=False, **process_params) \
**params) \
-> typing.Generator[sh.ShellStdout, None, None]: -> typing.Generator[sh.ShellStdout, None, None]:
command = ['tail'] command = ['tail']
if not tail: if not tail:
@@ -200,7 +213,7 @@ def open_cloud_init_ouput(
if follow: if follow:
command += ['-F'] command += ['-F']
command += [cloud_init_output_file] command += [filename]
process = sh.process(command, **params) process = sh.process(command, **process_params)
with process: with process:
yield process.stdout yield process.stdout