Improve exception handling in agent_base_vendor

Currently we catch all Exception classes in a few places and log only
the error message. This is very problematic when e.g. KeyError happens,
since its error message is the missing key.

This change:
* Adds exception class to the error message wherever Exception is caught
* Logs traceback when the caught exception does not inherit IronicException
  (we assume that IronicException's come with good error messages and from
  predictable places)
* Fixes meaningless error message when boot.prepare_instance fails.

Story: #2002089
Task: #19761
Change-Id: I2a7ddcfbde36b31af8d9560500ce1dd47db5b642
(cherry picked from commit 1d0f90c9e7)
This commit is contained in:
Dmitry Tantsur 2018-05-23 13:53:46 +02:00
parent 1f0eaa23ba
commit 57e18d2e66
1 changed files with 28 additions and 18 deletions

View File

@ -140,7 +140,7 @@ def _cleaning_reboot(task):
{'step': task.node.clean_step,
'node': task.node.uuid,
'err': e})
LOG.error(msg)
LOG.error(msg, exc_info=not isinstance(e, exception.IronicException))
# do not set cleaning_reboot if we didn't reboot
manager_utils.cleaning_error_handler(task, msg)
return
@ -203,7 +203,7 @@ def _get_completed_cleaning_command(task, commands):
@METRICS.timer('log_and_raise_deployment_error')
def log_and_raise_deployment_error(task, msg, collect_logs=True):
def log_and_raise_deployment_error(task, msg, collect_logs=True, exc=None):
"""Helper method to log the error and raise exception.
:param task: a TaskManager instance containing the node to act on.
@ -212,8 +212,11 @@ def log_and_raise_deployment_error(task, msg, collect_logs=True):
logs from IPA-based ramdisk. Defaults to True.
Actual log collection is also affected by
CONF.agent.deploy_logs_collect config option.
:param exc: Exception that caused the failure.
"""
LOG.error(msg)
log_traceback = (exc is not None and
not isinstance(exc, exception.IronicException))
LOG.error(msg, exc_info=log_traceback)
deploy_utils.set_failed_state(task, msg, collect_logs=collect_logs)
raise exception.InstanceDeployFailure(msg)
@ -549,10 +552,11 @@ class AgentDeployMixin(HeartbeatMixin):
except Exception as e:
msg = (_('For node %(node)s, post clean step hook '
'%(method)s failed for clean step %(step)s.'
'Error: %(error)s') %
'%(cls)s: %(error)s') %
{'method': clean_step_hook.__name__,
'node': node.uuid,
'error': e,
'cls': e.__class__.__name__,
'step': node.clean_step})
LOG.exception(msg)
return manager_utils.cleaning_error_handler(task, msg)
@ -613,10 +617,12 @@ class AgentDeployMixin(HeartbeatMixin):
except Exception as e:
LOG.warning('Failed to soft power off node %(node_uuid)s '
'in at least %(timeout)d seconds. '
'Error: %(error)s',
'%(cls)s: %(error)s',
{'node_uuid': node.uuid,
'timeout': (wait * (attempts - 1)) / 1000,
'error': e})
'cls': e.__class__.__name__, 'error': e},
exc_info=not isinstance(
e, exception.IronicException))
manager_utils.node_power_action(task, states.POWER_OFF)
else:
# Flush the file system prior to hard rebooting the node
@ -635,9 +641,10 @@ class AgentDeployMixin(HeartbeatMixin):
manager_utils.node_power_action(task, states.POWER_OFF)
except Exception as e:
msg = (_('Error rebooting node %(node)s after deploy. '
'Error: %(error)s') %
{'node': node.uuid, 'error': e})
log_and_raise_deployment_error(task, msg)
'%(cls)s: %(error)s') %
{'node': node.uuid, 'cls': e.__class__.__name__,
'error': e})
log_and_raise_deployment_error(task, msg, exc=e)
try:
task.driver.network.remove_provisioning_network(task)
@ -646,11 +653,13 @@ class AgentDeployMixin(HeartbeatMixin):
manager_utils.node_power_action(task, states.POWER_ON)
except Exception as e:
msg = (_('Error rebooting node %(node)s after deploy. '
'Error: %(error)s') %
{'node': node.uuid, 'error': e})
'%(cls)s: %(error)s') %
{'node': node.uuid, 'cls': e.__class__.__name__,
'error': e})
# NOTE(mgoddard): Don't collect logs since the node has been
# powered off.
log_and_raise_deployment_error(task, msg, collect_logs=False)
log_and_raise_deployment_error(task, msg, collect_logs=False,
exc=e)
task.process_event('done')
LOG.info('Deployment to node %s done', task.node.uuid)
@ -674,11 +683,12 @@ class AgentDeployMixin(HeartbeatMixin):
try:
task.driver.boot.prepare_instance(task)
except Exception as e:
LOG.error('Deploy failed for instance %(instance)s. '
'Error: %(error)s',
{'instance': node.instance_uuid, 'error': e})
msg = _('Failed to continue agent deployment.')
log_and_raise_deployment_error(task, msg)
LOG.error('Preparing instance for booting failed for instance '
'%(instance)s. %(cls)s: %(error)s',
{'instance': node.instance_uuid,
'cls': e.__class__.__name__, 'error': e})
msg = _('Failed to prepare instance for booting')
log_and_raise_deployment_error(task, msg, exc=e)
@METRICS.timer('AgentDeployMixin.configure_local_boot')
def configure_local_boot(self, task, root_uuid=None,
@ -724,6 +734,6 @@ class AgentDeployMixin(HeartbeatMixin):
"when deploying node %(node)s. Error: %(error)s") %
{'boot_dev': boot_devices.DISK, 'node': node.uuid,
'error': e})
log_and_raise_deployment_error(task, msg)
log_and_raise_deployment_error(task, msg, exc=e)
LOG.info('Local boot successfully configured for node %s', node.uuid)