From 2a0f2a0d270c6a5bb7c141e84a83d9d0e783ae3b Mon Sep 17 00:00:00 2001 From: Kevin_Zheng Date: Wed, 25 Apr 2018 11:17:14 +0800 Subject: [PATCH] Add full traceback to ExceptionPayload in versioned notifications This patch adds full traceback to ExceptionPayload in versioned notifications. The instance fault field and instance-action REST API has already provide the traceback to the admin users (controlable through policy) and the notifications are also admin only things as they are emitted to the message bus by default. So it is assumed that security is not a bigger concern for the notification than for the REST API. On the ML [1] post there was no objection to add new string field to the ExceptionPayload that will hold the serialized traceback object. [1] http://lists.openstack.org/pipermail/openstack-dev/2018-March/128105.html Implements: blueprint add-full-traceback-to-error-notifications Change-Id: Id587967ea4f9980c292492e2f659bf55fb037b28 --- .../compute-exception.json | 5 +- .../instance-create-error.json | 5 +- .../instance-interface_attach-error.json | 5 +- .../instance-reboot-error.json | 5 +- .../instance-rebuild-error.json | 5 +- .../instance-resize-error.json | 5 +- .../instance-volume_attach-error.json | 5 +- .../instance-volume_swap-error.json | 5 +- nova/compute/manager.py | 46 ++++++++----- nova/compute/utils.py | 40 ++++++----- nova/exception_wrapper.py | 13 ++-- nova/notifications/objects/exception.py | 14 ++-- .../test_exception_notification.py | 5 ++ .../test_instance.py | 66 ++++++++++++++++--- nova/tests/unit/compute/test_compute.py | 12 ++-- nova/tests/unit/compute/test_compute_mgr.py | 8 +-- nova/tests/unit/compute/test_compute_utils.py | 6 +- .../objects/test_notification.py | 2 +- nova/tests/unit/test_exception.py | 3 +- ...-to-exceptionpayload-06cf8d55d2918eab.yaml | 10 +++ 20 files changed, 188 insertions(+), 77 deletions(-) create mode 100644 releasenotes/notes/add-full-traceback-to-exceptionpayload-06cf8d55d2918eab.yaml diff --git a/doc/notification_samples/compute-exception.json b/doc/notification_samples/compute-exception.json index 0a4d6a691b38..ae43783c9a90 100644 --- a/doc/notification_samples/compute-exception.json +++ b/doc/notification_samples/compute-exception.json @@ -5,11 +5,12 @@ "exception": "AggregateNameExists", "exception_message": "Aggregate versioned_exc_aggregate already exists.", "function_name": "_aggregate_create_in_db", - "module_name": "nova.objects.aggregate" + "module_name": "nova.objects.aggregate", + "traceback": "Traceback (most recent call last):\n File \"nova/compute/manager.py\", line ..." }, "nova_object.name": "ExceptionPayload", "nova_object.namespace": "nova", - "nova_object.version": "1.0" + "nova_object.version": "1.1" }, "priority": "ERROR", "publisher_id": "nova-api:fake-mini" diff --git a/doc/notification_samples/instance-create-error.json b/doc/notification_samples/instance-create-error.json index 4002dad620d6..095cfbf4f489 100644 --- a/doc/notification_samples/instance-create-error.json +++ b/doc/notification_samples/instance-create-error.json @@ -8,11 +8,12 @@ "exception": "FlavorDiskTooSmall", "exception_message": "The created instance's disk would be too small.", "function_name": "_build_resources", - "module_name": "nova.tests.functional.notification_sample_tests.test_instance" + "module_name": "nova.tests.functional.notification_sample_tests.test_instance", + "traceback": "Traceback (most recent call last):\n File \"nova/compute/manager.py\", line ..." }, "nova_object.name": "ExceptionPayload", "nova_object.namespace": "nova", - "nova_object.version": "1.0" + "nova_object.version": "1.1" }, "ip_addresses": [], "launched_at": null, diff --git a/doc/notification_samples/instance-interface_attach-error.json b/doc/notification_samples/instance-interface_attach-error.json index a5a51b872f02..f61297d69f77 100644 --- a/doc/notification_samples/instance-interface_attach-error.json +++ b/doc/notification_samples/instance-interface_attach-error.json @@ -9,11 +9,12 @@ "exception": "InterfaceAttachFailed", "exception_message": "dummy", "function_name": "_unsuccessful_attach_interface", - "module_name": "nova.tests.functional.notification_sample_tests.test_instance" + "module_name": "nova.tests.functional.notification_sample_tests.test_instance", + "traceback": "Traceback (most recent call last):\n File \"nova/compute/manager.py\", line ..." }, "nova_object.name": "ExceptionPayload", "nova_object.namespace": "nova", - "nova_object.version": "1.0" + "nova_object.version": "1.1" } } }, diff --git a/doc/notification_samples/instance-reboot-error.json b/doc/notification_samples/instance-reboot-error.json index 63431d80ec40..f95fb7bd5a4e 100644 --- a/doc/notification_samples/instance-reboot-error.json +++ b/doc/notification_samples/instance-reboot-error.json @@ -8,11 +8,12 @@ "exception": "UnsupportedVirtType", "exception_message": "Virtualization type 'FakeVirt' is not supported by this compute driver", "function_name": "_hard_reboot", - "module_name": "nova.tests.functional.notification_sample_tests.test_instance" + "module_name": "nova.tests.functional.notification_sample_tests.test_instance", + "traceback": "Traceback (most recent call last):\n File \"nova/compute/manager.py\", line ..." }, "nova_object.name": "ExceptionPayload", "nova_object.namespace": "nova", - "nova_object.version": "1.0" + "nova_object.version": "1.1" }, "task_state":"reboot_started_hard" } diff --git a/doc/notification_samples/instance-rebuild-error.json b/doc/notification_samples/instance-rebuild-error.json index e51019fd4536..9cb138f83f9c 100644 --- a/doc/notification_samples/instance-rebuild-error.json +++ b/doc/notification_samples/instance-rebuild-error.json @@ -9,9 +9,10 @@ "module_name": "nova.tests.functional.notification_sample_tests.test_instance", "exception_message": "Virtual Interface creation failed", "function_name": "_virtual_interface_create_failed", - "exception": "VirtualInterfaceCreateException" + "exception": "VirtualInterfaceCreateException", + "traceback": "Traceback (most recent call last):\n File \"nova/compute/manager.py\", line ..." }, - "nova_object.version": "1.0", + "nova_object.version": "1.1", "nova_object.namespace": "nova" }, "architecture": null, diff --git a/doc/notification_samples/instance-resize-error.json b/doc/notification_samples/instance-resize-error.json index bfda5573c929..62ad8fcaf6af 100644 --- a/doc/notification_samples/instance-resize-error.json +++ b/doc/notification_samples/instance-resize-error.json @@ -8,11 +8,12 @@ "exception":"FlavorDiskTooSmall", "exception_message":"The created instance's disk would be too small.", "function_name":"_build_resources", - "module_name":"nova.tests.functional.notification_sample_tests.test_instance" + "module_name":"nova.tests.functional.notification_sample_tests.test_instance", + "traceback":"Traceback (most recent call last):\n File \"nova/compute/manager.py\", line ..." }, "nova_object.name":"ExceptionPayload", "nova_object.namespace":"nova", - "nova_object.version":"1.0" + "nova_object.version":"1.1" }, "block_devices": [], "task_state": "resize_prep" diff --git a/doc/notification_samples/instance-volume_attach-error.json b/doc/notification_samples/instance-volume_attach-error.json index 8cfc79c32882..a8ed7c383f63 100644 --- a/doc/notification_samples/instance-volume_attach-error.json +++ b/doc/notification_samples/instance-volume_attach-error.json @@ -8,11 +8,12 @@ "exception": "CinderConnectionFailed", "exception_message": "Connection to cinder host failed: Connection timed out", "function_name": "attach_volume", - "module_name": "nova.tests.functional.notification_sample_tests.test_instance" + "module_name": "nova.tests.functional.notification_sample_tests.test_instance", + "traceback": "Traceback (most recent call last):\n File \"nova/compute/manager.py\", line ..." }, "nova_object.name": "ExceptionPayload", "nova_object.namespace": "nova", - "nova_object.version": "1.0" + "nova_object.version": "1.1" } } }, diff --git a/doc/notification_samples/instance-volume_swap-error.json b/doc/notification_samples/instance-volume_swap-error.json index f0e3d2c44f0f..3938265c249e 100644 --- a/doc/notification_samples/instance-volume_swap-error.json +++ b/doc/notification_samples/instance-volume_swap-error.json @@ -10,11 +10,12 @@ "exception": "TypeError", "exception_message": "'tuple' object does not support item assignment", "function_name": "_init_volume_connection", - "module_name": "nova.compute.manager" + "module_name": "nova.compute.manager", + "traceback": "Traceback (most recent call last):\n File \"nova/compute/manager.py\", line ..." }, "nova_object.name": "ExceptionPayload", "nova_object.namespace": "nova", - "nova_object.version": "1.0" + "nova_object.version": "1.1" } } }, diff --git a/nova/compute/manager.py b/nova/compute/manager.py index c9ba8f87862f..dfc0f5e186e8 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -2017,18 +2017,20 @@ class ComputeManager(manager.Manager): with excutils.save_and_reraise_exception(): self._notify_about_instance_usage(context, instance, 'create.error', fault=e) + tb = traceback.format_exc() compute_utils.notify_about_instance_create( context, instance, self.host, phase=fields.NotificationPhase.ERROR, exception=e, - bdms=block_device_mapping) + bdms=block_device_mapping, tb=tb) except exception.ComputeResourcesUnavailable as e: LOG.debug(e.format_message(), instance=instance) self._notify_about_instance_usage(context, instance, 'create.error', fault=e) + tb = traceback.format_exc() compute_utils.notify_about_instance_create( context, instance, self.host, phase=fields.NotificationPhase.ERROR, exception=e, - bdms=block_device_mapping) + bdms=block_device_mapping, tb=tb) raise exception.RescheduledException( instance_uuid=instance.uuid, reason=e.format_message()) except exception.BuildAbortException as e: @@ -2036,20 +2038,22 @@ class ComputeManager(manager.Manager): LOG.debug(e.format_message(), instance=instance) self._notify_about_instance_usage(context, instance, 'create.error', fault=e) + tb = traceback.format_exc() compute_utils.notify_about_instance_create( context, instance, self.host, phase=fields.NotificationPhase.ERROR, exception=e, - bdms=block_device_mapping) + bdms=block_device_mapping, tb=tb) except (exception.FixedIpLimitExceeded, exception.NoMoreNetworks, exception.NoMoreFixedIps) as e: LOG.warning('No more network or fixed IP to be allocated', instance=instance) self._notify_about_instance_usage(context, instance, 'create.error', fault=e) + tb = traceback.format_exc() compute_utils.notify_about_instance_create( context, instance, self.host, phase=fields.NotificationPhase.ERROR, exception=e, - bdms=block_device_mapping) + bdms=block_device_mapping, tb=tb) msg = _('Failed to allocate the network(s) with error %s, ' 'not rescheduling.') % e.format_message() raise exception.BuildAbortException(instance_uuid=instance.uuid, @@ -2062,10 +2066,11 @@ class ComputeManager(manager.Manager): instance=instance) self._notify_about_instance_usage(context, instance, 'create.error', fault=e) + tb = traceback.format_exc() compute_utils.notify_about_instance_create( context, instance, self.host, phase=fields.NotificationPhase.ERROR, exception=e, - bdms=block_device_mapping) + bdms=block_device_mapping, tb=tb) msg = _('Failed to allocate the network(s), not rescheduling.') raise exception.BuildAbortException(instance_uuid=instance.uuid, reason=msg) @@ -2084,19 +2089,21 @@ class ComputeManager(manager.Manager): exception.RequestedVRamTooHigh) as e: self._notify_about_instance_usage(context, instance, 'create.error', fault=e) + tb = traceback.format_exc() compute_utils.notify_about_instance_create( context, instance, self.host, phase=fields.NotificationPhase.ERROR, exception=e, - bdms=block_device_mapping) + bdms=block_device_mapping, tb=tb) raise exception.BuildAbortException(instance_uuid=instance.uuid, reason=e.format_message()) except Exception as e: self._notify_about_instance_usage(context, instance, 'create.error', fault=e) + tb = traceback.format_exc() compute_utils.notify_about_instance_create( context, instance, self.host, phase=fields.NotificationPhase.ERROR, exception=e, - bdms=block_device_mapping) + bdms=block_device_mapping, tb=tb) raise exception.RescheduledException( instance_uuid=instance.uuid, reason=six.text_type(e)) @@ -2128,10 +2135,11 @@ class ComputeManager(manager.Manager): with excutils.save_and_reraise_exception(): self._notify_about_instance_usage(context, instance, 'create.error', fault=e) + tb = traceback.format_exc() compute_utils.notify_about_instance_create( context, instance, self.host, phase=fields.NotificationPhase.ERROR, exception=e, - bdms=block_device_mapping) + bdms=block_device_mapping, tb=tb) self._update_scheduler_instance_info(context, instance) self._notify_about_instance_usage(context, instance, 'create.end', @@ -2775,11 +2783,13 @@ class ComputeManager(manager.Manager): block_device_info=new_block_device_info) def _notify_instance_rebuild_error(self, context, instance, error, bdms): + tb = traceback.format_exc() self._notify_about_instance_usage(context, instance, 'rebuild.error', fault=error) compute_utils.notify_about_instance_rebuild( context, instance, self.host, - phase=fields.NotificationPhase.ERROR, exception=error, bdms=bdms) + phase=fields.NotificationPhase.ERROR, exception=error, bdms=bdms, + tb=tb) @messaging.expected_exceptions(exception.PreserveEphemeralNotSupported) @wrap_exception() @@ -3229,11 +3239,12 @@ class ComputeManager(manager.Manager): instance, error, exc_info) self._notify_about_instance_usage(context, instance, 'reboot.error', fault=error) + tb = traceback.format_exc() compute_utils.notify_about_instance_action( context, instance, self.host, action=fields.NotificationAction.REBOOT, phase=fields.NotificationPhase.ERROR, - exception=error, bdms=bdms + exception=error, bdms=bdms, tb=tb ) ctxt.reraise = False else: @@ -4202,7 +4213,8 @@ class ComputeManager(manager.Manager): context, instance, self.host, action=fields.NotificationAction.RESIZE, phase=fields.NotificationPhase.ERROR, - exception=error) + exception=error, + tb=','.join(traceback.format_exception(*exc_info))) if rescheduled: self._log_original_error(exc_info, instance_uuid) compute_utils.add_instance_fault_from_exc(context, @@ -4213,7 +4225,8 @@ class ComputeManager(manager.Manager): context, instance, self.host, action=fields.NotificationAction.RESIZE, phase=fields.NotificationPhase.ERROR, - exception=exc_info[1]) + exception=exc_info[1], + tb=','.join(traceback.format_exception(*exc_info))) else: # not re-scheduling six.reraise(*exc_info) @@ -5360,12 +5373,13 @@ class ComputeManager(manager.Manager): bdm['attachment_id']) else: self.volume_api.unreserve_volume(context, bdm.volume_id) + tb = traceback.format_exc() compute_utils.notify_about_volume_attach_detach( context, instance, self.host, action=fields.NotificationAction.VOLUME_ATTACH, phase=fields.NotificationPhase.ERROR, exception=e, - volume_id=bdm.volume_id) + volume_id=bdm.volume_id, tb=tb) info = {'volume_id': bdm.volume_id} self._notify_about_instance_usage( @@ -5561,10 +5575,11 @@ class ComputeManager(manager.Manager): except Exception as ex: failed = True with excutils.save_and_reraise_exception(): + tb = traceback.format_exc() compute_utils.notify_about_volume_swap( context, instance, self.host, fields.NotificationPhase.ERROR, - old_volume_id, new_volume_id, ex) + old_volume_id, new_volume_id, ex, tb) if new_cinfo: msg = ("Failed to swap volume %(old_volume_id)s " "for %(new_volume_id)s") @@ -5799,11 +5814,12 @@ class ComputeManager(manager.Manager): LOG.warning("deallocate port %(port_id)s failed", {'port_id': port_id}, instance=instance) + tb = traceback.format_exc() compute_utils.notify_about_instance_action( context, instance, self.host, action=fields.NotificationAction.INTERFACE_ATTACH, phase=fields.NotificationPhase.ERROR, - exception=ex) + exception=ex, tb=tb) raise exception.InterfaceAttachFailed( instance_uuid=instance.uuid) diff --git a/nova/compute/utils.py b/nova/compute/utils.py index b7d83e3bd6e0..2a9a18a95da3 100644 --- a/nova/compute/utils.py +++ b/nova/compute/utils.py @@ -366,14 +366,14 @@ def notify_about_instance_usage(notifier, context, instance, event_suffix, method(context, 'compute.instance.%s' % event_suffix, usage_info) -def _get_fault_and_priority_from_exc(exception): +def _get_fault_and_priority_from_exc_and_tb(exception, tb): fault = None priority = fields.NotificationPriority.INFO if exception: priority = fields.NotificationPriority.ERROR - fault = notification_exception.ExceptionPayload.from_exception( - exception) + fault = notification_exception.ExceptionPayload.from_exc_and_traceback( + exception, tb) return fault, priority @@ -381,7 +381,7 @@ def _get_fault_and_priority_from_exc(exception): @rpc.if_notifications_enabled def notify_about_instance_action(context, instance, host, action, phase=None, source=fields.NotificationSource.COMPUTE, - exception=None, bdms=None): + exception=None, bdms=None, tb=None): """Send versioned notification about the action made on the instance :param instance: the instance which the action performed on :param host: the host emitting the notification @@ -391,8 +391,9 @@ def notify_about_instance_action(context, instance, host, action, phase=None, :param exception: the thrown exception (used in error notifications) :param bdms: BlockDeviceMappingList object for the instance. If it is not provided then we will load it from the db if so configured + :param tb: the traceback (used in error notifications) """ - fault, priority = _get_fault_and_priority_from_exc(exception) + fault, priority = _get_fault_and_priority_from_exc_and_tb(exception, tb) payload = instance_notification.InstanceActionPayload( context=context, instance=instance, @@ -413,7 +414,7 @@ def notify_about_instance_action(context, instance, host, action, phase=None, @rpc.if_notifications_enabled def notify_about_instance_create(context, instance, host, phase=None, - exception=None, bdms=None): + exception=None, bdms=None, tb=None): """Send versioned notification about instance creation :param context: the request context @@ -423,8 +424,9 @@ def notify_about_instance_create(context, instance, host, phase=None, :param exception: the thrown exception (used in error notifications) :param bdms: BlockDeviceMappingList object for the instance. If it is not provided then we will load it from the db if so configured + :param tb: the traceback (used in error notifications) """ - fault, priority = _get_fault_and_priority_from_exc(exception) + fault, priority = _get_fault_and_priority_from_exc_and_tb(exception, tb) payload = instance_notification.InstanceCreatePayload( context=context, instance=instance, @@ -445,7 +447,7 @@ def notify_about_instance_create(context, instance, host, phase=None, @rpc.if_notifications_enabled def notify_about_volume_attach_detach(context, instance, host, action, phase, - volume_id=None, exception=None): + volume_id=None, exception=None, tb=None): """Send versioned notification about the action made on the instance :param instance: the instance which the action performed on :param host: the host emitting the notification @@ -453,8 +455,9 @@ def notify_about_volume_attach_detach(context, instance, host, action, phase, :param phase: the phase of the action :param volume_id: id of the volume will be attached :param exception: the thrown exception (used in error notifications) + :param tb: the traceback (used in error notifications) """ - fault, priority = _get_fault_and_priority_from_exc(exception) + fault, priority = _get_fault_and_priority_from_exc_and_tb(exception, tb) payload = instance_notification.InstanceActionVolumePayload( context=context, instance=instance, @@ -474,8 +477,9 @@ def notify_about_volume_attach_detach(context, instance, host, action, phase, @rpc.if_notifications_enabled -def notify_about_instance_rescue_action( - context, instance, host, rescue_image_ref, phase=None, exception=None): +def notify_about_instance_rescue_action(context, instance, host, + rescue_image_ref, phase=None, + exception=None, tb=None): """Send versioned notification about the action made on the instance :param instance: the instance which the action performed on @@ -483,8 +487,9 @@ def notify_about_instance_rescue_action( :param rescue_image_ref: the rescue image ref :param phase: the phase of the action :param exception: the thrown exception (used in error notifications) + :param tb: the traceback (used in error notifications) """ - fault, priority = _get_fault_and_priority_from_exc(exception) + fault, priority = _get_fault_and_priority_from_exc_and_tb(exception, tb) payload = instance_notification.InstanceActionRescuePayload( context=context, instance=instance, @@ -528,7 +533,8 @@ def notify_about_keypair_action(context, keypair, action, phase): @rpc.if_notifications_enabled def notify_about_volume_swap(context, instance, host, phase, - old_volume_id, new_volume_id, exception=None): + old_volume_id, new_volume_id, exception=None, + tb=None): """Send versioned notification about the volume swap action on the instance @@ -539,8 +545,9 @@ def notify_about_volume_swap(context, instance, host, phase, :param old_volume_id: the ID of the volume that is copied from and detached :param new_volume_id: the ID of the volume that is copied to and attached :param exception: an exception + :param tb: the traceback (used in error notifications) """ - fault, priority = _get_fault_and_priority_from_exc(exception) + fault, priority = _get_fault_and_priority_from_exc_and_tb(exception, tb) payload = instance_notification.InstanceActionVolumeSwapPayload( context=context, instance=instance, @@ -717,7 +724,7 @@ def notify_about_server_group_add_member(context, group_id): @rpc.if_notifications_enabled def notify_about_instance_rebuild(context, instance, host, phase=None, - exception=None, bdms=None): + exception=None, bdms=None, tb=None): """Send versioned notification about instance rebuild :param instance: the instance which the action performed on @@ -726,8 +733,9 @@ def notify_about_instance_rebuild(context, instance, host, phase=None, :param exception: the thrown exception (used in error notifications) :param bdms: BlockDeviceMappingList object for the instance. If it is not provided then we will load it from the db if so configured + :param tb: the traceback (used in error notifications) """ - fault, priority = _get_fault_and_priority_from_exc(exception) + fault, priority = _get_fault_and_priority_from_exc_and_tb(exception, tb) payload = instance_notification.InstanceActionRebuildPayload( context=context, instance=instance, diff --git a/nova/exception_wrapper.py b/nova/exception_wrapper.py index c5d932c6dc68..bc4c8717456c 100644 --- a/nova/exception_wrapper.py +++ b/nova/exception_wrapper.py @@ -12,6 +12,7 @@ import functools import inspect +import traceback from oslo_utils import excutils @@ -27,15 +28,16 @@ CONF = nova.conf.CONF def _emit_exception_notification(notifier, context, ex, function_name, args, - source): + source, trace_back): _emit_legacy_exception_notification(notifier, context, ex, function_name, args) - _emit_versioned_exception_notification(context, ex, source) + _emit_versioned_exception_notification(context, ex, source, trace_back) @rpc.if_notifications_enabled -def _emit_versioned_exception_notification(context, ex, source): - versioned_exception_payload = exception.ExceptionPayload.from_exception(ex) +def _emit_versioned_exception_notification(context, ex, source, trace_back): + versioned_exception_payload = \ + exception.ExceptionPayload.from_exc_and_traceback(ex, trace_back) publisher = base.NotificationPublisher(host=CONF.host, source=source) event_type = base.EventType( object='compute', @@ -66,6 +68,7 @@ def wrap_exception(notifier=None, get_notifier=None, binary=None): try: return f(self, context, *args, **kw) except Exception as e: + tb = traceback.format_exc() with excutils.save_and_reraise_exception(): if notifier or get_notifier: call_dict = _get_call_dict( @@ -73,7 +76,7 @@ def wrap_exception(notifier=None, get_notifier=None, binary=None): function_name = f.__name__ _emit_exception_notification( notifier or get_notifier(), context, e, - function_name, call_dict, binary) + function_name, call_dict, binary, tb) return functools.wraps(f)(wrapped) return inner diff --git a/nova/notifications/objects/exception.py b/nova/notifications/objects/exception.py index e0e5016f47d8..6ebb927b279e 100644 --- a/nova/notifications/objects/exception.py +++ b/nova/notifications/objects/exception.py @@ -21,24 +21,27 @@ from nova.objects import fields @nova_base.NovaObjectRegistry.register_notification class ExceptionPayload(base.NotificationPayloadBase): # Version 1.0: Initial version - VERSION = '1.0' + # Version 1.1: Add traceback field to ExceptionPayload + VERSION = '1.1' fields = { 'module_name': fields.StringField(), 'function_name': fields.StringField(), 'exception': fields.StringField(), - 'exception_message': fields.StringField() + 'exception_message': fields.StringField(), + 'traceback': fields.StringField() } def __init__(self, module_name, function_name, exception, - exception_message): + exception_message, traceback): super(ExceptionPayload, self).__init__() self.module_name = module_name self.function_name = function_name self.exception = exception self.exception_message = exception_message + self.traceback = traceback @classmethod - def from_exception(cls, fault): + def from_exc_and_traceback(cls, fault, traceback): trace = inspect.trace()[-1] # TODO(gibi): apply strutils.mask_password on exception_message and # consider emitting the exception_message only if the safe flag is @@ -49,7 +52,8 @@ class ExceptionPayload(base.NotificationPayloadBase): function_name=trace[3], module_name=module_name, exception=fault.__class__.__name__, - exception_message=six.text_type(fault)) + exception_message=six.text_type(fault), + traceback=traceback) @base.notification_sample('compute-exception.json') diff --git a/nova/tests/functional/notification_sample_tests/test_exception_notification.py b/nova/tests/functional/notification_sample_tests/test_exception_notification.py index c13176855a23..a21acb0c952b 100644 --- a/nova/tests/functional/notification_sample_tests/test_exception_notification.py +++ b/nova/tests/functional/notification_sample_tests/test_exception_notification.py @@ -34,6 +34,11 @@ class TestExceptionNotificationSample( self.admin_api.api_post, 'os-aggregates', post) self.assertEqual(4, len(fake_notifier.VERSIONED_NOTIFICATIONS)) + traceback = fake_notifier.VERSIONED_NOTIFICATIONS[3][ + 'payload']['nova_object.data']['traceback'] + self.assertIn('AggregateNameExists', traceback) self._verify_notification( 'compute-exception', + replacements={ + 'traceback': self.ANY}, actual=fake_notifier.VERSIONED_NOTIFICATIONS[3]) diff --git a/nova/tests/functional/notification_sample_tests/test_instance.py b/nova/tests/functional/notification_sample_tests/test_instance.py index 48d0b295ee08..c489abb316c3 100644 --- a/nova/tests/functional/notification_sample_tests/test_instance.py +++ b/nova/tests/functional/notification_sample_tests/test_instance.py @@ -367,6 +367,10 @@ class TestInstanceNotificationSample( self.assertEqual(2, len(fake_notifier.VERSIONED_NOTIFICATIONS)) + tb = fake_notifier.VERSIONED_NOTIFICATIONS[1]['payload'][ + 'nova_object.data']['fault']['nova_object.data']['traceback'] + self.assertIn('raise exception.FlavorDiskTooSmall()', tb) + self._verify_notification( 'instance-create-start', replacements={ @@ -377,7 +381,8 @@ class TestInstanceNotificationSample( 'instance-create-error', replacements={ 'reservation_id': server['reservation_id'], - 'uuid': server['id']}, + 'uuid': server['id'], + 'fault.traceback': self.ANY}, actual=fake_notifier.VERSIONED_NOTIFICATIONS[1]) def test_instance_exists_usage_audit(self): @@ -910,10 +915,16 @@ class TestInstanceNotificationSample( fake_notifier.VERSIONED_NOTIFICATIONS) # Note(gibi): There is also an instance.exists notification emitted # during the rescheduling + + tb = fake_notifier.VERSIONED_NOTIFICATIONS[2]['payload'][ + 'nova_object.data']['fault']['nova_object.data']['traceback'] + self.assertIn("raise exception.FlavorDiskTooSmall()", tb) + self._verify_notification('instance-resize-error', replacements={ 'reservation_id': server['reservation_id'], - 'uuid': server['id'] + 'uuid': server['id'], + 'fault.traceback': self.ANY }, actual=fake_notifier.VERSIONED_NOTIFICATIONS[2]) @@ -970,10 +981,16 @@ class TestInstanceNotificationSample( self.assertEqual(5, len(fake_notifier.VERSIONED_NOTIFICATIONS), 'Unexpected number of notifications: %s' % fake_notifier.VERSIONED_NOTIFICATIONS) + + tb = fake_notifier.VERSIONED_NOTIFICATIONS[2]['payload'][ + 'nova_object.data']['fault']['nova_object.data']['traceback'] + self.assertIn("raise exception.FlavorDiskTooSmall()", tb) + self._verify_notification('instance-resize-error', replacements={ 'reservation_id': server['reservation_id'], - 'uuid': server['id'] + 'uuid': server['id'], + 'fault.traceback': self.ANY }, actual=fake_notifier.VERSIONED_NOTIFICATIONS[2]) @@ -1146,12 +1163,18 @@ class TestInstanceNotificationSample( self._wait_for_state_change(self.api, server, expected_status='ERROR') notification = self._get_notifications('instance.rebuild.error') self.assertEqual(1, len(notification)) + + tb = fake_notifier.VERSIONED_NOTIFICATIONS[0]['payload'][ + 'nova_object.data']['fault']['nova_object.data']['traceback'] + self.assertIn('raise exception.VirtualInterfaceCreateException()', tb) + self._verify_notification( 'instance-rebuild-error', replacements={ 'reservation_id': server['reservation_id'], 'uuid': server['id'], - 'trusted_image_certificates': None}, + 'trusted_image_certificates': None, + 'fault.traceback': self.ANY}, actual=notification[0]) def _test_restore_server(self, server): @@ -1207,6 +1230,11 @@ class TestInstanceNotificationSample( self._wait_for_notification('instance.reboot.start') self._wait_for_notification('instance.reboot.error') self.assertEqual(2, len(fake_notifier.VERSIONED_NOTIFICATIONS)) + + tb = fake_notifier.VERSIONED_NOTIFICATIONS[1]['payload'][ + 'nova_object.data']['fault']['nova_object.data']['traceback'] + self.assertIn("raise exception.UnsupportedVirtType", tb) + self._verify_notification( 'instance-reboot-start', replacements={ @@ -1217,7 +1245,8 @@ class TestInstanceNotificationSample( 'instance-reboot-error', replacements={ 'reservation_id': server['reservation_id'], - 'uuid': server['id']}, + 'uuid': server['id'], + 'fault.traceback': self.ANY}, actual=fake_notifier.VERSIONED_NOTIFICATIONS[1]) def _detach_volume_from_server(self, server, volume_id): @@ -1312,12 +1341,21 @@ class TestInstanceNotificationSample( 'reservation_id': server['reservation_id'], 'uuid': server['id']}, actual=fake_notifier.VERSIONED_NOTIFICATIONS[5]) + + tb1 = fake_notifier.VERSIONED_NOTIFICATIONS[6]['payload'][ + 'nova_object.data']['fault']['nova_object.data']['traceback'] + self.assertIn("_swap_volume", tb1) + tb2 = fake_notifier.VERSIONED_NOTIFICATIONS[7]['payload'][ + 'nova_object.data']['traceback'] + self.assertIn("_swap_volume", tb2) + self._verify_notification( 'instance-volume_swap-error', replacements={ 'reservation_id': server['reservation_id'], 'block_devices': block_devices, - 'uuid': server['id']}, + 'uuid': server['id'], + 'fault.traceback': self.ANY}, actual=fake_notifier.VERSIONED_NOTIFICATIONS[6]) def test_resize_confirm_server(self): @@ -1525,13 +1563,19 @@ class TestInstanceNotificationSample( 'volume_id': self.cinder.SWAP_NEW_VOL, 'uuid': server['id']}, actual=fake_notifier.VERSIONED_NOTIFICATIONS[0]) + + tb = fake_notifier.VERSIONED_NOTIFICATIONS[1]['payload'][ + 'nova_object.data']['fault']['nova_object.data']['traceback'] + self.assertIn("CinderConnectionFailed:", tb) + self._verify_notification( 'instance-volume_attach-error', replacements={ 'reservation_id': server['reservation_id'], 'block_devices': block_devices, 'volume_id': self.cinder.SWAP_NEW_VOL, - 'uuid': server['id']}, + 'uuid': server['id'], + 'fault.traceback': self.ANY}, actual=fake_notifier.VERSIONED_NOTIFICATIONS[1]) @mock.patch('nova.volume.cinder.API.attachment_update') @@ -1605,11 +1649,17 @@ class TestInstanceNotificationSample( 'reservation_id': server['reservation_id'], 'uuid': server['id']}, actual=fake_notifier.VERSIONED_NOTIFICATIONS[0]) + + tb = fake_notifier.VERSIONED_NOTIFICATIONS[1]['payload'][ + 'nova_object.data']['fault']['nova_object.data']['traceback'] + self.assertIn("raise exception.InterfaceAttachFailed", tb) + self._verify_notification( 'instance-interface_attach-error', replacements={ 'reservation_id': server['reservation_id'], - 'uuid': server['id']}, + 'uuid': server['id'], + 'fault.traceback': self.ANY}, actual=fake_notifier.VERSIONED_NOTIFICATIONS[1]) diff --git a/nova/tests/unit/compute/test_compute.py b/nova/tests/unit/compute/test_compute.py index 61b8999a81a3..17f929ea11e1 100644 --- a/nova/tests/unit/compute/test_compute.py +++ b/nova/tests/unit/compute/test_compute.py @@ -428,7 +428,8 @@ class ComputeVolumeTestCase(BaseTestCase): mock.call(self.context, instance, 'fake-mini', action='volume_attach', phase='error', volume_id=uuids.volume_id, - exception=expected_exception), + exception=expected_exception, + tb=mock.ANY), ]) mock_event.assert_called_once_with( self.context, 'compute_attach_volume', CONF.host, @@ -468,7 +469,8 @@ class ComputeVolumeTestCase(BaseTestCase): mock.call(self.context, instance, 'fake-mini', action='volume_attach', phase='error', volume_id=uuids.volume_id, - exception=expected_exception), + exception=expected_exception, + tb=mock.ANY), ]) @mock.patch.object(compute_utils, 'EventReporter') @@ -3182,7 +3184,7 @@ class ComputeTestCase(BaseTestCase, notify_action_call_list.append( mock.call(econtext, instance, 'fake-mini', action='reboot', phase='error', exception=fault, - bdms=bdms)) + bdms=bdms, tb=mock.ANY)) notify_call_list.append(mock.call(econtext, instance, 'reboot.end')) notify_action_call_list.append( @@ -10253,7 +10255,7 @@ class ComputeAPITestCase(BaseTestCase): mock.call(self.context, instance, self.compute.host, action='interface_attach', exception=mock_attach.side_effect, - phase='error')]) + phase='error', tb=mock.ANY)]) @mock.patch.object(compute_utils, 'notify_about_instance_action') def test_detach_interface(self, mock_notify): @@ -12529,7 +12531,7 @@ class ComputeRescheduleResizeOrReraiseTestCase(BaseTestCase): task_states.RESIZE_PREP, exc_info, host_list=None) mock_notify.assert_called_once_with( self.context, instance, 'fake-mini', action='resize', - phase='error', exception=mock_res.side_effect) + phase='error', exception=mock_res.side_effect, tb=mock.ANY) @mock.patch.object(compute_manager.ComputeManager, "_reschedule") def test_reschedule_false(self, mock_res): diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 12ebbdd8e2d5..136f2beb7c04 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -2031,7 +2031,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase): self.compute.host, fields.NotificationPhase.ERROR, uuids.old_volume, uuids.new_volume, - test.MatchType(expected_exception)) + test.MatchType(expected_exception), mock.ANY) else: self.compute.swap_volume(self.context, uuids.old_volume, uuids.new_volume, instance1, None) @@ -3987,7 +3987,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase): ) mock_notify.assert_called_once_with( mock.ANY, instance, 'fake-mini', phase='error', exception=exc, - bdms=None) + bdms=None, tb=mock.ANY) def test_rebuild_deleting(self): instance = fake_instance.fake_instance_obj(self.context) @@ -4115,7 +4115,7 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase): elevated_context, instance, 'fake-node', node_type='destination') mock_notify.assert_called_once_with( elevated_context, instance, 'fake-mini', bdms=None, exception=exc, - phase='error') + phase='error', tb=mock.ANY) def test_rebuild_node_not_updated_if_not_recreate(self): node = uuidutils.generate_uuid() # ironic node uuid @@ -5567,7 +5567,7 @@ class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase): mock.call(self.context, self.instance, 'fake-mini', phase='start', bdms=[]), mock.call(self.context, self.instance, 'fake-mini', - phase='error', exception=exc, bdms=[])]) + phase='error', exception=exc, bdms=[], tb=mock.ANY)]) save.assert_has_calls([ mock.call(), diff --git a/nova/tests/unit/compute/test_compute_utils.py b/nova/tests/unit/compute/test_compute_utils.py index c834f9183b8d..311db73209ae 100644 --- a/nova/tests/unit/compute/test_compute_utils.py +++ b/nova/tests/unit/compute/test_compute_utils.py @@ -18,6 +18,7 @@ import copy import string +import traceback import mock from oslo_serialization import jsonutils @@ -733,10 +734,11 @@ class UsageInfoTestCase(test.TestCase): # To get exception trace, raise and catch an exception raise test.TestingException('Volume swap error.') except Exception as ex: + tb = traceback.format_exc() compute_utils.notify_about_volume_swap( self.context, instance, 'fake-compute', fields.NotificationPhase.ERROR, - uuids.old_volume_id, uuids.new_volume_id, ex) + uuids.old_volume_id, uuids.new_volume_id, ex, tb) self.assertEqual(len(fake_notifier.VERSIONED_NOTIFICATIONS), 1) notification = fake_notifier.VERSIONED_NOTIFICATIONS[0] @@ -776,6 +778,8 @@ class UsageInfoTestCase(test.TestCase): exception_payload['function_name']) self.assertEqual('nova.tests.unit.compute.test_compute_utils', exception_payload['module_name']) + self.assertIn('test_notify_about_volume_swap_with_error', + exception_payload['traceback']) def test_notify_about_instance_rescue_action(self): instance = create_instance(self.context) diff --git a/nova/tests/unit/notifications/objects/test_notification.py b/nova/tests/unit/notifications/objects/test_notification.py index fc664cee62d5..b97633d974ab 100644 --- a/nova/tests/unit/notifications/objects/test_notification.py +++ b/nova/tests/unit/notifications/objects/test_notification.py @@ -371,7 +371,7 @@ notification_object_data = { 'BlockDevicePayload': '1.0-29751e1b6d41b1454e36768a1e764df8', 'EventType': '1.10-2a6ab743d767837366ab1aec387d7c3b', 'ExceptionNotification': '1.0-a73147b93b520ff0061865849d3dfa56', - 'ExceptionPayload': '1.0-27db46ee34cd97e39f2643ed92ad0cc5', + 'ExceptionPayload': '1.1-6c43008bd81885a63bc7f7c629f0793b', 'FlavorNotification': '1.0-a73147b93b520ff0061865849d3dfa56', 'FlavorPayload': '1.4-2e7011b8b4e59167fe8b7a0a81f0d452', 'InstanceActionNotification': '1.0-a73147b93b520ff0061865849d3dfa56', diff --git a/nova/tests/unit/test_exception.py b/nova/tests/unit/test_exception.py index a0dcb07037ed..5499bce883e5 100644 --- a/nova/tests/unit/test_exception.py +++ b/nova/tests/unit/test_exception.py @@ -104,7 +104,7 @@ class WrapExceptionTestCase(test.NoDBTestCase): payload = notification['payload'] self.assertEqual('ExceptionPayload', payload['nova_object.name']) - self.assertEqual('1.0', payload['nova_object.version']) + self.assertEqual('1.1', payload['nova_object.version']) payload = payload['nova_object.data'] self.assertEqual('TestingException', payload['exception']) @@ -112,6 +112,7 @@ class WrapExceptionTestCase(test.NoDBTestCase): self.assertEqual('bad_function_exception', payload['function_name']) self.assertEqual('nova.tests.unit.test_exception', payload['module_name']) + self.assertIn('bad_function_exception', payload['traceback']) @mock.patch('nova.rpc.NOTIFIER') @mock.patch('nova.notifications.objects.exception.' diff --git a/releasenotes/notes/add-full-traceback-to-exceptionpayload-06cf8d55d2918eab.yaml b/releasenotes/notes/add-full-traceback-to-exceptionpayload-06cf8d55d2918eab.yaml new file mode 100644 index 000000000000..83080b77b6f3 --- /dev/null +++ b/releasenotes/notes/add-full-traceback-to-exceptionpayload-06cf8d55d2918eab.yaml @@ -0,0 +1,10 @@ +--- +features: + - | + A new ``traceback`` field has been added to each versioned instance + notification. In an error notification this field contains the full + traceback string of the exception which caused the error notification. + See the `notification dev ref`_ for the sample file of + ``instance.create.error`` as an example. + + .. _notification dev ref: https://docs.openstack.org/nova/latest/reference/notifications.html#existing-versioned-notifications