Send an instance create error notification

Send a 'create.error' notification if instance creation (run_instance)
fails.

This will make it easier for external notification consuming systems to
learn when instance build attempts fail within the nova-compute layer.

blueprint create-error-notification

Change-Id: I62d7c9c80c51241bf124509af7cdd8484d9ea2d3
This commit is contained in:
Brian Elliott
2013-04-26 15:15:23 +00:00
parent 433e6885fd
commit 0c0a211f9f
3 changed files with 89 additions and 6 deletions

View File

@@ -838,10 +838,14 @@ class ComputeManager(manager.SchedulerDependentManager):
extra_usage_info = {} extra_usage_info = {}
def notify(status, msg=None): def notify(status, msg=None):
"""Send a create.{start,end} notification.""" """Send a create.{start,error,end} notification."""
type_ = "create.%(status)s" % dict(status=status) type_ = "create.%(status)s" % dict(status=status)
info = extra_usage_info.copy()
if not msg:
msg = ""
info['message'] = msg
self._notify_about_instance_usage(context, instance, type_, self._notify_about_instance_usage(context, instance, type_,
extra_usage_info=extra_usage_info) extra_usage_info=info)
try: try:
image_meta = self._prebuild_instance(context, instance) image_meta = self._prebuild_instance(context, instance)
@@ -853,20 +857,22 @@ class ComputeManager(manager.SchedulerDependentManager):
instance = self._build_instance(context, request_spec, instance = self._build_instance(context, request_spec,
filter_properties, requested_networks, injected_files, filter_properties, requested_networks, injected_files,
admin_password, is_first_time, node, instance, image_meta) admin_password, is_first_time, node, instance, image_meta)
notify("end") # notify that build is done notify("end", msg=_("Success")) # notify that build is done
except exception.RescheduledException as e: except exception.RescheduledException as e:
# Instance build encountered an error, and has been rescheduled. # Instance build encountered an error, and has been rescheduled.
pass notify("error", msg=unicode(e)) # notify that build failed
except exception.BuildAbortException as e: except exception.BuildAbortException as e:
# Instance build aborted due to a non-failure # Instance build aborted due to a non-failure
LOG.info(e) LOG.info(e)
notify("end", msg=unicode(e)) # notify that build is done
except Exception as e: except Exception as e:
# Instance build encountered a non-recoverable error: # Instance build encountered a non-recoverable error:
with excutils.save_and_reraise_exception(): with excutils.save_and_reraise_exception():
self._set_instance_error_state(context, instance['uuid']) self._set_instance_error_state(context, instance['uuid'])
notify("error", msg=unicode(e)) # notify that build failed
def _prebuild_instance(self, context, instance): def _prebuild_instance(self, context, instance):
self._check_instance_exists(context, instance) self._check_instance_exists(context, instance)

View File

@@ -245,9 +245,14 @@ def notify_about_instance_usage(context, instance, event_suffix,
usage_info = notifications.info_from_instance(context, instance, usage_info = notifications.info_from_instance(context, instance,
network_info, system_metadata, **extra_usage_info) network_info, system_metadata, **extra_usage_info)
if event_suffix.endswith("error"):
level = notifier_api.ERROR
else:
level = notifier_api.INFO
notifier_api.notify(context, 'compute.%s' % host, notifier_api.notify(context, 'compute.%s' % host,
'compute.instance.%s' % event_suffix, 'compute.instance.%s' % event_suffix, level,
notifier_api.INFO, usage_info) usage_info)
def get_nw_info_for_instance(instance): def get_nw_info_for_instance(instance):

View File

@@ -2011,9 +2011,81 @@ class ComputeTestCase(BaseTestCase):
self.assertTrue(payload['launched_at']) self.assertTrue(payload['launched_at'])
image_ref_url = glance.generate_image_url(FAKE_IMAGE_REF) image_ref_url = glance.generate_image_url(FAKE_IMAGE_REF)
self.assertEquals(payload['image_ref_url'], image_ref_url) self.assertEquals(payload['image_ref_url'], image_ref_url)
self.assertEqual('Success', payload['message'])
self.compute.terminate_instance(self.context, self.compute.terminate_instance(self.context,
instance=jsonutils.to_primitive(inst_ref)) instance=jsonutils.to_primitive(inst_ref))
def test_run_instance_end_notification_on_abort(self):
    # Test that an end notif is sent if the build is aborted.
    # Expected sequence: create.start, then create.end at INFO priority,
    # with the abort reason carried in the payload's 'message' field.
    instance = jsonutils.to_primitive(self._create_fake_instance())
    instance_uuid = instance['uuid']

    def build_inst_abort(*args, **kwargs):
        # Stand-in for _build_instance that always aborts the build.
        raise exception.BuildAbortException(reason="already deleted",
                                            instance_uuid=instance_uuid)

    self.stubs.Set(self.compute, '_build_instance', build_inst_abort)

    # Called without assertRaises: the abort must not propagate to the
    # caller of run_instance.
    self.compute.run_instance(self.context, instance=instance)

    # Exactly two notifications: start and end.
    self.assertEqual(len(test_notifier.NOTIFICATIONS), 2)
    msg = test_notifier.NOTIFICATIONS[0]
    self.assertEqual(msg['event_type'], 'compute.instance.create.start')
    msg = test_notifier.NOTIFICATIONS[1]
    self.assertEqual(msg['event_type'], 'compute.instance.create.end')
    self.assertEqual('INFO', msg['priority'])
    payload = msg['payload']
    message = payload['message']
    # The abort reason must be surfaced to notification consumers.
    self.assertTrue("already deleted" in message)
def test_run_instance_error_notification_on_reschedule(self):
    # Test that error notif is sent if the build got rescheduled.
    # Expected sequence: create.start, then create.error at ERROR
    # priority, with the failure reason in the payload's 'message' field.
    instance = jsonutils.to_primitive(self._create_fake_instance())
    instance_uuid = instance['uuid']

    def build_inst_fail(*args, **kwargs):
        # Stand-in for _build_instance that always reports a reschedule.
        raise exception.RescheduledException(
            instance_uuid=instance_uuid,
            reason="something bad happened")

    self.stubs.Set(self.compute, '_build_instance', build_inst_fail)

    # Called without assertRaises: a reschedule must not propagate to
    # the caller of run_instance.
    self.compute.run_instance(self.context, instance=instance)

    # Only the first two notifications are inspected; further ones are
    # tolerated.
    self.assertTrue(len(test_notifier.NOTIFICATIONS) >= 2)
    msg = test_notifier.NOTIFICATIONS[0]
    self.assertEqual(msg['event_type'], 'compute.instance.create.start')
    msg = test_notifier.NOTIFICATIONS[1]
    self.assertEqual(msg['event_type'], 'compute.instance.create.error')
    self.assertEqual('ERROR', msg['priority'])
    payload = msg['payload']
    message = payload['message']
    # The reschedule reason must be surfaced to notification consumers.
    self.assertTrue("something bad happened" in message)
def test_run_instance_error_notification_on_failure(self):
    # Test that error notif is sent if build fails hard.
    # Expected sequence: create.start, then create.error at ERROR
    # priority, with the exception text in the payload's 'message' field.
    instance = jsonutils.to_primitive(self._create_fake_instance())

    def build_inst_fail(*args, **kwargs):
        # Stand-in for _build_instance that fails with a generic error.
        raise test.TestingException("i'm dying")

    self.stubs.Set(self.compute, '_build_instance', build_inst_fail)

    # Unlike the abort/reschedule cases, a generic failure is expected to
    # be re-raised to the caller of run_instance.
    self.assertRaises(test.TestingException, self.compute.run_instance,
                      self.context, instance=instance)

    # Only the first two notifications are inspected; further ones are
    # tolerated.
    self.assertTrue(len(test_notifier.NOTIFICATIONS) >= 2)
    msg = test_notifier.NOTIFICATIONS[0]
    self.assertEqual(msg['event_type'], 'compute.instance.create.start')
    msg = test_notifier.NOTIFICATIONS[1]
    self.assertEqual(msg['event_type'], 'compute.instance.create.error')
    self.assertEqual('ERROR', msg['priority'])
    payload = msg['payload']
    message = payload['message']
    # The exception text must be surfaced to notification consumers.
    self.assertTrue("i'm dying" in message)
def test_terminate_usage_notification(self): def test_terminate_usage_notification(self):
# Ensure terminate_instance generates correct usage notification. # Ensure terminate_instance generates correct usage notification.
old_time = datetime.datetime(2012, 4, 1) old_time = datetime.datetime(2012, 4, 1)