Improve error handling of scheduler

Modifies scheduler error handling to record instance faults and to
reset task_state to None on failure.

Related to bug 1051066

Change-Id: Id9f36a75370849db7baf3fe24ce96c6f4284255d
This commit is contained in:
Vishvananda Ishaya 2012-09-17 16:09:41 -07:00
parent 27cbff55fe
commit 502bc22000
2 changed files with 57 additions and 47 deletions

View File

@ -22,7 +22,9 @@ Scheduler Service
"""
import functools
import sys
from nova.compute import utils as compute_utils
from nova.compute import vm_states
from nova import db
from nova import exception
@ -104,12 +106,14 @@ class SchedulerManager(manager.Manager):
except exception.NoValidHost as ex:
# don't re-raise
self._set_vm_state_and_notify('run_instance',
{'vm_state': vm_states.ERROR},
{'vm_state': vm_states.ERROR,
'task_state': None},
context, ex, request_spec)
except Exception as ex:
with excutils.save_and_reraise_exception():
self._set_vm_state_and_notify('run_instance',
{'vm_state': vm_states.ERROR},
{'vm_state': vm_states.ERROR,
'task_state': None},
context, ex, request_spec)
def prep_resize(self, context, image, request_spec, filter_properties,
@ -139,7 +143,8 @@ class SchedulerManager(manager.Manager):
except Exception as ex:
with excutils.save_and_reraise_exception():
self._set_vm_state_and_notify('prep_resize',
{'vm_state': vm_states.ERROR},
{'vm_state': vm_states.ERROR,
'task_state': None},
context, ex, request_spec)
if reservations:
QUOTAS.rollback(context, reservations)
@ -162,34 +167,36 @@ class SchedulerManager(manager.Manager):
vm_state = updates['vm_state']
properties = request_spec.get('instance_properties', {})
# FIXME(comstud): We really need to move error handling closer
# to where the errors occur so we can deal with errors on
# individual instances when scheduling multiple.
if 'instance_uuids' in request_spec:
instance_uuid = request_spec['instance_uuids'][0]
else:
instance_uuid = properties.get('uuid', {})
# NOTE(vish): We shouldn't get here unless we have a catastrophic
# failure, so just set all instances to error. If uuid
# is not set, uuids will be [None]; this
# is solely to preserve existing behavior and can
# be removed along with the 'if instance_uuid:' if we can
# verify that uuid is always set.
uuids = [properties.get('uuid')]
for instance_uuid in request_spec.get('instance_uuids') or uuids:
if instance_uuid:
compute_utils.add_instance_fault_from_exc(context,
instance_uuid, ex, sys.exc_info())
state = vm_state.upper()
LOG.warning(_('Setting instance to %(state)s state.'),
locals(), instance_uuid=instance_uuid)
if instance_uuid:
state = vm_state.upper()
LOG.warning(_('Setting instance to %(state)s state.'), locals(),
instance_uuid=instance_uuid)
# update instance state and notify on the transition
(old_ref, new_ref) = db.instance_update_and_get_original(
context, instance_uuid, updates)
notifications.send_update(context, old_ref, new_ref,
service="scheduler")
# update instance state and notify on the transition
(old_ref, new_ref) = db.instance_update_and_get_original(context,
instance_uuid, updates)
notifications.send_update(context, old_ref, new_ref,
service="scheduler")
payload = dict(request_spec=request_spec,
instance_properties=properties,
instance_id=instance_uuid,
state=vm_state,
method=method,
reason=ex)
payload = dict(request_spec=request_spec,
instance_properties=properties,
instance_id=instance_uuid,
state=vm_state,
method=method,
reason=ex)
notifier.notify(context, notifier.publisher_id("scheduler"),
'scheduler.' + method, notifier.ERROR, payload)
notifier.notify(context, notifier.publisher_id("scheduler"),
'scheduler.' + method, notifier.ERROR, payload)
# NOTE (masumotok) : This method should be moved to nova.api.ec2.admin.
# Based on bexar design summit discussion,

View File

@ -19,16 +19,17 @@
Tests For Scheduler
"""
import mox
from nova.compute import api as compute_api
from nova.compute import power_state
from nova.compute import rpcapi as compute_rpcapi
from nova.compute import task_states
from nova.compute import utils as compute_utils
from nova.compute import vm_states
from nova import context
from nova import db
from nova import exception
from nova import flags
from nova import notifications
from nova.openstack.common import jsonutils
from nova.openstack.common import rpc
from nova.openstack.common import timeutils
@ -48,9 +49,6 @@ class SchedulerManagerTestCase(test.TestCase):
driver_cls = driver.Scheduler
driver_cls_name = 'nova.scheduler.driver.Scheduler'
class AnException(Exception):
pass
def setUp(self):
super(SchedulerManagerTestCase, self).setUp()
self.flags(scheduler_driver=self.driver_cls_name)
@ -153,14 +151,11 @@ class SchedulerManagerTestCase(test.TestCase):
method_name)
def test_run_instance_exception_puts_instance_in_error_state(self):
"""Test that a NoValidHost exception for run_instance puts
the instance in ERROR state and eats the exception.
"""
fake_instance_uuid = 'fake-instance-id'
inst = {"vm_state": "", "task_state": ""}
self._mox_schedule_method_helper('schedule_run_instance')
self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
request_spec = {'instance_properties':
@ -170,21 +165,23 @@ class SchedulerManagerTestCase(test.TestCase):
request_spec, None, None, None, None, {}).AndRaise(
exception.NoValidHost(reason=""))
db.instance_update_and_get_original(self.context, fake_instance_uuid,
{"vm_state": vm_states.ERROR}).AndReturn((inst, inst))
{"vm_state": vm_states.ERROR,
"task_state": None}).AndReturn((inst, inst))
compute_utils.add_instance_fault_from_exc(self.context,
fake_instance_uuid, mox.IsA(exception.NoValidHost),
mox.IgnoreArg())
self.mox.ReplayAll()
self.manager.run_instance(self.context, request_spec,
None, None, None, None, {})
def test_prep_resize_no_valid_host_back_in_active_state(self):
"""Test that a NoValidHost exception for prep_resize puts
the instance in ACTIVE state
"""
fake_instance_uuid = 'fake-instance-id'
inst = {"vm_state": "", "task_state": ""}
self._mox_schedule_method_helper('schedule_prep_resize')
self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
request_spec = {'instance_type': 'fake_type',
@ -204,18 +201,19 @@ class SchedulerManagerTestCase(test.TestCase):
db.instance_update_and_get_original(self.context, fake_instance_uuid,
{"vm_state": vm_states.ACTIVE, "task_state": None}).AndReturn(
(inst, inst))
compute_utils.add_instance_fault_from_exc(self.context,
fake_instance_uuid, mox.IsA(exception.NoValidHost),
mox.IgnoreArg())
self.mox.ReplayAll()
self.manager.prep_resize(**kwargs)
def test_prep_resize_exception_host_in_error_state_and_raise(self):
"""Test that a generic exception for prep_resize puts
the instance in ERROR state and re-raises the exception
"""
fake_instance_uuid = 'fake-instance-id'
self._mox_schedule_method_helper('schedule_prep_resize')
self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
request_spec = {'instance_properties':
@ -231,18 +229,23 @@ class SchedulerManagerTestCase(test.TestCase):
}
self.manager.driver.schedule_prep_resize(**kwargs).AndRaise(
self.AnException('something happened'))
test.TestingException('something happened'))
inst = {
"vm_state": "",
"task_state": "",
}
db.instance_update_and_get_original(self.context, fake_instance_uuid,
{"vm_state": vm_states.ERROR}).AndReturn((inst, inst))
{"vm_state": vm_states.ERROR,
"task_state": None}).AndReturn((inst, inst))
compute_utils.add_instance_fault_from_exc(self.context,
fake_instance_uuid, mox.IsA(test.TestingException),
mox.IgnoreArg())
self.mox.ReplayAll()
self.assertRaises(self.AnException, self.manager.prep_resize, **kwargs)
self.assertRaises(test.TestingException, self.manager.prep_resize,
**kwargs)
class SchedulerTestCase(test.TestCase):