Improve error handling of scheduler
Modifies scheduler error handling to record instance faults and to reset the instance's task_state to None on failure. Related to bug 1051066. Change-Id: Id9f36a75370849db7baf3fe24ce96c6f4284255d
This commit is contained in:
parent
27cbff55fe
commit
502bc22000
@ -22,7 +22,9 @@ Scheduler Service
|
||||
"""
|
||||
|
||||
import functools
|
||||
import sys
|
||||
|
||||
from nova.compute import utils as compute_utils
|
||||
from nova.compute import vm_states
|
||||
from nova import db
|
||||
from nova import exception
|
||||
@ -104,12 +106,14 @@ class SchedulerManager(manager.Manager):
|
||||
except exception.NoValidHost as ex:
|
||||
# don't re-raise
|
||||
self._set_vm_state_and_notify('run_instance',
|
||||
{'vm_state': vm_states.ERROR},
|
||||
{'vm_state': vm_states.ERROR,
|
||||
'task_state': None},
|
||||
context, ex, request_spec)
|
||||
except Exception as ex:
|
||||
with excutils.save_and_reraise_exception():
|
||||
self._set_vm_state_and_notify('run_instance',
|
||||
{'vm_state': vm_states.ERROR},
|
||||
{'vm_state': vm_states.ERROR,
|
||||
'task_state': None},
|
||||
context, ex, request_spec)
|
||||
|
||||
def prep_resize(self, context, image, request_spec, filter_properties,
|
||||
@ -139,7 +143,8 @@ class SchedulerManager(manager.Manager):
|
||||
except Exception as ex:
|
||||
with excutils.save_and_reraise_exception():
|
||||
self._set_vm_state_and_notify('prep_resize',
|
||||
{'vm_state': vm_states.ERROR},
|
||||
{'vm_state': vm_states.ERROR,
|
||||
'task_state': None},
|
||||
context, ex, request_spec)
|
||||
if reservations:
|
||||
QUOTAS.rollback(context, reservations)
|
||||
@ -162,34 +167,36 @@ class SchedulerManager(manager.Manager):
|
||||
|
||||
vm_state = updates['vm_state']
|
||||
properties = request_spec.get('instance_properties', {})
|
||||
# FIXME(comstud): We really need to move error handling closer
|
||||
# to where the errors occur so we can deal with errors on
|
||||
# individual instances when scheduling multiple.
|
||||
if 'instance_uuids' in request_spec:
|
||||
instance_uuid = request_spec['instance_uuids'][0]
|
||||
else:
|
||||
instance_uuid = properties.get('uuid', {})
|
||||
# NOTE(vish): We shouldn't get here unless we have a catastrophic
|
||||
# failure, so just set all instances to error. if uuid
|
||||
# is not set, instance_uuids will be set to [None], this
|
||||
# is solely to preserve existing behavior and can
|
||||
# be removed along with the 'if instance_uuid:' if we can
|
||||
# verify that uuid is always set.
|
||||
uuids = [properties.get('uuid')]
|
||||
for instance_uuid in request_spec.get('instance_uuids') or uuids:
|
||||
if instance_uuid:
|
||||
compute_utils.add_instance_fault_from_exc(context,
|
||||
instance_uuid, ex, sys.exc_info())
|
||||
state = vm_state.upper()
|
||||
LOG.warning(_('Setting instance to %(state)s state.'),
|
||||
locals(), instance_uuid=instance_uuid)
|
||||
|
||||
if instance_uuid:
|
||||
state = vm_state.upper()
|
||||
LOG.warning(_('Setting instance to %(state)s state.'), locals(),
|
||||
instance_uuid=instance_uuid)
|
||||
# update instance state and notify on the transition
|
||||
(old_ref, new_ref) = db.instance_update_and_get_original(
|
||||
context, instance_uuid, updates)
|
||||
notifications.send_update(context, old_ref, new_ref,
|
||||
service="scheduler")
|
||||
|
||||
# update instance state and notify on the transition
|
||||
(old_ref, new_ref) = db.instance_update_and_get_original(context,
|
||||
instance_uuid, updates)
|
||||
notifications.send_update(context, old_ref, new_ref,
|
||||
service="scheduler")
|
||||
payload = dict(request_spec=request_spec,
|
||||
instance_properties=properties,
|
||||
instance_id=instance_uuid,
|
||||
state=vm_state,
|
||||
method=method,
|
||||
reason=ex)
|
||||
|
||||
payload = dict(request_spec=request_spec,
|
||||
instance_properties=properties,
|
||||
instance_id=instance_uuid,
|
||||
state=vm_state,
|
||||
method=method,
|
||||
reason=ex)
|
||||
|
||||
notifier.notify(context, notifier.publisher_id("scheduler"),
|
||||
'scheduler.' + method, notifier.ERROR, payload)
|
||||
notifier.notify(context, notifier.publisher_id("scheduler"),
|
||||
'scheduler.' + method, notifier.ERROR, payload)
|
||||
|
||||
# NOTE (masumotok) : This method should be moved to nova.api.ec2.admin.
|
||||
# Based on bexar design summit discussion,
|
||||
|
@ -19,16 +19,17 @@
|
||||
Tests For Scheduler
|
||||
"""
|
||||
|
||||
import mox
|
||||
|
||||
from nova.compute import api as compute_api
|
||||
from nova.compute import power_state
|
||||
from nova.compute import rpcapi as compute_rpcapi
|
||||
from nova.compute import task_states
|
||||
from nova.compute import utils as compute_utils
|
||||
from nova.compute import vm_states
|
||||
from nova import context
|
||||
from nova import db
|
||||
from nova import exception
|
||||
from nova import flags
|
||||
from nova import notifications
|
||||
from nova.openstack.common import jsonutils
|
||||
from nova.openstack.common import rpc
|
||||
from nova.openstack.common import timeutils
|
||||
@ -48,9 +49,6 @@ class SchedulerManagerTestCase(test.TestCase):
|
||||
driver_cls = driver.Scheduler
|
||||
driver_cls_name = 'nova.scheduler.driver.Scheduler'
|
||||
|
||||
class AnException(Exception):
|
||||
pass
|
||||
|
||||
def setUp(self):
|
||||
super(SchedulerManagerTestCase, self).setUp()
|
||||
self.flags(scheduler_driver=self.driver_cls_name)
|
||||
@ -153,14 +151,11 @@ class SchedulerManagerTestCase(test.TestCase):
|
||||
method_name)
|
||||
|
||||
def test_run_instance_exception_puts_instance_in_error_state(self):
|
||||
"""Test that a NoValidHost exception for run_instance puts
|
||||
the instance in ERROR state and eats the exception.
|
||||
"""
|
||||
|
||||
fake_instance_uuid = 'fake-instance-id'
|
||||
inst = {"vm_state": "", "task_state": ""}
|
||||
|
||||
self._mox_schedule_method_helper('schedule_run_instance')
|
||||
self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
|
||||
self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
|
||||
|
||||
request_spec = {'instance_properties':
|
||||
@ -170,21 +165,23 @@ class SchedulerManagerTestCase(test.TestCase):
|
||||
request_spec, None, None, None, None, {}).AndRaise(
|
||||
exception.NoValidHost(reason=""))
|
||||
db.instance_update_and_get_original(self.context, fake_instance_uuid,
|
||||
{"vm_state": vm_states.ERROR}).AndReturn((inst, inst))
|
||||
{"vm_state": vm_states.ERROR,
|
||||
"task_state": None}).AndReturn((inst, inst))
|
||||
compute_utils.add_instance_fault_from_exc(self.context,
|
||||
fake_instance_uuid, mox.IsA(exception.NoValidHost),
|
||||
mox.IgnoreArg())
|
||||
|
||||
self.mox.ReplayAll()
|
||||
self.manager.run_instance(self.context, request_spec,
|
||||
None, None, None, None, {})
|
||||
|
||||
def test_prep_resize_no_valid_host_back_in_active_state(self):
|
||||
"""Test that a NoValidHost exception for prep_resize puts
|
||||
the instance in ACTIVE state
|
||||
"""
|
||||
fake_instance_uuid = 'fake-instance-id'
|
||||
inst = {"vm_state": "", "task_state": ""}
|
||||
|
||||
self._mox_schedule_method_helper('schedule_prep_resize')
|
||||
|
||||
self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
|
||||
self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
|
||||
|
||||
request_spec = {'instance_type': 'fake_type',
|
||||
@ -204,18 +201,19 @@ class SchedulerManagerTestCase(test.TestCase):
|
||||
db.instance_update_and_get_original(self.context, fake_instance_uuid,
|
||||
{"vm_state": vm_states.ACTIVE, "task_state": None}).AndReturn(
|
||||
(inst, inst))
|
||||
compute_utils.add_instance_fault_from_exc(self.context,
|
||||
fake_instance_uuid, mox.IsA(exception.NoValidHost),
|
||||
mox.IgnoreArg())
|
||||
|
||||
self.mox.ReplayAll()
|
||||
self.manager.prep_resize(**kwargs)
|
||||
|
||||
def test_prep_resize_exception_host_in_error_state_and_raise(self):
|
||||
"""Test that a NoValidHost exception for prep_resize puts
|
||||
the instance in ACTIVE state
|
||||
"""
|
||||
fake_instance_uuid = 'fake-instance-id'
|
||||
|
||||
self._mox_schedule_method_helper('schedule_prep_resize')
|
||||
|
||||
self.mox.StubOutWithMock(compute_utils, 'add_instance_fault_from_exc')
|
||||
self.mox.StubOutWithMock(db, 'instance_update_and_get_original')
|
||||
|
||||
request_spec = {'instance_properties':
|
||||
@ -231,18 +229,23 @@ class SchedulerManagerTestCase(test.TestCase):
|
||||
}
|
||||
|
||||
self.manager.driver.schedule_prep_resize(**kwargs).AndRaise(
|
||||
self.AnException('something happened'))
|
||||
test.TestingException('something happened'))
|
||||
|
||||
inst = {
|
||||
"vm_state": "",
|
||||
"task_state": "",
|
||||
}
|
||||
db.instance_update_and_get_original(self.context, fake_instance_uuid,
|
||||
{"vm_state": vm_states.ERROR}).AndReturn((inst, inst))
|
||||
{"vm_state": vm_states.ERROR,
|
||||
"task_state": None}).AndReturn((inst, inst))
|
||||
compute_utils.add_instance_fault_from_exc(self.context,
|
||||
fake_instance_uuid, mox.IsA(test.TestingException),
|
||||
mox.IgnoreArg())
|
||||
|
||||
self.mox.ReplayAll()
|
||||
|
||||
self.assertRaises(self.AnException, self.manager.prep_resize, **kwargs)
|
||||
self.assertRaises(test.TestingException, self.manager.prep_resize,
|
||||
**kwargs)
|
||||
|
||||
|
||||
class SchedulerTestCase(test.TestCase):
|
||||
|
Loading…
Reference in New Issue
Block a user