Add a compute API to trigger crash dump in instance.

This patch adds a new API named trigger_crash_dump() to nova compute.
It calls driver.inject_nmi() to inject an NMI into the instance. If the
instance OS is configured to trigger a kernel crash dump on NMI, this API
can be used to trigger a crash dump of the instance.

Change-Id: I9b649aafba011d537e3fe4eebef7a678ff6733e4
Implements: blueprint instance-crash-dump
Co-Authored-By: Hironori Shiina <shiina.hironori@jp.fujitsu.com>
Co-Authored-By: Tang Chen <tangchen@cn.fujitsu.com>
This commit is contained in:
Hironori Shiina 2015-10-14 14:45:57 +08:00 committed by Tang Chen
parent 3816659e08
commit 1b006c7362
9 changed files with 122 additions and 2 deletions

View File

@ -1984,6 +1984,19 @@ class API(base.Base):
# availability_zone isn't used by run_instance.
self.compute_rpcapi.start_instance(context, instance)
@check_instance_lock
@check_instance_host
@check_instance_cell
@check_instance_state(vm_state=vm_states.ALLOW_TRIGGER_CRASH_DUMP)
def trigger_crash_dump(self, context, instance):
    """Request a crash dump of the given instance.

    The attempt is logged, the start of a TRIGGER_CRASH_DUMP instance
    action is recorded, and the request is then handed to the compute
    RPC API.

    :param context: request context passed through to the action record
                    and the RPC call
    :param instance: the instance to trigger the crash dump in
    """
    LOG.debug("Try to trigger crash dump", instance=instance)

    action = instance_actions.TRIGGER_CRASH_DUMP
    self._record_action_start(context, instance, action)

    self.compute_rpcapi.trigger_crash_dump(context, instance)
def get(self, context, instance_id, want_objects=False,
expected_attrs=None):
"""Get a single instance with the given instance_id."""

View File

@ -21,6 +21,10 @@ tend to maintain the casing sent to the API.
Maintaining a list of actions here should protect against inconsistencies when
they are used.
The naming style of instance actions should be snake_case, so that they are
consistent with the API names. Do not modify the old ones because they have
been exposed to users.
"""
CREATE = 'create'
@ -45,3 +49,4 @@ CHANGE_PASSWORD = 'changePassword'
SHELVE = 'shelve'
UNSHELVE = 'unshelve'
LIVE_MIGRATION = 'live-migration'
TRIGGER_CRASH_DUMP = 'trigger_crash_dump'

View File

@ -667,7 +667,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
class ComputeManager(manager.Manager):
"""Manages the running instances from creation to destruction."""
target = messaging.Target(version='4.5')
target = messaging.Target(version='4.6')
# How long to wait in seconds before re-issuing a shutdown
# signal to an instance during power off. The overall
@ -2571,6 +2571,25 @@ class ComputeManager(manager.Manager):
instance.save(expected_task_state=task_states.POWERING_ON)
self._notify_about_instance_usage(context, instance, "power_on.end")
@messaging.expected_exceptions(NotImplementedError,
                               exception.NMINotSupported,
                               exception.InstanceNotRunning)
@wrap_exception()
@wrap_instance_event
@wrap_instance_fault
def trigger_crash_dump(self, context, instance):
    """Trigger crash dump in an instance by injecting NMI.

    Emits a "trigger_crash_dump.start" usage notification, asks the virt
    driver to inject an NMI into the instance, and then emits a
    "trigger_crash_dump.end" notification. The end notification is only
    reached when the driver call returns without raising.

    NotImplementedError, NMINotSupported and InstanceNotRunning are
    declared as expected exceptions for this RPC endpoint.

    :param context: request context used for the notifications
    :param instance: the instance to inject the NMI into
    """
    notify = self._notify_about_instance_usage

    notify(context, instance, "trigger_crash_dump.start")

    # task_state and power_state are deliberately left untouched: what
    # an NMI actually does to the guest depends on the user's
    # configuration.
    self.driver.inject_nmi(instance)

    notify(context, instance, "trigger_crash_dump.end")
@wrap_exception()
@reverts_task_state
@wrap_instance_event

View File

@ -311,6 +311,7 @@ class ComputeAPI(object):
* 4.4 - Make refresh_instance_security_rules send an instance object
* 4.5 - Add migration, scheduler_node and limits arguments to
rebuild_instance()
* 4.6 - Add trigger_crash_dump()
... Liberty supports messaging version 4.5. So, any changes to
existing methods in 4.x after that point should be done so that they
@ -984,3 +985,13 @@ class ComputeAPI(object):
version=version)
cctxt.cast(ctxt, 'refresh_instance_security_rules',
instance=instance)
def trigger_crash_dump(self, ctxt, instance):
    """Cast a trigger_crash_dump request to the instance's compute host.

    The call requires RPC version 4.6. When the client cannot send that
    version, NMINotSupported is raised and nothing is sent.

    :param ctxt: request context forwarded with the cast
    :param instance: the instance to trigger the crash dump in
    :raises: exception.NMINotSupported if version 4.6 cannot be sent
    """
    required_version = '4.6'
    if not self.client.can_send_version(required_version):
        raise exception.NMINotSupported()

    target_host = _compute_host(None, instance)
    call_ctxt = self.client.prepare(server=target_host,
                                    version=required_version)
    return call_ctxt.cast(ctxt, "trigger_crash_dump", instance=instance)

View File

@ -50,3 +50,6 @@ SHELVED_OFFLOADED = 'shelved_offloaded' # VM and associated resources are
ALLOW_SOFT_REBOOT = [ACTIVE] # states we can soft reboot from
ALLOW_HARD_REBOOT = ALLOW_SOFT_REBOOT + [STOPPED, PAUSED, SUSPENDED, ERROR]
# states we allow hard reboot from
ALLOW_TRIGGER_CRASH_DUMP = [ACTIVE, PAUSED, RESCUED, RESIZED, ERROR]
# states we allow to trigger crash dump

View File

@ -28,7 +28,7 @@ LOG = logging.getLogger(__name__)
# NOTE(danms): This is the global service version counter
SERVICE_VERSION = 2
SERVICE_VERSION = 3
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
@ -56,6 +56,8 @@ SERVICE_VERSION_HISTORY = (
{'compute_rpc': '4.4'},
# Version 2: Changes to rebuild_instance signature in the compute_rpc
{'compute_rpc': '4.5'},
# Version 3: Add trigger_crash_dump method to the compute_rpc
{'compute_rpc': '4.6'},
)

View File

@ -429,6 +429,47 @@ class _ComputeAPIUnitTestMixIn(object):
self.compute_api.stop,
self.context, instance)
@mock.patch('nova.compute.api.API._record_action_start')
@mock.patch('nova.compute.rpcapi.ComputeAPI.trigger_crash_dump')
def test_trigger_crash_dump(self,
                            trigger_crash_dump,
                            _record_action_start):
    """Check the action record and RPC call made by trigger_crash_dump.

    The instance action must always be recorded. The RPC call is only
    verified outside the 'api' cell type, and task_state must remain
    unset afterwards.
    """
    instance = self._create_instance_obj()

    self.compute_api.trigger_crash_dump(self.context, instance)

    _record_action_start.assert_called_once_with(
        self.context, instance, instance_actions.TRIGGER_CRASH_DUMP)

    # cell api has not been implemented.
    if self.cell_type != 'api':
        trigger_crash_dump.assert_called_once_with(self.context, instance)

    self.assertIsNone(instance.task_state)
def test_trigger_crash_dump_invalid_state(self):
    """A STOPPED instance must be rejected with InstanceInvalidState."""
    stopped = {'vm_state': vm_states.STOPPED}
    instance = self._create_instance_obj(stopped)

    self.assertRaises(
        exception.InstanceInvalidState,
        self.compute_api.trigger_crash_dump,
        self.context,
        instance)
def test_trigger_crash_dump_no_host(self):
    """An instance with an empty host must raise InstanceNotReady."""
    hostless = {'host': ''}
    instance = self._create_instance_obj(params=hostless)

    self.assertRaises(
        exception.InstanceNotReady,
        self.compute_api.trigger_crash_dump,
        self.context,
        instance)
def test_trigger_crash_dump_locked(self):
    """A locked instance must raise InstanceIsLocked."""
    locked = {'locked': True}
    instance = self._create_instance_obj(params=locked)

    self.assertRaises(
        exception.InstanceIsLocked,
        self.compute_api.trigger_crash_dump,
        self.context,
        instance)
def _test_shelve(self, vm_state=vm_states.ACTIVE,
boot_from_volume=False, clean_shutdown=True):
params = dict(task_state=None, vm_state=vm_state,

View File

@ -2898,6 +2898,21 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
mock_delete_instance.assert_called_once_with(
self.context, instance, bdms, mock.ANY)
@mock.patch.object(nova.compute.manager.ComputeManager,
                   '_notify_about_instance_usage')
def test_trigger_crash_dump(self, notify_mock):
    """Check notifications and unchanged state after trigger_crash_dump.

    The manager must emit the start and end usage notifications in
    order, and must leave task_state unset and vm_state ACTIVE.
    """
    instance = fake_instance.fake_instance_obj(
        self.context, vm_state=vm_states.ACTIVE)

    self.compute.trigger_crash_dump(self.context, instance)

    expected_notifications = [
        mock.call(self.context, instance, 'trigger_crash_dump.start'),
        mock.call(self.context, instance, 'trigger_crash_dump.end'),
    ]
    notify_mock.assert_has_calls(expected_notifications)

    self.assertIsNone(instance.task_state)
    self.assertEqual(vm_states.ACTIVE, instance.vm_state)
class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
def setUp(self):

View File

@ -519,3 +519,14 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
def test_unquiesce_instance(self):
    """Exercise unquiesce_instance as a 'cast' at RPC version 4.0."""
    self._test_compute_api(
        'unquiesce_instance', 'cast',
        instance=self.fake_instance_obj,
        mapping=None,
        version='4.0')
def test_trigger_crash_dump(self):
    """Exercise trigger_crash_dump as a 'cast' at RPC version 4.6."""
    self._test_compute_api(
        'trigger_crash_dump', 'cast',
        instance=self.fake_instance_obj,
        version='4.6')
def test_trigger_crash_dump_incompatible(self):
    """With compute pinned to 4.0, the call must raise NMINotSupported."""
    # Pin the compute RPC upgrade level below the 4.6 the method needs.
    self.flags(compute='4.0', group='upgrade_levels')

    self.assertRaises(
        exception.NMINotSupported,
        self._test_compute_api,
        'trigger_crash_dump', 'cast',
        instance=self.fake_instance_obj,
        version='4.6')