Add a compute API to trigger crash dump in instance.
This patch adds a new API named trigger_crash_dump() to nova compute. It calls driver.inject_nmi() to inject an NMI into the instance. If the instance OS is configured to trigger a kernel crash dump on NMI, this API triggers a crash dump of the instance.

Change-Id: I9b649aafba011d537e3fe4eebef7a678ff6733e4
Implements: blueprint instance-crash-dump
Co-Authored-By: Hironori Shiina <shiina.hironori@jp.fujitsu.com>
Co-Authored-By: Tang Chen <tangchen@cn.fujitsu.com>
This commit is contained in:
parent
3816659e08
commit
1b006c7362
|
@ -1984,6 +1984,19 @@ class API(base.Base):
|
|||
# availability_zone isn't used by run_instance.
|
||||
self.compute_rpcapi.start_instance(context, instance)
|
||||
|
||||
@check_instance_lock
|
||||
@check_instance_host
|
||||
@check_instance_cell
|
||||
@check_instance_state(vm_state=vm_states.ALLOW_TRIGGER_CRASH_DUMP)
|
||||
def trigger_crash_dump(self, context, instance):
|
||||
"""Trigger crash dump in an instance."""
|
||||
LOG.debug("Try to trigger crash dump", instance=instance)
|
||||
|
||||
self._record_action_start(context, instance,
|
||||
instance_actions.TRIGGER_CRASH_DUMP)
|
||||
|
||||
self.compute_rpcapi.trigger_crash_dump(context, instance)
|
||||
|
||||
def get(self, context, instance_id, want_objects=False,
|
||||
expected_attrs=None):
|
||||
"""Get a single instance with the given instance_id."""
|
||||
|
|
|
@ -21,6 +21,10 @@ tend to maintain the casing sent to the API.
|
|||
|
||||
Maintaining a list of actions here should protect against inconsistencies when
|
||||
they are used.
|
||||
|
||||
The naming style of instance actions should be snake_case, as it will
|
||||
be consistent with the API names. Do not modify the old ones because they have
|
||||
been exposed to users.
|
||||
"""
|
||||
|
||||
CREATE = 'create'
|
||||
|
@ -45,3 +49,4 @@ CHANGE_PASSWORD = 'changePassword'
|
|||
SHELVE = 'shelve'
|
||||
UNSHELVE = 'unshelve'
|
||||
LIVE_MIGRATION = 'live-migration'
|
||||
TRIGGER_CRASH_DUMP = 'trigger_crash_dump'
|
||||
|
|
|
@ -667,7 +667,7 @@ class ComputeVirtAPI(virtapi.VirtAPI):
|
|||
class ComputeManager(manager.Manager):
|
||||
"""Manages the running instances from creation to destruction."""
|
||||
|
||||
target = messaging.Target(version='4.5')
|
||||
target = messaging.Target(version='4.6')
|
||||
|
||||
# How long to wait in seconds before re-issuing a shutdown
|
||||
# signal to an instance during power off. The overall
|
||||
|
@ -2571,6 +2571,25 @@ class ComputeManager(manager.Manager):
|
|||
instance.save(expected_task_state=task_states.POWERING_ON)
|
||||
self._notify_about_instance_usage(context, instance, "power_on.end")
|
||||
|
||||
@messaging.expected_exceptions(NotImplementedError,
|
||||
exception.NMINotSupported,
|
||||
exception.InstanceNotRunning)
|
||||
@wrap_exception()
|
||||
@wrap_instance_event
|
||||
@wrap_instance_fault
|
||||
def trigger_crash_dump(self, context, instance):
|
||||
"""Trigger crash dump in an instance by injecting NMI."""
|
||||
|
||||
self._notify_about_instance_usage(context, instance,
|
||||
"trigger_crash_dump.start")
|
||||
|
||||
# This method does not change task_state and power_state because the
|
||||
# effect of an NMI depends on user's configuration.
|
||||
self.driver.inject_nmi(instance)
|
||||
|
||||
self._notify_about_instance_usage(context, instance,
|
||||
"trigger_crash_dump.end")
|
||||
|
||||
@wrap_exception()
|
||||
@reverts_task_state
|
||||
@wrap_instance_event
|
||||
|
|
|
@ -311,6 +311,7 @@ class ComputeAPI(object):
|
|||
* 4.4 - Make refresh_instance_security_rules send an instance object
|
||||
* 4.5 - Add migration, scheduler_node and limits arguments to
|
||||
rebuild_instance()
|
||||
* 4.6 - Add trigger_crash_dump()
|
||||
|
||||
... Liberty supports messaging version 4.5. So, any changes to
|
||||
existing methods in 4.x after that point should be done so that they
|
||||
|
@ -984,3 +985,13 @@ class ComputeAPI(object):
|
|||
version=version)
|
||||
cctxt.cast(ctxt, 'refresh_instance_security_rules',
|
||||
instance=instance)
|
||||
|
||||
def trigger_crash_dump(self, ctxt, instance):
|
||||
version = '4.6'
|
||||
|
||||
if not self.client.can_send_version(version):
|
||||
raise exception.NMINotSupported()
|
||||
|
||||
cctxt = self.client.prepare(server=_compute_host(None, instance),
|
||||
version=version)
|
||||
return cctxt.cast(ctxt, "trigger_crash_dump", instance=instance)
|
||||
|
|
|
@ -50,3 +50,6 @@ SHELVED_OFFLOADED = 'shelved_offloaded' # VM and associated resources are
|
|||
ALLOW_SOFT_REBOOT = [ACTIVE] # states we can soft reboot from
|
||||
ALLOW_HARD_REBOOT = ALLOW_SOFT_REBOOT + [STOPPED, PAUSED, SUSPENDED, ERROR]
|
||||
# states we allow hard reboot from
|
||||
|
||||
ALLOW_TRIGGER_CRASH_DUMP = [ACTIVE, PAUSED, RESCUED, RESIZED, ERROR]
|
||||
# states we allow to trigger crash dump
|
||||
|
|
|
@ -28,7 +28,7 @@ LOG = logging.getLogger(__name__)
|
|||
|
||||
|
||||
# NOTE(danms): This is the global service version counter
|
||||
SERVICE_VERSION = 2
|
||||
SERVICE_VERSION = 3
|
||||
|
||||
|
||||
# NOTE(danms): This is our SERVICE_VERSION history. The idea is that any
|
||||
|
@ -56,6 +56,8 @@ SERVICE_VERSION_HISTORY = (
|
|||
{'compute_rpc': '4.4'},
|
||||
# Version 2: Changes to rebuild_instance signature in the compute_rpc
|
||||
{'compute_rpc': '4.5'},
|
||||
# Version 3: Add trigger_crash_dump method to the compute_rpc
|
||||
{'compute_rpc': '4.6'},
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -429,6 +429,47 @@ class _ComputeAPIUnitTestMixIn(object):
|
|||
self.compute_api.stop,
|
||||
self.context, instance)
|
||||
|
||||
@mock.patch('nova.compute.api.API._record_action_start')
|
||||
@mock.patch('nova.compute.rpcapi.ComputeAPI.trigger_crash_dump')
|
||||
def test_trigger_crash_dump(self,
|
||||
trigger_crash_dump,
|
||||
_record_action_start):
|
||||
instance = self._create_instance_obj()
|
||||
|
||||
self.compute_api.trigger_crash_dump(self.context, instance)
|
||||
|
||||
_record_action_start.assert_called_once_with(self.context, instance,
|
||||
instance_actions.TRIGGER_CRASH_DUMP)
|
||||
|
||||
if self.cell_type == 'api':
|
||||
# trigger_crash_dump has not been implemented for the cells API yet.
|
||||
pass
|
||||
else:
|
||||
trigger_crash_dump.assert_called_once_with(self.context, instance)
|
||||
|
||||
self.assertIsNone(instance.task_state)
|
||||
|
||||
def test_trigger_crash_dump_invalid_state(self):
|
||||
params = dict(vm_state=vm_states.STOPPED)
|
||||
instance = self._create_instance_obj(params)
|
||||
self.assertRaises(exception.InstanceInvalidState,
|
||||
self.compute_api.trigger_crash_dump,
|
||||
self.context, instance)
|
||||
|
||||
def test_trigger_crash_dump_no_host(self):
|
||||
params = dict(host='')
|
||||
instance = self._create_instance_obj(params=params)
|
||||
self.assertRaises(exception.InstanceNotReady,
|
||||
self.compute_api.trigger_crash_dump,
|
||||
self.context, instance)
|
||||
|
||||
def test_trigger_crash_dump_locked(self):
|
||||
params = dict(locked=True)
|
||||
instance = self._create_instance_obj(params=params)
|
||||
self.assertRaises(exception.InstanceIsLocked,
|
||||
self.compute_api.trigger_crash_dump,
|
||||
self.context, instance)
|
||||
|
||||
def _test_shelve(self, vm_state=vm_states.ACTIVE,
|
||||
boot_from_volume=False, clean_shutdown=True):
|
||||
params = dict(task_state=None, vm_state=vm_state,
|
||||
|
|
|
@ -2898,6 +2898,21 @@ class ComputeManagerUnitTestCase(test.NoDBTestCase):
|
|||
mock_delete_instance.assert_called_once_with(
|
||||
self.context, instance, bdms, mock.ANY)
|
||||
|
||||
@mock.patch.object(nova.compute.manager.ComputeManager,
|
||||
'_notify_about_instance_usage')
|
||||
def test_trigger_crash_dump(self, notify_mock):
|
||||
instance = fake_instance.fake_instance_obj(
|
||||
self.context, vm_state=vm_states.ACTIVE)
|
||||
|
||||
self.compute.trigger_crash_dump(self.context, instance)
|
||||
|
||||
notify_mock.assert_has_calls([
|
||||
mock.call(self.context, instance, 'trigger_crash_dump.start'),
|
||||
mock.call(self.context, instance, 'trigger_crash_dump.end')
|
||||
])
|
||||
self.assertIsNone(instance.task_state)
|
||||
self.assertEqual(vm_states.ACTIVE, instance.vm_state)
|
||||
|
||||
|
||||
class ComputeManagerBuildInstanceTestCase(test.NoDBTestCase):
|
||||
def setUp(self):
|
||||
|
|
|
@ -519,3 +519,14 @@ class ComputeRpcAPITestCase(test.NoDBTestCase):
|
|||
def test_unquiesce_instance(self):
|
||||
self._test_compute_api('unquiesce_instance', 'cast',
|
||||
instance=self.fake_instance_obj, mapping=None, version='4.0')
|
||||
|
||||
def test_trigger_crash_dump(self):
|
||||
self._test_compute_api('trigger_crash_dump', 'cast',
|
||||
instance=self.fake_instance_obj, version='4.6')
|
||||
|
||||
def test_trigger_crash_dump_incompatible(self):
|
||||
self.flags(compute='4.0', group='upgrade_levels')
|
||||
self.assertRaises(exception.NMINotSupported,
|
||||
self._test_compute_api,
|
||||
'trigger_crash_dump', 'cast',
|
||||
instance=self.fake_instance_obj, version='4.6')
|
||||
|
|
Loading…
Reference in New Issue