Expose instance action event details out of the API

This adds a new microversion to expose the instance action
event details in the
GET /servers/{server_id}/os-instance-actions/{request_id} API.

With the new microversion the "details" key is always returned
with each event dict but the value may be null because of old
records or events that did not fail.

The details are not constrained by policy like the traceback
field since the details are like a fault message on the server
resource when the server is in ERROR status and the fault
message is likewise not constraint by policy unlike the fault
details which is a traceback like the event traceback field.

This commit add a SYSTEM_READER ('rule: system_reader_api') role
to the Show Server Action Details API. With this default policy,
events fault details can be displayed. And also add some nova and
non-nova exception functional tests for os-instance-actions API.

Co-Authored-By: Brin Zhang <zhangbailin@inspur.com>

Implements blueprint action-event-fault-details
Change-Id: I6fe4dd265b0030ce12f92771b255a3d795f03d01
This commit is contained in:
Matt Riedemann 2019-11-14 19:05:17 -05:00 committed by Brin Zhang
parent 1dd760a25e
commit 8337bee4b5
15 changed files with 381 additions and 7 deletions

View File

@ -19,7 +19,7 @@
}
],
"status": "CURRENT",
"version": "2.83",
"version": "2.84",
"min_version": "2.1",
"updated": "2013-07-23T11:33:21Z"
}

View File

@ -22,7 +22,7 @@
}
],
"status": "CURRENT",
"version": "2.83",
"version": "2.84",
"min_version": "2.1",
"updated": "2013-07-23T11:33:21Z"
}

View File

@ -226,6 +226,7 @@ REST_API_VERSION_HISTORY = """REST API Version History:
from Cyborg with ``GET /v2/accelerator_requests?instance={uuid}``
* 2.83 - Allow more filter parameters for ``GET /servers/detail`` and
``GET /servers`` for non-admin.
* 2.84 - Adds ``details`` field to instance action events.
"""
# The minimum and maximum versions of the API supported
@ -234,7 +235,7 @@ REST_API_VERSION_HISTORY = """REST API Version History:
# Note(cyeoh): This only applies for the v2.1 API once microversions
# support is fully merged. It does not affect the V2 API.
_MIN_API_VERSION = "2.1"
_MAX_API_VERSION = "2.83"
_MAX_API_VERSION = "2.84"
DEFAULT_API_VERSION = _MIN_API_VERSION
# Almost all proxy APIs which are related to network, images and baremetal

View File

@ -53,8 +53,9 @@ class InstanceActionsController(wsgi.Controller):
action[key] = action_raw.get(key)
return action
def _format_event(self, event_raw, project_id, show_traceback=False,
show_host=False, show_hostid=False):
@staticmethod
def _format_event(event_raw, project_id, show_traceback=False,
show_host=False, show_hostid=False, show_details=False):
event = {}
for key in EVENT_KEYS:
# By default, non-admins are not allowed to see traceback details.
@ -67,6 +68,8 @@ class InstanceActionsController(wsgi.Controller):
if show_hostid:
event['hostId'] = utils.generate_hostid(event_raw['host'],
project_id)
if show_details:
event['details'] = event_raw['details']
return event
@wsgi.Controller.api_version("2.1", "2.20")
@ -178,6 +181,15 @@ class InstanceActionsController(wsgi.Controller):
show_hostid = api_version_request.is_supported(req, '2.62')
if show_events:
# NOTE(brinzhang): Event details are shown since microversion
# 2.84.
show_details = False
support_v284 = api_version_request.is_supported(req, '2.84')
if support_v284:
show_details = context.can(
ia_policies.BASE_POLICY_NAME % 'events:details',
target={'project_id': instance.project_id}, fatal=False)
events_raw = self.action_api.action_events_get(context, instance,
action_id)
# NOTE(takashin): The project IDs of instance action events
@ -189,6 +201,7 @@ class InstanceActionsController(wsgi.Controller):
action['events'] = [self._format_event(
evt, action['project_id'] or instance.project_id,
show_traceback=show_traceback,
show_host=show_host, show_hostid=show_hostid
show_host=show_host, show_hostid=show_hostid,
show_details=show_details
) for evt in events_raw]
return {'instanceAction': action}

View File

@ -1105,3 +1105,11 @@ and ``GET /servers`` for non-admin :
* ``vm_state``
* ``progress``
* ``user_id``
2.84
----
The ``GET /servers/{server_id}/os-instance-actions/{request_id}`` API returns
a ``details`` parameter for each failed event with a fault message, similar to
the server ``fault.message`` parameter in ``GET /servers/{server_id}`` for a
server with status ``ERROR``.

View File

@ -33,6 +33,27 @@ in nova 23.0.0 release.
"""
instance_actions_policies = [
policy.DocumentedRuleDefault(
name=BASE_POLICY_NAME % 'events:details',
check_str=base.SYSTEM_READER,
description="""Add "details" key in action events for a server.
This check is performed only after the check
os_compute_api:os-instance-actions:show passes. Beginning with Microversion
2.84, new field 'details' is exposed via API which can have more details about
event failure. That field is controlled by this policy which is system reader
by default. Making the 'details' field visible to the non-admin user helps to
understand the nature of the problem (i.e. if the action can be retried),
but in the other hand it might leak information about the deployment
(e.g. the type of the hypervisor).
""",
operations=[
{
'method': 'GET',
'path': '/servers/{server_id}/os-instance-actions/{request_id}'
}
],
scope_types=['system']),
policy.DocumentedRuleDefault(
name=BASE_POLICY_NAME % 'events',
check_str=base.SYSTEM_READER,

View File

@ -162,10 +162,12 @@ class InstanceHelperMixin(object):
:param server: API response dict of the server being resized/migrated
:param action: Either "resize" or "migrate" instance action.
:param error_in_tb: Some expected part of the error event traceback.
:returns: The instance action event dict from the API response
"""
event = self._wait_for_action_fail_completion(
server, action, 'conductor_migrate_server')
self.assertIn(error_in_tb, event['traceback'])
return event
def _wait_for_migration_status(self, server, expected_statuses):
"""Waits for a migration record with the given statuses to be found

View File

@ -12,8 +12,19 @@
# License for the specific language governing permissions and limitations
# under the License.
import mock
from oslo_policy import policy as oslo_policy
from nova import exception
from nova import policy
from nova import test
from nova.tests import fixtures as nova_fixtures
from nova.tests.functional.api import client
from nova.tests.functional import fixtures as func_fixtures
from nova.tests.functional import integrated_helpers
from nova.tests.functional import test_servers
from nova.tests.unit.image import fake as fake_image
from nova.tests.unit import policy_fixture
class InstanceActionsTestV2(test_servers.ServersTestBase):
@ -49,3 +60,214 @@ class InstanceActionsTestV221(InstanceActionsTestV21):
actions = self.api.get_instance_actions(server['id'])
self.assertEqual('delete', actions[0]['action'])
self.assertEqual('create', actions[1]['action'])
class HypervisorError(Exception):
"""This is just used to make sure the exception type is in the events."""
pass
class InstanceActionEventFaultsTestCase(
test.TestCase, integrated_helpers.InstanceHelperMixin):
"""Tests for the instance action event details reporting from the API"""
def setUp(self):
super(InstanceActionEventFaultsTestCase, self).setUp()
# Setup the standard fixtures.
fake_image.stub_out_image_service(self)
self.addCleanup(fake_image.FakeImageService_reset)
self.useFixture(nova_fixtures.NeutronFixture(self))
self.useFixture(func_fixtures.PlacementFixture())
self.useFixture(policy_fixture.RealPolicyFixture())
# Start the compute services.
self.start_service('conductor')
self.start_service('scheduler')
self.compute = self.start_service('compute')
api_fixture = self.useFixture(nova_fixtures.OSAPIFixture(
api_version='v2.1'))
self.api = api_fixture.api
self.admin_api = api_fixture.admin_api
def _set_policy_rules(self, overwrite=True):
rules = {'os_compute_api:os-instance-actions:show': '',
'os_compute_api:os-instance-actions:events:details':
'project_id:%(project_id)s'}
policy.set_rules(oslo_policy.Rules.from_dict(rules),
overwrite=overwrite)
def test_instance_action_event_details_non_nova_exception(self):
"""Creates a server using the non-admin user, then reboot it which
will generate a non-NovaException fault and put the instance into
ERROR status. Then checks that fault details are visible.
"""
# Create the server with the non-admin user.
server = self._build_server(
networks=[{'port': nova_fixtures.NeutronFixture.port_1['id']}])
server = self.api.post_server({'server': server})
server = self._wait_for_state_change(server, 'ACTIVE')
# Stop the server before rebooting it so that after the driver.reboot
# method raises an exception, the fake driver does not report the
# instance power state as running - that will make the compute manager
# set the instance vm_state to error.
self.api.post_server_action(server['id'], {'os-stop': None})
server = self._wait_for_state_change(server, 'SHUTOFF')
# Stub out the compute driver reboot method to raise a non-nova
# exception to simulate some error from the underlying hypervisor
# which in this case we are going to say has sensitive content.
error_msg = 'sensitive info'
with mock.patch.object(
self.compute.manager.driver, 'reboot',
side_effect=HypervisorError(error_msg)) as mock_reboot:
reboot_request = {'reboot': {'type': 'HARD'}}
self.api.post_server_action(server['id'], reboot_request)
# In this case we wait for the status to change to ERROR using
# the non-admin user so we can assert the fault details. We also
# wait for the task_state to be None since the wrap_instance_fault
# decorator runs before the reverts_task_state decorator so we will
# be sure the fault is set on the server.
server = self._wait_for_server_parameter(
server, {'status': 'ERROR', 'OS-EXT-STS:task_state': None},
api=self.api)
mock_reboot.assert_called_once()
self._set_policy_rules(overwrite=False)
server_id = server['id']
# Calls GET on the server actions and verifies that the reboot
# action expected in the response.
response = self.api.api_get('/servers/%s/os-instance-actions' %
server_id)
server_actions = response.body['instanceActions']
for actions in server_actions:
if actions['action'] == 'reboot':
reboot_request_id = actions['request_id']
# non admin shows instance actions details and verifies the 'details'
# in the action events via 'request_id', since microversion 2.51 that
# we can show events, but in microversion 2.84 that we can show
# 'details' for non-admin.
self.api.microversion = '2.84'
action_events_response = self.api.api_get(
'/servers/%s/os-instance-actions/%s' % (server_id,
reboot_request_id))
reboot_action = action_events_response.body['instanceAction']
# Since reboot action failed, the 'message' property in reboot action
# should be 'Error', otherwise it's None.
self.assertEqual('Error', reboot_action['message'])
reboot_action_events = reboot_action['events']
# The instance action events from the non-admin user API response
# should not have 'traceback' in it.
self.assertNotIn('traceback', reboot_action_events[0])
# And the sensitive details from the non-nova exception should not be
# in the details.
self.assertIn('details', reboot_action_events[0])
self.assertNotIn(error_msg, reboot_action_events[0]['details'])
# The exception type class name should be in the details.
self.assertIn('HypervisorError', reboot_action_events[0]['details'])
# Get the server fault details for the admin user.
self.admin_api.microversion = '2.84'
action_events_response = self.admin_api.api_get(
'/servers/%s/os-instance-actions/%s' % (server_id,
reboot_request_id))
reboot_action = action_events_response.body['instanceAction']
self.assertEqual('Error', reboot_action['message'])
reboot_action_events = reboot_action['events']
# The admin can see the fault details which includes the traceback,
# and make sure the traceback is there by looking for part of it.
self.assertIn('traceback', reboot_action_events[0])
self.assertIn('in reboot_instance',
reboot_action_events[0]['traceback'])
# The exception type class name should be in the details for the admin
# user as well since the fault handling code cannot distinguish who
# is going to see the message so it only sets class name.
self.assertIn('HypervisorError', reboot_action_events[0]['details'])
def test_instance_action_event_details_with_nova_exception(self):
"""Creates a server using the non-admin user, then reboot it which
will generate a nova exception fault and put the instance into
ERROR status. Then checks that fault details are visible.
"""
# Create the server with the non-admin user.
server = self._build_server(
networks=[{'port': nova_fixtures.NeutronFixture.port_1['id']}])
server = self.api.post_server({'server': server})
server = self._wait_for_state_change(server, 'ACTIVE')
# Stop the server before rebooting it so that after the driver.reboot
# method raises an exception, the fake driver does not report the
# instance power state as running - that will make the compute manager
# set the instance vm_state to error.
self.api.post_server_action(server['id'], {'os-stop': None})
server = self._wait_for_state_change(server, 'SHUTOFF')
# Stub out the compute driver reboot method to raise a nova
# exception 'InstanceRebootFailure' to simulate some error.
exc_reason = 'reboot failure'
with mock.patch.object(
self.compute.manager.driver, 'reboot',
side_effect=exception.InstanceRebootFailure(reason=exc_reason)
) as mock_reboot:
reboot_request = {'reboot': {'type': 'HARD'}}
self.api.post_server_action(server['id'], reboot_request)
# In this case we wait for the status to change to ERROR using
# the non-admin user so we can assert the fault details. We also
# wait for the task_state to be None since the wrap_instance_fault
# decorator runs before the reverts_task_state decorator so we will
# be sure the fault is set on the server.
server = self._wait_for_server_parameter(
server, {'status': 'ERROR', 'OS-EXT-STS:task_state': None},
api=self.api)
mock_reboot.assert_called_once()
self._set_policy_rules(overwrite=False)
server_id = server['id']
# Calls GET on the server actions and verifies that the reboot
# action expected in the response.
response = self.api.api_get('/servers/%s/os-instance-actions' %
server_id)
server_actions = response.body['instanceActions']
for actions in server_actions:
if actions['action'] == 'reboot':
reboot_request_id = actions['request_id']
# non admin shows instance actions details and verifies the 'details'
# in the action events via 'request_id', since microversion 2.51 that
# we can show events, but in microversion 2.84 that we can show
# 'details' for non-admin.
self.api.microversion = '2.84'
action_events_response = self.api.api_get(
'/servers/%s/os-instance-actions/%s' % (server_id,
reboot_request_id))
reboot_action = action_events_response.body['instanceAction']
# Since reboot action failed, the 'message' property in reboot action
# should be 'Error', otherwise it's None.
self.assertEqual('Error', reboot_action['message'])
reboot_action_events = reboot_action['events']
# The instance action events from the non-admin user API response
# should not have 'traceback' in it.
self.assertNotIn('traceback', reboot_action_events[0])
# The nova exception format message should be in the details.
self.assertIn('details', reboot_action_events[0])
self.assertIn(exc_reason, reboot_action_events[0]['details'])
# Get the server fault details for the admin user.
self.admin_api.microversion = '2.84'
action_events_response = self.admin_api.api_get(
'/servers/%s/os-instance-actions/%s' % (server_id,
reboot_request_id))
reboot_action = action_events_response.body['instanceAction']
self.assertEqual('Error', reboot_action['message'])
reboot_action_events = reboot_action['events']
# The admin can see the fault details which includes the traceback,
# and make sure the traceback is there by looking for part of it.
self.assertIn('traceback', reboot_action_events[0])
self.assertIn('in reboot_instance',
reboot_action_events[0]['traceback'])
# The nova exception format message should be in the details.
self.assertIn(exc_reason, reboot_action_events[0]['details'])

View File

@ -1931,10 +1931,16 @@ class ServerMovingTests(integrated_helpers.ProviderUsageBaseTestCase):
self.api.post_server_action(
server['id'], resize_req, check_response_status=[202])
self._assert_resize_migrate_action_fail(
event = self._assert_resize_migrate_action_fail(
server, instance_actions.RESIZE, 'NoValidHost')
self.assertIn('details', event)
# This test case works in microversion 2.84.
self.assertIn('No valid host was found', event['details'])
server = self.admin_api.get_server(server['id'])
self.assertEqual(source_hostname, server['OS-EXT-SRV-ATTR:host'])
# The server is still ACTIVE and thus there is no fault message.
self.assertEqual('ACTIVE', server['status'])
self.assertNotIn('fault', server)
# only the source host shall have usages after the failed resize
self.assertFlavorMatchesUsage(source_rp_uuid, self.flavor1)

View File

@ -415,3 +415,47 @@ class InstanceActionsTestV266(InstanceActionsTestV258):
self.controller.index, req)
detail = 'Additional properties are not allowed'
self.assertIn(detail, six.text_type(ex))
class InstanceActionsTestV284(InstanceActionsTestV266):
wsgi_api_version = "2.84"
def _set_policy_rules(self, overwrite=True):
rules = {'os_compute_api:os-instance-actions:show': '',
'os_compute_api:os-instance-actions:events:details':
'project_id:%(project_id)s'}
policy.set_rules(oslo_policy.Rules.from_dict(rules),
overwrite=overwrite)
def test_show_action_with_details(self):
def fake_get_action(context, uuid, request_id):
return self.fake_actions[uuid][request_id]
def fake_get_events(context, action_id):
return self.fake_events[action_id]
self.stub_out('nova.db.api.action_get_by_request_id', fake_get_action)
self.stub_out('nova.db.api.action_events_get', fake_get_events)
self._set_policy_rules(overwrite=False)
req = self._get_http_req('os-instance-actions/1')
res_dict = self.controller.show(req, FAKE_UUID, FAKE_REQUEST_ID)
for event in res_dict['instanceAction']['events']:
self.assertIn('details', event)
def test_show_action_with_details_old_microversion(self):
"""Before microversion 2.84, we cannot get the details in events."""
def fake_get_action(context, uuid, request_id):
return self.fake_actions[uuid][request_id]
def fake_get_events(context, action_id):
return self.fake_events[action_id]
self.stub_out('nova.db.api.action_get_by_request_id', fake_get_action)
self.stub_out('nova.db.api.action_events_get', fake_get_events)
req = self._get_http_req_with_version('os-instance-actions/1',
version="2.83")
res_dict = self.controller.show(req, FAKE_UUID, FAKE_REQUEST_ID)
for event in res_dict['instanceAction']['events']:
self.assertNotIn('details', event)

View File

@ -122,6 +122,8 @@ class TestPolicyCheck(test.NoDBTestCase):
context, 'os-instance-actions:show', target)
passing_rules += self.cmd._filter_rules(
context, 'os-instance-actions:events', target)
passing_rules += self.cmd._filter_rules(
context, 'os-instance-actions:events:details', target)
self.assertEqual(set(expected_rules), set(passing_rules))
def test_filter_rules_non_admin(self):

View File

@ -52,6 +52,7 @@ policy_data = """
"os_compute_api:os-instance-actions:list": "",
"os_compute_api:os-instance-actions:show": "",
"os_compute_api:os-instance-actions:events": "",
"os_compute_api:os-instance-actions:events:details": "",
"os_compute_api:os-instance-usage-audit-log": "",
"os_compute_api:os-lock-server:lock": "",

View File

@ -14,6 +14,7 @@ import copy
import fixtures
import mock
from nova.api.openstack import api_version_request
from oslo_policy import policy as oslo_policy
from oslo_utils.fixture import uuidsentinel as uuids
from oslo_utils import timeutils
@ -232,6 +233,50 @@ class InstanceActionsScopeTypePolicyTest(InstanceActionsPolicyTest):
self.project_foo_context, self.project_reader_context
]
@mock.patch('nova.objects.InstanceActionEventList.get_by_action')
@mock.patch('nova.objects.InstanceAction.get_by_request_id')
def test_show_instance_action_policy_with_show_details(
self, mock_get_action, mock_get_events):
"""Test to ensure skip checking policy rule
'os_compute_api:os-instance-actions:show'.
"""
self.req.api_version_request = api_version_request.APIVersionRequest(
'2.84')
fake_action = self.fake_actions[FAKE_UUID][FAKE_REQUEST_ID]
mock_get_action.return_value = fake_action
fake_events = self.fake_events[fake_action['id']]
fake_action['events'] = fake_events
mock_get_events.return_value = fake_events
fake_action_fmt = test_instance_actions.format_action(
copy.deepcopy(fake_action))
self._set_policy_rules(overwrite=False)
rule_name = ia_policies.BASE_POLICY_NAME % "events:details"
authorize_res, unauthorize_res = self.common_policy_check(
self.system_reader_authorized_contexts,
self.system_reader_unauthorized_contexts,
rule_name, self.controller.show,
self.req, self.instance['uuid'],
fake_action['request_id'], fatal=False)
for action in authorize_res:
# Ensure the 'details' field in the action events
for event in action['instanceAction']['events']:
self.assertIn('details', event)
# In order to unify the display forms of 'start_time' and
# 'finish_time', format the results returned by the show api.
res_fmt = test_instance_actions.format_action(
action['instanceAction'])
self.assertEqual(fake_action_fmt['events'], res_fmt['events'])
# Because of the microversion > '2.51', that will be contain
# 'events' in the os-instance-actions show api response, but the
# 'details' should not contain in the action events.
for action in unauthorize_res:
# Ensure the 'details' field not in the action events
for event in action['instanceAction']['events']:
self.assertNotIn('details', event)
class InstanceActionsNoLegacyPolicyTest(InstanceActionsPolicyTest):
"""Test os-instance-actions APIs policies with system scope enabled,

View File

@ -456,6 +456,7 @@ class RealRolePolicyTestCase(test.NoDBTestCase):
self.system_reader_rules = (
"os_compute_api:os-services:list",
"os_compute_api:os-instance-actions:events:details",
)
self.system_reader_or_owner_rules = (

View File

@ -0,0 +1,8 @@
---
features:
- |
With microversion 2.84 the
``GET /servers/{server_id}/os-instance-actions/{request_id}`` API returns
a ``details`` parameter for each failed event with a fault message, similar
to the server ``fault.message`` parameter in ``GET /servers/{server_id}``
for a server with status ``ERROR``.