Power fault recovery: API implementation

This patch exposes the fault field on the API node object.
Microversion handling and compatibility are included.

Story: #1596107
Task: #10469

Change-Id: I31ed332be12cf98baaf01badcbb09ae4b8c6cae9
Partial-Bug: #1596107
This commit is contained in:
Kaifeng Wang 2018-03-24 15:32:53 +08:00
parent 585427ab8e
commit b4c4eb99fc
8 changed files with 90 additions and 5 deletions

View File

@ -132,7 +132,7 @@ and any defaults added for non-specified fields. Most fields default to "null"
or "".
The list and example below are representative of the response as of API
microversion 1.38.
microversion 1.42.
.. rest_parameters:: parameters.yaml
@ -144,6 +144,7 @@ microversion 1.38.
- target_provision_state: target_provision_state
- maintenance: maintenance
- maintenance_reason: maintenance_reason
- fault: fault
- last_error: last_error
- reservation: reservation
- driver: driver_name
@ -268,6 +269,9 @@ Nova instance, eg. with a request to ``v1/nodes/detail?instance_uuid={NOVA INSTA
.. versionadded:: 1.38
Introduced the ``rescue_interface`` field.
.. versionadded:: 1.42
Introduced the ``fault`` field.
Normal response codes: 200
Error codes: 400,403,406
@ -279,6 +283,7 @@ Request
- instance_uuid: r_instance_uuid
- maintenance: r_maintenance
- fault: r_fault
- associated: r_associated
- provision_state: r_provision_state
- driver: r_driver
@ -301,6 +306,7 @@ Response
- target_provision_state: target_provision_state
- maintenance: maintenance
- maintenance_reason: maintenance_reason
- fault: fault
- last_error: last_error
- reservation: reservation
- driver: driver_name
@ -355,6 +361,9 @@ only the specified set.
.. versionadded:: 1.38
Introduced the ``rescue_interface`` field.
.. versionadded:: 1.42
Introduced the ``fault`` field.
Normal response codes: 200
Error codes: 400,403,404,406
@ -380,6 +389,7 @@ Response
- target_provision_state: target_provision_state
- maintenance: maintenance
- maintenance_reason: maintenance_reason
- fault: fault
- last_error: last_error
- reservation: reservation
- driver: driver_name
@ -463,6 +473,7 @@ Response
- target_provision_state: target_provision_state
- maintenance: maintenance
- maintenance_reason: maintenance_reason
- fault: fault
- last_error: last_error
- reservation: reservation
- driver: driver_name

View File

@ -198,6 +198,13 @@ r_driver:
in: query
required: false
type: string
r_fault:
description: |
Filter the list of returned nodes, and only return those with the specified
``fault``.
in: query
required: false
type: string
r_instance_uuid:
description: |
Filter the list of returned nodes, and only return the node with this
@ -594,6 +601,16 @@ extra:
in: body
required: true
type: object
fault:
description: |
The active fault, if any, detected by ironic on this node. A node with a
fault is typically in "maintenance mode".
None means no fault has been detected by ironic. "power failure" indicates
that ironic failed to retrieve the power state from this node. There are
other possible types, e.g., "clean failure" and "rescue abort failure".
in: body
required: false
type: string
hosts:
description: |
A list of active hosts that support this driver.

View File

@ -2,6 +2,12 @@
REST API Version History
========================
1.42 (Rocky, master)
--------------------
Added ``fault`` to the node object, to indicate the currently detected fault
on the node.
1.41 (Rocky, master)
--------------------

View File

@ -147,6 +147,9 @@ def hide_fields_in_newer_versions(obj):
if pecan.request.version.minor < versions.MINOR_20_NETWORK_INTERFACE:
obj.network_interface = wsme.Unset
if pecan.request.version.minor < versions.MINOR_42_FAULT:
obj.fault = wsme.Unset
if not api_utils.allow_resource_class():
obj.resource_class = wsme.Unset
@ -961,6 +964,9 @@ class Node(base.APIBase):
maintenance_reason = wsme.wsattr(wtypes.text, readonly=True)
"""Indicates reason for putting a node in maintenance mode."""
fault = wsme.wsattr(wtypes.text, readonly=True)
"""Indicates the active fault of a node."""
target_provision_state = wsme.wsattr(wtypes.text, readonly=True)
"""The user modified desired provision state of the node."""
@ -1207,7 +1213,7 @@ class Node(base.APIBase):
'memory_mb': '1024', 'local_gb': '10', 'cpus': '1'},
updated_at=time, created_at=time,
provision_updated_at=time, instance_info={},
maintenance=False, maintenance_reason=None,
maintenance=False, maintenance_reason=None, fault=None,
inspection_finished_at=None, inspection_started_at=time,
console_enabled=False, clean_step={},
raid_config=None, target_raid_config=None,
@ -1241,7 +1247,8 @@ class NodePatchType(types.JsonPatchType):
'/provision_updated_at', '/maintenance_reason',
'/driver_internal_info', '/inspection_finished_at',
'/inspection_started_at', '/clean_step',
'/raid_config', '/target_raid_config']
'/raid_config', '/target_raid_config',
'/fault']
class NodeCollection(collection.Collection):

View File

@ -79,6 +79,7 @@ BASE_VERSION = 1
# v1.40: Add bios.properties.
# Add bios_interface to the node object.
# v1.41: Add inspection abort support.
# v1.42: Expose fault field to node.
MINOR_0_JUNO = 0
MINOR_1_INITIAL_VERSION = 1
@ -122,6 +123,7 @@ MINOR_38_RESCUE_INTERFACE = 38
MINOR_39_INSPECT_WAIT = 39
MINOR_40_BIOS_INTERFACE = 40
MINOR_41_INSPECTION_ABORT = 41
MINOR_42_FAULT = 42
# When adding another version, update:
# - MINOR_MAX_VERSION
@ -129,7 +131,7 @@ MINOR_41_INSPECTION_ABORT = 41
# explanation of what changed in the new version
# - common/release_mappings.py, RELEASE_MAPPING['master']['api']
MINOR_MAX_VERSION = MINOR_41_INSPECTION_ABORT
MINOR_MAX_VERSION = MINOR_42_FAULT
# String representations of the minor and maximum versions
_MIN_VERSION_STRING = '{}.{}'.format(BASE_VERSION, MINOR_1_INITIAL_VERSION)

View File

@ -100,7 +100,7 @@ RELEASE_MAPPING = {
}
},
'master': {
'api': '1.41',
'api': '1.42',
'rpc': '1.44',
'objects': {
'Node': ['1.25'],

View File

@ -250,6 +250,17 @@ class TestListNodes(test_api_base.BaseApiTest):
self.assertEqual('inspect wait',
higher_version_data['provision_state'])
def test_node_fault_hidden_in_lower_version(self):
# Checks that the ``fault`` key is absent from a node GET response at API
# version 1.41 and present at API version 1.42.
node = obj_utils.create_test_node(self.context)
# Request the node with the older microversion: no ``fault`` key expected.
data = self.get_json(
'/nodes/%s' % node.uuid,
headers={api_base.Version.string: '1.41'})
self.assertNotIn('fault', data)
# Same request at 1.42: the ``fault`` key must now be in the response.
data = self.get_json(
'/nodes/%s' % node.uuid,
headers={api_base.Version.string: '1.42'})
self.assertIn('fault', data)
def test_get_one_custom_fields(self):
node = obj_utils.create_test_node(self.context,
chassis_id=self.chassis.id)
@ -2280,6 +2291,19 @@ class TestPatch(test_api_base.BaseApiTest):
self.assertEqual('application/json', response.content_type)
self.assertEqual(http_client.BAD_REQUEST, response.status_code)
def test_patch_fault_forbidden(self):
# Checks that a JSON PATCH replacing ``/fault`` is rejected at API
# version 1.42.
# NOTE: despite "forbidden" in the name, the expected status asserted below
# is HTTP 400 (BAD_REQUEST), not 403.
node = obj_utils.create_test_node(self.context,
uuid=uuidutils.generate_uuid())
# expect_errors=True is passed so the error response can be inspected
# by the assertions that follow.
response = self.patch_json('/nodes/%s' % node.uuid,
[{'path': '/fault',
'op': 'replace',
'value': 'why care'}],
headers={api_base.Version.string: "1.42"},
expect_errors=True)
self.assertEqual('application/json', response.content_type)
self.assertEqual(http_client.BAD_REQUEST, response.status_code)
# The error body must carry a non-empty ``error_message``.
self.assertTrue(response.json['error_message'])
def _create_node_locally(node):
driver_factory.check_and_update_node_interfaces(node)

View File

@ -0,0 +1,18 @@
---
features:
- |
Adds support for the ``fault`` field in the node, beginning with API
version 1.42. This field records the fault, if any, detected by ironic for
a node. If no fault is detected, the ``fault`` is ``None``. The ``fault``
is set to one of the following values, depending on the circumstances:
* ``power failure``: when a node is put into maintenance due to power sync
failures that exceed max retries.
* ``clean failure``: when a node is put into maintenance due to failure of
a cleaning operation.
* ``rescue abort failure``: when a node is put into maintenance due to
failure of cleaning up during rescue abort.
The ``fault`` will be set to ``None`` if an operator manually sets
maintenance to ``False``. The ``fault`` field can be used as a filter for
querying nodes.