Add --wait flag for provision actions and wait_for_provision_state function

This change adds a --wait[=timeout] flag for all provisioning actions except
abort. It also adds a helper node method, wait_for_provision_state, to back
this flag.

The wait is aborted with an error in any of the following cases:
* node.last_error gets set
* an unexpected stable state is reached (e.g. available instead of active)
* a failure state is reached

Change-Id: I8ba5c7da8980e932a578b084167b00e2b20f6daf
Partial-Bug: #1590752
This commit is contained in:
Dmitry Tantsur 2016-06-10 11:25:14 +02:00 committed by Dmitry Tantsur
parent 342fb4f14b
commit e7c286ff1d
6 changed files with 312 additions and 5 deletions

View File

@ -36,6 +36,14 @@ class InvalidAttribute(ClientException):
pass
class StateTransitionFailed(ClientException):
    """Failed to reach a requested provision state.

    Raised while waiting for a provision state when the node reports an
    error, lands in a failure state, or settles in a stable state other
    than the requested one.
    """
class StateTransitionTimeout(ClientException):
    """Timed out while waiting for a requested provision state.

    Raised when the node has not reached the requested provision state
    within the caller-supplied timeout.
    """
def from_response(response, message=None, traceback=None, method=None,
url=None):
"""Return an HttpError instance based on response from httplib/requests."""

View File

@ -14,6 +14,7 @@
import copy
import tempfile
import time
import mock
import six
@ -1049,3 +1050,117 @@ class NodeManagerTest(testtools.TestCase):
]
self.assertEqual(expect, self.api.calls)
self.assertEqual(NODE_VENDOR_PASSTHRU_METHOD, vendor_methods)
def _fake_node_for_wait(self, state, error=None, target=None):
    """Return a mock node limited to the three state-related attributes.

    :param state: value for ``provision_state``
    :param error: value for ``last_error``
    :param target: value for ``target_provision_state``
    """
    fields = ('provision_state', 'last_error', 'target_provision_state')
    values = dict(zip(fields, (state, error, target)))
    # spec restricts the mock so that any other attribute access fails.
    return mock.Mock(spec=list(fields), **values)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(node.NodeManager, 'get', autospec=True)
def test_wait_for_provision_state(self, mock_get, mock_sleep):
    """The wait returns once the node reports the expected state."""
    still_deploying = [
        self._fake_node_for_wait('deploying', target='active')
        for _attempt in range(2)
    ]
    mock_get.side_effect = still_deploying + [
        self._fake_node_for_wait('active'),
    ]

    self.mgr.wait_for_provision_state('node', 'active')

    # Three polls in total, with a pause after each unsuccessful one.
    self.assertEqual(3, mock_get.call_count)
    mock_get.assert_called_with(self.mgr, 'node')
    self.assertEqual(2, mock_sleep.call_count)
    mock_sleep.assert_called_with(node._DEFAULT_POLL_INTERVAL)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(node.NodeManager, 'get', autospec=True)
def test_wait_for_provision_state_timeout(self, mock_get, mock_sleep):
    """A node stuck in 'deploying' makes the wait time out."""
    stuck_node = self._fake_node_for_wait('deploying', target='active')
    mock_get.return_value = stuck_node

    wait = self.mgr.wait_for_provision_state
    # sleep is mocked out, so the tiny timeout expires almost at once.
    self.assertRaises(exc.StateTransitionTimeout,
                      wait, 'node', 'active', timeout=0.001)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(node.NodeManager, 'get', autospec=True)
def test_wait_for_provision_state_error(self, mock_get, mock_sleep):
    """A failed deployment with last_error set aborts the wait."""
    polled_nodes = [
        self._fake_node_for_wait('deploying', target='active'),
        self._fake_node_for_wait('deploy failed', error='boom'),
    ]
    mock_get.side_effect = polled_nodes

    wait = self.mgr.wait_for_provision_state
    # The node's last_error text has to show up in the exception message.
    self.assertRaisesRegexp(exc.StateTransitionFailed, 'boom',
                            wait, 'node', 'active')

    self.assertEqual(2, mock_get.call_count)
    mock_get.assert_called_with(self.mgr, 'node')
    self.assertEqual(1, mock_sleep.call_count)
    mock_sleep.assert_called_with(node._DEFAULT_POLL_INTERVAL)
@mock.patch.object(node.NodeManager, 'get', autospec=True)
def test_wait_for_provision_state_custom_delay(self, mock_get):
    """A caller-supplied delay function is used between polls."""
    custom_delay = mock.Mock()
    mock_get.side_effect = [
        self._fake_node_for_wait('deploying', target='active'),
        self._fake_node_for_wait('active'),
    ]

    self.mgr.wait_for_provision_state(
        'node', 'active', poll_delay_function=custom_delay)

    self.assertEqual(2, mock_get.call_count)
    mock_get.assert_called_with(self.mgr, 'node')
    # Exactly one pause, taken through the custom function.
    self.assertEqual(1, custom_delay.call_count)
    custom_delay.assert_called_with(node._DEFAULT_POLL_INTERVAL)
def test_wait_for_provision_state_wrong_input(self):
    """Invalid timeout or delay function arguments are rejected."""
    invalid_calls = [
        # (expected exception, offending keyword arguments)
        (ValueError, {'timeout': '42'}),
        (ValueError, {'timeout': -1}),
        (TypeError, {'poll_delay_function': {}}),
    ]
    for expected_exc, bad_kwargs in invalid_calls:
        self.assertRaises(expected_exc,
                          self.mgr.wait_for_provision_state,
                          'node', 'active', **bad_kwargs)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(node.NodeManager, 'get', autospec=True)
def test_wait_for_provision_state_unexpected_stable_state(
        self, mock_get, mock_sleep):
    """Settling in a different stable state fails the wait by default."""
    # Simulates an aborted deployment: the node falls back to
    # 'available' with no target provision state.
    polled_nodes = [
        self._fake_node_for_wait('deploying', target='active'),
        self._fake_node_for_wait('available'),
    ]
    mock_get.side_effect = polled_nodes

    wait = self.mgr.wait_for_provision_state
    self.assertRaisesRegexp(exc.StateTransitionFailed, 'available',
                            wait, 'node', 'active')

    self.assertEqual(2, mock_get.call_count)
    mock_get.assert_called_with(self.mgr, 'node')
    self.assertEqual(1, mock_sleep.call_count)
    mock_sleep.assert_called_with(node._DEFAULT_POLL_INTERVAL)
@mock.patch.object(time, 'sleep', autospec=True)
@mock.patch.object(node.NodeManager, 'get', autospec=True)
def test_wait_for_provision_state_unexpected_stable_state_allowed(
        self, mock_get, mock_sleep):
    """With the check disabled, other stable states are waited through."""
    observed = [
        ('deploying', 'active'),
        ('available', None),
        ('deploying', 'active'),
        ('active', None),
    ]
    mock_get.side_effect = [
        self._fake_node_for_wait(state, target=target)
        for state, target in observed
    ]

    self.mgr.wait_for_provision_state('node', 'active',
                                      fail_on_unexpected_state=False)

    # Every fake node is consumed; a pause follows all but the last poll.
    self.assertEqual(4, mock_get.call_count)
    mock_get.assert_called_with(self.mgr, 'node')
    self.assertEqual(3, mock_sleep.call_count)
    mock_sleep.assert_called_with(node._DEFAULT_POLL_INTERVAL)

View File

@ -486,10 +486,28 @@ class NodeShellTest(utils.BaseTestCase):
args.provision_state = 'active'
args.config_drive = 'foo'
args.clean_steps = None
args.wait_timeout = None
n_shell.do_node_set_provision_state(client_mock, args)
client_mock.node.set_provision_state.assert_called_once_with(
'node_uuid', 'active', configdrive='foo', cleansteps=None)
self.assertFalse(client_mock.node.wait_for_provision_state.called)
def test_do_node_set_provision_state_active_wait(self):
    """A wait timeout of 0 triggers a wait using the 'active' settings."""
    client_mock = mock.MagicMock()
    args = mock.MagicMock(node='node_uuid',
                          provision_state='active',
                          config_drive='foo',
                          clean_steps=None,
                          wait_timeout=0)

    n_shell.do_node_set_provision_state(client_mock, args)

    node_api = client_mock.node
    node_api.set_provision_state.assert_called_once_with(
        'node_uuid', 'active', configdrive='foo', cleansteps=None)
    node_api.wait_for_provision_state.assert_called_once_with(
        'node_uuid', expected_state='active', timeout=0,
        poll_interval=n_shell._LONG_ACTION_POLL_INTERVAL)
def test_do_node_set_provision_state_deleted(self):
client_mock = mock.MagicMock()
@ -498,6 +516,7 @@ class NodeShellTest(utils.BaseTestCase):
args.provision_state = 'deleted'
args.config_drive = None
args.clean_steps = None
args.wait_timeout = None
n_shell.do_node_set_provision_state(client_mock, args)
client_mock.node.set_provision_state.assert_called_once_with(
@ -510,6 +529,7 @@ class NodeShellTest(utils.BaseTestCase):
args.provision_state = 'rebuild'
args.config_drive = None
args.clean_steps = None
args.wait_timeout = None
n_shell.do_node_set_provision_state(client_mock, args)
client_mock.node.set_provision_state.assert_called_once_with(
@ -522,6 +542,7 @@ class NodeShellTest(utils.BaseTestCase):
args.provision_state = 'deleted'
args.config_drive = 'foo'
args.clean_steps = None
args.wait_timeout = None
self.assertRaises(exceptions.CommandError,
n_shell.do_node_set_provision_state,
@ -535,6 +556,7 @@ class NodeShellTest(utils.BaseTestCase):
args.provision_state = 'inspect'
args.config_drive = None
args.clean_steps = None
args.wait_timeout = None
n_shell.do_node_set_provision_state(client_mock, args)
client_mock.node.set_provision_state.assert_called_once_with(
@ -547,6 +569,7 @@ class NodeShellTest(utils.BaseTestCase):
args.provision_state = 'manage'
args.config_drive = None
args.clean_steps = None
args.wait_timeout = None
n_shell.do_node_set_provision_state(client_mock, args)
client_mock.node.set_provision_state.assert_called_once_with(
@ -559,6 +582,7 @@ class NodeShellTest(utils.BaseTestCase):
args.provision_state = 'provide'
args.config_drive = None
args.clean_steps = None
args.wait_timeout = None
n_shell.do_node_set_provision_state(client_mock, args)
client_mock.node.set_provision_state.assert_called_once_with(
@ -572,6 +596,7 @@ class NodeShellTest(utils.BaseTestCase):
args.config_drive = None
clean_steps = '[{"step": "upgrade", "interface": "deploy"}]'
args.clean_steps = clean_steps
args.wait_timeout = None
n_shell.do_node_set_provision_state(client_mock, args)
client_mock.node.set_provision_state.assert_called_once_with(
@ -588,6 +613,7 @@ class NodeShellTest(utils.BaseTestCase):
args.provision_state = 'clean'
args.config_drive = None
args.clean_steps = '-'
args.wait_timeout = None
n_shell.do_node_set_provision_state(client_mock, args)
mock_stdin.assert_called_once_with('clean steps')
@ -604,6 +630,7 @@ class NodeShellTest(utils.BaseTestCase):
args.provision_state = 'clean'
args.config_drive = None
args.clean_steps = '-'
args.wait_timeout = None
self.assertRaises(exc.InvalidAttribute,
n_shell.do_node_set_provision_state,
@ -624,6 +651,7 @@ class NodeShellTest(utils.BaseTestCase):
args.provision_state = 'clean'
args.config_drive = None
args.clean_steps = f.name
args.wait_timeout = None
n_shell.do_node_set_provision_state(client_mock, args)
client_mock.node.set_provision_state.assert_called_once_with(
@ -637,6 +665,7 @@ class NodeShellTest(utils.BaseTestCase):
args.provision_state = 'clean'
args.config_drive = None
args.clean_steps = None
args.wait_timeout = None
# clean_steps isn't specified
self.assertRaisesRegex(exceptions.CommandError,
@ -653,6 +682,7 @@ class NodeShellTest(utils.BaseTestCase):
args.config_drive = None
clean_steps = '[{"step": "upgrade", "interface": "deploy"}]'
args.clean_steps = clean_steps
args.wait_timeout = None
# clean_steps specified but not cleaning
self.assertRaisesRegex(exceptions.CommandError,
@ -668,6 +698,7 @@ class NodeShellTest(utils.BaseTestCase):
args.provision_state = 'abort'
args.config_drive = None
args.clean_steps = None
args.wait_timeout = None
n_shell.do_node_set_provision_state(client_mock, args)
client_mock.node.set_provision_state.assert_called_once_with(
@ -680,11 +711,28 @@ class NodeShellTest(utils.BaseTestCase):
args.provision_state = 'adopt'
args.config_drive = None
args.clean_steps = None
args.wait_timeout = 0
n_shell.do_node_set_provision_state(client_mock, args)
client_mock.node.set_provision_state.assert_called_once_with(
'node_uuid', 'adopt', cleansteps=None, configdrive=None)
def test_do_node_set_provision_state_abort_no_wait(self):
    """Combining a wait with 'abort' is rejected before any API call."""
    client_mock = mock.MagicMock()
    args = mock.MagicMock(node='node_uuid',
                          provision_state='abort',
                          config_drive=None,
                          clean_steps=None,
                          wait_timeout=0)

    self.assertRaisesRegex(exceptions.CommandError,
                           "not supported for provision state 'abort'",
                           n_shell.do_node_set_provision_state,
                           client_mock, args)

    # Neither the state change nor the wait may have been attempted.
    node_api = client_mock.node
    self.assertFalse(node_api.set_provision_state.called)
    self.assertFalse(node_api.wait_for_provision_state.called)
def test_do_node_set_console_mode(self):
client_mock = mock.MagicMock()
args = mock.MagicMock()

View File

@ -12,7 +12,9 @@
# License for the specific language governing permissions and limitations
# under the License.
import logging
import os
import time
from oslo_utils import strutils
@ -29,6 +31,10 @@ _power_states = {
}
LOG = logging.getLogger(__name__)
_DEFAULT_POLL_INTERVAL = 2
class Node(base.Resource):
def __repr__(self):
return "<Node %s>" % self._info
@ -355,3 +361,77 @@ class NodeManager(base.CreateManager):
def get_vendor_passthru_methods(self, node_ident):
path = "%s/vendor_passthru/methods" % node_ident
return self.get(path).to_dict()
def wait_for_provision_state(self, node_ident, expected_state,
                             timeout=0,
                             poll_interval=_DEFAULT_POLL_INTERVAL,
                             poll_delay_function=None,
                             fail_on_unexpected_state=True):
    """Helper function to wait for a node to reach a given state.

    Polls the Ironic API in a loop until the node gets to the requested
    state. The node is always polled at least once.

    Fails in the following cases:
    * Timeout (if provided) is reached
    * Node's last_error gets set to a non-empty value
    * Unexpected stable state is reached and fail_on_unexpected_state is on
    * Error state is reached (if it's not equal to expected_state)

    When a timeout is given, the delay before the next poll is shortened
    to the time remaining, so the wait does not overshoot the timeout by
    up to a whole poll interval and the node is polled one last time at
    the deadline before giving up.

    :param node_ident: node UUID or name
    :param expected_state: expected final provision state
    :param timeout: timeout in seconds, no timeout if 0
    :param poll_interval: interval in seconds between two polls
    :param poll_delay_function: function to use to wait between polls
        (defaults to time.sleep). Should take one argument - delay time
        in seconds. Any exceptions raised inside it will abort the wait.
    :param fail_on_unexpected_state: whether to fail if the node
        reaches a different stable state.
    :raises: ValueError if timeout is not a non-negative number
    :raises: TypeError if poll_delay_function is not callable
    :raises: StateTransitionFailed if node reached an error state
    :raises: StateTransitionTimeout on timeout
    """
    if not isinstance(timeout, (int, float)) or timeout < 0:
        raise ValueError(_('Timeout must be a non-negative number'))

    threshold = time.time() + timeout
    expected_state = expected_state.lower()
    poll_delay_function = (time.sleep if poll_delay_function is None
                           else poll_delay_function)
    if not callable(poll_delay_function):
        raise TypeError(_('poll_delay_function must be callable'))

    # TODO(dtantsur): use version negotiation to request API 1.8 and use
    # the "fields" argument to reduce amount of data sent.
    while True:
        node = self.get(node_ident)
        if node.provision_state == expected_state:
            LOG.debug('Node %(node)s reached provision state %(state)s',
                      {'node': node_ident, 'state': expected_state})
            return

        # Note that if expected_state == 'error' we still succeed
        if (node.last_error or node.provision_state == 'error' or
                node.provision_state.endswith(' failed')):
            raise exc.StateTransitionFailed(
                _('Node %(node)s failed to reach state %(state)s. '
                  'It\'s in state %(actual)s, and has error: %(error)s') %
                {'node': node_ident, 'state': expected_state,
                 'actual': node.provision_state, 'error': node.last_error})

        # An empty target_provision_state means no transition is in
        # progress, i.e. the node sits in a stable state.
        if fail_on_unexpected_state and not node.target_provision_state:
            raise exc.StateTransitionFailed(
                _('Node %(node)s failed to reach state %(state)s. '
                  'It\'s in unexpected stable state %(actual)s') %
                {'node': node_ident, 'state': expected_state,
                 'actual': node.provision_state})

        LOG.debug('Still waiting for node %(node)s to reach state '
                  '%(state)s, the current state is %(actual)s',
                  {'node': node_ident, 'state': expected_state,
                   'actual': node.provision_state})

        delay = poll_interval
        if timeout:
            remaining = threshold - time.time()
            if remaining <= 0:
                break
            # Never sleep past the deadline; the shortened final delay is
            # followed by one last poll.
            delay = min(poll_interval, remaining)
        poll_delay_function(delay)

    raise exc.StateTransitionTimeout(
        _('Node %(node)s failed to reach state %(state)s in '
          '%(timeout)s seconds') % {'node': node_ident,
                                    'state': expected_state,
                                    'timeout': timeout})

View File

@ -23,6 +23,35 @@ from ironicclient import exc
from ironicclient.v1 import resource_fields as res_fields
# Seconds to pause between two polls of a node's provision state.
_LONG_ACTION_POLL_INTERVAL = 10
_SHORT_ACTION_POLL_INTERVAL = 2

# Keys are the accepted provision actions; each value holds the keyword
# arguments handed to wait_for_provision_state for that action, or None
# when waiting is not supported.
PROVISION_ACTIONS = {
    'active': dict(expected_state='active',
                   poll_interval=_LONG_ACTION_POLL_INTERVAL),
    'deleted': dict(expected_state='available',
                    poll_interval=_LONG_ACTION_POLL_INTERVAL),
    'rebuild': dict(expected_state='active',
                    poll_interval=_LONG_ACTION_POLL_INTERVAL),
    # The short interval is suboptimal for in-band inspection, but it's
    # probably not worth making people wait 10 seconds for OOB inspection.
    'inspect': dict(expected_state='manageable',
                    poll_interval=_SHORT_ACTION_POLL_INTERVAL),
    # The long interval assumes cleaning is in place.
    'provide': dict(expected_state='available',
                    poll_interval=_LONG_ACTION_POLL_INTERVAL),
    'manage': dict(expected_state='manageable',
                   poll_interval=_SHORT_ACTION_POLL_INTERVAL),
    'clean': dict(expected_state='manageable',
                  poll_interval=_LONG_ACTION_POLL_INTERVAL),
    'adopt': dict(expected_state='active',
                  poll_interval=_SHORT_ACTION_POLL_INTERVAL),
    # no support for --wait in abort
    'abort': None,
}
def _print_node_show(node, fields=None, json=False):
if fields is None:
fields = res_fields.NODE_DETAILED_RESOURCE.fields
@ -445,11 +474,9 @@ def do_node_set_target_raid_config(cc, args):
@cliutils.arg(
'provision_state',
metavar='<provision-state>',
choices=['active', 'deleted', 'rebuild', 'inspect', 'provide',
'manage', 'clean', 'abort', 'adopt'],
help="Supported states: 'active', 'deleted', 'rebuild', "
"'inspect', 'provide', 'manage', 'clean', 'abort', "
"or 'adopt'.")
choices=list(PROVISION_ACTIONS),
help="Supported states: %s." % ', '.join("'%s'" % state
for state in PROVISION_ACTIONS))
@cliutils.arg(
'--config-drive',
metavar='<config-drive>',
@ -470,6 +497,18 @@ def do_node_set_target_raid_config(cc, args):
"keys 'interface' and 'step', and optional key 'args'. "
"This argument must be specified (and is only valid) when "
"setting provision-state to 'clean'."))
@cliutils.arg(
'--wait',
type=int,
dest='wait_timeout',
default=None,
const=0,
nargs='?',
help=("Wait for a node to reach the expected state. Not supported "
"for 'abort'. Optionally takes a timeout in seconds. "
"The default value is 0, meaning no timeout. "
"Fails if the node reaches an unexpected stable state, a failure "
"state or a state with last_error set."))
def do_node_set_provision_state(cc, args):
"""Initiate a provisioning state change for a node."""
if args.config_drive and args.provision_state != 'active':
@ -482,6 +521,13 @@ def do_node_set_provision_state(cc, args):
raise exceptions.CommandError(_('--clean-steps must be specified when '
'setting provision state to "clean"'))
if args.wait_timeout is not None:
wait_args = PROVISION_ACTIONS.get(args.provision_state)
if wait_args is None:
raise exceptions.CommandError(
_("--wait is not supported for provision state '%s'")
% args.provision_state)
clean_steps = args.clean_steps
if args.clean_steps == '-':
clean_steps = utils.get_from_stdin('clean steps')
@ -490,6 +536,11 @@ def do_node_set_provision_state(cc, args):
cc.node.set_provision_state(args.node, args.provision_state,
configdrive=args.config_drive,
cleansteps=clean_steps)
if args.wait_timeout is not None:
print(_('Waiting for provision state %(state)s on node %(node)s') %
{'state': wait_args['expected_state'], 'node': args.node})
cc.node.wait_for_provision_state(args.node, timeout=args.wait_timeout,
**wait_args)
@cliutils.arg('node', metavar='<node>', help="Name or UUID of the node.")

View File

@ -0,0 +1,5 @@
---
features:
- Adds the --wait flag to the "node-set-provision-state" command and the
associated "node.wait_for_provision_state" method to the Python API.
The flag works with all provision actions except "abort".