Add wait step
Adds a wait step to allow for finer grained workflows and forcing interruptions which may be needed in some cases with specialized hardware. Change-Id: Idc338b761ebe35a4635022a324ca5acbf29fc462
This commit is contained in:
parent
2d8986bda4
commit
8fc8372e74
@ -63,6 +63,13 @@ perform specific actions.
|
||||
| | driver specific code may request an CPU interrupt based |
|
||||
| | reset. This step can be executed on child nodes. |
|
||||
+-----------+----------------------------------------------------------+
|
||||
| wait | Causes a brief pause in the overall step execution which |
|
||||
| | pauses until the next heartbeat operation, unless a |
|
||||
| | seconds argument is provided. If a *seconds* argument is |
|
||||
| | provided, then the step execution will pause for the |
|
||||
| | requested amount of time. |
|
||||
+-----------+----------------------------------------------------------+
|
||||
|
||||
|
||||
In the these cases, the interface upon which the method is expected is
|
||||
ignored, and the step is acted upon based upon just the step's name.
|
||||
|
@ -59,9 +59,28 @@ def warn_about_sqlite():
|
||||
'autocommit support.')
|
||||
|
||||
|
||||
def warn_about_max_wait_parameters(conf):
|
||||
max_wait = conf.conductor.max_conductor_wait_step_seconds
|
||||
max_deploy_timeout = conf.conductor.deploy_callback_timeout
|
||||
max_clean_timeout = conf.conductor.clean_callback_timeout
|
||||
error_with = None
|
||||
if max_wait >= max_deploy_timeout:
|
||||
error_with = 'deploy_callback_timeout'
|
||||
if max_wait >= max_clean_timeout:
|
||||
error_with = 'clean_callback_timeout'
|
||||
if error_with:
|
||||
LOG.warning('The [conductor]max_conductor_wait_step_seconds '
|
||||
'configuration parameter exceeds the value of '
|
||||
'[conductor]%s, which could create a condition where '
|
||||
'tasks may timeout. Ironic recommends a low default '
|
||||
'value for [conductor]max_conductor_wait_step_seconds ',
|
||||
'please re-evaluate your configuration.', error_with)
|
||||
|
||||
|
||||
def issue_startup_warnings(conf):
|
||||
warn_about_unsafe_shred_parameters(conf)
|
||||
warn_about_sqlite()
|
||||
warn_about_max_wait_parameters(conf)
|
||||
|
||||
|
||||
def main():
|
||||
|
@ -11,6 +11,7 @@
|
||||
# under the License.
|
||||
|
||||
import collections
|
||||
import time
|
||||
|
||||
from oslo_config import cfg
|
||||
from oslo_log import log
|
||||
@ -825,6 +826,7 @@ def use_reserved_step_handler(task, step):
|
||||
:param step: The requested step.
|
||||
"""
|
||||
step_name = step.get('step')
|
||||
step_args = step.get('args', {})
|
||||
if step_name and step_name in RESERVED_STEP_HANDLER_MAPPING.keys():
|
||||
call_to_use = RESERVED_STEP_HANDLER_MAPPING[step_name]
|
||||
method = call_to_use[0]
|
||||
@ -837,3 +839,30 @@ def use_reserved_step_handler(task, step):
|
||||
# If we've reached this point, we're going to return None as
|
||||
# there is no work for us to do. This allows the caller to
|
||||
# take its normal path.
|
||||
if step_name == 'wait':
|
||||
# By default, we enter a wait state.
|
||||
task.process_event('wait')
|
||||
if 'seconds' in step_args:
|
||||
# If we have a seconds argument, just pause.
|
||||
rec_seconds = int(step_args['seconds'])
|
||||
if rec_seconds > CONF.conductor.max_conductor_wait_step_seconds:
|
||||
warning = (
|
||||
_('A wait time exceeding the configured maximum '
|
||||
'has been requested. Holding for %s, got %s.') %
|
||||
(rec_seconds,
|
||||
CONF.conductor.max_conductor_wait_step_seconds)
|
||||
)
|
||||
utils.node_history_record(task.node, event=warning,
|
||||
event_type=task.node.provision_state)
|
||||
LOG.warning(warning)
|
||||
rec_seconds = CONF.conductor.max_conductor_wait_step_seconds
|
||||
_sleep_wrapper(rec_seconds)
|
||||
# Explicitly resume.
|
||||
task.process_event('resume')
|
||||
# Return True, which closed out execution until the next heartbeat.
|
||||
return EXIT_STEPS
|
||||
|
||||
|
||||
def _sleep_wrapper(seconds):
|
||||
"""Wrapper for sleep to allow for unit testing."""
|
||||
time.sleep(seconds)
|
||||
|
@ -366,6 +366,20 @@ opts = [
|
||||
'for operators to use if needed to perform specific '
|
||||
'tasks where this is known acceptable. Use at your'
|
||||
'own risk!')),
|
||||
cfg.IntOpt('max_conductor_wait_step_seconds',
|
||||
default=30,
|
||||
min=0,
|
||||
max=1800,
|
||||
mutable=True,
|
||||
help=_('The maximum number of seconds which a step can '
|
||||
'be requested to explicitly sleep or wait. This '
|
||||
'value should be changed sparingly as it holds a '
|
||||
'conductor thread and if used across many nodes at '
|
||||
'once can exhaust a conductor\'s resources. This'
|
||||
'capability has a hard coded maximum wait of 1800 '
|
||||
'seconds, or 30 minutes. If you need to wait longer '
|
||||
'than the maximum value, we recommend exploring '
|
||||
'hold steps.')),
|
||||
]
|
||||
|
||||
|
||||
|
@ -1324,7 +1324,7 @@ class ReservedStepsHandlerTestCase(db_base.DbTestCase):
|
||||
def _test_reserved_step(self, step, mock_power_action):
|
||||
node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
provision_state=states.VERIFYING,
|
||||
provision_state=states.CLEANING,
|
||||
target_provision_state=states.MANAGEABLE,
|
||||
last_error=None,
|
||||
clean_step=None)
|
||||
@ -1341,3 +1341,45 @@ class ReservedStepsHandlerTestCase(db_base.DbTestCase):
|
||||
|
||||
def test_reserved_step_power_reboot(self):
|
||||
self._test_reserved_step({'step': 'reboot'})
|
||||
|
||||
|
||||
class ReservedStepHandlerByNameTestCase(db_base.DbTestCase):
|
||||
def setUp(self):
|
||||
super(ReservedStepHandlerByNameTestCase, self).setUp()
|
||||
|
||||
@mock.patch.object(conductor_steps, '_sleep_wrapper', autospec=True)
|
||||
def _test_reserved_step(self, step, mock_sleep):
|
||||
CONF.set_override('max_conductor_wait_step_seconds', 2,
|
||||
group='conductor')
|
||||
node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
provision_state=states.CLEANING,
|
||||
target_provision_state=states.MANAGEABLE,
|
||||
last_error=None,
|
||||
clean_step=None)
|
||||
with task_manager.acquire(
|
||||
self.context, node.uuid, shared=False) as task:
|
||||
res = conductor_steps.use_reserved_step_handler(task, step)
|
||||
self.assertTrue(res)
|
||||
if step.get('step') == 'wait':
|
||||
if 'args' in step:
|
||||
self.assertEqual(task.node.provision_state,
|
||||
states.CLEANING)
|
||||
if step['args']['seconds'] == 3:
|
||||
mock_sleep.assert_called_once_with(2)
|
||||
else:
|
||||
mock_sleep.assert_called_once_with(
|
||||
step['args']['seconds'])
|
||||
else:
|
||||
self.assertEqual(task.node.provision_state,
|
||||
states.CLEANWAIT)
|
||||
mock_sleep.assert_not_called()
|
||||
|
||||
def test_reserved_step_wait(self):
|
||||
self._test_reserved_step({'step': 'wait'})
|
||||
|
||||
def test_reserved_step_wait_time_to_long(self):
|
||||
self._test_reserved_step({'step': 'wait', 'args': {'seconds': 3}})
|
||||
|
||||
def test_reserved_step_wait_time(self):
|
||||
self._test_reserved_step({'step': 'wait', 'args': {'seconds': 1}})
|
||||
|
7
releasenotes/notes/add-wait-step-3751e7918afdd199.yaml
Normal file
7
releasenotes/notes/add-wait-step-3751e7918afdd199.yaml
Normal file
@ -0,0 +1,7 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Adds a ``wait`` clean/deploy step, which takes an optional argument,
|
||||
passed in a step definition of ``seconds`` to force an explicit
|
||||
pause of the current process. Otherwise the next heartbeat action
|
||||
triggers resumption of the process.
|
Loading…
Reference in New Issue
Block a user