Add timeout to SERVICEWAIT
Change-Id: I11ae8fc3273107b9e6713166fb077f00e2dcef7e
This commit is contained in:
parent
05cc53f0df
commit
1a0de72470
@ -1904,6 +1904,28 @@ class ConductorManager(base_manager.BaseConductorManager):
|
||||
callback_method=utils.cleanup_rescuewait_timeout
|
||||
)
|
||||
|
||||
@METRICS.timer('ConductorManager._check_servicewait_timeouts')
|
||||
@periodics.periodic(
|
||||
spacing=CONF.conductor.check_provision_state_interval,
|
||||
enabled=CONF.conductor.check_provision_state_interval > 0
|
||||
and CONF.conductor.service_callback_timeout > 0)
|
||||
def _check_servicewait_timeouts(self, context):
|
||||
"""Periodically check if servicing has timed out waiting for heartbeat.
|
||||
|
||||
:param context: request context
|
||||
|
||||
"""
|
||||
callback_timeout = CONF.conductor.service_callback_timeout
|
||||
filters = {'reserved': False,
|
||||
'provision_state': states.SERVICEWAIT,
|
||||
'maintenance': False,
|
||||
'provisioned_before': callback_timeout}
|
||||
self._fail_if_in_state(
|
||||
context, filters, states.SERVICEWAIT,
|
||||
'provision_updated_at',
|
||||
keep_target_state=True,
|
||||
callback_method=utils.cleanup_servicewait_timeout)
|
||||
|
||||
@METRICS.timer('ConductorManager._sync_local_state')
|
||||
@periodics.node_periodic(
|
||||
purpose='node take over',
|
||||
|
@ -170,6 +170,13 @@ opts = [
|
||||
'ramdisk doing the cleaning. If the timeout is reached '
|
||||
'the node will be put in the "clean failed" provision '
|
||||
'state. Set to 0 to disable timeout.')),
|
||||
cfg.IntOpt('service_callback_timeout',
|
||||
default=1800,
|
||||
min=0,
|
||||
help=_('Timeout (seconds) to wait for a callback from the '
|
||||
'ramdisk doing the servicing. If the timeout is reached '
|
||||
'the node will be put in the "service failed" provision '
|
||||
'state. Set to 0 to disable timeout.')),
|
||||
cfg.IntOpt('rescue_callback_timeout',
|
||||
default=1800,
|
||||
min=0,
|
||||
|
@ -2370,6 +2370,31 @@ class CheckTimeoutsTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
|
||||
mock_clean_up.assert_called_once_with(mock.ANY, mock.ANY)
|
||||
node_power_mock.assert_called_once_with(mock.ANY, states.POWER_OFF)
|
||||
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.tear_down_service',
|
||||
autospec=True)
|
||||
@mock.patch.object(conductor_utils, 'node_power_action', autospec=True)
|
||||
def test_check_servicewait_timeouts(self, node_power_mock, mock_clean_up):
|
||||
self._start_service()
|
||||
CONF.set_override('service_callback_timeout', 1, group='conductor')
|
||||
tgt_prov_state = states.RESCUE
|
||||
node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
network_interface='flat',
|
||||
provision_state=states.SERVICEWAIT,
|
||||
target_provision_state=tgt_prov_state,
|
||||
provision_updated_at=datetime.datetime(2000, 1, 1, 0, 0))
|
||||
|
||||
self.service._check_servicewait_timeouts(self.context)
|
||||
self._stop_service()
|
||||
node.refresh()
|
||||
self.assertEqual(states.SERVICEFAIL, node.provision_state)
|
||||
self.assertEqual(tgt_prov_state, node.target_provision_state)
|
||||
self.assertIsNotNone(node.last_error)
|
||||
self.assertIn('Timeout reached while servicing the node',
|
||||
node.last_error)
|
||||
mock_clean_up.assert_called_once_with(mock.ANY, mock.ANY)
|
||||
node_power_mock.assert_not_called()
|
||||
|
||||
|
||||
@mgr_utils.mock_record_keepalive
|
||||
class DoNodeTearDownTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
|
||||
|
@ -0,0 +1,6 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Adds a timeout to the ``service wait`` state. Previously, a node stuck in
|
||||
this state would remain in it forever. The timeout value can be adjusted
|
||||
via the new option ``[conductor]service_callback_timeout``.
|
Loading…
Reference in New Issue
Block a user