Wait for power off before finishing discovery

This used to be part of returning-to-ramdisk, but was lost after
revert. Also do not make the ramdisk wait for it's own power off.

Change-Id: Ia43f834023b189324cba38a19cc782fc0b7745d4
Closes-Bug: #1401872
This commit is contained in:
Dmitry Tantsur 2014-12-12 14:42:42 +01:00
parent bdaf0f4d15
commit 6432b534f6
2 changed files with 47 additions and 10 deletions

View File

@ -29,6 +29,7 @@ from ironic_discoverd import utils
LOG = logging.getLogger("ironic_discoverd.process")
_POWER_CHECK_PERIOD = 5
_POWER_OFF_CHECK_PERIOD = 5
def process(node_info):
@ -110,7 +111,7 @@ def _process_node(ironic, node, node_info, cached_node):
'ipmi_username': node.driver_info.get('ipmi_username'),
'ipmi_password': node.driver_info.get('ipmi_password')}
else:
_finish_discovery(ironic, cached_node)
eventlet.greenthread.spawn_n(_finish_discovery, ironic, cached_node)
return {}
@ -131,21 +132,38 @@ def _wait_for_power_management(ironic, cached_node):
'after discovery', cached_node.uuid)
def _force_power_off(ironic, node):
LOG.debug('Forcing power off of node %s', node.uuid)
def _force_power_off(ironic, cached_node):
LOG.debug('Forcing power off of node %s', cached_node.uuid)
try:
utils.retry_on_conflict(ironic.node.set_power_state, node.uuid, 'off')
utils.retry_on_conflict(ironic.node.set_power_state,
cached_node.uuid, 'off')
except Exception as exc:
LOG.error('Failed to power off node %s, check it\'s power '
'management configuration:\n%s', node.uuid, exc)
raise utils.DiscoveryFailed('Failed to power off node %s' % node.uuid)
'management configuration:\n%s', cached_node.uuid, exc)
raise utils.DiscoveryFailed('Failed to power off node %s'
% cached_node.uuid)
deadline = cached_node.started_at + conf.getint('discoverd', 'timeout')
while time.time() < deadline:
node = ironic.node.get(cached_node.uuid)
if (node.power_state or '').lower() == 'power off':
return
LOG.info('Waiting for node %s to power off, current state is %s',
cached_node.uuid, node.power_state)
eventlet.greenthread.sleep(_POWER_OFF_CHECK_PERIOD)
LOG.error('Timeout waiting for node %s to power off after discovery',
cached_node.uuid)
raise utils.DiscoveryFailed(
'Timeout waiting for node %s to power off after discovery',
cached_node.uuid)
def _finish_discovery(ironic, node):
_force_power_off(ironic, node)
def _finish_discovery(ironic, cached_node):
_force_power_off(ironic, cached_node)
patch = [{'op': 'add', 'path': '/extra/newly_discovered', 'value': 'true'},
{'op': 'remove', 'path': '/extra/on_discovery'}]
utils.retry_on_conflict(ironic.node.update, node.uuid, patch)
utils.retry_on_conflict(ironic.node.update, cached_node.uuid, patch)
LOG.info('Discovery finished successfully for node %s', node.uuid)
LOG.info('Discovery finished successfully for node %s', cached_node.uuid)

View File

@ -202,6 +202,7 @@ class TestProcessNode(BaseTest):
conf.CONF.set('discoverd', 'processing_hooks',
'ramdisk_error,scheduler,validate_interfaces,example')
self.validate_attempts = 5
self.power_off_attempts = 2
self.data['macs'] = self.macs # validate_interfaces hook
self.cached_node = node_cache.NodeInfo(uuid=self.uuid,
started_at=self.started_at)
@ -218,6 +219,10 @@ class TestProcessNode(BaseTest):
self.cli.node.validate.side_effect = self.fake_validate()
self.cli.port.create.side_effect = self.ports
self.cli.node.update.return_value = self.node
# Simulate longer power off
self.cli.node.get.side_effect = (
[self.node] * self.power_off_attempts
+ [mock.Mock(power_state='power off')])
def fake_validate(self):
# Simulate long ramdisk task
@ -240,6 +245,8 @@ class TestProcessNode(BaseTest):
self.cli.node.update.assert_any_call(self.uuid, self.patch_after)
self.cli.node.set_power_state.assert_called_once_with(self.uuid, 'off')
self.assertFalse(self.cli.node.validate.called)
self.assertEqual(self.power_off_attempts + 1,
self.cli.node.get.call_count)
post_hook_mock.assert_called_once_with(self.node, mock.ANY,
self.data)
@ -276,6 +283,18 @@ class TestProcessNode(BaseTest):
self.cli.node.set_power_state.assert_called_with(self.uuid, 'off')
self.assertEqual(2, self.cli.node.set_power_state.call_count)
@mock.patch.object(time, 'time')
def test_power_off_timeout(self, time_mock, filters_mock,
post_hook_mock):
conf.CONF.set('discoverd', 'timeout', '100')
time_mock.return_value = self.started_at + 1000
self.cli.node.get.return_value = self.node
self.assertRaisesRegexp(utils.DiscoveryFailed, 'power off', self.call)
self.cli.node.update.assert_called_once_with(self.uuid,
self.patch_before)
def test_port_failed(self, filters_mock, post_hook_mock):
self.ports[0] = exceptions.Conflict()