From 6432b534f6d9e023eae4bc3ab0425c690dc045b2 Mon Sep 17 00:00:00 2001
From: Dmitry Tantsur
Date: Fri, 12 Dec 2014 14:42:42 +0100
Subject: [PATCH] Wait for power off before finishing discovery

This used to be part of returning-to-ramdisk, but was lost after revert.
Also do not make the ramdisk wait for its own power off.

Change-Id: Ia43f834023b189324cba38a19cc782fc0b7745d4
Closes-Bug: #1401872
---
 ironic_discoverd/process.py           | 38 ++++++++++++++++++++-------
 ironic_discoverd/test/test_process.py | 19 ++++++++++++++
 2 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/ironic_discoverd/process.py b/ironic_discoverd/process.py
index 0b7d7101a..91485afd6 100644
--- a/ironic_discoverd/process.py
+++ b/ironic_discoverd/process.py
@@ -29,6 +29,7 @@ from ironic_discoverd import utils
 LOG = logging.getLogger("ironic_discoverd.process")
 
 _POWER_CHECK_PERIOD = 5
+_POWER_OFF_CHECK_PERIOD = 5
 
 
 def process(node_info):
@@ -110,7 +111,7 @@ def _process_node(ironic, node, node_info, cached_node):
             'ipmi_username': node.driver_info.get('ipmi_username'),
             'ipmi_password': node.driver_info.get('ipmi_password')}
     else:
-        _finish_discovery(ironic, cached_node)
+        eventlet.greenthread.spawn_n(_finish_discovery, ironic, cached_node)
 
     return {}
 
@@ -131,21 +132,38 @@ def _wait_for_power_management(ironic, cached_node):
              'after discovery', cached_node.uuid)
 
 
-def _force_power_off(ironic, node):
-    LOG.debug('Forcing power off of node %s', node.uuid)
+def _force_power_off(ironic, cached_node):
+    LOG.debug('Forcing power off of node %s', cached_node.uuid)
     try:
-        utils.retry_on_conflict(ironic.node.set_power_state, node.uuid, 'off')
+        utils.retry_on_conflict(ironic.node.set_power_state,
+                                cached_node.uuid, 'off')
     except Exception as exc:
         LOG.error('Failed to power off node %s, check it\'s power '
-                  'management configuration:\n%s', node.uuid, exc)
-        raise utils.DiscoveryFailed('Failed to power off node %s' % node.uuid)
+                  'management configuration:\n%s', cached_node.uuid, exc)
+        raise utils.DiscoveryFailed('Failed to power off node %s'
+                                    % cached_node.uuid)
+
+    deadline = cached_node.started_at + conf.getint('discoverd', 'timeout')
+    while time.time() < deadline:
+        node = ironic.node.get(cached_node.uuid)
+        if (node.power_state or '').lower() == 'power off':
+            return
+        LOG.info('Waiting for node %s to power off, current state is %s',
+                 cached_node.uuid, node.power_state)
+        eventlet.greenthread.sleep(_POWER_OFF_CHECK_PERIOD)
+
+    LOG.error('Timeout waiting for node %s to power off after discovery',
+              cached_node.uuid)
+    raise utils.DiscoveryFailed(
+        'Timeout waiting for node %s to power off after discovery',
+        cached_node.uuid)
 
 
-def _finish_discovery(ironic, node):
-    _force_power_off(ironic, node)
+def _finish_discovery(ironic, cached_node):
+    _force_power_off(ironic, cached_node)
 
     patch = [{'op': 'add', 'path': '/extra/newly_discovered', 'value': 'true'},
              {'op': 'remove', 'path': '/extra/on_discovery'}]
-    utils.retry_on_conflict(ironic.node.update, node.uuid, patch)
+    utils.retry_on_conflict(ironic.node.update, cached_node.uuid, patch)
 
-    LOG.info('Discovery finished successfully for node %s', node.uuid)
+    LOG.info('Discovery finished successfully for node %s', cached_node.uuid)
diff --git a/ironic_discoverd/test/test_process.py b/ironic_discoverd/test/test_process.py
index e8639d8ee..070dffd4a 100644
--- a/ironic_discoverd/test/test_process.py
+++ b/ironic_discoverd/test/test_process.py
@@ -202,6 +202,7 @@ class TestProcessNode(BaseTest):
         conf.CONF.set('discoverd', 'processing_hooks',
                       'ramdisk_error,scheduler,validate_interfaces,example')
         self.validate_attempts = 5
+        self.power_off_attempts = 2
         self.data['macs'] = self.macs  # validate_interfaces hook
         self.cached_node = node_cache.NodeInfo(uuid=self.uuid,
                                                started_at=self.started_at)
@@ -218,6 +219,10 @@ class TestProcessNode(BaseTest):
         self.cli.node.validate.side_effect = self.fake_validate()
         self.cli.port.create.side_effect = self.ports
         self.cli.node.update.return_value = self.node
+        # Simulate longer power off
+        self.cli.node.get.side_effect = (
+            [self.node] * self.power_off_attempts
+            + [mock.Mock(power_state='power off')])
 
     def fake_validate(self):
         # Simulate long ramdisk task
@@ -240,6 +245,8 @@ class TestProcessNode(BaseTest):
         self.cli.node.update.assert_any_call(self.uuid, self.patch_after)
         self.cli.node.set_power_state.assert_called_once_with(self.uuid, 'off')
         self.assertFalse(self.cli.node.validate.called)
+        self.assertEqual(self.power_off_attempts + 1,
+                         self.cli.node.get.call_count)
 
         post_hook_mock.assert_called_once_with(self.node, mock.ANY,
                                                self.data)
@@ -276,6 +283,18 @@ class TestProcessNode(BaseTest):
         self.cli.node.set_power_state.assert_called_with(self.uuid, 'off')
         self.assertEqual(2, self.cli.node.set_power_state.call_count)
 
+    @mock.patch.object(time, 'time')
+    def test_power_off_timeout(self, time_mock, filters_mock,
+                               post_hook_mock):
+        conf.CONF.set('discoverd', 'timeout', '100')
+        time_mock.return_value = self.started_at + 1000
+        self.cli.node.get.return_value = self.node
+
+        self.assertRaisesRegexp(utils.DiscoveryFailed, 'power off', self.call)
+
+        self.cli.node.update.assert_called_once_with(self.uuid,
+                                                     self.patch_before)
+
     def test_port_failed(self, filters_mock, post_hook_mock):
         self.ports[0] = exceptions.Conflict()