Merge "Handle NodeLocked failures"
This commit is contained in:
commit
00c2e99b5a
ironic_inspector
releasenotes/notes
@ -123,8 +123,8 @@ def _do_introspect(node_info, ironic):
|
|||||||
|
|
||||||
if node_info.manage_boot:
|
if node_info.manage_boot:
|
||||||
try:
|
try:
|
||||||
ironic.set_node_boot_device(
|
ir_utils.call_with_retries(
|
||||||
node_info.uuid, 'pxe',
|
ironic.set_node_boot_device, node_info.uuid, 'pxe',
|
||||||
persistent=_persistent_ramdisk_boot(node_info.node()))
|
persistent=_persistent_ramdisk_boot(node_info.node()))
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise utils.Error(_('Failed to set boot device to PXE: %s') % exc,
|
raise utils.Error(_('Failed to set boot device to PXE: %s') % exc,
|
||||||
@ -133,7 +133,8 @@ def _do_introspect(node_info, ironic):
|
|||||||
_wait_for_turn(node_info)
|
_wait_for_turn(node_info)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
ironic.set_node_power_state(node_info.uuid, 'rebooting')
|
ir_utils.call_with_retries(
|
||||||
|
ironic.set_node_power_state, node_info.uuid, 'rebooting')
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise utils.Error(_('Failed to power on the node, check its '
|
raise utils.Error(_('Failed to power on the node, check its '
|
||||||
'power management configuration: %s') % exc,
|
'power management configuration: %s') % exc,
|
||||||
@ -175,7 +176,8 @@ def _abort(node_info, ironic):
|
|||||||
LOG.debug('Forcing power-off', node_info=node_info)
|
LOG.debug('Forcing power-off', node_info=node_info)
|
||||||
if node_info.manage_boot:
|
if node_info.manage_boot:
|
||||||
try:
|
try:
|
||||||
ironic.set_node_power_state(node_info.uuid, 'power off')
|
ir_utils.call_with_retries(
|
||||||
|
ironic.set_node_power_state, node_info.uuid, 'power off')
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
LOG.warning('Failed to power off node: %s', exc,
|
LOG.warning('Failed to power off node: %s', exc,
|
||||||
node_info=node_info)
|
node_info=node_info)
|
||||||
|
@ -461,7 +461,8 @@ class NodeInfo(object):
|
|||||||
:param ironic: Ironic client to use instead of self.ironic
|
:param ironic: Ironic client to use instead of self.ironic
|
||||||
"""
|
"""
|
||||||
ironic = ironic or self.ironic
|
ironic = ironic or self.ironic
|
||||||
ironic.add_node_trait(self.uuid, trait)
|
ir_utils.call_with_retries(
|
||||||
|
ironic.add_node_trait, self.uuid, trait)
|
||||||
|
|
||||||
def remove_trait(self, trait, ironic=None):
|
def remove_trait(self, trait, ironic=None):
|
||||||
"""Remove a trait from the node.
|
"""Remove a trait from the node.
|
||||||
@ -471,6 +472,8 @@ class NodeInfo(object):
|
|||||||
"""
|
"""
|
||||||
ironic = ironic or self.ironic
|
ironic = ironic or self.ironic
|
||||||
try:
|
try:
|
||||||
|
# TODO(TheJulia): This should really have a retry around it for
|
||||||
|
# connection failure, however its not a big deal.
|
||||||
ironic.remove_node_trait(self.uuid, trait)
|
ironic.remove_node_trait(self.uuid, trait)
|
||||||
except os_exc.NotFoundException:
|
except os_exc.NotFoundException:
|
||||||
LOG.debug('Trait %s is not set, cannot remove', trait,
|
LOG.debug('Trait %s is not set, cannot remove', trait,
|
||||||
@ -486,8 +489,8 @@ class NodeInfo(object):
|
|||||||
ports = self.ports()
|
ports = self.ports()
|
||||||
if isinstance(port, str):
|
if isinstance(port, str):
|
||||||
port = ports[port]
|
port = ports[port]
|
||||||
|
ir_utils.call_with_retries(
|
||||||
ironic.delete_port(port.id)
|
ironic.delete_port, port.id)
|
||||||
del ports[port.address]
|
del ports[port.address]
|
||||||
|
|
||||||
def get_by_path(self, path):
|
def get_by_path(self, path):
|
||||||
|
@ -92,6 +92,34 @@ class TestIntrospect(BaseTest):
|
|||||||
self.node_info.acquire_lock.assert_called_once_with()
|
self.node_info.acquire_lock.assert_called_once_with()
|
||||||
self.node_info.release_lock.assert_called_once_with()
|
self.node_info.release_lock.assert_called_once_with()
|
||||||
|
|
||||||
|
def test_ok_retries_node_locked(self, client_mock, start_mock):
|
||||||
|
cli = self._prepare(client_mock)
|
||||||
|
start_mock.return_value = self.node_info
|
||||||
|
cli.set_node_power_state.side_effect = [
|
||||||
|
os_exc.ConflictException("Locked"),
|
||||||
|
None]
|
||||||
|
introspect.introspect(self.node.uuid)
|
||||||
|
|
||||||
|
cli.get_node.assert_called_once_with(self.uuid)
|
||||||
|
cli.validate_node.assert_called_once_with(self.uuid, required='power')
|
||||||
|
|
||||||
|
start_mock.assert_called_once_with(self.uuid,
|
||||||
|
bmc_address=[self.bmc_address],
|
||||||
|
manage_boot=True,
|
||||||
|
ironic=cli)
|
||||||
|
self.node_info.ports.assert_called_once_with()
|
||||||
|
self.node_info.add_attribute.assert_called_once_with(
|
||||||
|
'mac', self.macs)
|
||||||
|
self.sync_filter_mock.assert_called_with(cli)
|
||||||
|
cli.set_node_boot_device.assert_called_once_with(
|
||||||
|
self.uuid, 'pxe', persistent=False)
|
||||||
|
cli.set_node_power_state.assert_has_calls([
|
||||||
|
mock.call(self.uuid, 'rebooting'),
|
||||||
|
mock.call(self.uuid, 'rebooting')])
|
||||||
|
|
||||||
|
self.node_info.acquire_lock.assert_called_once_with()
|
||||||
|
self.node_info.release_lock.assert_called_once_with()
|
||||||
|
|
||||||
@mock.patch.object(ir_utils, 'get_ipmi_address', autospec=True)
|
@mock.patch.object(ir_utils, 'get_ipmi_address', autospec=True)
|
||||||
def test_resolved_bmc_address(self, ipmi_mock, client_mock, start_mock):
|
def test_resolved_bmc_address(self, ipmi_mock, client_mock, start_mock):
|
||||||
self.node.driver_info['ipmi_address'] = 'example.com'
|
self.node.driver_info['ipmi_address'] = 'example.com'
|
||||||
@ -177,8 +205,9 @@ class TestIntrospect(BaseTest):
|
|||||||
cli.set_node_boot_device.assert_called_once_with(self.uuid,
|
cli.set_node_boot_device.assert_called_once_with(self.uuid,
|
||||||
'pxe',
|
'pxe',
|
||||||
persistent=False)
|
persistent=False)
|
||||||
cli.set_node_power_state.assert_called_once_with(self.uuid,
|
power_call = mock.call(self.uuid, 'rebooting')
|
||||||
'rebooting')
|
cli.set_node_power_state.assert_has_calls([
|
||||||
|
power_call, power_call, power_call, power_call, power_call])
|
||||||
start_mock.return_value.finished.assert_called_once_with(
|
start_mock.return_value.finished.assert_called_once_with(
|
||||||
introspect.istate.Events.error, error=mock.ANY)
|
introspect.istate.Events.error, error=mock.ANY)
|
||||||
self.node_info.acquire_lock.assert_called_once_with()
|
self.node_info.acquire_lock.assert_called_once_with()
|
||||||
@ -218,9 +247,9 @@ class TestIntrospect(BaseTest):
|
|||||||
bmc_address=[self.bmc_address],
|
bmc_address=[self.bmc_address],
|
||||||
manage_boot=True,
|
manage_boot=True,
|
||||||
ironic=cli)
|
ironic=cli)
|
||||||
cli.set_node_boot_device.assert_called_once_with(self.uuid,
|
dev_call = mock.call(self.uuid, 'pxe', persistent=False)
|
||||||
'pxe',
|
cli.set_node_boot_device.assert_has_calls([
|
||||||
persistent=False)
|
dev_call, dev_call, dev_call, dev_call, dev_call])
|
||||||
cli.set_node_power_state.assert_not_called()
|
cli.set_node_power_state.assert_not_called()
|
||||||
start_mock.return_value.finished.assert_called_once_with(
|
start_mock.return_value.finished.assert_called_once_with(
|
||||||
introspect.istate.Events.error, error=mock.ANY)
|
introspect.istate.Events.error, error=mock.ANY)
|
||||||
|
@ -19,6 +19,7 @@ import unittest
|
|||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
import automaton
|
import automaton
|
||||||
|
from openstack import exceptions as os_exc
|
||||||
from oslo_config import cfg
|
from oslo_config import cfg
|
||||||
from oslo_utils import timeutils
|
from oslo_utils import timeutils
|
||||||
from oslo_utils import uuidutils
|
from oslo_utils import uuidutils
|
||||||
@ -759,6 +760,14 @@ class TestUpdate(test_base.NodeTest):
|
|||||||
self.ironic.delete_port.assert_called_once_with('0')
|
self.ironic.delete_port.assert_called_once_with('0')
|
||||||
self.assertEqual(['mac1'], list(self.node_info.ports()))
|
self.assertEqual(['mac1'], list(self.node_info.ports()))
|
||||||
|
|
||||||
|
def test_delete_port_retries(self):
|
||||||
|
self.ironic.delete_port.side_effect = \
|
||||||
|
os_exc.ConflictException("Locked")
|
||||||
|
self.assertRaises(
|
||||||
|
os_exc.ConflictException,
|
||||||
|
self.node_info.delete_port, self.ports['mac0'])
|
||||||
|
self.assertEqual(5, self.ironic.delete_port.call_count)
|
||||||
|
|
||||||
def test_delete_port_by_mac(self):
|
def test_delete_port_by_mac(self):
|
||||||
self.node_info.delete_port('mac0')
|
self.node_info.delete_port('mac0')
|
||||||
|
|
||||||
|
@ -0,0 +1,11 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Fixes issues in Inspector where various tasks would not have retry logic
|
||||||
|
applied to them and may sporadically fail. This is because the OpenStack
|
||||||
|
SDK does not comprehend the NodeLocked error, which previously
|
||||||
|
python-ironicclient silently handled. Basic operations such as
|
||||||
|
"power reboot" and "set boot device" will now be retried automatically if
|
||||||
|
they fail.
|
||||||
|
For more information, please see
|
||||||
|
`story 2009107 <https://storyboard.openstack.org/#!/story/2009107>`_.
|
Loading…
x
Reference in New Issue
Block a user