Merge "Do not tear down node upon cleaning failure"
This commit is contained in:
commit
028ab71d3f
ironic
releasenotes/notes
@ -385,6 +385,8 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True,
|
|||||||
set_fail_state=True):
|
set_fail_state=True):
|
||||||
"""Put a failed node in CLEANFAIL and maintenance."""
|
"""Put a failed node in CLEANFAIL and maintenance."""
|
||||||
node = task.node
|
node = task.node
|
||||||
|
node.fault = faults.CLEAN_FAILURE
|
||||||
|
node.maintenance = True
|
||||||
|
|
||||||
if tear_down_cleaning:
|
if tear_down_cleaning:
|
||||||
try:
|
try:
|
||||||
@ -410,9 +412,7 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True,
|
|||||||
# for automated cleaning, it is AVAILABLE.
|
# for automated cleaning, it is AVAILABLE.
|
||||||
manual_clean = node.target_provision_state == states.MANAGEABLE
|
manual_clean = node.target_provision_state == states.MANAGEABLE
|
||||||
node.last_error = msg
|
node.last_error = msg
|
||||||
node.maintenance = True
|
|
||||||
node.maintenance_reason = msg
|
node.maintenance_reason = msg
|
||||||
node.fault = faults.CLEAN_FAILURE
|
|
||||||
node.save()
|
node.save()
|
||||||
|
|
||||||
if set_fail_state and node.provision_state != states.CLEANFAIL:
|
if set_fail_state and node.provision_state != states.CLEANFAIL:
|
||||||
|
@ -32,6 +32,7 @@ from oslo_utils import strutils
|
|||||||
import six
|
import six
|
||||||
|
|
||||||
from ironic.common import exception
|
from ironic.common import exception
|
||||||
|
from ironic.common import faults
|
||||||
from ironic.common.glance_service import service_utils
|
from ironic.common.glance_service import service_utils
|
||||||
from ironic.common.i18n import _
|
from ironic.common.i18n import _
|
||||||
from ironic.common import image_service
|
from ironic.common import image_service
|
||||||
@ -935,10 +936,11 @@ def tear_down_inband_cleaning(task, manage_boot=True):
|
|||||||
"""Tears down the environment setup for in-band cleaning.
|
"""Tears down the environment setup for in-band cleaning.
|
||||||
|
|
||||||
This method does the following:
|
This method does the following:
|
||||||
1. Powers off the bare metal node.
|
1. Powers off the bare metal node (unless the node is fast
|
||||||
2. If 'manage_boot' parameter is set to true, it also
|
tracked or there was a cleaning failure).
|
||||||
calls the 'clean_up_ramdisk' method of boot interface to clean up
|
2. If 'manage_boot' parameter is set to true, it also calls
|
||||||
the environment that was set for booting agent ramdisk.
|
the 'clean_up_ramdisk' method of boot interface to clean
|
||||||
|
up the environment that was set for booting agent ramdisk.
|
||||||
3. Deletes the cleaning ports which were setup as part
|
3. Deletes the cleaning ports which were setup as part
|
||||||
of cleaning.
|
of cleaning.
|
||||||
|
|
||||||
@ -950,7 +952,11 @@ def tear_down_inband_cleaning(task, manage_boot=True):
|
|||||||
removed.
|
removed.
|
||||||
"""
|
"""
|
||||||
fast_track = manager_utils.is_fast_track(task)
|
fast_track = manager_utils.is_fast_track(task)
|
||||||
if not fast_track:
|
|
||||||
|
node = task.node
|
||||||
|
cleaning_failure = (node.fault == faults.CLEAN_FAILURE)
|
||||||
|
|
||||||
|
if not (fast_track or cleaning_failure):
|
||||||
manager_utils.node_power_action(task, states.POWER_OFF)
|
manager_utils.node_power_action(task, states.POWER_OFF)
|
||||||
|
|
||||||
if manage_boot:
|
if manage_boot:
|
||||||
@ -958,7 +964,7 @@ def tear_down_inband_cleaning(task, manage_boot=True):
|
|||||||
|
|
||||||
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
|
power_state_to_restore = manager_utils.power_on_node_if_needed(task)
|
||||||
task.driver.network.remove_cleaning_network(task)
|
task.driver.network.remove_cleaning_network(task)
|
||||||
if not fast_track:
|
if not (fast_track or cleaning_failure):
|
||||||
manager_utils.restore_power_state_if_needed(
|
manager_utils.restore_power_state_if_needed(
|
||||||
task, power_state_to_restore)
|
task, power_state_to_restore)
|
||||||
|
|
||||||
|
@ -31,6 +31,7 @@ from testtools import matchers
|
|||||||
|
|
||||||
from ironic.common import boot_devices
|
from ironic.common import boot_devices
|
||||||
from ironic.common import exception
|
from ironic.common import exception
|
||||||
|
from ironic.common import faults
|
||||||
from ironic.common import image_service
|
from ironic.common import image_service
|
||||||
from ironic.common import states
|
from ironic.common import states
|
||||||
from ironic.common import utils as common_utils
|
from ironic.common import utils as common_utils
|
||||||
@ -1761,12 +1762,14 @@ class AgentMethodsTestCase(db_base.DbTestCase):
|
|||||||
def _test_tear_down_inband_cleaning(
|
def _test_tear_down_inband_cleaning(
|
||||||
self, power_mock, remove_cleaning_network_mock,
|
self, power_mock, remove_cleaning_network_mock,
|
||||||
clean_up_ramdisk_mock, is_fast_track_mock,
|
clean_up_ramdisk_mock, is_fast_track_mock,
|
||||||
manage_boot=True, fast_track=False):
|
manage_boot=True, fast_track=False, cleaning_error=False):
|
||||||
is_fast_track_mock.return_value = fast_track
|
is_fast_track_mock.return_value = fast_track
|
||||||
with task_manager.acquire(
|
with task_manager.acquire(
|
||||||
self.context, self.node.uuid, shared=False) as task:
|
self.context, self.node.uuid, shared=False) as task:
|
||||||
|
if cleaning_error:
|
||||||
|
task.node.fault = faults.CLEAN_FAILURE
|
||||||
utils.tear_down_inband_cleaning(task, manage_boot=manage_boot)
|
utils.tear_down_inband_cleaning(task, manage_boot=manage_boot)
|
||||||
if not fast_track:
|
if not (fast_track or cleaning_error):
|
||||||
power_mock.assert_called_once_with(task, states.POWER_OFF)
|
power_mock.assert_called_once_with(task, states.POWER_OFF)
|
||||||
else:
|
else:
|
||||||
self.assertFalse(power_mock.called)
|
self.assertFalse(power_mock.called)
|
||||||
@ -1786,6 +1789,9 @@ class AgentMethodsTestCase(db_base.DbTestCase):
|
|||||||
def test_tear_down_inband_cleaning_fast_track(self):
|
def test_tear_down_inband_cleaning_fast_track(self):
|
||||||
self._test_tear_down_inband_cleaning(fast_track=True)
|
self._test_tear_down_inband_cleaning(fast_track=True)
|
||||||
|
|
||||||
|
def test_tear_down_inband_cleaning_cleaning_error(self):
|
||||||
|
self._test_tear_down_inband_cleaning(cleaning_error=True)
|
||||||
|
|
||||||
def test_build_agent_options_conf(self):
|
def test_build_agent_options_conf(self):
|
||||||
self.config(api_url='https://api-url', group='conductor')
|
self.config(api_url='https://api-url', group='conductor')
|
||||||
options = utils.build_agent_options(self.node)
|
options = utils.build_agent_options(self.node)
|
||||||
|
11
releasenotes/notes/fix-do-not-tear-down-nodes-upon-cleaning-failure-a9cda6ae71ed2540.yaml
Normal file
11
releasenotes/notes/fix-do-not-tear-down-nodes-upon-cleaning-failure-a9cda6ae71ed2540.yaml
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Fixes a bug where ironic would shut a node down upon cleaning failure.
|
||||||
|
Now, the node stays powered on (as documented and intended).
|
||||||
|
upgrade:
|
||||||
|
- |
|
||||||
|
When a failure occurs during cleaning, nodes will no longer be shut down. The
|
||||||
|
behaviour was changed to prevent harm and allow for an admin intervention
|
||||||
|
when sensitive operations, such as firmware upgrades, are performed and fail
|
||||||
|
during cleaning.
|
Loading…
x
Reference in New Issue
Block a user