Skip initial reboot to IPA when updating firmware out-of-band
This change enables Ironic to skip initial reboot to IPA when performing out-of-band firmware. Change-Id: Id055a4ddbde3dbe336717e5f06ca6eb024b90c9f Signed-off-by: Jacob Anders <janders@redhat.com>
This commit is contained in:
@@ -38,6 +38,7 @@ def do_node_service(task, service_steps=None, disable_ramdisk=False):
|
||||
:param disable_ramdisk: Whether to skip booting ramdisk for servicing.
|
||||
"""
|
||||
node = task.node
|
||||
ramdisk_needed = None
|
||||
try:
|
||||
# NOTE(ghe): Valid power and network values are needed to perform
|
||||
# a service operation.
|
||||
@@ -54,42 +55,53 @@ def do_node_service(task, service_steps=None, disable_ramdisk=False):
|
||||
node.set_driver_internal_info('service_disable_ramdisk',
|
||||
disable_ramdisk)
|
||||
task.node.save()
|
||||
|
||||
utils.node_update_cache(task)
|
||||
|
||||
# Allow the deploy driver to set up the ramdisk again (necessary for IPA)
|
||||
try:
|
||||
if not disable_ramdisk:
|
||||
prepare_result = task.driver.deploy.prepare_service(task)
|
||||
else:
|
||||
LOG.info('Skipping preparing for service in-band service since '
|
||||
'out-of-band only service has been requested for node '
|
||||
'%s', node.uuid)
|
||||
prepare_result = None
|
||||
except Exception as e:
|
||||
msg = (_('Failed to prepare node %(node)s for service: %(e)s')
|
||||
% {'node': node.uuid, 'e': e})
|
||||
return utils.servicing_error_handler(task, msg, traceback=True)
|
||||
|
||||
if prepare_result == states.SERVICEWAIT:
|
||||
# Prepare is asynchronous, the deploy driver will need to
|
||||
# set node.driver_internal_info['service_steps'] and
|
||||
# node.service_step and then make an RPC call to
|
||||
# continue_node_service to start service operations.
|
||||
task.process_event('wait')
|
||||
return
|
||||
try:
|
||||
conductor_steps.set_node_service_steps(
|
||||
task, disable_ramdisk=disable_ramdisk)
|
||||
except exception.InvalidParameterValue:
|
||||
if disable_ramdisk:
|
||||
# NOTE(janders) raising log severity since this will result
|
||||
# in servicing failure
|
||||
LOG.error('Failed to compose list of service steps for node '
|
||||
'%s', node.uuid)
|
||||
raise
|
||||
LOG.debug('Unable to compose list of service steps for node %(node)s '
|
||||
'will retry once ramdisk is booted.', {'node': node.uuid})
|
||||
ramdisk_needed = True
|
||||
except Exception as e:
|
||||
# Catch all exceptions and follow the error handling
|
||||
# path so things are cleaned up properly.
|
||||
msg = (_('Cannot service node %(node)s: %(msg)s')
|
||||
% {'node': node.uuid, 'msg': e})
|
||||
return utils.servicing_error_handler(task, msg)
|
||||
|
||||
steps = node.driver_internal_info.get('service_steps', [])
|
||||
step_index = 0 if steps else None
|
||||
for step in steps:
|
||||
step_requires_ramdisk = step.get('requires_ramdisk')
|
||||
if step_requires_ramdisk:
|
||||
LOG.debug('Found service step %s requiring ramdisk '
|
||||
'for node %s', step, task.node.uuid)
|
||||
ramdisk_needed = True
|
||||
|
||||
if ramdisk_needed and not disable_ramdisk:
|
||||
try:
|
||||
prepare_result = task.driver.deploy.prepare_service(task)
|
||||
except Exception as e:
|
||||
msg = (_('Failed to prepare node %(node)s for service: %(e)s')
|
||||
% {'node': node.uuid, 'e': e})
|
||||
return utils.servicing_error_handler(task, msg, traceback=True)
|
||||
if prepare_result == states.SERVICEWAIT:
|
||||
# Prepare is asynchronous, the deploy driver will need to
|
||||
# set node.driver_internal_info['service_steps'] and
|
||||
# node.service_step and then make an RPC call to
|
||||
# continue_node_service to start service operations.
|
||||
task.process_event('wait')
|
||||
return
|
||||
else:
|
||||
LOG.debug('Will proceed with servicing node %(node)s '
|
||||
'without booting the ramdisk.', {'node': node.uuid})
|
||||
|
||||
do_next_service_step(task, step_index, disable_ramdisk=disable_ramdisk)
|
||||
|
||||
|
||||
|
||||
@@ -496,9 +496,11 @@ def set_node_service_steps(task, disable_ramdisk=False):
|
||||
are accepted.
|
||||
:raises: InvalidParameterValue if there is a problem with the user's
|
||||
clean steps.
|
||||
:raises: NodeCleaningFailure if there was a problem getting the
|
||||
:raises: NodeServicingFailure if there was a problem getting the
|
||||
service steps.
|
||||
"""
|
||||
# FIXME(janders) it seems NodeServicingFailure is never actually raised,
|
||||
# perhaps we should remove it?
|
||||
node = task.node
|
||||
steps = _validate_user_service_steps(
|
||||
task, node.driver_internal_info.get('service_steps', []),
|
||||
|
||||
@@ -1750,7 +1750,7 @@ def prepare_agent_boot(task):
|
||||
task.driver.boot.prepare_ramdisk(task, deploy_opts)
|
||||
|
||||
|
||||
def reboot_to_finish_step(task, timeout=None):
|
||||
def reboot_to_finish_step(task, timeout=None, disable_ramdisk=None):
|
||||
"""Reboot the node into IPA to finish a deploy/clean step.
|
||||
|
||||
:param task: a TaskManager instance.
|
||||
@@ -1759,8 +1759,14 @@ def reboot_to_finish_step(task, timeout=None):
|
||||
:returns: states.CLEANWAIT if cleaning operation in progress
|
||||
or states.DEPLOYWAIT if deploy operation in progress.
|
||||
"""
|
||||
disable_ramdisk = task.node.driver_internal_info.get(
|
||||
'cleaning_disable_ramdisk')
|
||||
if disable_ramdisk is None:
|
||||
if task.node.provision_state in [states.CLEANING, states.CLEANWAIT]:
|
||||
disable_ramdisk = task.node.driver_internal_info.get(
|
||||
'cleaning_disable_ramdisk')
|
||||
elif task.node.provision_state in [states.SERVICING,
|
||||
states.SERVICEWAIT]:
|
||||
disable_ramdisk = task.node.driver_internal_info.get(
|
||||
'service_disable_ramdisk')
|
||||
if not disable_ramdisk:
|
||||
if (manager_utils.is_fast_track(task)
|
||||
and not task.node.disable_power_off):
|
||||
|
||||
@@ -146,7 +146,7 @@ class RedfishFirmware(base.FirmwareInterface):
|
||||
requires_ramdisk=True)
|
||||
@base.service_step(priority=0, abortable=False,
|
||||
argsinfo=_FW_SETTINGS_ARGSINFO,
|
||||
requires_ramdisk=True)
|
||||
requires_ramdisk=False)
|
||||
@base.cache_firmware_components
|
||||
def update(self, task, settings):
|
||||
"""Update the Firmware on the node using the settings for components.
|
||||
@@ -181,7 +181,8 @@ class RedfishFirmware(base.FirmwareInterface):
|
||||
polling=True
|
||||
)
|
||||
|
||||
return deploy_utils.reboot_to_finish_step(task, timeout=wait_interval)
|
||||
return deploy_utils.reboot_to_finish_step(task, timeout=wait_interval,
|
||||
disable_ramdisk=True)
|
||||
|
||||
def _execute_firmware_update(self, node, update_service, settings):
|
||||
"""Executes the next firmware update to the node
|
||||
|
||||
@@ -109,14 +109,21 @@ class DoNodeServiceTestCase(db_base.DbTestCase):
|
||||
mock_validate.side_effect = exception.NetworkError()
|
||||
self.__do_node_service_validate_fail(mock_validate)
|
||||
|
||||
@mock.patch.object(conductor_steps, 'set_node_service_steps',
|
||||
autospec=True)
|
||||
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
|
||||
autospec=True)
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_service',
|
||||
autospec=True)
|
||||
def test__do_node_service_prepare_service_fail(self, mock_prep,
|
||||
mock_validate,
|
||||
service_steps=None):
|
||||
# Exception from task.driver.deploy.prepare_cleaning should cause node
|
||||
mock_steps,
|
||||
service_steps=[]):
|
||||
# NOTE(janders) after removing unconditional initial reboot into
|
||||
# ramdisk, set_node_service_steps needs to InvalidParameterValue
|
||||
# to force boot into ramdisk
|
||||
mock_steps.side_effect = exception.InvalidParameterValue('error')
|
||||
# Exception from task.driver.deploy.prepare_service should cause node
|
||||
# to go to SERVICEFAIL
|
||||
mock_prep.side_effect = exception.InvalidParameterValue('error')
|
||||
tgt_prov_state = states.ACTIVE
|
||||
@@ -135,17 +142,76 @@ class DoNodeServiceTestCase(db_base.DbTestCase):
|
||||
self.assertFalse(node.maintenance)
|
||||
self.assertIsNone(node.fault)
|
||||
|
||||
@mock.patch.object(conductor_steps, 'set_node_service_steps',
|
||||
autospec=True)
|
||||
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
|
||||
autospec=True)
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_service',
|
||||
autospec=True)
|
||||
def test__do_node_service_prepare_service_wait(self, mock_prep,
|
||||
mock_validate):
|
||||
mock_validate,
|
||||
mock_steps):
|
||||
service_steps = [
|
||||
{'step': 'trigger_servicewait', 'priority': 10,
|
||||
'interface': 'vendor'}
|
||||
]
|
||||
mock_steps.side_effect = exception.InvalidParameterValue('error')
|
||||
mock_prep.return_value = states.SERVICEWAIT
|
||||
tgt_prov_state = states.ACTIVE
|
||||
node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
provision_state=states.SERVICING,
|
||||
target_provision_state=tgt_prov_state,
|
||||
vendor_interface='fake')
|
||||
with task_manager.acquire(
|
||||
self.context, node.uuid, shared=False) as task:
|
||||
servicing.do_node_service(task, service_steps=service_steps)
|
||||
node.refresh()
|
||||
self.assertEqual(states.SERVICEWAIT, node.provision_state)
|
||||
self.assertEqual(tgt_prov_state, node.target_provision_state)
|
||||
mock_prep.assert_called_once_with(mock.ANY, mock.ANY)
|
||||
mock_validate.assert_called_once_with(mock.ANY, mock.ANY)
|
||||
|
||||
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
|
||||
autospec=True)
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_service',
|
||||
autospec=True)
|
||||
def test__do_node_service_out_of_band(self, mock_prep,
|
||||
mock_validate):
|
||||
# NOTE(janders) this test ensures ramdisk isn't prepared if
|
||||
# steps do not require it
|
||||
service_steps = [
|
||||
{'step': 'trigger_servicewait', 'priority': 10,
|
||||
'interface': 'vendor'}
|
||||
]
|
||||
mock_prep.return_value = states.SERVICEWAIT
|
||||
tgt_prov_state = states.ACTIVE
|
||||
node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
provision_state=states.SERVICING,
|
||||
target_provision_state=tgt_prov_state,
|
||||
vendor_interface='fake')
|
||||
with task_manager.acquire(
|
||||
self.context, node.uuid, shared=False) as task:
|
||||
servicing.do_node_service(task, service_steps=service_steps)
|
||||
node.refresh()
|
||||
self.assertEqual(states.SERVICEWAIT, node.provision_state)
|
||||
self.assertEqual(tgt_prov_state, node.target_provision_state)
|
||||
mock_prep.assert_not_called()
|
||||
mock_validate.assert_called_once_with(mock.ANY, mock.ANY)
|
||||
|
||||
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
|
||||
autospec=True)
|
||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_service',
|
||||
autospec=True)
|
||||
def test__do_node_service_requires_ramdisk_fallback(self, mock_prep,
|
||||
mock_validate):
|
||||
# NOTE(janders): here we set 'requires_ramdisk': 'True'
|
||||
# to force ramdisk_needed to be set and trigger prepare ramdisk
|
||||
service_steps = [
|
||||
{'step': 'trigger_servicewait', 'priority': 10,
|
||||
'interface': 'vendor', 'requires_ramdisk': 'True'}
|
||||
]
|
||||
mock_prep.return_value = states.SERVICEWAIT
|
||||
tgt_prov_state = states.ACTIVE
|
||||
node = obj_utils.create_test_node(
|
||||
@@ -194,11 +260,8 @@ class DoNodeServiceTestCase(db_base.DbTestCase):
|
||||
@mock.patch.object(conductor_steps, 'set_node_service_steps',
|
||||
autospec=True)
|
||||
def __do_node_service_steps_fail(self, mock_steps, mock_validate,
|
||||
service_steps=None, invalid_exc=True):
|
||||
if invalid_exc:
|
||||
mock_steps.side_effect = exception.InvalidParameterValue('invalid')
|
||||
else:
|
||||
mock_steps.side_effect = exception.NodeCleaningFailure('failure')
|
||||
service_steps=None):
|
||||
mock_steps.side_effect = exception.NodeCleaningFailure('failure')
|
||||
tgt_prov_state = states.ACTIVE
|
||||
node = obj_utils.create_test_node(
|
||||
self.context, driver='fake-hardware',
|
||||
@@ -224,12 +287,8 @@ class DoNodeServiceTestCase(db_base.DbTestCase):
|
||||
def test_do_node_service_steps_fail_poweroff(self, mock_steps,
|
||||
mock_validate,
|
||||
mock_power,
|
||||
service_steps=None,
|
||||
invalid_exc=True):
|
||||
if invalid_exc:
|
||||
mock_steps.side_effect = exception.InvalidParameterValue('invalid')
|
||||
else:
|
||||
mock_steps.side_effect = exception.NodeCleaningFailure('failure')
|
||||
service_steps=None):
|
||||
mock_steps.side_effect = exception.NodeCleaningFailure('failure')
|
||||
tgt_prov_state = states.ACTIVE
|
||||
self.config(poweroff_in_cleanfail=True, group='conductor')
|
||||
node = obj_utils.create_test_node(
|
||||
@@ -249,9 +308,7 @@ class DoNodeServiceTestCase(db_base.DbTestCase):
|
||||
self.assertFalse(mock_power.called)
|
||||
|
||||
def test__do_node_service_steps_fail(self):
|
||||
for invalid in (True, False):
|
||||
self.__do_node_service_steps_fail(service_steps=[self.deploy_raid],
|
||||
invalid_exc=invalid)
|
||||
self.__do_node_service_steps_fail(service_steps=[self.deploy_raid])
|
||||
|
||||
@mock.patch.object(conductor_steps, 'set_node_service_steps',
|
||||
autospec=True)
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
---
|
||||
fixes:
|
||||
- |
|
||||
Removes initial, unconditional reboot into ramdisk during servicing when
|
||||
not required by specified service steps. This reduces the total number of
|
||||
reboots needed when performing servicing, speeding up the process.
|
||||
|
||||
Reference in New Issue
Block a user