Skip initial reboot to IPA when updating firmware out-of-band
This change enables Ironic to skip initial reboot to IPA when performing out-of-band firmware. Change-Id: Id055a4ddbde3dbe336717e5f06ca6eb024b90c9f Signed-off-by: Jacob Anders <janders@redhat.com>
This commit is contained in:
@@ -38,6 +38,7 @@ def do_node_service(task, service_steps=None, disable_ramdisk=False):
|
|||||||
:param disable_ramdisk: Whether to skip booting ramdisk for servicing.
|
:param disable_ramdisk: Whether to skip booting ramdisk for servicing.
|
||||||
"""
|
"""
|
||||||
node = task.node
|
node = task.node
|
||||||
|
ramdisk_needed = None
|
||||||
try:
|
try:
|
||||||
# NOTE(ghe): Valid power and network values are needed to perform
|
# NOTE(ghe): Valid power and network values are needed to perform
|
||||||
# a service operation.
|
# a service operation.
|
||||||
@@ -54,42 +55,53 @@ def do_node_service(task, service_steps=None, disable_ramdisk=False):
|
|||||||
node.set_driver_internal_info('service_disable_ramdisk',
|
node.set_driver_internal_info('service_disable_ramdisk',
|
||||||
disable_ramdisk)
|
disable_ramdisk)
|
||||||
task.node.save()
|
task.node.save()
|
||||||
|
|
||||||
utils.node_update_cache(task)
|
utils.node_update_cache(task)
|
||||||
|
|
||||||
# Allow the deploy driver to set up the ramdisk again (necessary for IPA)
|
|
||||||
try:
|
|
||||||
if not disable_ramdisk:
|
|
||||||
prepare_result = task.driver.deploy.prepare_service(task)
|
|
||||||
else:
|
|
||||||
LOG.info('Skipping preparing for service in-band service since '
|
|
||||||
'out-of-band only service has been requested for node '
|
|
||||||
'%s', node.uuid)
|
|
||||||
prepare_result = None
|
|
||||||
except Exception as e:
|
|
||||||
msg = (_('Failed to prepare node %(node)s for service: %(e)s')
|
|
||||||
% {'node': node.uuid, 'e': e})
|
|
||||||
return utils.servicing_error_handler(task, msg, traceback=True)
|
|
||||||
|
|
||||||
if prepare_result == states.SERVICEWAIT:
|
|
||||||
# Prepare is asynchronous, the deploy driver will need to
|
|
||||||
# set node.driver_internal_info['service_steps'] and
|
|
||||||
# node.service_step and then make an RPC call to
|
|
||||||
# continue_node_service to start service operations.
|
|
||||||
task.process_event('wait')
|
|
||||||
return
|
|
||||||
try:
|
try:
|
||||||
conductor_steps.set_node_service_steps(
|
conductor_steps.set_node_service_steps(
|
||||||
task, disable_ramdisk=disable_ramdisk)
|
task, disable_ramdisk=disable_ramdisk)
|
||||||
|
except exception.InvalidParameterValue:
|
||||||
|
if disable_ramdisk:
|
||||||
|
# NOTE(janders) raising log severity since this will result
|
||||||
|
# in servicing failure
|
||||||
|
LOG.error('Failed to compose list of service steps for node '
|
||||||
|
'%s', node.uuid)
|
||||||
|
raise
|
||||||
|
LOG.debug('Unable to compose list of service steps for node %(node)s '
|
||||||
|
'will retry once ramdisk is booted.', {'node': node.uuid})
|
||||||
|
ramdisk_needed = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Catch all exceptions and follow the error handling
|
|
||||||
# path so things are cleaned up properly.
|
|
||||||
msg = (_('Cannot service node %(node)s: %(msg)s')
|
msg = (_('Cannot service node %(node)s: %(msg)s')
|
||||||
% {'node': node.uuid, 'msg': e})
|
% {'node': node.uuid, 'msg': e})
|
||||||
return utils.servicing_error_handler(task, msg)
|
return utils.servicing_error_handler(task, msg)
|
||||||
|
|
||||||
steps = node.driver_internal_info.get('service_steps', [])
|
steps = node.driver_internal_info.get('service_steps', [])
|
||||||
step_index = 0 if steps else None
|
step_index = 0 if steps else None
|
||||||
|
for step in steps:
|
||||||
|
step_requires_ramdisk = step.get('requires_ramdisk')
|
||||||
|
if step_requires_ramdisk:
|
||||||
|
LOG.debug('Found service step %s requiring ramdisk '
|
||||||
|
'for node %s', step, task.node.uuid)
|
||||||
|
ramdisk_needed = True
|
||||||
|
|
||||||
|
if ramdisk_needed and not disable_ramdisk:
|
||||||
|
try:
|
||||||
|
prepare_result = task.driver.deploy.prepare_service(task)
|
||||||
|
except Exception as e:
|
||||||
|
msg = (_('Failed to prepare node %(node)s for service: %(e)s')
|
||||||
|
% {'node': node.uuid, 'e': e})
|
||||||
|
return utils.servicing_error_handler(task, msg, traceback=True)
|
||||||
|
if prepare_result == states.SERVICEWAIT:
|
||||||
|
# Prepare is asynchronous, the deploy driver will need to
|
||||||
|
# set node.driver_internal_info['service_steps'] and
|
||||||
|
# node.service_step and then make an RPC call to
|
||||||
|
# continue_node_service to start service operations.
|
||||||
|
task.process_event('wait')
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
LOG.debug('Will proceed with servicing node %(node)s '
|
||||||
|
'without booting the ramdisk.', {'node': node.uuid})
|
||||||
|
|
||||||
do_next_service_step(task, step_index, disable_ramdisk=disable_ramdisk)
|
do_next_service_step(task, step_index, disable_ramdisk=disable_ramdisk)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -496,9 +496,11 @@ def set_node_service_steps(task, disable_ramdisk=False):
|
|||||||
are accepted.
|
are accepted.
|
||||||
:raises: InvalidParameterValue if there is a problem with the user's
|
:raises: InvalidParameterValue if there is a problem with the user's
|
||||||
clean steps.
|
clean steps.
|
||||||
:raises: NodeCleaningFailure if there was a problem getting the
|
:raises: NodeServicingFailure if there was a problem getting the
|
||||||
service steps.
|
service steps.
|
||||||
"""
|
"""
|
||||||
|
# FIXME(janders) it seems NodeServicingFailure is never actually raised,
|
||||||
|
# perhaps we should remove it?
|
||||||
node = task.node
|
node = task.node
|
||||||
steps = _validate_user_service_steps(
|
steps = _validate_user_service_steps(
|
||||||
task, node.driver_internal_info.get('service_steps', []),
|
task, node.driver_internal_info.get('service_steps', []),
|
||||||
|
|||||||
@@ -1750,7 +1750,7 @@ def prepare_agent_boot(task):
|
|||||||
task.driver.boot.prepare_ramdisk(task, deploy_opts)
|
task.driver.boot.prepare_ramdisk(task, deploy_opts)
|
||||||
|
|
||||||
|
|
||||||
def reboot_to_finish_step(task, timeout=None):
|
def reboot_to_finish_step(task, timeout=None, disable_ramdisk=None):
|
||||||
"""Reboot the node into IPA to finish a deploy/clean step.
|
"""Reboot the node into IPA to finish a deploy/clean step.
|
||||||
|
|
||||||
:param task: a TaskManager instance.
|
:param task: a TaskManager instance.
|
||||||
@@ -1759,8 +1759,14 @@ def reboot_to_finish_step(task, timeout=None):
|
|||||||
:returns: states.CLEANWAIT if cleaning operation in progress
|
:returns: states.CLEANWAIT if cleaning operation in progress
|
||||||
or states.DEPLOYWAIT if deploy operation in progress.
|
or states.DEPLOYWAIT if deploy operation in progress.
|
||||||
"""
|
"""
|
||||||
disable_ramdisk = task.node.driver_internal_info.get(
|
if disable_ramdisk is None:
|
||||||
'cleaning_disable_ramdisk')
|
if task.node.provision_state in [states.CLEANING, states.CLEANWAIT]:
|
||||||
|
disable_ramdisk = task.node.driver_internal_info.get(
|
||||||
|
'cleaning_disable_ramdisk')
|
||||||
|
elif task.node.provision_state in [states.SERVICING,
|
||||||
|
states.SERVICEWAIT]:
|
||||||
|
disable_ramdisk = task.node.driver_internal_info.get(
|
||||||
|
'service_disable_ramdisk')
|
||||||
if not disable_ramdisk:
|
if not disable_ramdisk:
|
||||||
if (manager_utils.is_fast_track(task)
|
if (manager_utils.is_fast_track(task)
|
||||||
and not task.node.disable_power_off):
|
and not task.node.disable_power_off):
|
||||||
|
|||||||
@@ -146,7 +146,7 @@ class RedfishFirmware(base.FirmwareInterface):
|
|||||||
requires_ramdisk=True)
|
requires_ramdisk=True)
|
||||||
@base.service_step(priority=0, abortable=False,
|
@base.service_step(priority=0, abortable=False,
|
||||||
argsinfo=_FW_SETTINGS_ARGSINFO,
|
argsinfo=_FW_SETTINGS_ARGSINFO,
|
||||||
requires_ramdisk=True)
|
requires_ramdisk=False)
|
||||||
@base.cache_firmware_components
|
@base.cache_firmware_components
|
||||||
def update(self, task, settings):
|
def update(self, task, settings):
|
||||||
"""Update the Firmware on the node using the settings for components.
|
"""Update the Firmware on the node using the settings for components.
|
||||||
@@ -181,7 +181,8 @@ class RedfishFirmware(base.FirmwareInterface):
|
|||||||
polling=True
|
polling=True
|
||||||
)
|
)
|
||||||
|
|
||||||
return deploy_utils.reboot_to_finish_step(task, timeout=wait_interval)
|
return deploy_utils.reboot_to_finish_step(task, timeout=wait_interval,
|
||||||
|
disable_ramdisk=True)
|
||||||
|
|
||||||
def _execute_firmware_update(self, node, update_service, settings):
|
def _execute_firmware_update(self, node, update_service, settings):
|
||||||
"""Executes the next firmware update to the node
|
"""Executes the next firmware update to the node
|
||||||
|
|||||||
@@ -109,14 +109,21 @@ class DoNodeServiceTestCase(db_base.DbTestCase):
|
|||||||
mock_validate.side_effect = exception.NetworkError()
|
mock_validate.side_effect = exception.NetworkError()
|
||||||
self.__do_node_service_validate_fail(mock_validate)
|
self.__do_node_service_validate_fail(mock_validate)
|
||||||
|
|
||||||
|
@mock.patch.object(conductor_steps, 'set_node_service_steps',
|
||||||
|
autospec=True)
|
||||||
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
|
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
|
||||||
autospec=True)
|
autospec=True)
|
||||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_service',
|
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_service',
|
||||||
autospec=True)
|
autospec=True)
|
||||||
def test__do_node_service_prepare_service_fail(self, mock_prep,
|
def test__do_node_service_prepare_service_fail(self, mock_prep,
|
||||||
mock_validate,
|
mock_validate,
|
||||||
service_steps=None):
|
mock_steps,
|
||||||
# Exception from task.driver.deploy.prepare_cleaning should cause node
|
service_steps=[]):
|
||||||
|
# NOTE(janders) after removing unconditional initial reboot into
|
||||||
|
# ramdisk, set_node_service_steps needs to InvalidParameterValue
|
||||||
|
# to force boot into ramdisk
|
||||||
|
mock_steps.side_effect = exception.InvalidParameterValue('error')
|
||||||
|
# Exception from task.driver.deploy.prepare_service should cause node
|
||||||
# to go to SERVICEFAIL
|
# to go to SERVICEFAIL
|
||||||
mock_prep.side_effect = exception.InvalidParameterValue('error')
|
mock_prep.side_effect = exception.InvalidParameterValue('error')
|
||||||
tgt_prov_state = states.ACTIVE
|
tgt_prov_state = states.ACTIVE
|
||||||
@@ -135,17 +142,76 @@ class DoNodeServiceTestCase(db_base.DbTestCase):
|
|||||||
self.assertFalse(node.maintenance)
|
self.assertFalse(node.maintenance)
|
||||||
self.assertIsNone(node.fault)
|
self.assertIsNone(node.fault)
|
||||||
|
|
||||||
|
@mock.patch.object(conductor_steps, 'set_node_service_steps',
|
||||||
|
autospec=True)
|
||||||
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
|
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
|
||||||
autospec=True)
|
autospec=True)
|
||||||
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_service',
|
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_service',
|
||||||
autospec=True)
|
autospec=True)
|
||||||
def test__do_node_service_prepare_service_wait(self, mock_prep,
|
def test__do_node_service_prepare_service_wait(self, mock_prep,
|
||||||
mock_validate):
|
mock_validate,
|
||||||
|
mock_steps):
|
||||||
service_steps = [
|
service_steps = [
|
||||||
{'step': 'trigger_servicewait', 'priority': 10,
|
{'step': 'trigger_servicewait', 'priority': 10,
|
||||||
'interface': 'vendor'}
|
'interface': 'vendor'}
|
||||||
]
|
]
|
||||||
|
mock_steps.side_effect = exception.InvalidParameterValue('error')
|
||||||
|
mock_prep.return_value = states.SERVICEWAIT
|
||||||
|
tgt_prov_state = states.ACTIVE
|
||||||
|
node = obj_utils.create_test_node(
|
||||||
|
self.context, driver='fake-hardware',
|
||||||
|
provision_state=states.SERVICING,
|
||||||
|
target_provision_state=tgt_prov_state,
|
||||||
|
vendor_interface='fake')
|
||||||
|
with task_manager.acquire(
|
||||||
|
self.context, node.uuid, shared=False) as task:
|
||||||
|
servicing.do_node_service(task, service_steps=service_steps)
|
||||||
|
node.refresh()
|
||||||
|
self.assertEqual(states.SERVICEWAIT, node.provision_state)
|
||||||
|
self.assertEqual(tgt_prov_state, node.target_provision_state)
|
||||||
|
mock_prep.assert_called_once_with(mock.ANY, mock.ANY)
|
||||||
|
mock_validate.assert_called_once_with(mock.ANY, mock.ANY)
|
||||||
|
|
||||||
|
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
|
||||||
|
autospec=True)
|
||||||
|
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_service',
|
||||||
|
autospec=True)
|
||||||
|
def test__do_node_service_out_of_band(self, mock_prep,
|
||||||
|
mock_validate):
|
||||||
|
# NOTE(janders) this test ensures ramdisk isn't prepared if
|
||||||
|
# steps do not require it
|
||||||
|
service_steps = [
|
||||||
|
{'step': 'trigger_servicewait', 'priority': 10,
|
||||||
|
'interface': 'vendor'}
|
||||||
|
]
|
||||||
|
mock_prep.return_value = states.SERVICEWAIT
|
||||||
|
tgt_prov_state = states.ACTIVE
|
||||||
|
node = obj_utils.create_test_node(
|
||||||
|
self.context, driver='fake-hardware',
|
||||||
|
provision_state=states.SERVICING,
|
||||||
|
target_provision_state=tgt_prov_state,
|
||||||
|
vendor_interface='fake')
|
||||||
|
with task_manager.acquire(
|
||||||
|
self.context, node.uuid, shared=False) as task:
|
||||||
|
servicing.do_node_service(task, service_steps=service_steps)
|
||||||
|
node.refresh()
|
||||||
|
self.assertEqual(states.SERVICEWAIT, node.provision_state)
|
||||||
|
self.assertEqual(tgt_prov_state, node.target_provision_state)
|
||||||
|
mock_prep.assert_not_called()
|
||||||
|
mock_validate.assert_called_once_with(mock.ANY, mock.ANY)
|
||||||
|
|
||||||
|
@mock.patch('ironic.drivers.modules.network.flat.FlatNetwork.validate',
|
||||||
|
autospec=True)
|
||||||
|
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.prepare_service',
|
||||||
|
autospec=True)
|
||||||
|
def test__do_node_service_requires_ramdisk_fallback(self, mock_prep,
|
||||||
|
mock_validate):
|
||||||
|
# NOTE(janders): here we set 'requires_ramdisk': 'True'
|
||||||
|
# to force ramdisk_needed to be set and trigger prepare ramdisk
|
||||||
|
service_steps = [
|
||||||
|
{'step': 'trigger_servicewait', 'priority': 10,
|
||||||
|
'interface': 'vendor', 'requires_ramdisk': 'True'}
|
||||||
|
]
|
||||||
mock_prep.return_value = states.SERVICEWAIT
|
mock_prep.return_value = states.SERVICEWAIT
|
||||||
tgt_prov_state = states.ACTIVE
|
tgt_prov_state = states.ACTIVE
|
||||||
node = obj_utils.create_test_node(
|
node = obj_utils.create_test_node(
|
||||||
@@ -194,11 +260,8 @@ class DoNodeServiceTestCase(db_base.DbTestCase):
|
|||||||
@mock.patch.object(conductor_steps, 'set_node_service_steps',
|
@mock.patch.object(conductor_steps, 'set_node_service_steps',
|
||||||
autospec=True)
|
autospec=True)
|
||||||
def __do_node_service_steps_fail(self, mock_steps, mock_validate,
|
def __do_node_service_steps_fail(self, mock_steps, mock_validate,
|
||||||
service_steps=None, invalid_exc=True):
|
service_steps=None):
|
||||||
if invalid_exc:
|
mock_steps.side_effect = exception.NodeCleaningFailure('failure')
|
||||||
mock_steps.side_effect = exception.InvalidParameterValue('invalid')
|
|
||||||
else:
|
|
||||||
mock_steps.side_effect = exception.NodeCleaningFailure('failure')
|
|
||||||
tgt_prov_state = states.ACTIVE
|
tgt_prov_state = states.ACTIVE
|
||||||
node = obj_utils.create_test_node(
|
node = obj_utils.create_test_node(
|
||||||
self.context, driver='fake-hardware',
|
self.context, driver='fake-hardware',
|
||||||
@@ -224,12 +287,8 @@ class DoNodeServiceTestCase(db_base.DbTestCase):
|
|||||||
def test_do_node_service_steps_fail_poweroff(self, mock_steps,
|
def test_do_node_service_steps_fail_poweroff(self, mock_steps,
|
||||||
mock_validate,
|
mock_validate,
|
||||||
mock_power,
|
mock_power,
|
||||||
service_steps=None,
|
service_steps=None):
|
||||||
invalid_exc=True):
|
mock_steps.side_effect = exception.NodeCleaningFailure('failure')
|
||||||
if invalid_exc:
|
|
||||||
mock_steps.side_effect = exception.InvalidParameterValue('invalid')
|
|
||||||
else:
|
|
||||||
mock_steps.side_effect = exception.NodeCleaningFailure('failure')
|
|
||||||
tgt_prov_state = states.ACTIVE
|
tgt_prov_state = states.ACTIVE
|
||||||
self.config(poweroff_in_cleanfail=True, group='conductor')
|
self.config(poweroff_in_cleanfail=True, group='conductor')
|
||||||
node = obj_utils.create_test_node(
|
node = obj_utils.create_test_node(
|
||||||
@@ -249,9 +308,7 @@ class DoNodeServiceTestCase(db_base.DbTestCase):
|
|||||||
self.assertFalse(mock_power.called)
|
self.assertFalse(mock_power.called)
|
||||||
|
|
||||||
def test__do_node_service_steps_fail(self):
|
def test__do_node_service_steps_fail(self):
|
||||||
for invalid in (True, False):
|
self.__do_node_service_steps_fail(service_steps=[self.deploy_raid])
|
||||||
self.__do_node_service_steps_fail(service_steps=[self.deploy_raid],
|
|
||||||
invalid_exc=invalid)
|
|
||||||
|
|
||||||
@mock.patch.object(conductor_steps, 'set_node_service_steps',
|
@mock.patch.object(conductor_steps, 'set_node_service_steps',
|
||||||
autospec=True)
|
autospec=True)
|
||||||
|
|||||||
@@ -0,0 +1,7 @@
|
|||||||
|
---
|
||||||
|
fixes:
|
||||||
|
- |
|
||||||
|
Removes initial, unconditional reboot into ramdisk during servicing when
|
||||||
|
not required by specified service steps. This reduces the total number of
|
||||||
|
reboots needed when performing servicing, speeding up the process.
|
||||||
|
|
||||||
Reference in New Issue
Block a user