From 30a85bd0cecc395a83790e637368ce4476cdf7f9 Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Thu, 7 Jan 2021 17:46:20 +0100 Subject: [PATCH] API to force manual cleaning without booting IPA Adds a new argument disable_ramdisk to the manual cleaning API. Only steps that are marked with requires_ramdisk=False can be run in this mode. Cleaning prepare/tear down is not done. Some steps (like redfish BIOS) currently require IPA to detect a successful reboot. They are not marked with requires_ramdisk just yet. Change-Id: Icacac871603bd48536188813647bc669c574de2a Story: #2008491 Task: #41540 --- .../baremetal-api-v1-node-management.inc | 5 ++ api-ref/source/parameters.yaml | 8 +++ .../contributor/webapi-version-history.rst | 8 ++- ironic/api/controllers/v1/node.py | 17 +++--- ironic/api/controllers/v1/utils.py | 11 ++++ ironic/api/controllers/v1/versions.py | 4 +- ironic/common/release_mappings.py | 4 +- ironic/conductor/cleaning.py | 51 ++++++++++++------ ironic/conductor/manager.py | 9 ++-- ironic/conductor/rpcapi.py | 18 +++++-- ironic/conductor/steps.py | 31 ++++++++--- ironic/conductor/utils.py | 1 + ironic/drivers/base.py | 10 +++- ironic/drivers/modules/agent_base.py | 2 + .../unit/api/controllers/v1/test_node.py | 36 ++++++++++++- ironic/tests/unit/conductor/test_cleaning.py | 53 ++++++++++++++----- ironic/tests/unit/conductor/test_manager.py | 10 ++-- ironic/tests/unit/conductor/test_steps.py | 39 +++++++++++++- .../disable-ramdisk-5156a009812fbb17.yaml | 13 +++++ 19 files changed, 265 insertions(+), 65 deletions(-) create mode 100644 releasenotes/notes/disable-ramdisk-5156a009812fbb17.yaml diff --git a/api-ref/source/baremetal-api-v1-node-management.inc b/api-ref/source/baremetal-api-v1-node-management.inc index cae2c06893..0d543a669a 100644 --- a/api-ref/source/baremetal-api-v1-node-management.inc +++ b/api-ref/source/baremetal-api-v1-node-management.inc @@ -363,6 +363,10 @@ detailed documentation of the Ironic State Machine is available ``deploy_steps`` can be provided when settings the node's provision target state to ``active`` or ``rebuild``. +.. versionadded:: 1.70 + ``disable_ramdisk`` can be provided to avoid booting the ramdisk during + manual cleaning. + Normal response code: 202 Error codes: @@ -382,6 +386,7 @@ Request - clean_steps: clean_steps - deploy_steps: deploy_steps - rescue_password: rescue_password + - disable_ramdisk: disable_ramdisk **Example request to deploy a Node, using a configdrive served via local webserver:** diff --git a/api-ref/source/parameters.yaml b/api-ref/source/parameters.yaml index 62d3f2ec45..2db02f8532 100644 --- a/api-ref/source/parameters.yaml +++ b/api-ref/source/parameters.yaml @@ -780,6 +780,14 @@ description: in: body required: true type: string +disable_ramdisk: + description: | + If set to ``true``, the ironic-python-agent ramdisk will not be booted for + cleaning. Only clean steps explicitly marked as not requiring ramdisk can + be executed in this mode. Only allowed for manual cleaning. + in: body + required: false + type: boolean driver_info: description: | All the metadata required by the driver to manage this Node. List of fields diff --git a/doc/source/contributor/webapi-version-history.rst b/doc/source/contributor/webapi-version-history.rst index fa91d075c5..3bb781291f 100644 --- a/doc/source/contributor/webapi-version-history.rst +++ b/doc/source/contributor/webapi-version-history.rst @@ -2,10 +2,16 @@ REST API Version History ======================== +1.70 (Wallaby, TBD) +------------------- + +Add support for ``disable_ramdisk`` parameter to provisioning endpoint +``/v1/nodes/{node_ident}/states/provision``. + 1.69 (Wallaby, 16.2) ---------------------- -Add support for ``deploy-steps`` parameter to provisioning endpoint +Add support for ``deploy_steps`` parameter to provisioning endpoint ``/v1/nodes/{node_ident}/states/provision``. Available and optional when target is 'active' or 'rebuild'. diff --git a/ironic/api/controllers/v1/node.py b/ironic/api/controllers/v1/node.py index be5f0106de..328d020a56 100644 --- a/ironic/api/controllers/v1/node.py +++ b/ironic/api/controllers/v1/node.py @@ -793,7 +793,7 @@ class NodeStatesController(rest.RestController): def _do_provision_action(self, rpc_node, target, configdrive=None, clean_steps=None, deploy_steps=None, - rescue_password=None): + rescue_password=None, disable_ramdisk=None): topic = api.request.rpcapi.get_topic_for(rpc_node) # Note that there is a race condition. The node state(s) could change # by the time the RPC call is made and the TaskManager manager gets a @@ -834,7 +834,8 @@ class NodeStatesController(rest.RestController): msg, status_code=http_client.BAD_REQUEST) _check_clean_steps(clean_steps) api.request.rpcapi.do_node_clean( - api.request.context, rpc_node.uuid, clean_steps, topic) + api.request.context, rpc_node.uuid, clean_steps, + disable_ramdisk, topic=topic) elif target in PROVISION_ACTION_STATES: api.request.rpcapi.do_provisioning_action( api.request.context, rpc_node.uuid, target, topic) @@ -849,10 +850,11 @@ class NodeStatesController(rest.RestController): configdrive=args.types(type(None), dict, str), clean_steps=args.types(type(None), list), deploy_steps=args.types(type(None), list), - rescue_password=args.string) + rescue_password=args.string, + disable_ramdisk=args.boolean) def provision(self, node_ident, target, configdrive=None, clean_steps=None, deploy_steps=None, - rescue_password=None): + rescue_password=None, disable_ramdisk=None): """Asynchronous trigger the provisioning of the node. This will set the target provision state of the node, and a @@ -909,6 +911,7 @@ class NodeStatesController(rest.RestController): :param rescue_password: A string representing the password to be set inside the rescue environment. This is required (and only valid), when target is "rescue". + :param disable_ramdisk: Whether to skip booting ramdisk for cleaning. :raises: NodeLocked (HTTP 409) if the node is currently locked. :raises: ClientSideError (HTTP 409) if the node is already being provisioned. @@ -920,7 +923,7 @@ class NodeStatesController(rest.RestController): performed because the node is in maintenance mode. :raises: NoFreeConductorWorker (HTTP 503) if no workers are available. :raises: NotAcceptable (HTTP 406) if the API version specified does - not allow the requested state transition. + not allow the requested state transition or parameters. """ rpc_node = api_utils.check_node_policy_and_retrieve( 'baremetal:node:set_provision_state', node_ident) @@ -951,6 +954,7 @@ class NodeStatesController(rest.RestController): state=rpc_node.provision_state) api_utils.check_allow_configdrive(target, configdrive) + api_utils.check_allow_clean_disable_ramdisk(target, disable_ramdisk) if clean_steps and target != ir_states.VERBS['clean']: msg = (_('"clean_steps" is only valid when setting target ' @@ -973,7 +977,8 @@ class NodeStatesController(rest.RestController): raise exception.NotAcceptable() self._do_provision_action(rpc_node, target, configdrive, clean_steps, - deploy_steps, rescue_password) + deploy_steps, rescue_password, + disable_ramdisk) # Set the HTTP Location Header url_args = '/'.join([node_ident, 'states']) diff --git a/ironic/api/controllers/v1/utils.py b/ironic/api/controllers/v1/utils.py index 90a2a258c5..3fd24c4462 100644 --- a/ironic/api/controllers/v1/utils.py +++ b/ironic/api/controllers/v1/utils.py @@ -1972,3 +1972,14 @@ def check_allow_deploy_steps(target, deploy_steps): 'provision state to %s or %s') % allowed_states) raise exception.ClientSideError( msg, status_code=http_client.BAD_REQUEST) + + +def check_allow_clean_disable_ramdisk(target, disable_ramdisk): + if disable_ramdisk is None: + return + elif api.request.version.minor < versions.MINOR_70_CLEAN_DISABLE_RAMDISK: + raise exception.NotAcceptable( + _("disable_ramdisk is not acceptable in this API version")) + elif target != "clean": + raise exception.BadRequest( + _("disable_ramdisk is supported only with manual cleaning")) diff --git a/ironic/api/controllers/v1/versions.py b/ironic/api/controllers/v1/versions.py index d00d68f606..bff79e75e4 100644 --- a/ironic/api/controllers/v1/versions.py +++ b/ironic/api/controllers/v1/versions.py @@ -107,6 +107,7 @@ BASE_VERSION = 1 # v1.67: Add support for port_uuid/portgroup_uuid in node vif_attach # v1.68: Add agent_verify_ca to heartbeat. # v1.69: Add deploy_steps to provisioning +# v1.70: Add disable_ramdisk to manual cleaning. MINOR_0_JUNO = 0 MINOR_1_INITIAL_VERSION = 1 @@ -178,6 +179,7 @@ MINOR_66_NODE_NETWORK_DATA = 66 MINOR_67_NODE_VIF_ATTACH_PORT = 67 MINOR_68_HEARTBEAT_VERIFY_CA = 68 MINOR_69_DEPLOY_STEPS = 69 +MINOR_70_CLEAN_DISABLE_RAMDISK = 70 # When adding another version, update: # - MINOR_MAX_VERSION @@ -185,7 +187,7 @@ MINOR_69_DEPLOY_STEPS = 69 # explanation of what changed in the new version # - common/release_mappings.py, RELEASE_MAPPING['master']['api'] -MINOR_MAX_VERSION = MINOR_69_DEPLOY_STEPS +MINOR_MAX_VERSION = MINOR_70_CLEAN_DISABLE_RAMDISK # String representations of the minor and maximum versions _MIN_VERSION_STRING = '{}.{}'.format(BASE_VERSION, MINOR_1_INITIAL_VERSION) diff --git a/ironic/common/release_mappings.py b/ironic/common/release_mappings.py index 370600db9e..43f461c496 100644 --- a/ironic/common/release_mappings.py +++ b/ironic/common/release_mappings.py @@ -302,8 +302,8 @@ RELEASE_MAPPING = { } }, 'master': { - 'api': '1.69', - 'rpc': '1.52', + 'api': '1.70', + 'rpc': '1.53', 'objects': { 'Allocation': ['1.1'], 'Node': ['1.35'], diff --git a/ironic/conductor/cleaning.py b/ironic/conductor/cleaning.py index fdcddd19c0..20a864e56b 100644 --- a/ironic/conductor/cleaning.py +++ b/ironic/conductor/cleaning.py @@ -27,7 +27,7 @@ LOG = log.getLogger(__name__) @task_manager.require_exclusive_lock -def do_node_clean(task, clean_steps=None): +def do_node_clean(task, clean_steps=None, disable_ramdisk=False): """Internal RPC method to perform cleaning of a node. :param task: a TaskManager instance with an exclusive lock on its node @@ -35,6 +35,7 @@ def do_node_clean(task, clean_steps=None): perform. Is None For automated cleaning (default). For more information, see the clean_steps parameter of :func:`ConductorManager.do_node_clean`. + :param disable_ramdisk: Whether to skip booting ramdisk for cleaning. """ node = task.node manual_clean = clean_steps is not None @@ -64,7 +65,8 @@ def do_node_clean(task, clean_steps=None): # NOTE(ghe): Valid power and network values are needed to perform # a cleaning. task.driver.power.validate(task) - task.driver.network.validate(task) + if not disable_ramdisk: + task.driver.network.validate(task) except exception.InvalidParameterValue as e: msg = (_('Validation failed. Cannot clean node %(node)s. ' 'Error: %(msg)s') % @@ -74,6 +76,7 @@ def do_node_clean(task, clean_steps=None): if manual_clean: info = node.driver_internal_info info['clean_steps'] = clean_steps + info['cleaning_disable_ramdisk'] = disable_ramdisk node.driver_internal_info = info node.save() @@ -83,7 +86,13 @@ def do_node_clean(task, clean_steps=None): # Allow the deploy driver to set up the ramdisk again (necessary for # IPA cleaning) try: - prepare_result = task.driver.deploy.prepare_cleaning(task) + if not disable_ramdisk: + prepare_result = task.driver.deploy.prepare_cleaning(task) + else: + LOG.info('Skipping preparing for in-band cleaning since ' + 'out-of-band only cleaning has been requested for node ' + '%s', node.uuid) + prepare_result = None except Exception as e: msg = (_('Failed to prepare node %(node)s for cleaning: %(e)s') % {'node': node.uuid, 'e': e}) @@ -102,7 +111,8 @@ def do_node_clean(task, clean_steps=None): return try: - conductor_steps.set_node_cleaning_steps(task) + conductor_steps.set_node_cleaning_steps( + task, disable_ramdisk=disable_ramdisk) except (exception.InvalidParameterValue, exception.NodeCleaningFailure) as e: msg = (_('Cannot clean node %(node)s. Error: %(msg)s') @@ -111,13 +121,13 @@ def do_node_clean(task, clean_steps=None): steps = node.driver_internal_info.get('clean_steps', []) step_index = 0 if steps else None - do_next_clean_step(task, step_index) + do_next_clean_step(task, step_index, disable_ramdisk=disable_ramdisk) @utils.fail_on_error(utils.cleaning_error_handler, _("Unexpected error when processing next clean step")) @task_manager.require_exclusive_lock -def do_next_clean_step(task, step_index): +def do_next_clean_step(task, step_index, disable_ramdisk=None): """Do cleaning, starting from the specified clean step. :param task: a TaskManager instance with an exclusive lock @@ -125,6 +135,7 @@ def do_next_clean_step(task, step_index): is the index (from 0) into the list of clean steps in the node's driver_internal_info['clean_steps']. Is None if there are no steps to execute. + :param disable_ramdisk: Whether to skip booting ramdisk for cleaning. """ node = task.node # For manual cleaning, the target provision state is MANAGEABLE, @@ -135,6 +146,10 @@ def do_next_clean_step(task, step_index): else: steps = node.driver_internal_info['clean_steps'][step_index:] + if disable_ramdisk is None: + disable_ramdisk = node.driver_internal_info.get( + 'cleaning_disable_ramdisk', False) + LOG.info('Executing %(state)s on node %(node)s, remaining steps: ' '%(steps)s', {'node': node.uuid, 'steps': steps, 'state': node.provision_state}) @@ -182,7 +197,8 @@ def do_next_clean_step(task, step_index): '%(exc)s') % {'node': node.uuid, 'exc': e, 'step': node.clean_step}) - driver_utils.collect_ramdisk_logs(task.node, label='cleaning') + if not disable_ramdisk: + driver_utils.collect_ramdisk_logs(task.node, label='cleaning') utils.cleaning_error_handler(task, msg, traceback=True) return @@ -206,22 +222,23 @@ def do_next_clean_step(task, step_index): LOG.info('Node %(node)s finished clean step %(step)s', {'node': node.uuid, 'step': step}) - if CONF.agent.deploy_logs_collect == 'always': + if CONF.agent.deploy_logs_collect == 'always' and not disable_ramdisk: driver_utils.collect_ramdisk_logs(task.node, label='cleaning') # Clear clean_step node.clean_step = None utils.wipe_cleaning_internal_info(task) node.save() - try: - task.driver.deploy.tear_down_cleaning(task) - except Exception as e: - msg = (_('Failed to tear down from cleaning for node %(node)s, ' - 'reason: %(err)s') - % {'node': node.uuid, 'err': e}) - return utils.cleaning_error_handler(task, msg, - traceback=True, - tear_down_cleaning=False) + if not disable_ramdisk: + try: + task.driver.deploy.tear_down_cleaning(task) + except Exception as e: + msg = (_('Failed to tear down from cleaning for node %(node)s, ' + 'reason: %(err)s') + % {'node': node.uuid, 'err': e}) + return utils.cleaning_error_handler(task, msg, + traceback=True, + tear_down_cleaning=False) LOG.info('Node %s cleaning complete', node.uuid) event = 'manage' if manual_clean or node.retired else 'done' diff --git a/ironic/conductor/manager.py b/ironic/conductor/manager.py index dfbb7835ec..aafdd41def 100644 --- a/ironic/conductor/manager.py +++ b/ironic/conductor/manager.py @@ -91,7 +91,7 @@ class ConductorManager(base_manager.BaseConductorManager): # NOTE(rloo): This must be in sync with rpcapi.ConductorAPI's. # NOTE(pas-ha): This also must be in sync with # ironic.common.release_mappings.RELEASE_MAPPING['master'] - RPC_API_VERSION = '1.52' + RPC_API_VERSION = '1.53' target = messaging.Target(version=RPC_API_VERSION) @@ -1036,7 +1036,8 @@ class ConductorManager(base_manager.BaseConductorManager): exception.NodeInMaintenance, exception.NodeLocked, exception.NoFreeConductorWorker) - def do_node_clean(self, context, node_id, clean_steps): + def do_node_clean(self, context, node_id, clean_steps, + disable_ramdisk=False): """RPC method to initiate manual cleaning. :param context: an admin context. @@ -1057,6 +1058,7 @@ class ConductorManager(base_manager.BaseConductorManager): { 'interface': deploy', 'step': 'upgrade_firmware', 'args': {'force': True} } + :param disable_ramdisk: Optional. Whether to disable the ramdisk boot. :raises: InvalidParameterValue if power validation fails. :raises: InvalidStateRequested if the node is not in manageable state. :raises: NodeLocked if node is locked by another conductor. @@ -1093,7 +1095,8 @@ class ConductorManager(base_manager.BaseConductorManager): task.process_event( 'clean', callback=self._spawn_worker, - call_args=(cleaning.do_node_clean, task, clean_steps), + call_args=(cleaning.do_node_clean, task, clean_steps, + disable_ramdisk), err_handler=utils.provisioning_error_handler, target_state=states.MANAGEABLE) except exception.InvalidState: diff --git a/ironic/conductor/rpcapi.py b/ironic/conductor/rpcapi.py index e41cb77919..68293d8f1a 100644 --- a/ironic/conductor/rpcapi.py +++ b/ironic/conductor/rpcapi.py @@ -105,13 +105,14 @@ class ConductorAPI(object): | get_supported_indicators. | 1.51 - Added agent_verify_ca to heartbeat. | 1.52 - Added deploy steps argument to provisioning + | 1.53 - Added disable_ramdisk to do_node_clean. """ # NOTE(rloo): This must be in sync with manager.ConductorManager's. # NOTE(pas-ha): This also must be in sync with # ironic.common.release_mappings.RELEASE_MAPPING['master'] - RPC_API_VERSION = '1.52' + RPC_API_VERSION = '1.53' def __init__(self, topic=None): super(ConductorAPI, self).__init__() @@ -890,12 +891,14 @@ class ConductorAPI(object): return cctxt.call(context, 'get_raid_logical_disk_properties', driver_name=driver_name) - def do_node_clean(self, context, node_id, clean_steps, topic=None): + def do_node_clean(self, context, node_id, clean_steps, + disable_ramdisk=None, topic=None): """Signal to conductor service to perform manual cleaning on a node. :param context: request context. :param node_id: node ID or UUID. :param clean_steps: a list of clean step dictionaries. + :param disable_ramdisk: Whether to skip booting ramdisk for cleaning. :param topic: RPC topic. Defaults to self.topic. :raises: InvalidParameterValue if validation of power driver interface failed. @@ -905,9 +908,16 @@ class ConductorAPI(object): :raises: NoFreeConductorWorker when there is no free worker to start async task. """ - cctxt = self.client.prepare(topic=topic or self.topic, version='1.32') + # Avoid sending unset parameters to simplify upgrades. + params = {} + version = '1.32' + if disable_ramdisk is not None: + params['disable_ramdisk'] = disable_ramdisk + version = '1.53' + + cctxt = self.client.prepare(topic=topic or self.topic, version=version) return cctxt.call(context, 'do_node_clean', - node_id=node_id, clean_steps=clean_steps) + node_id=node_id, clean_steps=clean_steps, **params) def heartbeat(self, context, node_id, callback_url, agent_version, agent_token=None, agent_verify_ca=None, topic=None): diff --git a/ironic/conductor/steps.py b/ironic/conductor/steps.py index b381866102..aea467342b 100644 --- a/ironic/conductor/steps.py +++ b/ironic/conductor/steps.py @@ -161,13 +161,15 @@ def _get_deployment_steps(task, enabled=False, sort=True): enabled=enabled, sort_step_key=sort_key) -def set_node_cleaning_steps(task): +def set_node_cleaning_steps(task, disable_ramdisk=False): """Set up the node with clean step information for cleaning. For automated cleaning, get the clean steps from the driver. For manual cleaning, the user's clean steps are known but need to be validated against the driver's clean steps. + :param disable_ramdisk: If `True`, only steps with requires_ramdisk=False + are accepted. :raises: InvalidParameterValue if there is a problem with the user's clean steps. :raises: NodeCleaningFailure if there was a problem getting the @@ -190,8 +192,8 @@ def set_node_cleaning_steps(task): # Now that we know what the driver's available clean steps are, we can # do further checks to validate the user's clean steps. steps = node.driver_internal_info['clean_steps'] - driver_internal_info['clean_steps'] = ( - _validate_user_clean_steps(task, steps)) + driver_internal_info['clean_steps'] = _validate_user_clean_steps( + task, steps, disable_ramdisk=disable_ramdisk) node.clean_step = {} driver_internal_info['clean_step_index'] = None @@ -382,7 +384,8 @@ def _validate_deploy_steps_unique(user_steps): return errors -def _validate_user_step(task, user_step, driver_step, step_type): +def _validate_user_step(task, user_step, driver_step, step_type, + disable_ramdisk=False): """Validate a user-specified step. :param task: A TaskManager object @@ -424,6 +427,8 @@ def _validate_user_step(task, user_step, driver_step, step_type): 'required': False } } } :param step_type: either 'clean' or 'deploy'. + :param disable_ramdisk: If `True`, only steps with requires_ramdisk=False + are accepted. Only makes sense for manual cleaning at the moment. :return: a list of validation error strings for the step. """ errors = [] @@ -453,6 +458,9 @@ def _validate_user_step(task, user_step, driver_step, step_type): {'type': step_type, 'step': user_step, 'miss': ', '.join(missing)}) errors.append(error) + if disable_ramdisk and driver_step.get('requires_ramdisk', True): + error = _('clean step %s requires booting a ramdisk') % user_step + errors.append(error) if step_type == 'clean': # Copy fields that should not be provided by a user @@ -477,7 +485,8 @@ def _validate_user_step(task, user_step, driver_step, step_type): def _validate_user_steps(task, user_steps, driver_steps, step_type, - error_prefix=None, skip_missing=False): + error_prefix=None, skip_missing=False, + disable_ramdisk=False): """Validate the user-specified steps. :param task: A TaskManager object @@ -522,6 +531,9 @@ def _validate_user_steps(task, user_steps, driver_steps, step_type, :param step_type: either 'clean' or 'deploy'. :param error_prefix: String to use as a prefix for exception messages, or None. + :param skip_missing: Whether to silently ignore unknown steps. + :param disable_ramdisk: If `True`, only steps with requires_ramdisk=False + are accepted. Only makes sense for manual cleaning at the moment. :raises: InvalidParameterValue if validation of steps fails. :raises: NodeCleaningFailure or InstanceDeployFailure if there was a problem getting the steps from the driver. @@ -554,7 +566,7 @@ def _validate_user_steps(task, user_steps, driver_steps, step_type, continue step_errors = _validate_user_step(task, user_step, driver_step, - step_type) + step_type, disable_ramdisk) errors.extend(step_errors) result.append(user_step) @@ -572,7 +584,7 @@ def _validate_user_steps(task, user_steps, driver_steps, step_type, return result -def _validate_user_clean_steps(task, user_steps): +def _validate_user_clean_steps(task, user_steps, disable_ramdisk=False): """Validate the user-specified clean steps. :param task: A TaskManager object @@ -588,13 +600,16 @@ def _validate_user_clean_steps(task, user_steps): { 'interface': 'deploy', 'step': 'upgrade_firmware', 'args': {'force': True} } + :param disable_ramdisk: If `True`, only steps with requires_ramdisk=False + are accepted. :raises: InvalidParameterValue if validation of clean steps fails. :raises: NodeCleaningFailure if there was a problem getting the clean steps from the driver. :return: validated clean steps update with information from the driver """ driver_steps = _get_cleaning_steps(task, enabled=False, sort=False) - return _validate_user_steps(task, user_steps, driver_steps, 'clean') + return _validate_user_steps(task, user_steps, driver_steps, 'clean', + disable_ramdisk=disable_ramdisk) def _validate_user_deploy_steps(task, user_steps, error_prefix=None, diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index 4d99553720..66c5cf8c75 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -543,6 +543,7 @@ def wipe_cleaning_internal_info(task): info.pop('clean_step_index', None) info.pop('cleaning_reboot', None) info.pop('cleaning_polling', None) + info.pop('cleaning_disable_ramdisk', None) info.pop('skip_current_clean_step', None) info.pop('steps_validated', None) task.node.driver_internal_info = info diff --git a/ironic/drivers/base.py b/ironic/drivers/base.py index 7d137f9c4d..db655a4744 100644 --- a/ironic/drivers/base.py +++ b/ironic/drivers/base.py @@ -245,7 +245,9 @@ class BaseInterface(object, metaclass=abc.ABCMeta): 'priority': method._clean_step_priority, 'abortable': method._clean_step_abortable, 'argsinfo': method._clean_step_argsinfo, - 'interface': instance.interface_type} + 'interface': instance.interface_type, + 'requires_ramdisk': + method._clean_step_requires_ramdisk} instance.clean_steps.append(step) if getattr(method, '_is_deploy_step', False): # Create a DeployStep to represent this method @@ -1716,7 +1718,8 @@ def _validate_argsinfo(argsinfo): {'arg': arg}) -def clean_step(priority, abortable=False, argsinfo=None): +def clean_step(priority, abortable=False, argsinfo=None, + requires_ramdisk=True): """Decorator for cleaning steps. Cleaning steps may be used in manual or automated cleaning. @@ -1770,6 +1773,8 @@ def clean_step(priority, abortable=False, argsinfo=None): 'required': Boolean. Optional; default is False. True if this argument is required. If so, it must be specified in the clean request; false if it is optional. + :param requires_ramdisk: Whether this step requires the ramdisk + to be running. Should be set to False for purely out-of-band steps. :raises InvalidParameterValue: if any of the arguments are invalid """ def decorator(func): @@ -1790,6 +1795,7 @@ def clean_step(priority, abortable=False, argsinfo=None): _validate_argsinfo(argsinfo) func._clean_step_argsinfo = argsinfo + func._clean_step_requires_ramdisk = requires_ramdisk return func return decorator diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index da90bb5031..ff872c23ff 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -946,6 +946,8 @@ class AgentDeployMixin(HeartbeatMixin, AgentOobStepsMixin): 'step': step, 'type': step_type})) + if step_type == 'clean': + step['requires_ramdisk'] = True steps[step['interface']].append(step) # Save hardware manager version, steps, and date diff --git a/ironic/tests/unit/api/controllers/v1/test_node.py b/ironic/tests/unit/api/controllers/v1/test_node.py index c983f6d86f..185e8f27e1 100644 --- a/ironic/tests/unit/api/controllers/v1/test_node.py +++ b/ironic/tests/unit/api/controllers/v1/test_node.py @@ -5677,7 +5677,41 @@ ORHMKeXMO8fcK0By7CiMKwHSXCoEQgfQhWwpMdSsO8LgHCjh87DQc= """ self.assertEqual(b'', ret.body) mock_check.assert_called_once_with(clean_steps) mock_rpcapi.assert_called_once_with(mock.ANY, mock.ANY, self.node.uuid, - clean_steps, 'test-topic') + clean_steps, None, + topic='test-topic') + + @mock.patch.object(rpcapi.ConductorAPI, 'do_node_clean', autospec=True) + @mock.patch.object(api_node, '_check_clean_steps', autospec=True) + def test_clean_disable_ramdisk(self, mock_check, mock_rpcapi): + self.node.provision_state = states.MANAGEABLE + self.node.save() + clean_steps = [{"step": "upgrade_firmware", "interface": "deploy"}] + ret = self.put_json('/nodes/%s/states/provision' % self.node.uuid, + {'target': states.VERBS['clean'], + 'clean_steps': clean_steps, + 'disable_ramdisk': True}, + headers={api_base.Version.string: "1.70"}) + self.assertEqual(http_client.ACCEPTED, ret.status_code) + self.assertEqual(b'', ret.body) + mock_check.assert_called_once_with(clean_steps) + mock_rpcapi.assert_called_once_with(mock.ANY, mock.ANY, + self.node.uuid, + clean_steps, True, + topic='test-topic') + + @mock.patch.object(rpcapi.ConductorAPI, 'do_node_clean', autospec=True) + @mock.patch.object(api_node, '_check_clean_steps', autospec=True) + def test_clean_disable_ramdisk_old_api(self, mock_check, mock_rpcapi): + self.node.provision_state = states.MANAGEABLE + self.node.save() + clean_steps = [{"step": "upgrade_firmware", "interface": "deploy"}] + ret = self.put_json('/nodes/%s/states/provision' % self.node.uuid, + {'target': states.VERBS['clean'], + 'clean_steps': clean_steps, + 'disable_ramdisk': True}, + headers={api_base.Version.string: "1.69"}, + expect_errors=True) + self.assertEqual(http_client.NOT_ACCEPTABLE, ret.status_code) def test_adopt_raises_error_before_1_17(self): """Test that a lower API client cannot use the adopt verb""" diff --git a/ironic/tests/unit/conductor/test_cleaning.py b/ironic/tests/unit/conductor/test_cleaning.py index e2e00cf0b3..18d99d0049 100644 --- a/ironic/tests/unit/conductor/test_cleaning.py +++ b/ironic/tests/unit/conductor/test_cleaning.py @@ -418,7 +418,7 @@ class DoNodeCleanTestCase(db_base.DbTestCase): node.refresh() self.assertEqual(states.CLEANFAIL, node.provision_state) self.assertEqual(tgt_prov_state, node.target_provision_state) - mock_steps.assert_called_once_with(mock.ANY) + mock_steps.assert_called_once_with(mock.ANY, disable_ramdisk=False) self.assertFalse(node.maintenance) self.assertIsNone(node.fault) @@ -439,7 +439,8 @@ class DoNodeCleanTestCase(db_base.DbTestCase): @mock.patch('ironic.drivers.modules.fake.FakePower.validate', autospec=True) def __do_node_clean(self, mock_power_valid, mock_network_valid, - mock_next_step, mock_steps, clean_steps=None): + mock_next_step, mock_steps, clean_steps=None, + disable_ramdisk=False): if clean_steps: tgt_prov_state = states.MANAGEABLE driver_info = {} @@ -457,14 +458,21 @@ class DoNodeCleanTestCase(db_base.DbTestCase): with task_manager.acquire( self.context, node.uuid, shared=False) as task: - cleaning.do_node_clean(task, clean_steps=clean_steps) + cleaning.do_node_clean(task, clean_steps=clean_steps, + disable_ramdisk=disable_ramdisk) node.refresh() mock_power_valid.assert_called_once_with(mock.ANY, task) - mock_network_valid.assert_called_once_with(mock.ANY, task) - mock_next_step.assert_called_once_with(task, 0) - mock_steps.assert_called_once_with(task) + if disable_ramdisk: + mock_network_valid.assert_not_called() + else: + mock_network_valid.assert_called_once_with(mock.ANY, task) + + mock_next_step.assert_called_once_with( + task, 0, disable_ramdisk=disable_ramdisk) + mock_steps.assert_called_once_with( + task, disable_ramdisk=disable_ramdisk) if clean_steps: self.assertEqual(clean_steps, node.driver_internal_info['clean_steps']) @@ -480,6 +488,10 @@ class DoNodeCleanTestCase(db_base.DbTestCase): def test__do_node_clean_manual(self): self.__do_node_clean(clean_steps=[self.deploy_raid]) + def test__do_node_clean_manual_disable_ramdisk(self): + self.__do_node_clean(clean_steps=[self.deploy_raid], + disable_ramdisk=True) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step', autospec=True) def _do_next_clean_step_first_step_async(self, return_state, mock_execute, @@ -623,13 +635,16 @@ class DoNodeCleanTestCase(db_base.DbTestCase): self._do_next_clean_step_last_step_noop(fast_track=True) @mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True) + @mock.patch('ironic.drivers.modules.fake.FakeDeploy.tear_down_cleaning', + autospec=True) @mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step', autospec=True) @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step', autospec=True) def _do_next_clean_step_all(self, mock_deploy_execute, - mock_power_execute, mock_collect_logs, - manual=False): + mock_power_execute, mock_tear_down, + mock_collect_logs, + manual=False, disable_ramdisk=False): # Run all steps from start to finish (all synchronous) tgt_prov_state = states.MANAGEABLE if manual else states.AVAILABLE @@ -653,7 +668,19 @@ class DoNodeCleanTestCase(db_base.DbTestCase): with task_manager.acquire( self.context, node.uuid, shared=False) as task: - cleaning.do_next_clean_step(task, 0) + cleaning.do_next_clean_step( + task, 0, disable_ramdisk=disable_ramdisk) + + mock_power_execute.assert_called_once_with(task.driver.power, task, + self.clean_steps[1]) + mock_deploy_execute.assert_has_calls( + [mock.call(task.driver.deploy, task, self.clean_steps[0]), + mock.call(task.driver.deploy, task, self.clean_steps[2])]) + if disable_ramdisk: + mock_tear_down.assert_not_called() + else: + mock_tear_down.assert_called_once_with( + task.driver.deploy, task) node.refresh() @@ -664,11 +691,6 @@ class DoNodeCleanTestCase(db_base.DbTestCase): self.assertNotIn('clean_step_index', node.driver_internal_info) self.assertEqual('test', node.driver_internal_info['goober']) self.assertIsNone(node.driver_internal_info['clean_steps']) - mock_power_execute.assert_called_once_with(mock.ANY, mock.ANY, - self.clean_steps[1]) - mock_deploy_execute.assert_has_calls( - [mock.call(mock.ANY, mock.ANY, self.clean_steps[0]), - mock.call(mock.ANY, mock.ANY, self.clean_steps[2])]) self.assertFalse(mock_collect_logs.called) def test_do_next_clean_step_automated_all(self): @@ -677,6 +699,9 @@ class DoNodeCleanTestCase(db_base.DbTestCase): def test_do_next_clean_step_manual_all(self): self._do_next_clean_step_all(manual=True) + def test_do_next_clean_step_manual_all_disable_ramdisk(self): + self._do_next_clean_step_all(manual=True, disable_ramdisk=True) + @mock.patch('ironic.drivers.utils.collect_ramdisk_logs', autospec=True) @mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step', autospec=True) diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py index f332faf461..2957e6b918 100644 --- a/ironic/tests/unit/conductor/test_manager.py +++ b/ironic/tests/unit/conductor/test_manager.py @@ -2416,7 +2416,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): mock_power_valid.assert_called_once_with(mock.ANY, mock.ANY) mock_network_valid.assert_called_once_with(mock.ANY, mock.ANY) mock_spawn.assert_called_with( - self.service, cleaning.do_node_clean, mock.ANY, clean_steps) + self.service, cleaning.do_node_clean, mock.ANY, clean_steps, False) node.refresh() # Node will be moved to CLEANING self.assertEqual(states.CLEANING, node.provision_state) @@ -2446,7 +2446,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): mock_power_valid.assert_called_once_with(mock.ANY, mock.ANY) mock_network_valid.assert_called_once_with(mock.ANY, mock.ANY) mock_spawn.assert_called_with( - self.service, cleaning.do_node_clean, mock.ANY, clean_steps) + self.service, cleaning.do_node_clean, mock.ANY, clean_steps, False) node.refresh() # Node will be moved to CLEANING self.assertEqual(states.CLEANING, node.provision_state) @@ -2480,7 +2480,7 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): mock_power_valid.assert_called_once_with(mock.ANY, mock.ANY) mock_network_valid.assert_called_once_with(mock.ANY, mock.ANY) mock_spawn.assert_called_with( - self.service, cleaning.do_node_clean, mock.ANY, clean_steps) + self.service, cleaning.do_node_clean, mock.ANY, clean_steps, False) node.refresh() # Make sure states were rolled back self.assertEqual(prv_state, node.provision_state) @@ -2549,8 +2549,8 @@ class DoNodeCleanTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase): node.refresh() self.assertEqual(states.CLEANING, node.provision_state) self.assertEqual(tgt_prv_state, node.target_provision_state) - mock_spawn.assert_called_with(self.service, - cleaning.continue_node_clean, mock.ANY) + mock_spawn.assert_called_with( + self.service, cleaning.continue_node_clean, mock.ANY) def test_continue_node_clean_automated(self): self._continue_node_clean(states.CLEANWAIT) diff --git a/ironic/tests/unit/conductor/test_steps.py b/ironic/tests/unit/conductor/test_steps.py index 1053ff236b..d85a9d94d1 100644 --- a/ironic/tests/unit/conductor/test_steps.py +++ b/ironic/tests/unit/conductor/test_steps.py @@ -708,7 +708,8 @@ class NodeCleaningStepsTestCase(db_base.DbTestCase): node.driver_internal_info['clean_steps']) self.assertEqual({}, node.clean_step) self.assertFalse(mock_steps.called) - mock_validate_user_steps.assert_called_once_with(task, clean_steps) + mock_validate_user_steps.assert_called_once_with( + task, clean_steps, disable_ramdisk=False) @mock.patch.object(conductor_steps, '_get_cleaning_steps', autospec=True) def test__validate_user_clean_steps(self, mock_steps): @@ -792,6 +793,42 @@ class NodeCleaningStepsTestCase(db_base.DbTestCase): task, user_steps) mock_steps.assert_called_once_with(task, enabled=False, sort=False) + @mock.patch.object(conductor_steps, '_get_cleaning_steps', autospec=True) + def test__validate_user_clean_steps_requires_ramdisk(self, mock_steps): + node = obj_utils.create_test_node(self.context) + mock_steps.return_value = self.clean_steps + self.clean_steps[1]['requires_ramdisk'] = False + + user_steps = [{'step': 'update_firmware', 'interface': 'power'}, + {'step': 'erase_disks', 'interface': 'deploy'}] + + with task_manager.acquire(self.context, node.uuid) as task: + self.assertRaises(exception.InvalidParameterValue, + conductor_steps._validate_user_clean_steps, + task, user_steps, disable_ramdisk=True) + mock_steps.assert_called_once_with(task, enabled=False, sort=False) + + @mock.patch.object(conductor_steps, '_get_cleaning_steps', autospec=True) + def test__validate_user_clean_steps_disable_ramdisk(self, mock_steps): + node = obj_utils.create_test_node(self.context) + for step in self.clean_steps: + step['requires_ramdisk'] = False + mock_steps.return_value = self.clean_steps + + user_steps = [{'step': 'update_firmware', 'interface': 'power'}, + {'step': 'erase_disks', 'interface': 'deploy'}] + + with task_manager.acquire(self.context, node.uuid) as task: + result = conductor_steps._validate_user_clean_steps( + task, user_steps, disable_ramdisk=True) + mock_steps.assert_called_once_with(task, enabled=False, sort=False) + + expected = [{'step': 'update_firmware', 'interface': 'power', + 'priority': 10, 'abortable': False}, + {'step': 'erase_disks', 'interface': 'deploy', + 'priority': 20, 'abortable': True}] + self.assertEqual(expected, result) + @mock.patch.object(conductor_steps, '_get_deployment_templates', autospec=True) diff --git a/releasenotes/notes/disable-ramdisk-5156a009812fbb17.yaml b/releasenotes/notes/disable-ramdisk-5156a009812fbb17.yaml new file mode 100644 index 0000000000..e317a5d6b2 --- /dev/null +++ b/releasenotes/notes/disable-ramdisk-5156a009812fbb17.yaml @@ -0,0 +1,13 @@ +--- +features: + - | + Adds a new ``disable_ramdisk`` parameter to the manual cleaning API. If set + to ``true``, IPA won't get booted for cleaning. Only steps explicitly + marked as compatible can be executed this way. + + The parameter is available in the API version 1.70. +other: + - | + Clean steps can now be marked with ``requires_ramdisk=False`` to make them + compatible with the new ``disable_ramdisk`` argument of the manual cleaning + API.