diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index 1203ffb134..e537219c63 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -444,7 +444,7 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True, task.process_event('fail', target_state=target_state) -def deploying_error_handler(task, logmsg, errmsg, traceback=False, +def deploying_error_handler(task, logmsg, errmsg=None, traceback=False, clean_up=True): """Put a failed node in DEPLOYFAIL. @@ -454,6 +454,7 @@ def deploying_error_handler(task, logmsg, errmsg, traceback=False, :param traceback: Boolean; True to log a traceback :param clean_up: Boolean; True to clean up """ + errmsg = errmsg or logmsg node = task.node LOG.error(logmsg, exc_info=traceback) node.last_error = errmsg @@ -755,15 +756,15 @@ def validate_instance_info_traits(node): raise exception.InvalidParameterValue(err) -def _notify_conductor_resume_operation(task, operation, method): +def notify_conductor_resume_operation(task, operation): """Notify the conductor to resume an operation. :param task: the task :param operation: the operation, a string - :param method: The name of the RPC method, a string """ - LOG.debug('Sending RPC to conductor to resume %(op)s for node %(node)s', - {'op': operation, 'node': task.node.uuid}) + LOG.debug('Sending RPC to conductor to resume %(op)s steps for node ' + '%(node)s', {'op': operation, 'node': task.node.uuid}) + method = 'continue_node_%s' % operation from ironic.conductor import rpcapi uuid = task.node.uuid rpc = rpcapi.ConductorAPI() @@ -774,12 +775,11 @@ def _notify_conductor_resume_operation(task, operation, method): def notify_conductor_resume_clean(task): - _notify_conductor_resume_operation(task, 'cleaning', 'continue_node_clean') + notify_conductor_resume_operation(task, 'clean') def notify_conductor_resume_deploy(task): - _notify_conductor_resume_operation(task, 'deploying', - 'continue_node_deploy') + notify_conductor_resume_operation(task, 'deploy') def skip_automated_cleaning(node): diff --git a/ironic/drivers/modules/agent_base.py b/ironic/drivers/modules/agent_base.py index c5f4871d42..e8426dc517 100644 --- a/ironic/drivers/modules/agent_base.py +++ b/ironic/drivers/modules/agent_base.py @@ -42,26 +42,27 @@ LOG = log.getLogger(__name__) METRICS = metrics_utils.get_metrics_logger(__name__) -# This contains a nested dictionary containing the post clean step -# hooks registered for each clean step of every interface. -# Every key of POST_CLEAN_STEP_HOOKS is an interface and its value -# is a dictionary. For this inner dictionary, the key is the name of -# the clean-step method in the interface, and the value is the post -# clean-step hook -- the function that is to be called after successful -# completion of the clean step. +# This contains a nested dictionary containing the post clean/deploy step hooks +# registered for each clean/deploy step of every interface. +# Every key is an interface and its value is a dictionary. For this inner +# dictionary, the key is the name of the clean-/deploy-step method in the +# interface, and the value is the post clean-/deploy-step hook -- the function +# that is to be called after successful completion of the clean/deploy step. # # For example: -# POST_CLEAN_STEP_HOOKS = +# _POST_STEP_HOOKS = { +# {'clean': # { # 'raid': {'create_configuration': , # 'delete_configuration': } # } +# } # # It means that method '' is to be called after # successfully completing the clean step 'create_configuration' of # raid interface. '' is to be called after # completing 'delete_configuration' of raid interface. -POST_CLEAN_STEP_HOOKS = {} +_POST_STEP_HOOKS = {'clean': {}, 'deploy': {}} VENDOR_PROPERTIES = { 'deploy_forces_oob_reboot': _( @@ -114,39 +115,70 @@ def post_clean_step_hook(interface, step): step hook. """ def decorator(func): - POST_CLEAN_STEP_HOOKS.setdefault(interface, {})[step] = func + _POST_STEP_HOOKS['clean'].setdefault(interface, {})[step] = func return func return decorator -def _get_post_clean_step_hook(node): - """Get post clean step hook for the currently executing clean step. +@METRICS.timer('post_deploy_step_hook') +def post_deploy_step_hook(interface, step): + """Decorator method for adding a post deploy step hook. - This method reads node.clean_step and returns the post clean - step hook for the currently executing clean step. + This is a mechanism for adding a post deploy step hook for a particular + deploy step. The hook will get executed after the deploy step gets + executed successfully. The hook is not invoked on failure of the deploy + step. + + Any method to be made as a hook may be decorated with + @post_deploy_step_hook mentioning the interface and step after which the + hook should be executed. A TaskManager instance and the object for the + last completed command (provided by agent) will be passed to the hook + method. The return value of this method will be ignored. Any exception + raised by this method will be treated as a failure of the deploy step and + the node will be moved to DEPLOYFAIL state. + + :param interface: name of the interface + :param step: The name of the step after which it should be executed. + :returns: A method which registers the given method as a post deploy + step hook. + """ + def decorator(func): + _POST_STEP_HOOKS['deploy'].setdefault(interface, {})[step] = func + return func + + return decorator + + +def _get_post_step_hook(node, step_type): + """Get post clean/deploy step hook for the currently executing step. :param node: a node object + :param step_type: 'clean' or 'deploy' :returns: a method if there is a post clean step hook for this clean step; None otherwise """ - interface = node.clean_step.get('interface') - step = node.clean_step.get('step') + step_obj = node.clean_step if step_type == 'clean' else node.deploy_step + interface = step_obj.get('interface') + step = step_obj.get('step') try: - return POST_CLEAN_STEP_HOOKS[interface][step] + return _POST_STEP_HOOKS[step_type][interface][step] except KeyError: pass -def _cleaning_reboot(task): - """Reboots a node out of band after a clean step that requires it. +def _post_step_reboot(task, step_type): + """Reboots a node out of band after a clean/deploy step that requires it. - If an agent clean step has 'reboot_requested': True, reboots the - node when the step is completed. Will put the node in CLEANFAIL - if the node cannot be rebooted. + If an agent step has 'reboot_requested': True, reboots the node when + the step is completed. Will put the node in CLEANFAIL/DEPLOYFAIL if + the node cannot be rebooted. :param task: a TaskManager instance + :param step_type: 'clean' or 'deploy' """ + current_step = (task.node.clean_step if step_type == 'clean' + else task.node.deploy_step) try: # NOTE(fellypefca): Call prepare_ramdisk on ensure that the # baremetal node boots back into the ramdisk after reboot. @@ -154,19 +186,25 @@ def _cleaning_reboot(task): task.driver.boot.prepare_ramdisk(task, deploy_opts) manager_utils.node_power_action(task, states.REBOOT) except Exception as e: - msg = (_('Reboot requested by clean step %(step)s failed for ' + msg = (_('Reboot requested by %(type)s step %(step)s failed for ' 'node %(node)s: %(err)s') % - {'step': task.node.clean_step, + {'step': current_step, 'node': task.node.uuid, - 'err': e}) + 'err': e, + 'type': step_type}) LOG.error(msg, exc_info=not isinstance(e, exception.IronicException)) # do not set cleaning_reboot if we didn't reboot - manager_utils.cleaning_error_handler(task, msg) + if step_type == 'clean': + manager_utils.cleaning_error_handler(task, msg) + else: + manager_utils.deploying_error_handler(task, msg) return # Signify that we've rebooted driver_internal_info = task.node.driver_internal_info - driver_internal_info['cleaning_reboot'] = True + field = ('cleaning_reboot' if step_type == 'clean' + else 'deployment_reboot') + driver_internal_info[field] = True if not driver_internal_info.get('agent_secret_token_pregenerated', False): # Wipes out the existing recorded token because the machine will # need to re-establish the token. @@ -175,8 +213,8 @@ def _cleaning_reboot(task): task.node.save() -def _get_completed_cleaning_command(task, commands): - """Returns None or a completed cleaning command from the agent. +def _get_completed_command(task, commands, step_type): + """Returns None or a completed clean/deploy command from the agent. :param task: a TaskManager instance to act on. :param commands: a set of command results from the agent, typically @@ -187,28 +225,32 @@ def _get_completed_cleaning_command(task, commands): last_command = commands[-1] - if last_command['command_name'] != 'execute_clean_step': - # catches race condition where execute_clean_step is still + if last_command['command_name'] != 'execute_%s_step' % step_type: + # catches race condition where execute_step is still # processing so the command hasn't started yet - LOG.debug('Expected agent last command to be "execute_clean_step" ' + LOG.debug('Expected agent last command to be "execute_%(type)s_step" ' 'for node %(node)s, instead got "%(command)s". Waiting ' 'for next heartbeat.', {'node': task.node.uuid, - 'command': last_command['command_name']}) + 'command': last_command['command_name'], + 'type': step_type}) return last_result = last_command.get('command_result') or {} - last_step = last_result.get('clean_step') + last_step = last_result.get('%s_step' % step_type) + current_step = (task.node.clean_step if step_type == 'clean' + else task.node.deploy_step) if last_command['command_status'] == 'RUNNING': - LOG.debug('Clean step still running for node %(node)s: %(step)s', - {'step': last_step, 'node': task.node.uuid}) + LOG.debug('%(type)s step still running for node %(node)s: %(step)s', + {'step': last_step, 'node': task.node.uuid, + 'type': step_type.capitalize()}) return elif (last_command['command_status'] == 'SUCCEEDED' - and last_step != task.node.clean_step): - # A previous clean_step was running, the new command has not yet - # started. - LOG.debug('Clean step not yet started for node %(node)s: %(step)s', - {'step': last_step, 'node': task.node.uuid}) + and last_step != current_step): + # A previous step was running, the new command has not yet started. + LOG.debug('%(type)s step not yet started for node %(node)s: %(step)s', + {'step': last_step, 'node': task.node.uuid, + 'type': step_type.capitalize()}) return else: return last_command @@ -233,32 +275,29 @@ def log_and_raise_deployment_error(task, msg, collect_logs=True, exc=None): raise exception.InstanceDeployFailure(msg) -def get_clean_steps(task, interface=None, override_priorities=None): - """Get the list of cached clean steps from the agent. +def get_steps(task, step_type, interface=None, override_priorities=None): + """Get the list of cached clean or deploy steps from the agent. - #TODO(JoshNang) move to BootInterface - - The clean steps cache is updated at the beginning of cleaning. + The steps cache is updated at the beginning of cleaning or deploy. :param task: a TaskManager object containing the node - :param interface: The interface for which clean steps + :param step_type: 'clean' or 'deploy' + :param interface: The interface for which clean/deploy steps are to be returned. If this is not provided, it returns the - clean steps for all interfaces. + steps for all interfaces. :param override_priorities: a dictionary with keys being step names and values being new priorities for them. If a step isn't in this dictionary, the step's original priority is used. - :raises NodeCleaningFailure: if the clean steps are not yet cached, - for example, when a node has just been enrolled and has not been - cleaned yet. - :returns: A list of clean step dictionaries + :returns: A list of clean/deploy step dictionaries """ node = task.node try: - all_steps = node.driver_internal_info['agent_cached_clean_steps'] + all_steps = node.driver_internal_info['agent_cached_%s_steps' + % step_type] except KeyError: - raise exception.NodeCleaningFailure(_('Cleaning steps are not yet ' - 'available for node %(node)s') - % {'node': node.uuid}) + LOG.debug('%(type)s steps are not yet available for node %(node)s', + {'type': step_type.capitalize(), 'node': node.uuid}) + return [] if interface: steps = [step.copy() for step in all_steps.get(interface, [])] @@ -277,26 +316,40 @@ def get_clean_steps(task, interface=None, override_priorities=None): return steps -def execute_clean_step(task, step): - """Execute a clean step asynchronously on the agent. +def _raise(step_type, msg): + assert step_type in ('clean', 'deploy') + exc = (exception.NodeCleaningFailure if step_type == 'clean' + else exception.InstanceDeployFailure) + raise exc(msg) - #TODO(JoshNang) move to BootInterface + +def execute_step(task, step, step_type): + """Execute a clean or deploy step asynchronously on the agent. :param task: a TaskManager object containing the node - :param step: a clean step dictionary to execute - :raises: NodeCleaningFailure if the agent does not return a command status - :returns: states.CLEANWAIT to signify the step will be completed async + :param step: a step dictionary to execute + :param step_type: 'clean' or 'deploy' + :raises: NodeCleaningFailure (clean step) or InstanceDeployFailure (deploy + step) if the agent does not return a command status. + :returns: states.CLEANWAIT/DEPLOYWAIT to signify the step will be + completed async """ client = _get_client() ports = objects.Port.list_by_node_id( task.context, task.node.id) - result = client.execute_clean_step(step, task.node, ports) + call = getattr(client, 'execute_%s_step' % step_type) + result = call(step, task.node, ports) if not result.get('command_status'): - raise exception.NodeCleaningFailure(_( + _raise(step_type, _( 'Agent on node %(node)s returned bad command result: ' '%(result)s') % {'node': task.node.uuid, 'result': result.get('command_error')}) - return states.CLEANWAIT + return states.CLEANWAIT if step_type == 'clean' else states.DEPLOYWAIT + + +def execute_clean_step(task, step): + # NOTE(dtantsur): left for compatibility with agent-based hardware types. + return execute_step(task, step, 'clean') class HeartbeatMixin(object): @@ -346,19 +399,33 @@ class HeartbeatMixin(object): """ + def refresh_steps(self, task, step_type): + """Refresh the node's cached clean steps + + :param task: a TaskManager instance + :param step_type: "clean" or "deploy" + """ + def refresh_clean_steps(self, task): """Refresh the node's cached clean steps :param task: a TaskManager instance + """ + return self.refresh_steps(task, 'clean') + def process_next_step(self, task, step_type): + """Start the next clean/deploy step if the previous one is complete. + + :param task: a TaskManager instance + :param step_type: "clean" or "deploy" """ def continue_cleaning(self, task): """Start the next cleaning step if the previous one is complete. :param task: a TaskManager instance - """ + return self.process_next_step(task, 'clean') @property def heartbeat_allowed_states(self): @@ -555,53 +622,60 @@ class AgentDeployMixin(HeartbeatMixin): 'erase_devices_metadata': CONF.deploy.erase_devices_metadata_priority, } - return get_clean_steps( - task, interface='deploy', + return get_steps( + task, 'clean', interface='deploy', override_priorities=new_priorities) - @METRICS.timer('AgentDeployMixin.refresh_clean_steps') - def refresh_clean_steps(self, task): - """Refresh the node's cached clean steps from the booted agent. + @METRICS.timer('AgentDeployMixin.refresh_steps') + def refresh_steps(self, task, step_type): + """Refresh the node's cached clean/deploy steps from the booted agent. - Gets the node's clean steps from the booted agent and caches them. + Gets the node's steps from the booted agent and caches them. The steps are cached to make get_clean_steps() calls synchronous, and - should be refreshed as soon as the agent boots to start cleaning or - if cleaning is restarted because of a cleaning version mismatch. + should be refreshed as soon as the agent boots to start cleaning/deploy + or if cleaning is restarted because of a hardware manager version + mismatch. :param task: a TaskManager instance - :raises: NodeCleaningFailure if the agent returns invalid results + :param step_type: 'clean' or 'deploy' + :raises: NodeCleaningFailure or InstanceDeployFailure if the agent + returns invalid results """ node = task.node previous_steps = node.driver_internal_info.get( - 'agent_cached_clean_steps') - LOG.debug('Refreshing agent clean step cache for node %(node)s. ' + 'agent_cached_%s_steps' % step_type) + LOG.debug('Refreshing agent %(type)s step cache for node %(node)s. ' 'Previously cached steps: %(steps)s', - {'node': node.uuid, 'steps': previous_steps}) + {'node': node.uuid, 'type': step_type, + 'steps': previous_steps}) - agent_result = self._client.get_clean_steps(node, task.ports).get( - 'command_result', {}) - missing = set(['clean_steps', 'hardware_manager_version']).difference( - agent_result) + call = getattr(self._client, 'get_%s_steps' % step_type) + agent_result = call(node, task.ports).get('command_result', {}) + missing = set(['%s_steps' % step_type, + 'hardware_manager_version']).difference(agent_result) if missing: - raise exception.NodeCleaningFailure(_( - 'agent get_clean_steps for node %(node)s returned an invalid ' - 'result. Keys: %(keys)s are missing from result: %(result)s.') + _raise(step_type, _( + 'agent get_%(type)s_steps for node %(node)s returned an ' + 'invalid result. Keys: %(keys)s are missing from result: ' + '%(result)s.') % ({'node': node.uuid, 'keys': missing, - 'result': agent_result})) + 'result': agent_result, 'type': step_type})) # agent_result['clean_steps'] looks like # {'HardwareManager': [{step1},{steps2}...], ...} steps = collections.defaultdict(list) - for step_list in agent_result['clean_steps'].values(): + for step_list in agent_result['%s_steps' % step_type].values(): for step in step_list: missing = set(['interface', 'step', 'priority']).difference( step) if missing: - raise exception.NodeCleaningFailure(_( - 'agent get_clean_steps for node %(node)s returned an ' - 'invalid clean step. Keys: %(keys)s are missing from ' - 'step: %(step)s.') % ({'node': node.uuid, - 'keys': missing, 'step': step})) + _raise(step_type, _( + 'agent get_%(type)s_steps for node %(node)s returned ' + 'an invalid %(type)s step. Keys: %(keys)s are missing' + 'from step: %(step)s.') % ({'node': node.uuid, + 'keys': missing, + 'step': step, + 'type': step_type})) steps[step['interface']].append(step) @@ -609,12 +683,14 @@ class AgentDeployMixin(HeartbeatMixin): info = node.driver_internal_info info['hardware_manager_version'] = agent_result[ 'hardware_manager_version'] - info['agent_cached_clean_steps'] = dict(steps) - info['agent_cached_clean_steps_refreshed'] = str(timeutils.utcnow()) + info['agent_cached_%s_steps' % step_type] = dict(steps) + info['agent_cached_%s_steps_refreshed' % step_type] = str( + timeutils.utcnow()) node.driver_internal_info = info node.save() - LOG.debug('Refreshed agent clean step cache for node %(node)s: ' - '%(steps)s', {'node': node.uuid, 'steps': steps}) + LOG.debug('Refreshed agent %(type)s step cache for node %(node)s: ' + '%(steps)s', {'node': node.uuid, 'steps': steps, + 'type': step_type}) @METRICS.timer('AgentDeployMixin.execute_clean_step') def execute_clean_step(self, task, step): @@ -626,24 +702,27 @@ class AgentDeployMixin(HeartbeatMixin): status :returns: states.CLEANWAIT to signify the step will be completed async """ - return execute_clean_step(task, step) + return execute_step(task, step, 'clean') - @METRICS.timer('AgentDeployMixin.continue_cleaning') - def continue_cleaning(self, task, **kwargs): - """Start the next cleaning step if the previous one is complete. + @METRICS.timer('AgentDeployMixin.process_next_step') + def process_next_step(self, task, step_type, **kwargs): + """Start the next clean/deploy step if the previous one is complete. In order to avoid errors and make agent upgrades painless, the agent compares the version of all hardware managers at the start of the - cleaning (the agent's get_clean_steps() call) and before executing - each clean step. If the version has changed between steps, the agent is - unable to tell if an ordering change will cause a cleaning issue so - it returns CLEAN_VERSION_MISMATCH. For automated cleaning, we restart - the entire cleaning cycle. For manual cleaning, we don't. + process (the agent's get_clean|deploy_steps() call) and before + executing each step. If the version has changed between steps, + the agent is unable to tell if an ordering change will cause an issue + so it returns CLEAN_VERSION_MISMATCH. For automated cleaning, we + restart the entire cleaning cycle. For manual cleaning or deploy, + we don't. - Additionally, if a clean_step includes the reboot_requested property + Additionally, if a step includes the reboot_requested property set to True, this method will coordinate the reboot once the step is completed. """ + assert step_type in ('clean', 'deploy') + node = task.node # For manual clean, the target provision state is MANAGEABLE, whereas # for automated cleaning, it is (the default) AVAILABLE. @@ -651,47 +730,61 @@ class AgentDeployMixin(HeartbeatMixin): agent_commands = self._client.get_commands_status(task.node) if not agent_commands: - if task.node.driver_internal_info.get('cleaning_reboot'): + field = ('cleaning_reboot' if step_type == 'clean' + else 'deployment_reboot') + if task.node.driver_internal_info.get(field): # Node finished a cleaning step that requested a reboot, and # this is the first heartbeat after booting. Continue cleaning. info = task.node.driver_internal_info - info.pop('cleaning_reboot', None) + info.pop(field, None) task.node.driver_internal_info = info task.node.save() - manager_utils.notify_conductor_resume_clean(task) + manager_utils.notify_conductor_resume_operation(task, + step_type) return else: # Agent has no commands whatsoever return - command = _get_completed_cleaning_command(task, agent_commands) - LOG.debug('Cleaning command status for node %(node)s on step %(step)s:' + current_step = (node.clean_step if step_type == 'clean' + else node.deploy_step) + command = _get_completed_command(task, agent_commands, step_type) + LOG.debug('%(type)s command status for node %(node)s on step %(step)s:' ' %(command)s', {'node': node.uuid, - 'step': node.clean_step, - 'command': command}) + 'step': current_step, + 'command': command, + 'type': step_type}) if not command: # Agent command in progress return if command.get('command_status') == 'FAILED': - msg = (_('Agent returned error for clean step %(step)s on node ' + msg = (_('Agent returned error for %(type)s step %(step)s on node ' '%(node)s : %(err)s.') % {'node': node.uuid, 'err': command.get('command_error'), - 'step': node.clean_step}) + 'step': current_step, + 'type': step_type}) LOG.error(msg) return manager_utils.cleaning_error_handler(task, msg) - elif command.get('command_status') == 'CLEAN_VERSION_MISMATCH': + elif command.get('command_status') in ('CLEAN_VERSION_MISMATCH', + 'DEPLOY_VERSION_MISMATCH'): # Cache the new clean steps (and 'hardware_manager_version') try: - self.refresh_clean_steps(task) + self.refresh_steps(task, step_type) except exception.NodeCleaningFailure as e: msg = (_('Could not continue cleaning on node ' '%(node)s: %(err)s.') % {'node': node.uuid, 'err': e}) LOG.exception(msg) return manager_utils.cleaning_error_handler(task, msg) + except exception.InstanceDeployFailure as e: + msg = (_('Could not continue deployment on node ' + '%(node)s: %(err)s.') % + {'node': node.uuid, 'err': e}) + LOG.exception(msg) + return manager_utils.deploying_error_handler(task, msg) if manual_clean: # Don't restart manual cleaning if agent reboots to a new @@ -708,60 +801,77 @@ class AgentDeployMixin(HeartbeatMixin): node.driver_internal_info = driver_internal_info node.save() else: - # Restart cleaning, agent must have rebooted to new version - LOG.info('During automated cleaning, node %s detected a ' - 'clean version mismatch. Resetting clean steps ' - 'and rebooting the node.', node.uuid) + # Restart the process, agent must have rebooted to new version + LOG.info('During %(type)s, node %(node)s detected a ' + '%(type)s version mismatch. Resetting %(type)s steps ' + 'and rebooting the node.', + {'type': step_type, 'node': node.uuid}) try: conductor_steps.set_node_cleaning_steps(task) - except exception.NodeCleaningFailure: + except exception.NodeCleaningFailure as e: msg = (_('Could not restart automated cleaning on node ' - '%(node)s: %(err)s.') % - {'node': node.uuid, - 'err': command.get('command_error'), + '%(node)s after step %(step)s: %(err)s.') % + {'node': node.uuid, 'err': e, 'step': node.clean_step}) LOG.exception(msg) return manager_utils.cleaning_error_handler(task, msg) + except exception.InstanceDeployFailure as e: + msg = (_('Could not restart deployment on node ' + '%(node)s after step %(step)s: %(err)s.') % + {'node': node.uuid, 'err': e, + 'step': node.deploy_step}) + LOG.exception(msg) + return manager_utils.deploying_error_handler(task, msg) - manager_utils.notify_conductor_resume_clean(task) + manager_utils.notify_conductor_resume_operation(task, step_type) elif command.get('command_status') == 'SUCCEEDED': - clean_step_hook = _get_post_clean_step_hook(node) - if clean_step_hook is not None: - LOG.debug('For node %(node)s, executing post clean step ' - 'hook %(method)s for clean step %(step)s', - {'method': clean_step_hook.__name__, + step_hook = _get_post_step_hook(node, step_type) + if step_hook is not None: + LOG.debug('For node %(node)s, executing post %(type)s step ' + 'hook %(method)s for %(type)s step %(step)s', + {'method': step_hook.__name__, 'node': node.uuid, - 'step': node.clean_step}) + 'step': current_step, + 'type': step_type}) try: - clean_step_hook(task, command) + step_hook(task, command) except Exception as e: - msg = (_('For node %(node)s, post clean step hook ' - '%(method)s failed for clean step %(step)s.' + msg = (_('For node %(node)s, post %(type)s step hook ' + '%(method)s failed for %(type)s step %(step)s.' '%(cls)s: %(error)s') % - {'method': clean_step_hook.__name__, + {'method': step_hook.__name__, 'node': node.uuid, 'error': e, 'cls': e.__class__.__name__, - 'step': node.clean_step}) + 'step': current_step, + 'type': step_type}) LOG.exception(msg) - return manager_utils.cleaning_error_handler(task, msg) + if step_type == 'clean': + return manager_utils.cleaning_error_handler(task, msg) + else: + return manager_utils.deploying_error_handler(task, msg) - if task.node.clean_step.get('reboot_requested'): - _cleaning_reboot(task) + if current_step.get('reboot_requested'): + _post_step_reboot(task, step_type) return - LOG.info('Agent on node %s returned cleaning command success, ' - 'moving to next clean step', node.uuid) - manager_utils.notify_conductor_resume_clean(task) + LOG.info('Agent on node %(node)s returned %(type)s command ' + 'success, moving to next step', + {'node': node.uuid, 'type': step_type}) + manager_utils.notify_conductor_resume_operation(task, step_type) else: - msg = (_('Agent returned unknown status for clean step %(step)s ' - 'on node %(node)s : %(err)s.') % + msg = (_('Agent returned unknown status for %(type)s step %(step)s' + ' on node %(node)s : %(err)s.') % {'node': node.uuid, 'err': command.get('command_status'), - 'step': node.clean_step}) + 'step': current_step, + 'type': step_type}) LOG.error(msg) - return manager_utils.cleaning_error_handler(task, msg) + if step_type == 'clean': + return manager_utils.cleaning_error_handler(task, msg) + else: + return manager_utils.deploying_error_handler(task, msg) @METRICS.timer('AgentDeployMixin.reboot_and_finish_deploy') def reboot_and_finish_deploy(self, task): diff --git a/ironic/drivers/modules/agent_client.py b/ironic/drivers/modules/agent_client.py index ee437ea6fe..3b26bda23e 100644 --- a/ironic/drivers/modules/agent_client.py +++ b/ironic/drivers/modules/agent_client.py @@ -163,6 +163,9 @@ class AgentClient(object): * a dictionary containing keys clean_result and clean_step for the command clean.execute_clean_step; + * a dictionary containing keys deploy_result + and deploy_step for the command + deploy.execute_deploy_step; * a string representing result message for the command standby.cache_image; * None for the command standby.sync.> @@ -336,6 +339,69 @@ class AgentClient(object): method='clean.execute_clean_step', params=params) + @METRICS.timer('AgentClient.get_deploy_steps') + def get_deploy_steps(self, node, ports): + """Get deploy steps from agent. + + :param node: A node object. + :param ports: Ports associated with the node. + :raises: IronicException when failed to issue the request or there was + a malformed response from the agent. + :raises: AgentAPIError when agent failed to execute specified command. + :returns: A dict containing command response from agent. + See :func:`get_commands_status` for a command result sample. + The value of key command_result is in the form of: + + :: + + { + 'deploy_steps': , + 'hardware_manager_version': + } + + """ + params = { + 'node': node.as_dict(secure=True), + 'ports': [port.as_dict() for port in ports] + } + return self._command(node=node, + method='deploy.get_deploy_steps', + params=params, + wait=True) + + @METRICS.timer('AgentClient.execute_deploy_step') + def execute_deploy_step(self, step, node, ports): + """Execute specified deploy step. + + :param step: A deploy step dictionary to execute. + :param node: A Node object. + :param ports: Ports associated with the node. + :raises: IronicException when failed to issue the request or there was + a malformed response from the agent. + :raises: AgentAPIError when agent failed to execute specified command. + :returns: A dict containing command response from agent. + See :func:`get_commands_status` for a command result sample. + The value of key command_result is in the form of: + + :: + + { + 'deploy_result': , + 'deploy_step': + } + + """ + params = { + 'step': step, + 'node': node.as_dict(secure=True), + 'ports': [port.as_dict() for port in ports], + 'deploy_version': node.driver_internal_info.get( + 'hardware_manager_version') + } + return self._command(node=node, + method='deploy.execute_deploy_step', + params=params) + @METRICS.timer('AgentClient.power_off') def power_off(self, node): """Soft powers off the bare metal node by shutting down ramdisk OS. diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py index d389d468da..2ec5649c6e 100644 --- a/ironic/tests/unit/conductor/test_utils.py +++ b/ironic/tests/unit/conductor/test_utils.py @@ -1689,33 +1689,30 @@ class MiscTestCase(db_base.DbTestCase): @mock.patch.object(rpcapi.ConductorAPI, 'continue_node_deploy', autospec=True) @mock.patch.object(rpcapi.ConductorAPI, 'get_topic_for', autospec=True) - def test__notify_conductor_resume_operation(self, mock_topic, - mock_rpc_call): + def test_notify_conductor_resume_operation(self, mock_topic, + mock_rpc_call): mock_topic.return_value = 'topic' with task_manager.acquire( self.context, self.node.uuid, shared=False) as task: - conductor_utils._notify_conductor_resume_operation( - task, 'deploying', 'continue_node_deploy') + conductor_utils.notify_conductor_resume_operation(task, 'deploy') mock_rpc_call.assert_called_once_with( mock.ANY, task.context, self.node.uuid, topic='topic') - @mock.patch.object(conductor_utils, '_notify_conductor_resume_operation', + @mock.patch.object(conductor_utils, 'notify_conductor_resume_operation', autospec=True) def test_notify_conductor_resume_clean(self, mock_resume): with task_manager.acquire( self.context, self.node.uuid, shared=False) as task: conductor_utils.notify_conductor_resume_clean(task) - mock_resume.assert_called_once_with( - task, 'cleaning', 'continue_node_clean') + mock_resume.assert_called_once_with(task, 'clean') - @mock.patch.object(conductor_utils, '_notify_conductor_resume_operation', + @mock.patch.object(conductor_utils, 'notify_conductor_resume_operation', autospec=True) def test_notify_conductor_resume_deploy(self, mock_resume): with task_manager.acquire( self.context, self.node.uuid, shared=False) as task: conductor_utils.notify_conductor_resume_deploy(task) - mock_resume.assert_called_once_with( - task, 'deploying', 'continue_node_deploy') + mock_resume.assert_called_once_with(task, 'deploy') @mock.patch.object(time, 'sleep', autospec=True) @mock.patch.object(fake.FakePower, 'get_power_state', autospec=True) diff --git a/ironic/tests/unit/drivers/modules/test_agent.py b/ironic/tests/unit/drivers/modules/test_agent.py index 72cf49bc80..354e041598 100644 --- a/ironic/tests/unit/drivers/modules/test_agent.py +++ b/ironic/tests/unit/drivers/modules/test_agent.py @@ -1042,33 +1042,33 @@ class TestAgentDeploy(db_base.DbTestCase): set_dhcp_provider_mock.assert_called_once_with() clean_dhcp_mock.assert_called_once_with(task) - @mock.patch.object(agent_base, 'get_clean_steps', autospec=True) - def test_get_clean_steps(self, mock_get_clean_steps): + @mock.patch.object(agent_base, 'get_steps', autospec=True) + def test_get_clean_steps(self, mock_get_steps): # Test getting clean steps mock_steps = [{'priority': 10, 'interface': 'deploy', 'step': 'erase_devices'}] - mock_get_clean_steps.return_value = mock_steps + mock_get_steps.return_value = mock_steps with task_manager.acquire(self.context, self.node.uuid) as task: steps = self.driver.get_clean_steps(task) - mock_get_clean_steps.assert_called_once_with( - task, interface='deploy', + mock_get_steps.assert_called_once_with( + task, 'clean', interface='deploy', override_priorities={'erase_devices': None, 'erase_devices_metadata': None}) self.assertEqual(mock_steps, steps) - @mock.patch.object(agent_base, 'get_clean_steps', autospec=True) - def test_get_clean_steps_config_priority(self, mock_get_clean_steps): + @mock.patch.object(agent_base, 'get_steps', autospec=True) + def test_get_clean_steps_config_priority(self, mock_get_steps): # Test that we can override the priority of get clean steps # Use 0 because it is an edge case (false-y) and used in devstack self.config(erase_devices_priority=0, group='deploy') self.config(erase_devices_metadata_priority=0, group='deploy') mock_steps = [{'priority': 10, 'interface': 'deploy', 'step': 'erase_devices'}] - mock_get_clean_steps.return_value = mock_steps + mock_get_steps.return_value = mock_steps with task_manager.acquire(self.context, self.node.uuid) as task: self.driver.get_clean_steps(task) - mock_get_clean_steps.assert_called_once_with( - task, interface='deploy', + mock_get_steps.assert_called_once_with( + task, 'clean', interface='deploy', override_priorities={'erase_devices': 0, 'erase_devices_metadata': 0}) @@ -1774,7 +1774,7 @@ class AgentRAIDTestCase(db_base.DbTestCase): } self.node = object_utils.create_test_node(self.context, **n) - @mock.patch.object(agent_base, 'get_clean_steps', autospec=True) + @mock.patch.object(agent_base, 'get_steps', autospec=True) def test_get_clean_steps(self, get_steps_mock): get_steps_mock.return_value = [ {'step': 'create_configuration', 'interface': 'raid', @@ -1789,7 +1789,7 @@ class AgentRAIDTestCase(db_base.DbTestCase): self.assertEqual(0, ret[1]['priority']) @mock.patch.object(raid, 'filter_target_raid_config') - @mock.patch.object(agent_base, 'execute_clean_step', autospec=True) + @mock.patch.object(agent_base, 'execute_step', autospec=True) def test_create_configuration(self, execute_mock, filter_target_raid_config_mock): with task_manager.acquire(self.context, self.node.uuid) as task: @@ -1802,10 +1802,11 @@ class AgentRAIDTestCase(db_base.DbTestCase): self.assertEqual( self.target_raid_config, task.node.driver_internal_info['target_raid_config']) - execute_mock.assert_called_once_with(task, self.clean_step) + execute_mock.assert_called_once_with(task, self.clean_step, + 'clean') @mock.patch.object(raid, 'filter_target_raid_config') - @mock.patch.object(agent_base, 'execute_clean_step', autospec=True) + @mock.patch.object(agent_base, 'execute_step', autospec=True) def test_create_configuration_skip_root(self, execute_mock, filter_target_raid_config_mock): with task_manager.acquire(self.context, self.node.uuid) as task: @@ -1819,13 +1820,14 @@ class AgentRAIDTestCase(db_base.DbTestCase): return_value = task.driver.raid.create_configuration( task, create_root_volume=False) self.assertEqual(states.CLEANWAIT, return_value) - execute_mock.assert_called_once_with(task, self.clean_step) + execute_mock.assert_called_once_with(task, self.clean_step, + 'clean') self.assertEqual( exp_target_raid_config, task.node.driver_internal_info['target_raid_config']) @mock.patch.object(raid, 'filter_target_raid_config') - @mock.patch.object(agent_base, 'execute_clean_step', autospec=True) + @mock.patch.object(agent_base, 'execute_step', autospec=True) def test_create_configuration_skip_nonroot(self, execute_mock, filter_target_raid_config_mock): with task_manager.acquire(self.context, self.node.uuid) as task: @@ -1839,13 +1841,14 @@ class AgentRAIDTestCase(db_base.DbTestCase): return_value = task.driver.raid.create_configuration( task, create_nonroot_volumes=False) self.assertEqual(states.CLEANWAIT, return_value) - execute_mock.assert_called_once_with(task, self.clean_step) + execute_mock.assert_called_once_with(task, self.clean_step, + 'clean') self.assertEqual( exp_target_raid_config, task.node.driver_internal_info['target_raid_config']) @mock.patch.object(raid, 'filter_target_raid_config') - @mock.patch.object(agent_base, 'execute_clean_step', autospec=True) + @mock.patch.object(agent_base, 'execute_step', autospec=True) def test_create_configuration_no_target_raid_config_after_skipping( self, execute_mock, filter_target_raid_config_mock): with task_manager.acquire(self.context, self.node.uuid) as task: @@ -1860,7 +1863,7 @@ class AgentRAIDTestCase(db_base.DbTestCase): self.assertFalse(execute_mock.called) @mock.patch.object(raid, 'filter_target_raid_config') - @mock.patch.object(agent_base, 'execute_clean_step', autospec=True) + @mock.patch.object(agent_base, 'execute_step', autospec=True) def test_create_configuration_empty_target_raid_config( self, execute_mock, filter_target_raid_config_mock): execute_mock.return_value = states.CLEANING @@ -1890,7 +1893,7 @@ class AgentRAIDTestCase(db_base.DbTestCase): self.node.clean_step = {'interface': 'raid', 'step': 'create_configuration'} command = {'command_result': {'clean_result': 'foo'}} - create_hook = agent_base._get_post_clean_step_hook(self.node) + create_hook = agent_base._get_post_step_hook(self.node, 'clean') with task_manager.acquire(self.context, self.node.uuid) as task: create_hook(task, command) update_raid_info_mock.assert_called_once_with(task.node, 'foo') @@ -1906,13 +1909,14 @@ class AgentRAIDTestCase(db_base.DbTestCase): task, command) self.assertFalse(update_raid_info_mock.called) - @mock.patch.object(agent_base, 'execute_clean_step', autospec=True) + @mock.patch.object(agent_base, 'execute_step', autospec=True) def test_delete_configuration(self, execute_mock): execute_mock.return_value = states.CLEANING with task_manager.acquire(self.context, self.node.uuid) as task: return_value = task.driver.raid.delete_configuration(task) - execute_mock.assert_called_once_with(task, self.clean_step) + execute_mock.assert_called_once_with(task, self.clean_step, + 'clean') self.assertEqual(states.CLEANING, return_value) def test__delete_configuration_final(self): @@ -1931,7 +1935,7 @@ class AgentRAIDTestCase(db_base.DbTestCase): 'step': 'delete_configuration'} self.node.raid_config = {'foo': 'bar'} command = {'command_result': {'clean_result': 'foo'}} - delete_hook = agent_base._get_post_clean_step_hook(self.node) + delete_hook = agent_base._get_post_step_hook(self.node, 'clean') with task_manager.acquire(self.context, self.node.uuid) as task: delete_hook(task, command) diff --git a/ironic/tests/unit/drivers/modules/test_agent_base.py b/ironic/tests/unit/drivers/modules/test_agent_base.py index 5eb64e6e0a..62b2e892f6 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base.py @@ -222,7 +222,7 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): autospec=True) @mock.patch.object(agent_base.HeartbeatMixin, 'reboot_to_instance', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) def test_heartbeat_in_maintenance(self, ncrc_mock, rti_mock, cd_mock): # NOTE(pas-ha) checking only for states that are not noop @@ -253,7 +253,7 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): autospec=True) @mock.patch.object(agent_base.HeartbeatMixin, 'reboot_to_instance', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) def test_heartbeat_in_maintenance_abort(self, ncrc_mock, rti_mock, cd_mock): @@ -289,7 +289,7 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): autospec=True) @mock.patch.object(agent_base.HeartbeatMixin, 'reboot_to_instance', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) def test_heartbeat_with_reservation(self, ncrc_mock, rti_mock, cd_mock): # NOTE(pas-ha) checking only for states that are not noop @@ -316,7 +316,7 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): autospec=True) @mock.patch.object(agent_base.HeartbeatMixin, 'reboot_to_instance', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) def test_heartbeat_noops_in_wrong_state(self, ncrc_mock, rti_mock, cd_mock, log_mock): @@ -343,7 +343,7 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): autospec=True) @mock.patch.object(agent_base.HeartbeatMixin, 'reboot_to_instance', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) def test_heartbeat_noops_in_wrong_state2(self, ncrc_mock, rti_mock, cd_mock): @@ -426,10 +426,10 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): @mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True) @mock.patch.object(agent_base.HeartbeatMixin, - 'refresh_clean_steps', autospec=True) + 'refresh_steps', autospec=True) @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) def test_heartbeat_resume_clean(self, mock_notify, mock_set_steps, mock_refresh, mock_touch): @@ -441,17 +441,17 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): self.deploy.heartbeat(task, 'http://127.0.0.1:8080', '1.0.0') mock_touch.assert_called_once_with(mock.ANY) - mock_refresh.assert_called_once_with(mock.ANY, task) - mock_notify.assert_called_once_with(task) + mock_refresh.assert_called_once_with(mock.ANY, task, 'clean') + mock_notify.assert_called_once_with(task, 'clean') mock_set_steps.assert_called_once_with(task) @mock.patch.object(manager_utils, 'cleaning_error_handler') @mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True) @mock.patch.object(agent_base.HeartbeatMixin, - 'refresh_clean_steps', autospec=True) + 'refresh_steps', autospec=True) @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) def test_heartbeat_resume_clean_fails(self, mock_notify, mock_set_steps, mock_refresh, mock_touch, @@ -1496,7 +1496,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.assertFalse(prepare_mock.called) self.assertFalse(failed_state_mock.called) - @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) @@ -1519,19 +1519,20 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): with task_manager.acquire(self.context, self.node['uuid'], shared=False) as task: self.deploy.continue_cleaning(task) - notify_mock.assert_called_once_with(task) + notify_mock.assert_called_once_with(task, 'clean') @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', spec_set=True, autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - def test__cleaning_reboot(self, mock_reboot, mock_prepare, mock_build_opt): + def test__post_step_reboot(self, mock_reboot, mock_prepare, + mock_build_opt): with task_manager.acquire(self.context, self.node['uuid'], shared=False) as task: i_info = task.node.driver_internal_info i_info['agent_secret_token'] = 'magicvalue01' task.node.driver_internal_info = i_info - agent_base._cleaning_reboot(task) + agent_base._post_step_reboot(task, 'clean') self.assertTrue(mock_build_opt.called) self.assertTrue(mock_prepare.called) mock_reboot.assert_called_once_with(task, states.REBOOT) @@ -1543,7 +1544,27 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', spec_set=True, autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - def test__cleaning_reboot_pregenerated_token( + def test__post_step_reboot_deploy(self, mock_reboot, mock_prepare, + mock_build_opt): + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + i_info = task.node.driver_internal_info + i_info['agent_secret_token'] = 'magicvalue01' + task.node.driver_internal_info = i_info + agent_base._post_step_reboot(task, 'deploy') + self.assertTrue(mock_build_opt.called) + self.assertTrue(mock_prepare.called) + mock_reboot.assert_called_once_with(task, states.REBOOT) + self.assertTrue( + task.node.driver_internal_info['deployment_reboot']) + self.assertNotIn('agent_secret_token', + task.node.driver_internal_info) + + @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) + @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', spec_set=True, + autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + def test__post_step_reboot_pregenerated_token( self, mock_reboot, mock_prepare, mock_build_opt): with task_manager.acquire(self.context, self.node['uuid'], shared=False) as task: @@ -1551,7 +1572,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): i_info['agent_secret_token'] = 'magicvalue01' i_info['agent_secret_token_pregenerated'] = True task.node.driver_internal_info = i_info - agent_base._cleaning_reboot(task) + agent_base._post_step_reboot(task, 'clean') self.assertTrue(mock_build_opt.called) self.assertTrue(mock_prepare.called) mock_reboot.assert_called_once_with(task, states.REBOOT) @@ -1563,18 +1584,35 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): autospec=True) @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) - def test__cleaning_reboot_fail(self, mock_reboot, mock_handler, - mock_prepare, mock_build_opt): + def test__post_step_reboot_fail(self, mock_reboot, mock_handler, + mock_prepare, mock_build_opt): mock_reboot.side_effect = RuntimeError("broken") with task_manager.acquire(self.context, self.node['uuid'], shared=False) as task: - agent_base._cleaning_reboot(task) + agent_base._post_step_reboot(task, 'clean') mock_reboot.assert_called_once_with(task, states.REBOOT) mock_handler.assert_called_once_with(task, mock.ANY) self.assertNotIn('cleaning_reboot', task.node.driver_internal_info) + @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) + @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', spec_set=True, + autospec=True) + @mock.patch.object(manager_utils, 'deploying_error_handler', autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + def test__post_step_reboot_fail_deploy(self, mock_reboot, mock_handler, + mock_prepare, mock_build_opt): + mock_reboot.side_effect = RuntimeError("broken") + + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + agent_base._post_step_reboot(task, 'deploy') + mock_reboot.assert_called_once_with(task, states.REBOOT) + mock_handler.assert_called_once_with(task, mock.ANY) + self.assertNotIn('deployment_reboot', + task.node.driver_internal_info) + @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', spec_set=True, autospec=True) @@ -1603,7 +1641,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.deploy.continue_cleaning(task) reboot_mock.assert_called_once_with(task, states.REBOOT) - @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) @@ -1621,16 +1659,17 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.node.save() # Represents a freshly booted agent with no commands status_mock.return_value = [] + with task_manager.acquire(self.context, self.node['uuid'], shared=False) as task: self.deploy.continue_cleaning(task) - notify_mock.assert_called_once_with(task) + notify_mock.assert_called_once_with(task, 'clean') self.assertNotIn('cleaning_reboot', task.node.driver_internal_info) @mock.patch.object(agent_base, - '_get_post_clean_step_hook', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + '_get_post_step_hook', autospec=True) + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) @@ -1653,14 +1692,14 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): shared=False) as task: self.deploy.continue_cleaning(task) - get_hook_mock.assert_called_once_with(task.node) + get_hook_mock.assert_called_once_with(task.node, 'clean') hook_mock.assert_called_once_with(task, command_status) - notify_mock.assert_called_once_with(task) + notify_mock.assert_called_once_with(task, 'clean') - @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) @mock.patch.object(agent_base, - '_get_post_clean_step_hook', autospec=True) + '_get_post_step_hook', autospec=True) @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) @@ -1685,12 +1724,12 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): shared=False) as task: self.deploy.continue_cleaning(task) - get_hook_mock.assert_called_once_with(task.node) + get_hook_mock.assert_called_once_with(task.node, 'clean') hook_mock.assert_called_once_with(task, command_status) error_handler_mock.assert_called_once_with(task, mock.ANY) self.assertFalse(notify_mock.called) - @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) @@ -1719,7 +1758,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.deploy.continue_cleaning(task) self.assertFalse(notify_mock.called) - @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) @@ -1752,10 +1791,10 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) @mock.patch.object(agent_base.AgentDeployMixin, - 'refresh_clean_steps', autospec=True) + 'refresh_steps', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) def _test_continue_cleaning_clean_version_mismatch( @@ -1772,8 +1811,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): with task_manager.acquire(self.context, self.node.uuid, shared=False) as task: self.deploy.continue_cleaning(task) - notify_mock.assert_called_once_with(task) - refresh_steps_mock.assert_called_once_with(mock.ANY, task) + notify_mock.assert_called_once_with(task, 'clean') + refresh_steps_mock.assert_called_once_with(mock.ANY, task, 'clean') if manual: self.assertFalse( task.node.driver_internal_info['skip_current_clean_step']) @@ -1792,10 +1831,10 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): @mock.patch.object(manager_utils, 'cleaning_error_handler', autospec=True) @mock.patch.object(conductor_steps, 'set_node_cleaning_steps', autospec=True) - @mock.patch.object(manager_utils, 'notify_conductor_resume_clean', + @mock.patch.object(manager_utils, 'notify_conductor_resume_operation', autospec=True) @mock.patch.object(agent_base.AgentDeployMixin, - 'refresh_clean_steps', autospec=True) + 'refresh_steps', autospec=True) @mock.patch.object(agent_client.AgentClient, 'get_commands_status', autospec=True) def test_continue_cleaning_clean_version_mismatch_fail( @@ -1816,7 +1855,7 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.deploy.continue_cleaning(task) status_mock.assert_called_once_with(mock.ANY, task.node) - refresh_steps_mock.assert_called_once_with(mock.ANY, task) + refresh_steps_mock.assert_called_once_with(mock.ANY, task, 'clean') error_mock.assert_called_once_with(task, mock.ANY) self.assertFalse(notify_mock.called) self.assertFalse(steps_mock.called) @@ -1836,37 +1875,8 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): self.deploy.continue_cleaning(task) error_mock.assert_called_once_with(task, mock.ANY) - def _test_clean_step_hook(self, hook_dict_mock): - """Helper method for unit tests related to clean step hooks. - - This is a helper method for other unit tests related to - clean step hooks. It acceps a mock 'hook_dict_mock' which is - a MagicMock and sets it up to function as a mock dictionary. - After that, it defines a dummy hook_method for two clean steps - raid.create_configuration and raid.delete_configuration. - - :param hook_dict_mock: An instance of mock.MagicMock() which - is the mocked value of agent_base.POST_CLEAN_STEP_HOOKS - :returns: a tuple, where the first item is the hook method created - by this method and second item is the backend dictionary for - the mocked hook_dict_mock - """ - hook_dict = {} - - def get(key, default): - return hook_dict.get(key, default) - - def getitem(self, key): - return hook_dict[key] - - def setdefault(key, default): - if key not in hook_dict: - hook_dict[key] = default - return hook_dict[key] - - hook_dict_mock.get = get - hook_dict_mock.__getitem__ = getitem - hook_dict_mock.setdefault = setdefault + def _test_clean_step_hook(self): + """Helper method for unit tests related to clean step hooks.""" some_function_mock = mock.MagicMock() @agent_base.post_clean_step_hook( @@ -1876,43 +1886,41 @@ class AgentDeployMixinTest(AgentDeployMixinBaseTest): def hook_method(): some_function_mock('some-arguments') - return hook_method, hook_dict + return hook_method - @mock.patch.object(agent_base, 'POST_CLEAN_STEP_HOOKS', - spec_set=dict) - def test_post_clean_step_hook(self, hook_dict_mock): + @mock.patch.object(agent_base, '_POST_STEP_HOOKS', + {'clean': {}, 'deploy': {}}) + def test_post_clean_step_hook(self): # This unit test makes sure that hook methods are registered # properly and entries are made in # agent_base.POST_CLEAN_STEP_HOOKS - hook_method, hook_dict = self._test_clean_step_hook(hook_dict_mock) - self.assertEqual(hook_method, - hook_dict['raid']['create_configuration']) - self.assertEqual(hook_method, - hook_dict['raid']['delete_configuration']) + hook_method = self._test_clean_step_hook() + hooks = agent_base._POST_STEP_HOOKS['clean'] + self.assertEqual(hook_method, hooks['raid']['create_configuration']) + self.assertEqual(hook_method, hooks['raid']['delete_configuration']) - @mock.patch.object(agent_base, 'POST_CLEAN_STEP_HOOKS', - spec_set=dict) - def test__get_post_clean_step_hook(self, hook_dict_mock): - # Check if agent_base._get_post_clean_step_hook can get + @mock.patch.object(agent_base, '_POST_STEP_HOOKS', + {'clean': {}, 'deploy': {}}) + def test__get_post_step_hook(self): + # Check if agent_base._get_post_step_hook can get # clean step for which hook is registered. - hook_method, hook_dict = self._test_clean_step_hook(hook_dict_mock) + hook_method = self._test_clean_step_hook() self.node.clean_step = {'step': 'create_configuration', 'interface': 'raid'} self.node.save() - hook_returned = agent_base._get_post_clean_step_hook(self.node) + hook_returned = agent_base._get_post_step_hook(self.node, 'clean') self.assertEqual(hook_method, hook_returned) - @mock.patch.object(agent_base, 'POST_CLEAN_STEP_HOOKS', - spec_set=dict) - def test__get_post_clean_step_hook_no_hook_registered( - self, hook_dict_mock): - # Make sure agent_base._get_post_clean_step_hook returns + @mock.patch.object(agent_base, '_POST_STEP_HOOKS', + {'clean': {}, 'deploy': {}}) + def test__get_post_step_hook_no_hook_registered(self): + # Make sure agent_base._get_post_step_hook returns # None when no clean step hook is registered for the clean step. - hook_method, hook_dict = self._test_clean_step_hook(hook_dict_mock) + self._test_clean_step_hook() self.node.clean_step = {'step': 'some-clean-step', 'interface': 'some-other-interface'} self.node.save() - hook_returned = agent_base._get_post_clean_step_hook(self.node) + hook_returned = agent_base._get_post_step_hook(self.node, 'clean') self.assertIsNone(hook_returned) @mock.patch.object(manager_utils, 'restore_power_state_if_needed', @@ -1982,16 +1990,22 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest): ] } } + # NOTE(dtantsur): deploy steps are structurally identical to clean + # steps, reusing self.clean_steps for simplicity + self.deploy_steps = { + 'hardware_manager_version': '1', + 'deploy_steps': self.clean_steps['clean_steps'], + } @mock.patch.object(agent_client.AgentClient, 'get_clean_steps', autospec=True) - def test_refresh_clean_steps(self, client_mock): + def test_refresh_steps(self, client_mock): client_mock.return_value = { 'command_result': self.clean_steps} with task_manager.acquire( self.context, self.node.uuid, shared=False) as task: - self.deploy.refresh_clean_steps(task) + self.deploy.refresh_steps(task, 'clean') client_mock.assert_called_once_with(mock.ANY, task.node, task.ports) @@ -2010,9 +2024,36 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest): self.assertEqual([self.clean_steps['clean_steps'][ 'SpecificHardwareManager'][1]], steps['raid']) + @mock.patch.object(agent_client.AgentClient, 'get_deploy_steps', + autospec=True) + def test_refresh_steps_deploy(self, client_mock): + client_mock.return_value = { + 'command_result': self.deploy_steps} + + with task_manager.acquire( + self.context, self.node.uuid, shared=False) as task: + self.deploy.refresh_steps(task, 'deploy') + + client_mock.assert_called_once_with(mock.ANY, task.node, + task.ports) + self.assertEqual('1', task.node.driver_internal_info[ + 'hardware_manager_version']) + self.assertIn('agent_cached_deploy_steps_refreshed', + task.node.driver_internal_info) + steps = task.node.driver_internal_info['agent_cached_deploy_steps'] + self.assertEqual({'deploy', 'raid'}, set(steps)) + # Since steps are returned in dicts, they have non-deterministic + # ordering + self.assertIn(self.clean_steps['clean_steps'][ + 'GenericHardwareManager'][0], steps['deploy']) + self.assertIn(self.clean_steps['clean_steps'][ + 'SpecificHardwareManager'][0], steps['deploy']) + self.assertEqual([self.clean_steps['clean_steps'][ + 'SpecificHardwareManager'][1]], steps['raid']) + @mock.patch.object(agent_client.AgentClient, 'get_clean_steps', autospec=True) - def test_refresh_clean_steps_missing_steps(self, client_mock): + def test_refresh_steps_missing_steps(self, client_mock): del self.clean_steps['clean_steps'] client_mock.return_value = { 'command_result': self.clean_steps} @@ -2021,14 +2062,14 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest): self.context, self.node.uuid, shared=False) as task: self.assertRaisesRegex(exception.NodeCleaningFailure, 'invalid result', - self.deploy.refresh_clean_steps, - task) + self.deploy.refresh_steps, + task, 'clean') client_mock.assert_called_once_with(mock.ANY, task.node, task.ports) @mock.patch.object(agent_client.AgentClient, 'get_clean_steps', autospec=True) - def test_refresh_clean_steps_missing_interface(self, client_mock): + def test_refresh_steps_missing_interface(self, client_mock): step = self.clean_steps['clean_steps']['SpecificHardwareManager'][1] del step['interface'] client_mock.return_value = { @@ -2038,16 +2079,16 @@ class TestRefreshCleanSteps(AgentDeployMixinBaseTest): self.context, self.node.uuid, shared=False) as task: self.assertRaisesRegex(exception.NodeCleaningFailure, 'invalid clean step', - self.deploy.refresh_clean_steps, - task) + self.deploy.refresh_steps, + task, 'clean') client_mock.assert_called_once_with(mock.ANY, task.node, task.ports) -class CleanStepMethodsTestCase(db_base.DbTestCase): +class StepMethodsTestCase(db_base.DbTestCase): def setUp(self): - super(CleanStepMethodsTestCase, self).setUp() + super(StepMethodsTestCase, self).setUp() self.clean_steps = { 'deploy': [ @@ -2072,10 +2113,10 @@ class CleanStepMethodsTestCase(db_base.DbTestCase): self.ports = [object_utils.create_test_port(self.context, node_id=self.node.id)] - def test_agent_get_clean_steps(self): + def test_agent_get_steps(self): with task_manager.acquire( self.context, self.node.uuid, shared=False) as task: - response = agent_base.get_clean_steps(task) + response = agent_base.get_steps(task, 'clean') # Since steps are returned in dicts, they have non-deterministic # ordering @@ -2084,40 +2125,55 @@ class CleanStepMethodsTestCase(db_base.DbTestCase): self.assertIn(self.clean_steps['deploy'][1], response) self.assertIn(self.clean_steps['raid'][0], response) - def test_get_clean_steps_custom_interface(self): + def test_agent_get_steps_deploy(self): with task_manager.acquire( self.context, self.node.uuid, shared=False) as task: - response = agent_base.get_clean_steps(task, interface='raid') + task.node.driver_internal_info = { + 'agent_cached_deploy_steps': self.clean_steps + } + response = agent_base.get_steps(task, 'deploy') + + # Since steps are returned in dicts, they have non-deterministic + # ordering + self.assertThat(response, matchers.HasLength(3)) + self.assertIn(self.clean_steps['deploy'][0], response) + self.assertIn(self.clean_steps['deploy'][1], response) + self.assertIn(self.clean_steps['raid'][0], response) + + def test_get_steps_custom_interface(self): + with task_manager.acquire( + self.context, self.node.uuid, shared=False) as task: + response = agent_base.get_steps(task, 'clean', interface='raid') self.assertThat(response, matchers.HasLength(1)) self.assertEqual(self.clean_steps['raid'], response) - def test_get_clean_steps_override_priorities(self): + def test_get_steps_override_priorities(self): with task_manager.acquire( self.context, self.node.uuid, shared=False) as task: new_priorities = {'create_configuration': 42} - response = agent_base.get_clean_steps( - task, interface='raid', override_priorities=new_priorities) + response = agent_base.get_steps( + task, 'clean', interface='raid', + override_priorities=new_priorities) self.assertEqual(42, response[0]['priority']) - def test_get_clean_steps_override_priorities_none(self): + def test_get_steps_override_priorities_none(self): with task_manager.acquire( self.context, self.node.uuid, shared=False) as task: # this is simulating the default value of a configuration option new_priorities = {'create_configuration': None} - response = agent_base.get_clean_steps( - task, interface='raid', override_priorities=new_priorities) + response = agent_base.get_steps( + task, 'clean', interface='raid', + override_priorities=new_priorities) self.assertEqual(10, response[0]['priority']) - def test_get_clean_steps_missing_steps(self): + def test_get_steps_missing_steps(self): info = self.node.driver_internal_info del info['agent_cached_clean_steps'] self.node.driver_internal_info = info self.node.save() with task_manager.acquire( self.context, self.node.uuid, shared=False) as task: - self.assertRaises(exception.NodeCleaningFailure, - agent_base.get_clean_steps, - task) + self.assertEqual([], agent_base.get_steps(task, 'clean')) @mock.patch('ironic.objects.Port.list_by_node_id', spec_set=types.FunctionType) @@ -2130,11 +2186,25 @@ class CleanStepMethodsTestCase(db_base.DbTestCase): with task_manager.acquire( self.context, self.node.uuid, shared=False) as task: - response = agent_base.execute_clean_step( - task, - self.clean_steps['deploy'][0]) + response = agent_base.execute_step( + task, self.clean_steps['deploy'][0], 'clean') self.assertEqual(states.CLEANWAIT, response) + @mock.patch('ironic.objects.Port.list_by_node_id', + spec_set=types.FunctionType) + @mock.patch.object(agent_client.AgentClient, 'execute_deploy_step', + autospec=True) + def test_execute_deploy_step(self, client_mock, list_ports_mock): + client_mock.return_value = { + 'command_status': 'SUCCEEDED'} + list_ports_mock.return_value = self.ports + + with task_manager.acquire( + self.context, self.node.uuid, shared=False) as task: + response = agent_base.execute_step( + task, self.clean_steps['deploy'][0], 'deploy') + self.assertEqual(states.DEPLOYWAIT, response) + @mock.patch('ironic.objects.Port.list_by_node_id', spec_set=types.FunctionType) @mock.patch.object(agent_client.AgentClient, 'execute_clean_step', @@ -2146,9 +2216,8 @@ class CleanStepMethodsTestCase(db_base.DbTestCase): with task_manager.acquire( self.context, self.node.uuid, shared=False) as task: - response = agent_base.execute_clean_step( - task, - self.clean_steps['deploy'][0]) + response = agent_base.execute_step( + task, self.clean_steps['deploy'][0], 'clean') self.assertEqual(states.CLEANWAIT, response) @mock.patch('ironic.objects.Port.list_by_node_id', @@ -2163,7 +2232,6 @@ class CleanStepMethodsTestCase(db_base.DbTestCase): with task_manager.acquire( self.context, self.node.uuid, shared=False) as task: - response = agent_base.execute_clean_step( - task, - self.clean_steps['deploy'][0]) + response = agent_base.execute_step( + task, self.clean_steps['deploy'][0], 'clean') self.assertEqual(states.CLEANWAIT, response) diff --git a/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py b/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py index 3c382f7922..ab887e96b1 100644 --- a/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py +++ b/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py @@ -968,7 +968,7 @@ class ISCSIDeployTestCase(db_base.DbTestCase): tear_down_cleaning_mock.assert_called_once_with( task, manage_boot=True) - @mock.patch.object(agent_base, 'get_clean_steps', autospec=True) + @mock.patch.object(agent_base, 'get_steps', autospec=True) def test_get_clean_steps(self, mock_get_clean_steps): # Test getting clean steps self.config(group='deploy', erase_devices_priority=10) @@ -981,19 +981,19 @@ class ISCSIDeployTestCase(db_base.DbTestCase): with task_manager.acquire(self.context, self.node.uuid) as task: steps = task.driver.deploy.get_clean_steps(task) mock_get_clean_steps.assert_called_once_with( - task, interface='deploy', + task, 'clean', interface='deploy', override_priorities={ 'erase_devices': 10, 'erase_devices_metadata': 5}) self.assertEqual(mock_steps, steps) - @mock.patch.object(agent_base, 'execute_clean_step', autospec=True) + @mock.patch.object(agent_base, 'execute_step', autospec=True) def test_execute_clean_step(self, agent_execute_clean_step_mock): with task_manager.acquire(self.context, self.node.uuid) as task: task.driver.deploy.execute_clean_step( task, {'some-step': 'step-info'}) agent_execute_clean_step_mock.assert_called_once_with( - task, {'some-step': 'step-info'}) + task, {'some-step': 'step-info'}, 'clean') @mock.patch.object(agent_base.AgentDeployMixin, 'reboot_and_finish_deploy', autospec=True)