Add CLEANWAIT state

This patch adds the CLEANWAIT state. When a node is in CLEANWAIT means
that the ramdisk is executing a clean step (async). When the node is
in CLEANING state it means that the conductor is executing a clean step
(sync).

This is the first patch of a series that aim to make nodes in CLEANWAIT
abortable. We still need a way need some way to tell if a step is
abortable; aborting steps could have negative effects such as bricking
things.

Depends-On: I195ecd90e7e4165504da5ac330cee3fc7c3039c2
Co-Authored-By: Jim Rollenhagen <jim@jimrollenhagen.com>
Partial-Bug: #1455825
Change-Id: Ic2bc4f147f68947f53d341fda5e0c8d7b594a553
This commit is contained in:
Lucas Alvares Gomes 2015-07-09 16:29:12 +01:00
parent 7f88443bce
commit b6ed09e297
12 changed files with 192 additions and 49 deletions

View File

@ -132,6 +132,13 @@ represented in target_provision_state.
CLEANING = 'cleaning' CLEANING = 'cleaning'
""" Node is being automatically cleaned to prepare it for provisioning. """ """ Node is being automatically cleaned to prepare it for provisioning. """
CLEANWAIT = 'clean wait'
""" Node is waiting to be cleaned.
This will be the node `provision_state` while the node is waiting for
the driver to finish cleaning step.
"""
CLEANFAIL = 'clean failed' CLEANFAIL = 'clean failed'
""" Node failed cleaning. This requires operator intervention to resolve. """ """ Node failed cleaning. This requires operator intervention to resolve. """
@ -220,6 +227,7 @@ machine.add_state(DEPLOYFAIL, target=ACTIVE, **watchers)
# Add clean* states # Add clean* states
machine.add_state(CLEANING, target=AVAILABLE, **watchers) machine.add_state(CLEANING, target=AVAILABLE, **watchers)
machine.add_state(CLEANWAIT, target=AVAILABLE, **watchers)
machine.add_state(CLEANFAIL, target=AVAILABLE, **watchers) machine.add_state(CLEANFAIL, target=AVAILABLE, **watchers)
# Add delete* states # Add delete* states
@ -278,6 +286,11 @@ machine.add_transition(CLEANING, AVAILABLE, 'done')
# If cleaning fails, wait for operator intervention # If cleaning fails, wait for operator intervention
machine.add_transition(CLEANING, CLEANFAIL, 'fail') machine.add_transition(CLEANING, CLEANFAIL, 'fail')
machine.add_transition(CLEANWAIT, CLEANFAIL, 'fail')
# A deployment may also wait on external callbacks
machine.add_transition(CLEANING, CLEANWAIT, 'wait')
machine.add_transition(CLEANWAIT, CLEANING, 'resume')
# An operator may want to move a CLEANFAIL node to MANAGEABLE, to perform # An operator may want to move a CLEANFAIL node to MANAGEABLE, to perform
# other actions like zapping # other actions like zapping

View File

@ -830,7 +830,7 @@ class ConductorManager(periodic_task.PeriodicTasks):
:param context: an admin context. :param context: an admin context.
:param node_id: the id or uuid of a node. :param node_id: the id or uuid of a node.
:raises: InvalidStateRequested if the node is not in CLEANING state :raises: InvalidStateRequested if the node is not in CLEANWAIT state
:raises: NoFreeConductorWorker when there is no free worker to start :raises: NoFreeConductorWorker when there is no free worker to start
async task async task
:raises: NodeLocked if node is locked by another conductor. :raises: NodeLocked if node is locked by another conductor.
@ -841,15 +841,27 @@ class ConductorManager(periodic_task.PeriodicTasks):
with task_manager.acquire(context, node_id, shared=False, with task_manager.acquire(context, node_id, shared=False,
purpose='node cleaning') as task: purpose='node cleaning') as task:
if task.node.provision_state != states.CLEANING: # TODO(lucasagomes): CLEANING here for backwards compat
# with previous code, otherwise nodes in CLEANING when this
# is deployed would fail. Should be removed once the M
# release starts.
if task.node.provision_state not in (states.CLEANWAIT,
states.CLEANING):
raise exception.InvalidStateRequested(_( raise exception.InvalidStateRequested(_(
'Cannot continue cleaning on %(node)s, node is in ' 'Cannot continue cleaning on %(node)s, node is in '
'%(state)s state, should be %(clean_state)s') % '%(state)s state, should be %(clean_state)s') %
{'node': task.node.uuid, {'node': task.node.uuid,
'state': task.node.provision_state, 'state': task.node.provision_state,
'clean_state': states.CLEANING}) 'clean_state': states.CLEANWAIT})
task.set_spawn_error_hook(cleaning_error_handler, task.node, task.set_spawn_error_hook(cleaning_error_handler, task.node,
'Failed to run next clean step') 'Failed to run next clean step')
# TODO(lucasagomes): This conditional is here for backwards
# compat with previous code. Should be removed once the M
# release starts.
if task.node.provision_state == states.CLEANWAIT:
task.process_event('resume')
task.spawn_after( task.spawn_after(
self._spawn_worker, self._spawn_worker,
self._do_next_clean_step, self._do_next_clean_step,
@ -892,11 +904,20 @@ class ConductorManager(periodic_task.PeriodicTasks):
% {'node': node.uuid, 'e': e}) % {'node': node.uuid, 'e': e})
LOG.exception(msg) LOG.exception(msg)
return cleaning_error_handler(task, msg) return cleaning_error_handler(task, msg)
# TODO(lucasagomes): Should be removed once the M release starts
if prepare_result == states.CLEANING: if prepare_result == states.CLEANING:
LOG.warning(_LW('Returning CLEANING for asynchronous prepare '
'cleaning has been deprecated. Please use '
'CLEANWAIT instead.'))
prepare_result = states.CLEANWAIT
if prepare_result == states.CLEANWAIT:
# Prepare is asynchronous, the deploy driver will need to # Prepare is asynchronous, the deploy driver will need to
# set node.driver_internal_info['clean_steps'] and # set node.driver_internal_info['clean_steps'] and
# node.clean_step and then make an RPC call to # node.clean_step and then make an RPC call to
# continue_node_cleaning to start cleaning. # continue_node_cleaning to start cleaning.
task.process_event('wait')
return return
set_node_cleaning_steps(task) set_node_cleaning_steps(task)
@ -951,15 +972,23 @@ class ConductorManager(periodic_task.PeriodicTasks):
cleaning_error_handler(task, msg) cleaning_error_handler(task, msg)
return return
# Check if the step is done or not. The step should return # TODO(lucasagomes): Should be removed once the M release starts
# states.CLEANING if the step is still being executed, or
# None if the step is done.
if result == states.CLEANING: if result == states.CLEANING:
LOG.warning(_LW('Returning CLEANING for asynchronous clean '
'steps has been deprecated. Please use '
'CLEANWAIT instead.'))
result = states.CLEANWAIT
# Check if the step is done or not. The step should return
# states.CLEANWAIT if the step is still being executed, or
# None if the step is done.
if result == states.CLEANWAIT:
# Kill this worker, the async step will make an RPC call to # Kill this worker, the async step will make an RPC call to
# continue_node_clean to continue cleaning # continue_node_clean to continue cleaning
LOG.info(_LI('Clean step %(step)s on node %(node)s being ' LOG.info(_LI('Clean step %(step)s on node %(node)s being '
'executed asynchronously, waiting for driver.') % 'executed asynchronously, waiting for driver.') %
{'node': node.uuid, 'step': step}) {'node': node.uuid, 'step': step})
task.process_event('wait')
return return
elif result is not None: elif result is not None:
msg = (_('While executing step %(step)s on node ' msg = (_('While executing step %(step)s on node '
@ -1091,14 +1120,15 @@ class ConductorManager(periodic_task.PeriodicTasks):
# (through to its DB API call) so that we can eliminate our call # (through to its DB API call) so that we can eliminate our call
# and first set of checks below. # and first set of checks below.
exclude_states = (states.DEPLOYWAIT, states.ENROLL) exclude_states = (states.DEPLOYWAIT, states.CLEANWAIT, states.ENROLL)
filters = {'reserved': False, 'maintenance': False} filters = {'reserved': False, 'maintenance': False}
node_iter = self.iter_nodes(fields=['id'], filters=filters) node_iter = self.iter_nodes(fields=['id'], filters=filters)
for (node_uuid, driver, node_id) in node_iter: for (node_uuid, driver, node_id) in node_iter:
try: try:
# NOTE(deva): we should not acquire a lock on a node in # NOTE(deva): we should not acquire a lock on a node in
# DEPLOYWAIT, as this could cause an error within # DEPLOYWAIT/CLEANWAIT, as this could cause an
# a deploy ramdisk POSTing back at the same time. # error within a deploy ramdisk POSTing back at
# the same time.
# TODO(deva): refactor this check, because it needs to be done # TODO(deva): refactor this check, because it needs to be done
# in every periodic task, not just this one. # in every periodic task, not just this one.
node = objects.Node.get_by_id(context, node_id) node = objects.Node.get_by_id(context, node_id)

View File

@ -177,7 +177,7 @@ class BaseInterface(object):
A clean step should take a single argument: a TaskManager object. A clean step should take a single argument: a TaskManager object.
A step can be executed synchronously or asynchronously. A step should A step can be executed synchronously or asynchronously. A step should
return None if the method has completed synchronously or return None if the method has completed synchronously or
states.CLEANING if the step will continue to execute asynchronously. states.CLEANWAIT if the step will continue to execute asynchronously.
If the step executes asynchronously, it should issue a call to the If the step executes asynchronously, it should issue a call to the
'continue_node_clean' RPC, so the conductor can begin the next 'continue_node_clean' RPC, so the conductor can begin the next
clean step. clean step.
@ -185,7 +185,7 @@ class BaseInterface(object):
:param task: A TaskManager object :param task: A TaskManager object
:param step: The clean step dictionary representing the step to execute :param step: The clean step dictionary representing the step to execute
:returns: None if this method has completed synchronously, or :returns: None if this method has completed synchronously, or
states.CLEANING if the step will continue to execute states.CLEANWAIT if the step will continue to execute
asynchronously. asynchronously.
""" """
return getattr(self, step['step'])(task) return getattr(self, step['step'])(task)
@ -312,7 +312,7 @@ class DeployInterface(BaseInterface):
:param task: a TaskManager instance containing the node to act on. :param task: a TaskManager instance containing the node to act on.
:returns: If this function is going to be asynchronous, should return :returns: If this function is going to be asynchronous, should return
`states.CLEANING`. Otherwise, should return `None`. The interface `states.CLEANWAIT`. Otherwise, should return `None`. The interface
will need to call _get_cleaning_steps and then RPC to will need to call _get_cleaning_steps and then RPC to
continue_node_cleaning continue_node_cleaning
""" """
@ -859,13 +859,14 @@ class InspectInterface(object):
def clean_step(priority): def clean_step(priority):
"""Decorator for cleaning and zapping steps. """Decorator for cleaning and zapping steps.
If priority is greater than 0, the function will be executed as part of the If priority is greater than 0, the function will be executed as part
CLEANING state for any node using the interface with the decorated clean of the CLEANING (sync) or CLEANWAIT (async) state for any node using
step. During CLEANING, a list of steps will be ordered by priority for all the interface with the decorated clean step. During the cleaning,
interfaces associated with the node, and then execute_clean_step() will be a list of steps will be ordered by priority for all interfaces
called on each step. Steps will be executed based on priority, with the associated with the node, and then execute_clean_step() will be
highest priority step being called first, the next highest priority called on each step. Steps will be executed based on priority,
being call next, and so on. with the highest priority step being called first, the next highest
priority being call next, and so on.
Decorated clean steps should take a single argument, a TaskManager object. Decorated clean steps should take a single argument, a TaskManager object.
@ -877,7 +878,7 @@ def clean_step(priority):
Clean steps can be either synchronous or asynchronous. If the step is Clean steps can be either synchronous or asynchronous. If the step is
synchronous, it should return `None` when finished, and the conductor will synchronous, it should return `None` when finished, and the conductor will
continue on to the next step. If the step is asynchronous, the step should continue on to the next step. If the step is asynchronous, the step should
return `states.CLEANING` to signal to the conductor. When the step is return `states.CLEANWAIT` to signal to the conductor. When the step is
complete, the step should make an RPC call to `continue_node_clean` to move complete, the step should make an RPC call to `continue_node_clean` to move
to the next step in cleaning. to the next step in cleaning.

View File

@ -385,7 +385,7 @@ class AgentDeploy(base.DeployInterface):
:param step: a clean step dictionary to execute :param step: a clean step dictionary to execute
:raises: NodeCleaningFailure if the agent does not return a command :raises: NodeCleaningFailure if the agent does not return a command
status status
:returns: states.CLEANING to signify the step will be completed async :returns: states.CLEANWAIT to signify the step will be completed async
""" """
return deploy_utils.agent_execute_clean_step(task, step) return deploy_utils.agent_execute_clean_step(task, step)
@ -395,7 +395,7 @@ class AgentDeploy(base.DeployInterface):
:param task: a TaskManager object containing the node :param task: a TaskManager object containing the node
:raises NodeCleaningFailure: if the previous cleaning ports cannot :raises NodeCleaningFailure: if the previous cleaning ports cannot
be removed or if new cleaning ports cannot be created be removed or if new cleaning ports cannot be created
:returns: states.CLEANING to signify an asynchronous prepare :returns: states.CLEANWAIT to signify an asynchronous prepare
""" """
provider = dhcp_factory.DHCPFactory() provider = dhcp_factory.DHCPFactory()
# If we have left over ports from a previous cleaning, remove them # If we have left over ports from a previous cleaning, remove them
@ -416,7 +416,7 @@ class AgentDeploy(base.DeployInterface):
_prepare_pxe_boot(task) _prepare_pxe_boot(task)
_do_pxe_boot(task, ports) _do_pxe_boot(task, ports)
# Tell the conductor we are waiting for the agent to boot. # Tell the conductor we are waiting for the agent to boot.
return states.CLEANING return states.CLEANWAIT
def tear_down_cleaning(self, task): def tear_down_cleaning(self, task):
"""Clean up the PXE and DHCP files after cleaning. """Clean up the PXE and DHCP files after cleaning.

View File

@ -263,14 +263,22 @@ class BaseAgentVendor(base.VendorInterface):
elif (node.provision_state == states.DEPLOYWAIT and elif (node.provision_state == states.DEPLOYWAIT and
self.deploy_has_started(task)): self.deploy_has_started(task)):
node.touch_provisioning() node.touch_provisioning()
elif (node.provision_state == states.CLEANING and # TODO(lucasagomes): CLEANING here for backwards compat
not node.clean_step): # with previous code, otherwise nodes in CLEANING when this
# is deployed would fail. Should be removed once the M
# release starts.
elif (node.provision_state in (states.CLEANWAIT, states.CLEANING)
and not node.clean_step):
# Agent booted from prepare_cleaning # Agent booted from prepare_cleaning
LOG.debug('Node %s just booted to start cleaning.', node.uuid) LOG.debug('Node %s just booted to start cleaning.', node.uuid)
manager.set_node_cleaning_steps(task) manager.set_node_cleaning_steps(task)
self._notify_conductor_resume_clean(task) self._notify_conductor_resume_clean(task)
elif (node.provision_state == states.CLEANING and # TODO(lucasagomes): CLEANING here for backwards compat
node.clean_step): # with previous code, otherwise nodes in CLEANING when this
# is deployed would fail. Should be removed once the M
# release starts.
elif (node.provision_state in (states.CLEANWAIT, states.CLEANING)
and node.clean_step):
self.continue_cleaning(task, **kwargs) self.continue_cleaning(task, **kwargs)
except Exception as e: except Exception as e:

View File

@ -959,7 +959,7 @@ def agent_execute_clean_step(task, step):
:param task: a TaskManager object containing the node :param task: a TaskManager object containing the node
:param step: a clean step dictionary to execute :param step: a clean step dictionary to execute
:raises: NodeCleaningFailure if the agent does not return a command status :raises: NodeCleaningFailure if the agent does not return a command status
:returns: states.CLEANING to signify the step will be completed async :returns: states.CLEANWAIT to signify the step will be completed async
""" """
client = agent_client.AgentClient() client = agent_client.AgentClient()
ports = objects.Port.list_by_node_id( ports = objects.Port.list_by_node_id(
@ -970,7 +970,7 @@ def agent_execute_clean_step(task, step):
'Agent on node %(node)s returned bad command result: ' 'Agent on node %(node)s returned bad command result: '
'%(result)s') % {'node': task.node.uuid, '%(result)s') % {'node': task.node.uuid,
'result': result.get('command_error')}) 'result': result.get('command_error')})
return states.CLEANING return states.CLEANWAIT
def agent_add_clean_params(task): def agent_add_clean_params(task):

View File

@ -592,7 +592,7 @@ class IloVirtualMediaAgentDeploy(base.DeployInterface):
:param task: a TaskManager object containing the node :param task: a TaskManager object containing the node
:param step: a clean step dictionary to execute :param step: a clean step dictionary to execute
:returns: states.CLEANING to signify the step will be completed async :returns: states.CLEANWAIT to signify the step will be completed async
""" """
return deploy_utils.agent_execute_clean_step(task, step) return deploy_utils.agent_execute_clean_step(task, step)
@ -614,7 +614,7 @@ class IloVirtualMediaAgentDeploy(base.DeployInterface):
_prepare_agent_vmedia_boot(task) _prepare_agent_vmedia_boot(task)
# Tell the conductor we are waiting for the agent to boot. # Tell the conductor we are waiting for the agent to boot.
return states.CLEANING return states.CLEANWAIT
def tear_down_cleaning(self, task): def tear_down_cleaning(self, task):
"""Clean up the PXE and DHCP files after cleaning.""" """Clean up the PXE and DHCP files after cleaning."""

View File

@ -1583,7 +1583,7 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
@mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker') @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker')
def test_continue_node_clean_worker_pool_full(self, mock_spawn): def test_continue_node_clean_worker_pool_full(self, mock_spawn):
# Test the appropriate exception is raised if the worker pool is full # Test the appropriate exception is raised if the worker pool is full
prv_state = states.CLEANING prv_state = states.CLEANWAIT
tgt_prv_state = states.AVAILABLE tgt_prv_state = states.AVAILABLE
node = obj_utils.create_test_node(self.context, driver='fake', node = obj_utils.create_test_node(self.context, driver='fake',
provision_state=prv_state, provision_state=prv_state,
@ -1599,14 +1599,11 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
self.service._worker_pool.waitall() self.service._worker_pool.waitall()
node.refresh() node.refresh()
# Make sure things were rolled back
self.assertEqual(prv_state, node.provision_state)
self.assertEqual(tgt_prv_state, node.target_provision_state)
@mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker') @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker')
def test_continue_node_clean_wrong_state(self, mock_spawn): def test_continue_node_clean_wrong_state(self, mock_spawn):
# Test the appropriate exception is raised if node isn't already # Test the appropriate exception is raised if node isn't already
# in CLEANING state # in CLEANWAIT state
prv_state = states.DELETING prv_state = states.DELETING
tgt_prv_state = states.AVAILABLE tgt_prv_state = states.AVAILABLE
node = obj_utils.create_test_node(self.context, driver='fake', node = obj_utils.create_test_node(self.context, driver='fake',
@ -1628,9 +1625,9 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
self.assertIsNone(node.reservation) self.assertIsNone(node.reservation)
@mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker') @mock.patch('ironic.conductor.manager.ConductorManager._spawn_worker')
def test_continue_node_clean(self, mock_spawn): def _continue_node_clean(self, return_state, mock_spawn):
# test a node can continue cleaning via RPC # test a node can continue cleaning via RPC
prv_state = states.CLEANING prv_state = return_state
tgt_prv_state = states.AVAILABLE tgt_prv_state = states.AVAILABLE
driver_info = {'clean_steps': self.clean_steps} driver_info = {'clean_steps': self.clean_steps}
node = obj_utils.create_test_node(self.context, driver='fake', node = obj_utils.create_test_node(self.context, driver='fake',
@ -1647,6 +1644,12 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
mock.ANY, self.clean_steps, mock.ANY, self.clean_steps,
self.clean_steps[1]) self.clean_steps[1])
def test_continue_node_clean(self):
self._continue_node_clean(states.CLEANWAIT)
def test_continue_node_clean_backward_compat(self):
self._continue_node_clean(states.CLEANING)
@mock.patch('ironic.drivers.modules.fake.FakePower.validate') @mock.patch('ironic.drivers.modules.fake.FakePower.validate')
def test__do_node_clean_validate_fail(self, mock_validate): def test__do_node_clean_validate_fail(self, mock_validate):
# InvalidParameterValue should be cause node to go to CLEANFAIL # InvalidParameterValue should be cause node to go to CLEANFAIL
@ -1717,7 +1720,7 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
self.assertEqual(states.AVAILABLE, node.target_provision_state) self.assertEqual(states.AVAILABLE, node.target_provision_state)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step') @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step')
def test__do_next_clean_step_first_step_async(self, mock_execute): def _do_next_clean_step_first_step_async(self, return_state, mock_execute):
# Execute the first async clean step on a node # Execute the first async clean step on a node
node = obj_utils.create_test_node( node = obj_utils.create_test_node(
self.context, driver='fake', self.context, driver='fake',
@ -1725,7 +1728,7 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
target_provision_state=states.AVAILABLE, target_provision_state=states.AVAILABLE,
last_error=None, last_error=None,
clean_step={}) clean_step={})
mock_execute.return_value = states.CLEANING mock_execute.return_value = return_state
self._start_service() self._start_service()
@ -1737,12 +1740,19 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
self.service._worker_pool.waitall() self.service._worker_pool.waitall()
node.refresh() node.refresh()
self.assertEqual(states.CLEANING, node.provision_state) self.assertEqual(states.CLEANWAIT, node.provision_state)
self.assertEqual(self.clean_steps[0], node.clean_step) self.assertEqual(self.clean_steps[0], node.clean_step)
mock_execute.assert_called_once_with(mock.ANY, self.clean_steps[0]) mock_execute.assert_called_once_with(mock.ANY, self.clean_steps[0])
def test_do_next_clean_step_first_step_async(self):
self._do_next_clean_step_first_step_async(states.CLEANWAIT)
def test_do_next_clean_step_first_step_async_backward_compat(self):
self._do_next_clean_step_first_step_async(states.CLEANING)
@mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step') @mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step')
def test__do_next_clean_step_continue_from_last_step(self, mock_execute): def _do_next_clean_step_continue_from_last_step(self, return_state,
mock_execute):
# Resume an in-progress cleaning after the first async step # Resume an in-progress cleaning after the first async step
node = obj_utils.create_test_node( node = obj_utils.create_test_node(
self.context, driver='fake', self.context, driver='fake',
@ -1750,7 +1760,7 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
target_provision_state=states.AVAILABLE, target_provision_state=states.AVAILABLE,
last_error=None, last_error=None,
clean_step=self.clean_steps[0]) clean_step=self.clean_steps[0])
mock_execute.return_value = states.CLEANING mock_execute.return_value = return_state
self._start_service() self._start_service()
@ -1762,10 +1772,48 @@ class DoNodeCleanTestCase(_ServiceSetUpMixin, tests_db_base.DbTestCase):
self.service._worker_pool.waitall() self.service._worker_pool.waitall()
node.refresh() node.refresh()
self.assertEqual(states.CLEANING, node.provision_state) self.assertEqual(states.CLEANWAIT, node.provision_state)
self.assertEqual(self.clean_steps[1], node.clean_step) self.assertEqual(self.clean_steps[1], node.clean_step)
mock_execute.assert_called_once_with(mock.ANY, self.clean_steps[1]) mock_execute.assert_called_once_with(mock.ANY, self.clean_steps[1])
def test_do_next_clean_step_continue_from_last_step(self):
self._do_next_clean_step_continue_from_last_step(states.CLEANWAIT)
def test_do_next_clean_step_continue_from_last_step_backward_compat(self):
self._do_next_clean_step_continue_from_last_step(states.CLEANING)
@mock.patch('ironic.drivers.modules.fake.FakePower.execute_clean_step')
def _do_next_clean_step_continue_from_last_cleaning(self, return_state,
mock_execute):
# Resume an in-progress cleaning after the first async step
node = obj_utils.create_test_node(
self.context, driver='fake',
provision_state=states.CLEANING,
target_provision_state=states.AVAILABLE,
last_error=None,
clean_step=self.clean_steps[0])
mock_execute.return_value = return_state
self._start_service()
with task_manager.acquire(
self.context, node['id'], shared=False) as task:
self.service._do_next_clean_step(task, self.clean_steps,
self.clean_steps[0])
self.service._worker_pool.waitall()
node.refresh()
self.assertEqual(states.CLEANWAIT, node.provision_state)
self.assertEqual(self.clean_steps[1], node.clean_step)
mock_execute.assert_called_once_with(mock.ANY, self.clean_steps[1])
def test_do_next_clean_step_continue_from_last_cleaning(self):
self._do_next_clean_step_continue_from_last_cleaning(states.CLEANWAIT)
def test_do_next_clean_step_continue_from_last_cleaning_backward_com(self):
self._do_next_clean_step_continue_from_last_cleaning(states.CLEANING)
@mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step') @mock.patch('ironic.drivers.modules.fake.FakeDeploy.execute_clean_step')
def test__do_next_clean_step_last_step_noop(self, mock_execute): def test__do_next_clean_step_last_step_noop(self, mock_execute):
# Resume where last_step is the last cleaning step, should be noop # Resume where last_step is the last cleaning step, should be noop

View File

@ -791,7 +791,7 @@ class IloVirtualMediaAgentDeployTestCase(db_base.DbTestCase):
shared=False) as task: shared=False) as task:
returned_state = task.driver.deploy.prepare_cleaning(task) returned_state = task.driver.deploy.prepare_cleaning(task)
vmedia_boot_mock.assert_called_once_with(task) vmedia_boot_mock.assert_called_once_with(task)
self.assertEqual(states.CLEANING, returned_state) self.assertEqual(states.CLEANWAIT, returned_state)
create_port_mock.assert_called_once_with(mock.ANY, task) create_port_mock.assert_called_once_with(mock.ANY, task)
delete_mock.assert_called_once_with(mock.ANY, task) delete_mock.assert_called_once_with(mock.ANY, task)
self.assertEqual(task.node.driver_internal_info.get( self.assertEqual(task.node.driver_internal_info.get(

View File

@ -312,7 +312,7 @@ class TestAgentDeploy(db_base.DbTestCase):
create_mock.return_value = ports create_mock.return_value = ports
with task_manager.acquire( with task_manager.acquire(
self.context, self.node['uuid'], shared=False) as task: self.context, self.node['uuid'], shared=False) as task:
self.assertEqual(states.CLEANING, self.assertEqual(states.CLEANWAIT,
self.driver.prepare_cleaning(task)) self.driver.prepare_cleaning(task))
prepare_mock.assert_called_once_with(task) prepare_mock.assert_called_once_with(task)
boot_mock.assert_called_once_with(task, ports) boot_mock.assert_called_once_with(task, ports)

View File

@ -23,6 +23,7 @@ import mock
from ironic.common import boot_devices from ironic.common import boot_devices
from ironic.common import exception from ironic.common import exception
from ironic.common import states from ironic.common import states
from ironic.conductor import manager
from ironic.conductor import task_manager from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils from ironic.conductor import utils as manager_utils
from ironic.drivers.modules import agent_base_vendor from ironic.drivers.modules import agent_base_vendor
@ -296,6 +297,48 @@ class TestBaseAgentVendor(db_base.DbTestCase):
'1be26c0b-03f2-4d2e-ae87-c02d7f33c123: Failed checking if deploy ' '1be26c0b-03f2-4d2e-ae87-c02d7f33c123: Failed checking if deploy '
'is done. exception: LlamaException') 'is done. exception: LlamaException')
@mock.patch.object(manager, 'set_node_cleaning_steps', autospec=True)
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'_notify_conductor_resume_clean', autospec=True)
def test_heartbeat_resume_clean(self, mock_notify, mock_set_steps):
kwargs = {
'agent_url': 'http://127.0.0.1:9999/bar'
}
self.node.clean_step = {}
for state in (states.CLEANWAIT, states.CLEANING):
self.node.provision_state = state
self.node.save()
with task_manager.acquire(
self.context, self.node.uuid, shared=True) as task:
self.passthru.heartbeat(task, **kwargs)
mock_notify.assert_called_once_with(mock.ANY, task)
mock_set_steps.assert_called_once_with(task)
mock_notify.reset_mock()
mock_set_steps.reset_mock()
@mock.patch.object(agent_base_vendor.BaseAgentVendor,
'continue_cleaning', autospec=True)
def test_heartbeat_continue_cleaning(self, mock_continue):
kwargs = {
'agent_url': 'http://127.0.0.1:9999/bar'
}
self.node.clean_step = {
'priority': 10,
'interface': 'deploy',
'step': 'foo',
'reboot_requested': False
}
for state in (states.CLEANWAIT, states.CLEANING):
self.node.provision_state = state
self.node.save()
with task_manager.acquire(
self.context, self.node.uuid, shared=True) as task:
self.passthru.heartbeat(task, **kwargs)
mock_continue.assert_called_once_with(mock.ANY, task, **kwargs)
mock_continue.reset_mock()
@mock.patch.object(agent_base_vendor.BaseAgentVendor, 'continue_deploy', @mock.patch.object(agent_base_vendor.BaseAgentVendor, 'continue_deploy',
autospec=True) autospec=True)
@mock.patch.object(agent_base_vendor.BaseAgentVendor, 'reboot_to_instance', @mock.patch.object(agent_base_vendor.BaseAgentVendor, 'reboot_to_instance',

View File

@ -1843,7 +1843,7 @@ class AgentCleaningTestCase(db_base.DbTestCase):
response = utils.agent_execute_clean_step( response = utils.agent_execute_clean_step(
task, task,
self.clean_steps['clean_steps']['GenericHardwareManager'][0]) self.clean_steps['clean_steps']['GenericHardwareManager'][0])
self.assertEqual(states.CLEANING, response) self.assertEqual(states.CLEANWAIT, response)
@mock.patch('ironic.objects.Port.list_by_node_id', @mock.patch('ironic.objects.Port.list_by_node_id',
spec_set=types.FunctionType) spec_set=types.FunctionType)
@ -1859,7 +1859,7 @@ class AgentCleaningTestCase(db_base.DbTestCase):
response = utils.agent_execute_clean_step( response = utils.agent_execute_clean_step(
task, task,
self.clean_steps['clean_steps']['GenericHardwareManager'][0]) self.clean_steps['clean_steps']['GenericHardwareManager'][0])
self.assertEqual(states.CLEANING, response) self.assertEqual(states.CLEANWAIT, response)
@mock.patch('ironic.objects.Port.list_by_node_id', @mock.patch('ironic.objects.Port.list_by_node_id',
spec_set=types.FunctionType) spec_set=types.FunctionType)
@ -1876,7 +1876,7 @@ class AgentCleaningTestCase(db_base.DbTestCase):
response = utils.agent_execute_clean_step( response = utils.agent_execute_clean_step(
task, task,
self.clean_steps['clean_steps']['GenericHardwareManager'][0]) self.clean_steps['clean_steps']['GenericHardwareManager'][0])
self.assertEqual(states.CLEANING, response) self.assertEqual(states.CLEANWAIT, response)
def test_agent_add_clean_params(self): def test_agent_add_clean_params(self):
cfg.CONF.agent.agent_erase_devices_iterations = 2 cfg.CONF.agent.agent_erase_devices_iterations = 2