Refresh fsm in task when a shared lock is upgraded

If the fsm is advanced while a task is holding a shared lock, and then
the task upgrades the lock to an exclusive lock, the fsm object for the
task still is in the old state. If the now-exclusive lock attempts to
(correctly) advance the state machine, it may explode, thinking it's in
the old state and the transition is invalid.

Use a getter/setter for self.node in the TaskManager object, so that
whenever we set a node object (like we do when upgrading a lock), the
fsm will be re-initialized and in sync with reality.

The task manager also still has a crutch for nodes in the NOSTATE
provision_state, for compatibility with Juno, which was to be removed in
Kilo, but never was. Instead of moving this crutch into the setter as
well, we remove it (and its unit test) here. We also update the unit
test nodes to use provision_state AVAILABLE by default, as around 1000
unit tests use the default and begin failing in the task manager if the
default is NOSTATE.

Last, we remove NOSTATE from states.ALLOWED_DELETE_STATES, as we should
never have a node in NOSTATE by now, and deleting the crutch causes the
test for this attribute to fail.

Change-Id: I0a0277742d512a8ad6e41f25d1c04c13fcf8d6a2
Closes-Bug: #1619232
This commit is contained in:
Jim Rollenhagen 2016-09-01 13:48:36 +00:00
parent 739355c091
commit 88fc516678
5 changed files with 36 additions and 34 deletions

View File

@ -189,8 +189,7 @@ UPDATE_ALLOWED_STATES = (DEPLOYFAIL, INSPECTING, INSPECTFAIL, CLEANFAIL, ERROR,
VERIFYING, ADOPTFAIL)
"""Transitional states in which we allow updating a node."""
DELETE_ALLOWED_STATES = (AVAILABLE, NOSTATE, MANAGEABLE, ENROLL,
ADOPTFAIL)
DELETE_ALLOWED_STATES = (AVAILABLE, MANAGEABLE, ENROLL, ADOPTFAIL)
"""States in which node deletion is allowed."""
STABLE_STATES = (ENROLL, MANAGEABLE, AVAILABLE, ACTIVE, ERROR)

View File

@ -192,7 +192,7 @@ class TaskManager(object):
self._on_error_method = None
self.context = context
self.node = None
self._node = None
self.node_id = node_id
self.shared = shared
@ -218,19 +218,21 @@ class TaskManager(object):
self.driver = driver_factory.build_driver_for_task(
self, driver_name=driver_name)
# NOTE(deva): this handles the Juno-era NOSTATE state
# and should be deleted after Kilo is released
if self.node.provision_state is states.NOSTATE:
self.node.provision_state = states.AVAILABLE
self.node.save()
self.fsm.initialize(start_state=self.node.provision_state,
target_state=self.node.target_provision_state)
except Exception:
with excutils.save_and_reraise_exception():
self.release_resources()
@property
def node(self):
return self._node
@node.setter
def node(self, node):
self._node = node
if node is not None:
self.fsm.initialize(start_state=self.node.provision_state,
target_state=self.node.target_provision_state)
def _lock(self):
self._debug_timer.restart()

View File

@ -763,27 +763,6 @@ class ServiceDoNodeDeployTestCase(mgr_utils.ServiceSetUpMixin,
mock_iwdi):
self._test_do_node_deploy_validate_fail(mock_validate, mock_iwdi)
@mock.patch('ironic.conductor.task_manager.TaskManager.process_event')
def test_deploy_with_nostate_converts_to_available(self, mock_pe,
mock_iwdi):
# expressly create a node using the Juno-era NOSTATE state
# and assert that it does not result in an error, and that the state
# is converted to the new AVAILABLE state.
# Mock the process_event call, because the transitions from
# AVAILABLE are tested thoroughly elsewhere
# NOTE(deva): This test can be deleted after Kilo is released
mock_iwdi.return_value = False
self._start_service()
node = obj_utils.create_test_node(self.context, driver='fake',
provision_state=states.NOSTATE)
self.assertEqual(states.NOSTATE, node.provision_state)
self.service.do_node_deploy(self.context, node.uuid)
self.assertTrue(mock_pe.called)
node.refresh()
self.assertEqual(states.AVAILABLE, node.provision_state)
mock_iwdi.assert_called_once_with(self.context, node.instance_info)
self.assertFalse(node.driver_internal_info['is_whole_disk_image'])
def test_do_node_deploy_partial_ok(self, mock_iwdi):
mock_iwdi.return_value = False
self._start_service()

View File

@ -396,6 +396,28 @@ class TaskManagerTestCase(tests_db_base.DbTestCase):
get_ports_mock.assert_called_once_with(self.context, self.node.id)
get_portgroups_mock.assert_called_once_with(self.context, self.node.id)
def test_upgrade_lock_refreshes_fsm(self, get_portgroups_mock,
get_ports_mock, build_driver_mock,
reserve_mock, release_mock,
node_get_mock):
reserve_mock.return_value = self.node
node_get_mock.return_value = self.node
with task_manager.acquire(self.context, 'fake-node-id',
shared=True) as task1:
self.assertEqual(states.AVAILABLE, task1.node.provision_state)
with task_manager.acquire(self.context, 'fake-node-id',
shared=False) as task2:
# move the node to manageable
task2.process_event('manage')
self.assertEqual(states.MANAGEABLE, task1.node.provision_state)
# now upgrade our shared task and try to go to cleaning
# this will explode if task1's FSM doesn't get refreshed
task1.upgrade_lock()
task1.process_event('provide')
self.assertEqual(states.CLEANING, task1.node.provision_state)
def test_spawn_after(
self, get_portgroups_mock, get_ports_mock, build_driver_mock,
reserve_mock, release_mock, node_get_mock):

View File

@ -216,7 +216,7 @@ def get_test_node(**kw):
'conductor_affinity': kw.get('conductor_affinity', None),
'power_state': kw.get('power_state', states.NOSTATE),
'target_power_state': kw.get('target_power_state', states.NOSTATE),
'provision_state': kw.get('provision_state', states.NOSTATE),
'provision_state': kw.get('provision_state', states.AVAILABLE),
'target_provision_state': kw.get('target_provision_state',
states.NOSTATE),
'provision_updated_at': kw.get('provision_updated_at'),