Fix updating nodes with removed or broken drivers

Currently, when updating a node, we try to acquire the lock on it using
its old database record, including its old driver. This is incorrect
when updating the driver, since the current conductor may not have
access to the old driver (and the old driver may no longer be enabled
at all).

Since we do not need task.driver at all when updating nodes, this change
stops populating it in update_node.
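
A minimal sketch of the resulting call pattern (assumed surrounding
context: node_id, node_obj and the rest of update_node are elided; see
the conductor manager hunk below):

    # Acquire the lock without building the driver: only the database
    # record is touched, so a missing or broken driver must not block it.
    with task_manager.acquire(context, node_id, shared=False,
                              load_driver=False,
                              purpose='node update') as task:
        node_obj.save()  # task.driver is None here and is never used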

Conflicts:
	ironic/conductor/task_manager.py

Change-Id: I510c3bfbd11b01fef05341be2bb04c6bd01bf8ac
Story: #2004741
Task: #28812
(cherry picked from commit cea8d74914)
Author: Dmitry Tantsur
Date:   2019-01-09 12:41:24 +01:00
Parent: ef50e5a2ea
Commit: 588a9ee021

5 changed files with 51 additions and 4 deletions


@@ -199,7 +199,12 @@ class ConductorManager(base_manager.BaseConductorManager):
 
         driver_factory.check_and_update_node_interfaces(node_obj)
 
+        # NOTE(dtantsur): if we're updating the driver from an invalid value,
+        # loading the old driver may be impossible. Since we only need to
+        # update the node record in the database, skip loading the driver
+        # completely.
         with task_manager.acquire(context, node_id, shared=False,
+                                  load_driver=False,
                                   purpose='node update') as task:
             # Prevent instance_uuid overwriting
             if ('instance_uuid' in delta and node_obj.instance_uuid


@@ -149,7 +149,8 @@ def require_exclusive_lock(f):
     return wrapper
 
 
-def acquire(context, node_id, shared=False, purpose='unspecified action'):
+def acquire(context, node_id, shared=False, purpose='unspecified action',
+            **kwargs):
     """Shortcut for acquiring a lock on a Node.
 
     :param context: Request context.
@@ -162,7 +163,8 @@ def acquire(context, node_id, shared=False, purpose='unspecified action'):
     """
     # NOTE(lintan): This is a workaround to set the context of periodic tasks.
     context.ensure_thread_contain_context()
-    return TaskManager(context, node_id, shared=shared, purpose=purpose)
+    return TaskManager(context, node_id, shared=shared, purpose=purpose,
+                       **kwargs)
 
 
 class TaskManager(object):
@@ -174,7 +176,8 @@ class TaskManager(object):
     """
 
     def __init__(self, context, node_id, shared=False,
-                 purpose='unspecified action'):
+                 purpose='unspecified action',
+                 load_driver=True):
         """Create a new TaskManager.
 
         Acquire a lock on a node. The lock can be either shared or
@@ -187,6 +190,9 @@ class TaskManager(object):
         :param shared: Boolean indicating whether to take a shared or exclusive
                        lock. Default: False.
         :param purpose: human-readable purpose to put to debug logs.
+        :param load_driver: whether to load the ``driver`` object. Set this to
+                            False if loading the driver is undesired or
+                            impossible.
         :raises: DriverNotFound
         :raises: InterfaceNotFoundInEntrypoint
         :raises: NodeNotFound
@@ -231,7 +237,10 @@ class TaskManager(object):
                 context, self.node.id)
             self.volume_targets = objects.VolumeTarget.list_by_node_id(
                 context, self.node.id)
-            self.driver = driver_factory.build_driver_for_task(self)
+            if load_driver:
+                self.driver = driver_factory.build_driver_for_task(self)
+            else:
+                self.driver = None
 
         except Exception:
             with excutils.save_and_reraise_exception():
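
For illustration, a hedged usage sketch of the new load_driver flag through
the acquire() shortcut (the node identifier and purpose string below are
placeholders, not taken from this change):

    # Assumes: from ironic.conductor import task_manager, plus a request
    # context. Callers that only need the database record can skip driver
    # loading entirely; task.driver is then simply left as None.
    with task_manager.acquire(context, node_uuid, shared=False,
                              load_driver=False,
                              purpose='db-only update') as task:
        assert task.driver is None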


@@ -566,6 +566,16 @@ class UpdateNodeTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
         node.refresh()
         self.assertEqual(existing_driver, node.driver)
 
+    def test_update_node_from_invalid_driver(self):
+        existing_driver = 'fake-hardware'
+        wrong_driver = 'wrong-driver'
+        node = obj_utils.create_test_node(self.context, driver=wrong_driver)
+        node.driver = existing_driver
+        result = self.service.update_node(self.context, node)
+        self.assertEqual(existing_driver, result.driver)
+        node.refresh()
+        self.assertEqual(existing_driver, node.driver)
+
 UpdateInterfaces = namedtuple('UpdateInterfaces', ('old', 'new'))
 # NOTE(dtantsur): "old" interfaces here do not match the defaults, so that
 # we can test resetting them.


@@ -79,6 +79,24 @@ class TaskManagerTestCase(db_base.DbTestCase):
         release_mock.assert_called_once_with(self.context, self.host,
                                              self.node.id)
 
+    def test_no_driver(self, get_voltgt_mock, get_volconn_mock,
+                       get_portgroups_mock, get_ports_mock,
+                       build_driver_mock, reserve_mock, release_mock,
+                       node_get_mock):
+        reserve_mock.return_value = self.node
+        with task_manager.TaskManager(self.context, 'fake-node-id',
+                                      load_driver=False) as task:
+            self.assertEqual(self.context, task.context)
+            self.assertEqual(self.node, task.node)
+            self.assertEqual(get_ports_mock.return_value, task.ports)
+            self.assertEqual(get_portgroups_mock.return_value, task.portgroups)
+            self.assertEqual(get_volconn_mock.return_value,
+                             task.volume_connectors)
+            self.assertEqual(get_voltgt_mock.return_value, task.volume_targets)
+            self.assertIsNone(task.driver)
+            self.assertFalse(task.shared)
+            self.assertFalse(build_driver_mock.called)
+
     def test_excl_nested_acquire(
             self, get_voltgt_mock, get_volconn_mock, get_portgroups_mock,
             get_ports_mock, build_driver_mock,


@@ -0,0 +1,5 @@
+---
+fixes:
+  - |
+    A bug has been fixed in the node update code that could cause the nodes
+    to become not updatable if their driver is no longer available.