Fix updating nodes with removed or broken drivers

Currently when updating a node we try to acquire the lock on it using
its old database record, including its old driver. This is not correct
when updating a driver, since the current conductor may not have access
to the old driver (and the old driver may no longer be enabled at all).

Since we do not need task.driver at all when updating nodes, this change
stops populating it in update_node.

Change-Id: I510c3bfbd11b01fef05341be2bb04c6bd01bf8ac
Story: #2004741
Task: #28812
This commit is contained in:
Dmitry Tantsur 2019-01-09 12:41:24 +01:00
parent b77fe3c427
commit cea8d74914
5 changed files with 47 additions and 2 deletions

View File

@ -219,7 +219,12 @@ class ConductorManager(base_manager.BaseConductorManager):
driver_factory.check_and_update_node_interfaces(node_obj)
# NOTE(dtantsur): if we're updating the driver from an invalid value,
# loading the old driver may be impossible. Since we only need to
# update the node record in the database, skip loading the driver
# completely.
with task_manager.acquire(context, node_id, shared=False,
load_driver=False,
purpose='node update') as task:
# Prevent instance_uuid overwriting
if ('instance_uuid' in delta and node_obj.instance_uuid

View File

@ -170,7 +170,8 @@ class TaskManager(object):
"""
def __init__(self, context, node_id, shared=False,
purpose='unspecified action', retry=True):
purpose='unspecified action', retry=True,
load_driver=True):
"""Create a new TaskManager.
Acquire a lock on a node. The lock can be either shared or
@ -184,6 +185,9 @@ class TaskManager(object):
lock. Default: False.
:param purpose: human-readable purpose to put to debug logs.
:param retry: whether to retry locking if it fails. Default: True.
:param load_driver: whether to load the ``driver`` object. Set this to
False if loading the driver is undesired or
impossible.
:raises: DriverNotFound
:raises: InterfaceNotFoundInEntrypoint
:raises: NodeNotFound
@ -229,7 +233,10 @@ class TaskManager(object):
context, self.node.id)
self.volume_targets = objects.VolumeTarget.list_by_node_id(
context, self.node.id)
if load_driver:
self.driver = driver_factory.build_driver_for_task(self)
else:
self.driver = None
except Exception:
with excutils.save_and_reraise_exception():

View File

@ -625,6 +625,16 @@ class UpdateNodeTestCase(mgr_utils.ServiceSetUpMixin, db_base.DbTestCase):
node.refresh()
self.assertEqual(existing_driver, node.driver)
def test_update_node_from_invalid_driver(self):
existing_driver = 'fake-hardware'
wrong_driver = 'wrong-driver'
node = obj_utils.create_test_node(self.context, driver=wrong_driver)
node.driver = existing_driver
result = self.service.update_node(self.context, node)
self.assertEqual(existing_driver, result.driver)
node.refresh()
self.assertEqual(existing_driver, node.driver)
UpdateInterfaces = namedtuple('UpdateInterfaces', ('old', 'new'))
# NOTE(dtantsur): "old" interfaces here do not match the defaults, so that
# we can test resetting them.

View File

@ -79,6 +79,24 @@ class TaskManagerTestCase(db_base.DbTestCase):
release_mock.assert_called_once_with(self.context, self.host,
self.node.id)
def test_no_driver(self, get_voltgt_mock, get_volconn_mock,
get_portgroups_mock, get_ports_mock,
build_driver_mock, reserve_mock, release_mock,
node_get_mock):
reserve_mock.return_value = self.node
with task_manager.TaskManager(self.context, 'fake-node-id',
load_driver=False) as task:
self.assertEqual(self.context, task.context)
self.assertEqual(self.node, task.node)
self.assertEqual(get_ports_mock.return_value, task.ports)
self.assertEqual(get_portgroups_mock.return_value, task.portgroups)
self.assertEqual(get_volconn_mock.return_value,
task.volume_connectors)
self.assertEqual(get_voltgt_mock.return_value, task.volume_targets)
self.assertIsNone(task.driver)
self.assertFalse(task.shared)
self.assertFalse(build_driver_mock.called)
def test_excl_nested_acquire(
self, get_voltgt_mock, get_volconn_mock, get_portgroups_mock,
get_ports_mock, build_driver_mock,

View File

@ -0,0 +1,5 @@
---
fixes:
- |
A bug has been fixed in the node update code that could cause the nodes
to become not updatable if their driver is no longer available.