Fix inactive session error in compute node creation
In the fix for bug 1839560 [1][2], soft-deleted compute nodes may be restored, to ensure we can reuse ironic node UUIDs as compute node UUIDs. While this seems to largely work, it results in some nasty errors being generated [3]: InvalidRequestError This session is in 'inactive' state, due to the SQL transaction being rolled back; no further SQL can be emitted within this transaction. This happens because compute_node_create is decorated with pick_context_manager_writer, which begins a transaction. While _compute_node_get_and_update_deleted claims that calling a second pick_context_manager_writer decorated function will begin a new subtransaction, this does not appear to be the case. This change removes pick_context_manager_writer from the compute_node_create function, and adds a new _compute_node_create function which ensures the transaction is finished if _compute_node_get_and_update_deleted is called. The new unit test added here fails without this change. This change marks the removal of the final FIXME from the functional test added in [4]. [1] https://bugs.launchpad.net/nova/+bug/1839560 [2] https://git.openstack.org/cgit/openstack/nova/commit/?id=89dd74ac7f1028daadf86cb18948e27fe9d1d411 [3] http://paste.openstack.org/show/786350/ [4] https://review.opendev.org/#/c/695012/ Change-Id: Iae119ea8776bc7f2e5dbe2e502a743217beded73 Closes-Bug: #1853159 Related-Bug: #1853009
This commit is contained in:

committed by
Lee Yarwood

parent
a8492e8878
commit
2383cbb4a5
@@ -768,7 +768,7 @@ def compute_node_search_by_hypervisor(context, hypervisor_match):
|
||||
|
||||
|
||||
@pick_context_manager_writer
|
||||
def compute_node_create(context, values):
|
||||
def _compute_node_create(context, values):
|
||||
"""Create a compute node from the values dictionary.
|
||||
|
||||
:param context: The security context
|
||||
@@ -781,8 +781,21 @@ def compute_node_create(context, values):
|
||||
|
||||
compute_node_ref = models.ComputeNode()
|
||||
compute_node_ref.update(values)
|
||||
compute_node_ref.save(context.session)
|
||||
return compute_node_ref
|
||||
|
||||
|
||||
# NOTE(mgoddard): We avoid decorating this with @pick_context_manager_writer,
|
||||
# so that we get a separate transaction in the exception handler. This avoids
|
||||
# an error message about inactive DB sessions during a transaction rollback.
|
||||
# See https://bugs.launchpad.net/nova/+bug/1853159.
|
||||
def compute_node_create(context, values):
|
||||
"""Creates a new ComputeNode and populates the capacity fields
|
||||
with the most recent data. Will restore a soft deleted compute node if a
|
||||
UUID has been explicitly requested.
|
||||
"""
|
||||
try:
|
||||
compute_node_ref.save(context.session)
|
||||
compute_node_ref = _compute_node_create(context, values)
|
||||
except db_exc.DBDuplicateEntry:
|
||||
with excutils.save_and_reraise_exception(logger=LOG) as err_ctx:
|
||||
# Check to see if we have a (soft) deleted ComputeNode with the
|
||||
|
@@ -85,10 +85,6 @@ class NodeRebalanceDeletedComputeNodeRaceTestCase(
|
||||
|
||||
# host_b[1]: Finds no compute record in RT. Tries to create one
|
||||
# (_init_compute_node).
|
||||
# FIXME(mgoddard): This shows a traceback with SQL rollback due to
|
||||
# soft-deleted node. The create seems to succeed but breaks the RT
|
||||
# update for this node. See
|
||||
# https://bugs.launchpad.net/nova/+bug/1853159.
|
||||
host_b.manager.update_available_resource(self.ctxt)
|
||||
self._assert_hypervisor_api(self.nodename, expected_host='host_b')
|
||||
# There should only be one resource provider (fake-node).
|
||||
@@ -164,41 +160,12 @@ class NodeRebalanceDeletedComputeNodeRaceTestCase(
|
||||
self.ctxt, cn, cascade=True)
|
||||
|
||||
# host_b[3]: Should recreate compute node and resource provider.
|
||||
# FIXME(mgoddard): Resource provider not recreated here, due to
|
||||
# https://bugs.launchpad.net/nova/+bug/1853159.
|
||||
host_b.manager.update_available_resource(self.ctxt)
|
||||
|
||||
# Verify that the node was recreated.
|
||||
self._assert_hypervisor_api(self.nodename, 'host_b')
|
||||
|
||||
# But due to https://bugs.launchpad.net/nova/+bug/1853159 the compute
|
||||
# node is not cached in the RT.
|
||||
self.assertNotIn(self.nodename, host_b.manager.rt.compute_nodes)
|
||||
|
||||
# There is no RP.
|
||||
rps = self._get_all_providers()
|
||||
self.assertEqual(0, len(rps), rps)
|
||||
|
||||
# But the RP exists in the provider tree.
|
||||
self.assertFalse(host_b.manager.rt.reportclient._provider_tree.exists(
|
||||
self.nodename))
|
||||
|
||||
# host_b[1]: Should add compute node to RT cache and recreate resource
|
||||
# provider.
|
||||
host_b.manager.update_available_resource(self.ctxt)
|
||||
|
||||
# Verify that the node still exists.
|
||||
self._assert_hypervisor_api(self.nodename, 'host_b')
|
||||
|
||||
# And it is now in the RT cache.
|
||||
self.assertIn(self.nodename, host_b.manager.rt.compute_nodes)
|
||||
|
||||
# The resource provider has now been created.
|
||||
rps = self._get_all_providers()
|
||||
self.assertEqual(1, len(rps), rps)
|
||||
self.assertEqual(self.nodename, rps[0]['name'])
|
||||
|
||||
# This fails due to the lack of a resource provider.
|
||||
self.assertIn(
|
||||
'Skipping removal of allocations for deleted instances',
|
||||
self.stdlog.logger.output)
|
||||
|
@@ -5183,6 +5183,22 @@ class ComputeNodeTestCase(test.TestCase, ModelsObjectComparatorMixin):
|
||||
self.assertRaises(db_exc.DBDuplicateEntry,
|
||||
db.compute_node_create, self.ctxt, other_node)
|
||||
|
||||
def test_compute_node_create_duplicate_uuid(self):
|
||||
"""Tests to make sure that no exception is raised when trying to create
|
||||
a compute node with the same host, hypervisor_hostname and uuid values
|
||||
as another compute node that was previously soft-deleted.
|
||||
"""
|
||||
# Prior to fixing https://bugs.launchpad.net/nova/+bug/1853159, this
|
||||
# raised the following error:
|
||||
# sqlalchemy.exc.InvalidRequestError: This session is in 'inactive'
|
||||
# state, due to the SQL transaction being rolled back; no further SQL
|
||||
# can be emitted within this transaction.
|
||||
constraint = db.constraint(host=db.equal_any(self.item['host']))
|
||||
db.compute_node_delete(
|
||||
self.ctxt, self.item['id'], constraint=constraint)
|
||||
new_node = db.compute_node_create(self.ctxt, self.compute_node_dict)
|
||||
self.assertEqual(self.item['uuid'], new_node['uuid'])
|
||||
|
||||
def test_compute_node_get_all(self):
|
||||
nodes = db.compute_node_get_all(self.ctxt)
|
||||
self.assertEqual(1, len(nodes))
|
||||
|
Reference in New Issue
Block a user