Merge "Restore soft-deleted compute node with same uuid" into stable/rocky

Authored by Zuul on 2019-08-29 18:22:11 +00:00; committed by Gerrit Code Review
commit 7be800d14a
3 changed files with 75 additions and 25 deletions


@@ -30,6 +30,7 @@ from oslo_db.sqlalchemy import enginefacade
 from oslo_db.sqlalchemy import update_match
 from oslo_db.sqlalchemy import utils as sqlalchemyutils
 from oslo_log import log as logging
+from oslo_utils import excutils
 from oslo_utils import importutils
 from oslo_utils import timeutils
 from oslo_utils import uuidutils
@@ -692,11 +693,54 @@ def compute_node_create(context, values):
     compute_node_ref = models.ComputeNode()
     compute_node_ref.update(values)
-    compute_node_ref.save(context.session)
+    try:
+        compute_node_ref.save(context.session)
+    except db_exc.DBDuplicateEntry:
+        with excutils.save_and_reraise_exception(logger=LOG) as err_ctx:
+            # Check to see if we have a (soft) deleted ComputeNode with the
+            # same UUID and if so just update it and mark as no longer (soft)
+            # deleted. See bug 1839560 for details.
+            if 'uuid' in values:
+                # Get a fresh context for a new DB session and allow it to
+                # get a deleted record.
+                ctxt = nova.context.get_admin_context(read_deleted='yes')
+                compute_node_ref = _compute_node_get_and_update_deleted(
+                    ctxt, values)
+                # If we didn't get anything back we failed to find the node
+                # by uuid and update it so re-raise the DBDuplicateEntry.
+                if compute_node_ref:
+                    err_ctx.reraise = False
 
     return compute_node_ref
 
 
+@pick_context_manager_writer
+def _compute_node_get_and_update_deleted(context, values):
+    """Find a ComputeNode by uuid, update and un-delete it.
+
+    This is a special case from the ``compute_node_create`` method which
+    needs to be separate to get a new Session.
+
+    This method will update the ComputeNode, if found, to have deleted=0 and
+    deleted_at=None values.
+
+    :param context: request auth context which should be able to read deleted
+        records
+    :param values: values used to update the ComputeNode record - must include
+        uuid
+    :return: updated ComputeNode sqlalchemy model object if successfully found
+        and updated, None otherwise
+    """
+    cn = model_query(
+        context, models.ComputeNode).filter_by(uuid=values['uuid']).first()
+    if cn:
+        # Update with the provided values but un-soft-delete.
+        update_values = copy.deepcopy(values)
+        update_values['deleted'] = 0
+        update_values['deleted_at'] = None
+        return compute_node_update(context, cn.id, update_values)
+
+
 @oslo_db_api.wrap_db_retry(max_retries=5, retry_on_deadlock=True)
 @pick_context_manager_writer
 def compute_node_update(context, compute_id, values):
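
The new error handling hinges on oslo.utils' save_and_reraise_exception context manager: the caught DBDuplicateEntry propagates by default and is suppressed only when the soft-deleted node is successfully restored. A minimal plain-Python sketch of that control flow (the names here are illustrative stand-ins, not Nova's API):

    class DuplicateError(Exception):
        """Hypothetical stand-in for oslo.db's DBDuplicateEntry."""


    def create_with_recovery(values, insert, restore_by_uuid):
        # insert() raises DuplicateError on a unique-constraint violation;
        # restore_by_uuid() returns the un-deleted row or None.
        try:
            return insert(values)
        except DuplicateError:
            if 'uuid' in values:
                restored = restore_by_uuid(values)
                if restored is not None:
                    # Same effect as setting err_ctx.reraise = False above.
                    return restored
            # No matching soft-deleted row: let the duplicate error propagate.
            raise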


@@ -14,7 +14,6 @@ from oslo_log import log as logging
 from nova import context
 from nova.db import api as db_api
-from nova import exception
 from nova import objects
 from nova import test
 from nova.tests import fixtures as nova_fixtures
@@ -90,30 +89,25 @@ class PeriodicNodeRecreateTestCase(test.TestCase,
         # Now stub the driver again to report node2 as being back and run
         # the periodic task.
         compute.manager.driver._nodes = ['node1', 'node2']
+        LOG.info('Running update_available_resource which should bring back '
+                 'node2.')
         compute.manager.update_available_resource(ctxt)
-        # FIXME(mriedem): This is bug 1839560 where the ResourceTracker fails
-        # to create a ComputeNode for node2 because of conflicting UUIDs.
+        # The DBDuplicateEntry error should have been handled and resulted in
+        # updating the (soft) deleted record to no longer be deleted.
         log = self.stdlog.logger.output
-        self.assertIn('Error updating resources for node node2', log)
-        self.assertIn('DBDuplicateEntry', log)
-        # Should still only have one reported hypervisor (node1).
-        hypervisors = self.api.api_get('/os-hypervisors').body['hypervisors']
-        self.assertEqual(1, len(hypervisors), hypervisors)
-        # Test the workaround for bug 1839560 by archiving the deleted node2
-        # compute_nodes table record which will allow the periodic to create a
-        # new entry for node2. We can remove this when the bug is fixed.
-        LOG.info('Archiving the database.')
-        archived = db_api.archive_deleted_rows(1000)[0]
-        self.assertIn('compute_nodes', archived)
-        self.assertEqual(1, archived['compute_nodes'])
-        with utils.temporary_mutation(ctxt, read_deleted='yes'):
-            self.assertRaises(exception.ComputeHostNotFound,
-                              objects.ComputeNode.get_by_host_and_nodename,
-                              ctxt, 'node1', 'node2')
-        # Now run the periodic again and we should have a new ComputeNode for
-        # node2.
-        LOG.info('Running update_available_resource which should create a new '
-                 'ComputeNode record for node2.')
-        compute.manager.update_available_resource(ctxt)
+        self.assertNotIn('DBDuplicateEntry', log)
+        # Should have two reported hypervisors again.
         hypervisors = self.api.api_get('/os-hypervisors').body['hypervisors']
         self.assertEqual(2, len(hypervisors), hypervisors)
+        # Now that the node2 record was un-soft-deleted, archiving should not
+        # remove any compute_nodes.
+        LOG.info('Archiving the database.')
+        archived = db_api.archive_deleted_rows(1000)[0]
+        self.assertNotIn('compute_nodes', archived)
+        cn2 = objects.ComputeNode.get_by_host_and_nodename(
+            ctxt, 'node1', 'node2')
+        self.assertFalse(cn2.deleted)
+        self.assertIsNone(cn2.deleted_at)
+        # The node2 id and uuid should not have changed in the DB.
+        self.assertEqual(cn.id, cn2.id)
+        self.assertEqual(cn.uuid, cn2.uuid)
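
The property this test pins down can be replayed compactly outside Nova. The following is a self-contained toy model (an in-memory "table"; real Nova goes through the ResourceTracker and the compute_nodes table, and these function names are illustrative): soft-deleting node2 and then recreating it with the same uuid restores the original row, so its id and uuid never change.

    import itertools
    import uuid as uuidlib

    _ids = itertools.count(1)
    _rows = []  # row dicts; deleted == 0 means live, deleted == id means soft-deleted


    def create(values):
        # uuid is unique across live *and* soft-deleted rows, as in the bug.
        dup = next((r for r in _rows if r['uuid'] == values['uuid']), None)
        if dup is not None:
            if dup['deleted']:
                # Fixed behavior: restore the soft-deleted row in place.
                dup.update(values, deleted=0, deleted_at=None)
                return dup
            raise ValueError('duplicate uuid')  # stand-in for DBDuplicateEntry
        row = dict(values, id=next(_ids), deleted=0, deleted_at=None)
        _rows.append(row)
        return row


    def soft_delete(row):
        row['deleted'] = row['id']  # Nova's soft-delete convention
        row['deleted_at'] = 'now'


    node_uuid = str(uuidlib.uuid4())
    cn = create({'uuid': node_uuid, 'host': 'node1', 'node': 'node2'})
    soft_delete(cn)
    cn2 = create({'uuid': node_uuid, 'host': 'node1', 'node': 'node2'})
    assert cn2['id'] == cn['id'] and cn2['uuid'] == cn['uuid']
    assert cn2['deleted'] == 0 and cn2['deleted_at'] is None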


@@ -6912,6 +6912,18 @@ class ComputeNodeTestCase(test.TestCase, ModelsObjectComparatorMixin):
         new_stats = jsonutils.loads(self.item['stats'])
         self.assertEqual(self.stats, new_stats)
 
+    def test_compute_node_create_duplicate_host_hypervisor_hostname(self):
+        """Tests to make sure that DBDuplicateEntry is raised when trying to
+        create a duplicate ComputeNode with the same host and
+        hypervisor_hostname values but different uuid values. This makes
+        sure that when _compute_node_get_and_update_deleted returns None
+        the DBDuplicateEntry is re-raised.
+        """
+        other_node = dict(self.compute_node_dict)
+        other_node['uuid'] = uuidutils.generate_uuid()
+        self.assertRaises(db_exc.DBDuplicateEntry,
+                          db.compute_node_create, self.ctxt, other_node)
+
     def test_compute_node_get_all(self):
         nodes = db.compute_node_get_all(self.ctxt)
         self.assertEqual(1, len(nodes))
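
The duplicate in this unit test comes from a unique constraint covering host and hypervisor_hostname rather than from uuid, so the recovery path finds no soft-deleted row for the new uuid and re-raises. A minimal SQLAlchemy sketch of that failure mode; the schema is a trimmed approximation for illustration, not Nova's real migration, and the inclusion of deleted in the constraint is an assumption:

    import sqlalchemy as sa

    engine = sa.create_engine('sqlite://')
    metadata = sa.MetaData()
    compute_nodes = sa.Table(
        'compute_nodes', metadata,
        sa.Column('id', sa.Integer, primary_key=True),
        sa.Column('uuid', sa.String(36), nullable=False, unique=True),
        sa.Column('host', sa.String(255)),
        sa.Column('hypervisor_hostname', sa.String(255)),
        sa.Column('deleted', sa.Integer, nullable=False, default=0),
        # Soft-deleted rows set deleted=id, so they fall out of this constraint.
        sa.UniqueConstraint('host', 'hypervisor_hostname', 'deleted'),
    )
    metadata.create_all(engine)

    with engine.begin() as conn:
        conn.execute(compute_nodes.insert().values(
            uuid='11111111-1111-1111-1111-111111111111',
            host='node1', hypervisor_hostname='node2', deleted=0))

    # Same host/hypervisor_hostname, different uuid: rejected, and with no
    # soft-deleted row carrying the new uuid, the duplicate error stands.
    try:
        with engine.begin() as conn:
            conn.execute(compute_nodes.insert().values(
                uuid='22222222-2222-2222-2222-222222222222',
                host='node1', hypervisor_hostname='node2', deleted=0))
    except sa.exc.IntegrityError as e:
        print('rejected:', e.orig)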