Browse Source

Merge "Restore soft-deleted compute node with same uuid" into stable/stein

changes/19/679519/1
Zuul 3 weeks ago
parent
commit
400040d04d

+ 45
- 1
nova/db/sqlalchemy/api.py View File

@@ -30,6 +30,7 @@ from oslo_db.sqlalchemy import enginefacade
30 30
 from oslo_db.sqlalchemy import update_match
31 31
 from oslo_db.sqlalchemy import utils as sqlalchemyutils
32 32
 from oslo_log import log as logging
33
+from oslo_utils import excutils
33 34
 from oslo_utils import importutils
34 35
 from oslo_utils import timeutils
35 36
 from oslo_utils import uuidutils
@@ -700,11 +701,54 @@ def compute_node_create(context, values):
700 701
 
701 702
     compute_node_ref = models.ComputeNode()
702 703
     compute_node_ref.update(values)
703
-    compute_node_ref.save(context.session)
704
+    try:
705
+        compute_node_ref.save(context.session)
706
+    except db_exc.DBDuplicateEntry:
707
+        with excutils.save_and_reraise_exception(logger=LOG) as err_ctx:
708
+            # Check to see if we have a (soft) deleted ComputeNode with the
709
+            # same UUID and if so just update it and mark as no longer (soft)
710
+            # deleted. See bug 1839560 for details.
711
+            if 'uuid' in values:
712
+                # Get a fresh context for a new DB session and allow it to
713
+                # get a deleted record.
714
+                ctxt = nova.context.get_admin_context(read_deleted='yes')
715
+                compute_node_ref = _compute_node_get_and_update_deleted(
716
+                    ctxt, values)
717
+                # If we didn't get anything back we failed to find the node
718
+                # by uuid and update it so re-raise the DBDuplicateEntry.
719
+                if compute_node_ref:
720
+                    err_ctx.reraise = False
704 721
 
705 722
     return compute_node_ref
706 723
 
707 724
 
725
+@pick_context_manager_writer
726
+def _compute_node_get_and_update_deleted(context, values):
727
+    """Find a ComputeNode by uuid, update and un-delete it.
728
+
729
+    This is a special case from the ``compute_node_create`` method which
730
+    needs to be separate to get a new Session.
731
+
732
+    This method will update the ComputeNode, if found, to have deleted=0 and
733
+    deleted_at=None values.
734
+
735
+    :param context: request auth context which should be able to read deleted
736
+        records
737
+    :param values: values used to update the ComputeNode record - must include
738
+        uuid
739
+    :return: updated ComputeNode sqlalchemy model object if successfully found
740
+        and updated, None otherwise
741
+    """
742
+    cn = model_query(
743
+        context, models.ComputeNode).filter_by(uuid=values['uuid']).first()
744
+    if cn:
745
+        # Update with the provided values but un-soft-delete.
746
+        update_values = copy.deepcopy(values)
747
+        update_values['deleted'] = 0
748
+        update_values['deleted_at'] = None
749
+        return compute_node_update(context, cn.id, update_values)
750
+
751
+
708 752
 @oslo_db_api.wrap_db_retry(max_retries=5, retry_on_deadlock=True)
709 753
 @pick_context_manager_writer
710 754
 def compute_node_update(context, compute_id, values):

+ 17
- 23
nova/tests/functional/regressions/test_bug_1839560.py View File

@@ -14,7 +14,6 @@ from oslo_log import log as logging
14 14
 
15 15
 from nova import context
16 16
 from nova.db import api as db_api
17
-from nova import exception
18 17
 from nova import objects
19 18
 from nova import test
20 19
 from nova.tests import fixtures as nova_fixtures
@@ -91,30 +90,25 @@ class PeriodicNodeRecreateTestCase(test.TestCase,
91 90
         # Now stub the driver again to report node2 as being back and run
92 91
         # the periodic task.
93 92
         compute.manager.driver._nodes = ['node1', 'node2']
93
+        LOG.info('Running update_available_resource which should bring back '
94
+                 'node2.')
94 95
         compute.manager.update_available_resource(ctxt)
95
-        # FIXME(mriedem): This is bug 1839560 where the ResourceTracker fails
96
-        # to create a ComputeNode for node2 because of conflicting UUIDs.
96
+        # The DBDuplicateEntry error should have been handled and resulted in
97
+        # updating the (soft) deleted record to no longer be deleted.
97 98
         log = self.stdlog.logger.output
98
-        self.assertIn('Error updating resources for node node2', log)
99
-        self.assertIn('DBDuplicateEntry', log)
100
-        # Should still only have one reported hypervisor (node1).
99
+        self.assertNotIn('DBDuplicateEntry', log)
100
+        # Should have two reported hypervisors again.
101 101
         hypervisors = self.api.api_get('/os-hypervisors').body['hypervisors']
102
-        self.assertEqual(1, len(hypervisors), hypervisors)
103
-        # Test the workaround for bug 1839560 by archiving the deleted node2
104
-        # compute_nodes table record which will allow the periodic to create a
105
-        # new entry for node2. We can remove this when the bug is fixed.
102
+        self.assertEqual(2, len(hypervisors), hypervisors)
103
+        # Now that the node2 record was un-soft-deleted, archiving should not
104
+        # remove any compute_nodes.
106 105
         LOG.info('Archiving the database.')
107 106
         archived = db_api.archive_deleted_rows(1000)[0]
108
-        self.assertIn('compute_nodes', archived)
109
-        self.assertEqual(1, archived['compute_nodes'])
110
-        with utils.temporary_mutation(ctxt, read_deleted='yes'):
111
-            self.assertRaises(exception.ComputeHostNotFound,
112
-                              objects.ComputeNode.get_by_host_and_nodename,
113
-                              ctxt, 'node1', 'node2')
114
-        # Now run the periodic again and we should have a new ComputeNode for
115
-        # node2.
116
-        LOG.info('Running update_available_resource which should create a new '
117
-                 'ComputeNode record for node2.')
118
-        compute.manager.update_available_resource(ctxt)
119
-        hypervisors = self.api.api_get('/os-hypervisors').body['hypervisors']
120
-        self.assertEqual(2, len(hypervisors), hypervisors)
107
+        self.assertNotIn('compute_nodes', archived)
108
+        cn2 = objects.ComputeNode.get_by_host_and_nodename(
109
+            ctxt, 'node1', 'node2')
110
+        self.assertFalse(cn2.deleted)
111
+        self.assertIsNone(cn2.deleted_at)
112
+        # The node2 id and uuid should not have changed in the DB.
113
+        self.assertEqual(cn.id, cn2.id)
114
+        self.assertEqual(cn.uuid, cn2.uuid)

+ 12
- 0
nova/tests/unit/db/test_db_api.py View File

@@ -7014,6 +7014,18 @@ class ComputeNodeTestCase(test.TestCase, ModelsObjectComparatorMixin):
7014 7014
         new_stats = jsonutils.loads(self.item['stats'])
7015 7015
         self.assertEqual(self.stats, new_stats)
7016 7016
 
7017
+    def test_compute_node_create_duplicate_host_hypervisor_hostname(self):
7018
+        """Tests to make sure that DBDuplicateEntry is raised when trying to
7019
+        create a duplicate ComputeNode with the same host and
7020
+        hypervisor_hostname values but different uuid values. This makes
7021
+        sure that when _compute_node_get_and_update_deleted returns None
7022
+        the DBDuplicateEntry is re-raised.
7023
+        """
7024
+        other_node = dict(self.compute_node_dict)
7025
+        other_node['uuid'] = uuidutils.generate_uuid()
7026
+        self.assertRaises(db_exc.DBDuplicateEntry,
7027
+                          db.compute_node_create, self.ctxt, other_node)
7028
+
7017 7029
     def test_compute_node_get_all(self):
7018 7030
         nodes = db.compute_node_get_all(self.ctxt)
7019 7031
         self.assertEqual(1, len(nodes))

Loading…
Cancel
Save