Browse Source

Merge "Add functional regression recreate test for bug 1839560" into stable/stein

changes/19/679519/1
Zuul 3 weeks ago
parent
commit
aff669f7cd
2 changed files with 134 additions and 0 deletions
  1. 120
    0
      nova/tests/functional/regressions/test_bug_1839560.py
  2. 14
    0
      nova/virt/fake.py

+ 120
- 0
nova/tests/functional/regressions/test_bug_1839560.py View File

@@ -0,0 +1,120 @@
1
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
2
+# not use this file except in compliance with the License. You may obtain
3
+# a copy of the License at
4
+#
5
+#      http://www.apache.org/licenses/LICENSE-2.0
6
+#
7
+# Unless required by applicable law or agreed to in writing, software
8
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10
+# License for the specific language governing permissions and limitations
11
+# under the License.
12
+
13
+from oslo_log import log as logging
14
+
15
+from nova import context
16
+from nova.db import api as db_api
17
+from nova import exception
18
+from nova import objects
19
+from nova import test
20
+from nova.tests import fixtures as nova_fixtures
21
+from nova.tests.functional import fixtures as func_fixtures
22
+from nova.tests.functional import integrated_helpers
23
+from nova import utils
24
+from nova.virt import fake as fake_virt
25
+
26
+LOG = logging.getLogger(__name__)
27
+
28
+
29
class PeriodicNodeRecreateTestCase(test.TestCase,
                                   integrated_helpers.InstanceHelperMixin):
    """Regression test for bug 1839560 introduced in Rocky.

    While an ironic node is in maintenance the driver stops reporting it to
    the ComputeManager.update_available_resource periodic task, which then
    soft-deletes the node's ComputeNode record as an orphan. Once the node
    comes back and is reported again, the ResourceTracker tries to create a
    fresh ComputeNode record for it.

    The regression from change Ia69fabce8e7fd7de101e291fe133c6f5f5f7056a is
    that the ironic node uuid is reused as ComputeNode.uuid, and the database
    enforces a unique constraint on that column. Because the soft-deleted row
    still holds the same uuid, the re-create fails with a DuplicateEntry
    error until the deleted row is archived out of the table.
    """

    def setUp(self):
        super(PeriodicNodeRecreateTestCase, self).setUp()
        # Placement is needed so the compute nodes can report in, but no
        # placement-specific behavior is asserted by this test.
        self.useFixture(func_fixtures.PlacementFixture())
        # The admin API is used below to inspect the os-hypervisors listing.
        self.api = self.useFixture(nova_fixtures.OSAPIFixture(
            api_version='v2.1')).admin_api
        # The fake driver variant below reports a deterministic uuid per
        # node, which is what triggers the duplicate-uuid conflict.
        self.flags(compute_driver='fake.PredictableNodeUUIDDriver')

    def _list_hypervisors(self):
        # Fetch the current hypervisor list via the admin os-hypervisors API.
        return self.api.api_get('/os-hypervisors').body['hypervisors']

    def test_update_available_resource_node_recreate(self):
        # Stand up one compute service that manages two fake nodes; starting
        # the service creates the node1 and node2 ComputeNode records.
        fake_virt.set_nodes(['node1', 'node2'])
        self.addCleanup(fake_virt.restore_nodes)
        compute = self.start_service('compute', 'node1')
        # Both nodes should now be visible through the hypervisors API.
        hyps = self._list_hypervisors()
        self.assertEqual(2, len(hyps), hyps)
        self.assertEqual({'node1', 'node2'},
                         {hyp['hypervisor_hostname'] for hyp in hyps})
        # Make the driver report only node1, simulating node2 dropping out
        # (e.g. ironic maintenance) before the periodic task runs.
        compute.manager.driver._nodes = ['node1']
        ctxt = context.get_admin_context()
        compute.manager.update_available_resource(ctxt)
        # The periodic should have orphan-deleted node2; verify via logs and
        # the API listing.
        log = self.stdlog.logger.output
        self.assertIn('Deleting orphan compute node', log)
        self.assertIn('hypervisor host is node2', log)
        hyps = self._list_hypervisors()
        self.assertEqual(1, len(hyps), hyps)
        self.assertEqual('node1', hyps[0]['hypervisor_hostname'])
        # The node2 record is only soft-deleted: still in the database with
        # a non-zero deleted column.
        with utils.temporary_mutation(ctxt, read_deleted='yes'):
            cn = objects.ComputeNode.get_by_host_and_nodename(
                ctxt, 'node1', 'node2')
            self.assertTrue(cn.deleted)
        # Bring node2 back and run the periodic task again.
        compute.manager.driver._nodes = ['node1', 'node2']
        compute.manager.update_available_resource(ctxt)
        # FIXME(mriedem): This is bug 1839560 where the ResourceTracker fails
        # to create a ComputeNode for node2 because of conflicting UUIDs.
        log = self.stdlog.logger.output
        self.assertIn('Error updating resources for node node2', log)
        self.assertIn('DBDuplicateEntry', log)
        # node1 remains the only reported hypervisor.
        hyps = self._list_hypervisors()
        self.assertEqual(1, len(hyps), hyps)
        # Workaround for bug 1839560: archive the soft-deleted node2 row out
        # of compute_nodes so the periodic can create a new entry. This can
        # be removed once the bug is fixed.
        LOG.info('Archiving the database.')
        archived = db_api.archive_deleted_rows(1000)[0]
        self.assertIn('compute_nodes', archived)
        self.assertEqual(1, archived['compute_nodes'])
        with utils.temporary_mutation(ctxt, read_deleted='yes'):
            self.assertRaises(exception.ComputeHostNotFound,
                              objects.ComputeNode.get_by_host_and_nodename,
                              ctxt, 'node1', 'node2')
        # With the stale row archived, the periodic should recreate node2.
        LOG.info('Running update_available_resource which should create a new '
                 'ComputeNode record for node2.')
        compute.manager.update_available_resource(ctxt)
        hyps = self._list_hypervisors()
        self.assertEqual(2, len(hyps), hyps)

+ 14
- 0
nova/virt/fake.py View File

@@ -27,6 +27,7 @@ import collections
27 27
 import contextlib
28 28
 import copy
29 29
 import time
30
+import uuid
30 31
 
31 32
 import fixtures
32 33
 import os_resource_classes as orc
@@ -738,6 +739,19 @@ class FakeFinishMigrationFailDriver(FakeDriver):
738 739
         raise exception.VirtualInterfaceCreateException()
739 740
 
740 741
 
742
class PredictableNodeUUIDDriver(SmallFakeDriver):
    """SmallFakeDriver variant that reports a predictable node uuid in
    get_available_resource, like IronicDriver.
    """

    def get_available_resource(self, nodename):
        # Start from the stock SmallFakeDriver resource report.
        parent = super(PredictableNodeUUIDDriver, self)
        resources = parent.get_available_resource(nodename)
        # Derive a deterministic uuid from the node name so the same node
        # always reports the same uuid. ComputeNode.update_from_virt_driver,
        # called from the ResourceTracker when creating a ComputeNode,
        # consumes this value.
        resources['uuid'] = uuid.uuid5(uuid.NAMESPACE_DNS, nodename)
        return resources
753
+
754
+
741 755
 class FakeRescheduleDriver(FakeDriver):
742 756
     """FakeDriver derivative that triggers a reschedule on the first spawn
743 757
     attempt. This is expected to only be used in tests that have more than

Loading…
Cancel
Save