Browse Source

Make sure not to try reserving an already reserved node

After the switch to openstacksdk we no longer have a sufficient check
on a node's availability or maintenance status. This patch restores it.

Change-Id: I2c85cf0adb02061b3dd85f19dd10c8a5af1118da
Dmitry Tantsur 2 months ago
parent
commit
e795f6c841

+ 13
- 9
metalsmith/_provisioner.py View File

@@ -98,25 +98,29 @@ class Provisioner(_utils.GetNodeMixin):
98 98
 
99 99
         if candidates:
100 100
             nodes = [self._get_node(node) for node in candidates]
101
-            filters = [
102
-                _scheduler.NodeTypeFilter(resource_class, conductor_group),
103
-            ]
104 101
         else:
102
+            kwargs = {}
103
+            if conductor_group:
104
+                kwargs['conductor_group'] = conductor_group
105 105
             nodes = list(self.connection.baremetal.nodes(
106
+                associated=False,
107
+                provision_state='available',
108
+                maintenance=False,
106 109
                 resource_class=resource_class,
107
-                conductor_group=conductor_group,
108
-                details=True))
110
+                details=True,
111
+                **kwargs))
109 112
             if not nodes:
110 113
                 raise exceptions.NodesNotFound(resource_class, conductor_group)
111 114
             # Ensure parallel executions don't try nodes in the same sequence
112 115
             random.shuffle(nodes)
113
-            # No need to filter by resource_class and conductor_group any more
114
-            filters = []
115 116
 
116 117
         LOG.debug('Candidate nodes: %s', nodes)
117 118
 
118
-        filters.append(_scheduler.CapabilitiesFilter(capabilities))
119
-        filters.append(_scheduler.TraitsFilter(traits))
119
+        filters = [
120
+            _scheduler.NodeTypeFilter(resource_class, conductor_group),
121
+            _scheduler.CapabilitiesFilter(capabilities),
122
+            _scheduler.TraitsFilter(traits),
123
+        ]
120 124
         if predicate is not None:
121 125
             filters.append(_scheduler.CustomPredicateFilter(predicate))
122 126
 

+ 27
- 6
metalsmith/_scheduler.py View File

@@ -123,12 +123,33 @@ class NodeTypeFilter(Filter):
123 123
         self.conductor_group = conductor_group
124 124
 
125 125
     def __call__(self, node):
126
-        return (
127
-            (self.resource_class is None or
128
-             node.resource_class == self.resource_class) and
129
-            (self.conductor_group is None or
130
-             node.conductor_group == self.conductor_group)
131
-        )
126
+        if node.instance_id:
127
+            LOG.debug('Node %s is already reserved', _utils.log_res(node))
128
+            return False
129
+
130
+        if node.is_maintenance:
131
+            LOG.debug('Node %s is in maintenance', _utils.log_res(node))
132
+            return False
133
+
134
+        if (self.resource_class is not None
135
+                and node.resource_class != self.resource_class):
136
+            LOG.debug('Resource class %(real)s does not match the expected '
137
+                      'value of %(exp)s for node %(node)s',
138
+                      {'node': _utils.log_res(node),
139
+                       'exp': self.resource_class,
140
+                       'real': node.resource_class})
141
+            return False
142
+
143
+        if (self.conductor_group is not None
144
+                and node.conductor_group != self.conductor_group):
145
+            LOG.debug('Conductor group %(real)s does not match the expected '
146
+                      'value of %(exp)s for node %(node)s',
147
+                      {'node': _utils.log_res(node),
148
+                       'exp': self.conductor_group,
149
+                       'real': node.conductor_group})
150
+            return False
151
+
152
+        return True
132 153
 
133 154
     def fail(self):
134 155
         raise exceptions.NodesNotFound(self.resource_class,

+ 2
- 2
metalsmith/exceptions.py View File

@@ -35,9 +35,9 @@ class NodesNotFound(ReservationFailed):
35 35
 
36 36
     def __init__(self, resource_class, conductor_group):
37 37
         message = "No available nodes%(rc)s found%(cg)s" % {
38
-            'rc': 'with resource class %s' % resource_class
38
+            'rc': ' with resource class %s' % resource_class
39 39
             if resource_class else '',
40
-            'cg': 'in conductor group %s' % (conductor_group or '<default>')
40
+            'cg': ' in conductor group %s' % (conductor_group or '<default>')
41 41
             if conductor_group is not None else ''
42 42
         }
43 43
         self.requested_resource_class = resource_class

+ 14
- 5
metalsmith/test/test_provisioner.py View File

@@ -29,7 +29,7 @@ from metalsmith import sources
29 29
 
30 30
 NODE_FIELDS = ['name', 'id', 'instance_info', 'instance_id', 'is_maintenance',
31 31
                'maintenance_reason', 'properties', 'provision_state', 'extra',
32
-               'last_error', 'traits']
32
+               'last_error', 'traits', 'resource_class', 'conductor_group']
33 33
 
34 34
 
35 35
 class TestInit(testtools.TestCase):
@@ -98,6 +98,8 @@ class TestReserveNode(Base):
98 98
         kwargs.setdefault('id', '000')
99 99
         kwargs.setdefault('properties', {'local_gb': 100})
100 100
         kwargs.setdefault('instance_info', {})
101
+        kwargs.setdefault('instance_id', None)
102
+        kwargs.setdefault('is_maintenance', False)
101 103
         return mock.Mock(spec=NODE_FIELDS, **kwargs)
102 104
 
103 105
     def test_no_nodes(self):
@@ -108,7 +110,7 @@ class TestReserveNode(Base):
108 110
         self.assertFalse(self.api.baremetal.update_node.called)
109 111
 
110 112
     def test_simple_ok(self):
111
-        nodes = [self._node()]
113
+        nodes = [self._node(resource_class='control')]
112 114
         self.api.baremetal.nodes.return_value = nodes
113 115
 
114 116
         node = self.pr.reserve_node('control')
@@ -129,7 +131,8 @@ class TestReserveNode(Base):
129 131
 
130 132
     def test_with_capabilities(self):
131 133
         nodes = [
132
-            self._node(properties={'local_gb': 100, 'capabilities': caps})
134
+            self._node(properties={'local_gb': 100, 'capabilities': caps},
135
+                       resource_class='control')
133 136
             for caps in ['answer:1', 'answer:42', None]
134 137
         ]
135 138
         expected = nodes[1]
@@ -235,8 +238,14 @@ class TestReserveNode(Base):
235 238
             instance_info={'capabilities': {'cat': 'meow'}})
236 239
 
237 240
     def test_provided_nodes_no_match(self):
238
-        nodes = [self._node(resource_class='compute', conductor_group='loc1'),
239
-                 self._node(resource_class='control', conductor_group='loc2')]
241
+        nodes = [
242
+            self._node(resource_class='compute', conductor_group='loc1'),
243
+            self._node(resource_class='control', conductor_group='loc2'),
244
+            self._node(resource_class='control', conductor_group='loc1',
245
+                       is_maintenance=True),
246
+            self._node(resource_class='control', conductor_group='loc1',
247
+                       instance_id='abcd')
248
+        ]
240 249
 
241 250
         self.assertRaises(exceptions.NodesNotFound,
242 251
                           self.pr.reserve_node, candidates=nodes,

+ 5
- 0
releasenotes/notes/associated-993c26ac5dc0cfc0.yaml View File

@@ -0,0 +1,5 @@
1
+---
2
+critical:
3
+  - |
4
+    Fixes a regression that caused deployed nodes to be picked for deployment
5
+    again.

Loading…
Cancel
Save