Browse Source

Merge "Check for deploy.deploy deploy step in heartbeat" into stable/stein

changes/52/676152/2
Zuul 1 month ago
parent
commit
cdf5bca715

+ 34
- 11
ironic/drivers/modules/agent_base_vendor.py View File

@@ -250,6 +250,24 @@ class HeartbeatMixin(object):
250 250
         :returns: True if the deployment is completed. False otherwise
251 251
         """
252 252
 
253
+    def in_core_deploy_step(self, task):
254
+        """Check if we are in the deploy.deploy deploy step.
255
+
256
+        Assumes that we are in the DEPLOYWAIT state.
257
+
258
+        :param task: a TaskManager instance
259
+        :returns: True if in deploy.deploy or if the node has no deploy steps.
260
+        """
261
+        # TODO(mgoddard): Remove this 'if' in the Train release, after the
262
+        # deprecation period for supporting drivers with no deploy steps.
263
+        if not task.node.driver_internal_info.get('deploy_steps'):
264
+            return True
265
+
266
+        step = task.node.deploy_step
267
+        return (step
268
+                and step['interface'] == 'deploy'
269
+                and step['step'] == 'deploy')
270
+
253 271
     def reboot_to_instance(self, task):
254 272
         """Method invoked after the deployment is completed.
255 273
 
@@ -333,17 +351,22 @@ class HeartbeatMixin(object):
333 351
                 LOG.debug('Heartbeat from node %(node)s in maintenance mode; '
334 352
                           'not taking any action.', {'node': node.uuid})
335 353
                 return
336
-            elif (node.provision_state == states.DEPLOYWAIT
337
-                  and not self.deploy_has_started(task)):
338
-                msg = _('Node failed to deploy.')
339
-                self.continue_deploy(task)
340
-            elif (node.provision_state == states.DEPLOYWAIT
341
-                  and self.deploy_is_done(task)):
342
-                msg = _('Node failed to move to active state.')
343
-                self.reboot_to_instance(task)
344
-            elif (node.provision_state == states.DEPLOYWAIT
345
-                  and self.deploy_has_started(task)):
346
-                node.touch_provisioning()
354
+            # NOTE(mgoddard): Only handle heartbeats during DEPLOYWAIT if we
355
+            # are currently in the core deploy.deploy step. Other deploy steps
356
+            # may cause the agent to boot, but we should not trigger deployment
357
+            # at that point.
358
+            elif node.provision_state == states.DEPLOYWAIT:
359
+                if self.in_core_deploy_step(task):
360
+                    if not self.deploy_has_started(task):
361
+                        msg = _('Node failed to deploy.')
362
+                        self.continue_deploy(task)
363
+                    elif self.deploy_is_done(task):
364
+                        msg = _('Node failed to move to active state.')
365
+                        self.reboot_to_instance(task)
366
+                    else:
367
+                        node.touch_provisioning()
368
+                else:
369
+                    node.touch_provisioning()
347 370
             elif node.provision_state == states.CLEANWAIT:
348 371
                 node.touch_provisioning()
349 372
                 if not node.clean_step:

+ 135
- 3
ironic/tests/unit/drivers/modules/test_agent_base_vendor.py View File

@@ -80,6 +80,97 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
80 80
         super(HeartbeatMixinTest, self).setUp()
81 81
         self.deploy = agent_base_vendor.HeartbeatMixin()
82 82
 
83
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin,
84
+                       'in_core_deploy_step', autospec=True)
85
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin,
86
+                       'deploy_has_started', autospec=True)
87
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy',
88
+                       autospec=True)
89
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin,
90
+                       'reboot_to_instance', autospec=True)
91
+    def test_heartbeat_continue_deploy(self, rti_mock, cd_mock,
92
+                                       deploy_started_mock,
93
+                                       in_deploy_mock):
94
+        in_deploy_mock.return_value = True
95
+        deploy_started_mock.return_value = False
96
+        self.node.provision_state = states.DEPLOYWAIT
97
+        self.node.save()
98
+        with task_manager.acquire(self.context, self.node.uuid,
99
+                                  shared=True) as task:
100
+            self.deploy.heartbeat(task, 'url', '3.2.0')
101
+            self.assertFalse(task.shared)
102
+            self.assertEqual(
103
+                'url', task.node.driver_internal_info['agent_url'])
104
+            self.assertEqual(
105
+                '3.2.0',
106
+                task.node.driver_internal_info['agent_version'])
107
+            cd_mock.assert_called_once_with(self.deploy, task)
108
+            self.assertFalse(rti_mock.called)
109
+
110
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin,
111
+                       'in_core_deploy_step', autospec=True)
112
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin,
113
+                       'deploy_has_started', autospec=True)
114
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin,
115
+                       'deploy_is_done', autospec=True)
116
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy',
117
+                       autospec=True)
118
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin,
119
+                       'reboot_to_instance', autospec=True)
120
+    def test_heartbeat_reboot_to_instance(self, rti_mock, cd_mock,
121
+                                          deploy_is_done_mock,
122
+                                          deploy_started_mock,
123
+                                          in_deploy_mock):
124
+        in_deploy_mock.return_value = True
125
+        deploy_started_mock.return_value = True
126
+        deploy_is_done_mock.return_value = True
127
+        self.node.provision_state = states.DEPLOYWAIT
128
+        self.node.save()
129
+        with task_manager.acquire(self.context, self.node.uuid,
130
+                                  shared=True) as task:
131
+            self.deploy.heartbeat(task, 'url', '3.2.0')
132
+            self.assertFalse(task.shared)
133
+            self.assertEqual(
134
+                'url', task.node.driver_internal_info['agent_url'])
135
+            self.assertEqual(
136
+                '3.2.0',
137
+                task.node.driver_internal_info['agent_version'])
138
+            self.assertFalse(cd_mock.called)
139
+            rti_mock.assert_called_once_with(self.deploy, task)
140
+
141
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin,
142
+                       'in_core_deploy_step', autospec=True)
143
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin,
144
+                       'deploy_has_started', autospec=True)
145
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin,
146
+                       'deploy_is_done', autospec=True)
147
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy',
148
+                       autospec=True)
149
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin,
150
+                       'reboot_to_instance', autospec=True)
151
+    def test_heartbeat_not_in_core_deploy_step(self, rti_mock, cd_mock,
152
+                                               deploy_is_done_mock,
153
+                                               deploy_started_mock,
154
+                                               in_deploy_mock):
155
+        # Check that heartbeats do not trigger deployment actions when not in
156
+        # the deploy.deploy step.
157
+        in_deploy_mock.return_value = False
158
+        self.node.provision_state = states.DEPLOYWAIT
159
+        self.node.save()
160
+        with task_manager.acquire(self.context, self.node.uuid,
161
+                                  shared=True) as task:
162
+            self.deploy.heartbeat(task, 'url', '3.2.0')
163
+            self.assertFalse(task.shared)
164
+            self.assertEqual(
165
+                'url', task.node.driver_internal_info['agent_url'])
166
+            self.assertEqual(
167
+                '3.2.0',
168
+                task.node.driver_internal_info['agent_version'])
169
+            self.assertFalse(deploy_started_mock.called)
170
+            self.assertFalse(deploy_is_done_mock.called)
171
+            self.assertFalse(cd_mock.called)
172
+            self.assertFalse(rti_mock.called)
173
+
83 174
     @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'continue_deploy',
84 175
                        autospec=True)
85 176
     @mock.patch.object(agent_base_vendor.HeartbeatMixin,
@@ -157,6 +248,8 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
157 248
             self.assertEqual(0, rti_mock.call_count)
158 249
             self.assertEqual(0, cd_mock.call_count)
159 250
 
251
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin,
252
+                       'in_core_deploy_step', autospec=True)
160 253
     @mock.patch.object(agent_base_vendor.HeartbeatMixin,
161 254
                        'deploy_has_started', autospec=True)
162 255
     @mock.patch.object(deploy_utils, 'set_failed_state', autospec=True)
@@ -164,7 +257,9 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
164 257
                        autospec=True)
165 258
     @mock.patch.object(agent_base_vendor.LOG, 'exception', autospec=True)
166 259
     def test_heartbeat_deploy_done_fails(self, log_mock, done_mock,
167
-                                         failed_mock, deploy_started_mock):
260
+                                         failed_mock, deploy_started_mock,
261
+                                         in_deploy_mock):
262
+        in_deploy_mock.return_value = True
168 263
         deploy_started_mock.return_value = True
169 264
         done_mock.side_effect = Exception('LlamaException')
170 265
         with task_manager.acquire(
@@ -179,6 +274,8 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
179 274
                 'Exception: LlamaException for node %(node)s',
180 275
                 {'node': task.node.uuid})
181 276
 
277
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin,
278
+                       'in_core_deploy_step', autospec=True)
182 279
     @mock.patch.object(agent_base_vendor.HeartbeatMixin,
183 280
                        'deploy_has_started', autospec=True)
184 281
     @mock.patch.object(deploy_utils, 'set_failed_state', autospec=True)
@@ -187,7 +284,9 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
187 284
     @mock.patch.object(agent_base_vendor.LOG, 'exception', autospec=True)
188 285
     def test_heartbeat_deploy_done_raises_with_event(self, log_mock, done_mock,
189 286
                                                      failed_mock,
190
-                                                     deploy_started_mock):
287
+                                                     deploy_started_mock,
288
+                                                     in_deploy_mock):
289
+        in_deploy_mock.return_value = True
191 290
         deploy_started_mock.return_value = True
192 291
         with task_manager.acquire(
193 292
                 self.context, self.node['uuid'], shared=False) as task:
@@ -341,12 +440,16 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
341 440
             task, 'Asynchronous exception: Node failed to perform '
342 441
             'rescue operation. Exception: some failure for node')
343 442
 
443
+    @mock.patch.object(agent_base_vendor.HeartbeatMixin,
444
+                       'in_core_deploy_step', autospec=True)
344 445
     @mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True)
345 446
     @mock.patch.object(agent_base_vendor.HeartbeatMixin,
346 447
                        'deploy_has_started', autospec=True)
347 448
     def test_heartbeat_touch_provisioning_and_url_save(self,
348 449
                                                        mock_deploy_started,
349
-                                                       mock_touch):
450
+                                                       mock_touch,
451
+                                                       mock_in_deploy):
452
+        mock_in_deploy.return_value = True
350 453
         mock_deploy_started.return_value = True
351 454
 
352 455
         self.node.provision_state = states.DEPLOYWAIT
@@ -381,6 +484,35 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest):
381 484
                     task.node.driver_internal_info['agent_last_heartbeat'])
382 485
                 self.assertEqual(provision_state, task.node.provision_state)
383 486
 
487
+    def test_in_core_deploy_step(self):
488
+        self.node.deploy_step = {
489
+            'interface': 'deploy', 'step': 'deploy', 'priority': 100}
490
+        info = self.node.driver_internal_info
491
+        info['deploy_steps'] = [self.node.deploy_step]
492
+        self.node.driver_internal_info = info
493
+        self.node.save()
494
+        with task_manager.acquire(
495
+                self.context, self.node.uuid, shared=False) as task:
496
+            self.assertTrue(self.deploy.in_core_deploy_step(task))
497
+
498
+    def test_in_core_deploy_step_no_steps_list(self):
499
+        # Need to handle drivers without deploy step support; remove in the
500
+        # Train release.
501
+        with task_manager.acquire(
502
+                self.context, self.node.uuid, shared=False) as task:
503
+            self.assertTrue(self.deploy.in_core_deploy_step(task))
504
+
505
+    def test_in_core_deploy_step_in_other_step(self):
506
+        self.node.deploy_step = {
507
+            'interface': 'deploy', 'step': 'other-step', 'priority': 100}
508
+        info = self.node.driver_internal_info
509
+        info['deploy_steps'] = [self.node.deploy_step]
510
+        self.node.driver_internal_info = info
511
+        self.node.save()
512
+        with task_manager.acquire(
513
+                self.context, self.node.uuid, shared=False) as task:
514
+            self.assertFalse(self.deploy.in_core_deploy_step(task))
515
+
384 516
 
385 517
 class AgentRescueTests(AgentDeployMixinBaseTest):
386 518
 

+ 6
- 0
releasenotes/notes/fix-deploywait-errors-during-deploy-5a4279c0c1a6d4d9.yaml View File

@@ -0,0 +1,6 @@
1
+---
2
+fixes:
3
+  - |
4
+    Fixes spurious deployment warnings being logged by the
5
+    ``ironic-conductor`` service indicating that the heartbeats from the
6
+    deployment ramdisk could not be processed in ``DEPLOYWAIT`` state.

Loading…
Cancel
Save