Browse Source

Exclude generated objects from wait logic

This excludes the following generated objects from wait logic:

1. cronjob-generated jobs: these are not directly part of the release,
   so better not to wait on them. if there is a desire to wait on initial
   cronjob success, we can add a separate "type: cronjob" wait for this
   for that purpose.
2. job-generated pods: for the purposes of waiting on jobs, one should
   ensure their configuration includes a "type: job" wait. Once
   controller-based waits are included by default we can also consider
   excluding controller-owned pods from the "type: pod" wait, as those
   will be handled by the controller-based waits then.

Change-Id: Ibf56c6fef9ef72b62da0b066c92c5f29ee4ecb5f
changes/85/640885/1
Sean Eagan 2 months ago
parent
commit
c838b2def0
2 changed files with 106 additions and 24 deletions
  1. 45
    15
      armada/handlers/wait.py
  2. 61
    9
      armada/tests/unit/handlers/test_wait.py

+ 45
- 15
armada/handlers/wait.py View File

@@ -154,15 +154,23 @@ class ResourceWait(ABC):
154 154
         '''
155 155
         pass
156 156
 
157
-    def include_resource(self, resource):
157
+    def get_exclude_reason(self, resource):
158 158
         '''
159
-        Test to include or exclude a resource in a wait operation. This method
160
-        can be used to exclude resources that should not be included in wait
161
-        operations (e.g. test pods).
159
+        If a resource should be excluded from the wait, returns a message
160
+        to explain why. Unless overridden, all resources are included.
162 161
         :param resource: resource to test
163
-        :returns: boolean representing test result
162
+        :returns: string representing exclude reason
164 163
         '''
165
-        return True
164
+        return None
165
+
166
+    def include_resource(self, resource):
167
+        exclude_reason = self.get_exclude_reason(resource)
168
+
169
+        if exclude_reason:
170
+            LOG.debug('Excluding %s %s from wait: %s', self.resource_type,
171
+                      resource.metadata.name, exclude_reason)
172
+
173
+        return not exclude_reason
166 174
 
167 175
     def handle_resource(self, resource):
168 176
         resource_name = resource.metadata.name
@@ -353,18 +361,21 @@ class PodWait(ResourceWait):
353 361
             resource_type, chart_wait, labels,
354 362
             chart_wait.k8s.client.list_namespaced_pod, **kwargs)
355 363
 
356
-    def include_resource(self, resource):
364
+    def get_exclude_reason(self, resource):
357 365
         pod = resource
358
-        include = not is_test_pod(pod)
359 366
 
360
-        # NOTE(drewwalters96): Test pods may cause wait operations to fail
361
-        # when old resources remain from previous upgrades/tests. Indicate that
362
-        # test pods should not be included in wait operations.
363
-        if not include:
364
-            LOG.debug('Test pod %s will be skipped during wait operations.',
365
-                      pod.metadata.name)
367
+        # Exclude helm test pods
368
+        # TODO: Possibly exclude other helm hook pods/jobs (besides tests)?
369
+        if is_test_pod(pod):
370
+            return 'helm test pod'
371
+
372
+        # Exclude job pods
373
+        # TODO: Once controller-based waits are enabled by default, ignore
374
+        # controller-owned pods as well.
375
+        if has_owner(pod, 'Job'):
376
+            return 'generated by job (wait on that instead if not already)'
366 377
 
367
-        return include
378
+        return None
368 379
 
369 380
     def is_resource_ready(self, resource):
370 381
         pod = resource
@@ -392,6 +403,15 @@ class JobWait(ResourceWait):
392 403
             resource_type, chart_wait, labels,
393 404
             chart_wait.k8s.batch_api.list_namespaced_job, **kwargs)
394 405
 
406
+    def get_exclude_reason(self, resource):
407
+        job = resource
408
+
409
+        # Exclude cronjob jobs
410
+        if has_owner(job, 'CronJob'):
411
+            return 'generated by cronjob (not part of release)'
412
+
413
+        return None
414
+
395 415
     def is_resource_ready(self, resource):
396 416
         job = resource
397 417
         name = job.metadata.name
@@ -406,6 +426,16 @@ class JobWait(ResourceWait):
406 426
         return (msg.format(name), True)
407 427
 
408 428
 
429
+def has_owner(resource, kind=None):
430
+    owner_references = resource.metadata.owner_references or []
431
+
432
+    for owner in owner_references:
433
+        if not kind or kind == owner.kind:
434
+            return True
435
+
436
+    return False
437
+
438
+
409 439
 CountOrPercent = collections.namedtuple('CountOrPercent',
410 440
                                         'number is_percent source')
411 441
 

+ 61
- 9
armada/tests/unit/handlers/test_wait.py View File

@@ -194,12 +194,13 @@ class PodWaitTestCase(base.ArmadaTestCase):
194 194
 
195 195
     def test_include_resource(self):
196 196
 
197
-        def mock_resource(annotations):
197
+        def mock_resource(annotations={}, owner_references=None):
198 198
             resource = mock.Mock()
199 199
             resource.metadata.annotations = annotations
200
+            resource.metadata.owner_references = owner_references
200 201
             return resource
201 202
 
202
-        test_resources = [
203
+        test_pods = [
203 204
             mock_resource({
204 205
                 'key': 'value',
205 206
                 'helm.sh/hook': 'test-success'
@@ -207,18 +208,69 @@ class PodWaitTestCase(base.ArmadaTestCase):
207 208
             mock_resource({'helm.sh/hook': 'test-failure'}),
208 209
             mock_resource({'helm.sh/hook': 'test-success,pre-install'})
209 210
         ]
210
-        non_test_resources = [
211
+        job_pods = [
212
+            mock_resource(owner_references=[mock.Mock(kind='Job')]),
213
+            mock_resource(owner_references=[
214
+                mock.Mock(kind='NotAJob'),
215
+                mock.Mock(kind='Job')
216
+            ])
217
+        ]
218
+        included_pods = [
219
+            mock_resource(),
220
+            mock_resource(owner_references=[]),
211 221
             mock_resource({'helm.sh/hook': 'pre-install'}),
212 222
             mock_resource({'key': 'value'}),
213
-            mock_resource({})
223
+            mock_resource(owner_references=[mock.Mock(kind='NotAJob')])
224
+        ]
225
+
226
+        unit = self.get_unit({})
227
+
228
+        # Validate test pods excluded
229
+        for pod in test_pods:
230
+            self.assertFalse(unit.include_resource(pod))
231
+
232
+        # Validate test pods excluded
233
+        for pod in job_pods:
234
+            self.assertFalse(unit.include_resource(pod))
235
+
236
+        # Validate other resources included
237
+        for pod in included_pods:
238
+            self.assertTrue(unit.include_resource(pod))
239
+
240
+
241
+class JobWaitTestCase(base.ArmadaTestCase):
242
+
243
+    def get_unit(self, labels):
244
+        return wait.JobWait(
245
+            resource_type='job', chart_wait=mock.MagicMock(), labels=labels)
246
+
247
+    def test_include_resource(self):
248
+
249
+        def mock_resource(annotations={}, owner_references=None):
250
+            resource = mock.Mock()
251
+            resource.metadata.annotations = annotations
252
+            resource.metadata.owner_references = owner_references
253
+            return resource
254
+
255
+        cronjob_jobs = [
256
+            mock_resource(owner_references=[mock.Mock(kind='CronJob')]),
257
+            mock_resource(owner_references=[
258
+                mock.Mock(kind='NotACronJob'),
259
+                mock.Mock(kind='CronJob')
260
+            ])
261
+        ]
262
+        included_jobs = [
263
+            mock_resource(),
264
+            mock_resource(owner_references=[]),
265
+            mock_resource(owner_references=[mock.Mock(kind='NotAJob')])
214 266
         ]
215 267
 
216 268
         unit = self.get_unit({})
217 269
 
218
-        # Validate test resources excluded
219
-        for resource in test_resources:
220
-            self.assertFalse(unit.include_resource(resource))
270
+        # Validate test pods excluded
271
+        for job in cronjob_jobs:
272
+            self.assertFalse(unit.include_resource(job))
221 273
 
222 274
         # Validate other resources included
223
-        for resource in non_test_resources:
224
-            self.assertTrue(unit.include_resource(resource))
275
+        for job in included_jobs:
276
+            self.assertTrue(unit.include_resource(job))

Loading…
Cancel
Save