Browse Source

Support requests for specific providers

In Zuul we support a paradigm where a job can be paused, whereupon
dependent jobs then request nodes and are started.  It is nearly
always the case that the user would like the nodes for the dependent
jobs to be in the same provider as the parent, as the use cases
generally involve transferring data between the two.

To support this, add a 'provider' attribute to the node request which,
if present, means that all on-line launchers for that provider must
decline the request before anyone else processes it.  This will cause
the desired behavior if everything is working, and if some calamity
befalls that launcher, other launchers can still attempt to fulfill
the request, which might work, or might not, but performing that
last-ditch effort is fine once there are no alternatives.

Change-Id: I91fe05081695d454651f6068eac5c08ac30ff899
tags/3.5.0
James E. Blair 2 months ago
parent
commit
3561e278c6
3 changed files with 83 additions and 0 deletions
  1. 22
    0
      nodepool/launcher.py
  2. 57
    0
      nodepool/tests/unit/test_launcher.py
  3. 4
    0
      nodepool/zk.py

+ 22
- 0
nodepool/launcher.py View File

@@ -168,6 +168,12 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
168 168
         if provider.max_concurrency == 0:
169 169
             return True
170 170
 
171
+        # Get the launchers which are currently online.  This may
172
+        # become out of date as the loop progresses, but it should be
173
+        # good enough to determine whether we should process requests
174
+        # which express a preference for a specific provider.
175
+        launchers = self.zk.getRegisteredLaunchers()
176
+
171 177
         # Sort requests by queue priority, then, for all requests at
172 178
         # the same priority, use the relative_priority field to
173 179
         # further sort, then finally, the submission order.
@@ -210,6 +216,22 @@ class PoolWorker(threading.Thread, stats.StatsReporter):
210 216
             if self.launcher_id in req.declined_by:
211 217
                 continue
212 218
 
219
+            # Skip this request if it is requesting another provider
220
+            # which is online
221
+            if req.provider and req.provider != self.provider_name:
222
+                # The request is asking for a specific provider
223
+                candidate_launchers = set(
224
+                    [x.id for x in launchers
225
+                     if x.provider_name == req.provider])
226
+                if candidate_launchers:
227
+                    # There is a launcher online which can satisfy the request
228
+                    if not candidate_launchers.issubset(set(req.declined_by)):
229
+                        # It has not yet declined the request, so yield to it.
230
+                        self.log.debug(
231
+                            "Yielding request %s to provider %s %s",
232
+                            req.id, req.provider, candidate_launchers)
233
+                        continue
234
+
213 235
             self.log.debug("Locking request %s", req.id)
214 236
             try:
215 237
                 self.zk.lockNodeRequest(req, blocking=False)

+ 57
- 0
nodepool/tests/unit/test_launcher.py View File

@@ -536,6 +536,63 @@ class TestLauncher(tests.DBTestCase):
536 536
         self.assertEqual(nodes[0].username, 'zuul')
537 537
         self.assertNotEqual(nodes[0].host_keys, [])
538 538
 
539
+    def test_node_request_provider(self):
540
+        """Test that a node request for a specific provider is honored"""
541
+        configfile = self.setup_config('node_two_provider.yaml')
542
+        self.useBuilder(configfile)
543
+        pool = self.useNodepool(configfile, watermark_sleep=1)
544
+        pool.start()
545
+        # Validate we have images in both providers
546
+        self.waitForImage('fake-provider', 'fake-image')
547
+        self.waitForImage('fake-provider2', 'fake-image')
548
+        self.waitForNodes('fake-label', 1)
549
+
550
+        req1 = zk.NodeRequest()
551
+        req1.state = zk.REQUESTED
552
+        req1.provider = 'fake-provider'
553
+        req1.node_types.append('fake-label')
554
+        self.zk.storeNodeRequest(req1)
555
+
556
+        req2 = zk.NodeRequest()
557
+        req2.state = zk.REQUESTED
558
+        req2.provider = 'fake-provider2'
559
+        req2.node_types.append('fake-label')
560
+        self.zk.storeNodeRequest(req2)
561
+
562
+        req1 = self.waitForNodeRequest(req1)
563
+        self.assertEqual(req1.state, zk.FULFILLED)
564
+        self.assertEqual(len(req1.nodes), 1)
565
+        node = self.zk.getNode(req1.nodes[0])
566
+        self.assertEqual(node.provider, 'fake-provider')
567
+
568
+        req2 = self.waitForNodeRequest(req2)
569
+        self.assertEqual(req2.state, zk.FULFILLED)
570
+        self.assertEqual(len(req2.nodes), 1)
571
+        node = self.zk.getNode(req2.nodes[0])
572
+        self.assertEqual(node.provider, 'fake-provider2')
573
+
574
+    def test_node_request_invalid_provider(self):
575
+        """Test that a node request for a missing provider is handled"""
576
+        configfile = self.setup_config('node_two_provider.yaml')
577
+        self.useBuilder(configfile)
578
+        pool = self.useNodepool(configfile, watermark_sleep=1)
579
+        pool.start()
580
+        # Validate we have images in both providers
581
+        self.waitForImage('fake-provider', 'fake-image')
582
+        self.waitForImage('fake-provider2', 'fake-image')
583
+        self.waitForNodes('fake-label', 1)
584
+
585
+        req1 = zk.NodeRequest()
586
+        req1.state = zk.REQUESTED
587
+        req1.provider = 'missing-provider'
588
+        req1.node_types.append('fake-label')
589
+        self.zk.storeNodeRequest(req1)
590
+
591
+        req1 = self.waitForNodeRequest(req1)
592
+        self.assertEqual(req1.state, zk.FULFILLED)
593
+        self.assertEqual(len(req1.nodes), 1)
594
+        self.zk.getNode(req1.nodes[0])
595
+
539 596
     def test_node_boot_from_volume(self):
540 597
         """Test that an image and node are created from a volume"""
541 598
         configfile = self.setup_config('node_boot_from_volume.yaml')

+ 4
- 0
nodepool/zk.py View File

@@ -442,6 +442,7 @@ class NodeRequest(BaseModel):
442 442
         self.nodes = []
443 443
         self.reuse = True
444 444
         self.requestor = None
445
+        self.provider = None
445 446
         self.relative_priority = 0
446 447
 
447 448
     def __repr__(self):
@@ -458,6 +459,7 @@ class NodeRequest(BaseModel):
458 459
                     self.nodes == other.nodes and
459 460
                     self.reuse == other.reuse and
460 461
                     self.requestor == other.requestor and
462
+                    self.provider == other.provider and
461 463
                     self.relative_priority == other.relative_priority)
462 464
         else:
463 465
             return False
@@ -472,6 +474,7 @@ class NodeRequest(BaseModel):
472 474
         d['nodes'] = self.nodes
473 475
         d['reuse'] = self.reuse
474 476
         d['requestor'] = self.requestor
477
+        d['provider'] = self.provider
475 478
         d['relative_priority'] = self.relative_priority
476 479
         return d
477 480
 
@@ -497,6 +500,7 @@ class NodeRequest(BaseModel):
497 500
         self.nodes = d.get('nodes', [])
498 501
         self.reuse = d.get('reuse', True)
499 502
         self.requestor = d.get('requestor')
503
+        self.provider = d.get('provider')
500 504
         self.relative_priority = d.get('relative_priority', 0)
501 505
 
502 506
 

Loading…
Cancel
Save