Browse Source

Support traits for scheduling

Story: #2003685
Task: #26209
Change-Id: I4895c414abc55ece6cf56133f13ffaa7cd4f5f92
Dmitry Tantsur 7 months ago
parent
commit
df831309ba

+ 1
- 0
.zuul.yaml View File

@@ -124,6 +124,7 @@
124 124
       metalsmith_precreate_port: false
125 125
       metalsmith_partition_image: test-centos-partition
126 126
       metalsmith_whole_disk_image: test-centos-wholedisk
127
+      metalsmith_traits: [CUSTOM_GOLD]
127 128
 
128 129
 - job:
129 130
     name: metalsmith-integration-glance-netboot-cirros-iscsi-py3

+ 4
- 1
metalsmith/_cmd.py View File

@@ -76,6 +76,7 @@ def _do_deploy(api, args, formatter):
76 76
     node = api.reserve_node(resource_class=args.resource_class,
77 77
                             conductor_group=args.conductor_group,
78 78
                             capabilities=capabilities,
79
+                            traits=args.trait,
79 80
                             candidates=args.candidate)
80 81
     instance = api.provision_node(node,
81 82
                                   image=source,
@@ -153,7 +154,9 @@ def _parse_args(args, config):
153 154
                         help='root disk size (in GiB), defaults to (local_gb '
154 155
                         '- 2)')
155 156
     deploy.add_argument('--capability', action='append', metavar='NAME=VALUE',
156
-                        default=[], help='capabilities the nodes should have')
157
+                        default=[], help='capabilities the node should have')
158
+    deploy.add_argument('--trait', action='append',
159
+                        default=[], help='trait the node should have')
157 160
     deploy.add_argument('--ssh-public-key', help='SSH public key to load')
158 161
     deploy.add_argument('--hostname', help='Host name to use, defaults to '
159 162
                         'Node\'s name or UUID')

+ 11
- 3
metalsmith/_provisioner.py View File

@@ -67,7 +67,8 @@ class Provisioner(object):
67 67
         self._dry_run = dry_run
68 68
 
69 69
     def reserve_node(self, resource_class=None, conductor_group=None,
70
-                     capabilities=None, candidates=None, predicate=None):
70
+                     capabilities=None, traits=None, candidates=None,
71
+                     predicate=None):
71 72
         """Find and reserve a suitable node.
72 73
 
73 74
         Example::
@@ -81,6 +82,7 @@ class Provisioner(object):
81 82
             Value ``None`` means any group, use empty string "" for nodes
82 83
             from the default group.
83 84
         :param capabilities: Requested capabilities as a dict.
85
+        :param traits: Requested traits as a list of strings.
84 86
         :param candidates: List of nodes (UUIDs, names or `Node` objects)
85 87
             to pick from. The filters (for resource class, capabilities and
86 88
             traits) are still applied to the provided list. The order in which
@@ -111,15 +113,21 @@ class Provisioner(object):
111 113
         LOG.debug('Candidate nodes: %s', nodes)
112 114
 
113 115
         filters.append(_scheduler.CapabilitiesFilter(capabilities))
116
+        filters.append(_scheduler.TraitsFilter(traits))
114 117
         if predicate is not None:
115 118
             filters.append(_scheduler.CustomPredicateFilter(predicate))
116 119
 
117 120
         reserver = _scheduler.IronicReserver(self._api)
118 121
         node = _scheduler.schedule_node(nodes, filters, reserver,
119 122
                                         dry_run=self._dry_run)
123
+
124
+        update = {}
120 125
         if capabilities:
121
-            node = self._api.update_node(
122
-                node, {'/instance_info/capabilities': capabilities})
126
+            update['/instance_info/capabilities'] = capabilities
127
+        if traits:
128
+            update['/instance_info/traits'] = traits
129
+        if update:
130
+            node = self._api.update_node(node, update)
123 131
 
124 132
         LOG.debug('Reserved node: %s', node)
125 133
         return node

+ 38
- 0
metalsmith/_scheduler.py View File

@@ -142,6 +142,9 @@ class CapabilitiesFilter(Filter):
142 142
         self._counter = collections.Counter()
143 143
 
144 144
     def __call__(self, node):
145
+        if not self._capabilities:
146
+            return True
147
+
145 148
         try:
146 149
             caps = _utils.get_capabilities(node)
147 150
         except Exception:
@@ -181,6 +184,41 @@ class CapabilitiesFilter(Filter):
181 184
         raise exceptions.CapabilitiesNotFound(message, self._capabilities)
182 185
 
183 186
 
187
+class TraitsFilter(Filter):
188
+    """Filter that checks traits."""
189
+
190
+    def __init__(self, traits):
191
+        self._traits = traits
192
+        self._counter = collections.Counter()
193
+
194
+    def __call__(self, node):
195
+        if not self._traits:
196
+            return True
197
+
198
+        traits = node.traits or []
199
+        LOG.debug('Traits for node %(node)s: %(traits)s',
200
+                  {'node': _utils.log_node(node), 'traits': traits})
201
+        for trait in traits:
202
+            self._counter[trait] += 1
203
+
204
+        missing = set(self._traits) - set(traits)
205
+        if missing:
206
+            LOG.debug('Node %(node)s does not have traits %(missing)s',
207
+                      {'node': _utils.log_node(node), 'missing': missing})
208
+            return False
209
+
210
+        return True
211
+
212
+    def fail(self):
213
+        existing = ", ".join("%s (%d node(s))" % item
214
+                             for item in self._counter.items())
215
+        requested = ', '.join(self._traits)
216
+        message = ("No available nodes found with traits %(req)s, "
217
+                   "existing traits: %(exist)s" %
218
+                   {'req': requested, 'exist': existing or 'none'})
219
+        raise exceptions.TraitsNotFound(message, self._traits)
220
+
221
+
184 222
 class CustomPredicateFilter(Filter):
185 223
 
186 224
     def __init__(self, predicate):

+ 11
- 0
metalsmith/exceptions.py View File

@@ -67,6 +67,17 @@ class CapabilitiesNotFound(ReservationFailed):
67 67
         super(CapabilitiesNotFound, self).__init__(message)
68 68
 
69 69
 
70
+class TraitsNotFound(ReservationFailed):
71
+    """Requested traits do not match any nodes.
72
+
73
+    :ivar requested_traits: Requested node's traits.
74
+    """
75
+
76
+    def __init__(self, message, traits):
77
+        self.requested_traits = traits
78
+        super(TraitsNotFound, self).__init__(message)
79
+
80
+
70 81
 class ValidationFailed(ReservationFailed):
71 82
     """Validation failed for all requested nodes."""
72 83
 

+ 47
- 0
metalsmith/test/test_cmd.py View File

@@ -58,6 +58,7 @@ class TestDeploy(testtools.TestCase):
58 58
             resource_class='compute',
59 59
             conductor_group=None,
60 60
             capabilities={},
61
+            traits=[],
61 62
             candidates=None
62 63
         )
63 64
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -105,6 +106,7 @@ class TestDeploy(testtools.TestCase):
105 106
             resource_class='compute',
106 107
             conductor_group=None,
107 108
             capabilities={},
109
+            traits=[],
108 110
             candidates=None
109 111
         )
110 112
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -179,6 +181,7 @@ class TestDeploy(testtools.TestCase):
179 181
             resource_class='compute',
180 182
             conductor_group=None,
181 183
             capabilities={},
184
+            traits=[],
182 185
             candidates=None
183 186
         )
184 187
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -203,6 +206,7 @@ class TestDeploy(testtools.TestCase):
203 206
             resource_class='compute',
204 207
             conductor_group=None,
205 208
             capabilities={},
209
+            traits=[],
206 210
             candidates=None
207 211
         )
208 212
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -235,6 +239,7 @@ class TestDeploy(testtools.TestCase):
235 239
             resource_class='compute',
236 240
             conductor_group=None,
237 241
             capabilities={},
242
+            traits=[],
238 243
             candidates=None
239 244
         )
240 245
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -269,6 +274,7 @@ class TestDeploy(testtools.TestCase):
269 274
             resource_class='compute',
270 275
             conductor_group=None,
271 276
             capabilities={},
277
+            traits=[],
272 278
             candidates=None
273 279
         )
274 280
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -301,6 +307,7 @@ class TestDeploy(testtools.TestCase):
301 307
             resource_class='compute',
302 308
             conductor_group=None,
303 309
             capabilities={},
310
+            traits=[],
304 311
             candidates=None
305 312
         )
306 313
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -333,6 +340,7 @@ class TestDeploy(testtools.TestCase):
333 340
             resource_class='compute',
334 341
             conductor_group=None,
335 342
             capabilities={},
343
+            traits=[],
336 344
             candidates=None
337 345
         )
338 346
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -390,6 +398,32 @@ class TestDeploy(testtools.TestCase):
390 398
             resource_class='compute',
391 399
             conductor_group=None,
392 400
             capabilities={'foo': 'bar', 'answer': '42'},
401
+            traits=[],
402
+            candidates=None
403
+        )
404
+        mock_pr.return_value.provision_node.assert_called_once_with(
405
+            mock_pr.return_value.reserve_node.return_value,
406
+            image='myimg',
407
+            nics=[{'network': 'mynet'}],
408
+            root_disk_size=None,
409
+            config=mock.ANY,
410
+            hostname=None,
411
+            netboot=False,
412
+            wait=1800)
413
+
414
+    def test_args_traits(self, mock_os_conf, mock_pr):
415
+        args = ['deploy', '--network', 'mynet', '--image', 'myimg',
416
+                '--trait', 'foo:bar', '--trait', 'answer:42',
417
+                '--resource-class', 'compute']
418
+        _cmd.main(args)
419
+        mock_pr.assert_called_once_with(
420
+            cloud_region=mock_os_conf.return_value.get_one.return_value,
421
+            dry_run=False)
422
+        mock_pr.return_value.reserve_node.assert_called_once_with(
423
+            resource_class='compute',
424
+            conductor_group=None,
425
+            capabilities={},
426
+            traits=['foo:bar', 'answer:42'],
393 427
             candidates=None
394 428
         )
395 429
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -417,6 +451,7 @@ class TestDeploy(testtools.TestCase):
417 451
                 resource_class='compute',
418 452
                 conductor_group=None,
419 453
                 capabilities={},
454
+                traits=[],
420 455
                 candidates=None
421 456
             )
422 457
             mock_pr.return_value.provision_node.assert_called_once_with(
@@ -443,6 +478,7 @@ class TestDeploy(testtools.TestCase):
443 478
             resource_class='compute',
444 479
             conductor_group=None,
445 480
             capabilities={},
481
+            traits=[],
446 482
             candidates=None
447 483
         )
448 484
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -472,6 +508,7 @@ class TestDeploy(testtools.TestCase):
472 508
             resource_class='compute',
473 509
             conductor_group=None,
474 510
             capabilities={},
511
+            traits=[],
475 512
             candidates=None
476 513
         )
477 514
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -498,6 +535,7 @@ class TestDeploy(testtools.TestCase):
498 535
             resource_class='compute',
499 536
             conductor_group=None,
500 537
             capabilities={},
538
+            traits=[],
501 539
             candidates=None
502 540
         )
503 541
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -520,6 +558,7 @@ class TestDeploy(testtools.TestCase):
520 558
             resource_class='compute',
521 559
             conductor_group=None,
522 560
             capabilities={},
561
+            traits=[],
523 562
             candidates=None
524 563
         )
525 564
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -544,6 +583,7 @@ class TestDeploy(testtools.TestCase):
544 583
             resource_class='compute',
545 584
             conductor_group=None,
546 585
             capabilities={},
586
+            traits=[],
547 587
             candidates=None
548 588
         )
549 589
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -568,6 +608,7 @@ class TestDeploy(testtools.TestCase):
568 608
             resource_class='compute',
569 609
             conductor_group=None,
570 610
             capabilities={},
611
+            traits=[],
571 612
             candidates=None
572 613
         )
573 614
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -591,6 +632,7 @@ class TestDeploy(testtools.TestCase):
591 632
             resource_class=None,
592 633
             conductor_group=None,
593 634
             capabilities={},
635
+            traits=[],
594 636
             candidates=['node1', 'node2']
595 637
         )
596 638
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -614,6 +656,7 @@ class TestDeploy(testtools.TestCase):
614 656
             resource_class='compute',
615 657
             conductor_group='loc1',
616 658
             capabilities={},
659
+            traits=[],
617 660
             candidates=None
618 661
         )
619 662
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -638,6 +681,7 @@ class TestDeploy(testtools.TestCase):
638 681
             resource_class='compute',
639 682
             conductor_group=None,
640 683
             capabilities={},
684
+            traits=[],
641 685
             candidates=None
642 686
         )
643 687
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -666,6 +710,7 @@ class TestDeploy(testtools.TestCase):
666 710
             resource_class='compute',
667 711
             conductor_group=None,
668 712
             capabilities={},
713
+            traits=[],
669 714
             candidates=None
670 715
         )
671 716
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -703,6 +748,7 @@ class TestDeploy(testtools.TestCase):
703 748
             resource_class='compute',
704 749
             conductor_group=None,
705 750
             capabilities={},
751
+            traits=[],
706 752
             candidates=None
707 753
         )
708 754
         mock_pr.return_value.provision_node.assert_called_once_with(
@@ -726,6 +772,7 @@ class TestDeploy(testtools.TestCase):
726 772
             resource_class='compute',
727 773
             conductor_group=None,
728 774
             capabilities={},
775
+            traits=[],
729 776
             candidates=None
730 777
         )
731 778
         mock_pr.return_value.provision_node.assert_called_once_with(

+ 18
- 1
metalsmith/test/test_provisioner.py View File

@@ -29,7 +29,7 @@ from metalsmith import sources
29 29
 
30 30
 NODE_FIELDS = ['name', 'uuid', 'instance_info', 'instance_uuid', 'maintenance',
31 31
                'maintenance_reason', 'properties', 'provision_state', 'extra',
32
-               'last_error']
32
+               'last_error', 'traits']
33 33
 
34 34
 
35 35
 class TestInit(testtools.TestCase):
@@ -138,6 +138,23 @@ class TestReserveNode(Base):
138 138
         self.api.update_node.assert_called_once_with(
139 139
             node, {'/instance_info/capabilities': {'answer': '42'}})
140 140
 
141
+    def test_with_traits(self):
142
+        nodes = [
143
+            mock.Mock(spec=['uuid', 'name', 'properties'],
144
+                      properties={'local_gb': 100}, traits=traits)
145
+            for traits in [['foo', 'answer:1'], ['answer:42', 'foo'],
146
+                           ['answer'], None]
147
+        ]
148
+        expected = nodes[1]
149
+        self.api.list_nodes.return_value = nodes
150
+        self.api.reserve_node.side_effect = lambda n, instance_uuid: n
151
+
152
+        node = self.pr.reserve_node(traits=['foo', 'answer:42'])
153
+
154
+        self.assertIs(node, expected)
155
+        self.api.update_node.assert_called_once_with(
156
+            node, {'/instance_info/traits': ['foo', 'answer:42']})
157
+
141 158
     def test_custom_predicate(self):
142 159
         nodes = [
143 160
             mock.Mock(spec=['uuid', 'name', 'properties'],

+ 32
- 0
metalsmith/test/test_scheduler.py View File

@@ -164,6 +164,38 @@ class TestCapabilitiesFilter(testtools.TestCase):
164 164
                                fltr.fail)
165 165
 
166 166
 
167
+class TestTraitsFilter(testtools.TestCase):
168
+
169
+    def test_fail_no_traits(self):
170
+        fltr = _scheduler.TraitsFilter(['tr1', 'tr2'])
171
+        self.assertRaisesRegex(exceptions.TraitsNotFound,
172
+                               'No available nodes found with traits '
173
+                               'tr1, tr2, existing traits: none',
174
+                               fltr.fail)
175
+
176
+    def test_no_traits(self):
177
+        fltr = _scheduler.TraitsFilter([])
178
+        node = mock.Mock(spec=['name', 'uuid'])
179
+        self.assertTrue(fltr(node))
180
+
181
+    def test_ok(self):
182
+        fltr = _scheduler.TraitsFilter(['tr1', 'tr2'])
183
+        node = mock.Mock(spec=['name', 'uuid', 'traits'],
184
+                         traits=['tr3', 'tr2', 'tr1'])
185
+        self.assertTrue(fltr(node))
186
+
187
+    def test_missing_one(self):
188
+        fltr = _scheduler.TraitsFilter(['tr1', 'tr2'])
189
+        node = mock.Mock(spec=['name', 'uuid', 'traits'],
190
+                         traits=['tr3', 'tr1'])
191
+        self.assertFalse(fltr(node))
192
+
193
+    def test_missing_all(self):
194
+        fltr = _scheduler.TraitsFilter(['tr1', 'tr2'])
195
+        node = mock.Mock(spec=['name', 'uuid', 'traits'], traits=None)
196
+        self.assertFalse(fltr(node))
197
+
198
+
167 199
 class TestIronicReserver(testtools.TestCase):
168 200
 
169 201
     def setUp(self):

+ 6
- 0
roles/metalsmith_deployment/README.rst View File

@@ -35,6 +35,8 @@ The following optional variables provide the defaults for Instance_ attributes:
35 35
     the default for ``root_size``.
36 36
 ``metalsmith_ssh_public_keys``
37 37
     the default for ``ssh_public_keys``.
38
+``metalsmith_traits``
39
+    the default for ``traits``.
38 40
 ``metalsmith_user_name``
39 41
     the default for ``user_name``, the default value is ``metalsmith``.
40 42
 
@@ -93,6 +95,8 @@ Each instance has the following attributes:
93 95
 
94 96
 ``ssh_public_keys`` (defaults to ``metalsmith_ssh_public_keys``)
95 97
     list of file names with SSH public keys to put to the node.
98
+``traits`` (defaults to ``metalsmith_traits``)
99
+    list of traits the node should have.
96 100
 ``user_name`` (defaults to ``metalsmith_user_name``)
97 101
     name of the user to create on the instance via configdrive. Requires
98 102
     cloud-init_ on the image.
@@ -121,6 +125,8 @@ Example
121 125
                 root_size: 100
122 126
                 capabilities:
123 127
                   boot_mode: uefi
128
+                traits:
129
+                  - CUSTOM_GPU
124 130
               - hostname: compute-1
125 131
                 resource_class: compute
126 132
                 root_size: 100

+ 1
- 0
roles/metalsmith_deployment/defaults/main.yml View File

@@ -8,6 +8,7 @@ metalsmith_netboot: false
8 8
 metalsmith_nics: []
9 9
 metalsmith_resource_class:
10 10
 metalsmith_root_size:
11
+metalsmith_traits: []
11 12
 metalsmith_ssh_public_keys: []
12 13
 metalsmith_user_name: metalsmith
13 14
 

+ 4
- 0
roles/metalsmith_deployment/tasks/main.yml View File

@@ -6,6 +6,9 @@
6 6
     {% for cap_name, cap_value in capabilities.items() %}
7 7
       --capability {{ cap_name }}={{ cap_value }}
8 8
     {% endfor %}
9
+    {% for trait in traits %}
10
+      --trait {{ trait }}
11
+    {% endfor %}
9 12
     {% for nic in nics %}
10 13
       {% for nic_type, nic_value in nic.items() %}
11 14
         --{{ nic_type }} {{ nic_value }}
@@ -51,6 +54,7 @@
51 54
     root_size: "{{ instance.root_size | default(metalsmith_root_size) }}"
52 55
     ssh_public_keys: "{{ instance.ssh_public_keys | default(metalsmith_ssh_public_keys) }}"
53 56
     state: "{{ instance.state | default('present') }}"
57
+    traits: "{{ instance.traits | default(metalsmith_traits) }}"
54 58
     user_name: "{{ instance.user_name | default(metalsmith_user_name) }}"
55 59
   with_items: "{{ metalsmith_instances }}"
56 60
   loop_control:

Loading…
Cancel
Save