Browse Source

Merge "Support multiple rack controllers"

Zuul 6 months ago
parent
commit
756a063c30

BIN
docs/source/images/architecture.png View File


+ 187
- 82
python/drydock_provisioner/drivers/node/maasdriver/actions/node.py View File

@@ -29,6 +29,8 @@ import drydock_provisioner.objects as objects
29 29
 
30 30
 from drydock_provisioner.control.util import get_internal_api_href
31 31
 from drydock_provisioner.orchestrator.actions.orchestrator import BaseAction
32
+from drydock_provisioner.drivers.node.maasdriver.errors import RackControllerConflict
33
+from drydock_provisioner.drivers.node.maasdriver.errors import ApiNotAvailable
32 34
 
33 35
 import drydock_provisioner.drivers.node.maasdriver.models.fabric as maas_fabric
34 36
 import drydock_provisioner.drivers.node.maasdriver.models.vlan as maas_vlan
@@ -138,25 +140,28 @@ class ValidateNodeServices(BaseMaasAction):
138 140
                             ctx_type='NA')
139 141
                         self.task.failure()
140 142
                     else:
143
+                        healthy_rackd = []
141 144
                         for r in rack_ctlrs:
142
-                            rack_svc = r.get_services()
143
-                            rack_name = r.hostname
144
-
145
-                            for s in rack_svc:
146
-                                if s in maas_rack.RackController.REQUIRED_SERVICES:
147
-                                    is_error = False
148
-                                    if rack_svc[s] not in ("running", "off"):
149
-                                        self.task.failure()
150
-                                        is_error = True
151
-                                    self.logger.info(
152
-                                        "Service %s on rackd %s is %s" %
153
-                                        (s, rack_name, rack_svc[s]))
154
-                                    self.task.add_status_msg(
155
-                                        msg="Service %s on rackd %s is %s" %
156
-                                        (s, rack_name, rack_svc[s]),
157
-                                        error=is_error,
158
-                                        ctx=rack_name,
159
-                                        ctx_type='rack_ctlr')
145
+                            if r.is_healthy():
146
+                                healthy_rackd.append(r.hostname)
147
+                            else:
148
+                                msg = "Rack controller %s not healthy." % r.hostname
149
+                                self.logger.info(msg)
150
+                                self.task.add_status_msg(
151
+                                    msg=msg,
152
+                                    error=True,
153
+                                    ctx=r.hostname,
154
+                                    ctx_type='rack_ctlr')
155
+                        if not healthy_rackd:
156
+                            msg = "No healthy rack controllers found."
157
+                            self.logger.info(msg)
158
+                            self.task.add_status_msg(
159
+                                msg=msg,
160
+                                error=True,
161
+                                ctx='maas',
162
+                                ctx_type='cluster')
163
+                            self.task.failure()
164
+
160 165
         except errors.TransientDriverError as ex:
161 166
             self.task.add_status_msg(
162 167
                 msg=str(ex), error=True, ctx='NA', ctx_type='NA', retry=True)
@@ -278,8 +283,7 @@ class DestroyNode(BaseMaasAction):
278 283
                                                       site_design)
279 284
         for n in nodes:
280 285
             try:
281
-                machine = machine_list.identify_baremetal_node(
282
-                    n, update_name=False)
286
+                machine = find_node_in_maas(self.maas_client, n)
283 287
 
284 288
                 if machine is None:
285 289
                     msg = "Could not locate machine for node {}".format(n.name)
@@ -288,6 +292,13 @@ class DestroyNode(BaseMaasAction):
288 292
                         msg=msg, error=False, ctx=n.name, ctx_type='node')
289 293
                     self.task.success(focus=n.get_id())
290 294
                     continue
295
+                elif type(machine) == maas_rack.RackController:
296
+                    msg = "Cannot delete rack controller {}.".format(n.name)
297
+                    self.logger.info(msg)
298
+                    self.task.add_status_msg(
299
+                        msg=msg, error=False, ctx=n.name, ctx_type='node')
300
+                    self.task.failure(focus=n.get_id())
301
+                    continue
291 302
 
292 303
                 # First release the node and erase its disks, if MaaS API allows
293 304
                 if machine.status_name in self.actionable_node_statuses:
@@ -687,7 +698,7 @@ class CreateNetworkTemplate(BaseMaasAction):
687 698
                     vlan_list.refresh()
688 699
                     vlan = vlan_list.select(subnet.vlan)
689 700
 
690
-                    if dhcp_on and not vlan.dhcp_on:
701
+                    if dhcp_on:
691 702
                         # check if design requires a dhcp relay and if the MaaS vlan already uses a dhcp_relay
692 703
                         msg = "DHCP enabled for subnet %s, activating in MaaS" % (
693 704
                             subnet.name)
@@ -702,12 +713,25 @@ class CreateNetworkTemplate(BaseMaasAction):
702 713
                             self.maas_client)
703 714
                         rack_ctlrs.refresh()
704 715
 
716
+                        # Reset DHCP stuff to avoid offline rack controllers
717
+
718
+                        vlan.reset_dhcp_mgmt()
705 719
                         dhcp_config_set = False
706 720
 
707 721
                         for r in rack_ctlrs:
708 722
                             if n.dhcp_relay_upstream_target is not None:
709 723
                                 if r.interface_for_ip(
710 724
                                         n.dhcp_relay_upstream_target):
725
+                                    if not r.is_healthy():
726
+                                        msg = ("Rack controller %s with DHCP relay is not healthy." %
727
+                                               r.hostname)
728
+                                        self.logger.info(msg)
729
+                                        self.task.add_status_msg(
730
+                                            msg=msg,
731
+                                            error=True,
732
+                                            ctx=n.name,
733
+                                            ctx_type='network')
734
+                                        break
711 735
                                     iface = r.interface_for_ip(
712 736
                                         n.dhcp_relay_upstream_target)
713 737
                                     vlan.relay_vlan = iface.vlan
@@ -730,21 +754,42 @@ class CreateNetworkTemplate(BaseMaasAction):
730 754
                                         self.logger.debug(msg)
731 755
                                         rackctl_id = r.resource_id
732 756
 
733
-                                        vlan.dhcp_on = True
734
-                                        vlan.primary_rack = rackctl_id
735
-                                        msg = "Enabling DHCP on VLAN %s managed by rack ctlr %s" % (
736
-                                            vlan.resource_id, rackctl_id)
737
-                                        self.logger.debug(msg)
738
-                                        self.task.add_status_msg(
739
-                                            msg=msg,
740
-                                            error=False,
741
-                                            ctx=n.name,
742
-                                            ctx_type='network')
743
-                                        vlan.update()
744
-                                        dhcp_config_set = True
757
+                                        if not r.is_healthy():
758
+                                            msg = ("Rack controller %s not healthy, skipping DHCP config." %
759
+                                                   r.resource_id)
760
+                                            self.logger.info(msg)
761
+                                            self.task.add_status_msg(
762
+                                                msg=msg,
763
+                                                error=True,
764
+                                                ctx=n.name,
765
+                                                ctx_type='network')
766
+                                            break
767
+                                        try:
768
+                                            vlan.dhcp_on = True
769
+                                            vlan.add_rack_controller(
770
+                                                rackctl_id)
771
+                                            msg = "Enabling DHCP on VLAN %s managed by rack ctlr %s" % (
772
+                                                vlan.resource_id, rackctl_id)
773
+                                            self.logger.debug(msg)
774
+                                            self.task.add_status_msg(
775
+                                                msg=msg,
776
+                                                error=False,
777
+                                                ctx=n.name,
778
+                                                ctx_type='network')
779
+                                            vlan.update()
780
+                                            dhcp_config_set = True
781
+                                        except RackControllerConflict as rack_ex:
782
+                                            msg = (
783
+                                                "More than two rack controllers on vlan %s, "
784
+                                                "skipping enabling %s." %
785
+                                                (vlan.resource_id, rackctl_id))
786
+                                            self.logger.debug(msg)
787
+                                            self.task.add_status_msg(
788
+                                                msg=msg,
789
+                                                error=False,
790
+                                                ctx=n.name,
791
+                                                ctx_type='network')
745 792
                                         break
746
-                            if dhcp_config_set:
747
-                                break
748 793
 
749 794
                         if not dhcp_config_set:
750 795
                             msg = "Network %s requires DHCP, but could not locate a rack controller to serve it." % (
@@ -757,9 +802,6 @@ class CreateNetworkTemplate(BaseMaasAction):
757 802
                                 ctx_type='network')
758 803
                             self.task.failure(focus=n.name)
759 804
 
760
-                    elif dhcp_on and vlan.dhcp_on:
761
-                        self.logger.info("DHCP already enabled for subnet %s" %
762
-                                         (subnet.resource_id))
763 805
                 except ValueError:
764 806
                     raise errors.DriverError("Inconsistent data from MaaS")
765 807
 
@@ -1026,21 +1068,6 @@ class IdentifyNode(BaseMaasAction):
1026 1068
     """Action to identify a node resource in MaaS matching a node design."""
1027 1069
 
1028 1070
     def start(self):
1029
-        try:
1030
-            machine_list = maas_machine.Machines(self.maas_client)
1031
-            machine_list.refresh()
1032
-        except Exception as ex:
1033
-            self.logger.debug("Error accessing the MaaS API.", exc_info=ex)
1034
-            self.task.set_status(hd_fields.TaskStatus.Complete)
1035
-            self.task.failure()
1036
-            self.task.add_status_msg(
1037
-                msg='Error accessing MaaS Machines API: %s' % str(ex),
1038
-                error=True,
1039
-                ctx='NA',
1040
-                ctx_type='NA')
1041
-            self.task.save()
1042
-            return
1043
-
1044 1071
         self.task.set_status(hd_fields.TaskStatus.Running)
1045 1072
         self.task.save()
1046 1073
 
@@ -1062,37 +1089,56 @@ class IdentifyNode(BaseMaasAction):
1062 1089
 
1063 1090
         for n in nodes:
1064 1091
             try:
1065
-                machine = machine_list.identify_baremetal_node(
1066
-                    n, domain=n.get_domain(site_design))
1067
-                if machine is not None:
1068
-                    self.task.success(focus=n.get_id())
1092
+                machine = find_node_in_maas(self.maas_client, n)
1093
+                if machine is None:
1094
+                    self.task.failure(focus=n.get_id())
1095
+                    self.task.add_status_msg(
1096
+                        msg="Node %s not found in MaaS" % n.name,
1097
+                        error=True,
1098
+                        ctx=n.name,
1099
+                        ctx_type='node')
1100
+                elif type(machine) == maas_machine.Machine:
1101
+                    machine.update_identity(n, domain=n.get_domain(site_design))
1102
+                    msg = "Node %s identified in MaaS" % n.name
1103
+                    self.logger.debug(msg)
1069 1104
                     self.task.add_status_msg(
1070
-                        msg="Node %s identified in MaaS" % n.name,
1105
+                        msg=msg,
1071 1106
                         error=False,
1072 1107
                         ctx=n.name,
1073 1108
                         ctx_type='node')
1074
-                else:
1075
-                    self.task.failure(focus=n.get_id())
1109
+                    self.task.success(focus=n.get_id())
1110
+                elif type(machine) == maas_rack.RackController:
1111
+                    msg = "Rack controller %s identified in MaaS" % n.name
1112
+                    self.logger.debug(msg)
1076 1113
                     self.task.add_status_msg(
1077
-                        msg="Node %s not found in MaaS" % n.name,
1078
-                        error=True,
1114
+                        msg=msg,
1115
+                        error=False,
1079 1116
                         ctx=n.name,
1080 1117
                         ctx_type='node')
1118
+                    self.task.success(focus=n.get_id())
1119
+            except ApiNotAvailable as api_ex:
1120
+                self.logger.debug("Error accessing the MaaS API.", exc_info=api_ex)
1121
+                self.task.failure()
1122
+                self.task.add_status_msg(
1123
+                    msg='Error accessing MaaS API: %s' % str(api_ex),
1124
+                    error=True,
1125
+                    ctx='NA',
1126
+                    ctx_type='NA')
1127
+                self.task.save()
1081 1128
             except Exception as ex:
1129
+                self.logger.debug(
1130
+                    "Exception caught in identify node.", exc_info=ex)
1082 1131
                 self.task.failure(focus=n.get_id())
1083 1132
                 self.task.add_status_msg(
1084
-                    msg="Node %s not found in MaaS" % n.name,
1133
+                    msg="Error trying to location %s in MAAS" % n.name,
1085 1134
                     error=True,
1086 1135
                     ctx=n.name,
1087 1136
                     ctx_type='node')
1088
-                self.logger.debug(
1089
-                    "Exception caught in identify node.", exc_info=ex)
1090 1137
 
1091 1138
         self.task.set_status(hd_fields.TaskStatus.Complete)
1092 1139
         self.task.save()
1093 1140
         return
1094 1141
 
1095
-
1096 1142
 class ConfigureHardware(BaseMaasAction):
1097 1143
     """Action to start commissioning a server."""
1098 1144
 
@@ -1136,9 +1182,15 @@ class ConfigureHardware(BaseMaasAction):
1136 1182
             try:
1137 1183
                 self.logger.debug(
1138 1184
                     "Locating node %s for commissioning" % (n.name))
1139
-                machine = machine_list.identify_baremetal_node(
1140
-                    n, update_name=False)
1141
-                if machine is not None:
1185
+                machine = find_node_in_maas(self.maas_client, n)
1186
+                if type(machine) == maas_rack.RackController:
1187
+                    msg = "Located node %s in MaaS as rack controller. Skipping." % (
1188
+                        n.name)
1189
+                    self.logger.info(msg)
1190
+                    self.task.add_status_msg(
1191
+                        msg=msg, error=False, ctx=n.name, ctx_type='node')
1192
+                    self.task.success(focus=n.get_id())
1193
+                elif machine is not None:
1142 1194
                     if machine.status_name in [
1143 1195
                             'New', 'Broken', 'Failed commissioning',
1144 1196
                             'Failed testing'
@@ -1215,7 +1267,7 @@ class ConfigureHardware(BaseMaasAction):
1215 1267
                             msg=msg, error=False, ctx=n.name, ctx_type='node')
1216 1268
                         self.task.success(focus=n.get_id())
1217 1269
                     else:
1218
-                        msg = "Located node %s in MaaS, unknown status %s. Skipping..." % (
1270
+                        msg = "Located node %s in MaaS, unknown status %s. Skipping." % (
1219 1271
                             n, machine.status_name)
1220 1272
                         self.logger.warning(msg)
1221 1273
                         self.task.add_status_msg(
@@ -1323,10 +1375,20 @@ class ApplyNodeNetworking(BaseMaasAction):
1323 1375
                 self.logger.debug(
1324 1376
                     "Locating node %s for network configuration" % (n.name))
1325 1377
 
1326
-                machine = machine_list.identify_baremetal_node(
1327
-                    n, update_name=False)
1378
+                machine = find_node_in_maas(self.maas_client, n)
1328 1379
 
1329
-                if machine is not None:
1380
+                if type(machine) is maas_rack.RackController:
1381
+                    msg = ("Node %s is a rack controller, skipping deploy action." %
1382
+                           n.name)
1383
+                    self.logger.debug(msg)
1384
+                    self.task.add_status_msg(
1385
+                        msg=msg,
1386
+                        error=False,
1387
+                        ctx=n.name,
1388
+                        ctx_type='node')
1389
+                    self.task.success(focus=n.name)
1390
+                    continue
1391
+                elif machine is not None:
1330 1392
                     if machine.status_name.startswith('Failed Dep'):
1331 1393
                         msg = (
1332 1394
                             "Node %s has failed deployment, releasing to try again."
@@ -1677,8 +1739,7 @@ class ApplyNodePlatform(BaseMaasAction):
1677 1739
                 self.logger.debug(
1678 1740
                     "Locating node %s for platform configuration" % (n.name))
1679 1741
 
1680
-                machine = machine_list.identify_baremetal_node(
1681
-                    n, update_name=False)
1742
+                machine = find_node_in_maas(self.maas_client, n)
1682 1743
 
1683 1744
                 if machine is None:
1684 1745
                     msg = "Could not locate machine for node %s" % n.name
@@ -1695,7 +1756,14 @@ class ApplyNodePlatform(BaseMaasAction):
1695 1756
                     msg=msg, error=True, ctx=n.name, ctx_type='node')
1696 1757
                 continue
1697 1758
 
1698
-            if machine.status_name == 'Deployed':
1759
+            if type(machine) is maas_rack.RackController:
1760
+                msg = ("Skipping changes to rack controller %s." % n.name)
1761
+                self.logger.info(msg)
1762
+                self.task.add_status_msg(
1763
+                    msg=msg, error=False, ctx=n.name, ctx_type='node')
1764
+                self.task.success(focus=n.name)
1765
+                continue
1766
+            elif machine.status_name == 'Deployed':
1699 1767
                 msg = (
1700 1768
                     "Located node %s in MaaS, status deployed. Skipping "
1701 1769
                     "and considering success. Destroy node first if redeploy needed."
@@ -1856,8 +1924,7 @@ class ApplyNodeStorage(BaseMaasAction):
1856 1924
                 self.logger.debug(
1857 1925
                     "Locating node %s for storage configuration" % (n.name))
1858 1926
 
1859
-                machine = machine_list.identify_baremetal_node(
1860
-                    n, update_name=False)
1927
+                machine = find_node_in_maas(self.maas_client, n)
1861 1928
 
1862 1929
                 if machine is None:
1863 1930
                     msg = "Could not locate machine for node %s" % n.name
@@ -1874,7 +1941,15 @@ class ApplyNodeStorage(BaseMaasAction):
1874 1941
                 self.task.failure(focus=n.get_id())
1875 1942
                 continue
1876 1943
 
1877
-            if machine.status_name == 'Deployed':
1944
+            if type(machine) is maas_rack.RackController:
1945
+                msg = ("Skipping configuration updates to rack controller %s." %
1946
+                       n.name)
1947
+                self.logger.info(msg)
1948
+                self.task.add_status_msg(
1949
+                    msg=msg, error=False, ctx=n.name, ctx_type='node')
1950
+                self.task.success(focus=n.name)
1951
+                continue
1952
+            elif machine.status_name == 'Deployed':
1878 1953
                 msg = (
1879 1954
                     "Located node %s in MaaS, status deployed. Skipping "
1880 1955
                     "and considering success. Destroy node first if redeploy needed."
@@ -2202,9 +2277,16 @@ class DeployNode(BaseMaasAction):
2202 2277
 
2203 2278
         for n in nodes:
2204 2279
             try:
2205
-                machine = machine_list.identify_baremetal_node(
2206
-                    n, update_name=False)
2207
-                if machine.status_name.startswith(
2280
+                machine = find_node_in_maas(self.maas_client, n)
2281
+
2282
+                if type(machine) is maas_rack.RackController:
2283
+                    msg = "Skipping configuration of rack controller %s." % n.name
2284
+                    self.logger.info(msg)
2285
+                    self.task.add_status_msg(
2286
+                        msg=msg, error=False, ctx=n.name, ctx_type='node')
2287
+                    self.task.success(focus=n.name)
2288
+                    continue
2289
+                elif machine.status_name.startswith(
2208 2290
                         'Deployed') or machine.status_name.startswith(
2209 2291
                             'Deploying'):
2210 2292
                     msg = "Node %s already deployed or deploying, skipping." % (
@@ -2358,3 +2440,26 @@ class DeployNode(BaseMaasAction):
2358 2440
         self.task.save()
2359 2441
 
2360 2442
         return
2443
+
2444
+def find_node_in_maas(maas_client, node_model):
2445
+    """Find a node in MAAS matching the node_model.
2446
+
2447
+    Note that the returned Machine may be a simple Machine or
2448
+    a RackController.
2449
+
2450
+    :param maas_client: instance of an active session to MAAS
2451
+    :param node_model: instance of objects.Node to match
2452
+    :returns: instance of maasdriver.models.Machine
2453
+    """
2454
+
2455
+    machine_list = maas_machine.Machines(maas_client)
2456
+    machine_list.refresh()
2457
+    machine = machine_list.identify_baremetal_node(node_model)
2458
+
2459
+    if not machine:
2460
+        # If node isn't found as a normal node, check rack controllers
2461
+        rackd_list = maas_rack.RackControllers(maas_client)
2462
+        rackd_list.refresh()
2463
+        machine = rackd_list.identify_baremetal_node(node_model)
2464
+
2465
+    return machine

+ 3
- 2
python/drydock_provisioner/drivers/node/maasdriver/api_client.py View File

@@ -128,8 +128,9 @@ class MaasRequestFactory(object):
128 128
 
129 129
             for (k, v) in files.items():
130 130
                 if v is None:
131
-                    continue
132
-                elif isinstance(v, list):
131
+                    v = ""
132
+
133
+                if isinstance(v, list):
133 134
                     for i in v:
134 135
                         value = base64.b64encode(
135 136
                             str(i).encode('utf-8')).decode('utf-8')

+ 26
- 0
python/drydock_provisioner/drivers/node/maasdriver/errors.py View File

@@ -0,0 +1,26 @@
1
+# Copyright 2018 AT&T Intellectual Property.  All other rights reserved.
2
+# Licensed under the Apache License, Version 2.0 (the "License");
3
+# you may not use this file except in compliance with the License.
4
+# You may obtain a copy of the License at
5
+#
6
+#     http://www.apache.org/licenses/LICENSE-2.0
7
+#
8
+# Unless required by applicable law or agreed to in writing, software
9
+# distributed under the License is distributed on an "AS IS" BASIS,
10
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+# See the License for the specific language governing permissions and
12
+# limitations under the License.
13
+"""Errors and exceptions specific to MAAS node driver."""
14
+import drydock_provisioner.error as errors
15
+
16
+
17
+class RackControllerConflict(errors.DriverError):
18
+    """Exception for settings that are not allowed because not enough
19
+       or too many rack controllers are attached to a network."""
20
+    pass
21
+
22
+
23
+class ApiNotAvailable(errors.DriverError):
24
+    """Exception when trying to utilize the MAAS API and the connection
25
+       fails."""
26
+    pass

+ 2
- 0
python/drydock_provisioner/drivers/node/maasdriver/models/base.py View File

@@ -37,6 +37,8 @@ class ResourceBase(object):
37 37
         for f in self.fields:
38 38
             if f in kwargs.keys():
39 39
                 setattr(self, f, kwargs.get(f))
40
+            else:
41
+                setattr(self, f, None)
40 42
 
41 43
     """
42 44
     Update resource attributes from MaaS

+ 12
- 0
python/drydock_provisioner/drivers/node/maasdriver/models/interface.py View File

@@ -235,6 +235,18 @@ class Interface(model_base.ResourceBase):
235 235
 
236 236
         return False
237 237
 
238
+    def responds_to_mac(self, mac_address):
239
+        """Check if this interface will respond to a MAC address.
240
+
241
+        :param str mac_address: the MAC address to check
242
+
243
+        :return: true if this interface will respond to this MAC
244
+        """
245
+        if mac_address.replace(':', '').upper() == self.mac_address.replace(':', '').upper():
246
+            return True
247
+
248
+        return False
249
+
238 250
     def set_mtu(self, new_mtu):
239 251
         """Set interface MTU.
240 252
 

+ 55
- 31
python/drydock_provisioner/drivers/node/maasdriver/models/machine.py View File

@@ -77,6 +77,18 @@ class Machine(model_base.ResourceBase):
77 77
                 return i
78 78
         return None
79 79
 
80
+    def interface_for_mac(self, mac_address):
81
+        """Find the machine interface that owns the specified ``mac_address``.
82
+
83
+        :param str mac_address: The MAC address
84
+
85
+        :return: the interface that responds to this MAC or None
86
+        """
87
+        for i in self.interfaces:
88
+            if i.responds_to_mac(mac_address):
89
+                return i
90
+        return None
91
+
80 92
     def get_power_params(self):
81 93
         """Load power parameters for this node from MaaS."""
82 94
         url = self.interpolate_url()
@@ -426,6 +438,30 @@ class Machine(model_base.ResourceBase):
426 438
             "Failed updating power parameters MAAS url {} - return code {}\n{}"
427 439
             .format(url, resp.status_code.resp.text))
428 440
 
441
+    def update_identity(self, n, domain="local"):
442
+        """Update this node's identity based on the Node object ``n``
443
+
444
+        :param objects.Node n: The Node object to use as reference
445
+        :param str domain: The DNS domain to register this node under
446
+        """
447
+        try:
448
+            self.hostname = n.name
449
+            self.domain = domain
450
+            self.update()
451
+            if n.oob_type == 'libvirt':
452
+                self.logger.debug(
453
+                    "Updating node %s MaaS power parameters for libvirt." %
454
+                    (n.name))
455
+                oob_params = n.oob_parameters
456
+                self.set_power_parameters(
457
+                    'virsh',
458
+                    power_address=oob_params.get('libvirt_uri'),
459
+                    power_id=n.name)
460
+            self.logger.debug("Updated MaaS resource %s hostname to %s" %
461
+                              (self.resource_id, n.name))
462
+        except Exception as ex:
463
+            self.logger.debug("Error updating MAAS node: %s" % str(ex))
464
+
429 465
     def to_dict(self):
430 466
         """Serialize this resource instance into a dict.
431 467
 
@@ -522,9 +558,7 @@ class Machines(model_base.ResourceCollectionBase):
522 558
         return node
523 559
 
524 560
     def identify_baremetal_node(self,
525
-                                node_model,
526
-                                update_name=True,
527
-                                domain="local"):
561
+                                node_model):
528 562
         """Find MaaS node resource matching Drydock BaremetalNode.
529 563
 
530 564
         Search all the defined MaaS Machines and attempt to match
@@ -532,7 +566,6 @@ class Machines(model_base.ResourceCollectionBase):
532 566
         the MaaS instance with the correct hostname
533 567
 
534 568
         :param node_model: Instance of objects.node.BaremetalNode to search MaaS for matching resource
535
-        :param update_name: Whether Drydock should update the MaaS resource name to match the Drydock design
536 569
         """
537 570
         maas_node = None
538 571
 
@@ -552,46 +585,37 @@ class Machines(model_base.ResourceCollectionBase):
552 585
                     node_oob_ip
553 586
                 })
554 587
             except ValueError:
555
-                self.logger.warn(
588
+                self.logger.info(
556 589
                     "Error locating matching MaaS resource for OOB IP %s" %
557 590
                     (node_oob_ip))
558 591
                 return None
559 592
         else:
560 593
             # Use boot_mac for nodes not using IPMI
561
-            node_boot_mac = node_model.boot_mac
594
+            nodes = self.find_nodes_with_mac(node_model.boot_mac)
562 595
 
563
-            if node_boot_mac is not None:
564
-                maas_node = self.singleton({'boot_mac': node_model.boot_mac})
596
+            if len(nodes) == 1:
597
+                maas_node = nodes[0]
598
+            else:
599
+                self.logger.debug("Error: Found %d nodes with MAC %s", len(nodes), node_model.boot_mac)
600
+                maas_node = None
565 601
 
566 602
         if maas_node is None:
567 603
             self.logger.info(
568 604
                 "Could not locate node %s in MaaS" % node_model.name)
569
-            return None
570
-
571
-        self.logger.debug("Found MaaS resource %s matching Node %s" %
572
-                          (maas_node.resource_id, node_model.get_id()))
573
-
574
-        if maas_node.hostname != node_model.name and update_name:
575
-            try:
576
-                maas_node.hostname = node_model.name
577
-                maas_node.domain = domain
578
-                maas_node.update()
579
-                if node_model.oob_type == 'libvirt':
580
-                    self.logger.debug(
581
-                        "Updating node %s MaaS power parameters for libvirt." %
582
-                        (node_model.name))
583
-                    oob_params = node_model.oob_parameters
584
-                    maas_node.set_power_parameters(
585
-                        'virsh',
586
-                        power_address=oob_params.get('libvirt_uri'),
587
-                        power_id=node_model.name)
588
-                self.logger.debug("Updated MaaS resource %s hostname to %s" %
589
-                                  (maas_node.resource_id, node_model.name))
590
-            except Exception as ex:
591
-                self.logger.debug("Error updating MAAS node: %s" % str(ex))
605
+        else:
606
+            self.logger.debug("Found MaaS resource %s matching Node %s" %
607
+                              (maas_node.resource_id, node_model.get_id()))
592 608
 
593 609
         return maas_node
594 610
 
611
+    def find_nodes_with_mac(self, mac_address):
612
+        """Find a list of nodes that own a NIC with ``mac_address``"""
613
+        node_list = []
614
+        for n in self.resources.values():
615
+            if n.interface_for_mac(mac_address):
616
+                node_list.append(n)
617
+        return node_list
618
+
595 619
     def query(self, query):
596 620
         """Custom query method to deal with complex fields."""
597 621
         result = list(self.resources.values())

+ 23
- 2
python/drydock_provisioner/drivers/node/maasdriver/models/rack_controller.py View File

@@ -13,7 +13,7 @@
13 13
 # limitations under the License.
14 14
 """Model for MaaS rack-controller API resource."""
15 15
 
16
-import drydock_provisioner.drivers.node.maasdriver.models.base as model_base
16
+import drydock_provisioner.error as errors
17 17
 import drydock_provisioner.drivers.node.maasdriver.models.machine as maas_machine
18 18
 
19 19
 
@@ -64,8 +64,25 @@ class RackController(maas_machine.Machine):
64 64
 
65 65
         return svc_status
66 66
 
67
+    def update_identity(self, n, domain="local"):
68
+        """Cannot update rack controller identity."""
69
+        self.logger.debug("Cannot update rack controller identity for %s, no-op." %
70
+                          self.hostname)
71
+        return
67 72
 
68
-class RackControllers(model_base.ResourceCollectionBase):
73
+    def is_healthy(self):
74
+        """Check if this rack controller appears healthy based on service status."""
75
+        rack_svc = self.get_services()
76
+        healthy = True
77
+        for s in rack_svc:
78
+            if s in RackController.REQUIRED_SERVICES:
79
+                # TODO(sh8121att) for dhcpd, ensure it is running if this rack controller
80
+                # is a primary or secondary for a VLAN
81
+                if rack_svc[s] not in ("running", "off"):
82
+                    healthy = False
83
+        return healthy
84
+
85
+class RackControllers(maas_machine.Machines):
69 86
     """Model for a collection of rack controllers."""
70 87
 
71 88
     collection_url = 'rackcontrollers/'
@@ -73,3 +90,7 @@ class RackControllers(model_base.ResourceCollectionBase):
73 90
 
74 91
     def __init__(self, api_client, **kwargs):
75 92
         super().__init__(api_client)
93
+
94
+    def acquire_node(self, node_name):
95
+        """Acquire not valid for nodes that are Rack Controllers."""
96
+        raise errors.DriverError("Rack controllers cannot be acquired.")

+ 36
- 0
python/drydock_provisioner/drivers/node/maasdriver/models/vlan.py View File

@@ -14,6 +14,7 @@
14 14
 """Models representing MaaS VLAN resources."""
15 15
 
16 16
 import drydock_provisioner.drivers.node.maasdriver.models.base as model_base
17
+from drydock_provisioner.drivers.node.maasdriver.errors import RackControllerConflict
17 18
 
18 19
 
19 20
 class Vlan(model_base.ResourceBase):
@@ -65,6 +66,41 @@ class Vlan(model_base.ResourceBase):
65 66
         else:
66 67
             self.vid = int(new_vid)
67 68
 
69
+    def add_rack_controller(self, rack_id):
70
+        """Add a rack controller that manages DHCP on this VLAN.
71
+
72
+        Assign ``rack_id`` to primary_rack if that slot is unset or already
73
+        holds ``rack_id``; otherwise apply the same rule to secondary_rack. If
74
+        both slots hold other controllers, raise RackControllerConflict.
75
+        """
76
+        if not self.primary_rack or self.primary_rack == rack_id:
77
+            self.logger.debug("Setting primary DHCP controller %s on VLAN %s", rack_id, self.resource_id)
78
+            self.primary_rack = rack_id
79
+        elif not self.secondary_rack or self.secondary_rack == rack_id:
80
+            self.logger.debug("Setting secondary DHCP controller %s on VLAN %s.", rack_id, self.resource_id)
81
+            self.secondary_rack = rack_id
82
+        else:
83
+            raise RackControllerConflict(
84
+                "Both primary and secondary rack controllers already set.")
85
+
86
+    def reset_dhcp_mgmt(self, commit=False):
87
+        """Reset the DHCP control for this VLAN.
88
+
89
+        Reset the settings in the model impacting DHCP control on this
90
+        VLAN. Only commit these changes to the MAAS API if ``commit`` is
91
+        True.
92
+
93
+        :param bool commit: Whether to commit reset to MAAS API
94
+        """
95
+        self.logger.debug("Resetting DHCP control on VLAN %s.", self.resource_id)
96
+        self.relay_vlan = None
97
+        self.dhcp_on = False
98
+        self.primary_rack = None
99
+        self.secondary_rack = None
100
+
101
+        if commit:
102
+            self.update()
103
+
68 104
     def set_dhcp_relay(self, relay_vlan_id):
69 105
         self.relay_vlan = relay_vlan_id
70 106
         self.update()

+ 1
- 3
python/drydock_provisioner/objects/node.py View File

@@ -53,12 +53,10 @@ class BaremetalNode(drydock_provisioner.objects.hostprofile.HostProfile):
53 53
                               site_design,
54 54
                               state_manager,
55 55
                               resolve_aliases=False):
56
-        self.logger.debug("Applying host profile to node %s" % self.name)
56
+        self.logger.debug("Compiling effective node model for %s" % self.name)
57 57
         self.apply_host_profile(site_design)
58
-        self.logger.debug("Applying hardware profile to node %s" % self.name)
59 58
         self.apply_hardware_profile(site_design)
60 59
         self.source = hd_fields.ModelSource.Compiled
61
-        self.logger.debug("Resolving kernel parameters on node %s" % self.name)
62 60
         self.resolve_kernel_params(site_design)
63 61
         if resolve_aliases:
64 62
             self.logger.debug(

+ 49
- 0
python/tests/unit/test_maasdriver_vlan.py View File

@@ -0,0 +1,49 @@
1
+# Copyright 2018 AT&T Intellectual Property.  All other rights reserved.
2
+#
3
+# Licensed under the Apache License, Version 2.0 (the "License");
4
+# you may not use this file except in compliance with the License.
5
+# You may obtain a copy of the License at
6
+#
7
+#     http://www.apache.org/licenses/LICENSE-2.0
8
+#
9
+# Unless required by applicable law or agreed to in writing, software
10
+# distributed under the License is distributed on an "AS IS" BASIS,
11
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+# See the License for the specific language governing permissions and
13
+# limitations under the License.
14
+'''Tests for the maasdriver VLAN model.'''
15
+import pytest
16
+
17
+from drydock_provisioner.drivers.node.maasdriver.models.vlan import Vlan
18
+from drydock_provisioner.drivers.node.maasdriver.errors import RackControllerConflict
19
+
20
+
21
+class TestMaasVlan():
22
+    def test_add_rack_controller(self, mocker):
23
+        '''Test vlan model method for setting a managing rack controller.'''
24
+
25
+        # An object to return that looks like a requests response
26
+        # object wrapping a MAAS API response
27
+        class MockedResponse():
28
+
29
+            status_code = 200
30
+
31
+        vlan_fields = {'name': 'test', 'dhcp_on': True, 'mtu': 1500}
32
+
33
+        primary_rack = "asdf79"
34
+        secondary_rack = "asdf80"
35
+        tertiary_rack = "asdf81"
36
+
37
+        api_client = mocker.MagicMock()
38
+        api_client.get.return_value = MockedResponse()
39
+
40
+        vlan_obj = Vlan(api_client, **vlan_fields)
41
+
42
+        vlan_obj.add_rack_controller(primary_rack)
43
+        assert vlan_obj.primary_rack == primary_rack
44
+
45
+        vlan_obj.add_rack_controller(secondary_rack)
46
+        assert vlan_obj.secondary_rack == secondary_rack
47
+
48
+        with pytest.raises(RackControllerConflict):
49
+            vlan_obj.add_rack_controller(tertiary_rack)

Loading…
Cancel
Save