A declarative host provisioning system.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

machine.py 25KB


  1. # Copyright 2017 AT&T Intellectual Property. All other rights reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """Model representing MAAS node/machine resource."""
  15. import logging
  16. import base64
  17. from threading import Lock, Condition
  18. import drydock_provisioner.error as errors
  19. import drydock_provisioner.drivers.node.maasdriver.models.base as model_base
  20. import drydock_provisioner.drivers.node.maasdriver.models.interface as maas_interface
  21. import drydock_provisioner.drivers.node.maasdriver.models.blockdev as maas_blockdev
  22. import drydock_provisioner.drivers.node.maasdriver.models.volumegroup as maas_vg
  23. import drydock_provisioner.drivers.node.maasdriver.models.node_results as maas_nr
  24. from bson import BSON
  25. LOG = logging.getLogger(__name__)
  26. power_lock = Lock()
  27. power_cv = Condition(lock=power_lock)
  28. class Machine(model_base.ResourceBase):
  29. resource_url = 'machines/{resource_id}/'
  30. fields = [
  31. 'resource_id', 'hostname', 'power_type', 'power_state',
  32. 'power_parameters', 'interfaces', 'boot_interface', 'memory',
  33. 'cpu_count', 'tag_names', 'status_name', 'boot_mac', 'boot_ip',
  34. 'owner_data', 'block_devices', 'volume_groups', 'domain'
  35. ]
  36. json_fields = ['hostname', 'power_type', 'domain']
  37. def __init__(self, api_client, **kwargs):
  38. super(Machine, self).__init__(api_client, **kwargs)
  39. # Replace generic dicts with interface collection model
  40. if hasattr(self, 'resource_id'):
  41. self.interfaces = maas_interface.Interfaces(
  42. api_client, system_id=self.resource_id)
  43. self.interfaces.refresh()
  44. try:
  45. self.block_devices = maas_blockdev.BlockDevices(
  46. api_client, system_id=self.resource_id)
  47. self.block_devices.refresh()
  48. except Exception:
  49. self.logger.warning("Failed loading node %s block devices." %
  50. (self.resource_id))
  51. try:
  52. self.volume_groups = maas_vg.VolumeGroups(
  53. api_client, system_id=self.resource_id)
  54. self.volume_groups.refresh()
  55. except Exception:
  56. self.logger.warning(
  57. "Failed load node %s volume groups." % (self.resource_id))
  58. else:
  59. self.interfaces = None
  60. self.block_devices = None
  61. self.volume_groups = None
  62. def interface_for_ip(self, ip_address):
  63. """Find the machine interface that will respond to ip_address.
  64. :param ip_address: The IP address to check interfaces
  65. :return: The interface that responds to this IP or None
  66. """
  67. for i in self.interfaces:
  68. if i.responds_to_ip(ip_address):
  69. return i
  70. return None
  71. def interface_for_mac(self, mac_address):
  72. """Find the machine interface that owns the specified ``mac_address``.
  73. :param str mac_address: The MAC address
  74. :return: the interface that responds to this MAC or None
  75. """
  76. for i in self.interfaces:
  77. if i.responds_to_mac(mac_address):
  78. return i
  79. return None
  80. def get_power_params(self):
  81. """Load power parameters for this node from MaaS."""
  82. url = self.interpolate_url()
  83. resp = self.api_client.get(url, op='power_parameters')
  84. if resp.status_code == 200:
  85. self.power_parameters = resp.json()
  86. def reset_network_config(self):
  87. """Reset the node networking configuration."""
  88. self.logger.info("Resetting networking configuration on node %s" %
  89. (self.resource_id))
  90. url = self.interpolate_url()
  91. resp = self.api_client.post(url, op='restore_networking_configuration')
  92. if not resp.ok:
  93. msg = "Error resetting network on node %s: %s - %s" \
  94. % (self.resource_id, resp.status_code, resp.text)
  95. self.logger.error(msg)
  96. raise errors.DriverError(msg)
  97. def reset_storage_config(self):
  98. """Reset storage config on this machine.
  99. Removes all the volume groups/logical volumes and all the physical
  100. device partitions on this machine.
  101. """
  102. self.logger.info(
  103. "Resetting storage configuration on node %s" % (self.resource_id))
  104. if self.volume_groups is not None and self.volume_groups.len() > 0:
  105. for vg in self.volume_groups:
  106. self.logger.debug("Removing VG %s" % vg.name)
  107. vg.delete()
  108. else:
  109. self.logger.debug(
  110. "No VGs configured on node %s" % (self.resource_id))
  111. if self.block_devices is not None:
  112. for d in self.block_devices:
  113. if d.partitions is not None and d.partitions.len() > 0:
  114. self.logger.debug(
  115. "Clearing partitions on device %s" % d.name)
  116. d.clear_partitions()
  117. else:
  118. self.logger.debug(
  119. "No partitions found on device %s" % d.name)
  120. else:
  121. self.logger.debug(
  122. "No block devices found on node %s" % (self.resource_id))
  123. def set_storage_layout(self,
  124. layout_type='flat',
  125. root_device=None,
  126. root_size=None,
  127. boot_size=None,
  128. root_lv_size=None,
  129. root_vg_name=None,
  130. root_lv_name=None):
  131. """Set machine storage layout for the root disk.
  132. :param layout_type: Whether to use 'flat' (partitions) or 'lvm' for the root filesystem
  133. :param root_device: Name of the block device to place the root partition on
  134. :param root_size: Size of the root partition in bytes
  135. :param boot_size: Size of the boot partition in bytes
  136. :param root_lv_size: Size of the root logical volume in bytes for LVM layout
  137. :param root_vg_name: Name of the volume group with root LV
  138. :param root_lv_name: Name of the root LV
  139. """
  140. try:
  141. url = self.interpolate_url()
  142. self.block_devices.refresh()
  143. root_dev = self.block_devices.singleton({'name': root_device})
  144. if root_dev is None:
  145. msg = "Error: cannot find storage device %s to set as root device" % root_device
  146. self.logger.error(msg)
  147. raise errors.DriverError(msg)
  148. root_dev.set_bootable()
  149. data = {
  150. 'storage_layout': layout_type,
  151. 'root_device': root_dev.resource_id,
  152. }
  153. self.logger.debug("Setting node %s storage layout to %s" %
  154. (self.hostname, layout_type))
  155. if root_size:
  156. data['root_size'] = root_size
  157. if boot_size:
  158. data['boot_size'] = boot_size
  159. if layout_type == 'lvm':
  160. if root_lv_size:
  161. data['lv_size'] = root_lv_size
  162. if root_vg_name:
  163. data['vg_name'] = root_vg_name
  164. if root_lv_name:
  165. data['lv_name'] = root_lv_name
  166. resp = self.api_client.post(
  167. url, op='set_storage_layout', files=data)
  168. if not resp.ok:
  169. raise Exception(
  170. "MAAS Error: %s - %s" % (resp.status_code, resp.text))
  171. except Exception as ex:
  172. msg = "Error: failed configuring node %s storage layout: %s" % (
  173. self.resource_id, str(ex))
  174. self.logger.error(msg)
  175. raise errors.DriverError(msg)
  176. def release(self, erase_disk=False, secure_erase=False, quick_erase=False):
  177. """Release a node so it can be redeployed.
  178. Release is opposite of acquire/allocate. After a successful release, the node
  179. will be in Ready state.
  180. :param erase_disk: If true, the local disks on the machine will be erased.
  181. :param secure_erase: If erase_disk and secure_erase are set to True, and
  182. quick_erase is not specified (default to False), MaaS
  183. will try secure_erase first. If the drive does not
  184. support secure erase, MaaS will overwirte th entire
  185. drive with null butes.
  186. :param quick_erase: If erase_disk and quick_erase are true, 1MB at the
  187. start and at the end of the drive will be erased to make
  188. data recovery inconvenient.
  189. If all three parameters are True and the drive supports
  190. secure erase, secure_erase will have precedence.
  191. If the all three parameters are true, but the disk drive
  192. does not support secure erase, MaaS will do quick erase.
  193. But, if the disk drive supports neither secure nor
  194. quick erase, the disk will be re-written with null bytes.
  195. If erase_disk is true, but both secure_erase and quick_erase
  196. are Fasle (default), MAAS will overwrite the whole disk
  197. with null bytes.
  198. If erase_disk is false, MaaS will not erase the drive, before
  199. releasing the node.
  200. """
  201. url = self.interpolate_url()
  202. options = {
  203. 'erase': erase_disk,
  204. 'secure_erase': secure_erase,
  205. 'quick_erase': quick_erase,
  206. }
  207. resp = self.api_client.post(url, op='release', files=options)
  208. if not resp.ok:
  209. brief_msg = ("Error releasing node, received HTTP %s from MaaS" %
  210. resp.status_code)
  211. self.logger.error(brief_msg)
  212. self.logger.debug("MaaS response: %s" % resp.text)
  213. raise errors.DriverError(brief_msg)
  214. def delete(self):
  215. """ Reset the node storage, and delete it.
  216. After node deletion, the node resource is purged from MaaS resources.
  217. MaaS API machine delete call, only removes the machine from MaaS resource list.
  218. AFter delete, he namchine needs to be manually pwowered on to be re-enlisted
  219. in MaaS as a New node.
  220. :param erase_disk: If true, the node storage is reset, before node resource
  221. is deleted from maas.
  222. """
  223. url = self.interpolate_url()
  224. resp = self.api_client.delete(url)
  225. if not resp.ok:
  226. brief_msg = ("Error deleting node, received HTTP %s from MaaS" %
  227. resp.status_code)
  228. self.logger.error(brief_msg)
  229. self.logger.debug("MaaS response: %s" % resp.text)
  230. raise errors.DriverError(brief_msg)
  231. def commission(self, debug=False):
  232. """Start the MaaS commissioning process.
  233. :param debug: If true, enable ssh on the node and leave it power up after commission
  234. """
  235. url = self.interpolate_url()
  236. # If we want to debug this node commissioning, enable SSH
  237. # after commissioning and leave the node powered up
  238. options = {'enable_ssh': '1' if debug else '0'}
  239. resp = self.api_client.post(url, op='commission', files=options)
  240. # Need to sort out how to handle exceptions
  241. if not resp.ok:
  242. self.logger.error(
  243. "Error commissioning node, received HTTP %s from MaaS" %
  244. resp.status_code)
  245. self.logger.debug("MaaS response: %s" % resp.text)
  246. raise errors.DriverError(
  247. "Error commissioning node, received HTTP %s from MaaS" %
  248. resp.status_code)
  249. def deploy(self, user_data=None, platform=None, kernel=None):
  250. """Start the MaaS deployment process.
  251. :param user_data: ``str`` of cloud-init user data
  252. :param platform: Which image to install
  253. :param kernel: Which kernel to enable
  254. """
  255. deploy_options = {}
  256. if user_data is not None:
  257. deploy_options['user_data'] = base64.b64encode(
  258. user_data.encode('utf-8')).decode('utf-8')
  259. if platform is not None:
  260. deploy_options['distro_series'] = platform
  261. if kernel is not None:
  262. deploy_options['hwe_kernel'] = kernel
  263. url = self.interpolate_url()
  264. resp = self.api_client.post(
  265. url,
  266. op='deploy',
  267. files=deploy_options if len(deploy_options) > 0 else None)
  268. if not resp.ok:
  269. self.logger.error(
  270. "Error deploying node, received HTTP %s from MaaS" %
  271. resp.status_code)
  272. self.logger.debug("MaaS response: %s" % resp.text)
  273. raise errors.DriverError(
  274. "Error deploying node, received HTTP %s from MaaS" %
  275. resp.status_code)
  276. def get_network_interface(self, iface_name):
  277. if self.interfaces is not None:
  278. iface = self.interfaces.singleton({'name': iface_name})
  279. return iface
  280. def get_details(self):
  281. url = self.interpolate_url()
  282. resp = self.api_client.get(url, op='details')
  283. if resp.status_code == 200:
  284. detail_config = BSON.decode(resp.content)
  285. return detail_config
  286. def get_task_results(self, result_type='all'):
  287. """Get the result from tasks run during node deployment.
  288. :param str result_type: the type of results to return. One of
  289. ``all``, ``commissioning``, ``testing``, ``deploy``
  290. """
  291. node_results = maas_nr.NodeResults(
  292. self.api_client,
  293. system_id_list=[self.resource_id],
  294. result_type=result_type)
  295. node_results.refresh()
  296. return node_results
  297. def set_owner_data(self, key, value):
  298. """Add/update/remove node owner data.
  299. If the machine is not currently allocated to a user
  300. it cannot have owner data
  301. :param key: Key of the owner data
  302. :param value: Value of the owner data. If None, the key is removed
  303. """
  304. url = self.interpolate_url()
  305. resp = self.api_client.post(
  306. url, op='set_owner_data', files={key: value})
  307. if resp.status_code != 200:
  308. self.logger.error(
  309. "Error setting node metadata, received HTTP %s from MaaS" %
  310. resp.status_code)
  311. self.logger.debug("MaaS response: %s" % resp.text)
  312. raise errors.DriverError(
  313. "Error setting node metadata, received HTTP %s from MaaS" %
  314. resp.status_code)
  315. def set_power_parameters(self, power_type, **kwargs):
  316. """Set power parameters for this node.
  317. Only available after the node has been added to MAAS.
  318. :param power_type: The type of power management for the node
  319. :param kwargs: Each kwargs key will be prepended with 'power_parameters_' and
  320. added to the list of updates for the node.
  321. """
  322. with power_cv:
  323. if not power_type:
  324. raise errors.DriverError(
  325. "Cannot set power parameters. Must specify a power type.")
  326. url = self.interpolate_url()
  327. if kwargs:
  328. power_params = dict()
  329. self.logger.debug("Setting node power type to %s." % power_type)
  330. self.power_type = power_type
  331. power_params['power_type'] = power_type
  332. for k, v in kwargs.items():
  333. power_params['power_parameters_' + k] = v
  334. self.logger.debug("Updating node %s power parameters: %s" %
  335. (self.hostname, str(power_params)))
  336. resp = self.api_client.put(url, files=power_params)
  337. if resp.status_code == 200:
  338. return True
  339. raise errors.DriverError(
  340. "Failed updating power parameters MAAS url %s - return code %s\n%s"
  341. % (url, resp.status_code.resp.text))
  342. def reset_power_parameters(self):
  343. """Reset power type and parameters for this node to manual.
  344. This is done to address the MaaS api issue detecting multiple BMC NIC
  345. after a node delete.
  346. Only available after the node has been added to MAAS.
  347. """
  348. with power_cv:
  349. url = self.interpolate_url()
  350. self.logger.debug("Resetting node power type for machine {}".format(
  351. self.resource_id))
  352. self.power_type = 'manual'
  353. power_params = {'power_type': 'manual'}
  354. resp = self.api_client.put(url, files=power_params)
  355. if resp.status_code == 200:
  356. return True
  357. raise errors.DriverError(
  358. "Failed updating power parameters MAAS url {} - return code {}\n{}"
  359. .format(url, resp.status_code.resp.text))
  360. def update_identity(self, n, domain="local"):
  361. """Update this node's identity based on the Node object ``n``
  362. :param objects.Node n: The Node object to use as reference
  363. :param str domain: The DNS domain to register this node under
  364. """
  365. try:
  366. self.hostname = n.name
  367. self.domain = domain
  368. self.update()
  369. if n.oob_type == 'libvirt':
  370. self.logger.debug(
  371. "Updating node %s MaaS power parameters for libvirt." %
  372. (n.name))
  373. oob_params = n.oob_parameters
  374. self.set_power_parameters(
  375. 'virsh',
  376. power_address=oob_params.get('libvirt_uri'),
  377. power_id=n.name)
  378. self.logger.debug("Updated MaaS resource %s hostname to %s" %
  379. (self.resource_id, n.name))
  380. except Exception as ex:
  381. self.logger.debug("Error updating MAAS node: %s" % str(ex))
  382. def to_dict(self):
  383. """Serialize this resource instance into a dict.
  384. The dict format matches the
  385. MAAS representation of the resource
  386. """
  387. data_dict = {}
  388. for f in self.json_fields:
  389. if getattr(self, f, None) is not None:
  390. if f == 'resource_id':
  391. data_dict['system_id'] = getattr(self, f)
  392. else:
  393. data_dict[f] = getattr(self, f)
  394. return data_dict
  395. @classmethod
  396. def from_dict(cls, api_client, obj_dict):
  397. """Create a instance of this resource class based on a dict.
  398. Dict format matches MaaS type attributes
  399. Customized for Machine due to use of system_id instead of id
  400. as resource key
  401. :param api_client: Instance of api_client.MaasRequestFactory for accessing MaaS API
  402. :param obj_dict: Python dict as parsed from MaaS API JSON representing this resource type
  403. """
  404. refined_dict = {k: obj_dict.get(k, None) for k in cls.fields}
  405. if 'system_id' in obj_dict.keys():
  406. refined_dict['resource_id'] = obj_dict.get('system_id')
  407. # Capture the boot interface MAC to allow for node id of VMs
  408. if 'boot_interface' in obj_dict.keys():
  409. if isinstance(obj_dict['boot_interface'], dict):
  410. refined_dict['boot_mac'] = obj_dict['boot_interface'][
  411. 'mac_address']
  412. if len(obj_dict['boot_interface']['links']) > 0:
  413. refined_dict['boot_ip'] = obj_dict['boot_interface'][
  414. 'links'][0].get('ip_address', None)
  415. i = cls(api_client, **refined_dict)
  416. return i
  417. class Machines(model_base.ResourceCollectionBase):
  418. collection_url = 'machines/'
  419. collection_resource = Machine
  420. def __init__(self, api_client, **kwargs):
  421. super(Machines, self).__init__(api_client)
  422. # Add the OOB power parameters to each machine instance
  423. def collect_power_params(self):
  424. for k, v in self.resources.items():
  425. v.get_power_params()
  426. def acquire_node(self, node_name):
  427. """Acquire a commissioned node fro deployment.
  428. :param node_name: The hostname of a node to acquire
  429. """
  430. self.refresh()
  431. node = self.singleton({'hostname': node_name})
  432. if node is None:
  433. self.logger.info("Node %s not found" % (node_name))
  434. raise errors.DriverError("Node %s not found" % (node_name))
  435. if node.status_name != 'Ready':
  436. self.logger.info(
  437. "Node %s status '%s' does not allow deployment, should be 'Ready'."
  438. % (node_name, node.status_name))
  439. raise errors.DriverError(
  440. "Node %s status '%s' does not allow deployment, should be 'Ready'."
  441. % (node_name, node.status_name))
  442. url = self.interpolate_url()
  443. resp = self.api_client.post(
  444. url, op='allocate', files={'system_id': node.resource_id})
  445. if not resp.ok:
  446. self.logger.error(
  447. "Error acquiring node, MaaS returned %s" % resp.status_code)
  448. self.logger.debug("MaaS response: %s" % resp.text)
  449. raise errors.DriverError(
  450. "Error acquiring node, MaaS returned %s" % resp.status_code)
  451. return node
  452. def identify_baremetal_node(self,
  453. node_model):
  454. """Find MaaS node resource matching Drydock BaremetalNode.
  455. Search all the defined MaaS Machines and attempt to match
  456. one against the provided Drydock BaremetalNode model. Update
  457. the MaaS instance with the correct hostname
  458. :param node_model: Instance of objects.node.BaremetalNode to search MaaS for matching resource
  459. """
  460. maas_node = None
  461. if node_model.oob_type == 'ipmi' or node_model.oob_type == 'redfish':
  462. node_oob_network = node_model.oob_parameters['network']
  463. node_oob_ip = node_model.get_network_address(node_oob_network)
  464. if node_oob_ip is None:
  465. self.logger.warn("Node model missing OOB IP address")
  466. raise ValueError('Node model missing OOB IP address')
  467. try:
  468. self.collect_power_params()
  469. maas_node = self.singleton({
  470. 'power_params.power_address':
  471. node_oob_ip
  472. })
  473. except ValueError:
  474. self.logger.info(
  475. "Error locating matching MaaS resource for OOB IP %s" %
  476. (node_oob_ip))
  477. return None
  478. else:
  479. # Use boot_mac for node's not using IPMI
  480. nodes = self.find_nodes_with_mac(node_model.boot_mac)
  481. if len(nodes) == 1:
  482. maas_node = nodes[0]
  483. else:
  484. self.logger.debug("Error: Found %d nodes with MAC %s", len(nodes), node_model.boot_mac)
  485. maas_node = None
  486. if maas_node is None:
  487. self.logger.info(
  488. "Could not locate node %s in MaaS" % node_model.name)
  489. else:
  490. self.logger.debug("Found MaaS resource %s matching Node %s" %
  491. (maas_node.resource_id, node_model.get_id()))
  492. return maas_node
  493. def find_nodes_with_mac(self, mac_address):
  494. """Find a list of nodes that own a NIC with ``mac_address``"""
  495. node_list = []
  496. for n in self.resources.values():
  497. if n.interface_for_mac(mac_address):
  498. node_list.append(n)
  499. return node_list
  500. def query(self, query):
  501. """Custom query method to deal with complex fields."""
  502. result = list(self.resources.values())
  503. for (k, v) in query.items():
  504. if k.startswith('power_params.'):
  505. field = k[13:]
  506. result = [
  507. i for i in result if str(
  508. getattr(i, 'power_parameters', {}).
  509. get(field, None)) == str(v)
  510. ]
  511. else:
  512. result = [
  513. i for i in result if str(getattr(i, k, None)) == str(v)
  514. ]
  515. return result
  516. def add(self, res):
  517. """Create a new resource in this collection in MaaS.
  518. Customize as Machine resources use 'system_id' instead of 'id'
  519. :param res: A instance of the Machine model
  520. """
  521. data_dict = res.to_dict()
  522. url = self.interpolate_url()
  523. resp = self.api_client.post(url, files=data_dict)
  524. if resp.status_code == 200:
  525. resp_json = resp.json()
  526. res.set_resource_id(resp_json.get('system_id'))
  527. return res
  528. raise errors.DriverError("Failed updating MAAS url %s - return code %s"
  529. % (url, resp.status_code))