drydock/drydock_provisioner/drivers/node/maasdriver/actions/node.py

2137 lines
94 KiB
Python

# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Task driver for completing node provisioning with Canonical MaaS 2.2+."""
import time
import logging
import re
import math
import yaml
from datetime import datetime
import drydock_provisioner.error as errors
import drydock_provisioner.config as config
import drydock_provisioner.objects.fields as hd_fields
import drydock_provisioner.objects.hostprofile as hostprofile
import drydock_provisioner.objects as objects
from drydock_provisioner.orchestrator.actions.orchestrator import BaseAction
import drydock_provisioner.drivers.node.maasdriver.models.fabric as maas_fabric
import drydock_provisioner.drivers.node.maasdriver.models.vlan as maas_vlan
import drydock_provisioner.drivers.node.maasdriver.models.subnet as maas_subnet
import drydock_provisioner.drivers.node.maasdriver.models.machine as maas_machine
import drydock_provisioner.drivers.node.maasdriver.models.tag as maas_tag
import drydock_provisioner.drivers.node.maasdriver.models.sshkey as maas_keys
import drydock_provisioner.drivers.node.maasdriver.models.boot_resource as maas_boot_res
import drydock_provisioner.drivers.node.maasdriver.models.rack_controller as maas_rack
import drydock_provisioner.drivers.node.maasdriver.models.partition as maas_partition
import drydock_provisioner.drivers.node.maasdriver.models.volumegroup as maas_vg
import drydock_provisioner.drivers.node.maasdriver.models.repository as maas_repo
class BaseMaasAction(BaseAction):
def __init__(self, *args, maas_client=None):
super().__init__(*args)
self.maas_client = maas_client
self.logger = logging.getLogger(
config.config_mgr.conf.logging.nodedriver_logger_name)
class ValidateNodeServices(BaseMaasAction):
"""Action to validate MaaS is available and ready for use."""
def start(self):
self.task.set_status(hd_fields.TaskStatus.Running)
try:
if self.maas_client.test_connectivity():
self.logger.info("Able to connect to MaaS.")
self.task.add_status_msg(
msg='Able to connect to MaaS.',
error=False,
ctx='NA',
ctx_type='NA')
self.task.success()
if self.maas_client.test_authentication():
self.logger.info("Able to authenticate with MaaS API.")
self.task.add_status_msg(
msg='Able to authenticate with MaaS API.',
error=False,
ctx='NA',
ctx_type='NA')
self.task.success()
boot_res = maas_boot_res.BootResources(self.maas_client)
boot_res.refresh()
if boot_res.is_importing():
self.logger.info(
"MaaS still importing boot resources.")
self.task.add_status_msg(
msg='MaaS still importing boot resources.',
error=True,
ctx='NA',
ctx_type='NA')
self.task.failure()
else:
if boot_res.len() > 0:
self.logger.info("MaaS has synced boot resources.")
self.task.add_status_msg(
msg='MaaS has synced boot resources.',
error=False,
ctx='NA',
ctx_type='NA')
self.task.success()
else:
self.logger.info("MaaS has no boot resources.")
self.task.add_status_msg(
msg='MaaS has no boot resources.',
error=True,
ctx='NA',
ctx_type='NA')
self.task.failure()
rack_ctlrs = maas_rack.RackControllers(self.maas_client)
rack_ctlrs.refresh()
if rack_ctlrs.len() == 0:
self.logger.info(
"No rack controllers registered in MaaS")
self.task.add_status_msg(
msg='No rack controllers registered in MaaS',
error=True,
ctx='NA',
ctx_type='NA')
self.task.failure()
else:
for r in rack_ctlrs:
rack_svc = r.get_services()
rack_name = r.hostname
for s in rack_svc:
if s in maas_rack.RackController.REQUIRED_SERVICES:
is_error = False
if rack_svc[s] not in ("running", "off"):
self.task.failure()
is_error = True
self.logger.info(
"Service %s on rackd %s is %s" %
(s, rack_name, rack_svc[s]))
self.task.add_status_msg(
msg="Service %s on rackd %s is %s" %
(s, rack_name, rack_svc[s]),
error=is_error,
ctx=rack_name,
ctx_type='rack_ctlr')
except errors.TransientDriverError as ex:
self.task.add_status_msg(
msg=str(ex), error=True, ctx='NA', ctx_type='NA', retry=True)
self.task.failure()
except errors.PersistentDriverError as ex:
self.task.add_status_msg(
msg=str(ex), error=True, ctx='NA', ctx_type='NA')
self.task.failure()
except Exception as ex:
self.task.add_status_msg(
msg=str(ex), error=True, ctx='NA', ctx_type='NA')
self.task.failure()
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save()
return
class CreateStorageTemplate(BaseMaasAction):
"""Action for any site wide storage activity, not implemented."""
def start(self):
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.save()
return
class CreateBootMedia(BaseMaasAction):
"""Action to import image resources."""
def start(self):
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.save()
return
class PrepareHardwareConfig(BaseMaasAction):
"""Action to prepare commissioning configuration."""
def start(self):
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.save()
return
class InterrogateNode(BaseMaasAction):
"""Action to query MaaS for build-time information about the node."""
def start(self):
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.save()
return
class DestroyNode(BaseMaasAction):
"""Action to remove node from MaaS in preparation for redeploy."""
def start(self):
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.save()
return
class CreateNetworkTemplate(BaseMaasAction):
"""Action for configuring the site wide network topology in MaaS."""
def start(self):
# TODO(sh8121att) Break this down into more functions
self.task.set_status(hd_fields.TaskStatus.Running)
self.task.save()
try:
site_design = self._load_site_design()
except errors.OrchestratorError:
self.task.add_status_msg(
msg="Error loading site design.",
error=True,
ctx='NA',
ctx_type='NA')
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.save()
return
if not site_design.network_links:
msg = ("Site design has no network links, no work to do.")
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx='NA', ctx_type='NA')
self.task.success()
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save()
return
# Try to true up MaaS definitions of fabrics/vlans/subnets
# with the networks defined in Drydock
design_networks = list()
design_links = site_design.network_links or []
fabrics = maas_fabric.Fabrics(self.maas_client)
fabrics.refresh()
subnets = maas_subnet.Subnets(self.maas_client)
subnets.refresh()
for l in design_links:
if l.metalabels is not None:
# TODO(sh8121att): move metalabels into config
if 'noconfig' in l.metalabels:
self.logger.info(
"NetworkLink %s marked 'noconfig', skipping configuration including allowed networks."
% (l.name))
continue
fabrics_found = set()
# First loop through the possible Networks on this NetworkLink
# and validate that MaaS's self-discovered networking matches
# our design. This means all self-discovered networks that are matched
# to a link need to all be part of the same fabric. Otherwise there is no
# way to reconcile the discovered topology with the designed topology
for net_name in l.allowed_networks:
n = site_design.get_network(net_name)
if n is None:
msg = "Network %s allowed on link %s, but not defined." % (
net_name, l.name)
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg,
error=True,
ctx=l.name,
ctx_type='network_link')
continue
maas_net = subnets.singleton({'cidr': n.cidr})
if maas_net is not None:
fabrics_found.add(maas_net.fabric)
if len(fabrics_found) > 1:
msg = "MaaS self-discovered network incompatible with NetworkLink %s" % l.name
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=l.name, ctx_type='network_link')
continue
elif len(fabrics_found) == 1:
link_fabric_id = fabrics_found.pop()
link_fabric = fabrics.select(link_fabric_id)
link_fabric.name = l.name
link_fabric.update()
else:
link_fabric = fabrics.singleton({'name': l.name})
if link_fabric is None:
link_fabric = maas_fabric.Fabric(
self.maas_client, name=l.name)
link_fabric = fabrics.add(link_fabric)
# Ensure that the MTU of the untagged VLAN on the fabric
# matches that on the NetworkLink config
vlan_list = maas_vlan.Vlans(
self.maas_client, fabric_id=link_fabric.resource_id)
vlan_list.refresh()
msg = "Updating native VLAN MTU = %d on network link %s" % (l.mtu,
l.name)
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=l.name, ctx_type='network_link')
vlan = vlan_list.singleton({'vid': 0})
if vlan:
vlan.mtu = l.mtu
vlan.update()
else:
self.logger.warning("Unable to find native VLAN on fabric %s."
% link_fabric.resource_id)
# Now that we have the fabrics sorted out, check
# that VLAN tags and subnet attributes are correct
for net_name in l.allowed_networks:
n = site_design.get_network(net_name)
design_networks.append(n)
if n is None:
continue
try:
subnet = subnets.singleton({'cidr': n.cidr})
if subnet is None:
msg = "Subnet for network %s not found, creating..." % (
n.name)
self.logger.info(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx='NA', ctx_type='NA')
fabric_list = maas_fabric.Fabrics(self.maas_client)
fabric_list.refresh()
fabric = fabric_list.singleton({'name': l.name})
if fabric is not None:
vlan_list = maas_vlan.Vlans(
self.maas_client, fabric_id=fabric.resource_id)
vlan_list.refresh()
vlan = vlan_list.singleton({
'vid':
n.vlan_id if n.vlan_id is not None else 0
})
if vlan is not None:
vlan.name = n.name
if getattr(n, 'mtu', None) is not None:
vlan.mtu = n.mtu
vlan.update()
msg = "VLAN %s found for network %s, updated attributes" % (
vlan.resource_id, n.name)
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='network')
else:
# Create a new VLAN in this fabric and assign subnet to it
vlan = maas_vlan.Vlan(
self.maas_client,
name=n.name,
vid=n.vlan_id,
mtu=getattr(n, 'mtu', None),
fabric_id=fabric.resource_id)
vlan = vlan_list.add(vlan)
msg = "VLAN %s created for network %s" % (
vlan.resource_id, n.name)
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='network')
# If subnet did not exist, create it here and attach it to the fabric/VLAN
subnet = maas_subnet.Subnet(
self.maas_client,
name=n.name,
cidr=n.cidr,
dns_servers=n.dns_servers,
fabric=fabric.resource_id,
vlan=vlan.resource_id,
gateway_ip=n.get_default_gateway())
subnet_list = maas_subnet.Subnets(self.maas_client)
subnet = subnet_list.add(subnet)
msg = "Created subnet %s for CIDR %s on VLAN %s" % (
subnet.resource_id, subnet.cidr, subnet.vlan)
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='network')
else:
msg = "Fabric %s should be created, but cannot locate it." % (
l.name)
self.logger.error(msg)
self.task.add_status_msg(
msg=msg,
error=True,
ctx=n.name,
ctx_type='network_link')
else:
subnet.name = n.name
subnet.dns_servers = n.dns_servers
msg = "Subnet %s found for network %s, updated attributes" % (
subnet.resource_id, n.name)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='network')
self.logger.info(msg)
vlan_list = maas_vlan.Vlans(
self.maas_client, fabric_id=subnet.fabric)
vlan_list.refresh()
vlan = vlan_list.select(subnet.vlan)
if vlan is not None:
vlan.name = n.name
vlan.set_vid(n.vlan_id)
if getattr(n, 'mtu', None) is not None:
vlan.mtu = n.mtu
vlan.update()
msg = "VLAN %s found for network %s, updated attributes" % (
vlan.resource_id, n.name)
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='network')
else:
msg = "MaaS subnet %s does not have a matching VLAN" % (
subnet.resource_id)
self.logger.error(msg)
self.task.add_status_msg(
msg=msg,
error=True,
ctx=n.name,
ctx_type='network')
self.task.failure(focus=n.name)
# Check if the routes have a default route
subnet.gateway_ip = n.get_default_gateway()
subnet.update()
dhcp_on = False
for r in n.ranges:
try:
subnet.add_address_range(r)
if r.get('type', None) == 'dhcp':
dhcp_on = True
except Exception as e:
msg = "Error adding range to network %s: %s" % (
n.name, str(r))
self.logger.error(msg, exc_info=e)
self.task.add_status_msg(
msg=msg,
error=True,
ctx=n.name,
ctx_type='network')
vlan_list = maas_vlan.Vlans(
self.maas_client, fabric_id=subnet.fabric)
vlan_list.refresh()
vlan = vlan_list.select(subnet.vlan)
if dhcp_on and not vlan.dhcp_on:
# check if design requires a dhcp relay and if the MaaS vlan already uses a dhcp_relay
msg = "DHCP enabled for subnet %s, activating in MaaS" % (
subnet.name)
self.logger.info(msg)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='network')
rack_ctlrs = maas_rack.RackControllers(
self.maas_client)
rack_ctlrs.refresh()
dhcp_config_set = False
for r in rack_ctlrs:
if n.dhcp_relay_upstream_target is not None:
if r.interface_for_ip(
n.dhcp_relay_upstream_target):
iface = r.interface_for_ip(
n.dhcp_relay_upstream_target)
vlan.relay_vlan = iface.vlan
msg = "Relaying DHCP on vlan %s to vlan %s" % (
vlan.resource_id, vlan.relay_vlan)
self.logger.info(msg)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='network')
vlan.update()
dhcp_config_set = True
break
else:
for i in r.interfaces:
if i.vlan == vlan.resource_id:
msg = "Rack controller %s has interface on vlan %s" % (
r.resource_id, vlan.resource_id)
self.logger.debug(msg)
rackctl_id = r.resource_id
vlan.dhcp_on = True
vlan.primary_rack = rackctl_id
msg = "Enabling DHCP on VLAN %s managed by rack ctlr %s" % (
vlan.resource_id, rackctl_id)
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='network')
vlan.update()
dhcp_config_set = True
break
if dhcp_config_set:
break
if not dhcp_config_set:
msg = "Network %s requires DHCP, but could not locate a rack controller to serve it." % (
n.name)
self.logger.error(msg)
self.task.add_status_msg(
msg=msg,
error=True,
ctx=n.name,
ctx_type='network')
self.task.failure(focus=n.name)
elif dhcp_on and vlan.dhcp_on:
self.logger.info("DHCP already enabled for subnet %s" %
(subnet.resource_id))
except ValueError:
raise errors.DriverError("Inconsistent data from MaaS")
# Now that all networks should be created, we can setup routes between them
subnet_list = maas_subnet.Subnets(self.maas_client)
subnet_list.refresh()
for n in design_networks:
src_subnet = subnet_list.singleton({'cidr': n.cidr})
for r in n.routes:
# care for case of routedomain routes that are logical placeholders
if not r.get('subnet', None):
continue
route_net = r.get('subnet')
# Skip the default route case
if route_net == '0.0.0.0/0':
continue
dest_subnet = subnet_list.singleton({'cidr': route_net})
if dest_subnet is not None:
src_subnet.add_static_route(
dest_subnet.resource_id,
r.get('gateway'),
metric=r.get('metric', 100))
else:
msg = "Could not locate destination network for static route to %s." % route_net
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='network')
self.task.failure(focus=n.name)
self.logger.info(msg)
continue
# now validate that all subnets allowed on a link were created
for n in design_networks:
if n.metalabels is not None:
# TODO(sh8121att): move metalabels into config
if 'noconfig' in n.metalabels:
self.logger.info(
"Network %s marked 'noconfig', skipping validation." %
(n.name))
continue
exists = subnet_list.singleton({'cidr': n.cidr})
if exists is not None:
self.task.success(focus=n.name)
else:
msg = "Network %s defined, but not found in MaaS after network config task." % n.name
self.logger.error(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='network')
self.task.failure(focus=n.name)
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save()
return
class ConfigureNodeProvisioner(BaseMaasAction):
"""Action for configuring site-wide node provisioner options."""
# These repo names cannot be deleted out of MAAS
# and maintain functionality
DEFAULT_REPOS = ['main_archive', 'ports_archive']
def start(self):
self.task.set_status(hd_fields.TaskStatus.Running)
self.task.save()
try:
site_design = self._load_site_design()
except errors.OrchestratorError:
self.task.add_status_msg(
msg="Error loading site design.",
error=True,
ctx='NA',
ctx_type='NA')
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.save()
return
try:
current_repos = maas_repo.Repositories(self.maas_client)
current_repos.refresh()
except Exception as ex:
self.logger.debug("Error accessing the MaaS API.", exc_info=ex)
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.add_status_msg(
msg='Error accessing MaaS SshKeys API',
error=True,
ctx='NA',
ctx_type='NA')
self.task.save()
return
site_model = site_design.get_site()
repo_list = getattr(site_model, 'repositories', None) or []
if repo_list:
for r in repo_list:
try:
existing_repo = current_repos.singleton({
'name': r.get_id()
})
new_repo = self.create_maas_repo(self.maas_client, r)
if existing_repo:
new_repo.resource_id = existing_repo.resource_id
new_repo.update()
msg = "Updating repository definition for %s." % (
r.name)
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx='NA', ctx_type='NA')
self.task.success()
else:
new_repo = current_repos.add(new_repo)
msg = "Adding repository definition for %s." % (r.name)
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx='NA', ctx_type='NA')
self.task.success()
except Exception as ex:
msg = "Error adding repository to MaaS configuration: %s" % str(
ex)
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx='NA', ctx_type='NA')
self.task.failure()
if repo_list.remove_unlisted:
defined_repos = [x.get_id() for x in repo_list]
to_delete = [r
for r
in current_repos
if r.name not in defined_repos]
for r in to_delete:
if r.name not in self.DEFAULT_REPOS:
r.delete()
current_repos.refresh()
else:
msg = ("No repositories to add, no work to do.")
self.logger.debug(msg)
self.task.success()
self.task.add_status_msg(
msg=msg, error=False, ctx='NA', ctx_type='NA')
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save()
return
@staticmethod
def create_maas_repo(api_client, repo_obj):
"""Create a MAAS model of a repo based of a Drydock model.
If resource_id is specified, assign it the resource_id so the new instance
can be used to update an existing repo.
:param api_client: A MAAS API client configured to connect to MAAS
:param repo_obj: Instance of objects.Repository
"""
model_fields = dict()
if repo_obj.distributions:
model_fields['distributions'] = ','.join(repo_obj.distributions)
if repo_obj.components:
if repo_obj.get_id() in ConfigureNodeProvisioner.DEFAULT_REPOS:
model_fields['disabled_components'] = ','.join(repo_obj.get_disabled_components())
else:
model_fields['components'] = ','.join(repo_obj.components)
if repo_obj.get_disabled_subrepos():
model_fields['disabled_pockets'] = ','.join(repo_obj.get_disabled_subrepos())
if repo_obj.arches:
model_fields['arches'] = ','.join(repo_obj.arches)
model_fields['key'] = repo_obj.gpgkey
for k in ['name', 'url']:
model_fields[k] = getattr(repo_obj, k)
repo_model = maas_repo.Repository(api_client, **model_fields)
return repo_model
class ConfigureUserCredentials(BaseMaasAction):
"""Action for configuring user public keys."""
def start(self):
self.task.set_status(hd_fields.TaskStatus.Running)
self.task.save()
try:
site_design = self._load_site_design()
except errors.OrchestratorError:
self.task.add_status_msg(
msg="Error loading site design.",
error=True,
ctx='NA',
ctx_type='NA')
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.save()
return
try:
current_keys = maas_keys.SshKeys(self.maas_client)
current_keys.refresh()
except Exception as ex:
self.logger.debug("Error accessing the MaaS API.", exc_info=ex)
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.add_status_msg(
msg='Error accessing MaaS SshKeys API',
error=True,
ctx='NA',
ctx_type='NA')
self.task.save()
return
site_model = site_design.get_site()
key_list = getattr(site_model, 'authorized_keys', None) or []
if key_list:
for k in key_list:
try:
if len(current_keys.query({
'key': k.replace("\n", "")
})) == 0:
new_key = maas_keys.SshKey(self.maas_client, key=k)
new_key = current_keys.add(new_key)
msg = "Added SSH key %s to MaaS user profile. Will be installed on all deployed nodes." % (
k[:16])
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx='NA', ctx_type='NA')
self.task.success()
else:
msg = "SSH key %s already exists in MaaS user profile." % k[:
16]
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx='NA', ctx_type='NA')
self.task.success()
except Exception as ex:
msg = "Error adding SSH key to MaaS user profile: %s" % str(
ex)
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx='NA', ctx_type='NA')
self.task.failure()
else:
msg = ("No keys to add, no work to do.")
self.logger.debug(msg)
self.task.success()
self.task.add_status_msg(
msg=msg, error=False, ctx='NA', ctx_type='NA')
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save()
return
class IdentifyNode(BaseMaasAction):
"""Action to identify a node resource in MaaS matching a node design."""
def start(self):
try:
machine_list = maas_machine.Machines(self.maas_client)
machine_list.refresh()
except Exception as ex:
self.logger.debug("Error accessing the MaaS API.", exc_info=ex)
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.add_status_msg(
msg='Error accessing MaaS Machines API: %s' % str(ex),
error=True,
ctx='NA',
ctx_type='NA')
self.task.save()
return
self.task.set_status(hd_fields.TaskStatus.Running)
self.task.save()
try:
site_design = self._load_site_design()
except errors.OrchestratorError:
self.task.add_status_msg(
msg="Error loading site design.",
error=True,
ctx='NA',
ctx_type='NA')
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.save()
return
nodes = self.orchestrator.process_node_filter(self.task.node_filter,
site_design)
for n in nodes:
try:
machine = machine_list.identify_baremetal_node(n)
if machine is not None:
self.task.success(focus=n.get_id())
self.task.add_status_msg(
msg="Node %s identified in MaaS" % n.name,
error=False,
ctx=n.name,
ctx_type='node')
else:
self.task.failure(focus=n.get_id())
self.task.add_status_msg(
msg="Node %s not found in MaaS" % n.name,
error=True,
ctx=n.name,
ctx_type='node')
except Exception as ex:
self.task.failure(focus=n.get_id())
self.task.add_status_msg(
msg="Node %s not found in MaaS" % n.name,
error=True,
ctx=n.name,
ctx_type='node')
self.logger.debug(
"Exception caught in identify node.", exc_info=ex)
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save()
return
class ConfigureHardware(BaseMaasAction):
"""Action to start commissioning a server."""
def start(self):
try:
machine_list = maas_machine.Machines(self.maas_client)
machine_list.refresh()
except Exception as ex:
self.logger.debug("Error accessing the MaaS API.", exc_info=ex)
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.add_status_msg(
msg="Error accessing MaaS Machines API: %s" % str(ex),
error=True,
ctx='NA',
ctx_type='NA')
self.task.save()
return
self.task.set_status(hd_fields.TaskStatus.Running)
self.task.save()
try:
site_design = self._load_site_design()
except errors.OrchestratorError:
self.task.add_status_msg(
msg="Error loading site design.",
error=True,
ctx='NA',
ctx_type='NA')
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.save()
return
nodes = self.orchestrator.process_node_filter(self.task.node_filter,
site_design)
# TODO(sh8121att): Better way of representing the node statuses than static strings
for n in nodes:
try:
self.logger.debug(
"Locating node %s for commissioning" % (n.name))
machine = machine_list.identify_baremetal_node(
n, update_name=False)
if machine is not None:
if machine.status_name in [
'New', 'Broken', 'Failed commissioning',
'Failed testing'
]:
self.logger.debug(
"Located node %s in MaaS, starting commissioning" %
(n.name))
machine.commission()
# Poll machine status
attempts = 0
max_attempts = config.config_mgr.conf.timeouts.configure_hardware * (
60 //
config.config_mgr.conf.maasdriver.poll_interval)
while (attempts < max_attempts and
(machine.status_name != 'Ready' and
not machine.status_name.startswith('Failed'))):
attempts = attempts + 1
time.sleep(
config.config_mgr.conf.maasdriver.poll_interval
)
try:
machine.refresh()
self.logger.debug(
"Polling node %s status attempt %d of %d: %s"
% (n.name, attempts, max_attempts,
machine.status_name))
except Exception:
self.logger.warning(
"Error updating node %s status during commissioning, will re-attempt."
% (n.name),
exc_info=True)
if machine.status_name == 'Ready':
msg = "Node %s commissioned." % (n.name)
self.logger.info(msg)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='node')
self.task.success(focus=n.get_id())
self.collect_build_data(machine)
elif machine.status_name in ['Commissioning', 'Testing']:
msg = "Located node %s in MaaS, node already being commissioned. Skipping..." % (
n.name)
self.logger.info(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
self.task.success(focus=n.get_id())
elif machine.status_name in [
'Ready', 'Deploying', 'Allocated', 'Deployed'
]:
msg = "Located node %s in MaaS, node commissioned. Skipping..." % (
n.name)
self.logger.info(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
self.task.success(focus=n.get_id())
else:
msg = "Located node %s in MaaS, unknown status %s. Skipping..." % (
n, machine.status_name)
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.get_id())
else:
msg = "Node %s not found in MaaS" % n.name
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.get_id())
except Exception as ex:
msg = "Error commissioning node %s: %s" % (n.name, str(ex))
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.get_id())
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save()
return
def collect_build_data(self, machine):
"""Collect MaaS build data after commissioning."""
self.logger.debug("Collecting build data for %s" % machine.hostname)
try:
data = machine.get_details()
if data:
for t, d in data.items():
if t in ('lshw', 'lldp'):
df = 'text/xml'
else:
df = 'text/plain'
bd = objects.BuildData(
node_name=machine.hostname,
task_id=self.task.get_id(),
generator=t,
collected_date=datetime.utcnow(),
data_format=df,
data_element=d.decode())
self.logger.debug(
"Saving build data from generator %s" % t)
self.state_manager.post_build_data(bd)
self.task.add_status_msg(
msg="Saving build data element.",
error=False,
ctx=machine.hostname,
ctx_type='node')
except Exception as ex:
self.logger.error(
"Error collecting node build data for %s" % machine.hostname,
exc_info=ex)
class ApplyNodeNetworking(BaseMaasAction):
"""Action to configure networking on a node."""
def start(self):
try:
machine_list = maas_machine.Machines(self.maas_client)
machine_list.refresh()
fabrics = maas_fabric.Fabrics(self.maas_client)
fabrics.refresh()
subnets = maas_subnet.Subnets(self.maas_client)
subnets.refresh()
except Exception as ex:
self.logger.debug("Error accessing the MaaS API.", exc_info=ex)
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.add_status_msg(
msg="Error accessing MaaS API: %s" % str(ex),
error=True,
ctx='NA',
ctx_type='NA')
self.task.save()
return
self.task.set_status(hd_fields.TaskStatus.Running)
self.task.save()
try:
site_design = self._load_site_design()
except errors.OrchestratorError:
self.task.add_status_msg(
msg="Error loading site design.",
error=True,
ctx='NA',
ctx_type='NA')
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.save()
return
nodes = self.orchestrator.process_node_filter(self.task.node_filter,
site_design)
# TODO(sh8121att): Better way of representing the node statuses than static strings
for n in nodes:
try:
self.logger.debug(
"Locating node %s for network configuration" % (n.name))
machine = machine_list.identify_baremetal_node(
n, update_name=False)
if machine is not None:
if machine.status_name.startswith('Failed Dep'):
msg = (
"Node %s has failed deployment, releasing to try again."
% n.name)
self.logger.debug(msg)
try:
machine.release()
machine.refresh()
except errors.DriverError as ex:
msg = (
"Node %s could not be released, skipping deployment."
% n.name)
self.logger.info(msg)
self.task.add_status_msg(
msg=msg,
error=True,
ctx=n.name,
ctx_type='node')
self.task.failure(focus=n.name)
continue
msg = ("Released failed node %s to retry deployment." %
n.name)
self.logger.info(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
if machine.status_name == 'Ready':
msg = "Located node %s in MaaS, starting interface configuration" % (
n.name)
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
machine.reset_network_config()
machine.refresh()
for i in n.interfaces:
if not i.network_link:
self.logger.debug(
"Interface %s has no network link, skipping configuration."
% (i.device_name))
continue
nl = site_design.get_network_link(i.network_link)
if nl.metalabels is not None:
if 'noconfig' in nl.metalabels:
self.logger.info(
"Interface %s connected to NetworkLink %s marked 'noconfig', skipping."
% (i.device_name, nl.name))
continue
fabric = fabrics.singleton({'name': nl.name})
if fabric is None:
msg = "No fabric found for NetworkLink %s" % (
nl.name)
self.logger.error(msg)
self.task.add_status_msg(
msg=msg,
error=True,
ctx=n.name,
ctx_type='node')
self.task.failure(focus=n.name)
continue
if nl.bonding_mode != hd_fields.NetworkLinkBondingMode.Disabled:
if len(i.get_hw_slaves()) > 1:
msg = "Building node %s interface %s as a bond." % (
n.name, i.device_name)
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='node')
hw_iface_list = i.get_hw_slaves()
hw_iface_logicalname_list = []
for hw_iface in hw_iface_list:
hw_iface_logicalname_list.append(
n.get_logicalname(hw_iface))
iface = machine.interfaces.create_bond(
device_name=i.device_name,
parent_names=hw_iface_logicalname_list,
mtu=nl.mtu,
fabric=fabric.resource_id,
mode=nl.bonding_mode,
monitor_interval=nl.bonding_mon_rate,
downdelay=nl.bonding_down_delay,
updelay=nl.bonding_up_delay,
lacp_rate=nl.bonding_peer_rate,
hash_policy=nl.bonding_xmit_hash)
else:
msg = "Network link %s indicates bonding, " \
"interface %s has less than 2 slaves." % \
(nl.name, i.device_name)
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg,
error=True,
ctx=n.name,
ctx_type='node')
self.task.failure(focus=n.name)
continue
else:
if len(i.get_hw_slaves()) > 1:
msg = "Network link %s disables bonding, interface %s has multiple slaves." % \
(nl.name, i.device_name)
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg,
error=True,
ctx=n.name,
ctx_type='node')
self.task.failure(n.name)
continue
elif len(i.get_hw_slaves()) == 0:
msg = "Interface %s has 0 slaves." % (
i.device_name)
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg,
error=True,
ctx=n.name,
ctx_type='node')
self.task.failure(focus=n.name)
else:
msg = "Configuring interface %s on node %s" % (
i.device_name, n.name)
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='node')
hw_iface = i.get_hw_slaves()[0]
# TODO(sh8121att): HardwareProfile device alias integration
iface = machine.get_network_interface(
n.get_logicalname(hw_iface))
if iface is None:
msg = "Interface %s not found on node %s, skipping configuration" % (
i.device_name, machine.resource_id)
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg,
error=True,
ctx=n.name,
ctx_type='node')
self.task.failure(focus=n.name)
continue
if iface.fabric_id == fabric.resource_id:
self.logger.debug(
"Interface %s already attached to fabric_id %s"
% (i.device_name, fabric.resource_id))
else:
self.logger.debug(
"Attaching node %s interface %s to fabric_id %s"
% (n.name, i.device_name,
fabric.resource_id))
iface.attach_fabric(
fabric_id=fabric.resource_id)
if iface.effective_mtu != nl.mtu:
self.logger.debug(
"Updating interface %s MTU to %s" %
(i.device_name, nl.mtu))
iface.set_mtu(nl.mtu)
for iface_net in getattr(i, 'networks', []):
dd_net = site_design.get_network(iface_net)
if dd_net is not None:
link_iface = None
if iface_net == getattr(
nl, 'native_network', None):
# If a node interface is attached to the native network for a link
# then the interface itself should be linked to network, not a VLAN
# tagged interface
self.logger.debug(
"Attaching node %s interface %s to untagged VLAN on fabric %s"
% (n.name, i.device_name,
fabric.resource_id))
link_iface = iface
else:
# For non-native networks, we create VLAN tagged interfaces as children
# of this interface
vlan_options = {
'vlan_tag': dd_net.vlan_id,
'parent_name': iface.name,
}
if dd_net.mtu is not None:
vlan_options['mtu'] = dd_net.mtu
self.logger.debug(
"Creating tagged interface for VLAN %s on system %s interface %s"
% (dd_net.vlan_id, n.name,
i.device_name))
try:
link_iface = machine.interfaces.create_vlan(
**vlan_options)
except errors.DriverError as ex:
msg = "Error creating interface: %s" % str(
ex)
self.logger.info(msg)
self.task.add_status_msg(
msg=msg,
error=True,
ctx=n.name,
ctx_type='node')
self.task.failure(focus=n.name)
continue
link_options = {}
link_options[
'primary'] = True if iface_net == getattr(
n, 'primary_network',
None) else False
link_options['subnet_cidr'] = dd_net.cidr
found = False
for a in getattr(n, 'addressing', []):
if a.network == iface_net:
link_options[
'ip_address'] = 'dhcp' if a.type == 'dhcp' else a.address
found = True
if not found:
msg = "No addressed assigned to network %s for node %s, link is L2 only." % (
iface_net, n.name)
self.logger.info(msg)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='node')
link_options['ip_address'] = None
msg = "Linking system %s interface %s to subnet %s" % (
n.name, i.device_name, dd_net.cidr)
self.logger.info(msg)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='node')
link_iface.link_subnet(**link_options)
self.task.success(focus=n.name)
else:
self.task.failure(focus=n.name)
msg = "Did not find a defined Network %s to attach to interface" % iface_net
self.logger.error(msg)
self.task.add_status_msg(
msg=msg,
error=True,
ctx=n.name,
ctx_type='node')
elif machine.status_name == 'Broken':
msg = ("Located node %s in MaaS, status broken. Run "
"ConfigureHardware before configurating network"
% (n.name))
self.logger.info(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.name)
elif machine.status_name == 'Deployed':
msg = (
"Located node %s in MaaS, status deployed. Skipping "
"and considering success. Destroy node first if redeploy needed."
% (n.name))
self.logger.info(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
self.task.success(focus=n.name)
else:
msg = "Located node %s in MaaS, unknown status %s. Skipping..." % (
n.name, machine.status_name)
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.name)
else:
msg = "Node %s not found in MaaS" % n.name
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure()
except Exception as ex:
msg = "Error configuring network for node %s: %s" % (n.name,
str(ex))
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.name)
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save()
class ApplyNodePlatform(BaseMaasAction):
"""Action for configuring platform settings (OS/kernel) on a node."""
def start(self):
try:
machine_list = maas_machine.Machines(self.maas_client)
machine_list.refresh()
tag_list = maas_tag.Tags(self.maas_client)
tag_list.refresh()
except Exception as ex:
self.logger.debug("Error accessing the MaaS API.", exc_info=ex)
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.add_status_msg(
msg="Error accessing MaaS API: %s" % str(ex),
error=True,
ctx='NA',
ctx_type='NA')
self.task.save()
return
self.task.set_status(hd_fields.TaskStatus.Running)
self.task.save()
try:
site_design = self._load_site_design()
except errors.OrchestratorError:
self.task.add_status_msg(
msg="Error loading site design.",
error=True,
ctx='NA',
ctx_type='NA')
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.save()
return
nodes = self.orchestrator.process_node_filter(self.task.node_filter,
site_design)
for n in nodes:
try:
self.logger.debug(
"Locating node %s for platform configuration" % (n.name))
machine = machine_list.identify_baremetal_node(
n, update_name=False)
if machine is None:
msg = "Could not locate machine for node %s" % n.name
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.get_id())
continue
except Exception as ex1:
msg = "Error locating machine for node %s: %s" % (n, str(ex1))
self.task.failure(focus=n.get_id())
self.logger.error(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
continue
if machine.status_name == 'Deployed':
msg = (
"Located node %s in MaaS, status deployed. Skipping "
"and considering success. Destroy node first if redeploy needed."
% (n.name))
self.logger.info(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
self.task.success(focus=n.name)
continue
try:
# Render the string of all kernel params for the node
kp_string = n.get_kernel_param_string()
if kp_string:
# Check if the node has an existing kernel params tag
node_kp_tag = tag_list.select("%s_kp" % (n.name))
self.logger.info(
"Configuring kernel parameters for node %s" % (n.name))
if node_kp_tag:
node_kp_tag.delete()
msg = "Creating kernel_params tag for node %s: %s" % (
n.name, kp_string)
self.logger.debug(msg)
node_kp_tag = maas_tag.Tag(
self.maas_client,
name="%s_kp" % (n.name),
kernel_opts=kp_string)
node_kp_tag = tag_list.add(node_kp_tag)
node_kp_tag.apply_to_node(machine.resource_id)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
msg = "Applied kernel parameters to node %s" % n.name
self.logger.info(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
self.task.success(focus=n.get_id())
else:
msg = "No kernel parameters to apply for %s." % n.name
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
self.task.success(focus=n.get_id())
except Exception as ex2:
msg = "Error configuring kernel parameters for node %s" % (
n.name)
self.logger.error(msg + ": %s" % str(ex2))
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.get_id())
continue
try:
if n.tags is not None and len(n.tags) > 0:
self.logger.info(
"Configuring static tags for node %s" % (n.name))
for t in n.tags:
tag_list.refresh()
tag = tag_list.select(t)
if tag is None:
try:
self.logger.debug("Creating static tag %s" % t)
tag = maas_tag.Tag(self.maas_client, name=t)
tag = tag_list.add(tag)
except errors.DriverError:
tag_list.refresh()
tag = tag_list.select(t)
if tag is not None:
self.logger.debug(
"Tag %s arrived out of nowhere." % t)
else:
self.logger.error(
"Error creating tag %s." % t)
continue
msg = "Applying tag %s to node %s" % (
tag.resource_id, machine.resource_id)
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
tag.apply_to_node(machine.resource_id)
self.logger.info(
"Applied static tags to node %s" % (n.name))
self.task.success(focus=n.get_id())
else:
msg = "No node tags to apply for %s." % n.name
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
self.task.success(focus=n.get_id())
except Exception as ex3:
msg = "Error configuring static tags for node %s" % (n.name)
self.logger.error(msg + ": " + str(ex3))
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.get_id())
continue
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save()
return
class ApplyNodeStorage(BaseMaasAction):
"""Action configure node storage."""
# NOTE(sh8121att) Partition tables take size and it seems if the first partition
# on a block device attempts to use the full size of the device
# MAAS will fail that the device is not large enough
# It may be that the block device available_size does not include overhead
# for the partition table and once the table is written, there is not
# enough space for the 'full size' partition. So reserve the below
# when calculating 'rest of device' sizing w/ the '>' operator
#
# This size is based on documentation that for backwards compatability
# the first partition should start on LBA 63 and we'll assume 4096 byte
# blocks, thus 63 (add one for safety) x 4096 = 258048
PART_TABLE_RESERVATION = 258048
def start(self):
try:
machine_list = maas_machine.Machines(self.maas_client)
machine_list.refresh()
except Exception as ex:
self.logger.debug("Error accessing the MaaS API.", exc_info=ex)
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.add_status_msg(
msg="Error accessing MaaS API: %s" % str(ex),
error=True,
ctx='NA',
ctx_type='NA')
self.task.save()
return
self.task.set_status(hd_fields.TaskStatus.Running)
self.task.save()
try:
site_design = self._load_site_design()
except errors.OrchestratorError:
self.task.add_status_msg(
msg="Error loading site design.",
error=True,
ctx='NA',
ctx_type='NA')
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.save()
return
nodes = self.orchestrator.process_node_filter(self.task.node_filter,
site_design)
for n in nodes:
try:
self.logger.debug(
"Locating node %s for storage configuration" % (n.name))
machine = machine_list.identify_baremetal_node(
n, update_name=False)
if machine is None:
msg = "Could not locate machine for node %s" % n.name
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.get_id())
continue
except Exception as ex:
msg = "Error locating machine for node %s" % (n.name)
self.logger.error(msg + ": " + str(ex))
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.get_id())
continue
if machine.status_name == 'Deployed':
msg = (
"Located node %s in MaaS, status deployed. Skipping "
"and considering success. Destroy node first if redeploy needed."
% (n.name))
self.logger.info(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
self.task.success(focus=n.name)
continue
try:
"""
1. Clear VGs
2. Clear partitions
3. Apply partitioning
4. Create VGs
5. Create logical volumes
"""
msg = "Clearing current storage layout on node %s." % n.name
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
machine.reset_storage_config()
(root_dev, root_block) = n.find_fs_block_device('/')
(boot_dev, boot_block) = n.find_fs_block_device('/boot')
storage_layout = dict()
if isinstance(root_block, hostprofile.HostPartition):
storage_layout['layout_type'] = 'flat'
storage_layout['root_device'] = n.get_logicalname(
root_dev.name)
storage_layout['root_size'] = root_block.size
elif isinstance(root_block, hostprofile.HostVolume):
storage_layout['layout_type'] = 'lvm'
if len(root_dev.physical_devices) != 1:
msg = "Root LV in VG with multiple physical devices on node %s" % (
n.name)
self.logger.error(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.get_id())
continue
storage_layout['root_device'] = n.get_logicalname(
root_dev.physical_devices[0])
storage_layout['root_lv_size'] = root_block.size
storage_layout['root_lv_name'] = root_block.name
storage_layout['root_vg_name'] = root_dev.name
if boot_block is not None:
storage_layout['boot_size'] = boot_block.size
msg = "Setting node %s root storage layout: %s" % (
n.name, str(storage_layout))
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
machine.set_storage_layout(**storage_layout)
vg_devs = {}
for d in n.storage_devices:
maas_dev = machine.block_devices.singleton({
'name':
n.get_logicalname(d.name)
})
if maas_dev is None:
self.logger.warning(
"Dev %s (%s) not found on node %s" %
(d.name, n.get_logicalname(d.name), n.name))
continue
if d.volume_group is not None:
self.logger.debug(
"Adding dev %s (%s) to volume group %s" %
(d.name, n.get_logicalname(d.name),
d.volume_group))
if d.volume_group not in vg_devs:
vg_devs[d.volume_group] = {'b': [], 'p': []}
vg_devs[d.volume_group]['b'].append(
maas_dev.resource_id)
continue
self.logger.debug(
"Partitioning dev %s (%s) on node %s" %
(d.name, n.get_logicalname(d.name), n.name))
for p in d.partitions:
if p.is_sys():
self.logger.debug(
"Skipping manually configuring a system partition."
)
continue
maas_dev.refresh()
size = ApplyNodeStorage.calculate_bytes(
size_str=p.size, context=maas_dev)
part = maas_partition.Partition(
self.maas_client, size=size, bootable=p.bootable)
if p.part_uuid is not None:
part.uuid = p.part_uuid
msg = "Creating partition %s sized %d bytes on dev %s (%s)" % (
p.name, size, d.name, n.get_logicalname(d.name))
self.logger.debug(msg)
part = maas_dev.create_partition(part)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
if p.volume_group is not None:
self.logger.debug(
"Adding partition %s to volume group %s" %
(p.name, p.volume_group))
if p.volume_group not in vg_devs:
vg_devs[p.volume_group] = {'b': [], 'p': []}
vg_devs[p.volume_group]['p'].append(
part.resource_id)
if p.mountpoint is not None:
format_opts = {'fstype': p.fstype}
if p.fs_uuid is not None:
format_opts['uuid'] = str(p.fs_uuid)
if p.fs_label is not None:
format_opts['label'] = p.fs_label
msg = "Formatting partition %s as %s" % (p.name,
p.fstype)
self.logger.debug(msg)
part.format(**format_opts)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='node')
mount_opts = {
'mount_point': p.mountpoint,
'mount_options': p.mount_options,
}
msg = "Mounting partition %s on %s" % (
p.name, p.mountpoint)
self.logger.debug(msg)
part.mount(**mount_opts)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='node')
self.logger.debug(
"Finished configuring node %s partitions" % n.name)
for v in n.volume_groups:
if v.is_sys():
self.logger.debug(
"Skipping manually configuraing system VG.")
continue
if v.name not in vg_devs:
self.logger.warning(
"No physical volumes defined for VG %s, skipping."
% (v.name))
continue
maas_volgroup = maas_vg.VolumeGroup(
self.maas_client, name=v.name)
if v.vg_uuid is not None:
maas_volgroup.uuid = v.vg_uuid
if len(vg_devs[v.name]['b']) > 0:
maas_volgroup.block_devices = ','.join(
[str(x) for x in vg_devs[v.name]['b']])
if len(vg_devs[v.name]['p']) > 0:
maas_volgroup.partitions = ','.join(
[str(x) for x in vg_devs[v.name]['p']])
msg = "Create volume group %s on node %s" % (v.name,
n.name)
self.logger.debug(msg)
maas_volgroup = machine.volume_groups.add(maas_volgroup)
maas_volgroup.refresh()
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
for lv in v.logical_volumes:
calc_size = ApplyNodeStorage.calculate_bytes(
size_str=lv.size, context=maas_volgroup)
bd_id = maas_volgroup.create_lv(
name=lv.name, uuid_str=lv.lv_uuid, size=calc_size)
if lv.mountpoint is not None:
machine.refresh()
maas_lv = machine.block_devices.select(bd_id)
msg = "Formatting LV %s as filesystem on node %s." % (
lv.name, n.name)
self.logger.debug(msg)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='node')
maas_lv.format(
fstype=lv.fstype, uuid_str=lv.fs_uuid)
msg = "Mounting LV %s at %s on node %s." % (
lv.name, lv.mountpoint, n.name)
self.logger.debug(msg)
maas_lv.mount(
mount_point=lv.mountpoint,
mount_options=lv.mount_options)
self.task.add_status_msg(
msg=msg,
error=False,
ctx=n.name,
ctx_type='node')
self.task.success(focus=n.get_id())
except Exception as ex:
self.task.failure(focus=n.get_id())
self.task.add_status_msg(
msg="Error configuring storage.",
error=True,
ctx=n.name,
ctx_type='node')
self.logger.debug("Error configuring storage for node %s: %s" %
(n.name, str(ex)))
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save()
return
@classmethod
def calculate_bytes(cls, size_str=None, context=None):
"""Calculate the size on bytes of a size_str.
Calculate the size as specified in size_str in the context of the provided
blockdev or vg. Valid size_str format below.
#m or #M or #mb or #MB = # * 1024 * 1024
#g or #G or #gb or #GB = # * 1024 * 1024 * 1024
#t or #T or #tb or #TB = # * 1024 * 1024 * 1024 * 1024
#% = Percentage of the total storage in the context
Prepend '>' to the above to note the size as a minimum and the calculated size being the
remaining storage available above the minimum
If the calculated size is not available in the context, a NotEnoughStorage exception is
raised.
:param size_str: A string representing the desired size
:param context: An instance of maasdriver.models.blockdev.BlockDevice or
instance of maasdriver.models.volumegroup.VolumeGroup. The
size_str is interpreted in the context of this device
:return size: The calculated size in bytes
"""
pattern = '(>?)(\d+)([mMbBgGtT%]{1,2})'
regex = re.compile(pattern)
match = regex.match(size_str)
if not match:
raise errors.InvalidSizeFormat(
"Invalid size string format: %s" % size_str)
if ((match.group(1) == '>' or match.group(3) == '%') and not context):
raise errors.InvalidSizeFormat(
'Sizes using the ">" or "%" format must specify a '
'block device or volume group context')
base_size = int(match.group(2))
if match.group(3) in ['m', 'M', 'mb', 'MB']:
computed_size = base_size * (1000 * 1000)
elif match.group(3) in ['g', 'G', 'gb', 'GB']:
computed_size = base_size * (1000 * 1000 * 1000)
elif match.group(3) in ['t', 'T', 'tb', 'TB']:
computed_size = base_size * (1000 * 1000 * 1000 * 1000)
elif match.group(3) == '%':
computed_size = math.floor((base_size / 100) * int(context.size))
if computed_size > int(context.available_size):
raise errors.NotEnoughStorage()
if match.group(1) == '>':
computed_size = int(context.available_size
) - ApplyNodeStorage.PART_TABLE_RESERVATION
return computed_size
class DeployNode(BaseMaasAction):
"""Action to write persistent OS to node."""
def start(self):
try:
machine_list = maas_machine.Machines(self.maas_client)
machine_list.refresh()
except Exception as ex:
self.logger.debug("Error accessing the MaaS API.", exc_info=ex)
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.add_status_msg(
msg="Error accessing MaaS API: %s" % str(ex),
error=True,
ctx='NA',
ctx_type='NA')
self.task.save()
return
self.task.set_status(hd_fields.TaskStatus.Running)
self.task.save()
try:
site_design = self._load_site_design()
except errors.OrchestratorError:
self.task.add_status_msg(
msg="Error loading site design.",
error=True,
ctx='NA',
ctx_type='NA')
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.failure()
self.task.save()
return
nodes = self.orchestrator.process_node_filter(self.task.node_filter,
site_design)
for n in nodes:
try:
machine = machine_list.identify_baremetal_node(
n, update_name=False)
if machine.status_name.startswith(
'Deployed') or machine.status_name.startswith(
'Deploying'):
msg = "Node %s already deployed or deploying, skipping." % (
n.name)
self.logger.info(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
self.task.success(focus=n.name)
continue
elif machine.status_name == 'Ready':
msg = "Acquiring node %s for deployment" % (n.name)
self.logger.info(msg)
machine = machine_list.acquire_node(n.name)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
else:
msg = "Unexpected status %s for node %s, skipping deployment." % (
machine.status_name, n.name)
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.get_id())
continue
except errors.DriverError as dex:
msg = "Error acquiring node %s, skipping" % n.name
self.logger.warning(msg, exc_info=dex)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.get_id())
continue
# Set owner data in MaaS
try:
self.logger.info("Setting node %s owner data." % n.name)
for k, v in n.owner_data.items():
msg = "Set owner data %s = %s for node %s" % (k, v, n.name)
self.logger.debug(msg)
machine.set_owner_data(k, v)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
except Exception as ex:
msg = "Error setting node %s owner data" % n.name
self.logger.warning(msg + ": " + str(ex))
self.task.failure(focus=n.get_id())
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
continue
# Saving boot action context for a node
self.logger.info(
"Saving Boot Action context for node %s." % (n.name))
try:
ba_key = self.orchestrator.create_bootaction_context(
n.name, self.task)
tag_list = maas_tag.Tags(self.maas_client)
tag_list.refresh()
node_id_tags = tag_list.startswith("%s__baid__" % (n.name))
for t in node_id_tags:
t.delete()
if ba_key is not None:
msg = "Creating boot action id key tag for node %s" % (
n.name)
self.logger.debug(msg)
node_baid_tag = maas_tag.Tag(
self.maas_client,
name="%s__baid__%s" % (n.name, ba_key.hex()))
node_baid_tag = tag_list.add(node_baid_tag)
node_baid_tag.apply_to_node(machine.resource_id)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
except Exception as ex:
self.logger.error(
"Error setting boot action id key tag for %s." % n.name,
exc_info=ex)
# Extract bootaction assets that are package lists as they
# are included in the deployment initiation
node_packages = self.orchestrator.find_node_package_lists(
n.name, self.task)
user_data_dict = dict(packages=[])
for k, v in node_packages.items():
if v:
user_data_dict['packages'].append([k, v])
else:
user_data_dict['packages'].append(k)
user_data_string = None
if user_data_dict.get('packages'):
user_data_string = "#cloud-config\n%s" % yaml.dump(
user_data_dict)
self.logger.info("Deploying node %s: image=%s, kernel=%s" %
(n.name, n.image, n.kernel))
try:
machine.deploy(
platform=n.image,
kernel=n.kernel,
user_data=user_data_string)
except errors.DriverError:
msg = "Error deploying node %s, skipping" % n.name
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.get_id())
continue
attempts = 0
max_attempts = config.config_mgr.conf.timeouts.deploy_node * (
60 // config.config_mgr.conf.maasdriver.poll_interval)
while (attempts < max_attempts
and (not machine.status_name.startswith('Deployed')
and not machine.status_name.startswith('Failed'))):
attempts = attempts + 1
time.sleep(config.config_mgr.conf.maasdriver.poll_interval)
try:
machine.refresh()
self.logger.debug(
"Polling node %s status attempt %d of %d: %s" %
(n.name, attempts, max_attempts, machine.status_name))
except Exception:
self.logger.warning(
"Error updating node %s status during commissioning, will re-attempt."
% (n.name))
if machine.status_name.startswith('Deployed'):
msg = "Node %s deployed" % (n.name)
self.logger.info(msg)
self.task.add_status_msg(
msg=msg, error=False, ctx=n.name, ctx_type='node')
self.task.success(focus=n.get_id())
else:
msg = "Node %s deployment timed out" % (n.name)
self.logger.warning(msg)
self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='node')
self.task.failure(focus=n.get_id())
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save()
return