561 lines
23 KiB
Python
561 lines
23 KiB
Python
# Copyright 2016 Huawei Technologies Co.,LTD.
|
|
# All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
from ironicclient import exc as ironic_exc
|
|
from ironicclient import exceptions as client_e
|
|
from oslo_log import log as logging
|
|
from oslo_service import loopingcall
|
|
from oslo_utils import excutils
|
|
import six
|
|
|
|
from mogan.common import exception
|
|
from mogan.common.i18n import _
|
|
from mogan.common import ironic
|
|
from mogan.common import states
|
|
from mogan.conf import CONF
|
|
from mogan.engine.baremetal import driver as base_driver
|
|
from mogan.engine.baremetal.ironic import ironic_states
|
|
|
|
# Module-level logger shared by every helper and driver method below.
LOG = logging.getLogger(__name__)

# Ironic power state -> Mogan power state; consulted by map_power_state().
_POWER_STATE_MAP = {
    ironic_states.POWER_ON: states.POWER_ON,
    ironic_states.NOSTATE: states.NOSTATE,
    ironic_states.POWER_OFF: states.POWER_OFF,
}

# Provision states in which destroy() asks Ironic to unprovision the node
# (any other state only has its instance info stripped from the node).
_UNPROVISION_STATES = (
    ironic_states.ACTIVE,
    ironic_states.DEPLOYFAIL,
    ironic_states.ERROR,
    ironic_states.DEPLOYWAIT,
    ironic_states.DEPLOYING,
)

# Node fields requested from Ironic by the single-node lookups.
_NODE_FIELDS = (
    'uuid',
    'power_state',
    'target_power_state',
    'provision_state',
    'target_provision_state',
    'last_error',
    'maintenance',
    'properties',
    'instance_uuid',
)
|
|
|
|
|
|
def map_power_state(state):
    """Translate an Ironic power state into the matching Mogan power state.

    :param state: power state value as reported by Ironic.
    :returns: the entry of ``_POWER_STATE_MAP`` for ``state``, or
        ``states.NOSTATE`` (after logging a warning) when ``state`` has no
        mapping.
    """
    if state in _POWER_STATE_MAP:
        return _POWER_STATE_MAP[state]

    # Unknown state: report it and fall back to "no state".
    LOG.warning("Power state %s not found.", state)
    return states.NOSTATE
|
|
|
|
|
|
def _log_ironic_polling(what, node, instance):
    """Emit a debug line describing a node that is still being polled.

    :param what: short description of the awaited transition; it is
        interpolated after "to" in the log message.
    :param node: node object exposing ``uuid``, ``power_state``,
        ``target_power_state``, ``provision_state`` and
        ``target_provision_state``.
    :param instance: instance object, passed to the logger through the
        ``instance`` keyword.
    """
    def _quoted(value):
        # Keep None as-is; wrap anything else in double quotes.
        if value is None:
            return None
        return '"%s"' % value

    details = {
        'what': what,
        'power_state': _quoted(node.power_state),
        'tgt_power_state': _quoted(node.target_power_state),
        'prov_state': _quoted(node.provision_state),
        'tgt_prov_state': _quoted(node.target_provision_state),
        'node': node.uuid,
    }
    LOG.debug('Still waiting for ironic node %(node)s to %(what)s: '
              'power_state=%(power_state)s, '
              'target_power_state=%(tgt_power_state)s, '
              'provision_state=%(prov_state)s, '
              'target_provision_state=%(tgt_prov_state)s',
              details,
              instance=instance)
|
|
|
|
|
|
class IronicDriver(base_driver.BaseEngineDriver):
    """Engine driver that manages bare metal instances through Ironic.

    All Ironic API requests are issued through ``self.ironicclient``, an
    ``ironic.IronicClientWrapper`` created in ``__init__``.
    """

    def __init__(self):
        super(IronicDriver, self).__init__()
        self.ironicclient = ironic.IronicClientWrapper()

    def _get_node(self, node_uuid):
        """Get a node by its UUID.

        :param node_uuid: UUID of the Ironic node.
        :returns: the result of the ``node.get`` call, restricted to
            ``_NODE_FIELDS``.
        """
        return self.ironicclient.call('node.get', node_uuid,
                                      fields=_NODE_FIELDS)

    def _validate_instance_and_node(self, instance):
        """Get the node associated with the instance.

        Check with the Ironic service that this instance is associated with a
        node, and return the node.

        :param instance: The instance object (its ``uuid`` is looked up).
        :returns: the node returned by ``node.get_by_instance_uuid``,
            restricted to ``_NODE_FIELDS``.
        :raises: InstanceNotFound if Ironic reports ``NotFound``.
        """
        try:
            return self.ironicclient.call('node.get_by_instance_uuid',
                                          instance.uuid, fields=_NODE_FIELDS)
        except ironic_exc.NotFound:
            raise exception.InstanceNotFound(instance_id=instance.uuid)

    def _parse_node_properties(self, node):
        """Helper method to parse the node's properties.

        :param node: node object exposing ``uuid`` and a ``properties``
            mapping.
        :returns: dict with integer ``cpus``, ``memory_mb`` and ``local_gb``
            (0 when missing or malformed) plus the raw ``capabilities``,
            ``availability_zone`` and ``node_type`` values (None when
            missing).
        """
        properties = {}

        for prop in ('cpus', 'memory_mb', 'local_gb'):
            try:
                properties[prop] = int(node.properties.get(prop, 0))
            except (TypeError, ValueError):
                LOG.warning('Node %(uuid)s has a malformed "%(prop)s". '
                            'It should be an integer.',
                            {'uuid': node.uuid, 'prop': prop})
                properties[prop] = 0

        # These values are passed through untouched.
        for prop in ('capabilities', 'availability_zone', 'node_type'):
            properties[prop] = node.properties.get(prop)
        return properties

    def _node_resource(self, node):
        """Helper method to create resource dict from node stats.

        :param node: node object exposing ``uuid``, ``properties`` and a
            ``ports`` attribute (set by ``get_available_resources``).
        :returns: dict with the keys ``cpus``, ``memory_mb``,
            ``hypervisor_type``, ``availability_zone``, ``node_type``,
            ``extra_specs``, ``node_uuid`` and ``ports``.
        """
        properties = self._parse_node_properties(node)

        nodes_extra_specs = {}

        # NOTE(gilliard): To assist with more precise scheduling, if the
        # node.properties contains a key 'capabilities', we expect the value
        # to be of the form "k1:v1,k2:v2,etc.." which we add directly as
        # key/value pairs into the node_extra_specs to be used by the
        # ComputeCapabilitiesFilter
        capabilities = properties['capabilities']
        if capabilities:
            for capability in str(capabilities).split(','):
                parts = capability.split(':')
                if len(parts) == 2 and parts[0] and parts[1]:
                    # Only the key is stripped of surrounding whitespace.
                    nodes_extra_specs[parts[0].strip()] = parts[1]
                else:
                    LOG.warning("Ignoring malformed capability '%s'. "
                                "Format should be 'key:val'.", capability)

        return {
            'cpus': properties['cpus'],
            'memory_mb': properties['memory_mb'],
            'hypervisor_type': self._get_hypervisor_type(),
            # str() turns a missing (None) value into the string 'None'.
            'availability_zone': str(properties['availability_zone']),
            'node_type': str(properties['node_type']),
            'extra_specs': nodes_extra_specs,
            'node_uuid': str(node.uuid),
            'ports': node.ports,
        }

    def _port_resource(self, port):
        """Helper method to create resource dict from port stats.

        :param port: port object exposing ``uuid``, ``node_uuid`` and an
            ``extra`` mapping.
        :returns: dict with the stringified ``port_type`` (taken from
            ``port.extra``), ``node_uuid`` and ``port_uuid``.
        """
        port_type = port.extra.get('port_type')

        return {
            'port_type': str(port_type),
            'node_uuid': str(port.node_uuid),
            'port_uuid': str(port.uuid),
        }

    def _add_instance_info_to_node(self, node, instance):
        """Associate the node with the instance and set deploy parameters.

        :param node: node object exposing ``uuid`` and ``properties``.
        :param instance: The instance object (``uuid`` and ``image_uuid``
            are used).
        :raises: InstanceDeployFailure if Ironic rejects the update with
            ``BadRequest``.
        """
        patch = [
            # Associate the node with an instance
            {'path': '/instance_uuid', 'op': 'add',
             'value': instance.uuid},
            # Add the required fields to deploy a node.
            {'path': '/instance_info/image_source', 'op': 'add',
             'value': instance.image_uuid},
            # TODO(zhenguo) Add partition support
            {'path': '/instance_info/root_gb', 'op': 'add',
             'value': str(node.properties.get('local_gb', 0))},
        ]

        try:
            # FIXME(lucasagomes): The "retry_on_conflict" parameter was added
            # to make the deployment fail faster in case the node picked by
            # the scheduler is already associated with another instance, due
            # to bug #1341420.
            self.ironicclient.call('node.update', node.uuid, patch,
                                   retry_on_conflict=False)
        except ironic_exc.BadRequest:
            msg = (_("Failed to add deploy parameters on node %(node)s "
                     "when provisioning the instance %(instance)s")
                   % {'node': node.uuid, 'instance': instance.uuid})
            LOG.error(msg)
            raise exception.InstanceDeployFailure(msg)

    def _remove_instance_info_from_node(self, node, instance):
        """Remove the instance association and instance info from the node.

        A ``BadRequest`` from Ironic is logged as a warning and otherwise
        ignored.

        :param node: node object exposing ``uuid``.
        :param instance: The instance object (``uuid`` is used for logging).
        """
        patch = [{'path': '/instance_info', 'op': 'remove'},
                 {'path': '/instance_uuid', 'op': 'remove'}]
        try:
            self.ironicclient.call('node.update', node.uuid, patch)
        except ironic_exc.BadRequest as e:
            LOG.warning("Failed to remove deploy parameters from node "
                        "%(node)s when unprovisioning the instance "
                        "%(instance)s: %(reason)s",
                        {'node': node.uuid, 'instance': instance.uuid,
                         'reason': six.text_type(e)})

    def _wait_for_active(self, instance):
        """Wait for the node to be marked as ACTIVE in Ironic.

        Used as the polled function of a ``FixedIntervalLoopingCall`` by
        ``spawn`` and ``rebuild``; each call performs one check.

        :param instance: The instance object; it is refreshed on each call.
        :raises: loopingcall.LoopingCallDone once the node is ACTIVE.
        :raises: InstanceDeployAborted if the instance status is DELETING,
            ERROR or DELETED.
        :raises: InstanceNotFound if the node is being, or has been,
            released by Ironic.
        :raises: InstanceDeployFailure if the node is in DEPLOYFAIL.
        """
        instance.refresh()
        if instance.status in (states.DELETING, states.ERROR, states.DELETED):
            raise exception.InstanceDeployAborted(
                _("Instance %s provisioning was aborted") % instance.uuid)

        node = self._validate_instance_and_node(instance)
        if node.provision_state == ironic_states.ACTIVE:
            # job is done
            LOG.debug("Ironic node %(node)s is now ACTIVE",
                      dict(node=node.uuid), instance=instance)
            raise loopingcall.LoopingCallDone()

        if node.target_provision_state in (ironic_states.DELETED,
                                           ironic_states.AVAILABLE):
            # ironic is trying to delete it now
            raise exception.InstanceNotFound(instance_id=instance.uuid)

        if node.provision_state in (ironic_states.NOSTATE,
                                    ironic_states.AVAILABLE):
            # ironic already deleted it
            raise exception.InstanceNotFound(instance_id=instance.uuid)

        if node.provision_state == ironic_states.DEPLOYFAIL:
            # ironic failed to deploy
            msg = (_("Failed to provision instance %(inst)s: %(reason)s")
                   % {'inst': instance.uuid, 'reason': node.last_error})
            raise exception.InstanceDeployFailure(msg)

        _log_ironic_polling('become ACTIVE', node, instance)

    def _wait_for_power_state(self, instance, message):
        """Wait for the node to complete a power state change.

        Used as the polled function of a ``FixedIntervalLoopingCall`` by
        ``set_power_state``; each call performs one check.

        :param instance: The instance object.
        :param message: text describing the awaited change, used in the
            polling log line.
        :raises: loopingcall.LoopingCallDone once the node's
            ``target_power_state`` is NOSTATE.
        """
        node = self._validate_instance_and_node(instance)

        if node.target_power_state == ironic_states.NOSTATE:
            raise loopingcall.LoopingCallDone()

        _log_ironic_polling(message, node, instance)

    def _get_hypervisor_type(self):
        """Get hypervisor type.

        :returns: the constant string ``'ironic'``.
        """
        return 'ironic'

    def get_ports_from_node(self, node_uuid, detail=True):
        """List the ports and portgroups of a node.

        :param node_uuid: UUID of the Ironic node.
        :param detail: forwarded as ``detail`` to both Ironic list calls.
        :returns: the ``node.list_ports`` result concatenated with the
            ``portgroup.list`` result.
        """
        ports = self.ironicclient.call("node.list_ports",
                                       node_uuid, detail=detail)
        portgroups = self.ironicclient.call("portgroup.list", node=node_uuid,
                                            detail=detail)
        return ports + portgroups

    def plug_vif(self, ironic_port_id, port_id):
        """Record a VIF port id in the ``extra`` field of an Ironic port.

        :param ironic_port_id: identifier of the Ironic port to update.
        :param port_id: value stored under ``/extra/vif_port_id``.
        """
        patch = [{'op': 'add',
                  'path': '/extra/vif_port_id',
                  'value': port_id}]
        self.ironicclient.call("port.update", ironic_port_id, patch)

    def unplug_vif(self, node_interface):
        """Remove the VIF port id from an Ironic port, if one is set.

        This is best effort: a ``BadRequest`` from Ironic is logged and
        otherwise ignored.

        :param node_interface: port object exposing ``uuid`` and an
            ``extra`` mapping.
        """
        if 'vif_port_id' not in node_interface.extra:
            return

        patch = [{'op': 'remove',
                  'path': '/extra/vif_port_id'}]
        try:
            self.ironicclient.call("port.update",
                                   node_interface.uuid, patch)
        except client_e.BadRequest as e:
            # Do not fail the caller, but leave a trace of the failure
            # instead of swallowing it silently.
            LOG.warning("Failed to remove vif_port_id from port "
                        "%(port)s: %(reason)s",
                        {'port': node_interface.uuid,
                         'reason': six.text_type(e)})

    def spawn(self, context, instance):
        """Deploy an instance.

        Adds the instance info to the node, validates the node, asks Ironic
        to move it to ACTIVE and then polls until it gets there.

        :param context: The security context.
        :param instance: The instance object.
        :raises: ironic_exc.BadRequest if the instance has no node uuid.
        :raises: ValidationError if the deploy or power interface of the
            node fails validation.
        """
        LOG.debug('Spawn called for instance', instance=instance)

        # The engine manager is meant to know the node uuid, so missing uuid
        # is a significant issue. It may mean we've been passed the wrong data.
        node_uuid = instance.node_uuid
        if not node_uuid:
            raise ironic_exc.BadRequest(
                _("Ironic node uuid not supplied to "
                  "driver for instance %s.") % instance.uuid)

        # add instance info to node
        node = self._get_node(node_uuid)
        self._add_instance_info_to_node(node, instance)

        # validate we are ready to do the deploy
        validate_chk = self.ironicclient.call("node.validate", node_uuid)
        if (not validate_chk.deploy.get('result')
                or not validate_chk.power.get('result')):
            # something is wrong. undo what we have done
            # NOTE(review): _cleanup_deploy is not defined in this class;
            # presumably it is inherited from the base driver -- confirm.
            self._cleanup_deploy(node, instance)
            raise exception.ValidationError(_(
                "Ironic node: %(id)s failed to validate."
                " (deploy: %(deploy)s, power: %(power)s)")
                % {'id': instance.node_uuid,
                   'deploy': validate_chk.deploy,
                   'power': validate_chk.power})

        # trigger the node deploy
        try:
            self.ironicclient.call("node.set_provision_state", node_uuid,
                                   ironic_states.ACTIVE)
        except Exception as e:
            with excutils.save_and_reraise_exception():
                # Pass the format string and its arguments separately so the
                # logger interpolates them (logging a (format, args) tuple
                # would only print the tuple's repr).
                LOG.error("Failed to request Ironic to provision instance "
                          "%(inst)s: %(reason)s",
                          {'inst': instance.uuid,
                           'reason': six.text_type(e)})

        timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_active,
                                                     instance)
        try:
            timer.start(interval=CONF.ironic.api_retry_interval).wait()
            LOG.info('Successfully provisioned Ironic node %s',
                     node.uuid, instance=instance)
        except Exception:
            with excutils.save_and_reraise_exception():
                LOG.error("Error deploying instance %(instance)s on "
                          "baremetal node %(node)s.",
                          {'instance': instance.uuid,
                           'node': node_uuid})

    def _unprovision(self, instance, node):
        """This method is called from destroy() to unprovision
        already provisioned node after required checks.

        Asks Ironic to move the node to "deleted" and polls until the node
        reaches an unprovisioned state or the retry budget
        (``CONF.ironic.api_max_retries``) is exhausted.

        :param instance: The instance object.
        :param node: node object exposing ``uuid``.
        """
        try:
            self.ironicclient.call("node.set_provision_state", node.uuid,
                                   "deleted")
        except Exception as e:
            # if the node is already in a deprovisioned state, continue
            # This should be fixed in Ironic.
            # TODO(deva): This exception should be added to
            #             python-ironicclient and matched directly,
            #             rather than via __name__.
            # Exception instances normally carry no __name__ attribute, so
            # fall back to the class name; otherwise this check could never
            # match and every error would be re-raised.
            exc_name = getattr(e, '__name__', None) or type(e).__name__
            if exc_name != 'InstanceDeployFailure':
                raise

        # using a dict because this is modified in the local method
        data = {'tries': 0}

        def _wait_for_provision_state():
            try:
                node = self._validate_instance_and_node(instance)
            except exception.InstanceNotFound:
                LOG.debug("Instance already removed from Ironic",
                          instance=instance)
                raise loopingcall.LoopingCallDone()
            if node.provision_state in (ironic_states.NOSTATE,
                                        ironic_states.CLEANING,
                                        ironic_states.CLEANWAIT,
                                        ironic_states.CLEANFAIL,
                                        ironic_states.AVAILABLE):
                # From a user standpoint, the node is unprovisioned. If a node
                # gets into CLEANFAIL state, it must be fixed in Ironic, but we
                # can consider the instance unprovisioned.
                LOG.debug("Ironic node %(node)s is in state %(state)s, "
                          "instance is now unprovisioned.",
                          dict(node=node.uuid, state=node.provision_state),
                          instance=instance)
                raise loopingcall.LoopingCallDone()

            if data['tries'] >= CONF.ironic.api_max_retries + 1:
                msg = (_("Error destroying the instance on node %(node)s. "
                         "Provision state still '%(state)s'.")
                       % {'state': node.provision_state,
                          'node': node.uuid})
                LOG.error(msg)
                # NOTE(review): NovaException looks like a leftover from the
                # Nova driver; confirm that mogan.common.exception defines
                # it, otherwise this line raises AttributeError instead.
                raise exception.NovaException(msg)
            else:
                data['tries'] += 1

            _log_ironic_polling('unprovision', node, instance)

        # wait for the state transition to finish
        timer = loopingcall.FixedIntervalLoopingCall(_wait_for_provision_state)
        timer.start(interval=CONF.ironic.api_retry_interval).wait()

    def destroy(self, context, instance):
        """Destroy the specified instance, if it can be found.

        If no node is associated with the instance, a warning is logged and
        nothing else happens.

        :param context: The security context.
        :param instance: The instance object.
        """
        LOG.debug('Destroy called for instance', instance=instance)
        try:
            node = self._validate_instance_and_node(instance)
        except exception.InstanceNotFound:
            LOG.warning("Destroy called on non-existing instance %s.",
                        instance.uuid)
            return

        if node.provision_state in _UNPROVISION_STATES:
            self._unprovision(instance, node)
        else:
            # NOTE(hshiina): if spawn() fails before ironic starts
            #                provisioning, instance information should be
            #                removed from ironic node.
            self._remove_instance_info_from_node(node, instance)

        LOG.info('Successfully unprovisioned Ironic node %s',
                 node.uuid, instance=instance)

    def get_available_resources(self):
        """Helper function to return the available node resources.

        Lists the nodes that are not in maintenance, not associated and in
        the AVAILABLE provision state, and attaches to each node the ports
        that belong to it.

        If listing the nodes fails with a client exception, an empty dict
        is returned; if listing the ports fails, the nodes are reported
        without ports.

        :returns: a dict mapping each node uuid to the resource dict built
            by ``_node_resource``.
        """
        # Retrieve nodes
        params = {
            'maintenance': False,
            'detail': True,
            'provision_state': ironic_states.AVAILABLE,
            'associated': False,
            'limit': 0
        }
        try:
            node_list = self.ironicclient.call("node.list", **params)
        except client_e.ClientException as e:
            LOG.exception("Could not get nodes from ironic. Reason: "
                          "%(detail)s", {'detail': six.text_type(e)})
            node_list = []

        # Retrieve ports
        params = {
            'limit': 0,
            'fields': ('uuid', 'node_uuid', 'extra', 'address')
        }

        try:
            port_list = self.ironicclient.call("port.list", **params)
        except client_e.ClientException as e:
            LOG.exception("Could not get ports from ironic. Reason: "
                          "%(detail)s", {'detail': six.text_type(e)})
            port_list = []

        # Group the ports by owning node once, instead of rescanning the
        # whole port list for every node.
        ports_by_node = {}
        for port in port_list:
            ports_by_node.setdefault(port.node_uuid, []).append(port)

        # TODO(zhenguo): Add portgroups resources
        node_resources = {}
        for node in node_list:
            # Add ports to the associated node
            node.ports = [self._port_resource(port)
                          for port in ports_by_node.get(node.uuid, ())]
            node_resources[node.uuid] = self._node_resource(node)
        return node_resources

    def get_maintenance_node_list(self):
        """Helper function to return the list of maintenance nodes.

        The query asks for associated nodes and only their
        ``instance_uuid`` and ``maintenance`` fields.

        If the request fails with a client exception, an empty list is
        returned.

        :returns: the node list returned by ironic
        """
        params = {
            'associated': True,
            'fields': ('instance_uuid', 'maintenance'),
            'limit': 0
        }
        try:
            node_list = self.ironicclient.call("node.list", **params)
        except client_e.ClientException as e:
            LOG.exception("Could not get nodes from ironic. Reason: "
                          "%(detail)s", {'detail': six.text_type(e)})
            node_list = []
        return node_list

    def get_node_power_states(self):
        """Helper function to return the node power states.

        The query asks for associated nodes that are not in maintenance and
        only their ``instance_uuid``, ``power_state`` and
        ``target_power_state`` fields.

        If the request fails with a client exception, an empty list is
        returned.

        :returns: the node list returned by ironic
        """
        params = {
            'maintenance': False,
            'associated': True,
            'fields': ('instance_uuid', 'power_state', 'target_power_state'),
            'limit': 0
        }
        try:
            node_list = self.ironicclient.call("node.list", **params)
        except client_e.ClientException as e:
            LOG.exception("Could not get nodes from ironic. Reason: "
                          "%(detail)s", {'detail': six.text_type(e)})
            node_list = []
        return node_list

    def get_power_state(self, context, instance_uuid):
        """Return the mapped power state of the node backing an instance.

        :param context: The security context.
        :param instance_uuid: UUID of the instance to look up.
        :returns: ``map_power_state`` of the node's power state, or of
            ``ironic_states.NOSTATE`` when Ironic reports ``NotFound``.
        """
        try:
            node = self.ironicclient.call('node.get_by_instance_uuid',
                                          instance_uuid,
                                          fields=('power_state',))
            return map_power_state(node.power_state)
        except client_e.NotFound:
            return map_power_state(ironic_states.NOSTATE)

    def set_power_state(self, context, instance, state):
        """Set power state on the specified instance.

        Blocks until the node's target power state is cleared.

        :param context: The security context.
        :param instance: The instance object.
        :param state: requested power state. ``"soft_off"`` and
            ``"soft_reboot"`` are sent to Ironic as ``"off"`` / ``"reboot"``
            with ``soft=True``; any other value is passed through as is.
        """
        node = self._validate_instance_and_node(instance)
        if state == "soft_off":
            self.ironicclient.call("node.set_power_state",
                                   node.uuid, "off", soft=True)
        elif state == "soft_reboot":
            self.ironicclient.call("node.set_power_state",
                                   node.uuid, "reboot", soft=True)
        else:
            self.ironicclient.call("node.set_power_state",
                                   node.uuid, state)
        timer = loopingcall.FixedIntervalLoopingCall(
            self._wait_for_power_state, instance, state)
        timer.start(interval=CONF.ironic.api_retry_interval).wait()

    def rebuild(self, context, instance):
        """Rebuild/redeploy an instance.

        :param context: The security context.
        :param instance: The instance object.
        :raises: InstanceDeployFailure if Ironic answers the rebuild request
            with ``InternalServerError`` or ``BadRequest``.
        """
        LOG.debug('Rebuild called for instance', instance=instance)

        # trigger the node rebuild
        try:
            self.ironicclient.call("node.set_provision_state",
                                   instance.node_uuid,
                                   ironic_states.REBUILD)
        except (ironic_exc.InternalServerError,
                ironic_exc.BadRequest) as e:
            msg = (_("Failed to request Ironic to rebuild instance "
                     "%(inst)s: %(reason)s") % {'inst': instance.uuid,
                                                'reason': six.text_type(e)})
            raise exception.InstanceDeployFailure(msg)

        # Although the target provision state is REBUILD, it will actually go
        # to ACTIVE once the redeploy is finished.
        timer = loopingcall.FixedIntervalLoopingCall(self._wait_for_active,
                                                     instance)
        timer.start(interval=CONF.ironic.api_retry_interval).wait()
        LOG.info('Instance was successfully rebuilt', instance=instance)
|