mogan/mogan/engine/manager.py

285 lines
11 KiB
Python

# Copyright 2016 Huawei Technologies Co.,LTD.
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import threading
from ironicclient import exc as ironic_exc
from oslo_log import log
import oslo_messaging as messaging
from oslo_service import loopingcall
from oslo_service import periodic_task
import six
from mogan.common import exception
from mogan.common import flow_utils
from mogan.common.i18n import _
from mogan.common.i18n import _LE
from mogan.common.i18n import _LI
from mogan.common.i18n import _LW
from mogan.conf import CONF
from mogan.engine.baremetal import ironic
from mogan.engine.baremetal import ironic_states
from mogan.engine import base_manager
from mogan.engine.flows import create_instance
from mogan.engine import status
# Module-level logger, named after this module.
LOG = log.getLogger(__name__)
# Ironic provision states for which delete_instance() tears down the
# instance's networks and unprovisions the node. For a node in any other
# state, delete_instance() only removes the instance info from the node.
_UNPROVISION_STATES = (ironic_states.ACTIVE, ironic_states.DEPLOYFAIL,
ironic_states.ERROR, ironic_states.DEPLOYWAIT,
ironic_states.DEPLOYING)
class EngineManager(base_manager.BaseEngineManager):
    """Mogan Engine manager main class.

    Implements the engine-side operations defined below: creating and
    deleting instances, reading and setting node power state, looking up
    Ironic nodes, and listing availability zones from a cached node list.
    """

    RPC_API_VERSION = '1.0'

    target = messaging.Target(version=RPC_API_VERSION)

    # Guards replacement and reads of ``self.node_cache``. Defined on the
    # class, so the same lock object is shared by every instance.
    _lock = threading.Lock()

    def _refresh_cache(self):
        """Rebuild ``self.node_cache`` from the current Ironic node list.

        Only nodes that are not in maintenance, are in the AVAILABLE
        provision state and are not associated are requested. The result is
        stored as a dict mapping node UUID to node object.
        """
        nodes = ironic.get_node_list(self.ironicclient, detail=True,
                                     maintenance=False,
                                     provision_state=ironic_states.AVAILABLE,
                                     associated=False, limit=0)
        # Build the new mapping outside the lock and swap it in afterwards,
        # so the lock is only held for a single assignment.
        node_cache = {node.uuid: node for node in nodes}

        with self._lock:
            self.node_cache = node_cache

    @periodic_task.periodic_task(
        spacing=CONF.engine.sync_node_resource_interval)
    def _sync_node_resources(self, context):
        """Periodic task that refreshes the node cache.

        :param context: request context supplied by the periodic task
                        runner; not used here.
        """
        self._refresh_cache()

    def _set_instance_obj_error_state(self, context, instance):
        """Set ``instance.status`` to ERROR and save it.

        If the instance no longer exists (``InstanceNotFound``), the failure
        is only logged at debug level.

        :param context: request context; not used here.
        :param instance: the instance object to update.
        """
        try:
            instance.status = status.ERROR
            instance.save()
        except exception.InstanceNotFound:
            LOG.debug('Instance has been destroyed from under us while '
                      'trying to set it to ERROR', instance=instance)

    def _destroy_networks(self, context, instance):
        """Delete the instance's ports and unplug VIFs from its node.

        Every key of ``instance.network_info`` is passed to
        ``network_api.delete_port``. Afterwards each Ironic port of the
        instance's node that carries a ``vif_port_id`` entry in its
        ``extra`` field is unplugged.

        :param context: request context passed to the network API.
        :param instance: the instance whose networking is torn down.
        """
        LOG.debug("unplug: instance_uuid=%(uuid)s vif=%(network_info)s",
                  {'uuid': instance.uuid,
                   'network_info': str(instance.network_info)})

        for port in instance.network_info.keys():
            self.network_api.delete_port(context, port, instance.uuid)

        ironic_ports = ironic.get_ports_from_node(self.ironicclient,
                                                  instance.node_uuid,
                                                  detail=True)
        for pif in ironic_ports:
            if 'vif_port_id' in pif.extra:
                ironic.unplug_vif(self.ironicclient, pif.uuid)

    def _destroy_instance(self, context, instance):
        """Unprovision the instance's Ironic node and wait for completion.

        Requests destruction of the node, then polls the node's provision
        state every ``CONF.ironic.api_retry_interval`` until the node is
        gone or reaches a state treated as unprovisioned.

        :param context: request context; not used here.
        :param instance: the instance whose node is destroyed.
        :raises MoganException: if the node has still not reached an
            unprovisioned state after ``CONF.ironic.api_max_retries + 1``
            unsuccessful polls.
        """
        try:
            ironic.destroy_node(self.ironicclient, instance.node_uuid)
        except Exception as e:
            # if the node is already in a deprovisioned state, continue
            # This should be fixed in Ironic.
            # TODO(deva): This exception should be added to
            #             python-ironicclient and matched directly,
            #             rather than via __name__.
            # Exception *instances* have no ``__name__`` attribute, so the
            # name has to come from the exception's class; otherwise this
            # check could never match and every error would be re-raised.
            exc_name = getattr(e, '__name__', None) or type(e).__name__
            if exc_name != 'InstanceDeployFailure':
                raise

        # using a dict because this is modified in the local method
        data = {'tries': 0}

        # Provision states in which the node counts as unprovisioned.
        unprovisioned_states = (ironic_states.NOSTATE,
                                ironic_states.CLEANING,
                                ironic_states.CLEANWAIT,
                                ironic_states.CLEANFAIL,
                                ironic_states.AVAILABLE)

        def _wait_for_provision_state():
            """Poll once; raise LoopingCallDone when unprovisioned."""
            try:
                node = ironic.get_node_by_instance(self.ironicclient,
                                                   instance.uuid)
            except ironic_exc.NotFound:
                LOG.debug("Instance already removed from Ironic",
                          instance=instance)
                raise loopingcall.LoopingCallDone()

            LOG.debug('Current ironic node state is %s', node.provision_state)
            if node.provision_state in unprovisioned_states:
                # From a user standpoint, the node is unprovisioned. If a node
                # gets into CLEANFAIL state, it must be fixed in Ironic, but we
                # can consider the instance unprovisioned.
                LOG.debug("Ironic node %(node)s is in state %(state)s, "
                          "instance is now unprovisioned.",
                          dict(node=node.uuid, state=node.provision_state),
                          instance=instance)
                raise loopingcall.LoopingCallDone()

            if data['tries'] >= CONF.ironic.api_max_retries + 1:
                msg = (_("Error destroying the instance on node %(node)s. "
                         "Provision state still '%(state)s'.")
                       % {'state': node.provision_state,
                          'node': node.uuid})
                LOG.error(msg)
                raise exception.MoganException(msg)

            data['tries'] += 1

        # wait for the state transition to finish
        timer = loopingcall.FixedIntervalLoopingCall(_wait_for_provision_state)
        timer.start(interval=CONF.ironic.api_retry_interval).wait()

        LOG.info(_LI('Successfully destroyed Ironic node %s'),
                 instance.node_uuid)

    def _remove_instance_info_from_node(self, instance):
        """Clear the instance info stored on the instance's Ironic node.

        A ``BadRequest`` from Ironic is logged as a warning and not
        re-raised.

        :param instance: the instance whose info is removed from its node.
        """
        try:
            ironic.unset_instance_info(self.ironicclient, instance)
        except ironic_exc.BadRequest as e:
            LOG.warning(_LW("Failed to remove deploy parameters from node "
                            "%(node)s when unprovisioning the instance "
                            "%(instance)s: %(reason)s"),
                        {'node': instance.node_uuid, 'instance': instance.uuid,
                         'reason': six.text_type(e)})

    def create_instance(self, context, instance, requested_networks,
                        request_spec=None, filter_properties=None):
        """Perform a deployment.

        Builds the create-instance flow and runs it. If the flow run fails
        with an ``Exception``, the instance is put into ERROR status and the
        failure is logged; the exception is not re-raised.

        :param context: request context.
        :param instance: the instance object to deploy.
        :param requested_networks: networks requested for the instance,
                                   handed to the flow unchanged.
        :param request_spec: optional request spec handed to the flow.
        :param filter_properties: optional filter properties handed to the
                                  flow; ``None`` is replaced by an empty dict.
        :returns: the ``instance`` object that was passed in.
        :raises MoganException: if the flow could not be built.
        """
        LOG.debug("Starting instance...")

        if filter_properties is None:
            filter_properties = {}

        try:
            flow_engine = create_instance.get_flow(
                context,
                self,
                instance,
                requested_networks,
                request_spec,
                filter_properties,
            )
        except Exception:
            msg = _("Create manager instance flow failed.")
            LOG.exception(msg)
            raise exception.MoganException(msg)

        def _run_flow():
            # This code executes create instance flow. If something goes wrong,
            # flow reverts all job that was done and reraises an exception.
            # Otherwise, all data that was generated by flow becomes available
            # in flow engine's storage.
            with flow_utils.DynamicLogListener(flow_engine, logger=LOG):
                flow_engine.run()

        try:
            _run_flow()
        except Exception as e:
            try:
                self._set_instance_obj_error_state(context, instance)
            except Exception:
                # Keep reporting the original flow failure below even when
                # the status update itself fails, instead of losing both
                # errors silently.
                LOG.exception(_LE("Failed to set instance %s to ERROR "
                                  "state."), instance.uuid)
            LOG.error(_LE("Created instance %(uuid)s failed."
                          "Exception: %(exception)s"),
                      {"uuid": instance.uuid,
                       "exception": e})
        else:
            LOG.info(_LI("Created instance %s successfully."), instance.uuid)

        # Deliberately not returned from a ``finally`` clause: a ``return``
        # there would also discard non-``Exception`` errors such as
        # ``KeyboardInterrupt``.
        return instance

    def delete_instance(self, context, instance):
        """Delete an instance.

        If an Ironic node is found for the instance, its resources are
        cleaned up first: a node in one of ``_UNPROVISION_STATES`` has its
        networks destroyed and is unprovisioned, any other node only has the
        instance info removed. Cleanup errors are logged and do not stop the
        deletion. The instance is then marked DELETED, saved and destroyed.

        :param context: request context.
        :param instance: the instance object to delete.
        """
        LOG.debug("Deleting instance...")

        try:
            node = ironic.get_node_by_instance(self.ironicclient,
                                               instance.uuid)
        except ironic_exc.NotFound:
            node = None

        if node:
            try:
                if node.provision_state in _UNPROVISION_STATES:
                    self._destroy_networks(context, instance)
                    self._destroy_instance(context, instance)
                else:
                    self._remove_instance_info_from_node(instance)
            except Exception:
                LOG.exception(_LE("Error while trying to clean up "
                                  "instance resources."),
                              instance=instance)

        instance.status = status.DELETED
        instance.save()
        instance.destroy()

    def _instance_states(self, context, instance):
        """Fetch the states of the instance's Ironic node as a dict.

        :param context: request context; not used here.
        :param instance: the instance whose node states are fetched.
        :returns: the result of ``to_dict()`` on the states object returned
                  by Ironic.
        """
        states = ironic.get_node_states(self.ironicclient,
                                        instance.node_uuid)
        LOG.info(_LI('Successfully get ironic node states: %s'),
                 states)
        return states.to_dict()

    def instance_states(self, context, instance):
        """Get an instance states."""
        LOG.debug("get instance states")

        return self._instance_states(context, instance)

    def _set_power_state(self, context, instance, state):
        """Ask Ironic to set the power state of the instance's node.

        :param context: request context; not used here.
        :param instance: the instance whose node is affected.
        :param state: the target power state, passed through to Ironic.
        """
        ironic.set_power_state(self.ironicclient, instance.node_uuid, state)
        LOG.info(_LI('Successfully set ironic node power state: %s'),
                 state)

    def set_power_state(self, context, instance, state):
        """Set an instance power state."""
        LOG.debug("set power state...")

        return self._set_power_state(context, instance, state)

    @messaging.expected_exceptions(exception.NodeNotFound)
    def get_ironic_node(self, context, instance_uuid, fields):
        """Get a ironic node.

        :param context: request context; not used here.
        :param instance_uuid: UUID of the instance whose node is looked up.
        :param fields: passed through to the Ironic lookup.
        :returns: the node as a dict (``node.to_dict()``).
        :raises NodeNotFound: if Ironic reports no node for the instance.
        """
        try:
            node = ironic.get_node_by_instance(self.ironicclient,
                                               instance_uuid, fields)
        except ironic_exc.NotFound:
            msg = (_("Error retrieving the node by instance %(instance)s.")
                   % {'instance': instance_uuid})
            LOG.debug(msg)
            raise exception.NodeNotFound(msg)

        return node.to_dict()

    def get_ironic_node_list(self, context, fields):
        """Get an ironic node list.

        :param context: request context; not used here.
        :param fields: passed through to the Ironic node listing.
        :returns: ``{'nodes': [...]}`` where each entry is ``to_dict()`` of
                  an associated node.
        """
        nodes = ironic.get_node_list(self.ironicclient, associated=True,
                                     limit=0, fields=fields)
        return {'nodes': [node.to_dict() for node in nodes]}

    def list_availability_zones(self, context):
        """Get availability zone list.

        Collects the distinct, non-``None`` ``availability_zone`` values
        found in the ``properties`` of the cached nodes.

        :param context: request context; not used here.
        :returns: ``{'availability_zones': [...]}``; the list order is not
                  defined because the values are gathered in a set.
        """
        # NOTE(review): assumes ``self.node_cache`` has already been set
        # (this class only assigns it in _refresh_cache) -- confirm that the
        # base manager initialises it before RPC calls can arrive.
        with self._lock:
            # Take a snapshot so iteration below does not rely on a live
            # view of the dict once the lock has been released.
            cached_nodes = list(self.node_cache.values())

        azs = set()
        for node in cached_nodes:
            az = node.properties.get('availability_zone')
            if az is not None:
                azs.add(az)

        return {'availability_zones': list(azs)}