diff --git a/nimble/engine/manager.py b/nimble/engine/manager.py
index 11a1c1fd..9a1964ba 100644
--- a/nimble/engine/manager.py
+++ b/nimble/engine/manager.py
@@ -18,12 +18,15 @@ import threading
 from ironicclient import exc as ironic_exc
 from oslo_log import log
 import oslo_messaging as messaging
+from oslo_service import loopingcall
 from oslo_service import periodic_task
+import six
 
 from nimble.common import exception
 from nimble.common import flow_utils
 from nimble.common.i18n import _LE
 from nimble.common.i18n import _LI
+from nimble.common.i18n import _LW
 from nimble.conf import CONF
 from nimble.engine.baremetal import ironic
 from nimble.engine.baremetal import ironic_states
@@ -33,6 +36,10 @@ from nimble.engine import status
 
 LOG = log.getLogger(__name__)
 
+_UNPROVISION_STATES = (ironic_states.ACTIVE, ironic_states.DEPLOYFAIL,
+                       ironic_states.ERROR, ironic_states.DEPLOYWAIT,
+                       ironic_states.DEPLOYING)
+
 
 class EngineManager(base_manager.BaseEngineManager):
     """Nimble Engine manager main class."""
@@ -84,10 +91,71 @@ class EngineManager(base_manager.BaseEngineManager):
             ironic.unplug_vif(self.ironicclient, pif.uuid)
 
     def _destroy_instance(self, context, instance):
-        ironic.destroy_node(self.ironicclient, instance.node_uuid)
+        try:
+            ironic.destroy_node(self.ironicclient, instance.node_uuid)
+        except Exception as e:
+            # if the node is already in a deprovisioned state, continue
+            # This should be fixed in Ironic.
+            # TODO(deva): This exception should be added to
+            #             python-ironicclient and matched directly,
+            #             rather than via __name__.
+            if getattr(e, '__name__', None) != 'InstanceDeployFailure':
+                raise
+
+        # using a dict because this is modified in the local method
+        data = {'tries': 0}
+
+        def _wait_for_provision_state():
+
+            try:
+                node = ironic.get_node_by_instance(self.ironicclient,
+                                                   instance.uuid)
+            except ironic_exc.NotFound:
+                LOG.debug("Instance already removed from Ironic",
+                          instance=instance)
+                raise loopingcall.LoopingCallDone()
+            LOG.debug('Current ironic node state is %s', node.provision_state)
+            if node.provision_state in (ironic_states.NOSTATE,
+                                        ironic_states.CLEANING,
+                                        ironic_states.CLEANWAIT,
+                                        ironic_states.CLEANFAIL,
+                                        ironic_states.AVAILABLE):
+                # From a user standpoint, the node is unprovisioned. If a node
+                # gets into CLEANFAIL state, it must be fixed in Ironic, but we
+                # can consider the instance unprovisioned.
+                LOG.debug("Ironic node %(node)s is in state %(state)s, "
+                          "instance is now unprovisioned.",
+                          dict(node=node.uuid, state=node.provision_state),
+                          instance=instance)
+                raise loopingcall.LoopingCallDone()
+
+            if data['tries'] >= CONF.ironic.api_max_retries + 1:
+                msg = (_("Error destroying the instance on node %(node)s. "
+                         "Provision state still '%(state)s'.")
+                       % {'state': node.provision_state,
+                          'node': node.uuid})
+                LOG.error(msg)
+                raise exception.NimbleException(msg)
+            else:
+                data['tries'] += 1
+
+        # wait for the state transition to finish
+        timer = loopingcall.FixedIntervalLoopingCall(_wait_for_provision_state)
+        timer.start(interval=CONF.ironic.api_retry_interval).wait()
+        LOG.info(_LI('Successfully destroyed Ironic node %s'),
+                 instance.node_uuid)
+
+    def _remove_instance_info_from_node(self, instance):
+        try:
+            ironic.unset_instance_info(self.ironicclient, instance)
+        except ironic_exc.BadRequest as e:
+            LOG.warning(_LW("Failed to remove deploy parameters from node "
+                            "%(node)s when unprovisioning the instance "
+                            "%(instance)s: %(reason)s"),
+                        {'node': instance.node_uuid, 'instance': instance.uuid,
+                         'reason': six.text_type(e)})
 
     def create_instance(self, context, instance, requested_networks,
                         request_spec=None, filter_properties=None):
         """Perform a deployment."""
@@ -134,12 +202,22 @@ class EngineManager(base_manager.BaseEngineManager):
         LOG.debug("Deleting instance...")
 
         try:
-            self._destroy_networks(context, instance)
-            self._destroy_instance(context, instance)
-        except Exception:
-            LOG.exception(_LE("Error while trying to clean up "
-                              "instance resources."),
-                          instance=instance)
+            node = ironic.get_node_by_instance(self.ironicclient,
+                                               instance.uuid)
+        except ironic_exc.NotFound:
+            node = None
+
+        if node:
+            try:
+                if node.provision_state in _UNPROVISION_STATES:
+                    self._destroy_networks(context, instance)
+                    self._destroy_instance(context, instance)
+                else:
+                    self._remove_instance_info_from_node(instance)
+            except Exception:
+                LOG.exception(_LE("Error while trying to clean up "
+                                  "instance resources."),
+                              instance=instance)
 
         instance.status = status.DELETED
         instance.save()
@@ -184,7 +262,7 @@ class EngineManager(base_manager.BaseEngineManager):
         return node.to_dict()
 
     def get_ironic_node_list(self, context, fields):
-        """Get a ironic node list."""
+        """Get an ironic node list."""
         nodes = ironic.get_node_list(self.ironicclient, associated=True,
                                      limit=0, fields=fields)
         return {'nodes': [node.to_dict() for node in nodes]}
diff --git a/nimble/tests/unit/engine/test_manager.py b/nimble/tests/unit/engine/test_manager.py
index 6f9abf6d..d3d39d0c 100644
--- a/nimble/tests/unit/engine/test_manager.py
+++ b/nimble/tests/unit/engine/test_manager.py
@@ -16,7 +16,9 @@
 """Test class for Nimble ManagerService."""
 
 import mock
+from oslo_config import cfg
 
+from nimble.common import exception
 from nimble.engine.baremetal import ironic
 from nimble.engine.baremetal import ironic_states
 from nimble.engine import manager
@@ -25,6 +27,8 @@ from nimble.tests.unit.db import base as tests_db_base
 from nimble.tests.unit.engine import mgr_utils
 from nimble.tests.unit.objects import utils as obj_utils
 
+CONF = cfg.CONF
+
 
 @mock.patch.object(manager.EngineManager, '_refresh_cache')
 class ManageInstanceTestCase(mgr_utils.ServiceSetUpMixin,
@@ -56,9 +60,14 @@ class ManageInstanceTestCase(mgr_utils.ServiceSetUpMixin,
             mock.ANY, instance.node_uuid, detail=True)
         unplug_vif_mock.assert_called_once_with(mock.ANY, 'fake-uuid')
 
+    @mock.patch.object(ironic, 'get_node_by_instance')
     @mock.patch.object(ironic, 'destroy_node')
-    def test__destroy_instance(self, destroy_node_mock,
-                               refresh_cache_mock):
+    def _test__destroy_instance(self, destroy_node_mock,
+                                get_node_mock, refresh_cache_mock,
+                                state=None):
+        fake_node = mock.MagicMock()
+        fake_node.provision_state = state
+        get_node_mock.return_value = fake_node
         instance = obj_utils.create_test_instance(self.context)
         destroy_node_mock.side_effect = None
         refresh_cache_mock.side_effect = None
@@ -67,16 +76,52 @@ class ManageInstanceTestCase(mgr_utils.ServiceSetUpMixin,
         self.service._destroy_instance(self.context, instance)
         self._stop_service()
 
+        get_node_mock.assert_called_once_with(mock.ANY, instance.uuid)
         destroy_node_mock.assert_called_once_with(mock.ANY, instance.node_uuid)
 
+    def test__destroy_instance_cleaning(self, refresh_cache_mock):
+        self._test__destroy_instance(state=ironic_states.CLEANING,
+                                     refresh_cache_mock=refresh_cache_mock)
+
+    def test__destroy_instance_cleanwait(self, refresh_cache_mock):
+        self._test__destroy_instance(state=ironic_states.CLEANWAIT,
+                                     refresh_cache_mock=refresh_cache_mock)
+
+    @mock.patch.object(ironic, 'get_node_by_instance')
+    @mock.patch.object(ironic, 'destroy_node')
+    def test__destroy_instance_fail_max_retries(self, destroy_node_mock,
+                                                get_node_mock,
+                                                refresh_cache_mock):
+        CONF.set_default('api_max_retries', default=2, group='ironic')
+        fake_node = mock.MagicMock()
+        fake_node.provision_state = ironic_states.ACTIVE
+        get_node_mock.return_value = fake_node
+        instance = obj_utils.create_test_instance(self.context)
+        destroy_node_mock.side_effect = None
+        refresh_cache_mock.side_effect = None
+        self._start_service()
+
+        self.assertRaises(exception.NimbleException,
+                          self.service._destroy_instance,
+                          self.context, instance)
+        self._stop_service()
+
+        self.assertTrue(get_node_mock.called)
+        destroy_node_mock.assert_called_once_with(mock.ANY, instance.node_uuid)
+
+    @mock.patch.object(ironic, 'get_node_by_instance')
     @mock.patch.object(manager.EngineManager, '_destroy_instance')
     @mock.patch.object(manager.EngineManager, '_destroy_networks')
     def test_delete_instance(self, destroy_net_mock,
-                             destroy_inst_mock, refresh_cache_mock):
+                             destroy_inst_mock, get_node_mock,
+                             refresh_cache_mock):
+        fake_node = mock.MagicMock()
+        fake_node.provision_state = ironic_states.ACTIVE
         instance = obj_utils.create_test_instance(self.context)
         destroy_net_mock.side_effect = None
         destroy_inst_mock.side_effect = None
         refresh_cache_mock.side_effect = None
+        get_node_mock.return_value = fake_node
         self._start_service()
 
         self.service.delete_instance(self.context, instance)
@@ -84,6 +129,24 @@ class ManageInstanceTestCase(mgr_utils.ServiceSetUpMixin,
 
         destroy_net_mock.assert_called_once_with(mock.ANY, instance)
         destroy_inst_mock.assert_called_once_with(mock.ANY, instance)
+        get_node_mock.assert_called_once_with(mock.ANY, instance.uuid)
+
+    @mock.patch.object(ironic, 'get_node_by_instance')
+    @mock.patch.object(manager.EngineManager, '_destroy_instance')
+    def test_delete_instance_without_node_destroy(
+            self, destroy_inst_mock, get_node_mock, refresh_cache_mock):
+        fake_node = mock.MagicMock()
+        fake_node.provision_state = 'foo'
+        instance = obj_utils.create_test_instance(self.context)
+        destroy_inst_mock.side_effect = None
+        refresh_cache_mock.side_effect = None
+        get_node_mock.return_value = fake_node
+        self._start_service()
+
+        self.service.delete_instance(self.context, instance)
+        self._stop_service()
+
+        self.assertFalse(destroy_inst_mock.called)
 
     @mock.patch.object(ironic, 'set_power_state')
     def test_change_instance_power_state(self, set_power_mock,