From 508b86f9b13b1115ebef6e64c81f1b9ecd22ed40 Mon Sep 17 00:00:00 2001 From: Zhenguo Niu Date: Mon, 6 Mar 2017 19:55:56 +0800 Subject: [PATCH] Add sync maintenance state periodic task When ironic nodes go to maintenance state, we should move instance to maintenance state as well. Change-Id: Iced6535157668fb5cda9a0459272c13d3e8b3af5 --- mogan/common/exception.py | 4 +++ mogan/common/states.py | 8 +++++- mogan/conf/engine.py | 4 +++ mogan/engine/api.py | 11 ++++++++ mogan/engine/manager.py | 58 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 84 insertions(+), 1 deletion(-) diff --git a/mogan/common/exception.py b/mogan/common/exception.py index c04a66e4..48e7d810 100644 --- a/mogan/common/exception.py +++ b/mogan/common/exception.py @@ -300,6 +300,10 @@ class InstanceIsLocked(Invalid): msg_fmt = _("Instance %(instance_uuid)s is locked") +class InstanceInMaintenance(Invalid): + msg_fmt = _("Instance %(instance_uuid)s is in maintenance mode") + + class InvalidReservationExpiration(Invalid): message = _("Invalid reservation expiration %(expire)s.") diff --git a/mogan/common/states.py b/mogan/common/states.py index 618c80ff..e66c1203 100644 --- a/mogan/common/states.py +++ b/mogan/common/states.py @@ -89,7 +89,10 @@ STOPPED = 'stopped' REBUILDING = 'rebuilding' """ The server is in rebuilding process """ -STABLE_STATES = (ACTIVE, ERROR, DELETED, STOPPED) +MAINTENANCE = 'maintenance' +""" The server is in maintenance """ + +STABLE_STATES = (ACTIVE, ERROR, DELETED, STOPPED, MAINTENANCE) """States that will not transition unless receiving a request.""" UNSTABLE_STATES = (BUILDING, DELETING, POWERING_ON, POWERING_OFF, REBOOTING, @@ -155,6 +158,9 @@ machine.add_transition(STOPPED, DELETING, 'delete') # from error* states machine.add_transition(ERROR, DELETING, 'delete') +# from maintenance* states +machine.add_transition(MAINTENANCE, DELETING, 'delete') + # from *ing states machine.add_transition(BUILDING, ACTIVE, 'done') machine.add_transition(DELETING, DELETED, 
'done') diff --git a/mogan/conf/engine.py b/mogan/conf/engine.py index 5c89ba3c..a181bc02 100644 --- a/mogan/conf/engine.py +++ b/mogan/conf/engine.py @@ -53,6 +53,10 @@ opts = [ default=600, help=_("Interval to sync power states between the database " "and Ironic, in seconds.")), + cfg.IntOpt('sync_maintenance_interval', + default=600, + help=_("Interval to sync maintenance states between the " + "database and Ironic, in seconds.")), ] diff --git a/mogan/engine/api.py b/mogan/engine/api.py index fad605a4..70e04b23 100644 --- a/mogan/engine/api.py +++ b/mogan/engine/api.py @@ -41,6 +41,15 @@ def check_instance_lock(function): return inner +def check_instance_maintenance(function): + @six.wraps(function) + def inner(self, context, instance, *args, **kwargs): + if instance.status == states.MAINTENANCE: + raise exception.InstanceInMaintenance(instance_uuid=instance.uuid) + return function(self, context, instance, *args, **kwargs) + return inner + + class API(object): """API for interacting with the engine manager.""" @@ -264,6 +273,7 @@ class API(object): self._delete_instance(context, instance) @check_instance_lock + @check_instance_maintenance def power(self, context, instance, target): """Set power state of an instance.""" LOG.debug("Going to try to set instance power state to %s", @@ -282,6 +292,7 @@ class API(object): self.engine_rpcapi.set_power_state(context, instance, target) @check_instance_lock + @check_instance_maintenance def rebuild(self, context, instance): """Rebuild an instance.""" fsm = states.machine.copy() diff --git a/mogan/engine/manager.py b/mogan/engine/manager.py index da8bf8a5..904be860 100644 --- a/mogan/engine/manager.py +++ b/mogan/engine/manager.py @@ -197,6 +197,64 @@ class EngineManager(base_manager.BaseEngineManager): db_instance.power_state = node_power_state db_instance.save() + @periodic_task.periodic_task(spacing=CONF.engine.sync_maintenance_interval, + run_immediately=True) + def _sync_maintenance_states(self, context): + """Align 
maintenance states between the database and the hypervisor.""" + + # Only fetching the necessary fields + node_fields = ('instance_uuid', 'maintenance') + + try: + nodes = ironic.get_node_list(self.ironicclient, + associated=True, + fields=node_fields, + limit=0) + except Exception as e: + LOG.warning( + _LW("Failed to retrieve node list when synchronizing " + "maintenance states: %(msg)s") % {"msg": e}) + # Just return if we fail to get nodes maintenance state. + return + + node_dict = {node.instance_uuid: node for node in nodes} + + if not node_dict: + LOG.warning(_LW("While synchronizing instance maintenance states, " + "found none node with instance associated on the " + "hypervisor.")) + return + + db_instances = objects.Instance.list(context) + for instance in db_instances: + uuid = instance.uuid + + # If instance in unstable states and the node goes to maintenance, + # just skip the syncing process as the pending task should go + # to error state instead. + if instance.status in states.UNSTABLE_STATES: + LOG.info(_LI("During sync_maintenance_state the instance " + "has a pending task (%(task)s). Skip."), + {'task': instance.status}, + instance=instance) + continue + + if uuid not in node_dict: + continue + + node_maintenance = node_dict[uuid].maintenance + + if instance.status == states.MAINTENANCE and not node_maintenance: + # TODO(zhenguo): need to check whether we need state machine + # transition here, and currently we just move to ACTIVE state + # regardless of its real power state which may need sync power + # state periodic task to correct it. + instance.status = states.ACTIVE + instance.save() + elif node_maintenance and instance.status != states.MAINTENANCE: + instance.status = states.MAINTENANCE + instance.save() + def destroy_networks(self, context, instance): LOG.debug("unplug: instance_uuid=%(uuid)s vif=%(instance_nics)s", {'uuid': instance.uuid,