Add sync maintenance state periodic task

When an Ironic node goes into the maintenance state, the instance
associated with it should be moved to the maintenance state as well.

Change-Id: Iced6535157668fb5cda9a0459272c13d3e8b3af5
This commit is contained in:
Zhenguo Niu 2017-03-06 19:55:56 +08:00
parent 374de173c7
commit 508b86f9b1
5 changed files with 84 additions and 1 deletions

View File

@ -300,6 +300,10 @@ class InstanceIsLocked(Invalid):
msg_fmt = _("Instance %(instance_uuid)s is locked") msg_fmt = _("Instance %(instance_uuid)s is locked")
class InstanceInMaintenance(Invalid):
    """Error reported for an instance that is in maintenance mode.

    The message is formatted with the ``instance_uuid`` keyword.
    """

    msg_fmt = _("Instance %(instance_uuid)s is in maintenance mode")
class InvalidReservationExpiration(Invalid): class InvalidReservationExpiration(Invalid):
message = _("Invalid reservation expiration %(expire)s.") message = _("Invalid reservation expiration %(expire)s.")

View File

@ -89,7 +89,10 @@ STOPPED = 'stopped'
REBUILDING = 'rebuilding' REBUILDING = 'rebuilding'
""" The server is in rebuilding process """ """ The server is in rebuilding process """
STABLE_STATES = (ACTIVE, ERROR, DELETED, STOPPED) MAINTENANCE = 'maintenance'
""" The server is in maintenance """
STABLE_STATES = (ACTIVE, ERROR, DELETED, STOPPED, MAINTENANCE)
"""States that will not transition unless receiving a request.""" """States that will not transition unless receiving a request."""
UNSTABLE_STATES = (BUILDING, DELETING, POWERING_ON, POWERING_OFF, REBOOTING, UNSTABLE_STATES = (BUILDING, DELETING, POWERING_ON, POWERING_OFF, REBOOTING,
@ -155,6 +158,9 @@ machine.add_transition(STOPPED, DELETING, 'delete')
# from error* states # from error* states
machine.add_transition(ERROR, DELETING, 'delete') machine.add_transition(ERROR, DELETING, 'delete')
# from maintenance* states
machine.add_transition(MAINTENANCE, DELETING, 'delete')
# from *ing states # from *ing states
machine.add_transition(BUILDING, ACTIVE, 'done') machine.add_transition(BUILDING, ACTIVE, 'done')
machine.add_transition(DELETING, DELETED, 'done') machine.add_transition(DELETING, DELETED, 'done')

View File

@ -53,6 +53,10 @@ opts = [
default=600, default=600,
help=_("Interval to sync power states between the database " help=_("Interval to sync power states between the database "
"and Ironic, in seconds.")), "and Ironic, in seconds.")),
cfg.IntOpt('sync_maintenance_interval',
default=600,
help=_("Interval to sync maintenance states between the "
"database and Ironic, in seconds.")),
] ]

View File

@ -41,6 +41,15 @@ def check_instance_lock(function):
return inner return inner
def check_instance_maintenance(function):
    """Decorator rejecting calls made on an instance in maintenance.

    The wrapped callable is invoked unchanged unless the instance status
    equals ``states.MAINTENANCE``, in which case
    ``exception.InstanceInMaintenance`` is raised with the instance uuid.

    :param function: callable taking ``(self, context, instance, ...)``.
    :returns: the wrapping callable.
    """

    @six.wraps(function)
    def inner(self, context, instance, *args, **kwargs):
        in_maintenance = instance.status == states.MAINTENANCE
        if not in_maintenance:
            return function(self, context, instance, *args, **kwargs)
        raise exception.InstanceInMaintenance(instance_uuid=instance.uuid)

    return inner
class API(object): class API(object):
"""API for interacting with the engine manager.""" """API for interacting with the engine manager."""
@ -264,6 +273,7 @@ class API(object):
self._delete_instance(context, instance) self._delete_instance(context, instance)
@check_instance_lock @check_instance_lock
@check_instance_maintenance
def power(self, context, instance, target): def power(self, context, instance, target):
"""Set power state of an instance.""" """Set power state of an instance."""
LOG.debug("Going to try to set instance power state to %s", LOG.debug("Going to try to set instance power state to %s",
@ -282,6 +292,7 @@ class API(object):
self.engine_rpcapi.set_power_state(context, instance, target) self.engine_rpcapi.set_power_state(context, instance, target)
@check_instance_lock @check_instance_lock
@check_instance_maintenance
def rebuild(self, context, instance): def rebuild(self, context, instance):
"""Rebuild an instance.""" """Rebuild an instance."""
fsm = states.machine.copy() fsm = states.machine.copy()

View File

@ -197,6 +197,64 @@ class EngineManager(base_manager.BaseEngineManager):
db_instance.power_state = node_power_state db_instance.power_state = node_power_state
db_instance.save() db_instance.save()
@periodic_task.periodic_task(spacing=CONF.engine.sync_maintenance_interval,
                             run_immediately=True)
def _sync_maintenance_states(self, context):
    """Align maintenance states between the database and the hypervisor.

    Lists the Ironic nodes that have an instance associated, then walks
    every instance in the database:

    * an instance whose status is in ``states.UNSTABLE_STATES`` is skipped;
    * an instance with no matching node is skipped;
    * an instance in MAINTENANCE whose node is no longer flagged for
      maintenance is set to ACTIVE;
    * an instance not in MAINTENANCE whose node is flagged for maintenance
      is set to MAINTENANCE.

    If the node list cannot be retrieved, or it is empty, a warning is
    logged and nothing is changed.

    :param context: context used to list the instances.
    """
    # Only fetching the necessary fields
    node_fields = ('instance_uuid', 'maintenance')

    try:
        nodes = ironic.get_node_list(self.ironicclient,
                                     associated=True,
                                     fields=node_fields,
                                     limit=0)
    except Exception as e:
        # Hand the substitution dict to the logger instead of formatting
        # with '%' up front, so interpolation is left to the logging layer
        # (same calling convention as the LOG.info call below).
        LOG.warning(
            _LW("Failed to retrieve node list when synchronizing "
                "maintenance states: %(msg)s"), {"msg": e})
        # Just return if we fail to get the nodes' maintenance state.
        return

    node_dict = {node.instance_uuid: node for node in nodes}

    if not node_dict:
        LOG.warning(_LW("While synchronizing instance maintenance states, "
                        "found none node with instance associated on the "
                        "hypervisor."))
        return

    db_instances = objects.Instance.list(context)
    for instance in db_instances:
        uuid = instance.uuid

        # If the instance is in an unstable state while its node goes to
        # maintenance, skip the sync: the pending task is expected to end
        # up in the error state instead.
        if instance.status in states.UNSTABLE_STATES:
            LOG.info(_LI("During sync_maintenance_state the instance "
                         "has a pending task (%(task)s). Skip."),
                     {'task': instance.status},
                     instance=instance)
            continue

        if uuid not in node_dict:
            continue

        node_maintenance = node_dict[uuid].maintenance

        if instance.status == states.MAINTENANCE and not node_maintenance:
            # TODO(zhenguo): need to check whether we need a state machine
            # transition here. Currently we just move to the ACTIVE state
            # regardless of its real power state, which the sync power
            # state periodic task may need to correct.
            instance.status = states.ACTIVE
            instance.save()
        elif node_maintenance and instance.status != states.MAINTENANCE:
            instance.status = states.MAINTENANCE
            instance.save()
def destroy_networks(self, context, instance): def destroy_networks(self, context, instance):
LOG.debug("unplug: instance_uuid=%(uuid)s vif=%(instance_nics)s", LOG.debug("unplug: instance_uuid=%(uuid)s vif=%(instance_nics)s",
{'uuid': instance.uuid, {'uuid': instance.uuid,