Introducing node introspection state management

Currently, state of a node introspection isn't kept in the database.
This change introduces:

* a new database column to keep the node introspection state
* an automaton to manage the node introspection state
* a decorator to declare a function performing an introspection state
  transition
* a version_id column is added, to enhance database consistency, that
  is consulted whenever node_info is committed

This change is part of the HA_Inspector effort[1]

[1] https://specs.openstack.org/openstack/ironic-inspector-specs/specs/HA_inspector.html

Closes-Bug: #1618835
Partial-Bug: #1525218
Change-Id: I18cb45f0d1194414715ccbe826d8a95610ec718d
changes/43/348943/36
dparalen 7 years ago
parent fbe216b782
commit 3ddc0615e5

@ -22,12 +22,13 @@ from oslo_db import options as db_opts
from oslo_db.sqlalchemy import models
from oslo_db.sqlalchemy import session as db_session
from oslo_db.sqlalchemy import types as db_types
from sqlalchemy import (Boolean, Column, DateTime, Float, ForeignKey, Integer,
String, Text)
from sqlalchemy import (Boolean, Column, DateTime, Enum, Float, ForeignKey,
Integer, String, Text)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import orm
from ironic_inspector import conf # noqa
from ironic_inspector import introspection_state as istate
class ModelBase(models.ModelBase):
@ -51,10 +52,22 @@ if CONF.discoverd.database:
class Node(Base):
__tablename__ = 'nodes'
uuid = Column(String(36), primary_key=True)
version_id = Column(String(36), server_default='')
state = Column(Enum(*istate.States.all()), nullable=False,
default=istate.States.finished,
server_default=istate.States.finished)
started_at = Column(Float, nullable=True)
finished_at = Column(Float, nullable=True)
error = Column(Text, nullable=True)
# version_id is being tracked in the NodeInfo object
# for the sake of consistency. See also SQLAlchemy docs:
# http://docs.sqlalchemy.org/en/latest/orm/versioning.html
__mapper_args__ = {
'version_id_col': version_id,
'version_id_generator': False,
}
class Attribute(Base):
__tablename__ = 'attributes'

@ -23,6 +23,7 @@ from oslo_config import cfg
from ironic_inspector.common.i18n import _, _LI, _LW
from ironic_inspector.common import ironic as ir_utils
from ironic_inspector import firewall
from ironic_inspector import introspection_state as istate
from ironic_inspector import node_cache
from ironic_inspector import utils
@ -87,9 +88,9 @@ def introspect(node_id, new_ipmi_credentials=None, token=None):
node_info=node)
bmc_address = ir_utils.get_ipmi_address(node)
node_info = node_cache.add_node(node.uuid,
bmc_address=bmc_address,
ironic=ironic)
node_info = node_cache.start_introspection(node.uuid,
bmc_address=bmc_address,
ironic=ironic)
node_info.set_option('new_ipmi_credentials', new_ipmi_credentials)
def _handle_exceptions(fut):
@ -124,12 +125,13 @@ def _background_introspect(ironic, node_info):
node_info.acquire_lock()
try:
_background_introspect_locked(ironic, node_info)
_background_introspect_locked(node_info, ironic)
finally:
node_info.release_lock()
def _background_introspect_locked(ironic, node_info):
@node_cache.fsm_transition(istate.Events.wait)
def _background_introspect_locked(node_info, ironic):
# TODO(dtantsur): pagination
macs = list(node_info.ports())
if macs:
@ -192,6 +194,8 @@ def abort(node_id, token=None):
utils.executor().submit(_abort, node_info, ironic)
@node_cache.release_lock
@node_cache.fsm_transition(istate.Events.abort, reentrant=False)
def _abort(node_info, ironic):
# runs in background
if node_info.finished_at is not None:

@ -0,0 +1,144 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Introspection state."""
from automaton import machines
class States(object):
"""States of an introspection."""
# received introspection data from a nonexistent node
# active - the inspector performs an operation on the node
enrolling = 'enrolling'
# an error appeared in a previous introspection state
# passive - the inspector doesn't perform any operation on the node
error = 'error'
# introspection finished successfully
# passive
finished = 'finished'
# processing introspection data from the node
# active
processing = 'processing'
# processing stored introspection data from the node
# active
reapplying = 'reapplying'
# received a request to start node introspection
# active
starting = 'starting'
# waiting for node introspection data
# passive
waiting = 'waiting'
@classmethod
def all(cls):
"""Return a list of all states."""
return [cls.starting, cls.waiting, cls.processing, cls.finished,
cls.error, cls.reapplying, cls.enrolling]
class Events(object):
"""Events that change introspection state."""
# cancel a waiting node introspection
# API, user
abort = 'abort'
# mark an introspection failed
# internal
error = 'error'
# mark an introspection finished
# internal
finish = 'finish'
# process node introspection data
# API, introspection image
process = 'process'
# process stored node introspection data
# API, user
reapply = 'reapply'
# initialize node introspection
# API, user
start = 'start'
# mark an introspection timed-out waiting for data
# internal
timeout = 'timeout'
# mark an introspection waiting for image data
# internal
wait = 'wait'
@classmethod
def all(cls):
"""Return a list of all events."""
return [cls.process, cls.reapply, cls.timeout, cls.wait, cls.abort,
cls.error, cls.finish]
# Error transition is allowed in any state.
State_space = [
{
'name': States.enrolling,
'next_states': {
Events.error: States.error,
Events.process: States.processing,
},
},
{
'name': States.error,
'next_states': {
Events.abort: States.error,
Events.error: States.error,
Events.reapply: States.reapplying,
Events.start: States.starting,
},
},
{
'name': States.finished,
'next_states': {
Events.finish: States.finished,
Events.reapply: States.reapplying,
Events.start: States.starting
},
},
{
'name': States.processing,
'next_states': {
Events.error: States.error,
Events.finish: States.finished,
},
},
{
'name': States.reapplying,
'next_states': {
Events.error: States.error,
Events.finish: States.finished,
Events.reapply: States.reapplying,
},
},
{
'name': States.starting,
'next_states': {
Events.error: States.error,
Events.start: States.starting,
Events.wait: States.waiting,
},
},
{
'name': States.waiting,
'next_states': {
Events.abort: States.error,
Events.process: States.processing,
Events.start: States.starting,
Events.timeout: States.error,
},
},
]
FSM = machines.FiniteMachine.build(State_space)
FSM.default_start_state = States.finished

@ -0,0 +1,49 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
"""Introducing Node.state attribute
Revision ID: d2e48801c8ef
Revises: e169a4a81d88
Create Date: 2016-07-29 10:10:32.351661
"""
# revision identifiers, used by Alembic.
revision = 'd2e48801c8ef'
down_revision = 'e169a4a81d88'
branch_labels = None
depends_on = None
from alembic import op
import sqlalchemy as sa
from sqlalchemy import sql
from ironic_inspector import introspection_state as istate
Node = sql.table('nodes',
sql.column('error', sa.String),
sql.column('state', sa.Enum(*istate.States.all())))
def upgrade():
op.add_column('nodes', sa.Column('version_id', sa.String(36),
server_default=''))
op.add_column('nodes', sa.Column('state', sa.Enum(*istate.States.all(),
name='node_state'),
nullable=False,
default=istate.States.finished,
server_default=istate.States.finished))
# correct the state: finished -> error if Node.error is not null
stmt = Node.update().where(Node.c.error != sql.null()).values(
{'state': op.inline_literal(istate.States.error)})
op.execute(stmt)

@ -13,11 +13,13 @@
"""Cache for nodes currently under introspection."""
import contextlib
import copy
import json
import six
import time
from automaton import exceptions as automaton_errors
from ironicclient import exceptions
from oslo_concurrency import lockutils
from oslo_config import cfg
@ -25,13 +27,16 @@ from oslo_db import exception as db_exc
from oslo_db.sqlalchemy import utils as db_utils
from oslo_utils import excutils
from oslo_utils import uuidutils
from sqlalchemy.orm import exc as orm_errors
from sqlalchemy import text
from ironic_inspector import db
from ironic_inspector.common.i18n import _, _LE, _LW, _LI
from ironic_inspector.common import ironic as ir_utils
from ironic_inspector import introspection_state as istate
from ironic_inspector import utils
CONF = cfg.CONF
@ -62,9 +67,12 @@ class NodeInfo(object):
for every thread.
"""
def __init__(self, uuid, started_at=None, finished_at=None, error=None,
node=None, ports=None, ironic=None, lock=None):
def __init__(self, uuid, version_id=None, state=None, started_at=None,
finished_at=None, error=None, node=None, ports=None,
ironic=None, lock=None):
self.uuid = uuid
self._version_id = version_id
self._state = state
self.started_at = started_at
self.finished_at = finished_at
self.error = error
@ -79,6 +87,7 @@ class NodeInfo(object):
self._lock = lock if lock is not None else _get_lock(uuid)
# Whether lock was acquired using this NodeInfo object
self._locked = lock is not None
self._fsm = None
def __del__(self):
if self._locked:
@ -86,6 +95,11 @@ class NodeInfo(object):
'node info object is deleted'))
self._lock.release()
def __str__(self):
"""Self represented as an UUID and a state."""
return _("%(uuid)s state %(state)s") % {'uuid': self.uuid,
'state': self._state}
def acquire_lock(self, blocking=True):
"""Acquire a lock on the associated node.
@ -118,6 +132,106 @@ class NodeInfo(object):
self._lock.release()
self._locked = False
@property
def version_id(self):
"""Get the version id"""
if self._version_id is None:
row = db.model_query(db.Node).get(self.uuid)
if row is None:
raise utils.NotFoundInCacheError(_('Node not found in the '
'cache'), node_info=self)
self._version_id = row.version_id
return self._version_id
def _set_version_id(self, value, session):
row = self._row(session)
row.version_id = value
row.save(session)
self._version_id = value
def _row(self, session=None):
"""Get a row from the database with self.uuid and self.version_id"""
try:
# race condition if version_id changed outside of this node_info
return db.model_query(db.Node, session=session).filter_by(
uuid=self.uuid, version_id=self.version_id).one()
except (orm_errors.NoResultFound, orm_errors.StaleDataError):
raise utils.NodeStateRaceCondition(node_info=self)
def _commit(self, **fields):
"""Commit the fields into the DB."""
LOG.debug('Committing fields: %s', fields, node_info=self)
with db.ensure_transaction() as session:
self._set_version_id(uuidutils.generate_uuid(), session)
row = self._row(session)
row.update(fields)
row.save(session)
def commit(self):
"""Commit current node status into the database."""
# state and version_id are updated separately
self._commit(started_at=self.started_at, finished_at=self.finished_at,
error=self.error)
@property
def state(self):
"""State of the node_info object."""
if self._state is None:
row = self._row()
self._state = row.state
return self._state
def _set_state(self, value):
self._commit(state=value)
self._state = value
def _get_fsm(self):
"""Get an fsm instance initialized with self.state."""
if self._fsm is None:
self._fsm = istate.FSM.copy(shallow=True)
self._fsm.initialize(start_state=self.state)
return self._fsm
@contextlib.contextmanager
def _fsm_ctx(self):
fsm = self._get_fsm()
try:
yield fsm
finally:
if fsm.current_state != self.state:
LOG.info(_LI('Updating node state: %(current)s --> %(new)s'),
{'current': self.state, 'new': fsm.current_state},
node_info=self)
self._set_state(fsm.current_state)
def fsm_event(self, event, strict=False):
"""Update node_info.state based on a fsm.process_event(event) call.
An AutomatonException triggers an error event.
If strict, node_info.finished(error=str(exc)) is called with the
AutomatonException instance and a EventError raised.
:param event: an event to process by the fsm
:strict: whether to fail the introspection upon an invalid event
:raises: NodeStateInvalidEvent
"""
with self._fsm_ctx() as fsm:
LOG.debug('Executing fsm(%(state)s).process_event(%(event)s)',
{'state': fsm.current_state, 'event': event},
node_info=self)
try:
fsm.process_event(event)
except automaton_errors.NotFound as exc:
msg = _('Invalid event: %s') % exc
if strict:
LOG.error(msg, node_info=self)
# assuming an error event is always possible
fsm.process_event(istate.Events.error)
self.finished(error=str(exc))
else:
LOG.warning(msg, node_info=self)
raise utils.NodeStateInvalidEvent(str(exc), node_info=self)
@property
def options(self):
"""Node introspection options as a dict."""
@ -169,9 +283,7 @@ class NodeInfo(object):
self.error = error
with db.ensure_transaction() as session:
db.model_query(db.Node, session=session).filter_by(
uuid=self.uuid).update(
{'finished_at': self.finished_at, 'error': error})
self._commit(finished_at=self.finished_at, error=self.error)
db.model_query(db.Attribute, session=session).filter_by(
uuid=self.uuid).delete()
db.model_query(db.Option, session=session).filter_by(
@ -207,7 +319,8 @@ class NodeInfo(object):
def from_row(cls, row, ironic=None, lock=None, node=None):
"""Construct NodeInfo from a database row."""
fields = {key: row[key]
for key in ('uuid', 'started_at', 'finished_at', 'error')}
for key in ('uuid', 'version_id', 'state', 'started_at',
'finished_at', 'error')}
return cls(ironic=ironic, lock=lock, node=node, **fields)
def invalidate_cache(self):
@ -217,6 +330,9 @@ class NodeInfo(object):
self._ports = None
self._attributes = None
self._ironic = None
self._fsm = None
self._state = None
self._version_id = None
def node(self, ironic=None):
"""Get Ironic node object associated with the cached node record."""
@ -385,13 +501,172 @@ class NodeInfo(object):
self.patch([{'op': op, 'path': path, 'value': value}], ironic)
def add_node(uuid, **attributes):
def triggers_fsm_error_transition(errors=(Exception,),
no_errors=(utils.NodeStateInvalidEvent,
utils.NodeStateRaceCondition)):
"""Trigger an fsm error transition upon certain errors.
It is assumed the first function arg of the decorated function is always a
NodeInfo instance.
:param errors: a tuple of exceptions upon which an error
event is triggered. Re-raised.
:param no_errors: a tuple of exceptions that won't trigger the
error event.
"""
def outer(func):
@six.wraps(func)
def inner(node_info, *args, **kwargs):
ret = None
try:
ret = func(node_info, *args, **kwargs)
except no_errors as exc:
LOG.debug('Not processing error event for the '
'exception: %(exc)s raised by %(func)s',
{'exc': exc, 'func': func}, node_info=node_info)
except errors as exc:
with excutils.save_and_reraise_exception():
LOG.error(_LE('Processing the error event because of an '
'exception %(exc_type)s: %(exc)s raised by '
'%(func)s'),
{'exc_type': type(exc), 'exc': exc,
'func': func},
node_info=node_info)
# an error event should be possible from all states
node_info.fsm_event(istate.Events.error)
return ret
return inner
return outer
def fsm_event_before(event, strict=False):
"""Trigger an fsm event before the function execution.
It is assumed the first function arg of the decorated function is always a
NodeInfo instance.
:param event: the event to process before the function call
:param strict: make an invalid fsm event trigger an error event
"""
def outer(func):
@six.wraps(func)
def inner(node_info, *args, **kwargs):
LOG.debug('Processing event %(event)s before calling '
'%(func)s', {'event': event, 'func': func},
node_info=node_info)
node_info.fsm_event(event, strict=strict)
LOG.debug('Calling: %(func)s(<node>, *%(args_)s, '
'**%(kwargs_)s)', {'func': func, 'args_': args,
'kwargs_': kwargs},
node_info=node_info)
return func(node_info, *args, **kwargs)
return inner
return outer
def fsm_event_after(event, strict=False):
"""Trigger an fsm event after the function execution.
It is assumed the first function arg of the decorated function is always a
NodeInfo instance.
:param event: the event to process after the function call
:param strict: make an invalid fsm event trigger an error event
"""
def outer(func):
@six.wraps(func)
def inner(node_info, *args, **kwargs):
LOG.debug('Calling: %(func)s(<node>, *%(args_)s, '
'**%(kwargs_)s)', {'func': func, 'args_': args,
'kwargs_': kwargs},
node_info=node_info)
ret = func(node_info, *args, **kwargs)
LOG.debug('Processing event %(event)s after calling '
'%(func)s', {'event': event, 'func': func},
node_info=node_info)
node_info.fsm_event(event, strict=strict)
return ret
return inner
return outer
def fsm_transition(event, reentrant=True, **exc_kwargs):
"""Decorate a function to perform a (non-)reentrant transition.
If True, reentrant transition will be performed at the end of a function
call. If False, the transition will be performed before the function call.
The function is decorated with the triggers_fsm_error_transition decorator
as well.
:param event: the event to bind the transition to.
:param reentrant: whether the transition is reentrant.
:param exc_kwargs: passed on to the triggers_fsm_error_transition decorator
"""
def outer(func):
inner = triggers_fsm_error_transition(**exc_kwargs)(func)
if not reentrant:
return fsm_event_before(event, strict=True)(inner)
return fsm_event_after(event)(inner)
return outer
def release_lock(func):
"""Decorate a node_info-function to release the node_info lock.
Assumes the first parameter of the function func is always a NodeInfo
instance.
"""
@six.wraps(func)
def inner(node_info, *args, **kwargs):
try:
return func(node_info, *args, **kwargs)
finally:
# FIXME(milan) hacking the test cases to work
# with release_lock.assert_called_once...
if node_info._locked:
node_info.release_lock()
return inner
def start_introspection(uuid, **kwargs):
"""Start the introspection of a node.
If a node_info record exists in the DB, a start transition is used rather
than dropping the record in order to check for the start transition
validity in particular node state.
:param uuid: Ironic node UUID
:param kwargs: passed on to add_node()
:raises: NodeStateInvalidEvent in case the start transition is invalid in
the current node state
:raises: NodeStateRaceCondition if a mismatch was detected between the
node_info cache and the DB
:returns: NodeInfo
"""
with db.ensure_transaction():
node_info = NodeInfo(uuid)
# check that the start transition is possible
try:
node_info.fsm_event(istate.Events.start)
except utils.NotFoundInCacheError:
# node not found while in the fsm_event handler
LOG.debug('Node missing in the cache; adding it now',
node_info=node_info)
state = istate.States.starting
else:
state = node_info.state
return add_node(uuid, state, **kwargs)
def add_node(uuid, state, **attributes):
"""Store information about a node under introspection.
All existing information about this node is dropped.
Empty values are skipped.
:param uuid: Ironic node UUID
:param state: The initial state of the node
:param attributes: attributes known about this node (like macs, BMC etc);
also ironic client instance may be passed under 'ironic'
:returns: NodeInfo
@ -399,9 +674,9 @@ def add_node(uuid, **attributes):
started_at = time.time()
with db.ensure_transaction() as session:
_delete_node(uuid)
db.Node(uuid=uuid, started_at=started_at).save(session)
db.Node(uuid=uuid, state=state, started_at=started_at).save(session)
node_info = NodeInfo(uuid=uuid, started_at=started_at,
node_info = NodeInfo(uuid=uuid, state=state, started_at=started_at,
ironic=attributes.pop('ironic', None))
for (name, value) in attributes.items():
if not value:
@ -591,10 +866,9 @@ def clean_up():
try:
if node_info.finished_at or node_info.started_at > threshold:
continue
node_info.fsm_event(istate.Events.timeout)
node_info.finished(error='Introspection timeout')
db.model_query(db.Node, session=session).filter_by(
uuid=u).update({'finished_at': time.time(),
'error': 'Introspection timeout'})
db.model_query(db.Attribute, session=session).filter_by(
uuid=u).delete()
db.model_query(db.Option, session=session).filter_by(
@ -610,6 +884,7 @@ def create_node(driver, ironic=None, **attributes):
* Create new node in ironic.
* Cache it in inspector.
* Sets node_info state to enrolling.
:param driver: driver for Ironic node.
:param ironic: ronic client instance.
@ -625,7 +900,7 @@ def create_node(driver, ironic=None, **attributes):
LOG.error(_LE('Failed to create new node: %s'), e)
else:
LOG.info(_LI('Node %s was created successfully'), node.uuid)
return add_node(node.uuid, ironic=ironic)
return add_node(node.uuid, istate.States.enrolling, ironic=ironic)
def get_node_list(ironic=None, marker=None, limit=None):

@ -28,6 +28,7 @@ from ironic_inspector.common.i18n import _, _LE, _LI, _LW
from ironic_inspector.common import ironic as ir_utils
from ironic_inspector.common import swift
from ironic_inspector import firewall
from ironic_inspector import introspection_state as istate
from ironic_inspector import node_cache
from ironic_inspector.plugins import base as plugins_base
from ironic_inspector import rules
@ -234,7 +235,7 @@ def process(introspection_data):
_store_logs(introspection_data, node_info)
try:
result = _process_node(node, introspection_data, node_info)
result = _process_node(node_info, node, introspection_data)
except utils.Error as exc:
node_info.finished(error=str(exc))
with excutils.save_and_reraise_exception():
@ -263,7 +264,8 @@ def _run_post_hooks(node_info, introspection_data):
hook_ext.obj.before_update(introspection_data, node_info)
def _process_node(node, introspection_data, node_info):
@node_cache.fsm_transition(istate.Events.process, reentrant=False)
def _process_node(node_info, node, introspection_data):
# NOTE(dtantsur): repeat the check in case something changed
ir_utils.check_provision_state(node)
@ -284,19 +286,20 @@ def _process_node(node, introspection_data, node_info):
new_username, new_password = (
node_info.options.get('new_ipmi_credentials'))
utils.executor().submit(_finish_set_ipmi_credentials,
ironic, node, node_info, introspection_data,
node_info, ironic, node, introspection_data,
new_username, new_password)
resp['ipmi_setup_credentials'] = True
resp['ipmi_username'] = new_username
resp['ipmi_password'] = new_password
else:
utils.executor().submit(_finish, ironic, node_info, introspection_data,
utils.executor().submit(_finish, node_info, ironic, introspection_data,
power_off=CONF.processing.power_off)
return resp
def _finish_set_ipmi_credentials(ironic, node, node_info, introspection_data,
@node_cache.fsm_transition(istate.Events.finish)
def _finish_set_ipmi_credentials(node_info, ironic, node, introspection_data,
new_username, new_password):
patch = [{'op': 'add', 'path': '/driver_info/ipmi_username',
'value': new_username},
@ -320,7 +323,7 @@ def _finish_set_ipmi_credentials(ironic, node, node_info, introspection_data,
node_info=node_info, data=introspection_data)
eventlet.greenthread.sleep(_CREDENTIALS_WAIT_PERIOD)
else:
_finish(ironic, node_info, introspection_data)
_finish_common(node_info, ironic, introspection_data)
return
msg = (_('Failed to validate updated IPMI credentials for node '
@ -329,7 +332,7 @@ def _finish_set_ipmi_credentials(ironic, node, node_info, introspection_data,
raise utils.Error(msg, node_info=node_info, data=introspection_data)
def _finish(ironic, node_info, introspection_data, power_off=True):
def _finish_common(node_info, ironic, introspection_data, power_off=True):
if power_off:
LOG.debug('Forcing power off of node %s', node_info.uuid)
try:
@ -356,6 +359,9 @@ def _finish(ironic, node_info, introspection_data, power_off=True):
node_info=node_info, data=introspection_data)
_finish = node_cache.fsm_transition(istate.Events.finish)(_finish_common)
def reapply(node_ident):
"""Re-apply introspection steps.
@ -395,30 +401,41 @@ def _reapply(node_info):
node_info.finished(error=msg)
return
failures = []
_run_pre_hooks(introspection_data, failures)
if failures:
LOG.error(_LE('Pre-processing failures detected reapplying '
'introspection on stored data:\n%s'),
'\n'.join(failures), node_info=node_info)
node_info.finished(error='\n'.join(failures))
try:
ironic = ir_utils.get_client()
except Exception as exc:
msg = _('Encountered an exception while getting the Ironic client: '
'%s') % exc
LOG.error(msg, node_info=node_info, data=introspection_data)
node_info.fsm_event(istate.Events.error)
node_info.finished(error=msg)
return
try:
ironic = ir_utils.get_client()
node_info.create_ports(introspection_data.get('macs') or ())
_run_post_hooks(node_info, introspection_data)
_store_data(node_info, introspection_data)
node_info.invalidate_cache()
rules.apply(node_info, introspection_data)
_finish(ironic, node_info, introspection_data,
power_off=False)
_reapply_with_data(node_info, introspection_data)
except Exception as exc:
LOG.exception(_LE('Encountered exception reapplying '
'introspection on stored data'),
node_info=node_info,
data=introspection_data)
node_info.finished(error=str(exc))
else:
LOG.info(_LI('Successfully reapplied introspection on stored '
'data'), node_info=node_info, data=introspection_data)
return
_finish(node_info, ironic, introspection_data,
power_off=False)
LOG.info(_LI('Successfully reapplied introspection on stored '
'data'), node_info=node_info, data=introspection_data)
@node_cache.fsm_event_before(istate.Events.reapply)
@node_cache.triggers_fsm_error_transition()
def _reapply_with_data(node_info, introspection_data):
failures = []
_run_pre_hooks(introspection_data, failures)
if failures:
raise utils.Error(_('Pre-processing failures detected reapplying '
'introspection on stored data:\n%s') %
'\n'.join(failures), node_info=node_info)
node_info.create_ports(introspection_data.get('macs') or ())
_run_post_hooks(node_info, introspection_data)
_store_data(node_info, introspection_data)
node_info.invalidate_cache()
rules.apply(node_info, introspection_data)

@ -26,6 +26,7 @@ from ironic_inspector.common import i18n
# Import configuration options
from ironic_inspector import conf # noqa
from ironic_inspector import db
from ironic_inspector import introspection_state as istate
from ironic_inspector import node_cache
from ironic_inspector.plugins import base as plugins_base
from ironic_inspector import utils
@ -161,3 +162,17 @@ class NodeTest(InventoryTest):
self.node_info = node_cache.NodeInfo(uuid=self.uuid, started_at=0,
node=self.node, ports=self.ports)
self.node_info.node = mock.Mock(return_value=self.node)
class NodeStateTest(NodeTest):
def setUp(self):
super(NodeStateTest, self).setUp()
self.node_info._version_id = uuidutils.generate_uuid()
self.node_info._state = istate.States.starting
self.db_node = db.Node(uuid=self.node_info.uuid,
version_id=self.node_info._version_id,
state=self.node_info._state,
started_at=self.node_info.started_at,
finished_at=self.node_info.finished_at,
error=self.node_info.error)
self.db_node.save(self.session)

@ -38,6 +38,7 @@ from ironic_inspector.common import ironic as ir_utils
from ironic_inspector.common import swift
from ironic_inspector import db
from ironic_inspector import dbsync
from ironic_inspector import introspection_state as istate
from ironic_inspector import main
from ironic_inspector import rules
from ironic_inspector.test import base
@ -221,6 +222,10 @@ class Base(base.NodeTest):
else:
self.assertIsNone(status['finished_at'])
def db_row(self):
"""return database row matching self.uuid."""
return db.model_query(db.Node).get(self.uuid)
class Test(Base):
def test_bmc(self):
@ -561,6 +566,122 @@ class Test(Base):
self.assertEqual(store_processing_call,
store_mock.call_args_list[-1])
# TODO(milan): remove the test case in favor of other tests once
# the introspection status endpoint exposes the state information
@mock.patch.object(swift, 'store_introspection_data', autospec=True)
@mock.patch.object(swift, 'get_introspection_data', autospec=True)
def test_state_transitions(self, get_mock, store_mock):
"""Assert state transitions work as expected."""
cfg.CONF.set_override('store_data', 'swift', 'processing')
# ramdisk data copy
# please mind the data is changed during processing
ramdisk_data = json.dumps(copy.deepcopy(self.data))
get_mock.return_value = ramdisk_data
self.call_introspect(self.uuid)
reboot_call = mock.call(self.uuid, 'reboot')
self.cli.node.set_power_state.assert_has_calls([reboot_call])
eventlet.greenthread.sleep(DEFAULT_SLEEP)
row = self.db_row()
self.assertEqual(istate.States.waiting, row.state)
self.call_continue(self.data)
eventlet.greenthread.sleep(DEFAULT_SLEEP)
row = self.db_row()
self.assertEqual(istate.States.finished, row.state)
self.assertIsNone(row.error)
version_id = row.version_id
self.call_reapply(self.uuid)
eventlet.greenthread.sleep(DEFAULT_SLEEP)
row = self.db_row()
self.assertEqual(istate.States.finished, row.state)
self.assertIsNone(row.error)
# the finished state was visited from the reapplying state
self.assertNotEqual(version_id, row.version_id)
self.call_introspect(self.uuid)
eventlet.greenthread.sleep(DEFAULT_SLEEP)
row = self.db_row()
self.assertEqual(istate.States.waiting, row.state)
self.call_abort_introspect(self.uuid)
row = self.db_row()
self.assertEqual(istate.States.error, row.state)
self.assertEqual('Canceled by operator', row.error)
@mock.patch.object(swift, 'store_introspection_data', autospec=True)
@mock.patch.object(swift, 'get_introspection_data', autospec=True)
def test_edge_state_transitions(self, get_mock, store_mock):
"""Assert state transitions work as expected in edge conditions."""
cfg.CONF.set_override('store_data', 'swift', 'processing')
# ramdisk data copy
# please mind the data is changed during processing
ramdisk_data = json.dumps(copy.deepcopy(self.data))
get_mock.return_value = ramdisk_data
# multiple introspect calls
self.call_introspect(self.uuid)
self.call_introspect(self.uuid)
eventlet.greenthread.sleep(DEFAULT_SLEEP)
# TODO(milan): switch to API once the introspection status
# endpoint exposes the state information
row = self.db_row()
self.assertEqual(istate.States.waiting, row.state)
# an error -start-> starting state transition is possible
self.call_abort_introspect(self.uuid)
self.call_introspect(self.uuid)
eventlet.greenthread.sleep(DEFAULT_SLEEP)
row = self.db_row()
self.assertEqual(istate.States.waiting, row.state)
# double abort works
self.call_abort_introspect(self.uuid)
row = self.db_row()
version_id = row.version_id
error = row.error
self.assertEqual(istate.States.error, row.state)
self.call_abort_introspect(self.uuid)
row = self.db_row()
self.assertEqual(istate.States.error, row.state)
# assert the error didn't change
self.assertEqual(error, row.error)
self.assertEqual(version_id, row.version_id)
# preventing stale data race condition
# waiting -> processing is a strict state transition
self.call_introspect(self.uuid)
eventlet.greenthread.sleep(DEFAULT_SLEEP)
row = self.db_row()
row.state = istate.States.processing
with db.ensure_transaction() as session:
row.save(session)
self.call_continue(self.data, expect_error=400)
row = self.db_row()
self.assertEqual(istate.States.error, row.state)
self.assertIn('no defined transition', row.error)
# multiple reapply calls
self.call_introspect(self.uuid)
eventlet.greenthread.sleep(DEFAULT_SLEEP)
self.call_continue(self.data)
eventlet.greenthread.sleep(DEFAULT_SLEEP)
self.call_reapply(self.uuid)
row = self.db_row()
version_id = row.version_id
self.assertEqual(istate.States.finished, row.state)
self.assertIsNone(row.error)
self.call_reapply(self.uuid)
# assert an finished -reapply-> reapplying -> finished state transition
row = self.db_row()
self.assertEqual(istate.States.finished, row.state)
self.assertIsNone(row.error)
self.assertNotEqual(version_id, row.version_id)
@contextlib.contextmanager
def mocked_server():

@ -20,6 +20,7 @@ from oslo_config import cfg
from ironic_inspector.common import ironic as ir_utils
from ironic_inspector import firewall
from ironic_inspector import introspection_state as istate
from ironic_inspector import node_cache
from ironic_inspector.test import base as test_base
@ -95,7 +96,8 @@ class TestFirewall(test_base.NodeTest):
def test_update_filters_args(self, mock_call, mock_get_client,
mock_iptables):
# Pretend that we have nodes on introspection
node_cache.add_node(self.node.uuid, bmc_address='1.2.3.4')
node_cache.add_node(self.node.uuid, state=istate.States.waiting,
bmc_address='1.2.3.4')
firewall.init()
@ -162,6 +164,7 @@ class TestFirewall(test_base.NodeTest):
self.ports = [mock.Mock(address=m) for m in self.macs]
mock_get_client.port.list.return_value = self.ports
node_cache.add_node(self.node.uuid, mac=active_macs,
state=istate.States.finished,
bmc_address='1.2.3.4', foo=None)
firewall.init()
@ -211,6 +214,7 @@ class TestFirewall(test_base.NodeTest):
self.ports = [mock.Mock(address=m) for m in self.macs]
mock_get_client.port.list.return_value = self.ports
node_cache.add_node(self.node.uuid, mac=active_macs,
state=istate.States.finished,
bmc_address='1.2.3.4', foo=None)
firewall.init()
@ -330,7 +334,8 @@ class TestFirewall(test_base.NodeTest):
# Adding a node changes it back
node_cache.add_node(self.node.uuid, bmc_address='1.2.3.4')
node_cache.add_node(self.node.uuid, state=istate.States.starting,
bmc_address='1.2.3.4')
mock_iptables.reset_mock()
firewall.update_filters()

@ -50,21 +50,21 @@ class BaseTest(test_base.NodeTest):
@mock.patch.object(eventlet.greenthread, 'sleep', lambda _: None)
@mock.patch.object(firewall, 'update_filters', autospec=True)
@mock.patch.object(node_cache, 'add_node', autospec=True)
@mock.patch.object(node_cache, 'start_introspection', autospec=True)
@mock.patch.object(ir_utils, 'get_client', autospec=True)
class TestIntrospect(BaseTest):
def test_ok(self, client_mock, add_mock, filters_mock):
def test_ok(self, client_mock, start_mock, filters_mock):
cli = self._prepare(client_mock)
add_mock.return_value = self.node_info
start_mock.return_value = self.node_info
introspect.introspect(self.node.uuid)
cli.node.get.assert_called_once_with(self.uuid)
cli.node.validate.assert_called_once_with(self.uuid)
add_mock.assert_called_once_with(self.uuid,
bmc_address=self.bmc_address,
ironic=cli)
start_mock.assert_called_once_with(self.uuid,
bmc_address=self.bmc_address,
ironic=cli)
self.node_info.ports.assert_called_once_with()
self.node_info.add_attribute.assert_called_once_with('mac',
self.macs)
@ -79,71 +79,71 @@ class TestIntrospect(BaseTest):
self.node_info.acquire_lock.assert_called_once_with()
self.node_info.release_lock.assert_called_once_with()
def test_ok_ilo_and_drac(self, client_mock, add_mock, filters_mock):
def test_ok_ilo_and_drac(self, client_mock, start_mock, filters_mock):
cli = self._prepare(client_mock)
add_mock.return_value = self.node_info
start_mock.return_value = self.node_info
for name in ('ilo_address', 'drac_host'):
self.node.driver_info = {name: self.bmc_address}
introspect.introspect(self.node.uuid)
add_mock.assert_called_with(self.uuid,
bmc_address=self.bmc_address,
ironic=cli)
start_mock.assert_called_with(self.uuid,
bmc_address=self.bmc_address,
ironic=cli)
def test_power_failure(self, client_mock, add_mock, filters_mock):
def test_power_failure(self, client_mock, start_mock, filters_mock):
cli = self._prepare(client_mock)
cli.node.set_boot_device.side_effect = exceptions.BadRequest()
cli.node.set_power_state.side_effect = exceptions.BadRequest()
add_mock.return_value = self.node_info
start_mock.return_value = self.node_info
introspect.introspect(self.node.uuid)
cli.node.get.assert_called_once_with(self.uuid)
add_mock.assert_called_once_with(self.uuid,
bmc_address=self.bmc_address,
ironic=cli)
start_mock.assert_called_once_with(self.uuid,
bmc_address=self.bmc_address,
ironic=cli)
cli.node.set_boot_device.assert_called_once_with(self.uuid,
'pxe',
persistent=False)
cli.node.set_power_state.assert_called_once_with(self.uuid,
'reboot')
add_mock.return_value.finished.assert_called_once_with(
start_mock.return_value.finished.assert_called_once_with(
error=mock.ANY)
self.node_info.acquire_lock.assert_called_once_with()
self.node_info.release_lock.assert_called_once_with()
def test_unexpected_error(self, client_mock, add_mock, filters_mock):
def test_unexpected_error(self, client_mock, start_mock, filters_mock):
cli = self._prepare(client_mock)
add_mock.return_value = self.node_info
start_mock.return_value = self.node_info
filters_mock.side_effect = RuntimeError()
introspect.introspect(self.node.uuid)
cli.node.get.assert_called_once_with(self.uuid)
add_mock.assert_called_once_with(self.uuid,
bmc_address=self.bmc_address,
ironic=cli)
start_mock.assert_called_once_with(self.uuid,
bmc_address=self.bmc_address,
ironic=cli)
self.assertFalse(cli.node.set_boot_device.called)
add_mock.return_value.finished.assert_called_once_with(
start_mock.return_value.finished.assert_called_once_with(
error=mock.ANY)
self.node_info.acquire_lock.assert_called_once_with()
self.node_info.release_lock.assert_called_once_with()
def test_no_macs(self, client_mock, add_mock, filters_mock):
def test_no_macs(self, client_mock, start_mock, filters_mock):
cli = self._prepare(client_mock)
self.node_info.ports.return_value = []
add_mock.return_value = self.node_info
start_mock.return_value = self.node_info
introspect.introspect(self.node.uuid)
self.node_info.ports.assert_called_once_with()
add_mock.assert_called_once_with(self.uuid,
bmc_address=self.bmc_address,
ironic=cli)
start_mock.assert_called_once_with(self.uuid,
bmc_address=self.bmc_address,
ironic=cli)
self.assertFalse(self.node_info.add_attribute.called)
self.assertFalse(filters_mock.called)
cli.node.set_boot_device.assert_called_once_with(self.uuid,
@ -152,10 +152,10 @@ class TestIntrospect(BaseTest):
cli.node.set_power_state.assert_called_once_with(self.uuid,
'reboot')
def test_no_lookup_attrs(self, client_mock, add_mock, filters_mock):
def test_no_lookup_attrs(self, client_mock, start_mock, filters_mock):
cli = self._prepare(client_mock)
self.node_info.ports.return_value = []
add_mock.return_value = self.node_info
start_mock.return_value = self.node_info
self.node_info.attributes = {}
introspect.introspect(self.uuid)
@ -168,11 +168,12 @@ class TestIntrospect(BaseTest):
self.node_info.release_lock.assert_called_once_with()
def test_no_lookup_attrs_with_node_not_found_hook(self, client_mock,
add_mock, filters_mock):
start_mock,
filters_mock):
CONF.set_override('node_not_found_hook', 'example', 'processing')
cli = self._prepare(client_mock)
self.node_info.ports.return_value = []
add_mock.return_value = self.node_info
start_mock.return_value = self.node_info
self.node_info.attributes = {}
introspect.introspect(self.uuid)
@ -185,7 +186,7 @@ class TestIntrospect(BaseTest):
cli.node.set_power_state.assert_called_once_with(self.uuid,
'reboot')
def test_failed_to_get_node(self, client_mock, add_mock, filters_mock):
def test_failed_to_get_node(self, client_mock, start_mock, filters_mock):
cli = client_mock.return_value
cli.node.get.side_effect = exceptions.NotFound()
self.assertRaisesRegex(utils.Error,
@ -200,10 +201,10 @@ class TestIntrospect(BaseTest):
self.assertEqual(0, self.node_info.ports.call_count)
self.assertEqual(0, filters_mock.call_count)
self.assertEqual(0, cli.node.set_power_state.call_count)
self.assertFalse(add_mock.called)
self.assertFalse(start_mock.called)
self.assertFalse(self.node_info.acquire_lock.called)
def test_failed_to_validate_node(self, client_mock, add_mock,
def test_failed_to_validate_node(self, client_mock, start_mock,
filters_mock):
cli = client_mock.return_value
cli.node.get.return_value = self.node
@ -219,10 +220,11 @@ class TestIntrospect(BaseTest):
self.assertEqual(0, self.node_info.ports.call_count)
self.assertEqual(0, filters_mock.call_count)
self.assertEqual(0, cli.node.set_power_state.call_count)
self.assertFalse(add_mock.called)
self.assertFalse(start_mock.called)
self.assertFalse(self.node_info.acquire_lock.called)
def test_wrong_provision_state(self, client_mock, add_mock, filters_mock):
def test_wrong_provision_state(self, client_mock, start_mock,
filters_mock):
self.node.provision_state = 'active'
cli = client_mock.return_value
cli.node.get.return_value = self.node
@ -234,20 +236,20 @@ class TestIntrospect(BaseTest):
self.assertEqual(0, self.node_info.ports.call_count)
self.assertEqual(0, filters_mock.call_count)
self.assertEqual(0, cli.node.set_power_state.call_count)
self.assertFalse(add_mock.called)
self.assertFalse(start_mock.called)
self.assertFalse(self.node_info.acquire_lock.called)
@mock.patch.object(time, 'sleep')
@mock.patch.object(time, 'time')
def test_sleep_no_pxe_ssh(self, time_mock, sleep_mock, client_mock,
add_mock, filters_mock):
start_mock, filters_mock):
self.node.driver = 'pxe_ipmitool'
time_mock.return_value = 42
introspect._LAST_INTROSPECTION_TIME = 40
CONF.set_override('introspection_delay', 10)
cli = self._prepare(client_mock)
add_mock.return_value = self.node_info
start_mock.return_value = self.node_info
introspect.introspect(self.uuid)
@ -263,14 +265,14 @@ class TestIntrospect(BaseTest):
@mock.patch.object(time, 'sleep')
@mock.patch.object(time, 'time')
def test_sleep_with_pxe_ssh(self, time_mock, sleep_mock, client_mock,
add_mock, filters_mock):
start_mock, filters_mock):
self.node.driver = 'pxe_ssh'
time_mock.return_value = 42
introspect._LAST_INTROSPECTION_TIME = 40
CONF.set_override('introspection_delay', 10)
cli = self._prepare(client_mock)
add_mock.return_value = self.node_info
start_mock.return_value = self.node_info
introspect.introspect(self.uuid)
@ -286,7 +288,7 @@ class TestIntrospect(BaseTest):
@mock.patch.object(time, 'sleep')
@mock.patch.object(time, 'time')
def test_sleep_not_needed_with_pxe_ssh(self, time_mock, sleep_mock,
client_mock, add_mock,
client_mock, start_mock,
filters_mock):
self.node.driver = 'agent_ssh'
time_mock.return_value = 100
@ -294,7 +296,7 @@ class TestIntrospect(BaseTest):
CONF.set_override('introspection_delay', 10)
cli = self._prepare(client_mock)
add_mock.return_value = self.node_info