
Primarily, remove the workaround added in Ia6d512ff2ae417bab938cb095fbb0884d195010a, which kept autocommit in use; autocommit is incompatible with SQLAlchemy 2.0. Also set the unit test environment to report compatibility warnings, although none appear to be reported at this time.

Also cut out the db upgrade cruft so that only the online database migration code is used, through oslo_db's enginefacade, which can handle online or offline migrations. Then retool unit/functional test data storage to utilize sqlite, and as part of that rework the queries to prevent locking conditions which could otherwise occur, along with some additional refactoring/cleanup. Also, don't mock and test time.sleep().

Additionally, it looks like we have discovered the root cause of the memory/connection leakage issue which has been observed: the way lists of nodes are processed and returned.

This change is based upon the work in I506da42a9891a245831f325e34bec92e0a3f33f0, which is included in this commit, as the entire database structure and interaction has been modified for ironic-inspector.

Co-Authored-By: aarefiev <aarefiev@mirantis.com>
Story: 2009727
Task: 44132
Change-Id: Ic88eb9dec5fddc924a72d9a23c17a304954ebf46
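For reference, the enginefacade pattern referred to above typically looks like the sketch below (a simplified illustration of the usual oslo_db pattern, not the exact contents of ironic_inspector/db/api.py); sessions come from oslo_db's reader and writer context managers instead of a shared autocommit engine:

    import threading

    from oslo_db.sqlalchemy import enginefacade

    _CONTEXT = threading.local()


    def session_for_read():
        # Read-only session scoped to the thread-local context.
        return enginefacade.reader.using(_CONTEXT)


    def session_for_write():
        # Transactional session; the transaction is committed (or rolled
        # back on error) when the outermost block exits.
        return enginefacade.writer.using(_CONTEXT)

Callers then wrap their queries in short-lived "with session_for_read() as session:" blocks rather than holding a long-lived autocommit session open.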
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import traceback as traceback_mod

from eventlet import semaphore
from futurist import periodics
from ironic_lib import mdns
from oslo_config import cfg
from oslo_log import log
import oslo_messaging as messaging
from oslo_utils import excutils
from oslo_utils import reflection
import tooz

from ironic_inspector.common import coordination
from ironic_inspector.common.i18n import _
from ironic_inspector.common import ironic as ir_utils
from ironic_inspector.common import keystone
from ironic_inspector.db import api as dbapi
from ironic_inspector import introspect
from ironic_inspector import node_cache
from ironic_inspector.plugins import base as plugins_base
from ironic_inspector import process
from ironic_inspector.pxe_filter import base as pxe_filter
from ironic_inspector import utils

LOG = log.getLogger(__name__)
CONF = cfg.CONF
MANAGER_TOPIC = 'ironic_inspector.conductor'


class ConductorManager(object):
    """ironic inspector conductor manager"""
    RPC_API_VERSION = '1.3'

    target = messaging.Target(version=RPC_API_VERSION)

    def __init__(self):
        self._periodics_worker = None
        self._zeroconf = None
        self._shutting_down = semaphore.Semaphore()
        self.coordinator = None
        self.dbapi = None

    def init_host(self):
        """Initialize Worker host

        Initialize the db connection, load and validate processing
        hooks, and run periodic tasks.

        :returns: None
        """
        if CONF.processing.store_data == 'none':
            LOG.warning('Introspection data will not be stored. Change '
                        '"[processing] store_data" option if this is not '
                        'the desired behavior')
        else:
            LOG.info('Introspection data will be stored in the %s backend',
                     CONF.processing.store_data)

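        # Create the database API access object only once; repeated
        # init_host() calls reuse the existing handle.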
        if not self.dbapi:
            self.dbapi = dbapi.init()

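        # Join the conductor group through the tooz coordination backend.
        # In standalone mode a coordination failure is tolerated; otherwise
        # it is fatal and the service is torn down again.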
        self.coordinator = None
        try:
            self.coordinator = coordination.get_coordinator(prefix='conductor')
            self.coordinator.start(heartbeat=True)
            self.coordinator.join_group()
        except Exception as exc:
            if CONF.standalone:
                LOG.info('Coordination backend cannot be started, assuming '
                         'no other instances are running. Error: %s', exc)
                self.coordinator = None
            else:
                with excutils.save_and_reraise_exception():
                    LOG.critical('Failure when connecting to coordination '
                                 'backend', exc_info=True)
                    self.del_host()
        else:
            LOG.info('Successfully connected to coordination backend.')

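        # Processing hooks are validated once at startup; a broken hook
        # configuration is treated as fatal.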
        try:
            hooks = plugins_base.validate_processing_hooks()
        except Exception as exc:
            LOG.critical(str(exc))
            sys.exit(1)
        LOG.info('Enabled processing hooks: %s', [h.name for h in hooks])

        driver = pxe_filter.driver()
        driver.init_filter()

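        # The periodic clean-up and the ironic sync share the clean_up_period
        # spacing; setting it to 0 disables both tasks.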
        periodic_clean_up_ = periodics.periodic(
            spacing=CONF.clean_up_period,
            enabled=(CONF.clean_up_period != 0)
        )(periodic_clean_up)

        sync_with_ironic_ = periodics.periodic(
            spacing=CONF.clean_up_period,
            enabled=(CONF.clean_up_period != 0)
        )(sync_with_ironic)

        callables = [(periodic_clean_up_, None, None),
                     (sync_with_ironic_, (self,), None)]

        driver_task = driver.get_periodic_sync_task()
        if driver_task is not None:
            callables.append((driver_task, None, None))

        # run elections periodically if we have a coordinator
        # that we were able to start
        if (self.coordinator and self.coordinator.started):
            periodic_leader_election_ = periodics.periodic(
                spacing=CONF.leader_election_interval
            )(periodic_leader_election)
            callables.append((periodic_leader_election_, (self,), None))

        self._periodics_worker = periodics.PeriodicWorker(
            callables=callables,
            executor_factory=periodics.ExistingExecutor(utils.executor()),
            on_failure=self._periodics_watchdog)

        utils.executor().submit(self._periodics_worker.start)

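        # Optionally advertise the introspection endpoint over mDNS.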
        if CONF.enable_mdns:
            endpoint = keystone.get_endpoint('service_catalog')
            self._zeroconf = mdns.Zeroconf()
            self._zeroconf.register_service('baremetal-introspection',
                                            endpoint)

    def del_host(self):
        """Shutdown the ironic inspector conductor service."""

        if self.coordinator is not None:
            try:
                if self.coordinator.started:
                    self.coordinator.leave_group()
                    self.coordinator.stop()
            except tooz.ToozError:
                LOG.exception('Failed to stop coordinator')

        if not self._shutting_down.acquire(blocking=False):
            LOG.warning('Attempted to shut down while already shutting down')
            return

        pxe_filter.driver().tear_down_filter()
        if self._periodics_worker is not None:
            try:
                self._periodics_worker.stop()
                self._periodics_worker.wait()
            except Exception as e:
                LOG.exception('Service error occurred when stopping '
                              'periodic workers. Error: %s', e)
            self._periodics_worker = None

        if utils.executor().alive:
            utils.executor().shutdown(wait=True)

        if self._zeroconf is not None:
            self._zeroconf.close()
            self._zeroconf = None

        self.dbapi = None

        self._shutting_down.release()

        LOG.info('Shut down successfully')

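    # Failures raised from periodic tasks are logged together with their
    # formatted traceback.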
    def _periodics_watchdog(self, callable_, activity, spacing, exc_info,
                            traceback=None):
        LOG.exception("The periodic %(callable)s failed with: %(exception)s", {
            'exception': ''.join(traceback_mod.format_exception(*exc_info)),
            'callable': reflection.get_callable_name(callable_)})

    @messaging.expected_exceptions(utils.Error)
    def do_introspection(self, context, node_id, token=None,
                         manage_boot=True):
        introspect.introspect(node_id, token=token, manage_boot=manage_boot)

    @messaging.expected_exceptions(utils.Error)
    def do_abort(self, context, node_id, token=None):
        introspect.abort(node_id, token=token)

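    # If no data is passed in, reapply uses the stored unprocessed
    # introspection data; otherwise the supplied data is stored first.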
    @messaging.expected_exceptions(utils.Error)
    def do_reapply(self, context, node_uuid, token=None, data=None):
        if not data:
            try:
                data = process.get_introspection_data(node_uuid,
                                                      processed=False,
                                                      get_json=True)
            except utils.IntrospectionDataStoreDisabled:
                raise utils.Error(_('Inspector is not configured to store '
                                    'introspection data. Set the '
                                    '[processing]store_data configuration '
                                    'option to change this.'))
        else:
            process.store_introspection_data(node_uuid, data, processed=False)

        process.reapply(node_uuid, data=data)

    @messaging.expected_exceptions(utils.Error)
    def do_continue(self, context, data):
        return process.process(data)


def periodic_clean_up():  # pragma: no cover
    if node_cache.clean_up():
        pxe_filter.driver().sync(ir_utils.get_client())


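# Remove cached nodes that no longer exist in ironic. When coordination is
# enabled, only the current group leader performs the sync.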
def sync_with_ironic(conductor):
    if (conductor.coordinator is not None
            and not conductor.coordinator.is_leader):
        LOG.debug('The conductor is not a leader, skipping syncing '
                  'with ironic')
        return

    LOG.debug('Syncing with ironic')
    ironic = ir_utils.get_client()
    # TODO(yuikotakada): pagination
    ironic_nodes = ironic.nodes(fields=["uuid"], limit=None)
    ironic_node_uuids = {node.id for node in ironic_nodes}
    node_cache.delete_nodes_not_in_list(ironic_node_uuids)


def periodic_leader_election(conductor):
    if conductor.coordinator is not None:
        conductor.coordinator.run_elect_coordinator()
    return
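

# NOTE: the commented block below is a minimal, hypothetical sketch (not part
# of this module, and the helper name is illustrative only) of how a
# ConductorManager is typically exposed over oslo.messaging RPC using
# MANAGER_TOPIC and RPC_API_VERSION defined above.
#
# def _make_rpc_server(host):
#     transport = messaging.get_rpc_transport(CONF)
#     target = messaging.Target(topic=MANAGER_TOPIC, server=host,
#                               version=ConductorManager.RPC_API_VERSION)
#     return messaging.get_rpc_server(transport, target,
#                                     endpoints=[ConductorManager()])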