ironic-inspector/ironic_inspector/conductor/manager.py
Anton Arefiev 3fe42b53fd SQLAlchemy 2.0 Support
Primarily remove the workaround added in
Ia6d512ff2ae417bab938cb095fbb0884d195010a which added
continued use of autocommit, which is incompatible with
SQLAlchemy 2.0.

Also set the environment for unit tests to report compatability
warnings, although it appears none are being reported at this time.

Also cuts out the db upgrade cruft to only use the online database
migration code through oslo_db's enginefacade, which has the smarts
to handle online or offline migrations.

And then, retools unit/functional test data storage to utlize sqlite,
and in that re-tooled the queries to prevent locking conditions
which could exist with queries, and some additional refactoring/cleanup.

Also, don't mock and test time.sleep().

Additionally, it looks like we have discovered the root cause of the
memory/connection leakage issue which has been observed, due to the
way lists of nodes are processed/returned.

This change was based upon the work in
I506da42a9891a245831f325e34bec92e0a3f33f0 which is included in
this commit as the entire database structure and interaction
has been modified for ironic-inspector.

Co-Authored-By: aarefiev <aarefiev@mirantis.com>
Story: 2009727
Task: 44132
Change-Id: Ic88eb9dec5fddc924a72d9a23c17a304954ebf46
2022-12-15 09:28:55 -08:00

241 lines
8.7 KiB
Python

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import traceback as traceback_mod
from eventlet import semaphore
from futurist import periodics
from ironic_lib import mdns
from oslo_config import cfg
from oslo_log import log
import oslo_messaging as messaging
from oslo_utils import excutils
from oslo_utils import reflection
import tooz
from ironic_inspector.common import coordination
from ironic_inspector.common.i18n import _
from ironic_inspector.common import ironic as ir_utils
from ironic_inspector.common import keystone
from ironic_inspector.db import api as dbapi
from ironic_inspector import introspect
from ironic_inspector import node_cache
from ironic_inspector.plugins import base as plugins_base
from ironic_inspector import process
from ironic_inspector.pxe_filter import base as pxe_filter
from ironic_inspector import utils
LOG = log.getLogger(__name__)
CONF = cfg.CONF
MANAGER_TOPIC = 'ironic_inspector.conductor'
class ConductorManager(object):
"""ironic inspector conductor manager"""
RPC_API_VERSION = '1.3'
target = messaging.Target(version=RPC_API_VERSION)
def __init__(self):
self._periodics_worker = None
self._zeroconf = None
self._shutting_down = semaphore.Semaphore()
self.coordinator = None
self.dbapi = None
def init_host(self):
"""Initialize Worker host
Init db connection, load and validate processing
hooks, runs periodic tasks.
:returns None
"""
if CONF.processing.store_data == 'none':
LOG.warning('Introspection data will not be stored. Change '
'"[processing] store_data" option if this is not '
'the desired behavior')
else:
LOG.info('Introspection data will be stored in the %s backend',
CONF.processing.store_data)
if not self.dbapi:
self.dbapi = dbapi.init()
self.coordinator = None
try:
self.coordinator = coordination.get_coordinator(prefix='conductor')
self.coordinator.start(heartbeat=True)
self.coordinator.join_group()
except Exception as exc:
if CONF.standalone:
LOG.info('Coordination backend cannot be started, assuming '
'no other instances are running. Error: %s', exc)
self.coordinator = None
else:
with excutils.save_and_reraise_exception():
LOG.critical('Failure when connecting to coordination '
'backend', exc_info=True)
self.del_host()
else:
LOG.info('Successfully connected to coordination backend.')
try:
hooks = plugins_base.validate_processing_hooks()
except Exception as exc:
LOG.critical(str(exc))
sys.exit(1)
LOG.info('Enabled processing hooks: %s', [h.name for h in hooks])
driver = pxe_filter.driver()
driver.init_filter()
periodic_clean_up_ = periodics.periodic(
spacing=CONF.clean_up_period,
enabled=(CONF.clean_up_period != 0)
)(periodic_clean_up)
sync_with_ironic_ = periodics.periodic(
spacing=CONF.clean_up_period,
enabled=(CONF.clean_up_period != 0)
)(sync_with_ironic)
callables = [(periodic_clean_up_, None, None),
(sync_with_ironic_, (self,), None)]
driver_task = driver.get_periodic_sync_task()
if driver_task is not None:
callables.append((driver_task, None, None))
# run elections periodically if we have a coordinator
# that we were able to start
if (self.coordinator and self.coordinator.started):
periodic_leader_election_ = periodics.periodic(
spacing=CONF.leader_election_interval
)(periodic_leader_election)
callables.append((periodic_leader_election_, (self,), None))
self._periodics_worker = periodics.PeriodicWorker(
callables=callables,
executor_factory=periodics.ExistingExecutor(utils.executor()),
on_failure=self._periodics_watchdog)
utils.executor().submit(self._periodics_worker.start)
if CONF.enable_mdns:
endpoint = keystone.get_endpoint('service_catalog')
self._zeroconf = mdns.Zeroconf()
self._zeroconf.register_service('baremetal-introspection',
endpoint)
def del_host(self):
"""Shutdown the ironic inspector conductor service."""
if self.coordinator is not None:
try:
if self.coordinator.started:
self.coordinator.leave_group()
self.coordinator.stop()
except tooz.ToozError:
LOG.exception('Failed to stop coordinator')
if not self._shutting_down.acquire(blocking=False):
LOG.warning('Attempted to shut down while already shutting down')
return
pxe_filter.driver().tear_down_filter()
if self._periodics_worker is not None:
try:
self._periodics_worker.stop()
self._periodics_worker.wait()
except Exception as e:
LOG.exception('Service error occurred when stopping '
'periodic workers. Error: %s', e)
self._periodics_worker = None
if utils.executor().alive:
utils.executor().shutdown(wait=True)
if self._zeroconf is not None:
self._zeroconf.close()
self._zeroconf = None
self.dbapi = None
self._shutting_down.release()
LOG.info('Shut down successfully')
def _periodics_watchdog(self, callable_, activity, spacing, exc_info,
traceback=None):
LOG.exception("The periodic %(callable)s failed with: %(exception)s", {
'exception': ''.join(traceback_mod.format_exception(*exc_info)),
'callable': reflection.get_callable_name(callable_)})
@messaging.expected_exceptions(utils.Error)
def do_introspection(self, context, node_id, token=None,
manage_boot=True):
introspect.introspect(node_id, token=token, manage_boot=manage_boot)
@messaging.expected_exceptions(utils.Error)
def do_abort(self, context, node_id, token=None):
introspect.abort(node_id, token=token)
@messaging.expected_exceptions(utils.Error)
def do_reapply(self, context, node_uuid, token=None, data=None):
if not data:
try:
data = process.get_introspection_data(node_uuid,
processed=False,
get_json=True)
except utils.IntrospectionDataStoreDisabled:
raise utils.Error(_('Inspector is not configured to store '
'introspection data. Set the '
'[processing]store_data configuration '
'option to change this.'))
else:
process.store_introspection_data(node_uuid, data, processed=False)
process.reapply(node_uuid, data=data)
@messaging.expected_exceptions(utils.Error)
def do_continue(self, context, data):
return process.process(data)
def periodic_clean_up(): # pragma: no cover
if node_cache.clean_up():
pxe_filter.driver().sync(ir_utils.get_client())
def sync_with_ironic(conductor):
if (conductor.coordinator is not None
and not conductor.coordinator.is_leader):
LOG.debug('The conductor is not a leader, skipping syncing '
'with ironic')
return
LOG.debug('Syncing with ironic')
ironic = ir_utils.get_client()
# TODO(yuikotakada): pagination
ironic_nodes = ironic.nodes(fields=["uuid"], limit=None)
ironic_node_uuids = {node.id for node in ironic_nodes}
node_cache.delete_nodes_not_in_list(ironic_node_uuids)
def periodic_leader_election(conductor):
if conductor.coordinator is not None:
conductor.coordinator.run_elect_coordinator()
return