Split _conductor_audit into individual audits
Currently, the sysinv conductor audit _conductor_audit runs a set of several sub-audits. The purpose of this story is to split that audit into individual audits. Note that this change keeps the same configuration for the periodic intervals for now; only the structure of the code has changed. TEST PLAN: PASS: AIO-SX: manually replaced these files in a Debian installation and no crashes happened. PASS: rebuilt the whole system with the modification with no crashes. PASS: installed the new ISO in a lab, including bootstrap and the first 'host-unlock'. PASS: AIO-SX: verified that the sysinv conductor audits (periodic tasks) are being called. PASS: changed the install_state interval to different values (for test purposes). PASS: followed the sysinv logs looking for errors; no error was found. PASS: confirmed that all audits are being called by the periodic tasks. Story: 2010087 Task: 45646 Depends-On: https://review.opendev.org/c/starlingx/config/+/848330 Signed-off-by: Bruno Costa <bruno.costa@windriver.com> Change-Id: I215fae7ccbbbaadd7b93f5a8efc11df0834d411a
This commit is contained in:
parent
a71c4b6c4c
commit
d2e2a67224
@ -5657,6 +5657,8 @@ class ConductorManager(service.PeriodicService):
|
|||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@periodic_task.periodic_task(
|
||||||
|
spacing=CONF.conductor_periodic_task_intervals.controller_config_active_apply)
|
||||||
def _controller_config_active_apply(self, context):
|
def _controller_config_active_apply(self, context):
|
||||||
"""Check whether target config has been applied to active
|
"""Check whether target config has been applied to active
|
||||||
controller to run postprocessing"""
|
controller to run postprocessing"""
|
||||||
@ -5768,86 +5770,91 @@ class ConductorManager(service.PeriodicService):
|
|||||||
|
|
||||||
return all_fs_resized
|
return all_fs_resized
|
||||||
|
|
||||||
def _audit_ihost_action(self, ihost):
|
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.ihost_action)
|
||||||
|
def _audit_ihost_action(self, context):
|
||||||
"""Audit whether the ihost_action needs to be terminated or escalated.
|
"""Audit whether the ihost_action needs to be terminated or escalated.
|
||||||
"""
|
"""
|
||||||
|
hosts = self.dbapi.ihost_get_list()
|
||||||
|
for ihost in hosts:
|
||||||
|
# only audit configured hosts
|
||||||
|
if ihost.personality:
|
||||||
|
if ihost.administrative == constants.ADMIN_UNLOCKED:
|
||||||
|
ihost_action_str = ihost.ihost_action or ""
|
||||||
|
|
||||||
if ihost.administrative == constants.ADMIN_UNLOCKED:
|
if (ihost_action_str.startswith(constants.FORCE_LOCK_ACTION) or
|
||||||
ihost_action_str = ihost.ihost_action or ""
|
ihost_action_str.startswith(constants.LOCK_ACTION)):
|
||||||
|
|
||||||
if (ihost_action_str.startswith(constants.FORCE_LOCK_ACTION) or
|
task_str = ihost.task or ""
|
||||||
ihost_action_str.startswith(constants.LOCK_ACTION)):
|
if (('--' in ihost_action_str and
|
||||||
|
ihost_action_str.startswith(
|
||||||
|
constants.FORCE_LOCK_ACTION)) or
|
||||||
|
('----------' in ihost_action_str and
|
||||||
|
ihost_action_str.startswith(constants.LOCK_ACTION))):
|
||||||
|
|
||||||
task_str = ihost.task or ""
|
ihost_mtc = ihost.as_dict()
|
||||||
if (('--' in ihost_action_str and
|
keepkeys = ['ihost_action', 'vim_progress_status']
|
||||||
ihost_action_str.startswith(
|
ihost_mtc = cutils.removekeys_nonmtce(ihost_mtc,
|
||||||
constants.FORCE_LOCK_ACTION)) or
|
keepkeys)
|
||||||
('----------' in ihost_action_str and
|
|
||||||
ihost_action_str.startswith(constants.LOCK_ACTION))):
|
|
||||||
|
|
||||||
ihost_mtc = ihost.as_dict()
|
if ihost_action_str.startswith(constants.FORCE_LOCK_ACTION):
|
||||||
keepkeys = ['ihost_action', 'vim_progress_status']
|
timeout_in_secs = 6
|
||||||
ihost_mtc = cutils.removekeys_nonmtce(ihost_mtc,
|
ihost_mtc['operation'] = 'modify'
|
||||||
keepkeys)
|
ihost_mtc['action'] = constants.FORCE_LOCK_ACTION
|
||||||
|
ihost_mtc['task'] = constants.FORCE_LOCKING
|
||||||
|
LOG.warn("ihost_action override %s" %
|
||||||
|
ihost_mtc)
|
||||||
|
mtce_api.host_modify(
|
||||||
|
self._api_token, self._mtc_address, self._mtc_port,
|
||||||
|
ihost_mtc, timeout_in_secs)
|
||||||
|
|
||||||
if ihost_action_str.startswith(constants.FORCE_LOCK_ACTION):
|
# need time for FORCE_LOCK mtce to clear
|
||||||
timeout_in_secs = 6
|
if ('----' in ihost_action_str):
|
||||||
ihost_mtc['operation'] = 'modify'
|
ihost_action_str = ""
|
||||||
ihost_mtc['action'] = constants.FORCE_LOCK_ACTION
|
else:
|
||||||
ihost_mtc['task'] = constants.FORCE_LOCKING
|
ihost_action_str += "-"
|
||||||
LOG.warn("ihost_action override %s" %
|
|
||||||
ihost_mtc)
|
|
||||||
mtce_api.host_modify(
|
|
||||||
self._api_token, self._mtc_address, self._mtc_port,
|
|
||||||
ihost_mtc, timeout_in_secs)
|
|
||||||
|
|
||||||
# need time for FORCE_LOCK mtce to clear
|
if (task_str.startswith(constants.FORCE_LOCKING) or
|
||||||
if ('----' in ihost_action_str):
|
task_str.startswith(constants.LOCKING)):
|
||||||
ihost_action_str = ""
|
val = {'task': "",
|
||||||
else:
|
'ihost_action': ihost_action_str,
|
||||||
ihost_action_str += "-"
|
'vim_progress_status': ""}
|
||||||
|
else:
|
||||||
|
val = {'ihost_action': ihost_action_str,
|
||||||
|
'vim_progress_status': ""}
|
||||||
|
else:
|
||||||
|
ihost_action_str += "-"
|
||||||
|
if (task_str.startswith(constants.FORCE_LOCKING) or
|
||||||
|
task_str.startswith(constants.LOCKING)):
|
||||||
|
task_str += "-"
|
||||||
|
val = {'task': task_str,
|
||||||
|
'ihost_action': ihost_action_str}
|
||||||
|
else:
|
||||||
|
val = {'ihost_action': ihost_action_str}
|
||||||
|
|
||||||
|
self.dbapi.ihost_update(ihost.uuid, val)
|
||||||
|
else: # Administrative locked already
|
||||||
|
task_str = ihost.task or ""
|
||||||
if (task_str.startswith(constants.FORCE_LOCKING) or
|
if (task_str.startswith(constants.FORCE_LOCKING) or
|
||||||
task_str.startswith(constants.LOCKING)):
|
task_str.startswith(constants.LOCKING)):
|
||||||
val = {'task': "",
|
val = {'task': ""}
|
||||||
'ihost_action': ihost_action_str,
|
self.dbapi.ihost_update(ihost.uuid, val)
|
||||||
'vim_progress_status': ""}
|
|
||||||
|
vim_progress_status_str = ihost.get('vim_progress_status') or ""
|
||||||
|
if (vim_progress_status_str and
|
||||||
|
(vim_progress_status_str != constants.VIM_SERVICES_ENABLED) and
|
||||||
|
(vim_progress_status_str != constants.VIM_SERVICES_DISABLED)):
|
||||||
|
if ('..' in vim_progress_status_str):
|
||||||
|
LOG.info("Audit clearing vim_progress_status=%s" %
|
||||||
|
vim_progress_status_str)
|
||||||
|
vim_progress_status_str = ""
|
||||||
else:
|
else:
|
||||||
val = {'ihost_action': ihost_action_str,
|
vim_progress_status_str += ".."
|
||||||
'vim_progress_status': ""}
|
|
||||||
else:
|
|
||||||
ihost_action_str += "-"
|
|
||||||
if (task_str.startswith(constants.FORCE_LOCKING) or
|
|
||||||
task_str.startswith(constants.LOCKING)):
|
|
||||||
task_str += "-"
|
|
||||||
val = {'task': task_str,
|
|
||||||
'ihost_action': ihost_action_str}
|
|
||||||
else:
|
|
||||||
val = {'ihost_action': ihost_action_str}
|
|
||||||
|
|
||||||
self.dbapi.ihost_update(ihost.uuid, val)
|
val = {'vim_progress_status': vim_progress_status_str}
|
||||||
else: # Administrative locked already
|
self.dbapi.ihost_update(ihost.uuid, val)
|
||||||
task_str = ihost.task or ""
|
|
||||||
if (task_str.startswith(constants.FORCE_LOCKING) or
|
|
||||||
task_str.startswith(constants.LOCKING)):
|
|
||||||
val = {'task': ""}
|
|
||||||
self.dbapi.ihost_update(ihost.uuid, val)
|
|
||||||
|
|
||||||
vim_progress_status_str = ihost.get('vim_progress_status') or ""
|
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.upgrade_status)
|
||||||
if (vim_progress_status_str and
|
def _audit_upgrade_status(self, context):
|
||||||
(vim_progress_status_str != constants.VIM_SERVICES_ENABLED) and
|
|
||||||
(vim_progress_status_str != constants.VIM_SERVICES_DISABLED)):
|
|
||||||
if ('..' in vim_progress_status_str):
|
|
||||||
LOG.info("Audit clearing vim_progress_status=%s" %
|
|
||||||
vim_progress_status_str)
|
|
||||||
vim_progress_status_str = ""
|
|
||||||
else:
|
|
||||||
vim_progress_status_str += ".."
|
|
||||||
|
|
||||||
val = {'vim_progress_status': vim_progress_status_str}
|
|
||||||
self.dbapi.ihost_update(ihost.uuid, val)
|
|
||||||
|
|
||||||
def _audit_upgrade_status(self):
|
|
||||||
"""Audit upgrade related status"""
|
"""Audit upgrade related status"""
|
||||||
try:
|
try:
|
||||||
upgrade = self.dbapi.software_upgrade_get_one()
|
upgrade = self.dbapi.software_upgrade_get_one()
|
||||||
@ -5928,19 +5935,21 @@ class ConductorManager(service.PeriodicService):
|
|||||||
LOG.info("Ceph Upgrade: Exception %s" % e)
|
LOG.info("Ceph Upgrade: Exception %s" % e)
|
||||||
LOG.info("Ceph Upgrade: Enabled monitor msgr2")
|
LOG.info("Ceph Upgrade: Enabled monitor msgr2")
|
||||||
|
|
||||||
def _audit_install_states(self, hosts):
|
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.install_states)
|
||||||
|
def _audit_install_states(self, context):
|
||||||
# A node could shutdown during it's installation and the install_state
|
# A node could shutdown during it's installation and the install_state
|
||||||
# for example could get stuck at the value "installing". To avoid
|
# for example could get stuck at the value "installing". To avoid
|
||||||
# this situation we audit the sanity of the states by appending the
|
# this situation we audit the sanity of the states by appending the
|
||||||
# character '+' to the states in the database. After 15 minutes of the
|
# character '+' to the states in the database. After 15 minutes of the
|
||||||
# states not changing, set the install_state to failed.
|
# states not changing, set the install_state to failed.
|
||||||
|
|
||||||
# The audit's interval is 60sec
|
# The duration (in minutes) to wait until the install_state fails
|
||||||
MAX_COUNT = 15
|
MAX_COUNT = 15
|
||||||
|
|
||||||
# Allow longer duration for booting phase
|
# Allow longer duration for booting phase
|
||||||
MAX_COUNT_BOOTING = 40
|
MAX_COUNT_BOOTING = 40
|
||||||
|
|
||||||
|
hosts = self.dbapi.ihost_get_list()
|
||||||
for host in hosts:
|
for host in hosts:
|
||||||
LOG.debug("Auditing %s, install_state is %s",
|
LOG.debug("Auditing %s, install_state is %s",
|
||||||
host.hostname, host.install_state)
|
host.hostname, host.install_state)
|
||||||
@ -5961,9 +5970,16 @@ class ConductorManager(service.PeriodicService):
|
|||||||
if (install_state != constants.INSTALL_STATE_INSTALLED and
|
if (install_state != constants.INSTALL_STATE_INSTALLED and
|
||||||
install_state !=
|
install_state !=
|
||||||
constants.INSTALL_STATE_COMPLETED):
|
constants.INSTALL_STATE_COMPLETED):
|
||||||
|
# define the quantity of '+' signs that will be added to install_state_info
|
||||||
|
# accordingly to the interval set to this audit.
|
||||||
|
periodic_interval = max(60,
|
||||||
|
CONF.conductor_periodic_task_intervals.install_states)
|
||||||
|
factor = periodic_interval // 60 + \
|
||||||
|
(1 if periodic_interval % 60 > 0 else 0)
|
||||||
if (install_state ==
|
if (install_state ==
|
||||||
constants.INSTALL_STATE_INSTALLING and
|
constants.INSTALL_STATE_INSTALLING and
|
||||||
host.install_state_info is not None):
|
host.install_state_info is not None):
|
||||||
|
host.install_state_info += factor * "+"
|
||||||
if host.install_state_info.count('+') >= MAX_COUNT:
|
if host.install_state_info.count('+') >= MAX_COUNT:
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"Auditing %s, install_state changed from "
|
"Auditing %s, install_state changed from "
|
||||||
@ -5972,13 +5988,12 @@ class ConductorManager(service.PeriodicService):
|
|||||||
constants.INSTALL_STATE_FAILED)
|
constants.INSTALL_STATE_FAILED)
|
||||||
host.install_state = \
|
host.install_state = \
|
||||||
constants.INSTALL_STATE_FAILED
|
constants.INSTALL_STATE_FAILED
|
||||||
else:
|
|
||||||
host.install_state_info += "+"
|
|
||||||
else:
|
else:
|
||||||
if install_state == constants.INSTALL_STATE_BOOTING:
|
if install_state == constants.INSTALL_STATE_BOOTING:
|
||||||
max_count = MAX_COUNT_BOOTING
|
max_count = MAX_COUNT_BOOTING
|
||||||
else:
|
else:
|
||||||
max_count = MAX_COUNT
|
max_count = MAX_COUNT
|
||||||
|
host.install_state_info += factor * "+"
|
||||||
if host.install_state.count('+') >= max_count:
|
if host.install_state.count('+') >= max_count:
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"Auditing %s, install_state changed from "
|
"Auditing %s, install_state changed from "
|
||||||
@ -5987,8 +6002,6 @@ class ConductorManager(service.PeriodicService):
|
|||||||
constants.INSTALL_STATE_FAILED)
|
constants.INSTALL_STATE_FAILED)
|
||||||
host.install_state = \
|
host.install_state = \
|
||||||
constants.INSTALL_STATE_FAILED
|
constants.INSTALL_STATE_FAILED
|
||||||
else:
|
|
||||||
host.install_state += "+"
|
|
||||||
|
|
||||||
# It is possible we get stuck in an installed failed state. For
|
# It is possible we get stuck in an installed failed state. For
|
||||||
# example if a node gets powered down during an install booting
|
# example if a node gets powered down during an install booting
|
||||||
@ -6090,6 +6103,11 @@ class ConductorManager(service.PeriodicService):
|
|||||||
LOG.error("Removed unsupported deferred config_type %s" %
|
LOG.error("Removed unsupported deferred config_type %s" %
|
||||||
config_type)
|
config_type)
|
||||||
|
|
||||||
|
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.deferred_runtime_config)
|
||||||
|
def _audit_deferred_runtime_config_periodic(self, context):
|
||||||
|
# check whether there are deferred runtime manifests to apply
|
||||||
|
self._audit_deferred_runtime_config(context)
|
||||||
|
|
||||||
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.kubernetes_local_secrets)
|
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.kubernetes_local_secrets)
|
||||||
def _kubernetes_local_secrets_audit(self, context):
|
def _kubernetes_local_secrets_audit(self, context):
|
||||||
# Audit kubernetes local registry secrets info
|
# Audit kubernetes local registry secrets info
|
||||||
@ -6097,38 +6115,8 @@ class ConductorManager(service.PeriodicService):
|
|||||||
if self._app:
|
if self._app:
|
||||||
self._app.audit_local_registry_secrets(context)
|
self._app.audit_local_registry_secrets(context)
|
||||||
|
|
||||||
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.default)
|
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.kubernetes_labels)
|
||||||
def _conductor_audit(self, context):
|
def _audit_kubernetes_labels(self, context):
|
||||||
# periodically, perform audit of inventory
|
|
||||||
LOG.debug("Sysinv Conductor running periodic audit task.")
|
|
||||||
|
|
||||||
# check whether there are deferred runtime manifests to apply
|
|
||||||
self._audit_deferred_runtime_config(context)
|
|
||||||
|
|
||||||
# check whether we may have just become active with target config
|
|
||||||
self._controller_config_active_apply(context)
|
|
||||||
|
|
||||||
# Audit upgrade status
|
|
||||||
self._audit_upgrade_status()
|
|
||||||
|
|
||||||
hosts = self.dbapi.ihost_get_list()
|
|
||||||
|
|
||||||
# Audit install states
|
|
||||||
self._audit_install_states(hosts)
|
|
||||||
|
|
||||||
# Audit kubernetes node labels
|
|
||||||
self._audit_kubernetes_labels(hosts)
|
|
||||||
|
|
||||||
# Audit image conversion
|
|
||||||
self._audit_image_conversion(hosts)
|
|
||||||
|
|
||||||
for host in hosts:
|
|
||||||
# only audit configured hosts
|
|
||||||
if not host.personality:
|
|
||||||
continue
|
|
||||||
self._audit_ihost_action(host)
|
|
||||||
|
|
||||||
def _audit_kubernetes_labels(self, hosts):
|
|
||||||
if not cutils.is_initial_config_complete():
|
if not cutils.is_initial_config_complete():
|
||||||
LOG.debug("_audit_kubernetes_labels skip")
|
LOG.debug("_audit_kubernetes_labels skip")
|
||||||
return
|
return
|
||||||
@ -6137,6 +6125,7 @@ class ConductorManager(service.PeriodicService):
|
|||||||
sysinv_labels = self.dbapi.label_get_all()
|
sysinv_labels = self.dbapi.label_get_all()
|
||||||
nodes = self._kube.kube_get_nodes()
|
nodes = self._kube.kube_get_nodes()
|
||||||
|
|
||||||
|
hosts = self.dbapi.ihost_get_list()
|
||||||
for host in hosts:
|
for host in hosts:
|
||||||
try:
|
try:
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
@ -6191,14 +6180,15 @@ class ConductorManager(service.PeriodicService):
|
|||||||
elif bk.backend in self._stor_bck_op_timeouts:
|
elif bk.backend in self._stor_bck_op_timeouts:
|
||||||
del self._stor_bck_op_timeouts[bk.backend]
|
del self._stor_bck_op_timeouts[bk.backend]
|
||||||
|
|
||||||
def _audit_image_conversion(self, hosts):
|
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.image_conversion)
|
||||||
|
def _audit_image_conversion(self, context):
|
||||||
"""
|
"""
|
||||||
Raise alarm if:
|
Raise alarm if:
|
||||||
- image-conversion is not added on both controllers;
|
- image-conversion is not added on both controllers;
|
||||||
- the size of the filesystem is not the same
|
- the size of the filesystem is not the same
|
||||||
on both controllers
|
on both controllers
|
||||||
"""
|
"""
|
||||||
chosts = [h for h in hosts if h.personality == constants.CONTROLLER]
|
chosts = self.dbapi.ihost_get_by_personality(constants.CONTROLLER)
|
||||||
if len(chosts) <= 1:
|
if len(chosts) <= 1:
|
||||||
# No alarm is raised if setup has only one controller
|
# No alarm is raised if setup has only one controller
|
||||||
return
|
return
|
||||||
|
Loading…
Reference in New Issue
Block a user