Merge "Split _conductor_audit into individual audits"
This commit is contained in:
commit
86a463bf2e
@ -5657,6 +5657,8 @@ class ConductorManager(service.PeriodicService):
|
|||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@periodic_task.periodic_task(
|
||||||
|
spacing=CONF.conductor_periodic_task_intervals.controller_config_active_apply)
|
||||||
def _controller_config_active_apply(self, context):
|
def _controller_config_active_apply(self, context):
|
||||||
"""Check whether target config has been applied to active
|
"""Check whether target config has been applied to active
|
||||||
controller to run postprocessing"""
|
controller to run postprocessing"""
|
||||||
@ -5768,10 +5770,14 @@ class ConductorManager(service.PeriodicService):
|
|||||||
|
|
||||||
return all_fs_resized
|
return all_fs_resized
|
||||||
|
|
||||||
def _audit_ihost_action(self, ihost):
|
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.ihost_action)
|
||||||
|
def _audit_ihost_action(self, context):
|
||||||
"""Audit whether the ihost_action needs to be terminated or escalated.
|
"""Audit whether the ihost_action needs to be terminated or escalated.
|
||||||
"""
|
"""
|
||||||
|
hosts = self.dbapi.ihost_get_list()
|
||||||
|
for ihost in hosts:
|
||||||
|
# only audit configured hosts
|
||||||
|
if ihost.personality:
|
||||||
if ihost.administrative == constants.ADMIN_UNLOCKED:
|
if ihost.administrative == constants.ADMIN_UNLOCKED:
|
||||||
ihost_action_str = ihost.ihost_action or ""
|
ihost_action_str = ihost.ihost_action or ""
|
||||||
|
|
||||||
@ -5847,7 +5853,8 @@ class ConductorManager(service.PeriodicService):
|
|||||||
val = {'vim_progress_status': vim_progress_status_str}
|
val = {'vim_progress_status': vim_progress_status_str}
|
||||||
self.dbapi.ihost_update(ihost.uuid, val)
|
self.dbapi.ihost_update(ihost.uuid, val)
|
||||||
|
|
||||||
def _audit_upgrade_status(self):
|
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.upgrade_status)
|
||||||
|
def _audit_upgrade_status(self, context):
|
||||||
"""Audit upgrade related status"""
|
"""Audit upgrade related status"""
|
||||||
try:
|
try:
|
||||||
upgrade = self.dbapi.software_upgrade_get_one()
|
upgrade = self.dbapi.software_upgrade_get_one()
|
||||||
@ -5928,19 +5935,21 @@ class ConductorManager(service.PeriodicService):
|
|||||||
LOG.info("Ceph Upgrade: Exception %s" % e)
|
LOG.info("Ceph Upgrade: Exception %s" % e)
|
||||||
LOG.info("Ceph Upgrade: Enabled monitor msgr2")
|
LOG.info("Ceph Upgrade: Enabled monitor msgr2")
|
||||||
|
|
||||||
def _audit_install_states(self, hosts):
|
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.install_states)
|
||||||
|
def _audit_install_states(self, context):
|
||||||
# A node could shut down during its installation and the install_state
|
# A node could shut down during its installation and the install_state
|
||||||
# for example could get stuck at the value "installing". To avoid
|
# for example could get stuck at the value "installing". To avoid
|
||||||
# this situation we audit the sanity of the states by appending the
|
# this situation we audit the sanity of the states by appending the
|
||||||
# character '+' to the states in the database. After 15 minutes of the
|
# character '+' to the states in the database. After 15 minutes of the
|
||||||
# states not changing, set the install_state to failed.
|
# states not changing, set the install_state to failed.
|
||||||
|
|
||||||
# The audit's interval is 60sec
|
# The duration (in minutes) to wait until the install_state fails
|
||||||
MAX_COUNT = 15
|
MAX_COUNT = 15
|
||||||
|
|
||||||
# Allow longer duration for booting phase
|
# Allow longer duration for booting phase
|
||||||
MAX_COUNT_BOOTING = 40
|
MAX_COUNT_BOOTING = 40
|
||||||
|
|
||||||
|
hosts = self.dbapi.ihost_get_list()
|
||||||
for host in hosts:
|
for host in hosts:
|
||||||
LOG.debug("Auditing %s, install_state is %s",
|
LOG.debug("Auditing %s, install_state is %s",
|
||||||
host.hostname, host.install_state)
|
host.hostname, host.install_state)
|
||||||
@ -5961,9 +5970,16 @@ class ConductorManager(service.PeriodicService):
|
|||||||
if (install_state != constants.INSTALL_STATE_INSTALLED and
|
if (install_state != constants.INSTALL_STATE_INSTALLED and
|
||||||
install_state !=
|
install_state !=
|
||||||
constants.INSTALL_STATE_COMPLETED):
|
constants.INSTALL_STATE_COMPLETED):
|
||||||
|
# define the quantity of '+' signs that will be added to install_state_info
|
||||||
|
# according to the interval set for this audit.
|
||||||
|
periodic_interval = max(60,
|
||||||
|
CONF.conductor_periodic_task_intervals.install_states)
|
||||||
|
factor = periodic_interval // 60 + \
|
||||||
|
(1 if periodic_interval % 60 > 0 else 0)
|
||||||
if (install_state ==
|
if (install_state ==
|
||||||
constants.INSTALL_STATE_INSTALLING and
|
constants.INSTALL_STATE_INSTALLING and
|
||||||
host.install_state_info is not None):
|
host.install_state_info is not None):
|
||||||
|
host.install_state_info += factor * "+"
|
||||||
if host.install_state_info.count('+') >= MAX_COUNT:
|
if host.install_state_info.count('+') >= MAX_COUNT:
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"Auditing %s, install_state changed from "
|
"Auditing %s, install_state changed from "
|
||||||
@ -5972,13 +5988,12 @@ class ConductorManager(service.PeriodicService):
|
|||||||
constants.INSTALL_STATE_FAILED)
|
constants.INSTALL_STATE_FAILED)
|
||||||
host.install_state = \
|
host.install_state = \
|
||||||
constants.INSTALL_STATE_FAILED
|
constants.INSTALL_STATE_FAILED
|
||||||
else:
|
|
||||||
host.install_state_info += "+"
|
|
||||||
else:
|
else:
|
||||||
if install_state == constants.INSTALL_STATE_BOOTING:
|
if install_state == constants.INSTALL_STATE_BOOTING:
|
||||||
max_count = MAX_COUNT_BOOTING
|
max_count = MAX_COUNT_BOOTING
|
||||||
else:
|
else:
|
||||||
max_count = MAX_COUNT
|
max_count = MAX_COUNT
|
||||||
|
host.install_state_info += factor * "+"
|
||||||
if host.install_state.count('+') >= max_count:
|
if host.install_state.count('+') >= max_count:
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"Auditing %s, install_state changed from "
|
"Auditing %s, install_state changed from "
|
||||||
@ -5987,8 +6002,6 @@ class ConductorManager(service.PeriodicService):
|
|||||||
constants.INSTALL_STATE_FAILED)
|
constants.INSTALL_STATE_FAILED)
|
||||||
host.install_state = \
|
host.install_state = \
|
||||||
constants.INSTALL_STATE_FAILED
|
constants.INSTALL_STATE_FAILED
|
||||||
else:
|
|
||||||
host.install_state += "+"
|
|
||||||
|
|
||||||
# It is possible we get stuck in an installed failed state. For
|
# It is possible we get stuck in an installed failed state. For
|
||||||
# example if a node gets powered down during an install booting
|
# example if a node gets powered down during an install booting
|
||||||
@ -6090,6 +6103,11 @@ class ConductorManager(service.PeriodicService):
|
|||||||
LOG.error("Removed unsupported deferred config_type %s" %
|
LOG.error("Removed unsupported deferred config_type %s" %
|
||||||
config_type)
|
config_type)
|
||||||
|
|
||||||
|
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.deferred_runtime_config)
|
||||||
|
def _audit_deferred_runtime_config_periodic(self, context):
|
||||||
|
# check whether there are deferred runtime manifests to apply
|
||||||
|
self._audit_deferred_runtime_config(context)
|
||||||
|
|
||||||
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.kubernetes_local_secrets)
|
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.kubernetes_local_secrets)
|
||||||
def _kubernetes_local_secrets_audit(self, context):
|
def _kubernetes_local_secrets_audit(self, context):
|
||||||
# Audit kubernetes local registry secrets info
|
# Audit kubernetes local registry secrets info
|
||||||
@ -6097,38 +6115,8 @@ class ConductorManager(service.PeriodicService):
|
|||||||
if self._app:
|
if self._app:
|
||||||
self._app.audit_local_registry_secrets(context)
|
self._app.audit_local_registry_secrets(context)
|
||||||
|
|
||||||
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.default)
|
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.kubernetes_labels)
|
||||||
def _conductor_audit(self, context):
|
def _audit_kubernetes_labels(self, context):
|
||||||
# periodically, perform audit of inventory
|
|
||||||
LOG.debug("Sysinv Conductor running periodic audit task.")
|
|
||||||
|
|
||||||
# check whether there are deferred runtime manifests to apply
|
|
||||||
self._audit_deferred_runtime_config(context)
|
|
||||||
|
|
||||||
# check whether we may have just become active with target config
|
|
||||||
self._controller_config_active_apply(context)
|
|
||||||
|
|
||||||
# Audit upgrade status
|
|
||||||
self._audit_upgrade_status()
|
|
||||||
|
|
||||||
hosts = self.dbapi.ihost_get_list()
|
|
||||||
|
|
||||||
# Audit install states
|
|
||||||
self._audit_install_states(hosts)
|
|
||||||
|
|
||||||
# Audit kubernetes node labels
|
|
||||||
self._audit_kubernetes_labels(hosts)
|
|
||||||
|
|
||||||
# Audit image conversion
|
|
||||||
self._audit_image_conversion(hosts)
|
|
||||||
|
|
||||||
for host in hosts:
|
|
||||||
# only audit configured hosts
|
|
||||||
if not host.personality:
|
|
||||||
continue
|
|
||||||
self._audit_ihost_action(host)
|
|
||||||
|
|
||||||
def _audit_kubernetes_labels(self, hosts):
|
|
||||||
if not cutils.is_initial_config_complete():
|
if not cutils.is_initial_config_complete():
|
||||||
LOG.debug("_audit_kubernetes_labels skip")
|
LOG.debug("_audit_kubernetes_labels skip")
|
||||||
return
|
return
|
||||||
@ -6137,6 +6125,7 @@ class ConductorManager(service.PeriodicService):
|
|||||||
sysinv_labels = self.dbapi.label_get_all()
|
sysinv_labels = self.dbapi.label_get_all()
|
||||||
nodes = self._kube.kube_get_nodes()
|
nodes = self._kube.kube_get_nodes()
|
||||||
|
|
||||||
|
hosts = self.dbapi.ihost_get_list()
|
||||||
for host in hosts:
|
for host in hosts:
|
||||||
try:
|
try:
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
@ -6191,14 +6180,15 @@ class ConductorManager(service.PeriodicService):
|
|||||||
elif bk.backend in self._stor_bck_op_timeouts:
|
elif bk.backend in self._stor_bck_op_timeouts:
|
||||||
del self._stor_bck_op_timeouts[bk.backend]
|
del self._stor_bck_op_timeouts[bk.backend]
|
||||||
|
|
||||||
def _audit_image_conversion(self, hosts):
|
@periodic_task.periodic_task(spacing=CONF.conductor_periodic_task_intervals.image_conversion)
|
||||||
|
def _audit_image_conversion(self, context):
|
||||||
"""
|
"""
|
||||||
Raise alarm if:
|
Raise alarm if:
|
||||||
- image-conversion is not added on both controllers;
|
- image-conversion is not added on both controllers;
|
||||||
- the size of the filesystem is not the same
|
- the size of the filesystem is not the same
|
||||||
on both controllers
|
on both controllers
|
||||||
"""
|
"""
|
||||||
chosts = [h for h in hosts if h.personality == constants.CONTROLLER]
|
chosts = self.dbapi.ihost_get_by_personality(constants.CONTROLLER)
|
||||||
if len(chosts) <= 1:
|
if len(chosts) <= 1:
|
||||||
# No alarm is raised if setup has only one controller
|
# No alarm is raised if setup has only one controller
|
||||||
return
|
return
|
||||||
|
Loading…
Reference in New Issue
Block a user