Move subcloud audit to new worker processes

To allow for faster subcloud audits, introduce a new
"audit-worker" process.  By default, four worker processes
perform the actual subcloud audits.

The main audit process will scan the DB for subclouds which need
auditing based on the new audit start/end timestamps.  It will
then send out RPC messages to the "audit-worker" processes to
request an audit for a list of subcloud IDs.

When an "audit-worker" process receives the "audit_subclouds" RPC
message, it loops over the specified subclouds.  For each subcloud
it updates the DB to indicate that the audit is starting, performs
essentially the same audit that was previously done in the main
audit process, and then updates the DB to indicate that the audit
has completed for that subcloud.
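
For illustration only, here is a condensed sketch of that flow.  The
names follow the code added below, but the helpers marked
"hypothetical" are not part of this change:

    from dcmanager.audit import rpcapi as dcmanager_audit_rpc_client
    from dcmanager.db import api as db_api

    # Main audit process: find subclouds due for an audit and farm
    # them out to the workers in batches.
    audits = db_api.subcloud_audits_get_all_need_audit(ctxt, last_audit_threshold)
    worker_client = dcmanager_audit_rpc_client.ManagerAuditWorkerClient()
    for batch_ids in make_batches([a.subcloud_id for a in audits]):  # hypothetical helper
        worker_client.audit_subclouds(ctxt, batch_ids, patch_audit_data,
                                      firmware_audit_data, do_openstack_audit)

    # Each audit-worker process, on receiving an "audit_subclouds" message:
    for subcloud_id in subcloud_ids:
        db_api.subcloud_audits_get_and_start_audit(ctxt, subcloud_id)  # record audit start
        audit_one_subcloud(subcloud_id)  # hypothetical stand-in for the existing audit logic
        db_api.subcloud_audits_end_audit(ctxt, subcloud_id)            # record audit completion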

Story: 2007267
Task: 41336
Signed-off-by: Chris Friesen <chris.friesen@windriver.com>
Change-Id: Ifb3dd363fd337d24f2c3f7aaa3549624fffaceca
Author: Chris Friesen
Date: 2020-11-30 17:46:11 -06:00
Committed by: John Kung
Parent: 8417ce7737
Commit: c71703128e
18 changed files with 1774 additions and 722 deletions

View File

@@ -32,6 +32,7 @@ Source13: dcmanager-audit.service
Source14: dcmanager-orchestrator.service
Source15: distcloud-syslog.conf
Source16: distcloud-logrotate.conf
Source17: dcmanager-audit-worker.service
BuildArch: noarch
@@ -135,6 +136,7 @@ install -d -m 755 %{buildroot}%{_sysconfdir}/dcmanager/
install -p -D -m 644 %{SOURCE1} %{buildroot}%{_unitdir}/dcmanager-api.service
install -p -D -m 644 %{SOURCE2} %{buildroot}%{_unitdir}/dcmanager-manager.service
install -p -D -m 644 %{SOURCE13} %{buildroot}%{_unitdir}/dcmanager-audit.service
install -p -D -m 644 %{SOURCE17} %{buildroot}%{_unitdir}/dcmanager-audit-worker.service
install -p -D -m 644 %{SOURCE14} %{buildroot}%{_unitdir}/dcmanager-orchestrator.service
install -p -D -m 644 %{SOURCE9} %{buildroot}%{_tmpfilesdir}
# install default config files
@@ -197,6 +199,8 @@ install -m 755 -D -p %{SOURCE12} %{buildroot}/%{_bindir}/clean-dcorch
%{_unitdir}/dcmanager-api.service
%{_bindir}/dcmanager-audit
%{_unitdir}/dcmanager-audit.service
%{_bindir}/dcmanager-audit-worker
%{_unitdir}/dcmanager-audit-worker.service
%{_bindir}/dcmanager-orchestrator
%{_unitdir}/dcmanager-orchestrator.service
%{_bindir}/dcmanager-manager

View File

@@ -0,0 +1,12 @@
[Unit]
Description=DC Manager Audit Worker Service
After=syslog-ng.service network-online.target dcmanager-manager.service
[Service]
Type=simple
User=root
ExecStart=/usr/bin/dcmanager-audit-worker --config-file /etc/dcmanager/dcmanager.conf
Restart=on-failure
[Install]
WantedBy=multi-user.target

View File

@@ -46,6 +46,20 @@ class PatchAuditData(object):
self.committed_patch_ids = committed_patch_ids
self.software_version = software_version
def to_dict(self):
return {
'patches': self.patches,
'applied_patch_ids': self.applied_patch_ids,
'committed_patch_ids': self.committed_patch_ids,
'software_version': self.software_version,
}
@classmethod
def from_dict(cls, values):
if values is None:
return None
return cls(**values)
class PatchAudit(object):
"""Manages tasks related to patch audits."""
@@ -161,6 +175,9 @@ class PatchAudit(object):
out_of_sync = False
# audit_data will be a dict due to passing through RPC so objectify it
audit_data = PatchAuditData.from_dict(audit_data)
# Check that all patches in this subcloud are in the correct
# state, based on the state of the patch in RegionOne. For the
# subcloud, we use the patchstate because we care whether the
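
For reference, a minimal sketch of the round trip that the new
to_dict()/from_dict() helpers enable; the field values here are
hypothetical:

    # Hypothetical values; real data comes from get_regionone_audit_data().
    data = PatchAuditData(patches={'PATCH_0001': {'patchstate': 'Applied'}},
                          applied_patch_ids=['PATCH_0001'],
                          committed_patch_ids=[],
                          software_version='20.06')
    wire = data.to_dict()                      # plain dict, safe to send over RPC
    restored = PatchAuditData.from_dict(wire)  # objectified again on the worker side
    assert restored.software_version == '20.06'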

View File

@@ -64,3 +64,48 @@ class ManagerAuditClient(object):
def trigger_firmware_audit(self, ctxt):
return self.cast(ctxt, self.make_msg('trigger_firmware_audit'))
class ManagerAuditWorkerClient(object):
"""Client side of the DC Manager Audit Worker rpc API.
Version History:
1.0 - Initial version
"""
BASE_RPC_API_VERSION = '1.0'
def __init__(self):
self._client = messaging.get_rpc_client(
topic=consts.TOPIC_DC_MANAGER_AUDIT_WORKER,
version=self.BASE_RPC_API_VERSION)
@staticmethod
def make_msg(method, **kwargs):
return method, kwargs
def call(self, ctxt, msg, version=None):
method, kwargs = msg
if version is not None:
client = self._client.prepare(version=version)
else:
client = self._client
return client.call(ctxt, method, **kwargs)
def cast(self, ctxt, msg, version=None):
method, kwargs = msg
if version is not None:
client = self._client.prepare(version=version)
else:
client = self._client
return client.cast(ctxt, method, **kwargs)
# Tell audit-worker to perform audit on the subclouds with these
# subcloud IDs.
def audit_subclouds(self, ctxt, subcloud_ids, patch_audit_data=None,
firmware_audit_data=None, do_openstack_audit=False):
return self.cast(ctxt, self.make_msg('audit_subclouds',
subcloud_ids=subcloud_ids,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
do_openstack_audit=do_openstack_audit))
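
For context, a sketch of how the main audit process is expected to use
this client; the real call site is in the subcloud_audit_manager diff
below, and the subcloud IDs here are hypothetical:

    client = ManagerAuditWorkerClient()
    client.audit_subclouds(ctxt,
                           subcloud_ids=[1, 2, 3],   # hypothetical batch
                           patch_audit_data=patch_audit_data,
                           firmware_audit_data=firmware_audit_data,
                           do_openstack_audit=False)
    # cast() is used, so this returns immediately; whichever audit-worker
    # process picks the message up from the "dcmanager-audit-worker" topic
    # runs the audits for that batch.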

View File

@@ -27,6 +27,7 @@ import oslo_messaging
from oslo_service import service
from dcmanager.audit.subcloud_audit_manager import SubcloudAuditManager
from dcmanager.audit.subcloud_audit_worker_manager import SubcloudAuditWorkerManager
from dcmanager.common import consts
from dcmanager.common import context
from dcmanager.common import exceptions
@@ -121,3 +122,66 @@ class DCManagerAuditService(service.Service):
LOG.info("Trigger firmware audit.")
return self.subcloud_audit_manager.trigger_firmware_audit(context)
class DCManagerAuditWorkerService(service.Service):
"""Lifecycle manager for a running audit service."""
def __init__(self):
super(DCManagerAuditWorkerService, self).__init__()
self.host = cfg.CONF.host
self.rpc_api_version = consts.RPC_API_VERSION
self.topic = consts.TOPIC_DC_MANAGER_AUDIT_WORKER
# The following are initialized here, but assigned in start() which
# happens after the fork when spawning multiple worker processes
self.TG = None
self.target = None
self._rpc_server = None
self.subcloud_audit_worker_manager = None
def start(self):
self.init_tgm()
self.init_audit_managers()
target = oslo_messaging.Target(version=self.rpc_api_version,
server=self.host,
topic=self.topic)
self.target = target
self._rpc_server = rpc_messaging.get_rpc_server(self.target, self)
self._rpc_server.start()
super(DCManagerAuditWorkerService, self).start()
def init_tgm(self):
self.TG = scheduler.ThreadGroupManager()
def init_audit_managers(self):
self.subcloud_audit_worker_manager = SubcloudAuditWorkerManager()
def _stop_rpc_server(self):
# Stop RPC connection to prevent new requests
LOG.debug(_("Attempting to stop audit-worker RPC service..."))
try:
self._rpc_server.stop()
self._rpc_server.wait()
LOG.info('Audit-worker RPC service stopped successfully')
except Exception as ex:
LOG.error('Failed to stop audit-worker RPC service: %s',
six.text_type(ex))
def stop(self):
self._stop_rpc_server()
self.TG.stop()
# Terminate the engine process
LOG.info("All threads were gone, terminating audit-worker engine")
super(DCManagerAuditWorkerService, self).stop()
@request_context
def audit_subclouds(self, context, subcloud_ids,
patch_audit_data, firmware_audit_data,
do_openstack_audit):
"""Used to trigger audits of the specified subcloud(s)"""
self.subcloud_audit_worker_manager.audit_subclouds(
context, subcloud_ids, patch_audit_data,
firmware_audit_data, do_openstack_audit)
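
A note on why the RPC server and thread group are created in start()
rather than __init__(): when launched with multiple workers (see the
new dcmanager-audit-worker command below), oslo.service forks the
worker processes first and only then calls start() in each child, so
per-process resources must be created after the fork.  A minimal
sketch of that launch pattern, assuming the default of four workers:

    from oslo_config import cfg
    from oslo_service import service

    srv = DCManagerAuditWorkerService()
    launcher = service.launch(cfg.CONF, srv, workers=4)  # forks four children;
    launcher.wait()                                      # each runs srv.start() post-fork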

View File

@@ -20,29 +20,24 @@
# of an applicable Wind River license agreement.
#
import datetime
import eventlet
import os
import time
from tsconfig.tsconfig import CONFIG_PATH
from keystoneauth1 import exceptions as keystone_exceptions
from oslo_config import cfg
from oslo_log import log as logging
from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dcmanager.audit import alarm_aggregation
from dcmanager.audit import firmware_audit
from dcmanager.audit import patch_audit
from dcmanager.common import consts
from dcmanager.audit import rpcapi as dcmanager_audit_rpc_client
from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common.i18n import _
from dcmanager.common import manager
from dcmanager.common import scheduler
from dcmanager.db import api as db_api
from dcmanager.rpc import client as dcmanager_rpc_client
from dcorch.common import consts as dcorch_consts
CONF = cfg.CONF
@@ -67,7 +62,7 @@ class SubcloudAuditManager(manager.Manager):
# Used to force patch audit on the next interval
force_patch_audit = False
# Used to force patch audit on the next interval
# Used to force firmware audit on the next interval
force_firmware_audit = False
def __init__(self, *args, **kwargs):
@@ -76,23 +71,17 @@ class SubcloudAuditManager(manager.Manager):
super(SubcloudAuditManager, self).__init__(
service_name="subcloud_audit_manager")
self.context = context.get_admin_context()
self.dcmanager_rpc_client = dcmanager_rpc_client.ManagerClient()
# Keeps track of greenthreads we create to do work.
self.thread_group_manager = scheduler.ThreadGroupManager(
thread_pool_size=100)
# Track workers created for each subcloud.
self.subcloud_workers = dict()
self.audit_worker_rpc_client = dcmanager_audit_rpc_client.ManagerAuditWorkerClient()
# Number of audits since last subcloud state update
self.audit_count = 0
self.audit_count = SUBCLOUD_STATE_UPDATE_ITERATIONS - 2
# Number of patch audits
self.patch_audit_count = 0
self.alarm_aggr = alarm_aggregation.AlarmAggregation(self.context)
self.patch_audit = patch_audit.PatchAudit(
self.context, self.dcmanager_rpc_client)
self.context, None)
# trigger a patch audit on startup
self.patch_audit_time = 0
self.firmware_audit = firmware_audit.FirmwareAudit(
self.context, self.dcmanager_rpc_client)
self.context, None)
def _add_missing_endpoints(self):
file_path = os.path.join(CONFIG_PATH, '.fpga_endpoint_added')
@@ -164,26 +153,20 @@ class SubcloudAuditManager(manager.Manager):
except Exception:
LOG.exception("Error in periodic subcloud audit loop")
def _get_audit_data(self):
"""Return the patch audit and firmware audit data it should be triggered.
Also, returns whether to audit the load and firmware.
"""
patch_audit_data = None
firmware_audit_data = None
def _get_audits_needed(self):
"""Returns which (if any) extra audits are needed."""
audit_patch = False
audit_load = False
audit_firmware = False
current_time = time.time()
# Determine whether to trigger a patch audit of each subcloud
if (SubcloudAuditManager.force_patch_audit or
(current_time - self.patch_audit_time >=
CONF.scheduler.patch_audit_interval)):
LOG.info("Trigger patch audit")
audit_patch = True
self.patch_audit_time = current_time
self.patch_audit_count += 1
# Query RegionOne patches and software version
patch_audit_data = self.patch_audit.get_regionone_audit_data()
# Check subcloud software version every other patch audit cycle
if (self.patch_audit_count % 2 != 0 or
SubcloudAuditManager.force_patch_audit):
@@ -192,19 +175,28 @@ class SubcloudAuditManager(manager.Manager):
if (self.patch_audit_count % 4 == 1):
LOG.info("Trigger firmware audit")
audit_firmware = True
firmware_audit_data = self.firmware_audit.get_regionone_audit_data()
# Reset force_firmware_audit only when firmware audit has been fired
SubcloudAuditManager.reset_force_firmware_audit()
SubcloudAuditManager.reset_force_patch_audit()
# Trigger a firmware audit as it is changed through proxy
if (SubcloudAuditManager.force_firmware_audit):
LOG.info("Trigger firmware audit")
audit_firmware = True
firmware_audit_data = self.firmware_audit.get_regionone_audit_data()
SubcloudAuditManager.reset_force_firmware_audit()
return patch_audit_data, firmware_audit_data, audit_load, audit_firmware
return audit_patch, audit_load, audit_firmware
def _get_audit_data(self, audit_patch, audit_firmware):
"""Return the patch audit and firmware audit data as needed."""
patch_audit_data = None
firmware_audit_data = None
if audit_patch:
# Query RegionOne patches and software version
patch_audit_data = self.patch_audit.get_regionone_audit_data()
if audit_firmware:
# Query RegionOne firmware
firmware_audit_data = self.firmware_audit.get_regionone_audit_data()
return patch_audit_data, firmware_audit_data
def _periodic_subcloud_audit_loop(self):
"""Audit availability of subclouds loop."""
@@ -213,17 +205,30 @@ class SubcloudAuditManager(manager.Manager):
LOG.info('Triggered subcloud audit.')
self.audit_count += 1
# Determine whether to trigger a state update to each subcloud
# Determine whether to trigger a state update to each subcloud.
if self.audit_count >= SUBCLOUD_STATE_UPDATE_ITERATIONS:
update_subcloud_state = True
self.audit_count = 0
else:
update_subcloud_state = False
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = self._get_audit_data()
# Determine whether we want to trigger specialty audits.
audit_patch, audit_load, audit_firmware = self._get_audits_needed()
# Set desired audit flags for all subclouds.
values = {}
if update_subcloud_state:
values['state_update_requested'] = True
if audit_patch:
values['patch_audit_requested'] = True
if audit_load:
values['load_audit_requested'] = True
if audit_firmware:
values['firmware_audit_requested'] = True
db_api.subcloud_audits_update_all(self.context, values)
do_openstack_audit = False
openstack_installed = False
# The feature of syncing openstack resources to the subclouds was not
# completed, therefore, auditing the openstack application is disabled
# Determine whether OpenStack is installed in central cloud
@@ -237,249 +242,52 @@ class SubcloudAuditManager(manager.Manager):
# apps = sysinv_client.get_applications()
# for app in apps:
# if app.name == HELM_APP_OPENSTACK and app.active:
# openstack_installed = True
# do_openstack_audit = True
# break
for subcloud in db_api.subcloud_get_all(self.context):
# Include failure deploy status states in the auditable list
# so that the subcloud can be set as offline
if (subcloud.deploy_status not in
[consts.DEPLOY_STATE_DONE,
consts.DEPLOY_STATE_DEPLOYING,
consts.DEPLOY_STATE_DEPLOY_FAILED,
consts.DEPLOY_STATE_INSTALL_FAILED,
consts.DEPLOY_STATE_PRE_INSTALL_FAILED,
consts.DEPLOY_STATE_DATA_MIGRATION_FAILED,
consts.DEPLOY_STATE_MIGRATED]):
LOG.debug("Skip subcloud %s audit, deploy_status: %s" %
(subcloud.name, subcloud.deploy_status))
continue
current_time = datetime.datetime.utcnow()
last_audit_threshold = current_time - datetime.timedelta(
seconds=CONF.scheduler.subcloud_audit_interval)
# Create a new greenthread for each subcloud to allow the audits
# to be done in parallel. If there are not enough greenthreads
# in the pool, this will block until one becomes available.
self.subcloud_workers[subcloud.name] = \
self.thread_group_manager.start(self._audit_subcloud,
subcloud.name,
update_subcloud_state,
openstack_installed,
patch_audit_data,
firmware_audit_data,
do_load_audit,
do_firmware_audit)
subcloud_ids = []
subcloud_audits = db_api.subcloud_audits_get_all_need_audit(
self.context, last_audit_threshold)
# Wait for all greenthreads to complete
LOG.info('Waiting for subcloud audits to complete.')
for thread in self.subcloud_workers.values():
thread.wait()
# Now check whether any of these subclouds need patch audit or firmware
# audit data and grab it if needed.
if not audit_patch:
for audit in subcloud_audits:
if audit.patch_audit_requested:
audit_patch = True
LOG.info("DB says patch audit needed")
break
if not audit_firmware:
for audit in subcloud_audits:
if audit.firmware_audit_requested:
LOG.info("DB says firmware audit needed")
audit_firmware = True
break
patch_audit_data, firmware_audit_data = self._get_audit_data(
audit_patch, audit_firmware)
LOG.info("patch_audit_data: %s, firmware_audit_data: %s" %
(patch_audit_data, firmware_audit_data))
# Clear the list of workers before next audit
self.subcloud_workers = dict()
LOG.info('All subcloud audits have completed.')
def _update_subcloud_availability(self, subcloud_name,
availability_status=None,
update_state_only=False,
audit_fail_count=None):
try:
self.dcmanager_rpc_client.update_subcloud_availability(
self.context, subcloud_name, availability_status,
update_state_only, audit_fail_count)
LOG.info('Notifying dcmanager, subcloud:%s, availability:%s' %
(subcloud_name,
availability_status))
except Exception:
LOG.exception('Problem informing dcmanager of subcloud '
'availability state change, subcloud: %s'
% subcloud_name)
@staticmethod
def _get_subcloud_availability_status(subcloud_name, sysinv_client):
"""For each subcloud, if at least one service is active in each
service of servicegroup-list then declare the subcloud online.
"""
avail_to_set = consts.AVAILABILITY_OFFLINE
svc_groups = None
# get a list of service groups in the subcloud
try:
svc_groups = sysinv_client.get_service_groups()
except Exception as e:
LOG.warn('Cannot retrieve service groups for '
'subcloud: %s, %s' % (subcloud_name, e))
if svc_groups:
active_sgs = []
inactive_sgs = []
# Build 2 lists, 1 of active service groups,
# one with non-active.
for sg in svc_groups:
if sg.state != consts.SERVICE_GROUP_STATUS_ACTIVE:
inactive_sgs.append(sg.service_group_name)
else:
active_sgs.append(sg.service_group_name)
# Create a list of service groups that are only present
# in non-active list
inactive_only = [sg for sg in inactive_sgs if
sg not in active_sgs]
# An empty inactive only list and a non-empty active list
# means we're good to go.
if not inactive_only and active_sgs:
avail_to_set = \
consts.AVAILABILITY_ONLINE
else:
LOG.info("Subcloud:%s has non-active "
"service groups: %s" %
(subcloud_name, inactive_only))
return avail_to_set
def _audit_subcloud_openstack_app(self, subcloud_name, sysinv_client,
openstack_installed):
openstack_installed_current = False
# get a list of installed apps in the subcloud
try:
apps = sysinv_client.get_applications()
except Exception:
LOG.exception('Cannot retrieve installed apps for subcloud:%s'
% subcloud_name)
return
for app in apps:
if app.name == HELM_APP_OPENSTACK and app.active:
# audit find openstack app is installed and active in
# the subcloud
openstack_installed_current = True
break
endpoint_type_list = dccommon_consts.ENDPOINT_TYPES_LIST_OS
if openstack_installed_current and not openstack_installed:
self.dcmanager_rpc_client.update_subcloud_sync_endpoint_type(
self.context,
subcloud_name,
endpoint_type_list,
openstack_installed_current)
elif not openstack_installed_current and openstack_installed:
self.dcmanager_rpc_client.update_subcloud_sync_endpoint_type(
self.context,
subcloud_name,
endpoint_type_list,
openstack_installed_current)
def _audit_subcloud(self, subcloud_name, update_subcloud_state,
audit_openstack, patch_audit_data, firmware_audit_data,
do_load_audit, do_firmware_audit):
"""Audit a single subcloud."""
# Retrieve the subcloud
try:
subcloud = db_api.subcloud_get_by_name(self.context, subcloud_name)
except exceptions.SubcloudNotFound:
# Possibility subcloud could have been deleted since the list of
# subclouds to audit was created.
LOG.info('Ignoring SubcloudNotFound when auditing subcloud %s' %
subcloud_name)
return
avail_status_current = subcloud.availability_status
audit_fail_count = subcloud.audit_fail_count
# Set defaults to None and disabled so we will still set disabled
# status if we encounter an error.
sysinv_client = None
fm_client = None
avail_to_set = consts.AVAILABILITY_OFFLINE
try:
os_client = OpenStackDriver(region_name=subcloud_name,
thread_name='subcloud-audit')
sysinv_client = os_client.sysinv_client
fm_client = os_client.fm_client
except (keystone_exceptions.EndpointNotFound,
keystone_exceptions.ConnectFailure,
keystone_exceptions.ConnectTimeout,
IndexError):
if avail_status_current == consts.AVAILABILITY_OFFLINE:
LOG.info("Identity or Platform endpoint for %s not "
"found, ignoring for offline "
"subcloud." % subcloud_name)
return
else:
# The subcloud will be marked as offline below.
LOG.error("Identity or Platform endpoint for online "
"subcloud: %s not found." % subcloud_name)
except Exception:
LOG.exception("Failed to get OS Client for subcloud: %s"
% subcloud_name)
# Check availability of the subcloud
if sysinv_client:
avail_to_set = self._get_subcloud_availability_status(
subcloud_name, sysinv_client)
if avail_to_set == consts.AVAILABILITY_OFFLINE:
if audit_fail_count < consts.AVAIL_FAIL_COUNT_MAX:
audit_fail_count = audit_fail_count + 1
if (avail_status_current == consts.AVAILABILITY_ONLINE) and \
(audit_fail_count < consts.AVAIL_FAIL_COUNT_TO_ALARM):
# Do not set offline until we have failed audit
# the requisite number of times
avail_to_set = consts.AVAILABILITY_ONLINE
# We want a chunksize of at least 1 so add the number of workers.
chunksize = (len(subcloud_audits) + CONF.audit_worker_workers) / CONF.audit_worker_workers
for audit in subcloud_audits:
subcloud_ids.append(audit.subcloud_id)
if len(subcloud_ids) == chunksize:
# We've gathered a batch of subclouds, send it for processing.
self.audit_worker_rpc_client.audit_subclouds(
self.context, subcloud_ids, patch_audit_data,
firmware_audit_data, do_openstack_audit)
LOG.debug('Sent subcloud audit request message for subclouds: %s' % subcloud_ids)
subcloud_ids = []
if len(subcloud_ids) > 0:
# We've got a partial batch...send it off for processing.
self.audit_worker_rpc_client.audit_subclouds(
self.context, subcloud_ids, patch_audit_data,
firmware_audit_data, do_openstack_audit)
LOG.debug('Sent final subcloud audit request message for subclouds: %s' % subcloud_ids)
else:
# In the case of a one off blip, we may need to set the
# fail count back to 0
audit_fail_count = 0
if avail_to_set != avail_status_current:
if avail_to_set == consts.AVAILABILITY_ONLINE:
audit_fail_count = 0
LOG.info('Setting new availability status: %s '
'on subcloud: %s' %
(avail_to_set, subcloud_name))
self._update_subcloud_availability(
subcloud_name,
availability_status=avail_to_set,
audit_fail_count=audit_fail_count)
elif audit_fail_count != subcloud.audit_fail_count:
self._update_subcloud_availability(
subcloud_name,
availability_status=None,
audit_fail_count=audit_fail_count)
elif update_subcloud_state:
# Nothing has changed, but we want to send a state update for this
# subcloud as an audit.
self._update_subcloud_availability(
subcloud_name,
availability_status=avail_status_current,
update_state_only=True)
# If subcloud is managed and online, audit additional resources
if (subcloud.management_state == consts.MANAGEMENT_MANAGED and
avail_to_set == consts.AVAILABILITY_ONLINE):
# Get alarm summary and store in db,
if fm_client:
self.alarm_aggr.update_alarm_summary(subcloud_name, fm_client)
# If we have patch audit data, audit the subcloud
if patch_audit_data:
self.patch_audit.subcloud_patch_audit(subcloud_name,
patch_audit_data,
do_load_audit)
# Perform firmware audit
if do_firmware_audit:
self.firmware_audit.subcloud_firmware_audit(subcloud_name,
firmware_audit_data)
# Audit openstack application in the subcloud
if audit_openstack and sysinv_client:
self._audit_subcloud_openstack_app(
subcloud_name, sysinv_client, subcloud.openstack_installed)
LOG.debug('Done sending audit request messages.')
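
As an aside, a standalone sketch of the batch-size arithmetic used
above ("a chunksize of at least 1"), written with floor division so
the result is an integer; this is an illustration, not the committed
code:

    def make_batches(subcloud_ids, workers=4):
        # Adding the worker count before dividing guarantees chunksize >= 1.
        chunksize = (len(subcloud_ids) + workers) // workers
        batches, current = [], []
        for sid in subcloud_ids:
            current.append(sid)
            if len(current) == chunksize:
                batches.append(current)  # full batch: send it for processing
                current = []
        if current:
            batches.append(current)      # partial final batch
        return batches

    # make_batches(list(range(10))) -> [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]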

View File

@@ -0,0 +1,354 @@
# Copyright 2017 Ericsson AB.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#
import os
from keystoneauth1 import exceptions as keystone_exceptions
from oslo_config import cfg
from oslo_log import log as logging
from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dcmanager.audit import alarm_aggregation
from dcmanager.audit import firmware_audit
from dcmanager.audit import patch_audit
from dcmanager.audit.subcloud_audit_manager import HELM_APP_OPENSTACK
from dcmanager.common import consts
from dcmanager.common import context
from dcmanager.common import exceptions
from dcmanager.common.i18n import _
from dcmanager.common import manager
from dcmanager.common import scheduler
from dcmanager.db import api as db_api
from dcmanager.rpc import client as dcmanager_rpc_client
CONF = cfg.CONF
LOG = logging.getLogger(__name__)
# We will update the state of each subcloud in the dcorch about once per hour.
# Calculate how many iterations that will be.
SUBCLOUD_STATE_UPDATE_ITERATIONS = \
dccommon_consts.SECONDS_IN_HOUR / CONF.scheduler.subcloud_audit_interval
class SubcloudAuditWorkerManager(manager.Manager):
"""Manages tasks related to audits."""
def __init__(self, *args, **kwargs):
LOG.debug(_('SubcloudAuditWorkerManager initialization...'))
super(SubcloudAuditWorkerManager, self).__init__(
service_name="subcloud_audit_worker_manager")
self.context = context.get_admin_context()
self.dcmanager_rpc_client = dcmanager_rpc_client.ManagerClient()
# Keeps track of greenthreads we create to do work.
self.thread_group_manager = scheduler.ThreadGroupManager(
thread_pool_size=100)
# Track workers created for each subcloud.
self.subcloud_workers = dict()
self.alarm_aggr = alarm_aggregation.AlarmAggregation(self.context)
self.patch_audit = patch_audit.PatchAudit(
self.context, self.dcmanager_rpc_client)
self.firmware_audit = firmware_audit.FirmwareAudit(
self.context, self.dcmanager_rpc_client)
self.pid = os.getpid()
def audit_subclouds(self, context, subcloud_ids, patch_audit_data,
firmware_audit_data, do_openstack_audit):
"""Run audits of the specified subcloud(s)"""
LOG.debug('PID: %s, subclouds to audit: %s, do_openstack_audit: %s' %
(self.pid, subcloud_ids, do_openstack_audit))
for subcloud_id in subcloud_ids:
# Retrieve the subcloud and subcloud audit info
try:
subcloud = db_api.subcloud_get(self.context, subcloud_id)
subcloud_audits = db_api.subcloud_audits_get_and_start_audit(
self.context, subcloud_id)
except exceptions.SubcloudNotFound:
# Possibility subcloud could have been deleted since the list of
# subclouds to audit was created.
LOG.info('Ignoring SubcloudNotFound when auditing subcloud %s' %
subcloud_id)
continue
LOG.debug("PID: %s, starting audit of subcloud: %s." %
(self.pid, subcloud.name))
# Include failure deploy status states in the auditable list
# so that the subcloud can be set as offline
if (subcloud.deploy_status not in
[consts.DEPLOY_STATE_DONE,
consts.DEPLOY_STATE_DEPLOYING,
consts.DEPLOY_STATE_DEPLOY_FAILED,
consts.DEPLOY_STATE_INSTALL_FAILED,
consts.DEPLOY_STATE_PRE_INSTALL_FAILED,
consts.DEPLOY_STATE_DATA_MIGRATION_FAILED,
consts.DEPLOY_STATE_MIGRATED]):
LOG.debug("Skip subcloud %s audit, deploy_status: %s" %
(subcloud.name, subcloud.deploy_status))
# This DB API call will set the "audit_finished_at" timestamp
# so it won't get audited again for a while.
db_api.subcloud_audits_end_audit(self.context, subcloud_id)
continue
# Check the per-subcloud audit flags
do_patch_audit = subcloud_audits.patch_audit_requested
do_load_audit = subcloud_audits.load_audit_requested
do_firmware_audit = subcloud_audits.firmware_audit_requested
update_subcloud_state = subcloud_audits.state_update_requested
# Create a new greenthread for each subcloud to allow the audits
# to be done in parallel. If there are not enough greenthreads
# in the pool, this will block until one becomes available.
self.subcloud_workers[subcloud.name] = \
self.thread_group_manager.start(self._do_audit_subcloud,
subcloud,
update_subcloud_state,
do_openstack_audit,
patch_audit_data,
firmware_audit_data,
do_patch_audit,
do_load_audit,
do_firmware_audit)
def _update_subcloud_availability(self, subcloud_name,
availability_status=None,
update_state_only=False,
audit_fail_count=None):
try:
self.dcmanager_rpc_client.update_subcloud_availability(
self.context, subcloud_name, availability_status,
update_state_only, audit_fail_count)
LOG.info('Notifying dcmanager, subcloud:%s, availability:%s' %
(subcloud_name,
availability_status))
except Exception:
LOG.exception('Problem informing dcmanager of subcloud '
'availability state change, subcloud: %s'
% subcloud_name)
@staticmethod
def _get_subcloud_availability_status(subcloud_name, sysinv_client):
"""For each subcloud, if at least one service is active in each
service of servicegroup-list then declare the subcloud online.
"""
avail_to_set = consts.AVAILABILITY_OFFLINE
svc_groups = None
# get a list of service groups in the subcloud
try:
svc_groups = sysinv_client.get_service_groups()
except Exception as e:
LOG.warn('Cannot retrieve service groups for '
'subcloud: %s, %s' % (subcloud_name, e))
if svc_groups:
active_sgs = []
inactive_sgs = []
# Build 2 lists, 1 of active service groups,
# one with non-active.
for sg in svc_groups:
if sg.state != consts.SERVICE_GROUP_STATUS_ACTIVE:
inactive_sgs.append(sg.service_group_name)
else:
active_sgs.append(sg.service_group_name)
# Create a list of service groups that are only present
# in non-active list
inactive_only = [sg for sg in inactive_sgs if
sg not in active_sgs]
# An empty inactive only list and a non-empty active list
# means we're good to go.
if not inactive_only and active_sgs:
avail_to_set = \
consts.AVAILABILITY_ONLINE
else:
LOG.info("Subcloud:%s has non-active "
"service groups: %s" %
(subcloud_name, inactive_only))
return avail_to_set
def _audit_subcloud_openstack_app(self, subcloud_name, sysinv_client,
openstack_installed):
openstack_installed_current = False
# get a list of installed apps in the subcloud
try:
apps = sysinv_client.get_applications()
except Exception:
LOG.exception('Cannot retrieve installed apps for subcloud:%s'
% subcloud_name)
return
for app in apps:
if app.name == HELM_APP_OPENSTACK and app.active:
# audit find openstack app is installed and active in
# the subcloud
openstack_installed_current = True
break
endpoint_type_list = dccommon_consts.ENDPOINT_TYPES_LIST_OS
if openstack_installed_current and not openstack_installed:
self.dcmanager_rpc_client.update_subcloud_sync_endpoint_type(
self.context,
subcloud_name,
endpoint_type_list,
openstack_installed_current)
elif not openstack_installed_current and openstack_installed:
self.dcmanager_rpc_client.update_subcloud_sync_endpoint_type(
self.context,
subcloud_name,
endpoint_type_list,
openstack_installed_current)
def _do_audit_subcloud(self, subcloud, update_subcloud_state,
do_audit_openstack, patch_audit_data,
firmware_audit_data, do_patch_audit,
do_load_audit, do_firmware_audit):
# Do the actual subcloud audit.
try:
self._audit_subcloud(subcloud, update_subcloud_state,
do_audit_openstack, patch_audit_data,
firmware_audit_data, do_patch_audit,
do_load_audit, do_firmware_audit)
except Exception:
LOG.exception("Got exception auditing subcloud: %s" % subcloud.name)
# Update the audit completion timestamp so it doesn't get
# audited again for a while.
db_api.subcloud_audits_end_audit(self.context, subcloud.id)
# Remove the worker for this subcloud
self.subcloud_workers.pop(subcloud.name, None)
LOG.debug("PID: %s, done auditing subcloud: %s." %
(self.pid, subcloud.name))
def _audit_subcloud(self, subcloud, update_subcloud_state,
do_audit_openstack, patch_audit_data, firmware_audit_data,
do_patch_audit, do_load_audit, do_firmware_audit):
"""Audit a single subcloud."""
avail_status_current = subcloud.availability_status
audit_fail_count = subcloud.audit_fail_count
subcloud_name = subcloud.name
# Set defaults to None and disabled so we will still set disabled
# status if we encounter an error.
sysinv_client = None
fm_client = None
avail_to_set = consts.AVAILABILITY_OFFLINE
try:
os_client = OpenStackDriver(region_name=subcloud_name,
thread_name='subcloud-audit')
sysinv_client = os_client.sysinv_client
fm_client = os_client.fm_client
except (keystone_exceptions.EndpointNotFound,
keystone_exceptions.ConnectFailure,
keystone_exceptions.ConnectTimeout,
IndexError):
if avail_status_current == consts.AVAILABILITY_OFFLINE:
LOG.info("Identity or Platform endpoint for %s not "
"found, ignoring for offline "
"subcloud." % subcloud_name)
return
else:
# The subcloud will be marked as offline below.
LOG.error("Identity or Platform endpoint for online "
"subcloud: %s not found." % subcloud_name)
except Exception:
LOG.exception("Failed to get OS Client for subcloud: %s"
% subcloud_name)
# Check availability of the subcloud
if sysinv_client:
avail_to_set = self._get_subcloud_availability_status(
subcloud_name, sysinv_client)
if avail_to_set == consts.AVAILABILITY_OFFLINE:
if audit_fail_count < consts.AVAIL_FAIL_COUNT_MAX:
audit_fail_count = audit_fail_count + 1
if (avail_status_current == consts.AVAILABILITY_ONLINE) and \
(audit_fail_count < consts.AVAIL_FAIL_COUNT_TO_ALARM):
# Do not set offline until we have failed audit
# the requisite number of times
avail_to_set = consts.AVAILABILITY_ONLINE
else:
# In the case of a one off blip, we may need to set the
# fail count back to 0
audit_fail_count = 0
if avail_to_set != avail_status_current:
if avail_to_set == consts.AVAILABILITY_ONLINE:
audit_fail_count = 0
LOG.info('Setting new availability status: %s '
'on subcloud: %s' %
(avail_to_set, subcloud_name))
self._update_subcloud_availability(
subcloud_name,
availability_status=avail_to_set,
audit_fail_count=audit_fail_count)
elif audit_fail_count != subcloud.audit_fail_count:
self._update_subcloud_availability(
subcloud_name,
availability_status=None,
audit_fail_count=audit_fail_count)
elif update_subcloud_state:
# Nothing has changed, but we want to send a state update for this
# subcloud as an audit.
LOG.debug('Updating subcloud state unconditionally for subcloud %s'
% subcloud_name)
self._update_subcloud_availability(
subcloud_name,
availability_status=avail_status_current,
update_state_only=True)
# If subcloud is managed and online, audit additional resources
if (subcloud.management_state == consts.MANAGEMENT_MANAGED and
avail_to_set == consts.AVAILABILITY_ONLINE):
# Get alarm summary and store in db,
if fm_client:
self.alarm_aggr.update_alarm_summary(subcloud_name, fm_client)
# If we have patch audit data, audit the subcloud
if do_patch_audit and patch_audit_data:
self.patch_audit.subcloud_patch_audit(subcloud_name,
patch_audit_data,
do_load_audit)
# Perform firmware audit
if do_firmware_audit:
self.firmware_audit.subcloud_firmware_audit(subcloud_name,
firmware_audit_data)
# Audit openstack application in the subcloud
if do_audit_openstack and sysinv_client:
self._audit_subcloud_openstack_app(
subcloud_name, sysinv_client, subcloud.openstack_installed)
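
To summarize the bookkeeping the worker performs (see the deploy-status
skip path and _do_audit_subcloud above): every subcloud handed to a
worker has its audit marked started and then ended exactly once, even
when the audit body is skipped or fails, so the main audit process will
not re-select it until the next interval.  A condensed sketch, assuming
the db_api calls behave as described in the comments above:

    subcloud_audits = db_api.subcloud_audits_get_and_start_audit(ctxt, subcloud_id)
    try:
        run_the_audit(subcloud, subcloud_audits)  # hypothetical stand-in; may be skipped
    finally:
        # Sets the "audit_finished_at" timestamp so the subcloud is not
        # immediately picked up again by the main audit process.
        db_api.subcloud_audits_end_audit(ctxt, subcloud_id)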

View File

@@ -10,7 +10,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms

View File

@@ -0,0 +1,63 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#
"""
DC Manager Audit Worker Service.
"""
import eventlet
eventlet.monkey_patch()
from oslo_config import cfg
from oslo_i18n import _lazy
from oslo_log import log as logging
from oslo_service import service
from dcmanager.common import config
from dcmanager.common import messaging
_lazy.enable_lazy()
config.register_options()
config.register_keystone_options()
LOG = logging.getLogger('dcmanager.audit-worker')
CONF = cfg.CONF
def main():
logging.register_options(CONF)
CONF(project='dcmanager', prog='dcmanager-audit-worker')
logging.setup(cfg.CONF, 'dcmanager-audit-worker')
logging.set_defaults()
messaging.setup()
from dcmanager.audit import service as audit
srv = audit.DCManagerAuditWorkerService()
launcher = service.launch(cfg.CONF,
srv, workers=CONF.audit_worker_workers)
LOG.info("Configuration:")
cfg.CONF.log_opt_values(LOG, logging.INFO)
launcher.wait()
if __name__ == '__main__':
main()

View File

@@ -152,6 +152,8 @@ common_opts = [
help='number of orchestrator workers'),
cfg.IntOpt('audit_workers', default=1,
help='number of audit workers'),
cfg.IntOpt('audit_worker_workers', default=4,
help='number of audit-worker workers'),
cfg.StrOpt('host',
default='localhost',
help='hostname of the machine')

View File

@@ -12,7 +12,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
@@ -25,6 +25,8 @@ TOPIC_DC_MANAGER = "dcmanager"
TOPIC_DC_MANAGER_AUDIT = "dcmanager-audit"
TOPIC_DC_MANAGER_AUDIT_WORKER = "dcmanager-audit-worker"
TOPIC_DC_MANAGER_ORCHESTRATOR = "dcmanager-orchestrator"
LOADS_VAULT_DIR = "/opt/dc-vault/loads"

View File

@@ -10,7 +10,7 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2020 Wind River Systems, Inc.
# Copyright (c) 2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms

View File

@@ -44,6 +44,12 @@ class FakeDCManagerAPI(object):
self.update_subcloud_endpoint_status = mock.MagicMock()
class FakeAuditWorkerAPI(object):
def __init__(self):
self.audit_subclouds = mock.MagicMock()
class PCIDevice(object):
def __init__(self, uuid, name,
pciaddr, pvendor_id,
@@ -363,6 +369,13 @@ class TestFirmwareAudit(base.DCManagerTestCase):
self.mock_dcmanager_api.return_value = self.fake_dcmanager_api
self.addCleanup(p.stop)
# Mock the Audit Worker API
self.fake_audit_worker_api = FakeAuditWorkerAPI()
p = mock.patch('dcmanager.audit.rpcapi.ManagerAuditWorkerClient')
self.mock_audit_worker_api = p.start()
self.mock_audit_worker_api.return_value = self.fake_audit_worker_api
self.addCleanup(p.stop)
def test_init(self):
fm = firmware_audit.FirmwareAudit(self.ctxt,
self.fake_dcmanager_api)
@@ -390,8 +403,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)
@@ -423,8 +435,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)
@@ -456,8 +467,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)
@@ -489,8 +499,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)
@@ -522,8 +531,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)
@@ -555,8 +563,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)
@@ -588,8 +595,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)
@@ -621,8 +627,7 @@ class TestFirmwareAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.firmware_audit = fm
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
for name in ['subcloud1', 'subcloud2']:
fm.subcloud_firmware_audit(name, firmware_audit_data)

View File

@@ -43,6 +43,12 @@ class FakeDCManagerAPI(object):
self.update_subcloud_endpoint_status = mock.MagicMock()
class FakeAuditWorkerAPI(object):
def __init__(self):
self.audit_subclouds = mock.MagicMock()
class Load(object):
def __init__(self, software_version, state):
self.software_version = software_version
@@ -243,6 +249,13 @@ class TestPatchAudit(base.DCManagerTestCase):
self.mock_dcmanager_api.return_value = self.fake_dcmanager_api
self.addCleanup(p.stop)
# Mock the Audit Worker API
self.fake_audit_worker_api = FakeAuditWorkerAPI()
p = mock.patch('dcmanager.audit.rpcapi.ManagerAuditWorkerClient')
self.mock_audit_worker_api = p.start()
self.mock_audit_worker_api.return_value = self.fake_audit_worker_api
self.addCleanup(p.stop)
def test_init(self):
pm = patch_audit.PatchAudit(self.ctxt,
self.fake_dcmanager_api)
@@ -267,8 +280,10 @@ class TestPatchAudit(base.DCManagerTestCase):
am = subcloud_audit_manager.SubcloudAuditManager()
am.patch_audit = pm
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
do_load_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
# Convert to dict like what would happen calling via RPC
patch_audit_data = patch_audit_data.to_dict()
for name in ['subcloud1', 'subcloud2']:
pm.subcloud_patch_audit(name, patch_audit_data, do_load_audit)
@@ -301,8 +316,10 @@ class TestPatchAudit(base.DCManagerTestCase):
mock_patching_client.side_effect = FakePatchingClientOutOfSync
mock_sysinv_client.side_effect = FakeSysinvClientOneLoad
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
do_load_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
# Convert to dict like what would happen calling via RPC
patch_audit_data = patch_audit_data.to_dict()
for name in ['subcloud1', 'subcloud2', 'subcloud3', 'subcloud4']:
pm.subcloud_patch_audit(name, patch_audit_data, do_load_audit)
@@ -362,8 +379,10 @@ class TestPatchAudit(base.DCManagerTestCase):
mock_patching_client.side_effect = FakePatchingClientExtraPatches
mock_sysinv_client.side_effect = FakeSysinvClientOneLoad
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
do_load_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
# Convert to dict like what would happen calling via RPC
patch_audit_data = patch_audit_data.to_dict()
for name in ['subcloud1', 'subcloud2']:
pm.subcloud_patch_audit(name, patch_audit_data, do_load_audit)
@@ -396,8 +415,10 @@ class TestPatchAudit(base.DCManagerTestCase):
mock_patching_client.side_effect = FakePatchingClientInSync
mock_sysinv_client.side_effect = FakeSysinvClientOneLoadUnmatchedSoftwareVersion
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
do_load_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
# Convert to dict like what would happen calling via RPC
patch_audit_data = patch_audit_data.to_dict()
for name in ['subcloud1', 'subcloud2']:
pm.subcloud_patch_audit(name, patch_audit_data, do_load_audit)
@@ -440,8 +461,10 @@ class TestPatchAudit(base.DCManagerTestCase):
mock_patching_client.side_effect = FakePatchingClientInSync
mock_sysinv_client.side_effect = FakeSysinvClientOneLoadUpgradeInProgress
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
do_load_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(True, True)
# Convert to dict like what would happen calling via RPC
patch_audit_data = patch_audit_data.to_dict()
for name in ['subcloud1', 'subcloud2']:
pm.subcloud_patch_audit(name, patch_audit_data, do_load_audit)

View File

@@ -10,52 +10,39 @@
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2017-2020 Wind River Systems, Inc.
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#
import copy
import mock
import sys
sys.modules['fm_core'] = mock.Mock()
from dccommon import consts as dccommon_consts
from dcmanager.audit import subcloud_audit_manager
from dcmanager.common import consts
from dcmanager.db.sqlalchemy import api as db_api
from dcmanager.tests import base
class FakeDCManagerAPI(object):
class FakeAuditWorkerAPI(object):
def __init__(self):
self.update_subcloud_availability = mock.MagicMock()
self.update_subcloud_sync_endpoint_type = mock.MagicMock()
self.update_subcloud_endpoint_status = mock.MagicMock()
class FakeAlarmAggregation(object):
def __init__(self):
self.update_alarm_summary = mock.MagicMock()
self.audit_subclouds = mock.MagicMock()
class FakePatchAudit(object):
def __init__(self):
self.subcloud_patch_audit = mock.MagicMock()
self.get_regionone_audit_data = mock.MagicMock()
class FakeFirmwareAudit(object):
def __init__(self):
self.subcloud_firmware_audit = mock.MagicMock()
self.get_regionone_audit_data = mock.MagicMock()
@@ -215,18 +202,11 @@ class TestAuditManager(base.DCManagerTestCase):
def setUp(self):
super(TestAuditManager, self).setUp()
# Mock the DCManager API
self.fake_dcmanager_api = FakeDCManagerAPI()
p = mock.patch('dcmanager.rpc.client.ManagerClient')
self.mock_dcmanager_api = p.start()
self.mock_dcmanager_api.return_value = self.fake_dcmanager_api
self.addCleanup(p.stop)
# Mock the OpenStackDriver
self.fake_openstack_client = FakeOpenStackDriver('fake_region')
p = mock.patch.object(subcloud_audit_manager, 'OpenStackDriver')
self.mock_openstack_driver = p.start()
self.mock_openstack_driver.return_value = self.fake_openstack_client
# Mock the Audit Worker API
self.fake_audit_worker_api = FakeAuditWorkerAPI()
p = mock.patch('dcmanager.audit.rpcapi.ManagerAuditWorkerClient')
self.mock_audit_worker_api = p.start()
self.mock_audit_worker_api.return_value = self.fake_audit_worker_api
self.addCleanup(p.stop)
# Mock the context
@@ -235,15 +215,6 @@ class TestAuditManager(base.DCManagerTestCase):
self.mock_context.get_admin_context.return_value = self.ctx
self.addCleanup(p.stop)
# Mock alarm aggregation
self.fake_alarm_aggr = FakeAlarmAggregation()
p = mock.patch.object(subcloud_audit_manager,
'alarm_aggregation')
self.mock_alarm_aggr = p.start()
self.mock_alarm_aggr.AlarmAggregation.return_value = \
self.fake_alarm_aggr
self.addCleanup(p.stop)
# Mock patch audit
self.fake_patch_audit = FakePatchAudit()
p = mock.patch.object(subcloud_audit_manager,
@@ -291,382 +262,3 @@ class TestAuditManager(base.DCManagerTestCase):
def test_periodic_subcloud_audit(self):
am = subcloud_audit_manager.SubcloudAuditManager()
am._periodic_subcloud_audit_loop()
def test_audit_subcloud_online_managed(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
# Set the subcloud to managed
db_api.subcloud_update(
self.ctx, subcloud.id,
management_state='managed')
am = subcloud_audit_manager.SubcloudAuditManager()
# Audit the subcloud
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
am._audit_subcloud(subcloud.name, update_subcloud_state=False,
audit_openstack=False,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
# Verify the subcloud was set to online
self.fake_dcmanager_api.update_subcloud_availability.assert_called_with(
mock.ANY, subcloud.name, consts.AVAILABILITY_ONLINE,
False, 0)
# Verify the openstack endpoints were not updated
self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
assert_not_called()
# Verify alarm update is called
self.fake_alarm_aggr.update_alarm_summary.assert_called_with(
subcloud.name, self.fake_openstack_client.fm_client)
# Verify patch audit is called
self.fake_patch_audit.subcloud_patch_audit.assert_called_with(
subcloud.name, patch_audit_data, do_load_audit)
# Verify firmware audit is called
self.fake_firmware_audit.subcloud_firmware_audit.assert_called_with(
subcloud.name, firmware_audit_data)
def test_audit_subcloud_online_unmanaged(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
am = subcloud_audit_manager.SubcloudAuditManager()
# Audit the subcloud
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
am._audit_subcloud(subcloud.name, update_subcloud_state=False,
audit_openstack=False,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
# Verify the subcloud was set to online
self.fake_dcmanager_api.update_subcloud_availability.assert_called_with(
mock.ANY, subcloud.name, consts.AVAILABILITY_ONLINE,
False, 0)
# Verify the openstack endpoints were not added
self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
assert_not_called()
# Verify alarm update is not called
self.fake_alarm_aggr.update_alarm_summary.assert_not_called()
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
def test_audit_subcloud_online_no_change(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
am = subcloud_audit_manager.SubcloudAuditManager()
# Set the subcloud to online
db_api.subcloud_update(
self.ctx, subcloud.id,
availability_status=consts.AVAILABILITY_ONLINE)
# Audit the subcloud
am._audit_subcloud(subcloud.name, update_subcloud_state=False,
audit_openstack=False, patch_audit_data=None,
firmware_audit_data=None,
do_load_audit=False,
do_firmware_audit=False)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
assert_not_called()
# Verify the openstack endpoints were not added
self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
assert_not_called()
# Verify alarm update is not called
self.fake_alarm_aggr.update_alarm_summary.assert_not_called()
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
def test_audit_subcloud_online_no_change_force_update(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
am = subcloud_audit_manager.SubcloudAuditManager()
# Set the subcloud to online
db_api.subcloud_update(
self.ctx, subcloud.id,
availability_status=consts.AVAILABILITY_ONLINE)
# Audit the subcloud and force a state update
am._audit_subcloud(subcloud.name, update_subcloud_state=True,
audit_openstack=False, patch_audit_data=None,
firmware_audit_data=None,
do_load_audit=False,
do_firmware_audit=False)
# Verify the subcloud state was updated even though no change
self.fake_dcmanager_api.update_subcloud_availability.assert_called_with(
mock.ANY, subcloud.name, consts.AVAILABILITY_ONLINE,
True, None)
# Verify the openstack endpoints were not updated
self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
assert_not_called()
# Verify alarm update is not called
self.fake_alarm_aggr.update_alarm_summary.assert_not_called()
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
def test_audit_subcloud_go_offline(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
am = subcloud_audit_manager.SubcloudAuditManager()
# Set the subcloud to managed/online
db_api.subcloud_update(
self.ctx, subcloud.id,
management_state='managed',
availability_status=consts.AVAILABILITY_ONLINE)
# Mark a service group as inactive
self.fake_openstack_client.sysinv_client.get_service_groups_result = \
copy.deepcopy(FAKE_SERVICE_GROUPS)
self.fake_openstack_client.sysinv_client. \
get_service_groups_result[3].state = 'inactive'
# Audit the subcloud
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
am._audit_subcloud(subcloud.name, update_subcloud_state=False,
audit_openstack=False,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
# Verify the audit fail count was updated
audit_fail_count = 1
self.fake_dcmanager_api.update_subcloud_availability.\
assert_called_with(mock.ANY, subcloud.name,
None, False, audit_fail_count)
db_api.subcloud_update(self.ctx, subcloud.id,
audit_fail_count=audit_fail_count)
# Audit the subcloud again
am._audit_subcloud(subcloud.name, update_subcloud_state=False,
audit_openstack=False,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
audit_fail_count = audit_fail_count + 1
# Verify the subcloud was set to offline
self.fake_dcmanager_api.update_subcloud_availability.\
assert_called_with(mock.ANY, subcloud.name,
consts.AVAILABILITY_OFFLINE, False,
audit_fail_count)
# Verify alarm update is called only once
self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
subcloud.name, self.fake_openstack_client.fm_client)
# Verify patch audit is called only once
self.fake_patch_audit.subcloud_patch_audit.assert_called_once_with(
subcloud.name, mock.ANY, True)
# Verify firmware audit is called
self.fake_firmware_audit.subcloud_firmware_audit.assert_called_once_with(
subcloud.name, mock.ANY)
def test_audit_subcloud_offline_no_change(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
am = subcloud_audit_manager.SubcloudAuditManager()
db_api.subcloud_update(self.ctx, subcloud.id,
audit_fail_count=consts.AVAIL_FAIL_COUNT_MAX)
# Mark a service group as inactive
self.fake_openstack_client.sysinv_client.get_service_groups_result = \
copy.deepcopy(FAKE_SERVICE_GROUPS)
self.fake_openstack_client.sysinv_client. \
get_service_groups_result[3].state = 'inactive'
# Audit the subcloud
patch_audit_data, firmware_audit_data,\
do_load_audit, do_firmware_audit = am._get_audit_data()
am._audit_subcloud(subcloud.name, update_subcloud_state=False,
audit_openstack=True,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
assert_not_called()
# Verify the openstack endpoints were not updated
self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
assert_not_called()
# Verify alarm update is not called
self.fake_alarm_aggr.update_alarm_summary.assert_not_called()
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
def test_audit_subcloud_online_with_openstack_installed(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
am = subcloud_audit_manager.SubcloudAuditManager()
# Set the subcloud to online
db_api.subcloud_update(
self.ctx, subcloud.id,
management_state='managed',
availability_status=consts.AVAILABILITY_ONLINE)
# Audit the subcloud
am._audit_subcloud(subcloud.name, update_subcloud_state=False,
audit_openstack=True, patch_audit_data=None,
firmware_audit_data=None,
do_load_audit=False, do_firmware_audit=False)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
assert_not_called()
        # Verify the openstack endpoints were added (check currently commented out)
# self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
# assert_called_with(mock.ANY, 'subcloud1',
# dccommon_consts.ENDPOINT_TYPES_LIST_OS,
# True)
# Verify alarm update is called
self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
'subcloud1', self.fake_openstack_client.fm_client)
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
def test_audit_subcloud_online_with_openstack_removed(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
am = subcloud_audit_manager.SubcloudAuditManager()
# Set the subcloud to online and openstack installed
db_api.subcloud_update(
self.ctx, subcloud.id,
management_state='managed',
availability_status=consts.AVAILABILITY_ONLINE,
openstack_installed=True)
# Remove stx-openstack application
FAKE_APPLICATIONS.pop(1)
# Audit the subcloud
am._audit_subcloud(subcloud.name, update_subcloud_state=False,
audit_openstack=True, patch_audit_data=None,
firmware_audit_data=None,
do_load_audit=False, do_firmware_audit=False)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
assert_not_called()
# Verify the openstack endpoints were removed
self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
assert_called_with(mock.ANY, 'subcloud1',
dccommon_consts.ENDPOINT_TYPES_LIST_OS, False)
# Verify alarm update is called
self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
'subcloud1', self.fake_openstack_client.fm_client)
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
def test_audit_subcloud_online_with_openstack_inactive(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
am = subcloud_audit_manager.SubcloudAuditManager()
# Set the subcloud to online and openstack installed
db_api.subcloud_update(
self.ctx, subcloud.id,
management_state='managed',
availability_status=consts.AVAILABILITY_ONLINE,
openstack_installed=True)
# stx-openstack application is not active
FAKE_APPLICATIONS[1].active = False
# Audit the subcloud
am._audit_subcloud(subcloud.name, update_subcloud_state=False,
audit_openstack=True, patch_audit_data=None,
firmware_audit_data=None,
do_load_audit=False, do_firmware_audit=False)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
assert_not_called()
# Verify the openstack endpoints were removed
self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
assert_called_with(mock.ANY, 'subcloud1',
dccommon_consts.ENDPOINT_TYPES_LIST_OS, False)
# Verify alarm update is called
self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
'subcloud1', self.fake_openstack_client.fm_client)
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()

View File

@@ -0,0 +1,738 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# Copyright (c) 2017-2021 Wind River Systems, Inc.
#
# The right to copy, distribute, modify, or otherwise make use
# of this software may be licensed only pursuant to the terms
# of an applicable Wind River license agreement.
#
import copy
import mock
import sys
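# fm_core is not importable in the unit test environment, so stub it out
# before any dcmanager imports pull it in.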
sys.modules['fm_core'] = mock.Mock()
from dccommon import consts as dccommon_consts
from dcmanager.audit import subcloud_audit_manager
from dcmanager.audit import subcloud_audit_worker_manager
from dcmanager.common import consts
from dcmanager.db.sqlalchemy import api as db_api
from dcmanager.tests import base
class FakeDCManagerAPI(object):
def __init__(self):
self.update_subcloud_availability = mock.MagicMock()
self.update_subcloud_sync_endpoint_type = mock.MagicMock()
self.update_subcloud_endpoint_status = mock.MagicMock()
class FakeAuditWorkerAPI(object):
def __init__(self):
self.audit_subclouds = mock.MagicMock()
class FakeAlarmAggregation(object):
def __init__(self):
self.update_alarm_summary = mock.MagicMock()
class FakePatchAudit(object):
def __init__(self):
self.subcloud_patch_audit = mock.MagicMock()
self.get_regionone_audit_data = mock.MagicMock()
class FakeFirmwareAudit(object):
def __init__(self):
self.subcloud_firmware_audit = mock.MagicMock()
self.get_regionone_audit_data = mock.MagicMock()
class FakeServiceGroup(object):
def __init__(self, status, desired_state, service_group_name, uuid,
node_name, state, condition, name):
self.status = status
self.desired_state = desired_state
self.service_group_name = service_group_name
self.uuid = uuid
self.node_name = node_name
self.state = state
self.condition = condition
self.name = name
class FakeApplication(object):
def __init__(self, status, name, manifest_name, active, progress,
app_version, manifest_file):
self.status = status
self.name = name
self.manifest_name = manifest_name
self.active = active
self.progress = progress
self.app_version = app_version
self.manifest_file = manifest_file
FAKE_SERVICE_GROUPS = [
FakeServiceGroup("",
"active",
"distributed-cloud-services",
"b00fd252-5bd7-44b5-bbde-7d525e7125c7",
"controller-0",
"active",
"",
"controller"),
FakeServiceGroup("",
"active",
"storage-monitoring-services",
"5a14a1d1-dac1-48b0-9598-3702e0b0338a",
"controller-0",
"active",
"",
"controller"),
FakeServiceGroup("",
"active",
"storage-services",
"5cbfa903-379f-4329-81b4-2e88acdfa215",
"controller-0",
"active",
"",
"controller"),
FakeServiceGroup("",
"active",
"web-services",
"42829858-008f-4931-94e1-4b86fe31ce3c",
"controller-0",
"active",
"",
"controller"),
FakeServiceGroup("",
"active",
"directory-services",
"74225295-2601-4376-a52c-7cbd149146f6",
"controller-0",
"active",
"",
"controller"),
FakeServiceGroup("",
"active",
"patching-services",
"6870c079-e1c3-4402-b88b-63a5ef06a77a",
"controller-0",
"active",
"",
"controller"),
FakeServiceGroup("",
"active",
"vim-services",
"d8367a52-316e-418b-9211-a13331e073ef",
"controller-0",
"active",
"",
"controller"),
FakeServiceGroup("",
"active",
"cloud-services",
"12682dc0-cef5-427a-b1a6-145cf950b49c",
"controller-0",
"active",
"",
"controller"),
FakeServiceGroup("",
"active",
"controller-services",
"daac63fb-24b3-4cd1-b895-260a32e356ae",
"controller-0",
"active",
"",
"controller"),
FakeServiceGroup("",
"active",
"oam-services",
"4b66913d-98ba-4a4a-86c3-168625f629eb",
"controller-0",
"active",
"",
"controller"),
]
FAKE_APPLICATIONS = [
FakeApplication("applied",
"platform-integ-apps",
"platform-integration-manifest",
True,
"completed",
"1.0-8",
"manifest.yaml"),
FakeApplication("applied",
"stx-openstack",
"stx-openstack-manifest",
True,
"completed",
"1.0-8",
"manifest.yaml"),
]
class FakeSysinvClient(object):
def __init__(self, region, session):
self.get_service_groups_result = FAKE_SERVICE_GROUPS
self.get_applications_result = FAKE_APPLICATIONS
def get_service_groups(self):
return self.get_service_groups_result
def get_applications(self):
return self.get_applications_result
class FakeFmClient(object):
def get_alarm_summary(self):
pass
class FakeOpenStackDriver(object):
def __init__(self, region_name):
self.sysinv_client = FakeSysinvClient('fake_region', 'fake_session')
self.fm_client = FakeFmClient()
class TestAuditWorkerManager(base.DCManagerTestCase):
def setUp(self):
super(TestAuditWorkerManager, self).setUp()
# Mock the DCManager API
self.fake_dcmanager_api = FakeDCManagerAPI()
p = mock.patch('dcmanager.rpc.client.ManagerClient')
self.mock_dcmanager_api = p.start()
self.mock_dcmanager_api.return_value = self.fake_dcmanager_api
self.addCleanup(p.stop)
# Mock the Audit Worker API
self.fake_audit_worker_api = FakeAuditWorkerAPI()
p = mock.patch('dcmanager.audit.rpcapi.ManagerAuditWorkerClient')
self.mock_audit_worker_api = p.start()
self.mock_audit_worker_api.return_value = self.fake_audit_worker_api
self.addCleanup(p.stop)
# Mock the OpenStackDriver
self.fake_openstack_client = FakeOpenStackDriver('fake_region')
p = mock.patch.object(subcloud_audit_worker_manager, 'OpenStackDriver')
self.mock_openstack_driver = p.start()
self.mock_openstack_driver.return_value = self.fake_openstack_client
self.addCleanup(p.stop)
        # Mock the context in the audit worker manager
p = mock.patch.object(subcloud_audit_worker_manager, 'context')
self.mock_context = p.start()
self.mock_context.get_admin_context.return_value = self.ctx
self.addCleanup(p.stop)
        # Mock the context in the audit manager
p = mock.patch.object(subcloud_audit_manager, 'context')
self.mock_context2 = p.start()
self.mock_context2.get_admin_context.return_value = self.ctx
self.addCleanup(p.stop)
# Mock alarm aggregation
self.fake_alarm_aggr = FakeAlarmAggregation()
p = mock.patch.object(subcloud_audit_worker_manager,
'alarm_aggregation')
self.mock_alarm_aggr = p.start()
self.mock_alarm_aggr.AlarmAggregation.return_value = \
self.fake_alarm_aggr
self.addCleanup(p.stop)
        # Mock patch audit in the audit worker manager
self.fake_patch_audit = FakePatchAudit()
p = mock.patch.object(subcloud_audit_worker_manager,
'patch_audit')
self.mock_patch_audit = p.start()
self.mock_patch_audit.PatchAudit.return_value = \
self.fake_patch_audit
self.addCleanup(p.stop)
        # Mock patch audit in the audit manager
self.fake_patch_audit2 = FakePatchAudit()
p = mock.patch.object(subcloud_audit_manager,
'patch_audit')
self.mock_patch_audit2 = p.start()
self.mock_patch_audit2.PatchAudit.return_value = \
self.fake_patch_audit2
self.addCleanup(p.stop)
        # Mock firmware audit in the audit worker manager
self.fake_firmware_audit = FakeFirmwareAudit()
p = mock.patch.object(subcloud_audit_worker_manager,
'firmware_audit')
self.mock_firmware_audit = p.start()
self.mock_firmware_audit.FirmwareAudit.return_value = \
self.fake_firmware_audit
self.addCleanup(p.stop)
        # Mock firmware audit in the audit manager
self.fake_firmware_audit2 = FakeFirmwareAudit()
p = mock.patch.object(subcloud_audit_manager,
'firmware_audit')
self.mock_firmware_audit2 = p.start()
self.mock_firmware_audit2.FirmwareAudit.return_value = \
self.fake_firmware_audit2
self.addCleanup(p.stop)
@staticmethod
def create_subcloud_static(ctxt, **kwargs):
values = {
'name': "subcloud1",
'description': "This is a subcloud",
'location': "This is the location of the subcloud",
'software_version': "10.04",
'management_subnet': "192.168.101.0/24",
'management_gateway_ip': "192.168.101.1",
'management_start_ip': "192.168.101.2",
'management_end_ip': "192.168.101.50",
'systemcontroller_gateway_ip': "192.168.204.101",
'deploy_status': "not-deployed",
'openstack_installed': False,
'group_id': 1,
}
values.update(kwargs)
return db_api.subcloud_create(ctxt, **values)
def test_init(self):
am = subcloud_audit_worker_manager.SubcloudAuditWorkerManager()
self.assertIsNotNone(am)
self.assertEqual('subcloud_audit_worker_manager', am.service_name)
self.assertEqual('localhost', am.host)
self.assertEqual(self.ctx, am.context)
def test_audit_subcloud_online_managed(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
# Set the subcloud to managed
subcloud = db_api.subcloud_update(
self.ctx, subcloud.id,
management_state='managed')
am = subcloud_audit_manager.SubcloudAuditManager()
wm = subcloud_audit_worker_manager.SubcloudAuditWorkerManager()
# Audit the subcloud
do_patch_audit = True
do_load_audit = True
do_firmware_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(
do_patch_audit, do_firmware_audit)
        # Convert to a dict, as would happen when calling via RPC
patch_audit_data = patch_audit_data.to_dict()
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=False,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
do_patch_audit=do_patch_audit,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
# Verify the subcloud was set to online
self.fake_dcmanager_api.update_subcloud_availability.assert_called_with(
mock.ANY, subcloud.name, consts.AVAILABILITY_ONLINE,
False, 0)
# Verify the openstack endpoints were not updated
self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
assert_not_called()
# Verify alarm update is called
self.fake_alarm_aggr.update_alarm_summary.assert_called_with(
subcloud.name, self.fake_openstack_client.fm_client)
# Verify patch audit is called
self.fake_patch_audit.subcloud_patch_audit.assert_called_with(
subcloud.name, patch_audit_data, do_load_audit)
# Verify firmware audit is called
self.fake_firmware_audit.subcloud_firmware_audit.assert_called_with(
subcloud.name, firmware_audit_data)
def test_audit_subcloud_online_unmanaged(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
am = subcloud_audit_manager.SubcloudAuditManager()
wm = subcloud_audit_worker_manager.SubcloudAuditWorkerManager()
# Audit the subcloud
do_patch_audit = True
do_load_audit = True
do_firmware_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(
do_patch_audit, do_firmware_audit)
        # Convert to a dict, as would happen when calling via RPC
patch_audit_data = patch_audit_data.to_dict()
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=False,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
do_patch_audit=do_patch_audit,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
# Verify the subcloud was set to online
self.fake_dcmanager_api.update_subcloud_availability.assert_called_with(
mock.ANY, subcloud.name, consts.AVAILABILITY_ONLINE,
False, 0)
# Verify the openstack endpoints were not added
self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
assert_not_called()
# Verify alarm update is not called
self.fake_alarm_aggr.update_alarm_summary.assert_not_called()
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
def test_audit_subcloud_online_no_change(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
wm = subcloud_audit_worker_manager.SubcloudAuditWorkerManager()
# Set the subcloud to online
subcloud = db_api.subcloud_update(
self.ctx, subcloud.id,
availability_status=consts.AVAILABILITY_ONLINE)
# Audit the subcloud
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=False, patch_audit_data=None,
firmware_audit_data=None,
do_patch_audit=False,
do_load_audit=False,
do_firmware_audit=False)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
assert_not_called()
# Verify the openstack endpoints were not added
self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
assert_not_called()
# Verify alarm update is not called
self.fake_alarm_aggr.update_alarm_summary.assert_not_called()
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
def test_audit_subcloud_online_no_change_force_update(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
wm = subcloud_audit_worker_manager.SubcloudAuditWorkerManager()
# Set the subcloud to online
subcloud = db_api.subcloud_update(
self.ctx, subcloud.id,
availability_status=consts.AVAILABILITY_ONLINE)
# Audit the subcloud and force a state update
wm._audit_subcloud(subcloud, update_subcloud_state=True,
do_audit_openstack=False, patch_audit_data=None,
firmware_audit_data=None,
do_patch_audit=False,
do_load_audit=False,
do_firmware_audit=False)
# Verify the subcloud state was updated even though no change
self.fake_dcmanager_api.update_subcloud_availability.assert_called_with(
mock.ANY, subcloud.name, consts.AVAILABILITY_ONLINE,
True, None)
# Verify the openstack endpoints were not updated
self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
assert_not_called()
# Verify alarm update is not called
self.fake_alarm_aggr.update_alarm_summary.assert_not_called()
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
def test_audit_subcloud_go_offline(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
am = subcloud_audit_manager.SubcloudAuditManager()
wm = subcloud_audit_worker_manager.SubcloudAuditWorkerManager()
# Set the subcloud to managed/online
subcloud = db_api.subcloud_update(
self.ctx, subcloud.id,
management_state='managed',
availability_status=consts.AVAILABILITY_ONLINE)
# Mark a service group as inactive
self.fake_openstack_client.sysinv_client.get_service_groups_result = \
copy.deepcopy(FAKE_SERVICE_GROUPS)
self.fake_openstack_client.sysinv_client. \
get_service_groups_result[3].state = 'inactive'
# Audit the subcloud
do_patch_audit = True
do_load_audit = True
do_firmware_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(
do_patch_audit, do_firmware_audit)
        # Convert to a dict, as would happen when calling via RPC
patch_audit_data = patch_audit_data.to_dict()
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=False,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
do_patch_audit=do_patch_audit,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
# Verify the audit fail count was updated
audit_fail_count = 1
self.fake_dcmanager_api.update_subcloud_availability.\
assert_called_with(mock.ANY, subcloud.name,
None, False, audit_fail_count)
subcloud = db_api.subcloud_update(
self.ctx, subcloud.id, audit_fail_count=audit_fail_count)
# Audit the subcloud again
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=False,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
do_patch_audit=do_patch_audit,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
audit_fail_count = audit_fail_count + 1
# Verify the subcloud was set to offline
self.fake_dcmanager_api.update_subcloud_availability.\
assert_called_with(mock.ANY, subcloud.name,
consts.AVAILABILITY_OFFLINE, False,
audit_fail_count)
# Verify alarm update is called only once
self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
subcloud.name, self.fake_openstack_client.fm_client)
# Verify patch audit is called only once
self.fake_patch_audit.subcloud_patch_audit.assert_called_once_with(
subcloud.name, mock.ANY, True)
# Verify firmware audit is called
self.fake_firmware_audit.subcloud_firmware_audit.assert_called_once_with(
subcloud.name, mock.ANY)
def test_audit_subcloud_offline_no_change(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
am = subcloud_audit_manager.SubcloudAuditManager()
wm = subcloud_audit_worker_manager.SubcloudAuditWorkerManager()
subcloud = db_api.subcloud_update(
self.ctx, subcloud.id, audit_fail_count=consts.AVAIL_FAIL_COUNT_MAX)
# Mark a service group as inactive
self.fake_openstack_client.sysinv_client.get_service_groups_result = \
copy.deepcopy(FAKE_SERVICE_GROUPS)
self.fake_openstack_client.sysinv_client. \
get_service_groups_result[3].state = 'inactive'
# Audit the subcloud
do_patch_audit = True
do_load_audit = True
do_firmware_audit = True
patch_audit_data, firmware_audit_data = am._get_audit_data(
do_patch_audit, do_firmware_audit)
        # Convert to a dict, as would happen when calling via RPC
patch_audit_data = patch_audit_data.to_dict()
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=True,
patch_audit_data=patch_audit_data,
firmware_audit_data=firmware_audit_data,
do_patch_audit=do_patch_audit,
do_load_audit=do_load_audit,
do_firmware_audit=do_firmware_audit)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
assert_not_called()
# Verify the openstack endpoints were not updated
self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
assert_not_called()
# Verify alarm update is not called
self.fake_alarm_aggr.update_alarm_summary.assert_not_called()
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
def test_audit_subcloud_online_with_openstack_installed(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
wm = subcloud_audit_worker_manager.SubcloudAuditWorkerManager()
# Set the subcloud to online
subcloud = db_api.subcloud_update(
self.ctx, subcloud.id,
management_state='managed',
availability_status=consts.AVAILABILITY_ONLINE)
# Audit the subcloud
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=True, patch_audit_data=None,
firmware_audit_data=None, do_patch_audit=False,
do_load_audit=False, do_firmware_audit=False)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
assert_not_called()
        # Verify the openstack endpoints were added (check currently commented out)
# self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
# assert_called_with(mock.ANY, 'subcloud1',
# dccommon_consts.ENDPOINT_TYPES_LIST_OS,
# True)
# Verify alarm update is called
self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
'subcloud1', self.fake_openstack_client.fm_client)
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
def test_audit_subcloud_online_with_openstack_removed(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
wm = subcloud_audit_worker_manager.SubcloudAuditWorkerManager()
# Set the subcloud to online and openstack installed
subcloud = db_api.subcloud_update(
self.ctx, subcloud.id,
management_state='managed',
availability_status=consts.AVAILABILITY_ONLINE,
openstack_installed=True)
# Remove stx-openstack application
FAKE_APPLICATIONS.pop(1)
# Audit the subcloud
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=True, patch_audit_data=None,
firmware_audit_data=None, do_patch_audit=False,
do_load_audit=False, do_firmware_audit=False)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
assert_not_called()
# Verify the openstack endpoints were removed
self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
assert_called_with(mock.ANY, 'subcloud1',
dccommon_consts.ENDPOINT_TYPES_LIST_OS, False)
# Verify alarm update is called
self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
'subcloud1', self.fake_openstack_client.fm_client)
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()
def test_audit_subcloud_online_with_openstack_inactive(self):
subcloud = self.create_subcloud_static(self.ctx, name='subcloud1')
self.assertIsNotNone(subcloud)
wm = subcloud_audit_worker_manager.SubcloudAuditWorkerManager()
# Set the subcloud to online and openstack installed
subcloud = db_api.subcloud_update(
self.ctx, subcloud.id,
management_state='managed',
availability_status=consts.AVAILABILITY_ONLINE,
openstack_installed=True)
# stx-openstack application is not active
FAKE_APPLICATIONS[1].active = False
# Audit the subcloud
wm._audit_subcloud(subcloud, update_subcloud_state=False,
do_audit_openstack=True, patch_audit_data=None,
firmware_audit_data=None, do_patch_audit=False,
do_load_audit=False, do_firmware_audit=False)
# Verify the subcloud state was not updated
self.fake_dcmanager_api.update_subcloud_availability.\
assert_not_called()
# Verify the openstack endpoints were removed
self.fake_dcmanager_api.update_subcloud_sync_endpoint_type.\
assert_called_with(mock.ANY, 'subcloud1',
dccommon_consts.ENDPOINT_TYPES_LIST_OS, False)
# Verify alarm update is called
self.fake_alarm_aggr.update_alarm_summary.assert_called_once_with(
'subcloud1', self.fake_openstack_client.fm_client)
# Verify patch audit is not called
self.fake_patch_audit.subcloud_patch_audit.assert_not_called()
# Verify firmware audit is not called
self.fake_firmware_audit.subcloud_firmware_audit.assert_not_called()

View File

@@ -0,0 +1,322 @@
#!/bin/sh
# OpenStack DC Manager Audit Worker Service (dcmanager-audit-worker)
#
# Description:
# Manages an OpenStack DC Manager Audit-Worker Service (dcmanager-audit-worker)
# process as an HA resource
#
# Copyright (c) 2021 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
#
# See usage() function below for more details ...
#
# OCF instance parameters:
# OCF_RESKEY_binary
# OCF_RESKEY_config
# OCF_RESKEY_user
# OCF_RESKEY_pid
# OCF_RESKEY_additional_parameters
#######################################################################
# Initialization:
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
#######################################################################
# Fill in some defaults if no values are specified
OCF_RESKEY_binary_default="/usr/bin/dcmanager-audit-worker"
OCF_RESKEY_config_default="/etc/dcmanager/dcmanager.conf"
OCF_RESKEY_user_default="root"
OCF_RESKEY_pid_default="$HA_RSCTMP/$OCF_RESOURCE_INSTANCE.pid"
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
#######################################################################
usage() {
cat <<UEND
usage: $0 (start|stop|validate-all|meta-data|status|monitor)
$0 manages an OpenStack DC Manager Audit-worker service (dcmanager-audit-worker) process as an HA resource
The 'start' operation starts the dcmanager-audit-worker service.
The 'stop' operation stops the dcmanager-audit-worker service.
The 'validate-all' operation reports whether the parameters are valid.
The 'meta-data' operation reports this RA's meta-data information.
The 'status' operation reports whether the dcmanager-audit-worker service is running.
The 'monitor' operation reports whether the dcmanager-audit-worker service seems to be working.
UEND
}
meta_data() {
cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="dcmanager-audit-worker">
<version>1.0</version>
<longdesc lang="en">
Resource agent for the DC Manager Audit-worker Service (dcmanager-audit-worker)
</longdesc>
<shortdesc lang="en">Manages the OpenStack DC Manager Audit-worker Service (dcmanager-audit-worker)</shortdesc>
<parameters>
<parameter name="binary" unique="0" required="0">
<longdesc lang="en">
Location of the DC Manager Audit-worker Service binary (dcmanager-audit-worker)
</longdesc>
<shortdesc lang="en">DC Manager Audit-worker Service binary (dcmanager-audit-worker)</shortdesc>
<content type="string" default="${OCF_RESKEY_binary_default}" />
</parameter>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Location of the DC Manager Audit-worker Service (dcmanager-audit-worker) configuration file
</longdesc>
<shortdesc lang="en">DC Manager Audit-worker Service (dcmanager-audit-worker registry) config file</shortdesc>
<content type="string" default="${OCF_RESKEY_config_default}" />
</parameter>
<parameter name="user" unique="0" required="0">
<longdesc lang="en">
User running DC Manager Audit-worker Service (dcmanager-audit-worker)
</longdesc>
<shortdesc lang="en">DC Manager Audit-worker Service (dcmanager-audit-worker) user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>
<parameter name="pid" unique="0" required="0">
<longdesc lang="en">
The pid file to use for this DC Manager Audit-worker Service (dcmanager-audit-worker) instance
</longdesc>
<shortdesc lang="en">DC Manager Audit-worker Service (dcmanager-audit-worker) pid file</shortdesc>
<content type="string" default="${OCF_RESKEY_pid_default}" />
</parameter>
<parameter name="additional_parameters" unique="0" required="0">
<longdesc lang="en">
Additional parameters to pass on to the dcmanager-audit-worker
</longdesc>
<shortdesc lang="en">Additional parameters for dcmanager-audit-worker</shortdesc>
<content type="string" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="20" />
<action name="stop" timeout="20" />
<action name="status" timeout="20" />
<action name="monitor" timeout="10" interval="5" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
</resource-agent>
END
}
#######################################################################
# Functions invoked by resource manager actions
dcmanager_audit_validate() {
local rc
check_binary $OCF_RESKEY_binary
check_binary curl
check_binary tr
check_binary grep
check_binary cut
check_binary head
# A config file on shared storage that is not available
# during probes is OK.
if [ ! -f $OCF_RESKEY_config ]; then
if ! ocf_is_probe; then
ocf_log err "Config $OCF_RESKEY_config doesn't exist"
return $OCF_ERR_INSTALLED
fi
ocf_log_warn "Config $OCF_RESKEY_config not available during a probe"
fi
getent passwd $OCF_RESKEY_user >/dev/null 2>&1
rc=$?
if [ $rc -ne 0 ]; then
ocf_log err "User $OCF_RESKEY_user doesn't exist"
return $OCF_ERR_INSTALLED
fi
true
}
dcmanager_audit_status() {
local pid
local rc
if [ ! -f $OCF_RESKEY_pid ]; then
ocf_log info "DC Manager Audit-worker Service (dcmanager-audit-worker) is not running"
return $OCF_NOT_RUNNING
else
pid=`cat $OCF_RESKEY_pid`
fi
ocf_run -warn kill -s 0 $pid
rc=$?
if [ $rc -eq 0 ]; then
return $OCF_SUCCESS
else
ocf_log info "Old PID file found, but DC Manager Audit-worker Service (dcmanager-audit-worker) is not running"
rm -f $OCF_RESKEY_pid
return $OCF_NOT_RUNNING
fi
}
dcmanager_audit_monitor() {
local rc
dcmanager_audit_status
rc=$?
# If status returned anything but success, return that immediately
if [ $rc -ne $OCF_SUCCESS ]; then
return $rc
fi
ocf_log debug "DC Manager Audit-worker Service (dcmanager-audit-worker) monitor succeeded"
return $OCF_SUCCESS
}
dcmanager_audit_start() {
local rc
dcmanager_audit_status
rc=$?
if [ $rc -eq $OCF_SUCCESS ]; then
ocf_log info "DC Manager Audit-worker Service (dcmanager-audit-worker) already running"
return $OCF_SUCCESS
fi
    # Change the working dir to /, to be sure it's accessible
cd /
# run the actual dcmanager-audit-worker daemon. Don't use ocf_run as we're sending the tool's output
# straight to /dev/null anyway and using ocf_run would break stdout-redirection here.
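    # The daemon is backgrounded and its PID, captured via "echo $!", is written to the OCF pid file.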
su ${OCF_RESKEY_user} -s /bin/sh -c "${OCF_RESKEY_binary} --config-file=$OCF_RESKEY_config \
$OCF_RESKEY_additional_parameters"' >> /dev/null 2>&1 & echo $!' > $OCF_RESKEY_pid
# Spin waiting for the server to come up.
# Let the CRM/LRM time us out if required
while true; do
dcmanager_audit_monitor
rc=$?
[ $rc -eq $OCF_SUCCESS ] && break
if [ $rc -ne $OCF_NOT_RUNNING ]; then
ocf_log err "DC Manager Audit-worker Service (dcmanager-audit-worker) start failed"
exit $OCF_ERR_GENERIC
fi
sleep 1
done
ocf_log info "DC Manager Audit-worker Service (dcmanager-audit-worker) started"
return $OCF_SUCCESS
}
dcmanager_audit_confirm_stop() {
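    # As a safety net, SIGKILL any leftover python processes still running this binary.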
    local my_binary
local my_processes
my_binary=`which ${OCF_RESKEY_binary}`
my_processes=`pgrep -l -f "^(python|/usr/bin/python|/usr/bin/python2) ${my_binary}([^\w-]|$)"`
if [ -n "${my_processes}" ]
then
ocf_log info "About to SIGKILL the following: ${my_processes}"
pkill -KILL -f "^(python|/usr/bin/python|/usr/bin/python2) ${my_binary}([^\w-]|$)"
fi
}
dcmanager_audit_stop() {
local rc
local pid
dcmanager_audit_status
rc=$?
if [ $rc -eq $OCF_NOT_RUNNING ]; then
ocf_log info "DC Manager Audit-worker Service (dcmanager-audit-worker) already stopped"
dcmanager_audit_confirm_stop
return $OCF_SUCCESS
fi
# Try SIGTERM
pid=`cat $OCF_RESKEY_pid`
ocf_run kill -s TERM $pid
rc=$?
if [ $rc -ne 0 ]; then
ocf_log err "DC Manager Audit-worker Service (dcmanager-audit-worker) couldn't be stopped"
dcmanager_audit_confirm_stop
exit $OCF_ERR_GENERIC
fi
    # Wait for the service to stop; use the CRM stop timeout (minus 5 seconds)
    # when one is provided, otherwise default to 15 seconds.
shutdown_timeout=15
if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
shutdown_timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000)-5))
fi
count=0
while [ $count -lt $shutdown_timeout ]; do
dcmanager_audit_status
rc=$?
if [ $rc -eq $OCF_NOT_RUNNING ]; then
break
fi
count=`expr $count + 1`
sleep 1
ocf_log debug "DC Manager Audit-worker Service (dcmanager-audit-worker) still hasn't stopped yet. Waiting ..."
done
dcmanager_audit_status
rc=$?
if [ $rc -ne $OCF_NOT_RUNNING ]; then
# SIGTERM didn't help either, try SIGKILL
ocf_log info "DC Manager Audit-worker Service (dcmanager-audit-worker) failed to stop after ${shutdown_timeout}s \
using SIGTERM. Trying SIGKILL ..."
ocf_run kill -s KILL $pid
fi
dcmanager_audit_confirm_stop
ocf_log info "DC Manager Audit-worker Service (dcmanager-audit-worker) stopped"
rm -f $OCF_RESKEY_pid
return $OCF_SUCCESS
}
#######################################################################
case "$1" in
meta-data) meta_data
exit $OCF_SUCCESS;;
usage|help) usage
exit $OCF_SUCCESS;;
esac
# Anything except meta-data and help must pass validation
dcmanager_audit_validate || exit $?
# What kind of method was invoked?
case "$1" in
start) dcmanager_audit_start;;
stop) dcmanager_audit_stop;;
status) dcmanager_audit_status;;
monitor) dcmanager_audit_monitor;;
validate-all) ;;
*) usage
exit $OCF_ERR_UNIMPLEMENTED;;
esac

View File

@@ -30,6 +30,7 @@ packages =
console_scripts =
dcmanager-api = dcmanager.cmd.api:main
dcmanager-audit = dcmanager.cmd.audit:main
dcmanager-audit-worker = dcmanager.cmd.audit_worker:main
dcmanager-orchestrator = dcmanager.cmd.orchestrator:main
dcmanager-manager = dcmanager.cmd.manager:main
dcmanager-manage = dcmanager.cmd.manage:main
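
The new console script above points at dcmanager.cmd.audit_worker:main, which is not included in this excerpt. For orientation only, a minimal oslo-style entry point of that shape might look like the sketch below; the AuditWorkerService class, the project name passed to oslo.config, and the worker count are placeholders and assumptions, not the actual dcmanager code.

# Hypothetical sketch only -- not the dcmanager.cmd.audit_worker module added by this commit.
import sys

from oslo_config import cfg
from oslo_log import log as logging
from oslo_service import service

CONF = cfg.CONF


class AuditWorkerService(service.Service):
    """Placeholder service; the real worker logic lives under dcmanager.audit."""

    def start(self):
        super(AuditWorkerService, self).start()
        # A real worker would start an RPC server here to handle the
        # "audit_subclouds" messages described in the commit message.


def main():
    logging.register_options(CONF)
    CONF(sys.argv[1:], project='dcmanager')
    logging.setup(CONF, 'dcmanager-audit-worker')
    # oslo.service forks child processes when workers > 1; a single worker is used here.
    launcher = service.launch(CONF, AuditWorkerService(), workers=1)
    launcher.wait()


if __name__ == '__main__':
    main()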