
1) Remove DC manager orchestration from dcmanager-manager process 2) Create dcmanager-orchestrator process and associated files 3) Add new RPC calls for dcmanager-orchestrator process to notify dcmanager 4) Create/update unit tests, to verify the implementation changes Story: 2007267 Task: 40734 Change-Id: Ibbbae77558a8a8fd95b636fa6c3aebb1dfefb514 Signed-off-by: Jessica Castelino <jessica.castelino@windriver.com>
440 lines
19 KiB
Python
440 lines
19 KiB
Python
# Copyright 2017 Ericsson AB.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
# implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
# Copyright (c) 2017-2020 Wind River Systems, Inc.
|
|
#
|
|
# The right to copy, distribute, modify, or otherwise make use
|
|
# of this software may be licensed only pursuant to the terms
|
|
# of an applicable Wind River license agreement.
|
|
#
|
|
import threading
|
|
|
|
from oslo_log import log as logging
|
|
|
|
from dcmanager.audit import rpcapi as dcmanager_audit_rpc_client
|
|
from dcmanager.common import consts
|
|
from dcmanager.common import exceptions
|
|
from dcmanager.common import manager
|
|
from dcmanager.db import api as db_api
|
|
from dcmanager.orchestrator.fw_update_orch_thread import FwUpdateOrchThread
|
|
from dcmanager.orchestrator.patch_orch_thread import PatchOrchThread
|
|
from dcmanager.orchestrator.sw_upgrade_orch_thread import SwUpgradeOrchThread
|
|
from dcorch.common import consts as dcorch_consts
|
|
|
|
LOG = logging.getLogger(__name__)
|
|
|
|
|
|
class SwUpdateManager(manager.Manager):
    """Manages tasks related to software updates.

    Owns one orchestration worker thread per update type (patch, software
    upgrade, firmware update) and services the create / delete / apply /
    abort strategy requests by reading and updating strategy records
    through ``db_api``.
    """

    def __init__(self, *args, **kwargs):
        """Create the shared lock and audit client, then start the workers.

        Positional and keyword arguments are forwarded unchanged to
        ``manager.Manager``.
        """
        LOG.debug('SwUpdateManager initialization...')

        super(SwUpdateManager, self).__init__(service_name="sw_update_manager",
                                              *args, **kwargs)
        # Used to protect strategies when an atomic read/update is required.
        self.strategy_lock = threading.Lock()

        # Used to notify dcmanager-audit
        self.audit_rpc_client = dcmanager_audit_rpc_client.ManagerAuditClient()

        # Start worker threads
        # - patch orchestration thread
        self.patch_orch_thread = PatchOrchThread(self.strategy_lock,
                                                 self.audit_rpc_client)
        self.patch_orch_thread.start()
        # - sw upgrade orchestration thread
        self.sw_upgrade_orch_thread = SwUpgradeOrchThread(
            self.strategy_lock, self.audit_rpc_client)
        self.sw_upgrade_orch_thread.start()
        # - fw update orchestration thread
        self.fw_update_orch_thread = FwUpdateOrchThread(
            self.strategy_lock, self.audit_rpc_client)
        self.fw_update_orch_thread.start()

    def stop(self):
        """Stop and join each orchestration worker thread in turn."""
        # - patch orchestration thread
        self.patch_orch_thread.stop()
        self.patch_orch_thread.join()
        # - sw upgrade orchestration thread
        self.sw_upgrade_orch_thread.stop()
        self.sw_upgrade_orch_thread.join()
        # - fw update orchestration thread
        self.fw_update_orch_thread.stop()
        self.fw_update_orch_thread.join()

    def _validate_subcloud_status_sync(self, strategy_type,
                                       subcloud_status, force):
        """Check the appropriate subcloud_status fields for the strategy_type

        :param strategy_type: one of the consts.SW_UPDATE_TYPE_* values
        :param subcloud_status: status record exposing ``endpoint_type`` and
            ``sync_status``
        :param force: only consulted for upgrades; when true, any sync
            status other than in-sync counts as out of sync
        Returns: True if out of sync. False otherwise, including for a
            strategy_type that has no check implemented (an error is logged).
        """
        if strategy_type == consts.SW_UPDATE_TYPE_PATCH:
            return (subcloud_status.endpoint_type ==
                    dcorch_consts.ENDPOINT_TYPE_PATCHING and
                    subcloud_status.sync_status ==
                    consts.SYNC_STATUS_OUT_OF_SYNC)
        elif strategy_type == consts.SW_UPDATE_TYPE_UPGRADE:
            if force:
                # Forced: anything that is not explicitly in-sync qualifies
                return (subcloud_status.endpoint_type ==
                        dcorch_consts.ENDPOINT_TYPE_LOAD and
                        subcloud_status.sync_status !=
                        consts.SYNC_STATUS_IN_SYNC)
            else:
                return (subcloud_status.endpoint_type ==
                        dcorch_consts.ENDPOINT_TYPE_LOAD and
                        subcloud_status.sync_status ==
                        consts.SYNC_STATUS_OUT_OF_SYNC)
        elif strategy_type == consts.SW_UPDATE_TYPE_FIRMWARE:
            return (subcloud_status.endpoint_type ==
                    dcorch_consts.ENDPOINT_TYPE_FIRMWARE and
                    subcloud_status.sync_status ==
                    consts.SYNC_STATUS_OUT_OF_SYNC)
        # Unimplemented strategy_type status check. Log an error
        LOG.error("_validate_subcloud_status_sync for %s not implemented",
                  strategy_type)
        return False

    def create_sw_update_strategy(self, context, payload):
        """Create software update strategy.

        Keys read from ``payload``: 'type' (required), and optionally
        'subcloud-apply-type', 'max-parallel-subclouds', 'stop-on-failure',
        'force' and 'cloud_name'.

        :param context: request context object
        :param payload: strategy configuration
        :returns: the new strategy as produced by
            ``db_api.sw_update_strategy_db_model_to_dict``
        :raises exceptions.BadRequest: if a strategy already exists, the
            named subcloud does not exist or is already in sync, or the
            relevant sync status of a targeted subcloud is unknown
        """
        LOG.info("Creating software update strategy of type %s.",
                 payload['type'])

        # Don't create a strategy if one exists. No need to filter by type
        try:
            strategy = db_api.sw_update_strategy_get(context, update_type=None)
        except exceptions.NotFound:
            pass
        else:
            raise exceptions.BadRequest(
                resource='strategy',
                msg="Strategy of type: '%s' already exists" % strategy.type)

        strategy_type = payload.get('type')

        # if use_group_apply_type = True, we use the subcloud_apply_type
        # specified for each subcloud group
        # else we use the subcloud_apply_type specified through CLI
        subcloud_apply_type = payload.get('subcloud-apply-type')
        use_group_apply_type = not subcloud_apply_type

        # if use_group_max_parallel = True, we use the max_parallel_subclouds
        # value specified for each subcloud group
        # else we use the max_parallel_subclouds value specified through CLI
        max_parallel_subclouds_str = payload.get('max-parallel-subclouds')
        use_group_max_parallel = not max_parallel_subclouds_str
        if use_group_max_parallel:
            max_parallel_subclouds = None
        else:
            max_parallel_subclouds = int(max_parallel_subclouds_str)

        # Both flags are enabled only by the exact string 'true'; a missing
        # or any other value leaves them False.
        stop_on_failure = payload.get('stop-on-failure') == 'true'
        force = payload.get('force') == 'true'

        # Has the user specified a specific subcloud?
        cloud_name = payload.get('cloud_name')
        if cloud_name and cloud_name != consts.SYSTEM_CONTROLLER_NAME:
            # Make sure subcloud exists
            try:
                subcloud = db_api.subcloud_get_by_name(context, cloud_name)
            except exceptions.SubcloudNameNotFound:
                raise exceptions.BadRequest(
                    resource='strategy',
                    msg='Subcloud %s does not exist' % cloud_name)

            if strategy_type == consts.SW_UPDATE_TYPE_UPGRADE:
                # Make sure subcloud requires upgrade
                subcloud_status = db_api.subcloud_status_get(
                    context, subcloud.id, dcorch_consts.ENDPOINT_TYPE_LOAD)
                if subcloud_status.sync_status == consts.SYNC_STATUS_IN_SYNC:
                    raise exceptions.BadRequest(
                        resource='strategy',
                        msg='Subcloud %s does not require upgrade'
                            % cloud_name)
            elif strategy_type == consts.SW_UPDATE_TYPE_FIRMWARE:
                # Make sure subcloud requires a firmware update
                subcloud_status = db_api.subcloud_status_get(
                    context, subcloud.id,
                    dcorch_consts.ENDPOINT_TYPE_FIRMWARE)
                if subcloud_status.sync_status == consts.SYNC_STATUS_IN_SYNC:
                    raise exceptions.BadRequest(
                        resource='strategy',
                        msg='Subcloud %s does not require firmware update'
                            % cloud_name)
            elif strategy_type == consts.SW_UPDATE_TYPE_PATCH:
                # Make sure subcloud requires patching
                subcloud_status = db_api.subcloud_status_get(
                    context, subcloud.id,
                    dcorch_consts.ENDPOINT_TYPE_PATCHING)
                if subcloud_status.sync_status == consts.SYNC_STATUS_IN_SYNC:
                    raise exceptions.BadRequest(
                        resource='strategy',
                        msg='Subcloud %s does not require patching'
                            % cloud_name)

        # Don't create a strategy if any of the subclouds is online and the
        # relevant sync status is unknown. Offline subcloud is skipped unless
        # --force option is specified and strategy type is upgrade.
        subclouds = db_api.subcloud_get_all_with_status(context)
        for subcloud, subcloud_status in subclouds:
            if (cloud_name and subcloud.name != cloud_name or
                    subcloud.management_state != consts.MANAGEMENT_MANAGED):
                # We are not updating this subcloud
                continue

            if strategy_type == consts.SW_UPDATE_TYPE_UPGRADE:
                if subcloud.availability_status != consts.AVAILABILITY_ONLINE:
                    # A forced upgrade accepts an offline subcloud without
                    # checking its sync status.
                    if not force:
                        continue
                elif (subcloud_status.endpoint_type ==
                        dcorch_consts.ENDPOINT_TYPE_LOAD and
                        subcloud_status.sync_status ==
                        consts.SYNC_STATUS_UNKNOWN):
                    raise exceptions.BadRequest(
                        resource='strategy',
                        msg='Upgrade sync status is unknown for one or more '
                            'subclouds')
            elif strategy_type == consts.SW_UPDATE_TYPE_PATCH:
                if subcloud.availability_status != consts.AVAILABILITY_ONLINE:
                    continue
                elif (subcloud_status.endpoint_type ==
                        dcorch_consts.ENDPOINT_TYPE_PATCHING and
                        subcloud_status.sync_status ==
                        consts.SYNC_STATUS_UNKNOWN):
                    raise exceptions.BadRequest(
                        resource='strategy',
                        msg='Patching sync status is unknown for one or more '
                            'subclouds')
            elif strategy_type == consts.SW_UPDATE_TYPE_FIRMWARE:
                if subcloud.availability_status != consts.AVAILABILITY_ONLINE:
                    continue
                elif (subcloud_status.endpoint_type ==
                        dcorch_consts.ENDPOINT_TYPE_FIRMWARE and
                        subcloud_status.sync_status ==
                        consts.SYNC_STATUS_UNKNOWN):
                    raise exceptions.BadRequest(
                        resource='strategy',
                        msg='Firmware sync status is unknown for one or more '
                            'subclouds')

        # Create the strategy
        strategy = db_api.sw_update_strategy_create(
            context,
            strategy_type,
            subcloud_apply_type,
            max_parallel_subclouds,
            stop_on_failure,
            consts.SW_UPDATE_STATE_INITIAL)

        # For 'upgrade' do not create a strategy step for the system controller
        # For 'firmware' do not create a strategy step for system controller
        # For 'patch', always create a strategy step for the system controller
        if strategy_type == consts.SW_UPDATE_TYPE_PATCH:
            db_api.strategy_step_create(
                context,
                None,  # None means not a subcloud. ie: SystemController
                stage=1,
                state=consts.STRATEGY_STATE_INITIAL,
                details='')

        # Create a strategy step for each subcloud that is managed, online and
        # out of sync
        current_stage = 2
        stage_size = 0
        stage_updated = False

        # Fetch all subcloud groups
        groups = db_api.subcloud_group_get_all(context)

        for group in groups:
            # Fetch subcloud list for each group
            subclouds_list = db_api.subcloud_get_for_group(context, group.id)
            if use_group_max_parallel:
                max_parallel_subclouds = group.max_parallel_subclouds
            if use_group_apply_type:
                subcloud_apply_type = group.update_apply_type
            for subcloud in subclouds_list:
                # NOTE(review): this flag is reset for every subcloud,
                # including ones skipped below, so a group whose last
                # subcloud is skipped still advances current_stage after
                # the loop; stage numbers may therefore contain gaps.
                stage_updated = False
                if (cloud_name and subcloud.name != cloud_name or
                        subcloud.management_state !=
                        consts.MANAGEMENT_MANAGED):
                    # We are not targeting for update this subcloud
                    continue

                offline = (subcloud.availability_status !=
                           consts.AVAILABILITY_ONLINE)
                # Offline subclouds are only kept for a forced upgrade
                if offline and not (
                        strategy_type == consts.SW_UPDATE_TYPE_UPGRADE and
                        force):
                    continue

                # force option only has an effect in offline case
                forced_validate = force and offline

                subcloud_status = db_api.subcloud_status_get_all(context,
                                                                 subcloud.id)
                for status in subcloud_status:
                    if self._validate_subcloud_status_sync(strategy_type,
                                                           status,
                                                           forced_validate):
                        LOG.debug("Created for %s", subcloud.id)
                        db_api.strategy_step_create(
                            context,
                            subcloud.id,
                            stage=current_stage,
                            state=consts.STRATEGY_STATE_INITIAL,
                            details='')

                        # We have added a subcloud to this stage
                        stage_size += 1
                        if (consts.SUBCLOUD_APPLY_TYPE_SERIAL in
                                subcloud_apply_type):
                            # For serial apply type always move to next stage
                            stage_updated = True
                            current_stage += 1
                        elif stage_size >= max_parallel_subclouds:
                            # For parallel apply type, move to next stage if
                            # we have reached the maximum subclouds for this
                            # stage
                            stage_updated = True
                            current_stage += 1
                            stage_size = 0

            # Reset the stage_size before iterating through a new subcloud
            # group
            stage_size = 0
            # current_stage value is updated only when subcloud_apply_type is
            # serial or the max_parallel_subclouds limit is reached. If the
            # value is updated for either one of these reasons and it also
            # happens to be the last iteration for this particular group, the
            # following check will prevent the current_stage value from being
            # updated twice
            if not stage_updated:
                current_stage += 1

        strategy_dict = db_api.sw_update_strategy_db_model_to_dict(
            strategy)
        return strategy_dict

    def delete_sw_update_strategy(self, context, update_type=None):
        """Delete software update strategy.

        :param context: request context object.
        :param update_type: the type to filter on querying
        :returns: the updated strategy as a dict
        :raises exceptions.BadRequest: if the strategy is not in the
            initial, complete, failed or aborted state
        """
        LOG.info("Deleting software update strategy.")

        # Ensure our read/update of the strategy is done without interference
        # The strategy object is common to all workers (patch, upgrades, etc)
        with self.strategy_lock:
            # Retrieve the existing strategy from the database
            sw_update_strategy = \
                db_api.sw_update_strategy_get(context, update_type=update_type)

            # Semantic checking
            if sw_update_strategy.state not in [
                    consts.SW_UPDATE_STATE_INITIAL,
                    consts.SW_UPDATE_STATE_COMPLETE,
                    consts.SW_UPDATE_STATE_FAILED,
                    consts.SW_UPDATE_STATE_ABORTED]:
                raise exceptions.BadRequest(
                    resource='strategy',
                    msg='Strategy in state %s cannot be deleted' %
                        sw_update_strategy.state)

            # Set the state to deleting, which will trigger the orchestration
            # to delete it...
            sw_update_strategy = db_api.sw_update_strategy_update(
                context,
                state=consts.SW_UPDATE_STATE_DELETING,
                update_type=update_type)

        strategy_dict = db_api.sw_update_strategy_db_model_to_dict(
            sw_update_strategy)
        return strategy_dict

    def apply_sw_update_strategy(self, context, update_type=None):
        """Apply software update strategy.

        :param context: request context object.
        :param update_type: the type to filter on querying
        :returns: the updated strategy as a dict
        :raises exceptions.BadRequest: if the strategy is not in the
            initial state
        """
        LOG.info("Applying software update strategy.")

        # Ensure our read/update of the strategy is done without interference
        with self.strategy_lock:
            # Retrieve the existing strategy from the database
            sw_update_strategy = \
                db_api.sw_update_strategy_get(context, update_type=update_type)

            # Semantic checking
            if sw_update_strategy.state != consts.SW_UPDATE_STATE_INITIAL:
                raise exceptions.BadRequest(
                    resource='strategy',
                    msg='Strategy in state %s cannot be applied' %
                        sw_update_strategy.state)

            # Set the state to applying, which will trigger the orchestration
            # to begin...
            sw_update_strategy = db_api.sw_update_strategy_update(
                context,
                state=consts.SW_UPDATE_STATE_APPLYING,
                update_type=update_type)
        strategy_dict = db_api.sw_update_strategy_db_model_to_dict(
            sw_update_strategy)
        return strategy_dict

    def abort_sw_update_strategy(self, context, update_type=None):
        """Abort software update strategy.

        :param context: request context object.
        :param update_type: the type to filter on querying
        :returns: the updated strategy as a dict
        :raises exceptions.BadRequest: if the strategy is not currently
            in the applying state
        """
        LOG.info("Aborting software update strategy.")

        # Ensure our read/update of the strategy is done without interference
        with self.strategy_lock:
            # Retrieve the existing strategy from the database
            sw_update_strategy = \
                db_api.sw_update_strategy_get(context, update_type=update_type)

            # Semantic checking
            if sw_update_strategy.state != consts.SW_UPDATE_STATE_APPLYING:
                raise exceptions.BadRequest(
                    resource='strategy',
                    msg='Strategy in state %s cannot be aborted' %
                        sw_update_strategy.state)

            # Set the state to abort requested, which will trigger
            # the orchestration to abort... The same update_type filter used
            # for the read above is passed so the update targets the
            # strategy that was just validated.
            sw_update_strategy = db_api.sw_update_strategy_update(
                context,
                state=consts.SW_UPDATE_STATE_ABORT_REQUESTED,
                update_type=update_type)
        strategy_dict = db_api.sw_update_strategy_db_model_to_dict(
            sw_update_strategy)
        return strategy_dict
|