nfv/nfv/nfv-vim/nfv_vim/strategy/_strategy.py
Eric MacDonald 615340ce62 Add orchestrated fw update support to vim
This update adds firmware update orchestration support to
the VIM for host device fpga images as described in the
'N3000 FPGA device image update orchestration' feature
specification. See https://review.opendev.org/#/c/713302/

The firmware update orchestration strategy added to the VIM
is modeled after the VIM's existing patch orchestration
and includes strategy create/apply/show/abort/delete.

The strategy can be managed through the existing sw-manager
tool via the VIM's REST API.

Only unlocked hosts with the worker function are included
in the strategy.

The strategy includes a stage for each host or group of hosts
with ordered step sequences of firmware update followed by a
lock and unlock of each updated host.

Change-Id: I4eea7ca3f00782d27cdbb5d5615c6a255ac6966e
Story: 2006740
Task: 39145
Signed-off-by: Eric MacDonald <eric.macdonald@windriver.com>
2020-05-14 01:03:21 +00:00

2018 lines
85 KiB
Python
Executable File

#
# Copyright (c) 2015-2020 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
import copy
import six
import weakref
from nfv_common import debug
from nfv_common.helpers import Constant
from nfv_common.helpers import Constants
from nfv_common.helpers import get_local_host_name
from nfv_common.helpers import Singleton
from nfv_common import strategy
from nfv_vim.nfvi.objects.v1 import UPGRADE_STATE
from nfv_vim.objects import HOST_GROUP_POLICY
from nfv_vim.objects import HOST_NAME
from nfv_vim.objects import HOST_PERSONALITY
from nfv_vim.objects import HOST_SERVICES
from nfv_vim.objects import INSTANCE_GROUP_POLICY
from nfv_vim.objects import SW_UPDATE_APPLY_TYPE
from nfv_vim.objects import SW_UPDATE_INSTANCE_ACTION
DLOG = debug.debug_get_logger('nfv_vim.strategy')
@six.add_metaclass(Singleton)
class StrategyNames(Constants):
"""
Strategy Names

Names of the orchestration strategies: software patch, software
upgrade and firmware update.
"""
SW_PATCH = Constant('sw-patch')
SW_UPGRADE = Constant('sw-upgrade')
FW_UPDATE = Constant('fw-update')
# Constant Instantiation
# Module-level instance through which the names are referenced
# (e.g. STRATEGY_NAME.SW_PATCH).
STRATEGY_NAME = StrategyNames()
# SystemStabilize timeout constants (passed as timeout_in_secs to the
# SystemStabilizeStep instances created by the strategies below):
# After a reboot patch is applied, we need to wait for maintenance to detect
# that the host is patch current
MTCE_DELAY = 15
# a no-reboot patch can stabilize in 30 seconds
NO_REBOOT_DELAY = 30
###################################################################
#
# The Software Update Strategy ; Base Class
#
###################################################################
class SwUpdateStrategy(strategy.Strategy):
"""
Software Update - Strategy
"""
def __init__(self, uuid, strategy_name, controller_apply_type,
             storage_apply_type,
             swift_apply_type, worker_apply_type,
             max_parallel_worker_hosts, default_instance_action,
             alarm_restrictions,
             ignore_alarms):
    """
    Initialize the settings shared by all software update strategies
    """
    super(SwUpdateStrategy, self).__init__(uuid, strategy_name)

    # No software update object is attached until the sw_update_obj
    # setter is used.
    self._sw_update_obj_reference = None

    # Apply type for each kind of host.
    self._controller_apply_type = controller_apply_type
    self._storage_apply_type = storage_apply_type
    self._swift_apply_type = swift_apply_type
    self._worker_apply_type = worker_apply_type

    self._max_parallel_worker_hosts = max_parallel_worker_hosts
    self._default_instance_action = default_instance_action
    self._alarm_restrictions = alarm_restrictions

    # Keep a private copy of the ignore_alarms list; subclass service
    # strategies add their own entries and must not modify the list
    # that was passed in.
    self._ignore_alarms = copy.copy(ignore_alarms)

    self._nfvi_alarms = []
@property
def sw_update_obj(self):
    """
    Returns the software update object this strategy is a part of, or
    None when no object has been set or the object no longer exists
    """
    reference = self._sw_update_obj_reference
    if reference is None:
        # __init__ leaves the reference as None until the setter is
        # used; calling it in that state would raise a TypeError.
        return None
    return reference()

@sw_update_obj.setter
def sw_update_obj(self, sw_update_obj_value):
    """
    Set the software update object this strategy is a part of

    Only a weak reference to the object is kept.
    """
    self._sw_update_obj_reference = weakref.ref(sw_update_obj_value)
@property
def nfvi_alarms(self):
    """
    The NFVI layer alarms currently held by this strategy
    """
    return self._nfvi_alarms

@nfvi_alarms.setter
def nfvi_alarms(self, nfvi_alarms):
    """
    Replace the NFVI layer alarms held by this strategy
    """
    self._nfvi_alarms = nfvi_alarms
def save(self):
    """
    Persist the strategy by saving the software update object it is a
    part of; nothing is done when there is no such object
    """
    if self.sw_update_obj is None:
        return

    self.sw_update_obj.save()
def build(self):
    """
    Build the strategy

    Delegates to the parent class; child classes are expected to
    override this and add their own build stages first.
    """
    super(SwUpdateStrategy, self).build()
def _create_storage_host_lists(self, storage_hosts):
    """
    Group the given storage hosts into lists of hosts to update together

    Returns a tuple of (host_lists, reason). On a validation failure
    host_lists is None and reason says why; otherwise reason is ''.
    """
    from nfv_vim import tables

    apply_type = self._storage_apply_type

    if apply_type != SW_UPDATE_APPLY_TYPE.IGNORE:
        host_table = tables.tables_get_host_table()

        # Every host handed in must really be a storage host.
        for host in storage_hosts:
            if HOST_PERSONALITY.STORAGE not in host.personality:
                DLOG.error("Host inventory personality storage mismatch "
                           "detected for host %s." % host.name)
                return None, ('host inventory personality storage '
                              'mismatch detected')

        # At least two storage hosts are required.
        if host_table.total_by_personality(HOST_PERSONALITY.STORAGE) < 2:
            DLOG.warn("Not enough storage hosts to apply software updates.")
            return None, 'not enough storage hosts to apply software updates'

    host_lists = []

    if apply_type == SW_UPDATE_APPLY_TYPE.SERIAL:
        # One host per list.
        host_lists.extend([storage_host] for storage_host in storage_hosts)

    elif apply_type == SW_UPDATE_APPLY_TYPE.PARALLEL:
        policy = HOST_GROUP_POLICY.STORAGE_REPLICATION
        host_group_table = tables.tables_get_host_group_table()

        for host in storage_hosts:
            # Add the host to the first list that has no member in the
            # same storage replication group; otherwise start a new list.
            for candidate_list in host_lists:
                shares_group = any(
                    host_group_table.same_group(policy, host.name,
                                                peer_host.name)
                    for peer_host in candidate_list)
                if not shares_group:
                    candidate_list.append(host)
                    break
            else:
                host_lists.append([host])

    else:
        DLOG.verbose("Storage apply type set to ignore.")

    return host_lists, ''
def _create_worker_host_lists(self, worker_hosts, reboot):
    """
    Group the given worker hosts into lists of hosts to update together

    Returns a tuple of (host_lists, reason). On a validation failure
    host_lists is None and reason says why; otherwise reason is ''.
    """
    from nfv_vim import tables

    def has_policy_conflict(peer_host):
        """
        Check whether an instance on the current host and an instance
        on the peer host are members of the same anti-affinity group
        """
        conflict = any(
            instance_group_table.same_group(policy, instance.uuid,
                                            peer_instance.uuid)
            for instance in instance_table.on_host(host.name)
            for peer_instance in instance_table.on_host(peer_host.name)
            for policy in policies)
        if conflict:
            return True

        DLOG.debug("No instance group policy conflict between host %s and "
                   "host %s." % (host.name, peer_host.name))
        return False

    def calculate_host_aggregate_limits():
        """
        Calculate limit for each host aggregate
        """
        # The ratio of the max parallel worker hosts to the total number
        # of worker hosts limits how many hosts of each aggregate are
        # patched at the same time. With multiple aggregates, this helps
        # select hosts from more than one aggregate for each stage.
        host_table = tables.tables_get_host_table()
        num_worker_hosts = host_table.total_by_personality(
            HOST_PERSONALITY.WORKER)

        # Never more than half the worker hosts of an aggregate.
        aggregate_ratio = min(
            float(self._max_parallel_worker_hosts) / num_worker_hosts, 0.5)

        for aggregate_key in host_aggregate_table:
            aggregate_count = len(
                host_aggregate_table[aggregate_key].host_names)
            if aggregate_count == 1:
                # only one host in this aggregate
                limit = 1
            else:
                # several hosts - apply the ratio, rounding down, but
                # never go below 1.
                limit = max(1, int(aggregate_count * aggregate_ratio))
            host_aggregate_limit[aggregate_key] = limit

    def aggregate_limit_reached():
        """
        Determine whether adding the current host to the current
        host_list would exceed the number of hosts to be updated in the
        same aggregate

        Note: This isn't efficient; host_aggregate_table.get_by_host is
        called many times and traverses all the aggregates each time.
        A dictionary mapping host names to their aggregates, built once,
        would make this calculation quicker.
        """
        # hosts of the current host_list, counted per aggregate
        hosts_per_aggregate = {}
        for existing_host in host_list:
            for aggregate in host_aggregate_table.get_by_host(
                    existing_host.name):
                hosts_per_aggregate[aggregate.name] = \
                    hosts_per_aggregate.get(aggregate.name, 0) + 1

        # would the current host push any of its aggregates over the
        # limit?
        for aggregate in host_aggregate_table.get_by_host(host.name):
            if aggregate.name not in hosts_per_aggregate:
                continue
            if hosts_per_aggregate[aggregate.name] == \
                    host_aggregate_limit[aggregate.name]:
                return True

        DLOG.debug("No host aggregate limit reached for host %s." % (host.name))
        return False

    instance_table = tables.tables_get_instance_table()
    instance_group_table = tables.tables_get_instance_group_table()

    if SW_UPDATE_APPLY_TYPE.IGNORE != self._worker_apply_type:
        for host in worker_hosts:
            if HOST_PERSONALITY.WORKER not in host.personality:
                DLOG.error("Host inventory personality worker mismatch "
                           "detected for host %s." % host.name)
                return None, ('host inventory personality worker '
                              'mismatch detected')

        # Do not allow reboots if there are locked instances that are
        # members of an instance group. This could result in a service
        # disruption when the remaining instances are stopped or
        # migrated.
        if reboot:
            for instance in instance_table.values():
                if not instance.is_locked():
                    continue
                for instance_group in instance_group_table.get_by_instance(
                        instance.uuid):
                    DLOG.warn(
                        "Instance %s in group %s must not be shut down"
                        % (instance.name, instance_group.name))
                    reason = (
                        'instance %s in group %s must not be shut down'
                        % (instance.name, instance_group.name))
                    return None, reason

    host_lists = []

    if SW_UPDATE_APPLY_TYPE.SERIAL == self._worker_apply_type:
        # controller hosts go first, each in a list of its own
        host_lists.extend(
            [worker] for worker in worker_hosts
            if HOST_PERSONALITY.CONTROLLER in worker.personality)

        # then the workers without instances, then those with instances
        without_instances = []
        with_instances = []
        for worker in worker_hosts:
            if HOST_PERSONALITY.CONTROLLER in worker.personality:
                continue
            if instance_table.exist_on_host(worker.name):
                with_instances.append([worker])
            else:
                without_instances.append([worker])

        host_lists.extend(without_instances)
        host_lists.extend(with_instances)

    elif SW_UPDATE_APPLY_TYPE.PARALLEL == self._worker_apply_type:
        policies = [INSTANCE_GROUP_POLICY.ANTI_AFFINITY,
                    INSTANCE_GROUP_POLICY.ANTI_AFFINITY_BEST_EFFORT]

        host_aggregate_table = tables.tables_get_host_aggregate_table()
        host_aggregate_limit = {}
        calculate_host_aggregate_limits()

        controller_list = []
        # host_lists[0] collects the workers that need no placement
        # checks
        host_lists.append([])

        for host in worker_hosts:
            if HOST_PERSONALITY.CONTROLLER in host.personality:
                # have to swact the controller so put it in its own list
                controller_list.append([host])
                continue

            if not reboot or not instance_table.exist_on_host(host.name):
                # parallel no-reboot can group all workers together, and
                # so can workers with no instances
                host_lists[0].append(host)
                continue

            # add the host to the first list that can take it, else
            # create a new list
            for host_list in host_lists[1:]:
                if len(host_list) >= self._max_parallel_worker_hosts:
                    # this list is full
                    continue

                if any(has_policy_conflict(peer_host)
                       for peer_host in host_list):
                    # anti-affinity conflict with a host in this list
                    continue

                if aggregate_limit_reached():
                    # too many hosts of one aggregate in this list
                    continue

                host_list.append(host)
                break
            else:
                host_lists.append([host])

        if controller_list:
            # handle controller hosts first
            host_lists = controller_list + host_lists

    else:
        DLOG.verbose("Worker apply type set to ignore.")

    # Drop empty lists and enforce a maximum number of hosts to be
    # updated at once (only needed for the list of workers with no
    # instances; the limit for workers with instances was enforced
    # above).
    max_hosts = self._max_parallel_worker_hosts
    sized_host_lists = []
    for host_list in host_lists:
        if not host_list:
            continue

        if len(host_list) <= max_hosts:
            sized_host_lists.append(host_list)
            continue

        start, end = 0, max_hosts
        while start < len(host_list):
            sized_host_lists.append(host_list[start:end])
            start = end
            end += max_hosts

    return sized_host_lists, ''
def build_complete(self, result, result_reason):
    """
    Strategy Build Complete

    Passes the result to the parent class and returns the
    (result, result_reason) pair it reports.
    """
    parent = super(SwUpdateStrategy, self)
    result, result_reason = parent.build_complete(result, result_reason)
    return result, result_reason
def apply(self, stage_id):
    """
    Apply the strategy

    Delegates to the parent class and returns its (success, reason)
    pair.
    """
    parent = super(SwUpdateStrategy, self)
    success, reason = parent.apply(stage_id)
    return success, reason
def apply_complete(self, result, result_reason):
    """
    Strategy Apply Complete

    Tells the software update object whether the apply succeeded; a
    degraded result is reported as a success.
    """
    result, result_reason = \
        super(SwUpdateStrategy, self).apply_complete(result, result_reason)

    DLOG.info("Apply Complete Callback, result=%s, reason=%s."
              % (result, result_reason))

    succeeded = result in (strategy.STRATEGY_RESULT.SUCCESS,
                           strategy.STRATEGY_RESULT.DEGRADED)
    if not succeeded:
        self.sw_update_obj.strategy_apply_complete(
            False, self.apply_phase.result_reason)
        return

    self.sw_update_obj.strategy_apply_complete(True, '')
def abort(self, stage_id):
    """
    Abort the strategy

    Delegates to the parent class and returns its (success, reason)
    pair.
    """
    parent = super(SwUpdateStrategy, self)
    success, reason = parent.abort(stage_id)
    return success, reason
def abort_complete(self, result, result_reason):
    """
    Strategy Abort Complete

    Tells the software update object whether the abort succeeded; a
    degraded result is reported as a success.
    """
    result, result_reason = \
        super(SwUpdateStrategy, self).abort_complete(result, result_reason)

    DLOG.info("Abort Complete Callback, result=%s, reason=%s."
              % (result, result_reason))

    succeeded = result in (strategy.STRATEGY_RESULT.SUCCESS,
                           strategy.STRATEGY_RESULT.DEGRADED)
    if not succeeded:
        self.sw_update_obj.strategy_abort_complete(
            False, self.abort_phase.result_reason)
        return

    self.sw_update_obj.strategy_abort_complete(True, '')
def from_dict(self, data, build_phase=None, apply_phase=None, abort_phase=None):
    """
    Load the software update strategy settings and NFVI alarms from the
    given dictionary and return this strategy object
    """
    from nfv_vim import nfvi

    super(SwUpdateStrategy, self).from_dict(data, build_phase, apply_phase,
                                            abort_phase)

    self._controller_apply_type = data['controller_apply_type']
    self._storage_apply_type = data['storage_apply_type']
    self._swift_apply_type = data['swift_apply_type']
    self._worker_apply_type = data['worker_apply_type']
    self._max_parallel_worker_hosts = data['max_parallel_worker_hosts']
    self._default_instance_action = data['default_instance_action']
    self._alarm_restrictions = data['alarm_restrictions']
    self._ignore_alarms = data['ignore_alarms']

    # Rebuild an alarm object from each saved alarm dictionary.
    self._nfvi_alarms = [
        nfvi.objects.v1.Alarm(
            entry['alarm_uuid'], entry['alarm_id'],
            entry['entity_instance_id'], entry['severity'],
            entry['reason_text'], entry['timestamp'],
            entry['mgmt_affecting'])
        for entry in data['nfvi_alarms_data']]

    return self
def as_dict(self):
    """
    Return the software update strategy as a dictionary: the parent
    class data plus the settings and NFVI alarms held here
    """
    data = super(SwUpdateStrategy, self).as_dict()

    data.update({
        'controller_apply_type': self._controller_apply_type,
        'storage_apply_type': self._storage_apply_type,
        'swift_apply_type': self._swift_apply_type,
        'worker_apply_type': self._worker_apply_type,
        'max_parallel_worker_hosts': self._max_parallel_worker_hosts,
        'default_instance_action': self._default_instance_action,
        'alarm_restrictions': self._alarm_restrictions,
        'ignore_alarms': self._ignore_alarms,
    })

    # Each alarm is stored in its own dictionary form.
    data['nfvi_alarms_data'] = [
        alarm.as_dict() for alarm in self._nfvi_alarms]

    return data
###################################################################
#
# The Software Patch Strategy
#
###################################################################
class SwPatchStrategy(SwUpdateStrategy):
"""
Software Patch - Strategy
"""
def __init__(self, uuid, controller_apply_type, storage_apply_type,
             swift_apply_type, worker_apply_type,
             max_parallel_worker_hosts, default_instance_action,
             alarm_restrictions,
             ignore_alarms,
             single_controller):
    """
    Initialize a software patch strategy
    """
    super(SwPatchStrategy, self).__init__(
        uuid, STRATEGY_NAME.SW_PATCH,
        controller_apply_type, storage_apply_type,
        swift_apply_type, worker_apply_type,
        max_parallel_worker_hosts, default_instance_action,
        alarm_restrictions, ignore_alarms)

    # Alarms that do not prevent a software patch operation; they are
    # added to the alarms the caller asked to ignore.
    patch_ignore_alarms = [
        '900.001',  # Patch in progress
        '900.005',  # Upgrade in progress
        '900.101',  # Software patch auto apply in progress
        '200.001',  # Maintenance host lock alarm
        '700.004',  # VM stopped
        '280.002',  # Subcloud resource out-of-sync
    ]
    self._ignore_alarms += patch_ignore_alarms

    self._single_controller = single_controller
    self._nfvi_sw_patches = []
    self._nfvi_sw_patch_hosts = []
@property
def nfvi_sw_patches(self):
    """
    The NFVI layer software patches currently held by this strategy
    """
    return self._nfvi_sw_patches

@nfvi_sw_patches.setter
def nfvi_sw_patches(self, nfvi_sw_patches):
    """
    Replace the NFVI layer software patches held by this strategy
    """
    self._nfvi_sw_patches = nfvi_sw_patches
@property
def nfvi_sw_patch_hosts(self):
    """
    The NFVI layer software patch hosts currently held by this strategy
    """
    return self._nfvi_sw_patch_hosts

@nfvi_sw_patch_hosts.setter
def nfvi_sw_patch_hosts(self, nfvi_sw_patch_hosts):
    """
    Replace the NFVI layer software patch hosts held by this strategy
    """
    self._nfvi_sw_patch_hosts = nfvi_sw_patch_hosts
def build(self):
    """
    Build the strategy

    Adds one query stage (alarms, software patches, software patch
    hosts) to the build phase, then lets the parent class build.
    """
    from nfv_vim import strategy

    query_stage = strategy.StrategyStage(
        strategy.STRATEGY_STAGE_NAME.SW_PATCH_QUERY)

    alarms_step = strategy.QueryAlarmsStep(
        ignore_alarms=self._ignore_alarms)
    query_stage.add_step(alarms_step)

    patches_step = strategy.QuerySwPatchesStep()
    query_stage.add_step(patches_step)

    patch_hosts_step = strategy.QuerySwPatchHostsStep()
    query_stage.add_step(patch_hosts_step)

    self.build_phase.add_stage(query_stage)

    super(SwPatchStrategy, self).build()
def _add_controller_strategy_stages(self, controllers, reboot):
    """
    Add controller software patch strategy stages

    Returns a tuple of (success, reason); reason is '' on success.
    """
    from nfv_vim import strategy
    from nfv_vim import tables

    def add_controller_stage(controller):
        """
        Add the apply stage that patches a single controller
        """
        host_list = [controller]
        stage = strategy.StrategyStage(
            strategy.STRATEGY_STAGE_NAME.SW_PATCH_CONTROLLERS)
        stage.add_step(strategy.QueryAlarmsStep(
            True, ignore_alarms=self._ignore_alarms))
        if reboot:
            stage.add_step(strategy.SwactHostsStep(host_list))
            stage.add_step(strategy.LockHostsStep(host_list))
        stage.add_step(strategy.SwPatchHostsStep(host_list))
        if reboot:
            # Cannot unlock right away after SwPatchHostsStep
            stage.add_step(strategy.SystemStabilizeStep(
                timeout_in_secs=MTCE_DELAY))
            stage.add_step(strategy.UnlockHostsStep(host_list))
            stage.add_step(strategy.SystemStabilizeStep())
        else:
            # Less time required if host is not rebooting
            stage.add_step(strategy.SystemStabilizeStep(
                timeout_in_secs=NO_REBOOT_DELAY))
        self.apply_phase.add_stage(stage)

    if SW_UPDATE_APPLY_TYPE.IGNORE != self._controller_apply_type:
        host_table = tables.tables_get_host_table()

        for host in controllers:
            if HOST_PERSONALITY.CONTROLLER not in host.personality:
                DLOG.error("Host inventory personality controller mismatch "
                           "detected for host %s." % host.name)
                return False, ('host inventory personality controller '
                               'mismatch detected')

        if (not self._single_controller and
                2 > host_table.total_by_personality(
                    HOST_PERSONALITY.CONTROLLER)):
            DLOG.warn("Not enough controllers to apply software patches.")
            return False, 'not enough controllers to apply software patches'

    if self._controller_apply_type == SW_UPDATE_APPLY_TYPE.SERIAL:
        local_host = None
        local_host_name = get_local_host_name()

        # Controllers that also carry the worker personality are not
        # handled here. The local controller is remembered and patched
        # after all the others.
        for host in controllers:
            if HOST_PERSONALITY.WORKER in host.personality:
                continue
            if local_host_name == host.name:
                local_host = host
            else:
                add_controller_stage(host)

        if local_host is not None:
            add_controller_stage(local_host)

    elif self._controller_apply_type == SW_UPDATE_APPLY_TYPE.PARALLEL:
        DLOG.warn("Parallel apply type cannot be used for controllers.")
        return False, 'parallel apply type not allowed for controllers'

    else:
        DLOG.verbose("Controller apply type set to ignore.")

    return True, ''
def _add_storage_strategy_stages(self, storage_hosts, reboot):
    """
    Add storage software patch strategy stages, one per list of storage
    hosts that are patched together

    Returns a tuple of (success, reason); reason is '' on success.
    """
    from nfv_vim import strategy

    host_lists, reason = self._create_storage_host_lists(storage_hosts)
    if host_lists is None:
        return False, reason

    for hosts in host_lists:
        stage = strategy.StrategyStage(
            strategy.STRATEGY_STAGE_NAME.SW_PATCH_STORAGE_HOSTS)

        stage.add_step(strategy.QueryAlarmsStep(
            True, ignore_alarms=self._ignore_alarms))

        if reboot:
            stage.add_step(strategy.LockHostsStep(hosts))

        stage.add_step(strategy.SwPatchHostsStep(hosts))

        if not reboot:
            stage.add_step(strategy.SystemStabilizeStep(
                timeout_in_secs=NO_REBOOT_DELAY))
        else:
            # Cannot unlock right away after SwPatchHostsStep
            stage.add_step(strategy.SystemStabilizeStep(
                timeout_in_secs=MTCE_DELAY))
            stage.add_step(strategy.UnlockHostsStep(hosts))
            # Once the storage node(s) are unlocked, extra time is
            # needed for the OSDs to go back in sync and for the
            # storage related alarms to clear.
            stage.add_step(strategy.WaitDataSyncStep(
                timeout_in_secs=30 * 60,
                ignore_alarms=self._ignore_alarms))

        self.apply_phase.add_stage(stage)

    return True, ''
def _add_swift_strategy_stages(self, swift_hosts, reboot):
    """
    Add swift software patch strategy stages, one per swift host

    Returns a tuple of (success, reason); reason is '' on success.
    """
    from nfv_vim import strategy
    from nfv_vim import tables

    apply_type = self._swift_apply_type

    if apply_type != SW_UPDATE_APPLY_TYPE.IGNORE:
        host_table = tables.tables_get_host_table()

        for host in swift_hosts:
            if HOST_PERSONALITY.SWIFT not in host.personality:
                DLOG.error("Host inventory personality swift mismatch "
                           "detected for host %s." % host.name)
                return False, ('host inventory personality swift '
                               'mismatch detected')

        if host_table.total_by_personality(HOST_PERSONALITY.SWIFT) < 2:
            DLOG.warn("Not enough swift hosts to apply software patches.")
            return False, 'not enough swift hosts to apply software patches'

    if apply_type not in [SW_UPDATE_APPLY_TYPE.SERIAL,
                          SW_UPDATE_APPLY_TYPE.PARALLEL]:
        DLOG.verbose("Swift apply type set to ignore.")
        return True, ''

    # Serial and parallel apply types are handled the same way: every
    # swift host gets a stage of its own.
    for swift_host in swift_hosts:
        hosts = [swift_host]
        stage = strategy.StrategyStage(
            strategy.STRATEGY_STAGE_NAME.SW_PATCH_SWIFT_HOSTS)

        stage.add_step(strategy.QueryAlarmsStep(
            True, ignore_alarms=self._ignore_alarms))

        if reboot:
            stage.add_step(strategy.LockHostsStep(hosts))

        stage.add_step(strategy.SwPatchHostsStep(hosts))

        if not reboot:
            stage.add_step(strategy.SystemStabilizeStep(
                timeout_in_secs=NO_REBOOT_DELAY))
        else:
            # Cannot unlock right away after SwPatchHostsStep
            stage.add_step(strategy.SystemStabilizeStep(
                timeout_in_secs=MTCE_DELAY))
            stage.add_step(strategy.UnlockHostsStep(hosts))
            stage.add_step(strategy.SystemStabilizeStep())

        self.apply_phase.add_stage(stage)

    return True, ''
def _add_worker_strategy_stages(self, worker_hosts, reboot):
    """
    Add worker software patch strategy stages, one per list of worker
    hosts that are patched together

    Returns a tuple of (success, reason); reason is '' on success.
    """
    from nfv_vim import strategy
    from nfv_vim import tables

    if (SW_UPDATE_APPLY_TYPE.IGNORE != self._worker_apply_type and
            self._single_controller):
        # When using a single controller/worker host that is running
        # OpenStack, only allow the stop/start instance action.
        for host in worker_hosts:
            if (host.openstack_compute and
                    HOST_PERSONALITY.CONTROLLER in host.personality and
                    SW_UPDATE_INSTANCE_ACTION.STOP_START !=
                    self._default_instance_action):
                DLOG.error("Cannot migrate instances in a single "
                           "controller configuration")
                return False, ('cannot migrate instances in a single '
                               'controller configuration')

    host_lists, reason = self._create_worker_host_lists(worker_hosts, reboot)
    if host_lists is None:
        return False, reason

    instance_table = tables.tables_get_instance_table()

    for host_list in host_lists:
        # Locked (i.e. stopped) instances are left alone: no migrate or
        # stop-start action is taken on them.
        instance_list = [
            instance
            for member in host_list
            for instance in instance_table.on_host(member.name)
            if not instance.is_locked()]

        if reboot:
            hosts_to_lock = [x for x in host_list if not x.is_locked()]
            hosts_to_reboot = [x for x in host_list if x.is_locked()]
        else:
            hosts_to_lock = []
            hosts_to_reboot = []

        stage = strategy.StrategyStage(
            strategy.STRATEGY_STAGE_NAME.SW_PATCH_WORKER_HOSTS)

        stage.add_step(strategy.QueryAlarmsStep(
            True, ignore_alarms=self._ignore_alarms))

        if reboot:
            if (1 == len(host_list) and
                    HOST_PERSONALITY.CONTROLLER in
                    host_list[0].personality and
                    not self._single_controller):
                # Swact controller before locking
                stage.add_step(strategy.SwactHostsStep(host_list))

            if instance_list:
                # Migrate or stop instances as necessary
                if SW_UPDATE_INSTANCE_ACTION.MIGRATE != \
                        self._default_instance_action:
                    stage.add_step(strategy.StopInstancesStep(
                        instance_list))
                else:
                    if (SW_UPDATE_APPLY_TYPE.PARALLEL ==
                            self._worker_apply_type and
                            host_list[0].host_service_configured(
                                HOST_SERVICES.COMPUTE)):
                        # Disable host services before migrating so
                        # that instances do not migrate to worker hosts
                        # in the same set of hosts.
                        stage.add_step(strategy.DisableHostServicesStep(
                            host_list, HOST_SERVICES.COMPUTE))
                    # TODO(ksmith)
                    # When support is added for orchestration on
                    # non-OpenStack worker nodes, support for disabling
                    # kubernetes services will have to be added.
                    stage.add_step(strategy.MigrateInstancesStep(
                        instance_list))

            if hosts_to_lock:
                # A single controller will not go disabled when it is
                # locked, so do not wait for that.
                lone_single_controller = (
                    1 == len(hosts_to_lock) and
                    HOST_PERSONALITY.CONTROLLER in
                    hosts_to_lock[0].personality and
                    self._single_controller)
                wait_until_disabled = False if lone_single_controller else True

                # Lock hosts
                stage.add_step(strategy.LockHostsStep(
                    hosts_to_lock, wait_until_disabled=wait_until_disabled))

        # Patch hosts
        stage.add_step(strategy.SwPatchHostsStep(host_list))

        if not reboot:
            # Less time required if host is not rebooting
            stage.add_step(strategy.SystemStabilizeStep(
                timeout_in_secs=NO_REBOOT_DELAY))
        else:
            # Cannot unlock right away after SwPatchHostsStep
            stage.add_step(strategy.SystemStabilizeStep(
                timeout_in_secs=MTCE_DELAY))

            if hosts_to_lock:
                # Unlock hosts that were locked
                stage.add_step(strategy.UnlockHostsStep(hosts_to_lock))

            if hosts_to_reboot:
                # Reboot hosts that were already locked
                stage.add_step(strategy.RebootHostsStep(hosts_to_reboot))

            if instance_list and SW_UPDATE_INSTANCE_ACTION.MIGRATE != \
                    self._default_instance_action:
                # Start any instances that were stopped
                stage.add_step(strategy.StartInstancesStep(instance_list))

            stage.add_step(strategy.SystemStabilizeStep())

        self.apply_phase.add_stage(stage)

    return True, ''
def build_complete(self, result, result_reason):
    """
    Strategy Build Complete

    Called with the result of the build (query) phase. When that phase
    succeeded (or is degraded), the queried alarms, patches and host
    states are validated and the apply phase stages are added for every
    host that is not patch current. The software update object is told
    whether the build succeeded, and the strategy is saved.
    """
    from nfv_vim import strategy
    from nfv_vim import tables

    result, result_reason = \
        super(SwPatchStrategy, self).build_complete(result, result_reason)

    DLOG.info("Build Complete Callback, result=%s, reason=%s."
              % (result, result_reason))

    if result not in [strategy.STRATEGY_RESULT.SUCCESS,
                      strategy.STRATEGY_RESULT.DEGRADED]:
        # The build phase itself failed. Report the failure and stop
        # here so that a success is not reported as well.
        self.sw_update_obj.strategy_build_complete(
            False, self.build_phase.result_reason)
        self.save()
        return

    host_table = tables.tables_get_host_table()

    if not self.nfvi_sw_patches:
        DLOG.warn("No software patches found.")
        self._sw_patch_build_failed('no software patches found')
        return

    if self._nfvi_alarms:
        DLOG.warn("Active alarms found, can't apply software patches.")
        self._sw_patch_build_failed('active alarms present')
        return

    for host in host_table.values():
        if HOST_PERSONALITY.WORKER in host.personality and \
                HOST_PERSONALITY.CONTROLLER not in host.personality:
            # Allow patch orchestration when worker hosts are available,
            # locked or powered down.
            if not ((host.is_unlocked() and host.is_enabled() and
                     host.is_available()) or
                    (host.is_locked() and host.is_disabled() and
                     host.is_offline()) or
                    (host.is_locked() and host.is_disabled() and
                     host.is_online())):
                DLOG.warn(
                    "All worker hosts must be unlocked-enabled-available, "
                    "locked-disabled-online or locked-disabled-offline, "
                    "can't apply software patches.")
                self._sw_patch_build_failed(
                    'all worker hosts must be unlocked-enabled-available, '
                    'locked-disabled-online or locked-disabled-offline')
                return
        else:
            # Only allow patch orchestration when all controller,
            # storage and swift hosts are available. It is not safe to
            # automate patch application when we do not have full
            # redundancy.
            if not (host.is_unlocked() and host.is_enabled() and
                    host.is_available()):
                DLOG.warn(
                    "All %s hosts must be unlocked-enabled-available, "
                    "can't apply software patches." % host.personality)
                self._sw_patch_build_failed(
                    'all %s hosts must be unlocked-enabled-available' %
                    host.personality)
                return

    controllers = list()
    controllers_no_reboot = list()
    storage_hosts = list()
    storage_hosts_no_reboot = list()
    swift_hosts = list()
    swift_hosts_no_reboot = list()
    worker_hosts = list()
    worker_hosts_no_reboot = list()

    for sw_patch_host in self.nfvi_sw_patch_hosts:
        host = host_table.get(sw_patch_host.name, None)
        if host is None:
            DLOG.error("Host inventory mismatch detected for host %s."
                       % sw_patch_host.name)
            self._sw_patch_build_failed('host inventory mismatch detected')
            return

        if sw_patch_host.interim_state:
            # A patch operation has been done recently and we don't
            # have an up-to-date state for this host.
            DLOG.warn("Host %s is in pending patch current state."
                      % sw_patch_host.name)
            self._sw_patch_build_failed(
                'at least one host is in pending patch current state')
            return

        if sw_patch_host.patch_current:
            # No need to patch this host
            continue

        if HOST_PERSONALITY.CONTROLLER in sw_patch_host.personality:
            if sw_patch_host.requires_reboot:
                controllers.append(host)
            else:
                controllers_no_reboot.append(host)

        elif HOST_PERSONALITY.STORAGE in sw_patch_host.personality:
            if sw_patch_host.requires_reboot:
                storage_hosts.append(host)
            else:
                storage_hosts_no_reboot.append(host)

        elif HOST_PERSONALITY.SWIFT in sw_patch_host.personality:
            if sw_patch_host.requires_reboot:
                swift_hosts.append(host)
            else:
                swift_hosts_no_reboot.append(host)

        # Separate check (not part of the elif chain) so that a host
        # with more than one personality, such as a combined
        # controller/worker, is also added to the worker lists.
        if HOST_PERSONALITY.WORKER in sw_patch_host.personality:
            # Ignore worker hosts that are powered down
            if not host.is_offline():
                if sw_patch_host.requires_reboot:
                    worker_hosts.append(host)
                else:
                    worker_hosts_no_reboot.append(host)

    # For each kind of host, the hosts that need no reboot are handled
    # before the hosts that do.
    strategy_creation_commands = [
        (self._add_controller_strategy_stages,
         controllers_no_reboot, False),
        (self._add_controller_strategy_stages,
         controllers, True),
        (self._add_storage_strategy_stages,
         storage_hosts_no_reboot, False),
        (self._add_storage_strategy_stages,
         storage_hosts, True),
        (self._add_swift_strategy_stages,
         swift_hosts_no_reboot, False),
        (self._add_swift_strategy_stages,
         swift_hosts, True),
        (self._add_worker_strategy_stages,
         worker_hosts_no_reboot, False),
        (self._add_worker_strategy_stages,
         worker_hosts, True)
    ]

    for add_strategy_stages_function, host_list, reboot in \
            strategy_creation_commands:
        if host_list:
            success, reason = add_strategy_stages_function(
                host_list, reboot)
            if not success:
                self._sw_patch_build_failed(reason)
                return

    if 0 == len(self.apply_phase.stages):
        DLOG.warn("No software patches need to be applied.")
        self._sw_patch_build_failed(
            'no software patches need to be applied')
        return

    self.sw_update_obj.strategy_build_complete(True, '')
    self.save()

def _sw_patch_build_failed(self, reason):
    """
    Mark the build as failed with the given reason, tell the software
    update object about the failure and save the strategy
    """
    from nfv_vim import strategy

    self._state = strategy.STRATEGY_STATE.BUILD_FAILED
    self.build_phase.result = strategy.STRATEGY_PHASE_RESULT.FAILED
    self.build_phase.result_reason = reason
    self.sw_update_obj.strategy_build_complete(
        False, self.build_phase.result_reason)
    self.save()
def from_dict(self, data, build_phase=None, apply_phase=None, abort_phase=None):
    """
    Load a software patch strategy object from its dictionary form

    The common strategy fields are handled by the parent class; this method
    restores the single controller flag and rebuilds the NFVI patch and
    host patch objects from their serialized fields.  Returns self.
    """
    from nfv_vim import nfvi

    super(SwPatchStrategy, self).from_dict(data, build_phase, apply_phase,
                                           abort_phase)
    self._single_controller = data['single_controller']

    # One SwPatch object per serialized patch entry
    self._nfvi_sw_patches = [
        nfvi.objects.v1.SwPatch(
            patch_entry['name'], patch_entry['sw_version'],
            patch_entry['repo_state'], patch_entry['patch_state'])
        for patch_entry in data['nfvi_sw_patches_data']]

    # One HostSwPatch object per serialized host entry
    self._nfvi_sw_patch_hosts = [
        nfvi.objects.v1.HostSwPatch(
            host_entry['name'], host_entry['personality'],
            host_entry['sw_version'], host_entry['requires_reboot'],
            host_entry['patch_current'], host_entry['state'],
            host_entry['patch_failed'], host_entry['interim_state'])
        for host_entry in data['nfvi_sw_patch_hosts_data']]

    return self
def as_dict(self):
    """
    Return the software patch strategy in dictionary form

    Extends the parent class dictionary with the single controller flag and
    the dictionary form of every NFVI patch and host patch object.
    """
    data = super(SwPatchStrategy, self).as_dict()
    data['single_controller'] = self._single_controller
    data['nfvi_sw_patches_data'] = [
        patch.as_dict() for patch in self._nfvi_sw_patches]
    data['nfvi_sw_patch_hosts_data'] = [
        patch_host.as_dict() for patch_host in self._nfvi_sw_patch_hosts]
    return data
###################################################################
#
# The Software Upgrade Strategy
#
###################################################################
class SwUpgradeStrategy(SwUpdateStrategy):
    """
    Software Upgrade - Strategy

    The build phase queries alarms and the upgrade in progress.  Once the
    build phase completes, build_complete validates the system state and
    adds the apply phase stages that upgrade the controller, storage and
    worker hosts.
    """
    def __init__(self, uuid, storage_apply_type, worker_apply_type,
                 max_parallel_worker_hosts,
                 alarm_restrictions, start_upgrade, complete_upgrade,
                 ignore_alarms):
        """
        Create a software upgrade strategy

        Raises an Exception when start_upgrade is set; the note in the body
        explains why that option is not supported.
        """
        super(SwUpgradeStrategy, self).__init__(
            uuid,
            STRATEGY_NAME.SW_UPGRADE,
            SW_UPDATE_APPLY_TYPE.SERIAL,
            storage_apply_type,
            SW_UPDATE_APPLY_TYPE.IGNORE,
            worker_apply_type,
            max_parallel_worker_hosts,
            SW_UPDATE_INSTANCE_ACTION.MIGRATE,
            alarm_restrictions,
            ignore_alarms)

        # Note: The support for start_upgrade was implemented and (mostly)
        # tested, but there is a problem. When the sw-upgrade-start stage
        # runs, it will start the upgrade, upgrade controller-1 and swact to
        # it. However, when controller-1 becomes active, it will be using the
        # snapshot of the VIM database that was created when the upgrade was
        # started, so the strategy object created from the database will be
        # long out of date (it thinks the upgrade start step is still in
        # progress) and the strategy apply will fail. Fixing this would be
        # complex, so we will not support the start_upgrade option for now,
        # which would only have been for lab use.
        if start_upgrade:
            raise Exception("No support for start_upgrade")

        self._start_upgrade = start_upgrade
        self._complete_upgrade = complete_upgrade

        # The following alarms will not prevent a software upgrade operation
        IGNORE_ALARMS = ['900.005',  # Upgrade in progress
                         '900.201',  # Software upgrade auto apply in progress
                         ]
        self._ignore_alarms += IGNORE_ALARMS
        self._nfvi_upgrade = None

    @property
    def nfvi_upgrade(self):
        """
        Returns the upgrade from the NFVI layer
        """
        return self._nfvi_upgrade

    @nfvi_upgrade.setter
    def nfvi_upgrade(self, nfvi_upgrade):
        """
        Save the upgrade from the NFVI Layer
        """
        self._nfvi_upgrade = nfvi_upgrade

    def build(self):
        """
        Build the strategy

        Adds a single query stage (alarms, then upgrade) to the build phase
        and hands over to the parent class build.
        """
        from nfv_vim import strategy

        stage = strategy.StrategyStage(
            strategy.STRATEGY_STAGE_NAME.SW_UPGRADE_QUERY)
        stage.add_step(strategy.QueryAlarmsStep(
            ignore_alarms=self._ignore_alarms))
        stage.add_step(strategy.QueryUpgradeStep())
        self.build_phase.add_stage(stage)
        super(SwUpgradeStrategy, self).build()

    def _sw_upgrade_build_failed(self, reason):
        """
        Mark the build as failed with the given reason, notify the software
        update object of the failure and save the strategy
        """
        from nfv_vim import strategy

        self._state = strategy.STRATEGY_STATE.BUILD_FAILED
        self.build_phase.result = strategy.STRATEGY_PHASE_RESULT.FAILED
        self.build_phase.result_reason = reason
        self.sw_update_obj.strategy_build_complete(
            False, self.build_phase.result_reason)
        self.save()

    @staticmethod
    def _find_controller_1_host():
        """
        Return the controller-1 host from the host table, or None when no
        controller host carries that name
        """
        from nfv_vim import tables

        host_table = tables.tables_get_host_table()
        for host in host_table.get_by_personality(HOST_PERSONALITY.CONTROLLER):
            if HOST_NAME.CONTROLLER_1 == host.name:
                return host
        return None

    def _add_upgrade_start_stage(self):
        """
        Add upgrade start strategy stage
        """
        from nfv_vim import strategy

        # NOTE(review): the list holds None when controller-1 is not in the
        # host table; this mirrors the long-standing behaviour.
        host_list = [self._find_controller_1_host()]

        stage = strategy.StrategyStage(
            strategy.STRATEGY_STAGE_NAME.SW_UPGRADE_START)
        # Do not ignore any alarms when starting an upgrade
        stage.add_step(strategy.QueryAlarmsStep(True))
        # Upgrade start can only be done from controller-0
        stage.add_step(strategy.SwactHostsStep(host_list))
        stage.add_step(strategy.UpgradeStartStep())
        stage.add_step(strategy.SystemStabilizeStep())
        self.apply_phase.add_stage(stage)

    def _add_upgrade_complete_stage(self):
        """
        Add upgrade complete strategy stage
        """
        from nfv_vim import strategy

        # NOTE(review): the list holds None when controller-1 is not in the
        # host table; this mirrors the long-standing behaviour.
        host_list = [self._find_controller_1_host()]

        stage = strategy.StrategyStage(
            strategy.STRATEGY_STAGE_NAME.SW_UPGRADE_COMPLETE)
        stage.add_step(strategy.QueryAlarmsStep(
            True, ignore_alarms=self._ignore_alarms))
        # Upgrade complete can only be done from controller-0
        stage.add_step(strategy.SwactHostsStep(host_list))
        stage.add_step(strategy.UpgradeActivateStep())
        stage.add_step(strategy.UpgradeCompleteStep())
        stage.add_step(strategy.SystemStabilizeStep())
        self.apply_phase.add_stage(stage)

    def _new_controller_upgrade_stage(self, controller_host, swact_first):
        """
        Return a stage that locks, upgrades and unlocks one controller,
        optionally preceded by a swact step for that controller
        """
        from nfv_vim import strategy

        host_list = [controller_host]
        stage = strategy.StrategyStage(
            strategy.STRATEGY_STAGE_NAME.SW_UPGRADE_CONTROLLERS)
        stage.add_step(strategy.QueryAlarmsStep(
            True, ignore_alarms=self._ignore_alarms))
        if swact_first:
            stage.add_step(strategy.SwactHostsStep(host_list))
        stage.add_step(strategy.LockHostsStep(host_list))
        stage.add_step(strategy.UpgradeHostsStep(host_list))
        stage.add_step(strategy.UnlockHostsStep(host_list))
        # Allow up to four hours for controller disks to synchronize
        stage.add_step(strategy.WaitDataSyncStep(
            timeout_in_secs=4 * 60 * 60,
            ignore_alarms=self._ignore_alarms))
        return stage

    def _add_controller_strategy_stages(self, controllers, reboot):
        """
        Add controller software upgrade strategy stages

        Returns (True, '') on success or (False, reason) when the stages
        cannot be created.  The reboot argument is not used here.
        """
        from nfv_vim import tables

        host_table = tables.tables_get_host_table()

        if 2 > host_table.total_by_personality(HOST_PERSONALITY.CONTROLLER):
            DLOG.warn("Not enough controllers to apply software upgrades.")
            reason = 'not enough controllers to apply software upgrades'
            return False, reason

        controller_0_host = None
        controller_1_host = None

        for host in controllers:
            if HOST_PERSONALITY.WORKER in host.personality:
                DLOG.warn("Cannot apply software upgrades to AIO configuration.")
                reason = 'cannot apply software upgrades to AIO configuration'
                return False, reason
            elif HOST_NAME.CONTROLLER_1 == host.name:
                controller_1_host = host
            elif HOST_NAME.CONTROLLER_0 == host.name:
                controller_0_host = host

        # controller-1 is always upgraded before controller-0
        if controller_1_host is not None:
            self.apply_phase.add_stage(
                self._new_controller_upgrade_stage(controller_1_host, False))

        if controller_0_host is not None:
            # Only swact to controller-1 if it was upgraded. If we are only
            # upgrading controller-0, then controller-1 needs to be
            # active already.
            self.apply_phase.add_stage(
                self._new_controller_upgrade_stage(
                    controller_0_host, controller_1_host is not None))

        return True, ''

    def _add_storage_strategy_stages(self, storage_hosts, reboot):
        """
        Add storage software upgrade strategy stages

        Returns (True, '') on success or (False, reason) when the storage
        host lists cannot be created.  The reboot argument is not used here.
        """
        from nfv_vim import strategy

        storage_0_host_list = list()
        storage_0_host_lists = list()
        other_storage_host_list = list()

        for host in storage_hosts:
            if HOST_NAME.STORAGE_0 == host.name:
                storage_0_host_list.append(host)
            else:
                other_storage_host_list.append(host)

        if len(storage_0_host_list) == 1:
            storage_0_host_lists, reason = self._create_storage_host_lists(
                storage_0_host_list)
            if storage_0_host_lists is None:
                return False, reason

        other_storage_host_lists, reason = self._create_storage_host_lists(
            other_storage_host_list)
        if other_storage_host_lists is None:
            return False, reason

        # Upgrade storage-0 first and on its own since it has a ceph monitor
        if len(storage_0_host_lists) == 1:
            combined_host_lists = storage_0_host_lists + other_storage_host_lists
        else:
            combined_host_lists = other_storage_host_lists

        for host_list in combined_host_lists:
            stage = strategy.StrategyStage(
                strategy.STRATEGY_STAGE_NAME.SW_UPGRADE_STORAGE_HOSTS)
            stage.add_step(strategy.QueryAlarmsStep(
                True, ignore_alarms=self._ignore_alarms))
            stage.add_step(strategy.LockHostsStep(host_list))
            stage.add_step(strategy.UpgradeHostsStep(host_list))
            stage.add_step(strategy.UnlockHostsStep(host_list))
            # After storage node(s) are unlocked, we need extra time to
            # allow the OSDs to go back in sync and the storage related
            # alarms to clear. We no longer wipe the OSD disks when upgrading
            # a storage node, so they should only be syncing data that changed
            # while they were being upgraded.
            stage.add_step(strategy.WaitDataSyncStep(
                timeout_in_secs=2 * 60 * 60,
                ignore_alarms=self._ignore_alarms))
            self.apply_phase.add_stage(stage)

        return True, ''

    def _add_worker_strategy_stages(self, worker_hosts, reboot):
        """
        Add worker software upgrade strategy stages

        Returns (True, '') on success or (False, reason) when the worker
        host lists cannot be created or an instance on one of the hosts is
        locked.
        """
        from nfv_vim import strategy
        from nfv_vim import tables

        host_lists, reason = self._create_worker_host_lists(worker_hosts, reboot)
        if host_lists is None:
            return False, reason

        instance_table = tables.tables_get_instance_table()

        for host_list in host_lists:
            instance_list = list()

            for host in host_list:
                for instance in instance_table.on_host(host.name):
                    if instance.is_locked():
                        DLOG.warn("Instance %s must not be shut down" %
                                  instance.name)
                        reason = ('instance %s must not be shut down' %
                                  instance.name)
                        return False, reason
                    instance_list.append(instance)

            stage = strategy.StrategyStage(
                strategy.STRATEGY_STAGE_NAME.SW_UPGRADE_WORKER_HOSTS)
            stage.add_step(strategy.QueryAlarmsStep(
                True, ignore_alarms=self._ignore_alarms))

            # Computes with instances need them moved away before the lock
            if instance_list:
                if SW_UPDATE_APPLY_TYPE.PARALLEL == self._worker_apply_type:
                    # Disable host services before migrating to ensure
                    # instances do not migrate to worker hosts in the
                    # same set of hosts.
                    if host_list[0].host_service_configured(
                            HOST_SERVICES.COMPUTE):
                        stage.add_step(strategy.DisableHostServicesStep(
                            host_list, HOST_SERVICES.COMPUTE))
                    # TODO(ksmith)
                    # When support is added for orchestration on
                    # non-OpenStack worker nodes, support for disabling
                    # kubernetes services will have to be added.
                stage.add_step(strategy.MigrateInstancesStep(instance_list))

            stage.add_step(strategy.LockHostsStep(host_list))
            stage.add_step(strategy.UpgradeHostsStep(host_list))
            stage.add_step(strategy.UnlockHostsStep(host_list))
            stage.add_step(strategy.SystemStabilizeStep())
            self.apply_phase.add_stage(stage)

        return True, ''

    def build_complete(self, result, result_reason):
        """
        Strategy Build Complete

        When the build phase succeeded (or is degraded), validates the
        upgrade state, alarms and host availability and then creates the
        apply phase stages.  Every failure is recorded on the build phase,
        reported to the software update object and saved.  A successful
        build is reported with strategy_build_complete(True, '').
        """
        from nfv_vim import strategy
        from nfv_vim import tables

        result, result_reason = \
            super(SwUpgradeStrategy, self).build_complete(result, result_reason)

        DLOG.info("Build Complete Callback, result=%s, reason=%s."
                  % (result, result_reason))

        if result not in [strategy.STRATEGY_RESULT.SUCCESS,
                          strategy.STRATEGY_RESULT.DEGRADED]:
            # The build phase itself failed.  Report the failure, save and
            # stop here; falling through would also report a successful
            # build right after the failure.
            self.sw_update_obj.strategy_build_complete(
                False, self.build_phase.result_reason)
            self.save()
            return

        # Check whether the upgrade is in a valid state for orchestration
        upgrade = self.nfvi_upgrade
        if upgrade is None:
            if not self._start_upgrade:
                DLOG.warn("No upgrade in progress.")
                self._sw_upgrade_build_failed('no upgrade in progress')
                return
        else:
            if self._start_upgrade:
                valid_states = [UPGRADE_STATE.STARTED,
                                UPGRADE_STATE.DATA_MIGRATION_COMPLETE,
                                UPGRADE_STATE.UPGRADING_CONTROLLERS,
                                UPGRADE_STATE.UPGRADING_HOSTS]
            else:
                valid_states = [UPGRADE_STATE.UPGRADING_CONTROLLERS,
                                UPGRADE_STATE.UPGRADING_HOSTS]

            if upgrade.state not in valid_states:
                DLOG.warn("Invalid upgrade state for orchestration: %s." %
                          upgrade.state)
                self._sw_upgrade_build_failed(
                    'invalid upgrade state for orchestration: %s' %
                    upgrade.state)
                return

            # If controller-1 has been upgraded and we have yet to upgrade
            # controller-0, then controller-1 must be active.
            if UPGRADE_STATE.UPGRADING_CONTROLLERS == upgrade.state:
                if HOST_NAME.CONTROLLER_1 != get_local_host_name():
                    DLOG.warn(
                        "Controller-1 must be active for orchestration to "
                        "upgrade controller-0.")
                    self._sw_upgrade_build_failed(
                        'controller-1 must be active for orchestration to '
                        'upgrade controller-0')
                    return

        if self._nfvi_alarms:
            DLOG.warn(
                "Active alarms found, can't apply software upgrade.")
            self._sw_upgrade_build_failed('active alarms present')
            return

        host_table = tables.tables_get_host_table()
        for host in host_table.values():
            # Only allow upgrade orchestration when all hosts are
            # available. It is not safe to automate upgrade application
            # when we do not have full redundancy.
            if not (host.is_unlocked() and host.is_enabled() and
                    host.is_available()):
                DLOG.warn(
                    "All %s hosts must be unlocked-enabled-available, "
                    "can't apply software upgrades." % host.personality)
                self._sw_upgrade_build_failed(
                    'all %s hosts must be unlocked-enabled-available' %
                    host.personality)
                return

        controller_hosts = list()
        storage_hosts = list()
        worker_hosts = list()

        if upgrade is None:
            # Start upgrade; all hosts will be upgraded
            self._add_upgrade_start_stage()

        for host in host_table.values():
            # With an upgrade in progress, only hosts not yet running the
            # target load will be upgraded
            if upgrade is not None and \
                    host.software_load == upgrade.to_release:
                continue

            if HOST_PERSONALITY.CONTROLLER in host.personality:
                controller_hosts.append(host)
            elif HOST_PERSONALITY.STORAGE in host.personality:
                storage_hosts.append(host)
            elif HOST_PERSONALITY.WORKER in host.personality:
                worker_hosts.append(host)

        STRATEGY_CREATION_COMMANDS = [
            (self._add_controller_strategy_stages,
             controller_hosts, True),
            (self._add_storage_strategy_stages,
             storage_hosts, True),
            (self._add_worker_strategy_stages,
             worker_hosts, True)
        ]

        for add_strategy_stages_function, host_list, reboot in \
                STRATEGY_CREATION_COMMANDS:
            if host_list:
                success, reason = add_strategy_stages_function(
                    host_list, reboot)
                if not success:
                    self._sw_upgrade_build_failed(reason)
                    return

        if self._complete_upgrade:
            self._add_upgrade_complete_stage()

        if 0 == len(self.apply_phase.stages):
            DLOG.warn("No software upgrades need to be applied.")
            self._sw_upgrade_build_failed(
                'no software upgrades need to be applied')
            return

        self.sw_update_obj.strategy_build_complete(True, '')
        self.save()

    def from_dict(self, data, build_phase=None, apply_phase=None, abort_phase=None):
        """
        Initializes a software upgrade strategy object using the given dictionary

        Restores the start/complete upgrade flags and, when present, the
        NFVI upgrade object.  Returns self.
        """
        from nfv_vim import nfvi

        super(SwUpgradeStrategy, self).from_dict(data, build_phase, apply_phase,
                                                 abort_phase)
        self._start_upgrade = data['start_upgrade']
        self._complete_upgrade = data['complete_upgrade']

        nfvi_upgrade_data = data['nfvi_upgrade_data']
        if nfvi_upgrade_data:
            self._nfvi_upgrade = nfvi.objects.v1.Upgrade(
                nfvi_upgrade_data['state'],
                nfvi_upgrade_data['from_release'],
                nfvi_upgrade_data['to_release'])
        else:
            self._nfvi_upgrade = None

        return self

    def as_dict(self):
        """
        Represent the software upgrade strategy as a dictionary

        The 'nfvi_upgrade_data' entry is None when no NFVI upgrade is set.
        """
        data = super(SwUpgradeStrategy, self).as_dict()
        data['start_upgrade'] = self._start_upgrade
        data['complete_upgrade'] = self._complete_upgrade
        if self._nfvi_upgrade:
            nfvi_upgrade_data = self._nfvi_upgrade.as_dict()
        else:
            nfvi_upgrade_data = None
        data['nfvi_upgrade_data'] = nfvi_upgrade_data
        return data
###################################################################
#
# The Firmware Update Strategy
#
###################################################################
class FwUpdateStrategy(SwUpdateStrategy):
    """
    Firmware Update - Strategy - FPGA

    The build phase queries alarms and the device list of every unlocked
    and enabled worker host.  Once the build phase completes, build_complete
    adds the apply phase stages that update the hosts named in
    fw_update_hosts.
    """
    def __init__(self, uuid, controller_apply_type, storage_apply_type,
                 worker_apply_type, max_parallel_worker_hosts,
                 default_instance_action,
                 alarm_restrictions, ignore_alarms,
                 single_controller):
        """
        Create a firmware update strategy
        """
        super(FwUpdateStrategy, self).__init__(
            uuid,
            STRATEGY_NAME.FW_UPDATE,
            controller_apply_type,
            storage_apply_type,
            SW_UPDATE_APPLY_TYPE.IGNORE,
            worker_apply_type,
            max_parallel_worker_hosts,
            default_instance_action,
            alarm_restrictions,
            ignore_alarms)

        # The following alarms will not prevent a firmware update operation
        IGNORE_ALARMS = ['700.004',  # VM stopped
                         '280.002',  # Subcloud resource out-of-sync
                         '900.301',  # Fw Update Auto Apply in progress
                         '200.001',  # Locked Host
                         ]

        self._ignore_alarms += IGNORE_ALARMS
        self._single_controller = single_controller

        self._fail_on_alarms = True

        # list of hostnames that need update
        self._fw_update_hosts = list()

    @property
    def fw_update_hosts(self):
        """
        Returns a list of hostnames that require firmware update
        """
        return self._fw_update_hosts

    @fw_update_hosts.setter
    def fw_update_hosts(self, fw_update_hosts):
        """
        Save a list of hostnames that require firmware update
        """
        self._fw_update_hosts = fw_update_hosts

    def _fw_update_build_failed(self, reason):
        """
        Mark the build as failed with the given reason, notify the software
        update object of the failure and save the strategy
        """
        from nfv_vim import strategy

        self._state = strategy.STRATEGY_STATE.BUILD_FAILED
        self.build_phase.result = strategy.STRATEGY_PHASE_RESULT.FAILED
        self.build_phase.result_reason = reason
        self.sw_update_obj.strategy_build_complete(
            False, self.build_phase.result_reason)
        self.save()

    def build(self):
        """
        Build the strategy

        Fails the build straight away when the worker apply type is
        'ignore'.  Otherwise adds one query stage holding an alarm query
        step and a device list query step per unlocked and enabled worker
        host, then hands over to the parent class build.
        """
        from nfv_vim import strategy
        from nfv_vim import tables

        # Firmware update is only supported for hosts that support
        # the worker function.
        if self._worker_apply_type == SW_UPDATE_APPLY_TYPE.IGNORE:
            msg = "apply type is 'ignore' ; must be '%s' or '%s'" % \
                  (SW_UPDATE_APPLY_TYPE.SERIAL,
                   SW_UPDATE_APPLY_TYPE.PARALLEL)
            DLOG.warn("Worker %s" % msg)
            self._fw_update_build_failed("Worker " + msg)
            return

        stage = strategy.StrategyStage(
            strategy.STRATEGY_STAGE_NAME.FW_UPDATE_HOSTS_QUERY)

        stage.add_step(strategy.QueryAlarmsStep(
            self._fail_on_alarms,
            ignore_alarms=self._ignore_alarms))

        # using existing vim host inventory add a step for each host
        host_table = tables.tables_get_host_table()
        for host in host_table.values():
            if HOST_PERSONALITY.WORKER in host.personality:
                if host.is_unlocked() and host.is_enabled():
                    stage.add_step(strategy.QueryHostDeviceListStep(host))

        self.build_phase.add_stage(stage)
        super(FwUpdateStrategy, self).build()

    def _add_worker_strategy_stages(self, worker_hosts, reboot):
        """
        Add worker firmware update strategy stages

        Returns (True, '') on success or (False, reason) when the stages
        cannot be created.  With reboot set, each stage also locks and
        unlocks its hosts, handling the instances on them according to the
        default instance action.
        """
        from nfv_vim import strategy
        from nfv_vim import tables

        hostnames = ''.join(host.name + ' ' for host in worker_hosts)
        DLOG.info("Worker hosts that require firmware update: %s " % hostnames)

        # When using a single controller/worker host that is running
        # OpenStack, only allow the stop/start instance action.
        if self._single_controller and \
                SW_UPDATE_INSTANCE_ACTION.STOP_START != \
                self._default_instance_action:
            for host in worker_hosts:
                if host.openstack_compute and \
                        HOST_PERSONALITY.CONTROLLER in host.personality:
                    DLOG.error("Cannot migrate instances in a single "
                               "controller configuration")
                    reason = 'cannot migrate instances in a single ' \
                             'controller configuration'
                    return False, reason

        # Returns a list of 'host update lists' based on serial vs parallel
        # update specification and the overall host pool and various aspects
        # of the hosts in that pool ; i.e. personality, instances, etc.
        host_lists, reason = self._create_worker_host_lists(worker_hosts, reboot)
        if host_lists is None:
            DLOG.info("failed to create worker host lists")
            return False, reason

        instance_table = tables.tables_get_instance_table()

        # Loop over the host aggregate lists creating back to back steps
        # that will update all the worker hosts in the order dictated
        # by the strategy.
        for host_list in host_lists:

            # Start the Update Worker Hosts Stage ; the stage that includes all
            # the steps to update all the worker hosts found to need firmware update.
            stage = strategy.StrategyStage(
                strategy.STRATEGY_STAGE_NAME.FW_UPDATE_WORKER_HOSTS)

            # build a list of unlocked instances ; do not take action
            # (migrate or stop-start) on an instance if it is locked
            # (i.e. stopped).
            instance_list = [instance
                             for host in host_list
                             for instance in instance_table.on_host(host.name)
                             if not instance.is_locked()]

            # Handle alarms that show up after create but before apply.
            stage.add_step(strategy.QueryAlarmsStep(
                self._fail_on_alarms,
                ignore_alarms=self._ignore_alarms))

            # Issue Firmware Update for hosts in host_list
            stage.add_step(strategy.FwUpdateHostsStep(host_list))

            # Handle reboot-required option with host lock/unlock.
            if reboot:
                # True when the stage updates exactly one host and that
                # host is also a controller (AIO).
                lone_controller = (
                    1 == len(host_list) and
                    HOST_PERSONALITY.CONTROLLER in host_list[0].personality)

                if lone_controller and not self._single_controller:
                    # Handle upgrade of both controllers
                    # in AIO DX Swact controller before locking.
                    # If this is not the active controller then it has no effect
                    stage.add_step(strategy.SwactHostsStep(host_list))

                # Handle instance migration
                if instance_list:
                    # Migrate or stop instances as necessary
                    if SW_UPDATE_INSTANCE_ACTION.MIGRATE == \
                            self._default_instance_action:
                        if SW_UPDATE_APPLY_TYPE.PARALLEL == \
                                self._worker_apply_type:
                            # Disable host services before migrating to ensure
                            # instances do not migrate to worker hosts in the
                            # same set of hosts.
                            if host_list[0].host_service_configured(
                                    HOST_SERVICES.COMPUTE):
                                stage.add_step(strategy.DisableHostServicesStep(
                                    host_list, HOST_SERVICES.COMPUTE))
                            # TODO(ksmith)
                            # When support is added for orchestration on
                            # non-OpenStack worker nodes, support for disabling
                            # kubernetes services will have to be added.
                        stage.add_step(strategy.MigrateInstancesStep(
                            instance_list))
                    else:
                        stage.add_step(strategy.StopInstancesStep(
                            instance_list))

                # Handle upgrade of AIO SX
                # A single controller will not go disabled when
                # it is locked.
                wait_until_disabled = not (lone_controller and
                                           self._single_controller)

                # Lock hosts
                stage.add_step(strategy.LockHostsStep(
                    host_list, wait_until_disabled=wait_until_disabled))

                # Wait for system to stabilize
                stage.add_step(strategy.SystemStabilizeStep(
                    timeout_in_secs=MTCE_DELAY))

                # Unlock hosts
                stage.add_step(strategy.UnlockHostsStep(host_list))

                if instance_list:
                    # Start any instances that were stopped
                    if SW_UPDATE_INSTANCE_ACTION.MIGRATE != \
                            self._default_instance_action:
                        stage.add_step(strategy.StartInstancesStep(
                            instance_list))

                stage.add_step(strategy.SystemStabilizeStep())
            else:
                # Less time required if host is not rebooting
                stage.add_step(strategy.SystemStabilizeStep(
                    timeout_in_secs=NO_REBOOT_DELAY))

            self.apply_phase.add_stage(stage)

        return True, ''

    def build_complete(self, result, result_reason):
        """
        Strategy Build Complete

        When the build phase succeeded (or is degraded), fails the build if
        alarms are present or no host requires a firmware update, otherwise
        creates the apply phase stages for the hosts named in
        fw_update_hosts.  Every failure is recorded on the build phase,
        reported to the software update object and saved.  A successful
        build is reported with strategy_build_complete(True, '').
        """
        from nfv_vim import strategy
        from nfv_vim import tables

        result, result_reason = \
            super(FwUpdateStrategy, self).build_complete(result, result_reason)

        DLOG.verbose("Build Complete Callback, result=%s, reason=%s." %
                     (result, result_reason))

        if result not in [strategy.STRATEGY_RESULT.SUCCESS,
                          strategy.STRATEGY_RESULT.DEGRADED]:
            # The build phase itself failed.  Report the failure, save and
            # stop here; falling through would also report a successful
            # build right after the failure.
            self.sw_update_obj.strategy_build_complete(
                False, self.build_phase.result_reason)
            self.save()
            return

        if self._nfvi_alarms:
            # Fail create strategy if unignored alarms present
            DLOG.warn("Active alarms found, can't update firmware.")
            alarm_id_list = ', '.join(
                alarm_data['alarm_id'] for alarm_data in self._nfvi_alarms)
            DLOG.warn("... active alarms: %s" % alarm_id_list)
            self._fw_update_build_failed(
                'active alarms present ; ' + alarm_id_list)
            return

        # Fail if no hosts require firmware upgrade.
        if not self._fw_update_hosts:
            reason = "no firmware update required"
            DLOG.warn(reason)
            self._fw_update_build_failed(reason)
            return

        host_table = tables.tables_get_host_table()
        worker_hosts = [host for host in host_table.values()
                        if host.name in self._fw_update_hosts]

        STRATEGY_CREATION_COMMANDS = [
            (self._add_worker_strategy_stages,
             worker_hosts, True)]

        for add_strategy_stages_function, host_list, reboot in \
                STRATEGY_CREATION_COMMANDS:
            if host_list:
                success, reason = add_strategy_stages_function(
                    host_list, reboot)
                if not success:
                    self._fw_update_build_failed(reason)
                    return

        self.sw_update_obj.strategy_build_complete(True, '')
        self.save()

    def from_dict(self,
                  data,
                  build_phase=None,
                  apply_phase=None,
                  abort_phase=None):
        """
        Load firmware update strategy object from dict data.

        strategy_rebuild_from_dict creates the object with object.__new__,
        so __init__ does not run; every attribute __init__ would have set
        on this class is restored here.  Returns self.
        """
        from nfv_vim import nfvi

        super(FwUpdateStrategy, self).from_dict(
            data, build_phase, apply_phase, abort_phase)

        self._single_controller = data['single_controller']
        self._fail_on_alarms = True

        # Host names needing update; absent in data saved before this key
        # was written, in which case the list starts out empty.
        self._fw_update_hosts = list(data.get('fw_update_hosts') or [])

        # Load nfvi alarm data
        nfvi_alarms = list()
        nfvi_alarms_data = data.get('nfvi_alarms_data')
        if nfvi_alarms_data:
            for alarm_data in nfvi_alarms_data:
                alarm = nfvi.objects.v1.Alarm(
                    alarm_data['alarm_uuid'], alarm_data['alarm_id'],
                    alarm_data['entity_instance_id'], alarm_data['severity'],
                    alarm_data['reason_text'], alarm_data['timestamp'],
                    alarm_data['mgmt_affecting'])
                nfvi_alarms.append(alarm)
        self._nfvi_alarms = nfvi_alarms
        return self

    def as_dict(self):
        """
        Return firmware update strategy nfvi data object as dictionary.

        The 'nfvi_alarms_data' entry is only added when alarms are held.
        """
        data = super(FwUpdateStrategy, self).as_dict()

        data['single_controller'] = self._single_controller

        # Save the host names needing update so from_dict can restore them
        data['fw_update_hosts'] = list(self._fw_update_hosts or [])

        # Save nfvi alarm info to data
        if self._nfvi_alarms:
            data['nfvi_alarms_data'] = [
                alarm.as_dict() for alarm in self._nfvi_alarms]
        return data
def strategy_rebuild_from_dict(data):
    """
    Rebuild a strategy object from its dictionary form

    Returns None when data is empty.  Otherwise the three phases are rebuilt
    first, an uninitialized object of the class matching data['name'] is
    created (its __init__ is not called) and populated through from_dict.
    """
    from nfv_vim.strategy._strategy_phases import strategy_phase_rebuild_from_dict  # noqa: F401

    if not data:
        return None

    phases = [strategy_phase_rebuild_from_dict(data[phase_key])
              for phase_key in ('build_phase', 'apply_phase', 'abort_phase')]

    known_strategies = (
        (STRATEGY_NAME.SW_PATCH, SwPatchStrategy),
        (STRATEGY_NAME.SW_UPGRADE, SwUpgradeStrategy),
        (STRATEGY_NAME.FW_UPDATE, FwUpdateStrategy),
    )

    for strategy_name, strategy_class in known_strategies:
        if strategy_name == data['name']:
            break
    else:
        # NOTE(review): an unknown name falls back to a strategy *stage*
        # class, which looks unintended for a strategy object -- confirm.
        strategy_class = strategy.StrategyStage

    strategy_obj = object.__new__(strategy_class)
    strategy_obj.from_dict(data, *phases)
    return strategy_obj