1094 lines
39 KiB
Python
1094 lines
39 KiB
Python
# Copyright 2015 Huawei Technologies Co., Ltd.
|
|
# Copyright (c) 2017-2023 Wind River Systems, Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
# implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
import datetime
|
|
import grp
|
|
import itertools
|
|
import json
|
|
import netaddr
|
|
import os
|
|
import pwd
|
|
import re
|
|
import resource as sys_resource
|
|
import six.moves
|
|
import string
|
|
import subprocess
|
|
import tsconfig.tsconfig as tsc
|
|
import yaml
|
|
|
|
from keystoneauth1 import exceptions as keystone_exceptions
|
|
from oslo_concurrency import lockutils
|
|
from oslo_config import cfg
|
|
from oslo_log import log as logging
|
|
from oslo_serialization import base64
|
|
|
|
from dccommon import consts as dccommon_consts
|
|
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
|
|
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
|
|
from dccommon.drivers.openstack import vim
|
|
from dccommon import exceptions as dccommon_exceptions
|
|
from dccommon import kubeoperator
|
|
from dcmanager.common import consts
|
|
from dcmanager.common import exceptions
|
|
from dcmanager.db import api as db_api
|
|
|
|
# Module-level logger shared by every helper in this file.
LOG = logging.getLogger(__name__)

# User and group that ensure_lock_path() gives ownership of a newly created
# lock directory.
DC_MANAGER_USERNAME = "root"
DC_MANAGER_GRPNAME = "root"

# Max lines output msg from logs
MAX_LINES_MSG = 10

# Next deploy state for an abort transition: each in-progress state maps to
# its "aborting" state, and each "aborting" state maps to its "aborted" state.
ABORT_UPDATE_STATUS = {
    consts.DEPLOY_STATE_INSTALLING: consts.DEPLOY_STATE_ABORTING_INSTALL,
    consts.DEPLOY_STATE_BOOTSTRAPPING: consts.DEPLOY_STATE_ABORTING_BOOTSTRAP,
    consts.DEPLOY_STATE_CONFIGURING: consts.DEPLOY_STATE_ABORTING_CONFIG,
    consts.DEPLOY_STATE_ABORTING_INSTALL: consts.DEPLOY_STATE_INSTALL_ABORTED,
    consts.DEPLOY_STATE_ABORTING_BOOTSTRAP: consts.DEPLOY_STATE_BOOTSTRAP_ABORTED,
    consts.DEPLOY_STATE_ABORTING_CONFIG: consts.DEPLOY_STATE_CONFIG_ABORTED
}

# Each "aborting" state mapped to the "failed" state of the same phase
# (presumably applied when the abort itself fails - confirm against callers).
ABORT_UPDATE_FAIL_STATUS = {
    consts.DEPLOY_STATE_ABORTING_INSTALL: consts.DEPLOY_STATE_INSTALL_FAILED,
    consts.DEPLOY_STATE_ABORTING_BOOTSTRAP: consts.DEPLOY_STATE_BOOTSTRAP_FAILED,
    consts.DEPLOY_STATE_ABORTING_CONFIG: consts.DEPLOY_STATE_CONFIG_FAILED
}

# Each deploy phase mapped to the "pre-<phase>" deploy state.
RESUME_PREP_UPDATE_STATUS = {
    consts.DEPLOY_PHASE_INSTALL: consts.DEPLOY_STATE_PRE_INSTALL,
    consts.DEPLOY_PHASE_BOOTSTRAP: consts.DEPLOY_STATE_PRE_BOOTSTRAP,
    consts.DEPLOY_PHASE_CONFIG: consts.DEPLOY_STATE_PRE_CONFIG
}

# Each deploy phase mapped to the "pre-<phase> failed" deploy state.
RESUME_PREP_UPDATE_FAIL_STATUS = {
    consts.DEPLOY_PHASE_INSTALL: consts.DEPLOY_STATE_PRE_INSTALL_FAILED,
    consts.DEPLOY_PHASE_BOOTSTRAP: consts.DEPLOY_STATE_PRE_BOOTSTRAP_FAILED,
    consts.DEPLOY_PHASE_CONFIG: consts.DEPLOY_STATE_PRE_CONFIG_FAILED
}
|
|
|
|
|
|
def get_import_path(cls):
    """Return the dotted import path ("<module>.<name>") of a class."""
    return ".".join((cls.__module__, cls.__name__))
|
|
|
|
|
|
def get_batch_projects(batch_size, project_list, fillvalue=None):
    """Split project_list into consecutive batches of batch_size items.

    :param batch_size: number of items in each batch
    :param project_list: iterable of items to batch
    :param fillvalue: value used to pad the last batch when the number of
                      items is not a multiple of batch_size
    :returns: an iterator of tuples, each holding batch_size items
    """
    # The same iterator object repeated batch_size times: zip_longest pulls
    # from it round-robin, so each output tuple is one consecutive batch.
    args = [iter(project_list)] * batch_size
    return itertools.zip_longest(*args, fillvalue=fillvalue)
|
|
|
|
|
|
def validate_address_str(ip_address_str, network):
    """Determine whether an address is valid.

    :param ip_address_str: the address to validate, as a string
    :param network: the subnet the address must belong to (an object with
                    netaddr.IPNetwork semantics: version, network,
                    broadcast and membership test)
    :returns: the parsed netaddr.IPAddress
    :raises ValidateFail: if the string is not an IP address, its IP version
                          differs from the network's, it is the network or
                          broadcast address, or it is outside the subnet
    """
    try:
        ip_address = netaddr.IPAddress(ip_address_str)
        if ip_address.version != network.version:
            msg = ("Invalid IP version - must match network version " +
                   ip_version_to_string(network.version))
            raise exceptions.ValidateFail(msg)
        # Compare against the network *address*: an IPAddress never compares
        # equal to an IPNetwork object, so comparing with the network itself
        # would let the network address through.
        elif ip_address == network.network:
            raise exceptions.ValidateFail("Cannot use network address")
        elif ip_address == network.broadcast:
            raise exceptions.ValidateFail("Cannot use broadcast address")
        elif ip_address not in network:
            raise exceptions.ValidateFail(
                "Address must be in subnet %s" % str(network))
        return ip_address
    except netaddr.AddrFormatError:
        raise exceptions.ValidateFail(
            "Invalid address - not a valid IP address")
|
|
|
|
|
|
def ip_version_to_string(ip_version):
    """Return the display name for an IP version number.

    4 gives "IPv4", 6 gives "IPv6", anything else gives the generic "IP".
    """
    if ip_version == 4:
        return "IPv4"
    if ip_version == 6:
        return "IPv6"
    return "IP"
|
|
|
|
|
|
def validate_network_str(network_str, minimum_size, existing_networks=None,
                         multicast=False, operation=None):
    """Determine whether a network is valid.

    :param network_str: the subnet to validate, as a string
    :param minimum_size: minimum number of addresses the subnet must hold
    :param existing_networks: optional iterable of already configured
                              subnets the new one must not fall into
    :param multicast: when True the subnet must be a multicast subnet
    :param operation: when 'reinstall' the overlap check is skipped
    :returns: the parsed netaddr.IPNetwork
    :raises ValidateFail: if the string is not a subnet or any check fails
    """
    try:
        network = netaddr.IPNetwork(network_str)
        if network.size < minimum_size:
            raise exceptions.ValidateFail("Subnet too small - must have at "
                                          "least %d addresses" % minimum_size)
        if network.version == 6 and network.prefixlen < 64:
            raise exceptions.ValidateFail("IPv6 minimum prefix length is 64")
        if existing_networks and operation != 'reinstall':
            if any(network.ip in subnet for subnet in existing_networks):
                raise exceptions.ValidateFail("Subnet overlaps with another "
                                              "configured subnet")
        # Independent of the overlap check: the multicast requirement must
        # hold whether or not existing networks were supplied.
        if multicast and not network.is_multicast():
            raise exceptions.ValidateFail("Invalid subnet - must be multicast")
        return network
    except netaddr.AddrFormatError:
        raise exceptions.ValidateFail(
            "Invalid subnet - not a valid IP subnet")
|
|
|
|
|
|
def validate_certificate_subject(subject):
    """Check a certificate subject string of "KEY=value" pairs.

    Mirrors the get_subject validation logic defined in:
    sysinv/api/controllers/v1/kube_rootca_update.py

    :param subject: subject string, or None
    :returns: (True, "") when subject is None or valid, otherwise
              (False, "<error details>")
    """
    if subject is None:
        return True, ""

    supported_keys = ('C', 'OU', 'O', 'ST', 'CN', 'L')
    # Every match holds exactly one '=' so split() always yields a
    # (key, value) pair; a repeated key keeps its last value.
    matches = re.findall(r"([^=]+=[^=]+)(?:\s|$)", subject)
    parsed = dict(match.split("=") for match in matches)

    unsupported = [key for key in parsed if key not in supported_keys]
    if unsupported:
        return False, ("There are parameters not supported "
                       "for the certificate subject specification. "
                       "The subject parameter has to be in the "
                       "format of 'C=<Country> ST=<State/Province> "
                       "L=<Locality> O=<Organization> OU=<OrganizationUnit> "
                       "CN=<commonName>")
    if 'CN' not in parsed:
        return False, ("The CN=<commonName> parameter is required to be "
                       "specified in subject argument")
    return True, ""
|
|
|
|
|
|
def validate_expiry_date(expiry_date):
    """Check a certificate expiry date given as "YYYY-MM-DD".

    Mirrors the expiry_date validation logic defined in:
    sysinv/api/controllers/v1/kube_rootca_update.py

    :param expiry_date: date string, or None
    :returns: (True, "") when expiry_date is None or valid, otherwise
              (False, "<error details>")
    """
    if expiry_date is None:
        return True, ""

    try:
        expiry = datetime.datetime.strptime(expiry_date, "%Y-%m-%d")
    except ValueError:
        return False, ("expiry_date %s doesn't match format "
                       "YYYY-MM-DD" % expiry_date)

    whole_days_left = (expiry - datetime.datetime.now()).days
    # One extra day (24 hours) is added so the certificate expires during
    # the day specified by the user.
    hours_left = 24 * whole_days_left + 24

    # Cert-manager renews certificates some time before they expire and
    # renewBefore is set to 24h, so the requested duration must exceed that
    # or cert-manager blocks the creation of the resources.
    if hours_left > 24:
        return True, ""
    return False, ("New k8s rootCA should have at least 24 hours of "
                   "validation before expiry.")
|
|
|
|
|
|
# Validate the resource names and limit values of a quota payload
|
|
def validate_quota_limits(payload):
    """Validate the resource names (and limits, if given) of a quota payload.

    :param payload: iterable of resource names; when it is a dict its values
                    are also checked to be integers greater than zero
    :raises InvalidInputError: on an unknown resource name or a bad limit
    """
    known_fields = tuple(itertools.chain(dccommon_consts.CINDER_QUOTA_FIELDS,
                                         dccommon_consts.NOVA_QUOTA_FIELDS,
                                         dccommon_consts.NEUTRON_QUOTA_FIELDS))
    # Only dict payloads (put/post) carry limit values to check
    carries_limits = isinstance(payload, dict)

    for resource in payload:
        # Check valid resource name
        if resource not in known_fields:
            raise exceptions.InvalidInputError
        if not carries_limits:
            continue
        # Check valid quota limit value
        limit = payload[resource]
        if not isinstance(limit, int) or limit <= 0:
            raise exceptions.InvalidInputError
|
|
|
|
|
|
def get_sw_update_strategy_extra_args(context, update_type=None):
    """Return the extra_args of the existing sw_update_strategy.

    :param context: request context object.
    :param update_type: filter the update strategy (defaults to None)
    :returns dict (an empty dictionary if no strategy exists)
    """
    try:
        strategy = db_api.sw_update_strategy_get(context,
                                                 update_type=update_type)
    except exceptions.NotFound:
        # No strategy in the database
        return {}
    return strategy.extra_args
|
|
|
|
|
|
def get_sw_update_opts(context,
                       for_sw_update=False, subcloud_id=None):
    """Get sw update options for a subcloud

    :param context: request context object.
    :param for_sw_update: return the default options if subcloud options
                          are empty. Useful for retrieving sw update
                          options on application of patch strategy.
    :param subcloud_id: id of subcloud. When None, the defaults are returned.
    :raises SubcloudPatchOptsNotFound: if the subcloud has no options and
                                       for_sw_update is not set
    """
    if subcloud_id is not None:
        # Subcloud-specific options take precedence when they exist
        subcloud_opts = db_api.sw_update_opts_get(context, subcloud_id)
        if subcloud_opts:
            subcloud_name = db_api.subcloud_get(context, subcloud_id).name
            return db_api.sw_update_opts_w_name_db_model_to_dict(
                subcloud_opts, subcloud_name)
        if not for_sw_update:
            raise exceptions.SubcloudPatchOptsNotFound(
                subcloud_id=subcloud_id)

    # Defaults requested (or used as fallback). Return constants if there
    # is no entry in the db.
    default_opts = db_api.sw_update_opts_default_get(context)
    if not default_opts:
        return vim.SW_UPDATE_OPTS_CONST_DEFAULT

    return db_api.sw_update_opts_w_name_db_model_to_dict(
        default_opts, dccommon_consts.SW_UPDATE_DEFAULT_TITLE)
|
|
|
|
|
|
def ensure_lock_path():
    """Return the oslo_concurrency lock directory, creating it if needed.

    The path is determined as follows:
    1) First, from the oslo_concurrency section of the config
       a) If not set via an option default or config file, oslo_concurrency
          sets it to the OSLO_LOCK_PATH env variable
    2) Then if not set, a "dcmanager" directory under tsc.VOLATILE_PATH

    A newly created directory is chowned to DC_MANAGER_USERNAME /
    DC_MANAGER_GRPNAME.

    :returns: the lock path, or None if it could not be created
    """
    lock_path = (cfg.CONF.oslo_concurrency.lock_path or
                 os.path.join(tsc.VOLATILE_PATH, "dcmanager"))

    if os.path.isdir(lock_path):
        return lock_path

    try:
        uid = pwd.getpwnam(DC_MANAGER_USERNAME).pw_uid
        gid = grp.getgrnam(DC_MANAGER_GRPNAME).gr_gid
        # exist_ok: another process may create the directory between the
        # isdir() check above and this call; that must not be an error.
        os.makedirs(lock_path, exist_ok=True)
        os.chown(lock_path, uid, gid)
        LOG.info("Created directory=%s" % lock_path)
    except OSError as e:
        LOG.exception("makedir %s OSError=%s encountered" %
                      (lock_path, e))
        return None

    return lock_path
|
|
|
|
|
|
def synchronized(name, external=True, fair=False):
    """Build an oslo lockutils.synchronized decorator for the given lock.

    :param name: lock name
    :param external: when True a file lock prefixed 'DCManager-' is used,
                     stored in the directory given by ensure_lock_path()
    :param fair: passed through to lockutils.synchronized
    """
    lock_file_prefix = 'DCManager-' if external else None
    lock_dir = ensure_lock_path() if external else None

    return lockutils.synchronized(name,
                                  lock_file_prefix=lock_file_prefix,
                                  external=external,
                                  lock_path=lock_dir,
                                  semaphores=None,
                                  delay=0.01,
                                  fair=fair)
|
|
|
|
|
|
def get_filename_by_prefix(dir_path, prefix):
    """Return the first entry of 'dir_path' whose name starts with 'prefix'.

    Entries are examined in os.listdir() order. Only the base filename is
    returned (dir_path is not included); None if nothing matches.
    """
    candidates = (entry for entry in os.listdir(dir_path)
                  if entry.startswith(prefix))
    return next(candidates, None)
|
|
|
|
|
|
def create_subcloud_inventory(subcloud,
                              inventory_file):
    """Create the ansible inventory file for the specified subcloud

    :param subcloud: mapping providing the 'name' and 'bootstrap-address'
                     keys
    :param inventory_file: path of the inventory file to (re)create
    """

    # Delete the file if it already exists
    delete_subcloud_inventory(inventory_file)

    # The inventory is YAML, so nesting is expressed through indentation:
    # 'vars' and 'hosts' are children of 'all', the host entry is a child
    # of 'hosts' and 'ansible_host' is a child of the host entry.
    with open(inventory_file, 'w') as f_out_inventory:
        f_out_inventory.write(
            '---\n'
            'all:\n'
            '  vars:\n'
            '    ansible_ssh_user: sysadmin\n'
            '    ansible_ssh_extra_args: "-o UserKnownHostsFile=/dev/null"\n'
            '  hosts:\n'
            '    ' + subcloud['name'] + ':\n'
            '      ansible_host: ' +
            subcloud['bootstrap-address'] + '\n'
        )
|
|
|
|
|
|
def create_subcloud_inventory_with_admin_creds(subcloud_name,
                                               inventory_file,
                                               subcloud_bootstrap_address,
                                               ansible_pass):
    """Create the ansible inventory file for the specified subcloud.

    Includes ansible_become_pass attribute.

    :param subcloud_name: name used as the inventory host entry
    :param inventory_file: path of the inventory file to (re)create
    :param subcloud_bootstrap_address: value written as ansible_host
    :param ansible_pass: value written as both ansible_ssh_pass and
                         ansible_become_pass
    """

    # Delete the file if it already exists
    delete_subcloud_inventory(inventory_file)

    # The inventory is YAML, so nesting is expressed through indentation.
    # NOTE(review): ansible_pass is written unquoted; a value containing
    # YAML-significant characters may need quoting by the caller - confirm.
    with open(inventory_file, 'w') as f_out_inventory:
        f_out_inventory.write(
            ('---\n'
             'all:\n'
             '  vars:\n'
             '    ansible_ssh_user: sysadmin\n'
             '    ansible_ssh_pass: {0}\n'
             '    ansible_become_pass: {0}\n'
             '    ansible_ssh_extra_args: "-o UserKnownHostsFile=/dev/null"\n'
             '  hosts:\n'
             '    {1}:\n'
             '      ansible_host: {2}\n').format(ansible_pass,
                                                 subcloud_name,
                                                 subcloud_bootstrap_address)
        )
|
|
|
|
|
|
def delete_subcloud_inventory(inventory_file):
    """Remove the ansible inventory file of a subcloud, if there is one.

    Nothing happens when inventory_file is empty/None or does not name an
    existing regular file.
    """
    if not inventory_file:
        return
    if os.path.isfile(inventory_file):
        os.remove(inventory_file)
|
|
|
|
|
|
def get_vault_load_files(target_version):
    """Return a tuple for the ISO and SIG for this load version from the vault.

    The files can be imported to the vault using any name, but must end
    in 'iso' or 'sig' (matched case-insensitively). If several files share
    a suffix, the last one listed by os.listdir() is used.

    :param target_version: The software version to search under the vault
    :returns: (iso path, sig path)
    :raises VaultLoadMissingError: if the .iso or the .sig file is missing
    """
    vault_dir = "{}/{}/".format(consts.LOADS_VAULT_DIR, target_version)

    # Path found so far for each required suffix
    found = {".iso": None, ".sig": None}

    if os.path.isdir(vault_dir):
        for entry in os.listdir(vault_dir):
            lowered = entry.lower()
            for suffix in (".iso", ".sig"):
                if lowered.endswith(suffix):
                    found[suffix] = os.path.join(vault_dir, entry)
                    break

    # The .iso is reported first when both files are missing
    for suffix in (".iso", ".sig"):
        if found[suffix] is None:
            raise exceptions.VaultLoadMissingError(
                file_type=suffix, vault_dir=vault_dir)

    return (found[".iso"], found[".sig"])
|
|
|
|
|
|
def get_active_kube_version(kube_versions):
    """Return the version of the first active target kubernetes entry.

    :param kube_versions: iterable of objects exposing to_dict()
    :returns: the 'version' of the first entry whose 'target' is truthy and
              whose 'state' is 'active', or None if there is none
    """
    as_dicts = (kube.to_dict() for kube in kube_versions)
    return next(
        (entry.get('version') for entry in as_dicts
         if entry.get('target') and entry.get('state') == 'active'),
        None)
|
|
|
|
|
|
def get_available_kube_version(kube_versions):
    """Return the version of the first available kubernetes entry.

    :param kube_versions: iterable of objects exposing to_dict()
    :returns: the 'version' of the first entry whose 'state' is
              'available', or None if there is none
    """
    as_dicts = (kube.to_dict() for kube in kube_versions)
    return next(
        (entry.get('version') for entry in as_dicts
         if entry.get('state') == 'available'),
        None)
|
|
|
|
|
|
def kube_version_compare(left, right):
    """Performs a cmp operation for two kubernetes versions

    Return -1, 0, or 1 if left is less, equal, or greater than right

    left and right are semver strings starting with the letter 'v'
    If either value is None or empty, an exception is raised
    If the strings are not 'v'major.minor.micro, an exception is raised
    Note: This method supports shorter versions.  ex: v1.22
    When comparing different length tuples, additional fields are ignored.
    For example: v1.19 and v1.19.1 would be the same.
    """
    # "not version" rejects None and the empty string before indexing, so an
    # empty value reports the invalid-version error rather than IndexError.
    for version in (left, right):
        if not version or version[0] != 'v':
            raise Exception(
                "Invalid kube version(s), left: (%s), right: (%s)" %
                (left, right))

    # start the split at index 1 ('after' the 'v' character)
    l_val = tuple(map(int, left[1:].split(".")))
    r_val = tuple(map(int, right[1:].split(".")))

    # If the tuples are different length, convert both to the same length
    common_len = min(len(l_val), len(r_val))
    l_val = l_val[:common_len]
    r_val = r_val[:common_len]

    # The following is the same as cmp, which does not exist in python3.
    return (l_val > r_val) - (l_val < r_val)
|
|
|
|
|
|
def get_loads_for_patching(loads):
    """Return the software versions of the loads that can be patched.

    A load qualifies when its state is active or imported.
    """
    patchable_states = (consts.ACTIVE_LOAD_STATE,
                        consts.IMPORTED_LOAD_STATE)
    versions = []
    for load in loads:
        if load.state in patchable_states:
            versions.append(load.software_version)
    return versions
|
|
|
|
|
|
def get_loads_for_prestage(loads):
    """Return the software versions of the loads that can be prestaged.

    A load qualifies when its state is active, imported or inactive.
    """
    prestageable_states = (consts.ACTIVE_LOAD_STATE,
                           consts.IMPORTED_LOAD_STATE,
                           consts.INACTIVE_LOAD_STATE)
    versions = []
    for load in loads:
        if load.state in prestageable_states:
            versions.append(load.software_version)
    return versions
|
|
|
|
|
|
def subcloud_get_by_ref(context, subcloud_ref):
    """Look up a subcloud by either name or ID.

    An all-digit reference is treated as an ID, anything else as a name.

    :param context: The request context
    :param subcloud_ref: Reference to the subcloud, either a name or an ID
    :returns: the subcloud, or None if it does not exist
    """
    try:
        if subcloud_ref.isdigit():
            return db_api.subcloud_get(context, subcloud_ref)
        return db_api.subcloud_get_by_name(context, subcloud_ref)
    except (exceptions.SubcloudNotFound, exceptions.SubcloudNameNotFound):
        return None
|
|
|
|
|
|
def subcloud_group_get_by_ref(context, group_ref):
    """Look up a subcloud group by either name or ID.

    An all-digit reference is treated as an ID, anything else as a name.

    :param context: The request context
    :param group_ref: Reference to the group, either a name or an ID
    :returns: the subcloud group, or None if it does not exist
    """
    if group_ref.isdigit():
        # Lookup subcloud group as an ID
        try:
            return db_api.subcloud_group_get(context, group_ref)
        except exceptions.SubcloudGroupNotFound:
            return None

    # Lookup subcloud group as a name
    try:
        return db_api.subcloud_group_get_by_name(context, group_ref)
    except exceptions.SubcloudGroupNameNotFound:
        return None
|
|
|
|
|
|
def subcloud_db_list_to_dict(subclouds):
    """Convert subcloud db models to {'subclouds': [<subcloud dict>, ...]}."""
    converted = []
    for subcloud in subclouds:
        converted.append(db_api.subcloud_db_model_to_dict(subcloud))
    return {'subclouds': converted}
|
|
|
|
|
|
def get_oam_addresses(subcloud_name, sc_ks_client):
    """Get the subcloud's OAM addresses through its sysinv endpoint.

    :param subcloud_name: name of the subcloud
    :param sc_ks_client: keystone client of the subcloud, providing the
                         endpoint cache and the session
    :returns: the OAM addresses, or None (after logging an error) when the
              endpoint or the addresses cannot be found
    """
    try:
        # The sysinv endpoint comes from the subcloud's keystone client
        sysinv_endpoint = sc_ks_client.endpoint_cache.get_endpoint('sysinv')
        client = SysinvClient(subcloud_name,
                              sc_ks_client.session,
                              endpoint=sysinv_endpoint)
        return client.get_oam_addresses()
    except (keystone_exceptions.EndpointNotFound, IndexError) as e:
        LOG.error("Identity endpoint for subcloud: %s not found. %s" %
                  (subcloud_name, e))
    except dccommon_exceptions.OAMAddressesNotFound:
        LOG.error("OAM addresses for subcloud: %s not found." %
                  subcloud_name)
    return None
|
|
|
|
|
|
def get_ansible_filename(subcloud_name, postfix='.yml'):
    """Return the path "<ansible overrides dir>/<subcloud_name><postfix>"."""
    basename = subcloud_name + postfix
    return os.path.join(dccommon_consts.ANSIBLE_OVERRIDES_PATH, basename)
|
|
|
|
|
|
def pre_check_management_affected_alarm(system_health):
    """Check that a health report shows no management affecting alarm.

    Acceptable health conditions:

    a) subcloud is completely healthy (i.e. no failed checks)
    b) there is alarm but no management affecting alarm
    c) subcloud fails alarm check and it only has non-management
       affecting alarm(s)

    :param system_health: text of the system health report
    :returns: False when the alarm check failed and the report does not
              state that zero alarms are management affecting, else True
    """
    # Raw strings: "\[" is a regex escape, not a Python string escape
    failed_alarm_check = re.search(r"No alarms: \[Fail\]", system_health)
    no_mgmt_alarms = re.search(r"\[0\] of which are management affecting",
                               system_health)
    if failed_alarm_check and not no_mgmt_alarms:
        return False
    return True
|
|
|
|
|
|
def find_ansible_error_msg(subcloud_name, log_file, stage=None):
    """Find errors into ansible logs.

    It will search into ansible log for a fatal error expression.

    If fatal error is found, it will capture the message
    until the final expression. It will get always the more recent
    fatal error from the log files.
    If the error message is longer than MAX_LINES_MSG lines, it will be
    summarized. Also, the last failed task is provided.

    :param subcloud_name: name of the subcloud, used in the message
    :param log_file: path of the ansible log; "<log_file>.1" (the rotated
                     log) is searched too when it exists
    :param stage: name of the playbook stage, used in the message
    :returns: the error message found, or a generic error message if none
              is found or the search fails
    """

    error_found = False
    error_msg = []
    failed_task = ''

    # awk program to get the information inside the last match found
    # starting with 'fatal: [' and ending with 'PLAY RECAP'.
    # Raw string: the backslashes belong to awk, not to Python.
    awk_program = r'''BEGIN {f=""}              # initialize f
        /fatal: \[/ {f=""}              # reset f on first match
        /fatal: \[/,/PLAY RECAP/ {      # capture text between two delimiters
            if ($0 ~ /PLAY RECAP/) next # exclude last delimiter
            if ($0 == "") next          # exclude blank line
            f = f ? (f "\n" $0) : $0}   # assign or append to f
        END {print f}
    '''

    # necessary check since is possible to have
    # the error in rotated ansible log
    rotated_log = log_file + '.1'
    if os.path.exists(rotated_log):
        files_for_search = [rotated_log]
        if os.path.exists(log_file):
            files_for_search.append(log_file)
    else:
        files_for_search = [log_file]

    # Rotated log first so the current log's last match wins
    cmd_list = ['awk', awk_program] + files_for_search

    try:
        error_msg_raw = subprocess.check_output(
            cmd_list,
            stderr=subprocess.STDOUT).decode('utf-8')
        # awk always prints at least a newline, so more than one
        # character means something was captured
        if len(error_msg_raw) > 1:
            error_found = True
            error_msg = [elem for elem in error_msg_raw.split("\n") if elem]
            failed_task = get_failed_task(files_for_search)
    except Exception as exc:
        LOG.error("Failed getting info from ansible log file :%s" % exc)

    if error_found and (len(error_msg) > MAX_LINES_MSG):
        error_msg = summarize_message(error_msg)
    error_msg = '\n'.join(str(element) for element in error_msg)
    error_msg = error_msg.replace("\'", "\"")

    if error_found:
        msg = "FAILED %s playbook of (%s).\n" \
              " detail: %s \n" \
              "FAILED TASK: %s " % (
                  stage,
                  subcloud_name,
                  error_msg,
                  failed_task)
    else:
        msg = "FAILED %s playbook of (%s).\n" \
              "check individual log at " \
              "%s for detailed output " % (
                  stage,
                  subcloud_name,
                  log_file)
    return msg
|
|
|
|
|
|
def get_failed_task(files):
    """Get last task failed

    It receives an ansible log file (or a couple of files)
    and search for the last failed task with its date

    :param files: list with one or two log file paths; only the first two
                  entries are searched
    :returns: a string with the first two captured lines (the task and its
              date) joined by a space, the single captured line if only
              one was captured, or None if nothing was captured or the
              search failed
    """
    if not files:
        # Without a file argument awk would read from stdin
        return None

    # awk program to get the information about last failed task.
    # Match expression starting with 'TASK [' and ending with
    # 'fatal: ['
    # Raw string: the backslashes belong to awk, not to Python.
    awk_program = r'''BEGIN {f=""}              # initialize f
        /TASK \[/ {f=""}                # reset f on first match
        /TASK \[/,/fatal: \[/ {         # capture text between two delimiters
            if ($0 ~ /fatal: \[/) next  # exclude last delimiter
            if ($0 == "") next          # exclude blank line
            f = f ? (f "\n" $0) : $0}   # assign or append to f
        END {print f}
    '''

    # necessary check since is possible to have
    # the error in rotated ansible log
    awk_cmd = ['awk', awk_program] + list(files[:2])

    try:
        failed_task = subprocess.check_output(
            awk_cmd,
            stderr=subprocess.STDOUT).decode('utf-8')
    except Exception as exc:
        LOG.error("Failed getting failed task :%s" % exc)
        return None

    failed_task = failed_task.replace("*", "")
    failed_task = failed_task.replace("\'", "\"")
    task_lines = [elem for elem in failed_task.split("\n") if elem]
    # awk prints at least a newline even when nothing matched, so the
    # captured lines (not the raw output length) decide whether a task
    # was found.
    if not task_lines:
        return None
    return " ".join(task_lines[:2])
|
|
|
|
|
|
def summarize_message(error_msg):
    """Summarize message.

    Keeps only the lines of a long error message that contain one of the
    key words (matched case-insensitively), without duplicates and up to
    MAX_LINES_MSG lines.

    :param error_msg: list of message lines
    :returns: list with the lines of the brief message
    """
    keywords = ('msg:', 'fail', 'error', 'cmd', 'stderr')
    summary = []
    for entry in error_msg:
        has_keyword = any(re.search(keyword, entry, re.IGNORECASE)
                          for keyword in keywords)
        if not has_keyword:
            continue
        # keep at most MAX_LINES_MSG lines, avoiding duplicated items
        if len(summary) < MAX_LINES_MSG and entry not in summary:
            summary.append(entry)
    return summary
|
|
|
|
|
|
def is_valid_for_backup_operation(operation, subcloud):
    """Validate a subcloud for the given subcloud-backup operation.

    :param operation: 'create', 'delete' or 'restore'
    :param subcloud: the subcloud to validate
    :returns: the result of the validator of the operation
    :raises ValidateFail: if the operation is not one of the above (the
                          validators raise it too for an invalid subcloud)
    """
    validators = (
        ('create', _is_valid_for_backup_create),
        ('delete', _is_valid_for_backup_delete),
        ('restore', _is_valid_for_backup_restore),
    )
    for supported_operation, validator in validators:
        if operation == supported_operation:
            return validator(subcloud)

    msg = "Invalid operation %s" % operation
    LOG.error(msg)
    raise exceptions.ValidateFail(msg)
|
|
|
|
|
|
def _is_valid_for_backup_create(subcloud):
    """Check that a subcloud is online, managed and in a valid deploy state.

    :returns: True
    :raises ValidateFail: if any of the three conditions is not met
    """
    eligible = (
        subcloud.availability_status == dccommon_consts.AVAILABILITY_ONLINE
        and subcloud.management_state == dccommon_consts.MANAGEMENT_MANAGED
        and subcloud.deploy_status in consts.VALID_DEPLOY_STATES_FOR_BACKUP)
    if eligible:
        return True

    msg = ('Subcloud %s must be online, managed and have valid '
           'deploy-status for the subcloud-backup '
           'create operation.' % subcloud.name)
    raise exceptions.ValidateFail(msg)
|
|
|
|
|
|
def _is_valid_for_backup_delete(subcloud):
    """Check that a subcloud is online and managed.

    :returns: True
    :raises ValidateFail: if the subcloud is not online or not managed
    """
    eligible = (
        subcloud.availability_status == dccommon_consts.AVAILABILITY_ONLINE
        and subcloud.management_state == dccommon_consts.MANAGEMENT_MANAGED)
    if eligible:
        return True

    msg = ('Subcloud %s must be online and managed for the subcloud-backup'
           ' delete operation with --local-only option.' % subcloud.name)
    raise exceptions.ValidateFail(msg)
|
|
|
|
|
|
def _is_valid_for_backup_restore(subcloud):
    """Check that a subcloud can be restored from a backup.

    The subcloud must be unmanaged, must not be in one of the deploy
    states that are invalid for restore, and must have install data.

    :returns: True
    :raises ValidateFail: if any of the conditions is not met
    """
    restorable_state = (
        subcloud.management_state == dccommon_consts.MANAGEMENT_UNMANAGED
        and subcloud.deploy_status
        not in consts.INVALID_DEPLOY_STATES_FOR_RESTORE)
    if not restorable_state:
        raise exceptions.ValidateFail(
            'Subcloud %s must be unmanaged and in a valid deploy state '
            'for the subcloud-backup restore operation.' % subcloud.name)

    if not subcloud.data_install:
        raise exceptions.ValidateFail(
            'Data installation on %s is missing.' % subcloud.name)

    return True
|
|
|
|
|
|
def get_matching_iso(software_version=None):
    """Get the load ISO of a software version from the vault.

    :param software_version: version to look up; defaults to
                             tsc.SW_VERSION when empty or None
    :returns: (iso path, None) on success, (None, error message) on
              failure; errors are logged and never raised
    """
    try:
        if not software_version:
            software_version = tsc.SW_VERSION
        matching_iso, _ = get_vault_load_files(software_version)
        if not matching_iso:
            error_msg = ('Failed to get %s load image. Provide '
                         'active/inactive load image via '
                         '"system --os-region-name SystemController '
                         'load-import --active/--inactive"' % software_version)
            # No exception is being handled here, so there is no traceback
            # for LOG.exception to attach.
            LOG.error(error_msg)
            return None, error_msg
        return matching_iso, None
    except Exception as e:
        LOG.exception("Could not load vault files.")
        return None, str(e)
|
|
|
|
|
|
def is_subcloud_healthy(subcloud_name):
    """Check the system health report of a subcloud.

    The report is retrieved from the subcloud's sysinv endpoint. The
    subcloud is considered healthy if there are no failures, or a single
    failure that is the alarm check with no management affecting alarms.

    :param subcloud_name: name (region) of the subcloud
    :returns: True if healthy, False otherwise
    :raises Exception: any error raised while retrieving the report is
                       logged and re-raised
    """
    try:
        os_client = OpenStackDriver(region_name=subcloud_name,
                                    region_clients=None)
        keystone_client = os_client.keystone_client
        endpoint = keystone_client.endpoint_cache.get_endpoint('sysinv')
        sysinv_client = SysinvClient(subcloud_name,
                                     keystone_client.session,
                                     endpoint=endpoint)
        system_health = sysinv_client.get_system_health()
    except Exception as e:
        LOG.exception(e)
        raise

    # Raw strings: "\[" is a regex escape, not a Python string escape
    fails = re.findall(r"\[Fail\]", system_health)
    failed_alarm_check = re.findall(r"No alarms: \[Fail\]", system_health)
    no_mgmt_alarms = re.findall(r"\[0\] of which are management affecting",
                                system_health)

    # Subcloud is considered healthy if there are no failures or
    # a single failure with only low severity alarms (not management affecting)
    if ((len(fails) == 0) or
            (len(fails) == 1 and failed_alarm_check and no_mgmt_alarms)):
        return True

    return False
|
|
|
|
|
|
def get_systemcontroller_installed_loads():
    """Return the software versions of the system controller's loads.

    The loads are read from the system controller's sysinv endpoint and
    filtered with get_loads_for_prestage().

    :raises Exception: a failure to create the OpenStack driver is logged
                       and re-raised
    """
    region = dccommon_consts.SYSTEM_CONTROLLER_NAME
    try:
        driver = OpenStackDriver(region_name=region, region_clients=None)
    except Exception:
        LOG.exception("Failed to get keystone client for %s", region)
        raise

    keystone = driver.keystone_client
    client = SysinvClient(
        region, keystone.session,
        endpoint=keystone.endpoint_cache.get_endpoint('sysinv'))

    return get_loads_for_prestage(client.get_loads())
|
|
|
|
|
|
def get_certificate_from_secret(secret_name, secret_ns):
    """Read a certificate and its private key from a k8s secret.

    :param secret_name: the name of the secret
    :param secret_ns: the namespace of the secret

    :return: tls_crt: the certificate.
             tls_key: the corresponding private key of the certificate.
    raise Exception for kubernetes data errors
    """
    secret = kubeoperator.KubeOperator().kube_get_secret(secret_name,
                                                         secret_ns)
    if not hasattr(secret, 'data'):
        raise Exception('Invalid secret %s\\%s' % (secret_ns, secret_name))

    secret_data = secret.data
    if not ('tls.crt' in secret_data and 'tls.key' in secret_data):
        raise Exception('Invalid certificate data from secret %s\\%s' %
                        (secret_ns, secret_name))

    # Both entries are stored base64 encoded; certificate is decoded first
    try:
        certificate = base64.decode_as_text(secret_data['tls.crt'])
        private_key = base64.decode_as_text(secret_data['tls.key'])
    except TypeError:
        raise Exception('Certificate secret data is invalid %s\\%s' %
                        (secret_ns, secret_name))

    return certificate, private_key
|
|
|
|
|
|
def get_management_subnet(payload):
    """Get management subnet.

    :param payload: dict that may hold 'admin_subnet' and
                    'management_subnet'
    :returns: the admin subnet when it is present and non-empty, otherwise
              the management subnet ('' when that key is absent)
    """
    admin_subnet = payload.get('admin_subnet', None)
    if not admin_subnet:
        return payload.get('management_subnet', '')
    return admin_subnet
|
|
|
|
|
|
def get_management_start_address(payload):
    """Get management start address.

    :param payload: dict that may hold 'admin_start_address' and
                    'management_start_address'
    :returns: the admin start address when it is present and non-empty,
              otherwise the management start address ('' when that key is
              absent)
    """
    admin_start = payload.get('admin_start_address', None)
    if not admin_start:
        return payload.get('management_start_address', '')
    return admin_start
|
|
|
|
|
|
def get_management_end_address(payload):
    """Get management end address.

    :param payload: dict that may hold 'admin_end_address' and
                    'management_end_address'
    :returns: the admin end address when it is present and non-empty,
              otherwise the management end address ('' when that key is
              absent)
    """
    admin_end = payload.get('admin_end_address', None)
    if not admin_end:
        return payload.get('management_end_address', '')
    return admin_end
|
|
|
|
|
|
def get_management_gateway_address(payload):
    """Get management gateway address.

    :param payload: dict that may hold 'admin_gateway_address' and
                    'management_gateway_address'
    :returns: the admin gateway address when it is present and non-empty,
              otherwise the management gateway address ('' when that key
              is absent)
    """
    admin_gateway = payload.get('admin_gateway_address', None)
    if not admin_gateway:
        return payload.get('management_gateway_address', '')
    return admin_gateway
|
|
|
|
|
|
def has_network_reconfig(payload, subcloud):
    """Tell whether the payload changes the subcloud management network.

    The subnet, start address, end address and gateway address resolved
    from the payload (admin values preferred over management values) are
    compared with the values currently stored on the subcloud.

    :param payload: subcloud configuration
    :param subcloud: subcloud object

    Returns True when at least one of the four values differs.
    """
    # (requested value, value currently recorded on the subcloud)
    requested_vs_current = (
        (get_management_subnet(payload),
         subcloud.management_subnet),
        (get_management_start_address(payload),
         subcloud.management_start_ip),
        (get_management_end_address(payload),
         subcloud.management_end_ip),
        (get_management_gateway_address(payload),
         subcloud.management_gateway_ip),
    )
    return any(requested != current
               for requested, current in requested_vs_current)
|
|
|
|
|
|
def set_open_file_limit(new_soft_limit: int):
    """Adjust the maximum number of open files for this process (soft limit)

    The requested value is clamped to the current hard limit, since the
    soft limit cannot be raised above it. When the hard limit is
    unlimited no clamping is applied. The hard limit itself is never
    changed.

    Any failure while reading or applying the limit is logged and
    swallowed, so this call is best effort and never raises.

    :param new_soft_limit: desired RLIMIT_NOFILE soft limit
    """
    try:
        current_soft, current_hard = sys_resource.getrlimit(
            sys_resource.RLIMIT_NOFILE)
        # RLIM_INFINITY is a sentinel (-1 on Linux), not a magnitude: a
        # plain '>' comparison against it would treat every request as
        # exceeding the hard limit and replace it with "unlimited".
        hard_is_unlimited = current_hard == sys_resource.RLIM_INFINITY
        if not hard_is_unlimited and new_soft_limit > current_hard:
            LOG.error(f'New process open file soft limit [{new_soft_limit}] '
                      f'exceeds the hard limit [{current_hard}]. Setting to '
                      'hard limit instead.')
            new_soft_limit = current_hard
        if new_soft_limit != current_soft:
            LOG.info(f'Setting process open file limit to {new_soft_limit} '
                     f'(from {current_soft})')
            sys_resource.setrlimit(sys_resource.RLIMIT_NOFILE,
                                   (new_soft_limit, current_hard))
    except Exception as ex:
        LOG.exception(f'Failed to set NOFILE resource limit: {ex}')
|
|
|
|
|
|
def get_playbook_for_software_version(playbook_filename, software_version=None):
    """Locate the ansible playbook that matches a software version.

    When a software version is given and it differs from the active
    release (tsc.SW_VERSION), the current-version base path inside the
    playbook filename is replaced by the previous-version base path joined
    with that software version. Otherwise the filename is used unchanged.

    :param playbook_filename: ansible playbook filename
    :param software_version: software version
    :raises PlaybookNotFound: If the resulting playbook file does not exist

    Returns the resolved playbook filename.
    """
    resolved_playbook = playbook_filename
    targets_other_release = (
        software_version and software_version != tsc.SW_VERSION)
    if targets_other_release:
        versioned_base_path = os.path.join(
            consts.ANSIBLE_PREVIOUS_VERSION_BASE_PATH, software_version)
        resolved_playbook = playbook_filename.replace(
            consts.ANSIBLE_CURRENT_VERSION_BASE_PATH, versioned_base_path)

    if os.path.isfile(resolved_playbook):
        return resolved_playbook
    raise exceptions.PlaybookNotFound(playbook_name=resolved_playbook)
|
|
|
|
|
|
def get_value_from_yaml_file(filename, key):
    """Get corresponding value for a key in the given yaml file.

    :param filename: the yaml filename
    :param key: the top-level key to look up

    Returns the value, or None if the file does not exist, the document
    is empty or is not a mapping at its top level, or the key is absent.
    """
    if not os.path.isfile(filename):
        return None
    with open(os.path.abspath(filename), 'r') as f:
        data = yaml.load(f, Loader=yaml.SafeLoader)
    # An empty file loads as None and a sequence/scalar document has no
    # .get(); report both as "not found" instead of raising AttributeError.
    if not isinstance(data, dict):
        return None
    return data.get(key)
|
|
|
|
|
|
def update_values_on_yaml_file(filename, values, yaml_dump=True):
    """Update all specified key values from the given yaml file.

    Nothing happens when the file does not exist. The file is rewritten
    only if at least one key in ``values`` is missing from the file or
    holds a different value. An empty yaml file is treated as an empty
    mapping, so the given values are simply added to it.

    :param filename: the yaml filename
    :param values: dict with yaml keys and values to replace
    :param yaml_dump: write file using yaml dump (default is True). When
                      False, a '---' header is written followed by one
                      "key: <JSON-encoded value>" line per top-level key.
    """
    if not os.path.isfile(filename):
        return
    yaml_path = os.path.abspath(filename)
    with open(yaml_path, 'r') as f:
        data = yaml.load(f, Loader=yaml.SafeLoader)
    # An empty document loads as None; without this the membership test
    # below raises TypeError instead of adding the requested keys.
    if data is None:
        data = {}

    changes = {key: value for key, value in values.items()
               if key not in data or value != data.get(key)}
    if not changes:
        # Leave the file untouched when it already holds every value.
        return
    data.update(changes)

    with open(yaml_path, 'w') as f:
        if yaml_dump:
            yaml.dump(data, f, sort_keys=False)
        else:
            f.write('---\n')
            for k, v in data.items():
                f.write("%s: %s\n" % (k, json.dumps(v)))
|
|
|
|
|
|
def load_yaml_file(filename: str):
    """Parse a yaml file with the safe loader and return its content.

    :param filename: the yaml filename

    Returns whatever the document deserializes to.
    """
    yaml_path = os.path.abspath(filename)
    with open(yaml_path, 'r') as stream:
        return yaml.load(stream, Loader=yaml.loader.SafeLoader)
|
|
|
|
|
|
def decode_and_normalize_passwd(input_passwd):
    """Decode a base64 password and quote it for the Ansible inventory.

    A sysadmin password that starts with a special character must be
    enclosed in quotes so that the generated inventory file stays parsable
    by Ansible:

    - a password starting with a double quote but not ending with one is
      wrapped in single quotes;
    - any other password whose first character matches the punctuation
      pattern is wrapped in double quotes;
    - everything else is returned as decoded.

    :param input_passwd: base64 encoded password

    Returns the decoded, possibly quoted, password.
    """
    pattern = r'^[' + string.punctuation + ']'
    passwd = base64.decode_as_text(input_passwd)

    if passwd.startswith('"'):
        if not passwd.endswith('"'):
            # Unbalanced leading double quote: protect it with single quotes.
            return "'" + passwd + "'"
        return passwd

    if re.match(pattern, passwd):
        return '"' + passwd + '"'
    return passwd
|
|
|
|
|
|
def get_failure_msg(subcloud_name):
    """Fetch the error message reported by a subcloud's sysinv service.

    A sysinv client is built for the subcloud region from its keystone
    session and 'sysinv' endpoint, then asked for its error message.

    :param subcloud_name: name of the subcloud, used as the region name

    Returns the message from sysinv, or consts.ERROR_DESC_FAILED when any
    step raises (the exception is logged).
    """
    try:
        keystone = OpenStackDriver(region_name=subcloud_name,
                                   region_clients=None).keystone_client
        sysinv_endpoint = keystone.endpoint_cache.get_endpoint('sysinv')
        sysinv = SysinvClient(subcloud_name,
                              keystone.session,
                              endpoint=sysinv_endpoint)
        return sysinv.get_error_msg()
    except Exception as e:
        LOG.exception("{}: {}".format(subcloud_name, e))
        return consts.ERROR_DESC_FAILED
|
|
|
|
|
|
def update_abort_status(context, subcloud_id, deploy_status, abort_failed=False):
    """Move a subcloud to the deploy status matching an abort operation.

    The new status is looked up from the current one, using
    ABORT_UPDATE_FAIL_STATUS when the abort failed and ABORT_UPDATE_STATUS
    otherwise, and is then written to the database.

    :param context: request context object
    :param subcloud_id: subcloud id from db
    :param deploy_status: subcloud deploy status from db
    :param abort_failed: if abort process fails (default False)

    Returns the updated subcloud as returned by the db api.
    """
    status_map = (ABORT_UPDATE_FAIL_STATUS if abort_failed
                  else ABORT_UPDATE_STATUS)
    return db_api.subcloud_update(
        context, subcloud_id, deploy_status=status_map[deploy_status])
|