distcloud/distributedcloud/dcmanager/common/utils.py

# Copyright 2015 Huawei Technologies Co., Ltd.
# Copyright (c) 2017-2023 Wind River Systems, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import datetime
import grp
import itertools
import json
import netaddr
import os
import pwd
import re
import resource as sys_resource
import six.moves
import string
import subprocess
import tsconfig.tsconfig as tsc
import yaml

from keystoneauth1 import exceptions as keystone_exceptions
from oslo_concurrency import lockutils
from oslo_config import cfg
from oslo_log import log as logging
from oslo_serialization import base64

from dccommon import consts as dccommon_consts
from dccommon.drivers.openstack.sdk_platform import OpenStackDriver
from dccommon.drivers.openstack.sysinv_v1 import SysinvClient
from dccommon.drivers.openstack import vim
from dccommon import exceptions as dccommon_exceptions
from dccommon import kubeoperator
from dcmanager.common import consts
from dcmanager.common import exceptions
from dcmanager.db import api as db_api

LOG = logging.getLogger(__name__)

DC_MANAGER_USERNAME = "root"
DC_MANAGER_GRPNAME = "root"

# Max lines output msg from logs
MAX_LINES_MSG = 10

ABORT_UPDATE_STATUS = {
    consts.DEPLOY_STATE_INSTALLING: consts.DEPLOY_STATE_ABORTING_INSTALL,
    consts.DEPLOY_STATE_BOOTSTRAPPING: consts.DEPLOY_STATE_ABORTING_BOOTSTRAP,
    consts.DEPLOY_STATE_CONFIGURING: consts.DEPLOY_STATE_ABORTING_CONFIG,
    consts.DEPLOY_STATE_ABORTING_INSTALL: consts.DEPLOY_STATE_INSTALL_ABORTED,
    consts.DEPLOY_STATE_ABORTING_BOOTSTRAP: consts.DEPLOY_STATE_BOOTSTRAP_ABORTED,
    consts.DEPLOY_STATE_ABORTING_CONFIG: consts.DEPLOY_STATE_CONFIG_ABORTED
}

ABORT_UPDATE_FAIL_STATUS = {
    consts.DEPLOY_STATE_ABORTING_INSTALL: consts.DEPLOY_STATE_INSTALL_FAILED,
    consts.DEPLOY_STATE_ABORTING_BOOTSTRAP: consts.DEPLOY_STATE_BOOTSTRAP_FAILED,
    consts.DEPLOY_STATE_ABORTING_CONFIG: consts.DEPLOY_STATE_CONFIG_FAILED
}

RESUME_PREP_UPDATE_STATUS = {
    consts.DEPLOY_PHASE_INSTALL: consts.DEPLOY_STATE_PRE_INSTALL,
    consts.DEPLOY_PHASE_BOOTSTRAP: consts.DEPLOY_STATE_PRE_BOOTSTRAP,
    consts.DEPLOY_PHASE_CONFIG: consts.DEPLOY_STATE_PRE_CONFIG
}

RESUME_PREP_UPDATE_FAIL_STATUS = {
    consts.DEPLOY_PHASE_INSTALL: consts.DEPLOY_STATE_PRE_INSTALL_FAILED,
    consts.DEPLOY_PHASE_BOOTSTRAP: consts.DEPLOY_STATE_PRE_BOOTSTRAP_FAILED,
    consts.DEPLOY_PHASE_CONFIG: consts.DEPLOY_STATE_PRE_CONFIG_FAILED
}


def get_import_path(cls):
    return cls.__module__ + "." + cls.__name__


# Returns a iterator of tuples containing batch_size number of objects in each
def get_batch_projects(batch_size, project_list, fillvalue=None):
    args = [iter(project_list)] * batch_size
    return six.moves.zip_longest(fillvalue=fillvalue, *args)


def validate_address_str(ip_address_str, network):
    """Determine whether an address is valid."""
    try:
        ip_address = netaddr.IPAddress(ip_address_str)
        if ip_address.version != network.version:
            msg = ("Invalid IP version - must match network version " +
                   ip_version_to_string(network.version))
            raise exceptions.ValidateFail(msg)
        elif ip_address == network:
            raise exceptions.ValidateFail("Cannot use network address")
        elif ip_address == network.broadcast:
            raise exceptions.ValidateFail("Cannot use broadcast address")
        elif ip_address not in network:
            raise exceptions.ValidateFail(
                "Address must be in subnet %s" % str(network))
        return ip_address
    except netaddr.AddrFormatError:
        raise exceptions.ValidateFail(
            "Invalid address - not a valid IP address")


def ip_version_to_string(ip_version):
    """Returns a string representation of ip_version."""
    if ip_version == 4:
        return "IPv4"
    elif ip_version == 6:
        return "IPv6"
    else:
        return "IP"


def validate_network_str(network_str, minimum_size, existing_networks=None,
                         multicast=False, operation=None):
    """Determine whether a network is valid."""
    try:
        network = netaddr.IPNetwork(network_str)
        if network.size < minimum_size:
            raise exceptions.ValidateFail("Subnet too small - must have at "
                                          "least %d addresses" % minimum_size)
        elif network.version == 6 and network.prefixlen < 64:
            raise exceptions.ValidateFail("IPv6 minimum prefix length is 64")
        elif existing_networks and operation != 'reinstall':
            if any(network.ip in subnet for subnet in existing_networks):
                raise exceptions.ValidateFail("Subnet overlaps with another "
                                              "configured subnet")
        elif multicast and not network.is_multicast():
            raise exceptions.ValidateFail("Invalid subnet - must be multicast")
        return network
    except netaddr.AddrFormatError:
        raise exceptions.ValidateFail(
            "Invalid subnet - not a valid IP subnet")


def validate_certificate_subject(subject):
    """Validate a certificate subject

    Duplicate the get_subject validation logic defined in:
    sysinv/api/controllers/v1/kube_rootca_update.py
    Returns a tuple of True, "" if the input is None
    Returns a tuple of True, "" if the input is valid
    Returns a tuple of False, "<error details>" if the input is invalid
    """
    if subject is None:
        return True, ""

    params_supported = ['C', 'OU', 'O', 'ST', 'CN', 'L']
    subject_pairs = re.findall(r"([^=]+=[^=]+)(?:\s|$)", subject)
    subject_dict = {}
    for pair_value in subject_pairs:
        key, value = pair_value.split("=")
        subject_dict[key] = value

    if not all([param in params_supported for param in subject_dict.keys()]):
        return False, ("There are parameters not supported "
                       "for the certificate subject specification. "
                       "The subject parameter has to be in the "
                       "format of 'C=<Country> ST=<State/Province> "
                       "L=<Locality> O=<Organization> OU=<OrganizationUnit> "
                       "CN=<commonName>")
    if 'CN' not in list(subject_dict.keys()):
        return False, ("The CN=<commonName> parameter is required to be "
                       "specified in subject argument")
    return True, ""


def validate_expiry_date(expiry_date):
    """Validate a certificate expiry date

    Duplicate the expiry_date validation logic defined in:
    sysinv/api/controllers/v1/kube_rootca_update.py
    Returns a tuple of True, "" if the input is None
    Returns a tuple of True, "" if the input is valid
    Returns a tuple of False, "<error details>" if the input is invalid
    """
    if expiry_date is None:
        return True, ""

    try:
        date = datetime.datetime.strptime(expiry_date, "%Y-%m-%d")
    except ValueError:
        return False, ("expiry_date %s doesn't match format "
                       "YYYY-MM-DD" % expiry_date)

    delta = date - datetime.datetime.now()
    # we sum one day (24 hours) to accomplish the certificate expiry
    # during the day specified by the user
    duration = (delta.days * 24 + 24)

    # Cert-manager manages certificates and renew them some time
    # before it expires. Along this procedure we set renewBefore
    # parameter for 24h, so we are checking if the duration sent
    # has at least this amount of time. This is needed to avoid
    # cert-manager to block the creation of the resources.
    if duration <= 24:
        return False, ("New k8s rootCA should have at least 24 hours of "
                       "validation before expiry.")
    return True, ""


# to do validate the quota limits
def validate_quota_limits(payload):
    for resource in payload:
        # Check valid resource name
        if resource not in itertools.chain(dccommon_consts.CINDER_QUOTA_FIELDS,
                                           dccommon_consts.NOVA_QUOTA_FIELDS,
                                           dccommon_consts.NEUTRON_QUOTA_FIELDS):
            raise exceptions.InvalidInputError
        # Check valid quota limit value in case for put/post
        if isinstance(payload, dict) and (not isinstance(
                payload[resource], int) or payload[resource] <= 0):
            raise exceptions.InvalidInputError


def get_sw_update_strategy_extra_args(context, update_type=None):
    """Query an existing sw_update_strategy for its extra_args.

    :param context: request context object.
    :param update_type: filter the update strategy (defaults to None)
    :returns dict (returns an empty dictionary if no strategy exists)
    """
    try:
        sw_update_strategy = \
            db_api.sw_update_strategy_get(context,
                                          update_type=update_type)
        return sw_update_strategy.extra_args
    except exceptions.NotFound:
        # return an empty dictionary if there is no strategy
        return {}


def get_sw_update_opts(context,
                       for_sw_update=False, subcloud_id=None):
        """Get sw update options for a subcloud

        :param context: request context object.
        :param for_sw_update: return the default options if subcloud options
                              are empty. Useful for retrieving sw update
                              options on application of patch strategy.
        :param subcloud_id: id of subcloud.

        """

        if subcloud_id is None:
            # Requesting defaults. Return constants if no entry in db.
            sw_update_opts_ref = db_api.sw_update_opts_default_get(context)
            if not sw_update_opts_ref:
                sw_update_opts_dict = vim.SW_UPDATE_OPTS_CONST_DEFAULT
                return sw_update_opts_dict
        else:
            # requesting subcloud options
            sw_update_opts_ref = db_api.sw_update_opts_get(context,
                                                           subcloud_id)
            if sw_update_opts_ref:
                subcloud_name = db_api.subcloud_get(context, subcloud_id).name
                return db_api.sw_update_opts_w_name_db_model_to_dict(
                    sw_update_opts_ref, subcloud_name)
            elif for_sw_update:
                sw_update_opts_ref = db_api.sw_update_opts_default_get(context)
                if not sw_update_opts_ref:
                    sw_update_opts_dict = vim.SW_UPDATE_OPTS_CONST_DEFAULT
                    return sw_update_opts_dict
            else:
                raise exceptions.SubcloudPatchOptsNotFound(
                    subcloud_id=subcloud_id)

        return db_api.sw_update_opts_w_name_db_model_to_dict(
            sw_update_opts_ref, dccommon_consts.SW_UPDATE_DEFAULT_TITLE)


def ensure_lock_path():
    # Determine the oslo_concurrency lock path:
    # 1) First, from the oslo_concurrency section of the config
    #    a) If not set via an option default or config file, oslo_concurrency
    #       sets it to the OSLO_LOCK_PATH env variable
    # 2) Then if not set, set it to a specific directory under
    #    tsc.VOLATILE_PATH

    if cfg.CONF.oslo_concurrency.lock_path:
        lock_path = cfg.CONF.oslo_concurrency.lock_path
    else:
        lock_path = os.path.join(tsc.VOLATILE_PATH, "dcmanager")

    if not os.path.isdir(lock_path):
        try:
            uid = pwd.getpwnam(DC_MANAGER_USERNAME).pw_uid
            gid = grp.getgrnam(DC_MANAGER_GRPNAME).gr_gid
            os.makedirs(lock_path)
            os.chown(lock_path, uid, gid)
            LOG.info("Created directory=%s" % lock_path)

        except OSError as e:
            LOG.exception("makedir %s OSError=%s encountered" %
                          (lock_path, e))
            return None

    return lock_path


def synchronized(name, external=True, fair=False):
    if external:
        prefix = 'DCManager-'
        lock_path = ensure_lock_path()
    else:
        prefix = None
        lock_path = None

    return lockutils.synchronized(name, lock_file_prefix=prefix,
                                  external=external, lock_path=lock_path,
                                  semaphores=None, delay=0.01, fair=fair)


def get_filename_by_prefix(dir_path, prefix):
    """Returns the first filename found matching 'prefix' within 'dir_path'

    Note: returns base filename only - result does not include dir_path
    """
    for filename in os.listdir(dir_path):
        if filename.startswith(prefix):
            return filename
    return None


def create_subcloud_inventory(subcloud,
                              inventory_file):
    """Create the ansible inventory file for the specified subcloud"""

    # Delete the file if it already exists
    delete_subcloud_inventory(inventory_file)

    with open(inventory_file, 'w') as f_out_inventory:
        f_out_inventory.write(
            '---\n'
            'all:\n'
            '  vars:\n'
            '    ansible_ssh_user: sysadmin\n'
            '    ansible_ssh_extra_args: "-o UserKnownHostsFile=/dev/null"\n'
            '  hosts:\n'
            '    ' + subcloud['name'] + ':\n'
            '      ansible_host: ' +
            subcloud['bootstrap-address'] + '\n'
        )


def create_subcloud_inventory_with_admin_creds(subcloud_name,
                                               inventory_file,
                                               subcloud_bootstrap_address,
                                               ansible_pass):
    """Create the ansible inventory file for the specified subcloud.

    Includes ansible_become_pass attribute.
    """

    # Delete the file if it already exists
    delete_subcloud_inventory(inventory_file)

    with open(inventory_file, 'w') as f_out_inventory:
        f_out_inventory.write(
            ('---\n'
             'all:\n'
             '  vars:\n'
             '    ansible_ssh_user: sysadmin\n'
             '    ansible_ssh_pass: {0}\n'
             '    ansible_become_pass: {0}\n'
             '    ansible_ssh_extra_args: "-o UserKnownHostsFile=/dev/null"\n'
             '  hosts:\n'
             '    {1}:\n'
             '      ansible_host: {2}\n').format(ansible_pass,
                                                 subcloud_name,
                                                 subcloud_bootstrap_address)
        )


def delete_subcloud_inventory(inventory_file):
    """Delete the ansible inventory file for the specified subcloud"""

    # Delete the file if it exists
    if inventory_file and os.path.isfile(inventory_file):
        os.remove(inventory_file)


def get_vault_load_files(target_version):
    """Return a tuple for the ISO and SIG for this load version from the vault.

    The files can be imported to the vault using any name, but must end
    in 'iso' or 'sig'.
    : param target_version: The software version to search under the vault
    """
    vault_dir = "{}/{}/".format(consts.LOADS_VAULT_DIR, target_version)

    matching_iso = None
    matching_sig = None

    if os.path.isdir(vault_dir):
        for a_file in os.listdir(vault_dir):
            if a_file.lower().endswith(".iso"):
                matching_iso = os.path.join(vault_dir, a_file)
                continue
            elif a_file.lower().endswith(".sig"):
                matching_sig = os.path.join(vault_dir, a_file)
                continue
        # If no .iso or .sig is found, raise an exception
        if matching_iso is None:
            raise exceptions.VaultLoadMissingError(
                file_type='.iso', vault_dir=vault_dir)
        if matching_sig is None:
            raise exceptions.VaultLoadMissingError(
                file_type='.sig', vault_dir=vault_dir)

    # return the iso and sig for this load
    return (matching_iso, matching_sig)


def get_active_kube_version(kube_versions):
    """Returns the active (target) kubernetes from a list of versions"""

    matching_kube_version = None
    for kube in kube_versions:
        kube_dict = kube.to_dict()
        if kube_dict.get('target') and kube_dict.get('state') == 'active':
            matching_kube_version = kube_dict.get('version')
            break
    return matching_kube_version


def get_available_kube_version(kube_versions):
    """Returns first available kubernetes version from a list of versions"""

    matching_kube_version = None
    for kube in kube_versions:
        kube_dict = kube.to_dict()
        if kube_dict.get('state') == 'available':
            matching_kube_version = kube_dict.get('version')
            break
    return matching_kube_version


def kube_version_compare(left, right):
    """Performs a cmp operation for two kubernetes versions

    Return -1, 0, or 1 if left is less, equal, or greater than right

    left and right are semver strings starting with the letter 'v'
    If either value is None, an exception is raised
    If the strings are not 'v'major.minor.micro, an exception is raised
    Note: This method supports shorter versions.  ex: v1.22
    When comparing different length tuples, additional fields are ignored.
    For example: v1.19 and v1.19.1 would be the same.
    """
    if left is None or right is None or left[0] != 'v' or right[0] != 'v':
        raise Exception("Invalid kube version(s), left: (%s), right: (%s)" %
                        (left, right))
    # start the split at index 1 ('after' the 'v' character)
    l_val = tuple(map(int, (left[1:].split("."))))
    r_val = tuple(map(int, (right[1:].split("."))))
    # If the tuples are different length, convert both to the same length
    min_tuple = min(len(l_val), len(r_val))
    l_val = l_val[0:min_tuple]
    r_val = r_val[0:min_tuple]
    # The following is the same as cmp. Verified in python2 and python3
    # cmp does not exist in python3.
    return (l_val > r_val) - (l_val < r_val)


def get_loads_for_patching(loads):
    """Filter the loads that can be patched. Return their software versions"""
    valid_states = [
        consts.ACTIVE_LOAD_STATE,
        consts.IMPORTED_LOAD_STATE
    ]
    return [load.software_version for load in loads if load.state in valid_states]


def get_loads_for_prestage(loads):
    """Filter the loads that can be prestaged. Return their software versions"""
    valid_states = [
        consts.ACTIVE_LOAD_STATE,
        consts.IMPORTED_LOAD_STATE,
        consts.INACTIVE_LOAD_STATE
    ]
    return [load.software_version for load in loads if load.state in valid_states]


def subcloud_get_by_ref(context, subcloud_ref):
    """Handle getting a subcloud by either name, or ID

    :param context: The request context
    :param subcloud_ref: Reference to the subcloud, either a name or an ID
    """
    try:
        return db_api.subcloud_get(context, subcloud_ref) \
            if subcloud_ref.isdigit() \
            else db_api.subcloud_get_by_name(context, subcloud_ref)
    except (exceptions.SubcloudNotFound, exceptions.SubcloudNameNotFound):
        return None


def subcloud_group_get_by_ref(context, group_ref):
    # Handle getting a group by either name, or ID
    if group_ref.isdigit():
        # Lookup subcloud group as an ID
        try:
            group = db_api.subcloud_group_get(context, group_ref)
        except exceptions.SubcloudGroupNotFound:
            return None
    else:
        # Lookup subcloud group as a name
        try:
            group = db_api.subcloud_group_get_by_name(context, group_ref)
        except exceptions.SubcloudGroupNameNotFound:
            return None
    return group


def subcloud_db_list_to_dict(subclouds):
    return {'subclouds': [db_api.subcloud_db_model_to_dict(subcloud)
            for subcloud in subclouds]}


def get_oam_addresses(subcloud_name, sc_ks_client):
    """Get the subclouds oam addresses"""

    # First need to retrieve the Subcloud's Keystone session
    try:
        endpoint = sc_ks_client.endpoint_cache.get_endpoint('sysinv')
        sysinv_client = SysinvClient(subcloud_name,
                                     sc_ks_client.session,
                                     endpoint=endpoint)
        return sysinv_client.get_oam_addresses()
    except (keystone_exceptions.EndpointNotFound, IndexError) as e:
        message = ("Identity endpoint for subcloud: %s not found. %s" %
                   (subcloud_name, e))
        LOG.error(message)
    except dccommon_exceptions.OAMAddressesNotFound:
        message = ("OAM addresses for subcloud: %s not found." %
                   subcloud_name)
        LOG.error(message)
    return None


def get_ansible_filename(subcloud_name, postfix='.yml'):
    """Build ansible filename using subcloud and given postfix"""
    ansible_filename = os.path.join(dccommon_consts.ANSIBLE_OVERRIDES_PATH,
                                    subcloud_name + postfix)
    return ansible_filename


def pre_check_management_affected_alarm(system_health):
    """Acceptable health conditions:

    a) subcloud is completely healthy (i.e. no failed checks)
    b) there is alarm but no management affecting alarm
    c) subcloud fails alarm check and it only has non-management
       affecting alarm(s)
    """
    failed_alarm_check = re.findall("No alarms: \[Fail\]", system_health)
    no_mgmt_alarms = re.findall("\[0\] of which are management affecting",
                                system_health)
    if failed_alarm_check and not no_mgmt_alarms:
        return False
    return True


def find_ansible_error_msg(subcloud_name, log_file, stage=None):
    """Find errors into ansible logs.

    It will search into ansible log for a fatal error expression.

    If fatal error is found, it will capture the message
    until the final expression. It will get always the more recent
    fatal error from the log files.
    If the error message is longer than N lines, it will be summarized.
    Also, the last task is provided.

    Returns the error message found
    Returns generic error message if not found or there is failures
    during search
    """

    error_found = False
    error_msg = []
    failed_task = ''
    files_for_search = []

    cmd_1 = 'awk'
    # awk command to get the information iside the last match found
    # starting with 'fatal: [' and ending with 'PLAY RECAP'.
    cmd_2 = ('''BEGIN {f=""}                # initialize f
        /fatal: \[/ {f=""}                  # reset f on first match
        /fatal: \[/,/PLAY RECAP/ {          # capture text between two delimiters
            if ($0 ~ /PLAY RECAP/) next     # exclude last delimiter
            if ($0 == "") next              # exclude blank line
            f = f ? (f "\\n" $0) : $0}      # assign or append to f
            END {print f}
            ''')

    # necessary check since is possible to have
    # the error in rotated ansible log
    log_file_temp = log_file + '.1'
    if os.path.exists(log_file_temp):
        files_for_search.append(log_file_temp)
        if os.path.exists(log_file):
            files_for_search.append(log_file)
    else:
        files_for_search.append(log_file)

    if (len(files_for_search) < 2):
        cmd_list = ([cmd_1, cmd_2, files_for_search[0]])
    else:
        cmd_list = ([cmd_1, cmd_2, files_for_search[0], files_for_search[1]])

    try:
        error_msg_raw = subprocess.check_output(
            cmd_list,
            stderr=subprocess.STDOUT).decode('utf-8')
        if len(error_msg_raw) > 1:
            error_found = True
            error_msg = [elem for elem in error_msg_raw.split("\n") if elem]
            failed_task = get_failed_task(files_for_search)
    except Exception as exc:
        LOG.error("Failed getting info from ansible log file :%s" % exc)

    if error_found and (len(error_msg) > MAX_LINES_MSG):
        error_msg = summarize_message(error_msg)
    error_msg = '\n'.join(str(element) for element in error_msg)
    error_msg = error_msg.replace("\'", "\"")

    if error_found:
        msg = "FAILED %s playbook of (%s).\n" \
            " detail: %s \n" \
            "FAILED TASK: %s " % (
                stage,
                subcloud_name,
                error_msg,
                failed_task)
    else:
        msg = "FAILED %s playbook of (%s).\n" \
            "check individual log at " \
            "%s for detailed output " % (
                stage,
                subcloud_name,
                log_file)
    return msg


def get_failed_task(files):
    """Get last task failed

    It receives an ansible log file (or a couple of files)
    and search for the last failed task with its date

    Returns a string with the task and date
    """

    cmd_1 = 'awk'
    # awk command to get the information about last failed task.
    # Match expression starting with 'TASK [' and ending with
    # 'fatal: ['
    cmd_2 = ('''BEGIN {f=""}            # initialize f
        /TASK \[/ {f=""}                # reset f on first match
        /TASK \[/,/fatal: \[/ {         # capture text between two delimiters
            if ($0 ~ /fatal: \[/) next  # exclude last delimiter
            if ($0 == "") next          # exclude blank line
            f = f ? (f "\\n" $0) : $0}  # assign or append to f
            END {print f}
            ''')
    # necessary check since is possible to have
    # the error in rotated ansible log
    if (len(files) < 2):
        awk_cmd = ([cmd_1, cmd_2, files[0]])
    else:
        awk_cmd = ([cmd_1, cmd_2, files[0], files[1]])

    try:
        failed_task = subprocess.check_output(
            awk_cmd,
            stderr=subprocess.STDOUT).decode('utf-8')
        if len(failed_task) < 1:
            return None
    except Exception as exc:
        LOG.error("Failed getting failed task :%s" % exc)
        return None
    failed_task = failed_task.replace("*", "")
    failed_task = failed_task.replace("\'", "\"")
    failed_task = [elem for elem in failed_task.split("\n") if elem]
    failed_task = "%s %s" % (failed_task[0], failed_task[1])
    return failed_task


def summarize_message(error_msg):
    """Summarize message.

    This function receives a long error message and
    greps it using key words to return a summarized
    error message.

    Returns a brief message.
    """
    list_of_strings_to_search_for = [
        'msg:', 'fail', 'error', 'cmd', 'stderr'
        ]
    brief_message = []
    for line in error_msg:
        for s in list_of_strings_to_search_for:
            if re.search(s, line, re.IGNORECASE):
                if len(brief_message) >= MAX_LINES_MSG:
                    break
                # append avoiding duplicated items
                if line not in brief_message:
                    brief_message.append(line)
    return brief_message


def is_valid_for_backup_operation(operation, subcloud):

    if operation == 'create':
        return _is_valid_for_backup_create(subcloud)
    elif operation == 'delete':
        return _is_valid_for_backup_delete(subcloud)
    elif operation == 'restore':
        return _is_valid_for_backup_restore(subcloud)
    else:
        msg = "Invalid operation %s" % operation
        LOG.error(msg)
        raise exceptions.ValidateFail(msg)


def _is_valid_for_backup_create(subcloud):

    if subcloud.availability_status != dccommon_consts.AVAILABILITY_ONLINE \
        or subcloud.management_state != dccommon_consts.MANAGEMENT_MANAGED \
        or subcloud.deploy_status not in consts.VALID_DEPLOY_STATES_FOR_BACKUP:
        msg = ('Subcloud %s must be online, managed and have valid '
               'deploy-status for the subcloud-backup '
               'create operation.' % subcloud.name)
        raise exceptions.ValidateFail(msg)

    return True


def _is_valid_for_backup_delete(subcloud):

    if subcloud.availability_status != dccommon_consts.AVAILABILITY_ONLINE \
        or subcloud.management_state != dccommon_consts.MANAGEMENT_MANAGED:
        msg = ('Subcloud %s must be online and managed for the subcloud-backup'
               ' delete operation with --local-only option.' % subcloud.name)
        raise exceptions.ValidateFail(msg)

    return True


def _is_valid_for_backup_restore(subcloud):

    msg = None
    if subcloud.management_state != dccommon_consts.MANAGEMENT_UNMANAGED \
        or subcloud.deploy_status in consts.INVALID_DEPLOY_STATES_FOR_RESTORE:
        msg = ('Subcloud %s must be unmanaged and in a valid deploy state '
               'for the subcloud-backup restore operation.' % subcloud.name)
    elif not subcloud.data_install:
        msg = ('Data installation on %s is missing.' % subcloud.name)
    if msg:
        raise exceptions.ValidateFail(msg)

    return True


def get_matching_iso(software_version=None):
    try:
        if not software_version:
            software_version = tsc.SW_VERSION
        matching_iso, _ = get_vault_load_files(software_version)
        if not matching_iso:
            error_msg = ('Failed to get %s load image. Provide '
                         'active/inactive load image via '
                         '"system --os-region-name SystemController '
                         'load-import --active/--inactive"' % software_version)
            LOG.exception(error_msg)
            return None, error_msg
        return matching_iso, None
    except Exception as e:
        LOG.exception("Could not load vault files.")
        return None, str(e)


def is_subcloud_healthy(subcloud_name):

    system_health = ""
    try:
        os_client = OpenStackDriver(region_name=subcloud_name,
                                    region_clients=None)
        keystone_client = os_client.keystone_client
        endpoint = keystone_client.endpoint_cache.get_endpoint('sysinv')
        sysinv_client = SysinvClient(subcloud_name,
                                     keystone_client.session,
                                     endpoint=endpoint)
        system_health = sysinv_client.get_system_health()
    except Exception as e:
        LOG.exception(e)
        raise

    fails = re.findall("\[Fail\]", system_health)
    failed_alarm_check = re.findall("No alarms: \[Fail\]", system_health)
    no_mgmt_alarms = re.findall("\[0\] of which are management affecting",
                                system_health)

    # Subcloud is considered healthy if there are no failures or
    # a single failure with only low severity alarms (not management affecting)
    if ((len(fails) == 0) or
            (len(fails) == 1 and failed_alarm_check and no_mgmt_alarms)):
        return True

    return False


def get_systemcontroller_installed_loads():

    try:
        os_client = OpenStackDriver(
            region_name=dccommon_consts.SYSTEM_CONTROLLER_NAME,
            region_clients=None)
    except Exception:
        LOG.exception("Failed to get keystone client for %s",
                      dccommon_consts.SYSTEM_CONTROLLER_NAME)
        raise

    ks_client = os_client.keystone_client
    sysinv_client = SysinvClient(
        dccommon_consts.SYSTEM_CONTROLLER_NAME, ks_client.session,
        endpoint=ks_client.endpoint_cache.get_endpoint('sysinv'))

    loads = sysinv_client.get_loads()
    return get_loads_for_prestage(loads)


def get_certificate_from_secret(secret_name, secret_ns):
    """Get certificate from k8s secret

    :param secret_name: the name of the secret
    :param secret_ns: the namespace of the secret

    :return: tls_crt: the certificate.
             tls_key: the corresponding private key of the certificate.
    raise Exception for kubernetes data errors
    """

    kube = kubeoperator.KubeOperator()
    secret = kube.kube_get_secret(secret_name, secret_ns)

    if not hasattr(secret, 'data'):
        raise Exception('Invalid secret %s\\%s' % (secret_ns, secret_name))

    data = secret.data
    if 'tls.crt' not in data or 'tls.key' not in data:
        raise Exception('Invalid certificate data from secret %s\\%s' %
                        (secret_ns, secret_name))

    try:
        tls_crt = base64.decode_as_text(data['tls.crt'])
        tls_key = base64.decode_as_text(data['tls.key'])
    except TypeError:
        raise Exception('Certificate secret data is invalid %s\\%s' %
                        (secret_ns, secret_name))

    return tls_crt, tls_key


def get_management_subnet(payload):
    """Get management subnet.

    Given a payload dict, prefer an admin
    subnet over a management subnet if it
    is present.

    Returns the management subnet.
    """
    if payload.get('admin_subnet', None):
        return payload.get('admin_subnet')
    return payload.get('management_subnet', '')


def get_management_start_address(payload):
    """Get management start address.

    Given a payload dict, prefer an admin
    start address over a management start address
    if it is present.

    Returns the management start address.
    """
    if payload.get('admin_start_address', None):
        return payload.get('admin_start_address')
    return payload.get('management_start_address', '')


def get_management_end_address(payload):
    """Get management end address.

    Given a payload dict, prefer an admin
    end address over a management end address
    if it is present.

    Returns the management end address.
    """
    if payload.get('admin_end_address', None):
        return payload.get('admin_end_address')
    return payload.get('management_end_address', '')


def get_management_gateway_address(payload):
    """Get management gateway address.

    Given a payload dict, prefer an admin
    gateway address over a management gateway address
    if it is present.

    Returns the management gateway address.
    """
    if payload.get('admin_gateway_address', None):
        return payload.get('admin_gateway_address')
    return payload.get('management_gateway_address', '')


def has_network_reconfig(payload, subcloud):
    """Check if network reconfiguration is needed

    :param payload: subcloud configuration
    :param subcloud: subcloud object
    """
    management_subnet = get_management_subnet(payload)
    start_address = get_management_start_address(payload)
    end_address = get_management_end_address(payload)
    gateway_address = get_management_gateway_address(payload)

    has_network_reconfig = any([
        management_subnet != subcloud.management_subnet,
        start_address != subcloud.management_start_ip,
        end_address != subcloud.management_end_ip,
        gateway_address != subcloud.management_gateway_ip
    ])

    return has_network_reconfig


def set_open_file_limit(new_soft_limit: int):
    """Adjust the maximum number of open files for this process (soft limit)"""
    try:
        current_soft, current_hard = sys_resource.getrlimit(
            sys_resource.RLIMIT_NOFILE)
        if new_soft_limit > current_hard:
            LOG.error(f'New process open file soft limit [{new_soft_limit}] '
                      f'exceeds the hard limit [{current_hard}]. Setting to '
                      'hard limit instead.')
            new_soft_limit = current_hard
        if new_soft_limit != current_soft:
            LOG.info(f'Setting process open file limit to {new_soft_limit} '
                     f'(from {current_soft})')
            sys_resource.setrlimit(sys_resource.RLIMIT_NOFILE,
                                   (new_soft_limit, current_hard))
    except Exception as ex:
        LOG.exception(f'Failed to set NOFILE resource limit: {ex}')


def get_playbook_for_software_version(playbook_filename, software_version=None):
    """Get the ansible playbook filename in corresponding software version.

    :param playbook_filename: ansible playbook filename
    :param software_version: software version
    :raises PlaybookNotFound: If the playbook is not found

    Returns the unchanged ansible playbook filename if the software version
    parameter is not provided or the same as active release, otherwise, returns
    the filename in corresponding software version.
    """
    if software_version and software_version != tsc.SW_VERSION:
        software_version_path = os.path.join(
            consts.ANSIBLE_PREVIOUS_VERSION_BASE_PATH, software_version)
        playbook_filename = playbook_filename.replace(
            consts.ANSIBLE_CURRENT_VERSION_BASE_PATH,
            software_version_path)
    if not os.path.isfile(playbook_filename):
        raise exceptions.PlaybookNotFound(playbook_name=playbook_filename)
    return playbook_filename


def get_value_from_yaml_file(filename, key):
    """Get corresponding value for a key in the given yaml file.

    :param filename: the yaml filename
    :param key: the path for the value

    Returns the value or None if not found.
    """
    value = None
    if os.path.isfile(filename):
        with open(os.path.abspath(filename), 'r') as f:
            data = f.read()
        data = yaml.load(data, Loader=yaml.SafeLoader)
        value = data.get(key)
    return value


def update_values_on_yaml_file(filename, values, yaml_dump=True):
    """Update all specified key values from the given yaml file.

    :param filename: the yaml filename
    :param values: dict with yaml keys and values to replace
    :param yaml_dump: write file using yaml dump (default is True)
    """
    update_file = False
    if not os.path.isfile(filename):
        return
    with open(os.path.abspath(filename), 'r') as f:
        data = f.read()
    data = yaml.load(data, Loader=yaml.SafeLoader)
    for key, value in values.items():
        if key not in data or value != data.get(key):
            data.update({key: value})
            update_file = True
    if update_file:
        with open(os.path.abspath(filename), 'w') as f:
            if yaml_dump:
                yaml.dump(data, f, sort_keys=False)
            else:
                f.write('---\n')
                for k, v in data.items():
                    f.write("%s: %s\n" % (k, json.dumps(v)))


def load_yaml_file(filename: str):
    with open(os.path.abspath(filename), 'r') as f:
        data = yaml.load(f, Loader=yaml.loader.SafeLoader)
    return data


def decode_and_normalize_passwd(input_passwd):
    pattern = r'^[' + string.punctuation + ']'
    passwd = base64.decode_as_text(input_passwd)
    # Ensure that sysadmin password which starts with a special
    # character will be enclosed in quotes so that the generated
    # inventory file will be parsable by Ansible.
    if not passwd.startswith('"') and re.search(pattern, passwd):
        passwd = '"' + passwd + '"'
    elif passwd.startswith('"') and not passwd.endswith('"'):
        passwd = "'" + passwd + "'"

    return passwd


def get_failure_msg(subcloud_name):
    try:
        os_client = OpenStackDriver(region_name=subcloud_name,
                                    region_clients=None)
        keystone_client = os_client.keystone_client
        endpoint = keystone_client.endpoint_cache.get_endpoint('sysinv')
        sysinv_client = SysinvClient(subcloud_name,
                                     keystone_client.session,
                                     endpoint=endpoint)
        msg = sysinv_client.get_error_msg()
        return msg
    except Exception as e:
        LOG.exception("{}: {}".format(subcloud_name, e))
        return consts.ERROR_DESC_FAILED


def update_abort_status(context, subcloud_id, deploy_status, abort_failed=False):
    """Update the subcloud deploy status during deploy abort operation.

    :param context: request context object
    :param subcloud_id: subcloud id from db
    :param deploy_status: subcloud deploy status from db
    :param abort_failed: if abort process fails (default False)
    """
    if abort_failed:
        abort_status_dict = ABORT_UPDATE_FAIL_STATUS
    else:
        abort_status_dict = ABORT_UPDATE_STATUS
    new_deploy_status = abort_status_dict[deploy_status]
    updated_subcloud = db_api.subcloud_update(context, subcloud_id,
                                              deploy_status=new_deploy_status)
    return updated_subcloud