#
# Copyright (c) 2014-2017 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
"""
Backup & Restore
"""
from __future__ import print_function
import copy
import filecmp
import fileinput
import os
import glob
import shutil
import stat
import subprocess
import tarfile
import tempfile
import textwrap
import time
from fm_api import constants as fm_constants
from fm_api import fm_api
from sysinv.common import constants as sysinv_constants
from controllerconfig.common import log
from controllerconfig.common import constants
from controllerconfig.common.exceptions import BackupFail
from controllerconfig.common.exceptions import RestoreFail
from controllerconfig.common.exceptions import KeystoneFail
from controllerconfig.common.exceptions import SysInvFail
from controllerconfig import openstack
import tsconfig.tsconfig as tsconfig
from controllerconfig import utils
from controllerconfig import sysinv_api as sysinv
from six.moves import input
from os import environ
LOG = log.get_logger(__name__)
DEVNULL = open(os.devnull, 'w')
RESTORE_COMPLETE = "restore-complete"
RESTORE_RERUN_REQUIRED = "restore-rerun-required"
# Backup/restore related constants
backup_in_progress = tsconfig.BACKUP_IN_PROGRESS_FLAG
restore_in_progress = tsconfig.RESTORE_IN_PROGRESS_FLAG
restore_patching_complete = '/etc/platform/.restore_patching_complete'
node_is_patched = '/var/run/node_is_patched'
keyring_permdir = os.path.join('/opt/platform/.keyring', tsconfig.SW_VERSION)
ceph_permdir = os.path.join(tsconfig.CONFIG_PATH, 'ceph-config')
ldap_permdir = '/var/lib/openldap-data'
patching_permdir = '/opt/patching'
patching_repo_permdir = '/www/pages/updates'
home_permdir = '/home'
extension_permdir = '/opt/extension'
patch_vault_permdir = '/opt/patch-vault'
mariadb_pod = 'mariadb-server-0'
kube_config = environ.get('KUBECONFIG')
if kube_config is None:
kube_config = '/etc/kubernetes/admin.conf'
kube_cmd_prefix = 'kubectl --kubeconfig=%s ' % kube_config
kube_cmd_prefix += 'exec -i %s -n openstack -- bash -c ' % mariadb_pod
mysql_prefix = '\'exec mysql -uroot -p"$MYSQL_ROOT_PASSWORD" '
mysqldump_prefix = '\'exec mysqldump -uroot -p"$MYSQL_ROOT_PASSWORD" '
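# For illustration, a fully composed command (with KUBECONFIG unset) looks
# roughly like:
#   kubectl --kubeconfig=/etc/kubernetes/admin.conf exec -i mariadb-server-0
#       -n openstack -- bash -c
#       'exec mysql -uroot -p"$MYSQL_ROOT_PASSWORD" -e"show databases" '
# The callers below append the database-specific arguments and the closing
# quote.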
def get_backup_databases():
"""
Retrieve database lists for backup.
:return: backup_databases and backup_database_skip_tables
"""
# Databases common to all configurations
REGION_LOCAL_DATABASES = ('postgres', 'template1', 'sysinv',
'fm', 'barbican')
REGION_SHARED_DATABASES = ('keystone',)
# Indicates which tables have to be dropped for a certain database.
DB_TABLE_SKIP_MAPPING = {
'fm': ('alarm',),
'dcorch': ('orch_job',
'orch_request',
'resource',
'subcloud_resource'), }
if tsconfig.region_config == 'yes':
BACKUP_DATABASES = REGION_LOCAL_DATABASES
else:
# Add additional databases for non-region configuration and for the
# primary region in region deployments.
BACKUP_DATABASES = REGION_LOCAL_DATABASES + REGION_SHARED_DATABASES
# Add distributed cloud databases
if tsconfig.distributed_cloud_role == \
sysinv_constants.DISTRIBUTED_CLOUD_ROLE_SYSTEMCONTROLLER:
BACKUP_DATABASES += ('dcmanager', 'dcorch')
# We generate the tables to be skipped for each database
# mentioned in BACKUP_DATABASES. We explicitly list
# skip tables in DB_TABLE_SKIP_MAPPING
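    # (e.g. {'postgres': (), 'fm': ('alarm',), ...})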
BACKUP_DB_SKIP_TABLES = dict(
[[x, DB_TABLE_SKIP_MAPPING.get(x, ())] for x in BACKUP_DATABASES])
return BACKUP_DATABASES, BACKUP_DB_SKIP_TABLES
def get_os_backup_databases():
"""
Retrieve openstack database lists from MariaDB for backup.
:return: os_backup_databases
"""
skip_dbs = ("Database", "information_schema", "performance_schema",
"mysql", "horizon", "panko", "gnocchi")
try:
db_cmd = kube_cmd_prefix + mysql_prefix + '-e"show databases" \''
proc = subprocess.Popen([db_cmd], shell=True,
stdout=subprocess.PIPE, stderr=DEVNULL)
os_backup_dbs = set(line[:-1] for line in proc.stdout
if line[:-1] not in skip_dbs)
proc.communicate()
return os_backup_dbs
except subprocess.CalledProcessError:
raise BackupFail("Failed to get openstack databases from MariaDB.")
def check_load_versions(archive, staging_dir):
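    """ Verify the backup load version matches the installed load """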
match = False
try:
member = archive.getmember('etc/build.info')
archive.extract(member, path=staging_dir)
match = filecmp.cmp('/etc/build.info', staging_dir + '/etc/build.info')
shutil.rmtree(staging_dir + '/etc')
except Exception as e:
LOG.exception(e)
raise RestoreFail("Unable to verify load version in backup file. "
"Invalid backup file.")
if not match:
LOG.error("Load version mismatch.")
raise RestoreFail("Load version of backup does not match the "
"version of the installed load.")
def get_subfunctions(filename):
"""
Retrieves the subfunctions from a platform.conf file.
:param filename: file to retrieve subfunctions from
:return: a list of the subfunctions or None if no subfunctions exist
"""
matchstr = 'subfunction='
with open(filename, 'r') as f:
for line in f:
if matchstr in line:
parsed = line.split('=')
return parsed[1].rstrip().split(",")
return
def check_load_subfunctions(archive, staging_dir):
"""
Verify that the subfunctions in the backup match the installed load.
:param archive: backup archive
:param staging_dir: staging directory
:return: raises exception if the subfunctions do not match
"""
match = False
backup_subfunctions = None
try:
member = archive.getmember('etc/platform/platform.conf')
archive.extract(member, path=staging_dir)
backup_subfunctions = get_subfunctions(staging_dir +
'/etc/platform/platform.conf')
shutil.rmtree(staging_dir + '/etc')
if set(backup_subfunctions) ^ set(tsconfig.subfunctions):
# The set of subfunctions do not match
match = False
else:
match = True
except Exception:
LOG.exception("Unable to verify subfunctions in backup file")
raise RestoreFail("Unable to verify subfunctions in backup file. "
"Invalid backup file.")
if not match:
LOG.error("Subfunction mismatch - backup: %s, installed: %s" %
(str(backup_subfunctions), str(tsconfig.subfunctions)))
raise RestoreFail("Subfunctions in backup load (%s) do not match the "
"subfunctions of the installed load (%s)." %
(str(backup_subfunctions),
str(tsconfig.subfunctions)))
def file_exists_in_archive(archive, file_path):
""" Check if file exists in archive """
try:
archive.getmember(file_path)
return True
except KeyError:
LOG.info("File %s is not in archive." % file_path)
return False
def filter_directory(archive, directory):
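    """ Yield archive members whose top-level path component is directory """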
for tarinfo in archive:
if tarinfo.name.split('/')[0] == directory:
yield tarinfo
def backup_etc_size():
""" Backup etc size estimate """
try:
total_size = utils.directory_get_size('/etc')
return total_size
except OSError:
LOG.error("Failed to estimate backup etc size.")
raise BackupFail("Failed to estimate backup etc size")
def backup_etc(archive):
""" Backup etc """
try:
archive.add('/etc', arcname='etc')
except tarfile.TarError:
LOG.error("Failed to backup etc.")
raise BackupFail("Failed to backup etc")
def restore_etc_file(archive, dest_dir, etc_file):
""" Restore etc file """
try:
# Change the name of this file to remove the leading path
member = archive.getmember('etc/' + etc_file)
# Copy the member to avoid changing the name for future operations on
# this member.
temp_member = copy.copy(member)
temp_member.name = os.path.basename(temp_member.name)
archive.extract(temp_member, path=dest_dir)
except tarfile.TarError:
LOG.error("Failed to restore etc file.")
raise RestoreFail("Failed to restore etc file")
def restore_etc_ssl_dir(archive, configpath=constants.CONFIG_WORKDIR):
""" Restore the etc SSL dir """
def filter_etc_ssl_private(members):
for tarinfo in members:
if 'etc/ssl/private' in tarinfo.name:
yield tarinfo
if file_exists_in_archive(archive, 'config/server-cert.pem'):
restore_config_file(
archive, configpath, 'server-cert.pem')
if file_exists_in_archive(archive, 'etc/ssl/private'):
# NOTE: This will include all TPM certificate files if TPM was
# enabled on the backed up system. However in that case, this
# restoration is only done for the first controller and TPM
# will need to be reconfigured once duplex controller (if any)
# is restored.
archive.extractall(path='/',
members=filter_etc_ssl_private(archive))
def restore_ceph_external_config_files(archive, staging_dir):
# Restore ceph-config.
if file_exists_in_archive(archive, "config/ceph-config"):
restore_config_dir(archive, staging_dir, 'ceph-config', ceph_permdir)
# Copy the file to /etc/ceph.
# There might be no files to copy, so don't check the return code.
cp_command = ('cp -Rp ' + os.path.join(ceph_permdir, '*') +
' /etc/ceph/')
subprocess.call(cp_command, shell=True)
def backup_config_size(config_permdir):
""" Backup configuration size estimate """
try:
        return utils.directory_get_size(config_permdir)
except OSError:
LOG.error("Failed to estimate backup configuration size.")
raise BackupFail("Failed to estimate backup configuration size")
def backup_config(archive, config_permdir):
""" Backup configuration """
try:
# The config dir is versioned, but we're only grabbing the current
# release
archive.add(config_permdir, arcname='config')
except tarfile.TarError:
LOG.error("Failed to backup config.")
raise BackupFail("Failed to backup configuration")
def restore_config_file(archive, dest_dir, config_file):
""" Restore configuration file """
try:
# Change the name of this file to remove the leading path
member = archive.getmember('config/' + config_file)
# Copy the member to avoid changing the name for future operations on
# this member.
temp_member = copy.copy(member)
temp_member.name = os.path.basename(temp_member.name)
archive.extract(temp_member, path=dest_dir)
except tarfile.TarError:
LOG.error("Failed to restore config file %s." % config_file)
raise RestoreFail("Failed to restore configuration")
def restore_configuration(archive, staging_dir):
""" Restore configuration """
try:
os.makedirs(constants.CONFIG_WORKDIR, stat.S_IRWXU | stat.S_IRGRP |
stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
except OSError:
LOG.error("Failed to create config directory: %s",
constants.CONFIG_WORKDIR)
raise RestoreFail("Failed to restore configuration files")
# Restore cgcs_config file from original installation for historical
# purposes. Not used to restore the system as the information in this
# file is out of date (not updated after original installation).
restore_config_file(archive, constants.CONFIG_WORKDIR, 'cgcs_config')
# Restore platform.conf file and update as necessary. The file will be
# created in a temporary location and then moved into place when it is
# complete to prevent access to a partially created file.
restore_etc_file(archive, staging_dir, 'platform/platform.conf')
temp_platform_conf_file = os.path.join(tsconfig.PLATFORM_CONF_PATH,
'platform.conf.temp')
shutil.copyfile(os.path.join(staging_dir, 'platform.conf'),
temp_platform_conf_file)
install_uuid = utils.get_install_uuid()
for line in fileinput.FileInput(temp_platform_conf_file, inplace=1):
if line.startswith("INSTALL_UUID="):
# The INSTALL_UUID must be updated to match the new INSTALL_UUID
# which was generated when this controller was installed prior to
# doing the restore.
print("INSTALL_UUID=%s" % install_uuid)
elif line.startswith("management_interface=") or \
line.startswith("oam_interface=") or \
line.startswith("cluster_host_interface=") or \
line.startswith("UUID="):
# Strip out any entries that are host specific as the backup can
# be done on either controller. The application of the
# platform_conf manifest will add these back in.
pass
else:
print(line, end='')
fileinput.close()
# Move updated platform.conf file into place.
os.rename(temp_platform_conf_file, tsconfig.PLATFORM_CONF_FILE)
# Kick tsconfig to reload the platform.conf file
tsconfig._load()
# Restore branding
restore_config_dir(archive, staging_dir, 'branding', '/opt/branding/')
# Restore banner customization
restore_config_dir(archive, staging_dir, 'banner/etc', '/opt/banner')
# Restore ssh configuration
restore_config_dir(archive, staging_dir, 'ssh_config',
constants.CONFIG_WORKDIR + '/ssh_config')
# Configure hostname
utils.configure_hostname('controller-0')
# Restore hosts file
restore_etc_file(archive, '/etc', 'hosts')
restore_etc_file(archive, constants.CONFIG_WORKDIR, 'hosts')
# Restore certificate files
restore_etc_ssl_dir(archive)
def filter_pxelinux(archive):
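    """ Yield archive members under config/pxelinux.cfg """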
for tarinfo in archive:
if tarinfo.name.find('config/pxelinux.cfg') == 0:
yield tarinfo
def restore_dnsmasq(archive, config_permdir):
""" Restore dnsmasq """
try:
etc_files = ['hosts']
perm_files = ['hosts',
'dnsmasq.hosts', 'dnsmasq.leases',
'dnsmasq.addn_hosts']
for etc_file in etc_files:
restore_config_file(archive, '/etc', etc_file)
for perm_file in perm_files:
restore_config_file(archive, config_permdir, perm_file)
# Extract distributed cloud addn_hosts file if present in archive.
if file_exists_in_archive(
archive, 'config/dnsmasq.addn_hosts_dc'):
restore_config_file(archive, config_permdir,
'dnsmasq.addn_hosts_dc')
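        # Restore the pxeboot configuration: extract config/pxelinux.cfg
        # into a temporary directory, then move it into place under the
        # permanent config directory.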
tmpdir = tempfile.mkdtemp(prefix="pxerestore_")
archive.extractall(tmpdir,
members=filter_pxelinux(archive))
if os.path.exists(tmpdir + '/config/pxelinux.cfg'):
shutil.rmtree(config_permdir + 'pxelinux.cfg', ignore_errors=True)
shutil.move(tmpdir + '/config/pxelinux.cfg', config_permdir)
shutil.rmtree(tmpdir, ignore_errors=True)
except (shutil.Error, subprocess.CalledProcessError, tarfile.TarError):
LOG.error("Failed to restore dnsmasq config.")
raise RestoreFail("Failed to restore dnsmasq files")
def backup_puppet_data_size(puppet_permdir):
""" Backup puppet data size estimate """
try:
        return utils.directory_get_size(puppet_permdir)
except OSError:
LOG.error("Failed to estimate backup puppet data size.")
raise BackupFail("Failed to estimate backup puppet data size")
def backup_puppet_data(archive, puppet_permdir):
""" Backup puppet data """
try:
# The puppet dir is versioned, but we're only grabbing the current
# release
archive.add(puppet_permdir, arcname='hieradata')
except tarfile.TarError:
LOG.error("Failed to backup puppet data.")
raise BackupFail("Failed to backup puppet data")
def restore_static_puppet_data(archive, puppet_workdir):
""" Restore static puppet data """
try:
member = archive.getmember('hieradata/static.yaml')
archive.extract(member, path=os.path.dirname(puppet_workdir))
member = archive.getmember('hieradata/secure_static.yaml')
archive.extract(member, path=os.path.dirname(puppet_workdir))
except tarfile.TarError:
LOG.error("Failed to restore static puppet data.")
raise RestoreFail("Failed to restore static puppet data")
except OSError:
pass
def restore_puppet_data(archive, puppet_workdir, controller_0_address):
""" Restore puppet data """
try:
member = archive.getmember('hieradata/system.yaml')
archive.extract(member, path=os.path.dirname(puppet_workdir))
member = archive.getmember('hieradata/secure_system.yaml')
archive.extract(member, path=os.path.dirname(puppet_workdir))
# Only restore controller-0 hieradata
controller_0_hieradata = 'hieradata/%s.yaml' % controller_0_address
member = archive.getmember(controller_0_hieradata)
archive.extract(member, path=os.path.dirname(puppet_workdir))
except tarfile.TarError:
LOG.error("Failed to restore puppet data.")
raise RestoreFail("Failed to restore puppet data")
except OSError:
pass
def backup_armada_manifest_size(armada_permdir):
""" Backup armada manifest size estimate """
try:
        return utils.directory_get_size(armada_permdir)
except OSError:
LOG.error("Failed to estimate backup armada manifest size.")
raise BackupFail("Failed to estimate backup armada manifest size")
def backup_armada_manifest_data(archive, armada_permdir):
""" Backup armada manifest data """
try:
archive.add(armada_permdir, arcname='armada')
except tarfile.TarError:
LOG.error("Failed to backup armada manifest data.")
raise BackupFail("Failed to backup armada manifest data")
def restore_armada_manifest_data(archive, armada_permdir):
""" Restore armada manifest data """
try:
shutil.rmtree(armada_permdir, ignore_errors=True)
members = filter_directory(archive, 'armada')
temp_members = list()
# remove armada and armada/ from the member path since they are
# extracted to armada_permdir: /opt/platform/armada/release
for m in members:
temp_member = copy.copy(m)
lst = temp_member.name.split('armada/')
if len(lst) > 1:
temp_member.name = lst[1]
temp_members.append(temp_member)
archive.extractall(path=armada_permdir, members=temp_members)
except (tarfile.TarError, OSError):
LOG.error("Failed to restore armada manifest.")
shutil.rmtree(armada_permdir, ignore_errors=True)
raise RestoreFail("Failed to restore armada manifest")
def backup_keyring_size(keyring_permdir):
""" Backup keyring size estimate """
try:
        return utils.directory_get_size(keyring_permdir)
except OSError:
LOG.error("Failed to estimate backup keyring size.")
raise BackupFail("Failed to estimate backup keyring size")
def backup_keyring(archive, keyring_permdir):
""" Backup keyring configuration """
try:
archive.add(keyring_permdir, arcname='.keyring')
except tarfile.TarError:
LOG.error("Failed to backup keyring.")
raise BackupFail("Failed to backup keyring configuration")
def restore_keyring(archive, keyring_permdir):
""" Restore keyring configuration """
try:
shutil.rmtree(keyring_permdir, ignore_errors=False)
members = filter_directory(archive, '.keyring')
temp_members = list()
# remove .keyring and .keyring/ from the member path since they are
# extracted to keyring_permdir: /opt/platform/.keyring/release
for m in members:
temp_member = copy.copy(m)
lst = temp_member.name.split('.keyring/')
if len(lst) > 1:
temp_member.name = lst[1]
temp_members.append(temp_member)
archive.extractall(path=keyring_permdir, members=temp_members)
except (tarfile.TarError, shutil.Error):
LOG.error("Failed to restore keyring.")
shutil.rmtree(keyring_permdir, ignore_errors=True)
raise RestoreFail("Failed to restore keyring configuration")
def prefetch_keyring(archive):
""" Prefetch keyring configuration for manifest use """
keyring_tmpdir = '/tmp/.keyring'
python_keyring_tmpdir = '/tmp/python_keyring'
try:
shutil.rmtree(keyring_tmpdir, ignore_errors=True)
shutil.rmtree(python_keyring_tmpdir, ignore_errors=True)
archive.extractall(
path=os.path.dirname(keyring_tmpdir),
members=filter_directory(archive,
os.path.basename(keyring_tmpdir)))
shutil.move(keyring_tmpdir + '/python_keyring', python_keyring_tmpdir)
except (tarfile.TarError, shutil.Error):
LOG.error("Failed to restore keyring.")
shutil.rmtree(keyring_tmpdir, ignore_errors=True)
shutil.rmtree(python_keyring_tmpdir, ignore_errors=True)
raise RestoreFail("Failed to restore keyring configuration")
def cleanup_prefetched_keyring():
""" Cleanup fetched keyring """
try:
keyring_tmpdir = '/tmp/.keyring'
python_keyring_tmpdir = '/tmp/python_keyring'
shutil.rmtree(keyring_tmpdir, ignore_errors=True)
shutil.rmtree(python_keyring_tmpdir, ignore_errors=True)
except shutil.Error:
LOG.error("Failed to cleanup keyring.")
raise RestoreFail("Failed to cleanup fetched keyring")
def backup_ldap_size():
""" Backup ldap size estimate """
try:
total_size = 0
proc = subprocess.Popen(
['slapcat -d 0 -F /etc/openldap/schema | wc -c'],
shell=True, stdout=subprocess.PIPE)
for line in proc.stdout:
total_size = int(line)
break
proc.communicate()
return total_size
except subprocess.CalledProcessError:
LOG.error("Failed to estimate backup ldap size.")
raise BackupFail("Failed to estimate backup ldap size")
def backup_ldap(archive, staging_dir):
""" Backup ldap configuration """
try:
ldap_staging_dir = staging_dir + '/ldap'
os.mkdir(ldap_staging_dir, 0o655)
subprocess.check_call([
'slapcat', '-d', '0', '-F', '/etc/openldap/schema',
'-l', (ldap_staging_dir + '/ldap.db')], stdout=DEVNULL)
archive.add(ldap_staging_dir + '/ldap.db', arcname='ldap.db')
except (OSError, subprocess.CalledProcessError, tarfile.TarError):
LOG.error("Failed to backup ldap database.")
raise BackupFail("Failed to backup ldap configuration")
def restore_ldap(archive, ldap_permdir, staging_dir):
""" Restore ldap configuration """
try:
ldap_staging_dir = staging_dir + '/ldap'
archive.extract('ldap.db', path=ldap_staging_dir)
utils.stop_lsb_service('openldap')
subprocess.call(['rm', '-rf', ldap_permdir], stdout=DEVNULL)
os.mkdir(ldap_permdir, 0o755)
subprocess.check_call(['slapadd', '-F', '/etc/openldap/schema',
'-l', ldap_staging_dir + '/ldap.db'],
stdout=DEVNULL, stderr=DEVNULL)
except (subprocess.CalledProcessError, OSError, tarfile.TarError):
LOG.error("Failed to restore ldap database.")
raise RestoreFail("Failed to restore ldap configuration")
finally:
utils.start_lsb_service('openldap')
def backup_mariadb_size():
""" Backup MariaDB size estimate """
try:
total_size = 0
os_backup_dbs = get_os_backup_databases()
        # Estimate the dump size for each database.
for db_elem in os_backup_dbs:
db_cmd = kube_cmd_prefix + mysqldump_prefix
db_cmd += ' %s\' | wc -c' % db_elem
proc = subprocess.Popen([db_cmd], shell=True,
stdout=subprocess.PIPE, stderr=DEVNULL)
total_size += int(proc.stdout.readline())
proc.communicate()
return total_size
except subprocess.CalledProcessError:
LOG.error("Failed to estimate MariaDB database size.")
raise BackupFail("Failed to estimate MariaDB database size")
def backup_mariadb(archive, staging_dir):
""" Backup MariaDB data """
try:
mariadb_staging_dir = staging_dir + '/mariadb'
os.mkdir(mariadb_staging_dir, 0o655)
os_backup_dbs = get_os_backup_databases()
# Backup data for databases.
for db_elem in os_backup_dbs:
db_cmd = kube_cmd_prefix + mysqldump_prefix
db_cmd += ' %s\' > %s/%s.sql.data' % (db_elem,
mariadb_staging_dir, db_elem)
subprocess.check_call([db_cmd], shell=True, stderr=DEVNULL)
archive.add(mariadb_staging_dir, arcname='mariadb')
except (OSError, subprocess.CalledProcessError, tarfile.TarError):
LOG.error("Failed to backup MariaDB databases.")
raise BackupFail("Failed to backup MariaDB database.")
def extract_mariadb_data(archive):
""" Extract and store MariaDB data """
try:
# We store MariaDB data in /opt/backups/mariadb for now.
# After MariaDB service is up, we will populate the
# database using these data.
archive.extractall(path=constants.BACKUPS_PATH,
members=filter_directory(archive, 'mariadb'))
except (OSError, tarfile.TarError) as e:
LOG.error("Failed to extract and store MariaDB data. Error: %s", e)
raise RestoreFail("Failed to extract and store MariaDB data.")
def create_helm_overrides_directory():
"""
Create helm overrides directory
During restore, application-apply will be done without
first running application-upload where the helm overrides
directory is created. So we need to create the helm overrides
directory before running application-apply.
"""
try:
os.mkdir(constants.HELM_OVERRIDES_PERMDIR, 0o755)
except OSError:
LOG.error("Failed to create helm overrides directory")
        raise RestoreFail("Failed to create helm overrides directory")
def restore_mariadb():
"""
Restore MariaDB
This function is called after MariaDB service is up
"""
try:
mariadb_staging_dir = constants.BACKUPS_PATH + '/mariadb'
# Restore data for databases.
for data in glob.glob(mariadb_staging_dir + '/*.sql.data'):
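            # The database name is the file name up to the first dot,
            # e.g. 'nova.sql.data' -> 'nova'.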
db_elem = data.split('/')[-1].split('.')[0]
create_db = "create database %s" % db_elem
# Create the database
db_cmd = kube_cmd_prefix + mysql_prefix + '-e"%s" \'' % create_db
subprocess.check_call([db_cmd], shell=True, stderr=DEVNULL)
# Populate data
db_cmd = 'cat %s | ' % data
db_cmd = db_cmd + kube_cmd_prefix + mysql_prefix
db_cmd += '%s\' ' % db_elem
subprocess.check_call([db_cmd], shell=True, stderr=DEVNULL)
shutil.rmtree(mariadb_staging_dir, ignore_errors=True)
except (OSError, subprocess.CalledProcessError) as e:
LOG.error("Failed to restore MariaDB data. Error: %s", e)
raise RestoreFail("Failed to restore MariaDB data.")
def backup_postgres_size():
""" Backup postgres size estimate """
try:
total_size = 0
# Backup roles, table spaces and schemas for databases.
proc = subprocess.Popen([('sudo -u postgres pg_dumpall --clean ' +
'--schema-only | wc -c')], shell=True,
stdout=subprocess.PIPE, stderr=DEVNULL)
for line in proc.stdout:
total_size = int(line)
break
proc.communicate()
        # Get the list of databases to back up.
backup_databases, backup_db_skip_tables = get_backup_databases()
        # Estimate the data dump size for each database.
for _, db_elem in enumerate(backup_databases):
db_cmd = 'sudo -u postgres pg_dump --format=plain --inserts '
db_cmd += '--disable-triggers --data-only %s ' % db_elem
for _, table_elem in enumerate(backup_db_skip_tables[db_elem]):
db_cmd += '--exclude-table=%s ' % table_elem
db_cmd += '| wc -c'
proc = subprocess.Popen([db_cmd], shell=True,
stdout=subprocess.PIPE, stderr=DEVNULL)
for line in proc.stdout:
total_size += int(line)
break
proc.communicate()
return total_size
except subprocess.CalledProcessError:
LOG.error("Failed to estimate backup database size.")
raise BackupFail("Failed to estimate backup database size")
def backup_postgres(archive, staging_dir):
""" Backup postgres configuration """
try:
postgres_staging_dir = staging_dir + '/postgres'
os.mkdir(postgres_staging_dir, 0o655)
# Backup roles, table spaces and schemas for databases.
subprocess.check_call([('sudo -u postgres pg_dumpall --clean ' +
'--schema-only' +
'> %s/%s' % (postgres_staging_dir,
'postgres.sql.config'))],
shell=True, stderr=DEVNULL)
        # Get the list of databases to back up.
backup_databases, backup_db_skip_tables = get_backup_databases()
# Backup data for databases.
for _, db_elem in enumerate(backup_databases):
db_cmd = 'sudo -u postgres pg_dump --format=plain --inserts '
db_cmd += '--disable-triggers --data-only %s ' % db_elem
for _, table_elem in enumerate(backup_db_skip_tables[db_elem]):
db_cmd += '--exclude-table=%s ' % table_elem
db_cmd += '> %s/%s.sql.data' % (postgres_staging_dir, db_elem)
subprocess.check_call([db_cmd], shell=True, stderr=DEVNULL)
archive.add(postgres_staging_dir, arcname='postgres')
except (OSError, subprocess.CalledProcessError, tarfile.TarError):
LOG.error("Failed to backup postgres databases.")
raise BackupFail("Failed to backup database configuration")
def restore_postgres(archive, staging_dir):
""" Restore postgres configuration """
try:
postgres_staging_dir = staging_dir + '/postgres'
archive.extractall(path=staging_dir,
members=filter_directory(archive, 'postgres'))
utils.start_service("postgresql")
# Restore roles, table spaces and schemas for databases.
subprocess.check_call(["sudo", "-u", "postgres", "psql", "-f",
postgres_staging_dir +
'/postgres.sql.config', "postgres"],
stdout=DEVNULL, stderr=DEVNULL)
# Restore data for databases.
for data in glob.glob(postgres_staging_dir + '/*.sql.data'):
db_elem = data.split('/')[-1].split('.')[0]
subprocess.check_call(["sudo", "-u", "postgres", "psql", "-f",
data, db_elem],
stdout=DEVNULL)
except (OSError, subprocess.CalledProcessError, tarfile.TarError) as e:
LOG.error("Failed to restore postgres databases. Error: %s", e)
raise RestoreFail("Failed to restore database configuration")
finally:
utils.stop_service('postgresql')
def filter_config_dir(archive, directory):
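    """ Yield archive members under config/<directory> """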
for tarinfo in archive:
if tarinfo.name.find('config/' + directory) == 0:
yield tarinfo
def restore_config_dir(archive, staging_dir, config_dir, dest_dir):
""" Restore configuration directory if it exists """
try:
archive.extractall(staging_dir,
members=filter_config_dir(archive, config_dir))
# Copy files from backup to dest dir
if (os.path.exists(staging_dir + '/config/' + config_dir) and
os.listdir(staging_dir + '/config/' + config_dir)):
subprocess.call(["mkdir", "-p", dest_dir])
try:
for f in glob.glob(
staging_dir + '/config/' + config_dir + '/*'):
subprocess.check_call(["cp", "-p", f, dest_dir])
except IOError:
LOG.warning("Failed to copy %s files" % config_dir)
except (subprocess.CalledProcessError, tarfile.TarError):
LOG.info("No custom %s config was found during restore." % config_dir)
def backup_std_dir_size(directory):
""" Backup standard directory size estimate """
try:
return utils.directory_get_size(directory)
except OSError:
LOG.error("Failed to estimate backup size for %s" % directory)
raise BackupFail("Failed to estimate backup size for %s" % directory)
def backup_std_dir(archive, directory):
""" Backup standard directory """
try:
archive.add(directory, arcname=os.path.basename(directory))
except tarfile.TarError:
LOG.error("Failed to backup %s" % directory)
raise BackupFail("Failed to backup %s" % directory)
def restore_std_dir(archive, directory):
""" Restore standard directory """
try:
shutil.rmtree(directory, ignore_errors=True)
# Verify that archive contains this directory
try:
archive.getmember(os.path.basename(directory))
except KeyError:
LOG.error("Archive does not contain directory %s" % directory)
raise RestoreFail("Invalid backup file - missing directory %s" %
directory)
archive.extractall(
path=os.path.dirname(directory),
members=filter_directory(archive, os.path.basename(directory)))
except (shutil.Error, tarfile.TarError):
LOG.error("Failed to restore %s" % directory)
raise RestoreFail("Failed to restore %s" % directory)
def configure_loopback_interface(archive):
""" Restore and apply configuration for loopback interface """
utils.remove_interface_config_files()
restore_etc_file(
archive, utils.NETWORK_SCRIPTS_PATH,
'sysconfig/network-scripts/' + utils.NETWORK_SCRIPTS_LOOPBACK)
utils.restart_networking()
def backup_ceph_crush_map(archive, staging_dir):
""" Backup ceph crush map """
try:
ceph_staging_dir = os.path.join(staging_dir, 'ceph')
os.mkdir(ceph_staging_dir, 0o655)
crushmap_file = os.path.join(ceph_staging_dir,
sysinv_constants.CEPH_CRUSH_MAP_BACKUP)
subprocess.check_call(['ceph', 'osd', 'getcrushmap',
'-o', crushmap_file], stdout=DEVNULL,
stderr=DEVNULL)
archive.add(crushmap_file, arcname='ceph/' +
sysinv_constants.CEPH_CRUSH_MAP_BACKUP)
except Exception as e:
LOG.error('Failed to backup ceph crush map. Reason: {}'.format(e))
raise BackupFail('Failed to backup ceph crush map')
def restore_ceph_crush_map(archive):
""" Restore ceph crush map """
if not file_exists_in_archive(archive, 'ceph/' +
sysinv_constants.CEPH_CRUSH_MAP_BACKUP):
return
try:
crush_map_file = 'ceph/' + sysinv_constants.CEPH_CRUSH_MAP_BACKUP
if file_exists_in_archive(archive, crush_map_file):
member = archive.getmember(crush_map_file)
# Copy the member to avoid changing the name for future
# operations on this member.
temp_member = copy.copy(member)
temp_member.name = os.path.basename(temp_member.name)
archive.extract(temp_member,
path=sysinv_constants.SYSINV_CONFIG_PATH)
except tarfile.TarError as e:
LOG.error('Failed to restore crush map file. Reason: {}'.format(e))
raise RestoreFail('Failed to restore crush map file')
def check_size(archive_dir):
"""Check if there is enough space to create backup."""
backup_overhead_bytes = 1024 ** 3 # extra GB for staging directory
backup_size = (backup_overhead_bytes +
backup_etc_size() +
backup_config_size(tsconfig.CONFIG_PATH) +
backup_puppet_data_size(constants.HIERADATA_PERMDIR) +
backup_keyring_size(keyring_permdir) +
backup_ldap_size() +
backup_postgres_size() +
backup_std_dir_size(home_permdir) +
backup_std_dir_size(patching_permdir) +
backup_std_dir_size(patching_repo_permdir) +
backup_std_dir_size(extension_permdir) +
backup_std_dir_size(patch_vault_permdir) +
backup_armada_manifest_size(constants.ARMADA_PERMDIR) +
backup_std_dir_size(constants.HELM_CHARTS_PERMDIR) +
backup_mariadb_size()
)
archive_dir_free_space = \
utils.filesystem_get_free_space(archive_dir)
if backup_size > archive_dir_free_space:
print("Archive directory (%s) does not have enough free "
"space (%s), estimated backup size is %s." %
(archive_dir, utils.print_bytes(archive_dir_free_space),
utils.print_bytes(backup_size)))
raise BackupFail("Not enough free space for backup.")
def backup(backup_name, archive_dir, clone=False):
"""Backup configuration."""
if not os.path.isdir(archive_dir):
raise BackupFail("Archive directory (%s) not found." % archive_dir)
if not utils.is_active("management-ip"):
raise BackupFail(
"Backups can only be performed from the active controller.")
if os.path.isfile(backup_in_progress):
raise BackupFail("Backup already in progress.")
else:
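        # Create the flag file that marks a backup as in progress;
        # it is removed in the finally block below.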
open(backup_in_progress, 'w')
fmApi = fm_api.FaultAPIs()
entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
sysinv_constants.CONTROLLER_HOSTNAME)
fault = fm_api.Fault(alarm_id=fm_constants.FM_ALARM_ID_BACKUP_IN_PROGRESS,
alarm_state=fm_constants.FM_ALARM_STATE_SET,
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
entity_instance_id=entity_instance_id,
severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
reason_text=("System Backup in progress."),
# operational
alarm_type=fm_constants.FM_ALARM_TYPE_7,
# congestion
probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_8,
proposed_repair_action=("No action required."),
service_affecting=False)
fmApi.set_fault(fault)
staging_dir = None
system_tar_path = None
warnings = ''
try:
os.chdir('/')
if not clone:
check_size(archive_dir)
        print("\nPerforming backup (this might take several minutes):")
staging_dir = tempfile.mkdtemp(dir=archive_dir)
system_tar_path = os.path.join(archive_dir,
backup_name + '_system.tgz')
system_archive = tarfile.open(system_tar_path, "w:gz")
step = 1
total_steps = 16
# Step 1: Backup etc
backup_etc(system_archive)
utils.progress(total_steps, step, 'backup etc', 'DONE')
step += 1
# Step 2: Backup configuration
backup_config(system_archive, tsconfig.CONFIG_PATH)
utils.progress(total_steps, step, 'backup configuration', 'DONE')
step += 1
# Step 3: Backup puppet data
backup_puppet_data(system_archive, constants.HIERADATA_PERMDIR)
utils.progress(total_steps, step, 'backup puppet data', 'DONE')
step += 1
# Step 4: Backup armada data
backup_armada_manifest_data(system_archive, constants.ARMADA_PERMDIR)
utils.progress(total_steps, step, 'backup armada data', 'DONE')
step += 1
# Step 5: Backup helm charts data
backup_std_dir(system_archive, constants.HELM_CHARTS_PERMDIR)
utils.progress(total_steps, step, 'backup helm charts', 'DONE')
step += 1
# Step 6: Backup keyring
backup_keyring(system_archive, keyring_permdir)
utils.progress(total_steps, step, 'backup keyring', 'DONE')
step += 1
# Step 7: Backup ldap
backup_ldap(system_archive, staging_dir)
utils.progress(total_steps, step, 'backup ldap', 'DONE')
step += 1
# Step 8: Backup postgres
backup_postgres(system_archive, staging_dir)
utils.progress(total_steps, step, 'backup postgres', 'DONE')
step += 1
# Step 9: Backup mariadb
backup_mariadb(system_archive, staging_dir)
utils.progress(total_steps, step, 'backup mariadb', 'DONE')
step += 1
# Step 10: Backup home
backup_std_dir(system_archive, home_permdir)
utils.progress(total_steps, step, 'backup home directory', 'DONE')
step += 1
# Step 11: Backup patching
if not clone:
backup_std_dir(system_archive, patching_permdir)
utils.progress(total_steps, step, 'backup patching', 'DONE')
step += 1
# Step 12: Backup patching repo
if not clone:
backup_std_dir(system_archive, patching_repo_permdir)
utils.progress(total_steps, step, 'backup patching repo', 'DONE')
step += 1
# Step 13: Backup extension filesystem
backup_std_dir(system_archive, extension_permdir)
utils.progress(total_steps, step, 'backup extension filesystem '
'directory', 'DONE')
step += 1
# Step 14: Backup patch-vault filesystem
if os.path.exists(patch_vault_permdir):
backup_std_dir(system_archive, patch_vault_permdir)
utils.progress(total_steps, step, 'backup patch-vault filesystem '
'directory', 'DONE')
step += 1
# Step 15: Backup ceph crush map
backup_ceph_crush_map(system_archive, staging_dir)
utils.progress(total_steps, step, 'backup ceph crush map', 'DONE')
step += 1
# Step 16: Create archive
system_archive.close()
utils.progress(total_steps, step, 'create archive', 'DONE')
step += 1
except Exception:
if system_tar_path and os.path.isfile(system_tar_path):
os.remove(system_tar_path)
raise
finally:
fmApi.clear_fault(fm_constants.FM_ALARM_ID_BACKUP_IN_PROGRESS,
entity_instance_id)
os.remove(backup_in_progress)
if staging_dir:
shutil.rmtree(staging_dir, ignore_errors=True)
system_msg = "System backup file created"
if not clone:
system_msg += ": " + system_tar_path
print(system_msg)
if warnings != '':
print("WARNING: The following problems occurred:")
print(textwrap.fill(warnings, 80))
def create_restore_runtime_config(filename):
""" Create any runtime parameters needed for Restore."""
config = {}
# We need to re-enable Openstack password rules, which
# were previously disabled while the controller manifests
# were applying during a Restore
config['classes'] = ['keystone::security_compliance']
utils.create_manifest_runtime_config(filename, config)
def restore_system(backup_file, include_storage_reinstall=False, clone=False):
"""Restoring system configuration."""
if (os.path.exists(constants.CGCS_CONFIG_FILE) or
os.path.exists(tsconfig.CONFIG_PATH) or
os.path.exists(constants.INITIAL_CONFIG_COMPLETE_FILE)):
print(textwrap.fill(
"Configuration has already been done. "
"A system restore operation can only be done "
"immediately after the load has been installed.", 80))
print('')
raise RestoreFail("System configuration already completed")
if not os.path.isabs(backup_file):
raise RestoreFail("Backup file (%s) not found. Full path is "
"required." % backup_file)
if os.path.isfile(restore_in_progress):
raise RestoreFail("Restore already in progress.")
else:
open(restore_in_progress, 'w')
# Add newline to console log for install-clone scenario
newline = clone
staging_dir = None
try:
try:
with open(os.devnull, "w") as fnull:
subprocess.check_call(["vgdisplay", "cgts-vg"],
stdout=fnull,
stderr=fnull)
except subprocess.CalledProcessError:
LOG.error("The cgts-vg volume group was not found")
raise RestoreFail("Volume groups not configured")
print("\nRestoring system (this will take several minutes):")
# Use /scratch for the staging dir for now,
# until /opt/backups is available
staging_dir = tempfile.mkdtemp(dir='/scratch')
# Permission change required or postgres restore fails
subprocess.call(['chmod', 'a+rx', staging_dir], stdout=DEVNULL)
os.chdir('/')
step = 1
total_steps = 26
# Step 1: Open archive and verify installed load matches backup
try:
archive = tarfile.open(backup_file)
except tarfile.TarError as e:
LOG.exception(e)
raise RestoreFail("Error opening backup file. Invalid backup "
"file.")
check_load_versions(archive, staging_dir)
check_load_subfunctions(archive, staging_dir)
utils.progress(total_steps, step, 'open archive', 'DONE', newline)
step += 1
# Patching is potentially a multi-phase step.
# If the controller is impacted by patches from the backup,
# it must be rebooted before continuing the restore.
# If this is the second pass through, we can skip over this.
if not os.path.isfile(restore_patching_complete) and not clone:
# Step 2: Restore patching
restore_std_dir(archive, patching_permdir)
utils.progress(total_steps, step, 'restore patching', 'DONE',
newline)
step += 1
# Step 3: Restore patching repo
restore_std_dir(archive, patching_repo_permdir)
utils.progress(total_steps, step, 'restore patching repo', 'DONE',
newline)
step += 1
# Step 4: Apply patches
try:
subprocess.check_output(["sw-patch", "install-local"])
except subprocess.CalledProcessError:
LOG.error("Failed to install patches")
raise RestoreFail("Failed to install patches")
utils.progress(total_steps, step, 'install patches', 'DONE',
newline)
step += 1
open(restore_patching_complete, 'w')
# If the controller was impacted by patches, we need to reboot.
if os.path.isfile(node_is_patched):
if not clone:
print("\nThis controller has been patched. " +
"A reboot is required.")
print("After the reboot is complete, " +
"re-execute the restore command.")
while True:
user_input = input(
"Enter 'reboot' to reboot controller: ")
if user_input == 'reboot':
break
LOG.info("This controller has been patched. Rebooting now")
print("\nThis controller has been patched. Rebooting now\n\n")
time.sleep(5)
os.remove(restore_in_progress)
if staging_dir:
shutil.rmtree(staging_dir, ignore_errors=True)
subprocess.call("reboot")
else:
# We need to restart the patch controller and agent, since
# we setup the repo and patch store outside its control
with open(os.devnull, "w") as devnull:
subprocess.call(
["systemctl",
"restart",
"sw-patch-controller-daemon.service"],
stdout=devnull, stderr=devnull)
subprocess.call(
["systemctl",
"restart",
"sw-patch-agent.service"],
stdout=devnull, stderr=devnull)
if clone:
# No patches were applied, return to cloning code
# to run validation code.
return RESTORE_RERUN_REQUIRED
else:
# Add the skipped steps
step += 3
if os.path.isfile(node_is_patched):
# If we get here, it means the node was patched by the user
# AFTER the restore applied patches and rebooted, but didn't
# reboot.
# This means the patch lineup no longer matches what's in the
# backup, but we can't (and probably shouldn't) prevent that.
# However, since this will ultimately cause the node to fail
# the goenabled step, we can fail immediately and force the
# user to reboot.
            print("\nThis controller has been patched, but not rebooted.")
            print("Please reboot before continuing the restore process.")
raise RestoreFail("Controller node patched without rebooting")
# Flag can now be cleared
if os.path.exists(restore_patching_complete):
os.remove(restore_patching_complete)
# Prefetch keyring
prefetch_keyring(archive)
# Step 5: Restore configuration
restore_configuration(archive, staging_dir)
# In AIO SX systems, the loopback interface is used as the management
# interface. However, the application of the interface manifest will
# not configure the necessary addresses on the loopback interface (see
# apply_network_config.sh for details). So, we need to configure the
# loopback interface here.
if tsconfig.system_mode == sysinv_constants.SYSTEM_MODE_SIMPLEX:
configure_loopback_interface(archive)
# Write the simplex flag
utils.write_simplex_flag()
utils.progress(total_steps, step, 'restore configuration', 'DONE',
newline)
step += 1
# Step 6: Apply restore bootstrap manifest
controller_0_address = utils.get_address_from_hosts_file(
'controller-0')
restore_static_puppet_data(archive, constants.HIERADATA_WORKDIR)
try:
utils.apply_manifest(controller_0_address,
sysinv_constants.CONTROLLER,
'bootstrap',
constants.HIERADATA_WORKDIR)
except Exception as e:
LOG.exception(e)
raise RestoreFail(
'Failed to apply bootstrap manifest. '
'See /var/log/puppet/latest/puppet.log for details.')
utils.progress(total_steps, step, 'apply bootstrap manifest', 'DONE',
newline)
step += 1
# Step 7: Restore puppet data
restore_puppet_data(archive, constants.HIERADATA_WORKDIR,
controller_0_address)
utils.progress(total_steps, step, 'restore puppet data', 'DONE',
newline)
step += 1
# Step 8: Persist configuration
utils.persist_config()
utils.progress(total_steps, step, 'persist configuration', 'DONE',
newline)
step += 1
# Step 9: Apply controller manifest
try:
utils.apply_manifest(controller_0_address,
sysinv_constants.CONTROLLER,
'controller',
constants.HIERADATA_PERMDIR)
except Exception as e:
LOG.exception(e)
raise RestoreFail(
'Failed to apply controller manifest. '
'See /var/log/puppet/latest/puppet.log for details.')
utils.progress(total_steps, step, 'apply controller manifest', 'DONE',
newline)
step += 1
# Step 10: Apply runtime controller manifests
restore_filename = os.path.join(staging_dir, 'restore.yaml')
create_restore_runtime_config(restore_filename)
try:
utils.apply_manifest(controller_0_address,
sysinv_constants.CONTROLLER,
'runtime',
constants.HIERADATA_PERMDIR,
runtime_filename=restore_filename)
except Exception as e:
LOG.exception(e)
raise RestoreFail(
'Failed to apply runtime controller manifest. '
'See /var/log/puppet/latest/puppet.log for details.')
utils.progress(total_steps, step,
'apply runtime controller manifest', 'DONE',
newline)
step += 1
# Move the staging dir under /opt/backups, now that it's setup
shutil.rmtree(staging_dir, ignore_errors=True)
staging_dir = tempfile.mkdtemp(dir=constants.BACKUPS_PATH)
# Permission change required or postgres restore fails
subprocess.call(['chmod', 'a+rx', staging_dir], stdout=DEVNULL)
# Step 11: Apply banner customization
utils.apply_banner_customization()
utils.progress(total_steps, step, 'apply banner customization', 'DONE',
newline)
step += 1
# Step 12: Restore dnsmasq and pxeboot config
restore_dnsmasq(archive, tsconfig.CONFIG_PATH)
utils.progress(total_steps, step, 'restore dnsmasq', 'DONE', newline)
step += 1
# Step 13: Restore keyring
restore_keyring(archive, keyring_permdir)
utils.progress(total_steps, step, 'restore keyring', 'DONE', newline)
step += 1
# Step 14: Restore ldap
restore_ldap(archive, ldap_permdir, staging_dir)
utils.progress(total_steps, step, 'restore ldap', 'DONE', newline)
step += 1
# Step 15: Restore postgres
restore_postgres(archive, staging_dir)
utils.progress(total_steps, step, 'restore postgres', 'DONE', newline)
step += 1
# Step 16: Extract and store mariadb data
extract_mariadb_data(archive)
utils.progress(total_steps, step, 'extract mariadb', 'DONE', newline)
step += 1
# Step 17: Restore ceph crush map
restore_ceph_crush_map(archive)
utils.progress(total_steps, step, 'restore ceph crush map', 'DONE',
newline)
step += 1
# Step 18: Restore home
restore_std_dir(archive, home_permdir)
utils.progress(total_steps, step, 'restore home directory', 'DONE',
newline)
step += 1
# Step 19: Restore extension filesystem
restore_std_dir(archive, extension_permdir)
utils.progress(total_steps, step, 'restore extension filesystem '
'directory', 'DONE', newline)
step += 1
# Step 20: Restore patch-vault filesystem
if file_exists_in_archive(archive,
os.path.basename(patch_vault_permdir)):
restore_std_dir(archive, patch_vault_permdir)
utils.progress(total_steps, step, 'restore patch-vault filesystem '
'directory', 'DONE', newline)
step += 1
# Step 21: Restore external ceph configuration files.
restore_ceph_external_config_files(archive, staging_dir)
utils.progress(total_steps, step, 'restore CEPH external config',
'DONE', newline)
step += 1
# Step 22: Restore Armada manifest
restore_armada_manifest_data(archive, constants.ARMADA_PERMDIR)
utils.progress(total_steps, step, 'restore armada manifest',
'DONE', newline)
step += 1
# Step 23: Restore Helm charts
restore_std_dir(archive, constants.HELM_CHARTS_PERMDIR)
utils.progress(total_steps, step, 'restore helm charts',
'DONE', newline)
step += 1
# Step 24: Create Helm overrides directory
create_helm_overrides_directory()
utils.progress(total_steps, step, 'create helm overrides directory',
'DONE', newline)
step += 1
# Step 25: Shutdown file systems
archive.close()
shutil.rmtree(staging_dir, ignore_errors=True)
utils.shutdown_file_systems()
utils.progress(total_steps, step, 'shutdown file systems', 'DONE',
newline)
step += 1
# Step 26: Recover services
utils.mtce_restart()
utils.mark_config_complete()
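        # Allow time for maintenance and service management to bring
        # services up before polling the sysinv services below.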
time.sleep(120)
for service in ['sysinv-conductor', 'sysinv-inv']:
if not utils.wait_sm_service(service):
raise RestoreFail("Services have failed to initialize.")
utils.progress(total_steps, step, 'recover services', 'DONE', newline)
step += 1
if tsconfig.system_mode != sysinv_constants.SYSTEM_MODE_SIMPLEX:
print("\nRestoring node states (this will take several minutes):")
with openstack.OpenStack() as client:
# On ceph setups storage nodes take about 90 seconds
# to become locked. Setting the timeout to 120 seconds
# for such setups
lock_timeout = 60
storage_hosts = sysinv.get_hosts(client.admin_token,
client.conf['region_name'],
personality='storage')
if storage_hosts:
lock_timeout = 120
failed_lock_host = False
skip_hosts = ['controller-0']
if not include_storage_reinstall:
if storage_hosts:
install_uuid = utils.get_install_uuid()
for h in storage_hosts:
skip_hosts.append(h.name)
# Update install_uuid on the storage node
client.sysinv.ihost.update_install_uuid(
h.uuid,
install_uuid)
skip_hosts_count = len(skip_hosts)
# Wait for nodes to be identified as disabled before attempting
# to lock hosts. Even if after 3 minute nodes are still not
# identified as disabled, we still continue the restore.
if not client.wait_for_hosts_disabled(
exempt_hostnames=skip_hosts,
timeout=180):
LOG.info("At least one node is not in a disabling state. "
"Continuing.")
print("\nLocking nodes:")
try:
failed_hosts = client.lock_hosts(skip_hosts,
utils.progress,
timeout=lock_timeout)
# Don't power off nodes that could not be locked
if len(failed_hosts) > 0:
                        skip_hosts.extend(failed_hosts)
except (KeystoneFail, SysInvFail) as e:
LOG.exception(e)
failed_lock_host = True
if not failed_lock_host:
print("\nPowering-off nodes:")
try:
client.power_off_hosts(skip_hosts,
utils.progress,
timeout=60)
except (KeystoneFail, SysInvFail) as e:
LOG.exception(e)
                        # Power-off failures are tolerated here;
                        # the restore continues regardless.
if failed_lock_host or len(skip_hosts) > skip_hosts_count:
if include_storage_reinstall:
print(textwrap.fill(
"Failed to lock at least one node. " +
"Please lock the unlocked nodes manually.", 80
))
else:
print(textwrap.fill(
"Failed to lock at least one node. " +
"Please lock the unlocked controller-1 or " +
"worker nodes manually.", 80
))
if not clone:
print(textwrap.fill(
"Before continuing to the next step in the restore, " +
"please ensure all nodes other than controller-0 " +
"and storage nodes, if they are not being " +
"reinstalled, are powered off. Please refer to the " +
"system administration guide for more details.", 80
))
finally:
os.remove(restore_in_progress)
if staging_dir:
shutil.rmtree(staging_dir, ignore_errors=True)
cleanup_prefetched_keyring()
fmApi = fm_api.FaultAPIs()
entity_instance_id = "%s=%s" % (fm_constants.FM_ENTITY_TYPE_HOST,
sysinv_constants.CONTROLLER_HOSTNAME)
fault = fm_api.Fault(
alarm_id=fm_constants.FM_ALARM_ID_BACKUP_IN_PROGRESS,
alarm_state=fm_constants.FM_ALARM_STATE_MSG,
entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
entity_instance_id=entity_instance_id,
severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
reason_text=("System Restore complete."),
# other
alarm_type=fm_constants.FM_ALARM_TYPE_0,
# unknown
probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_UNKNOWN,
proposed_repair_action=(""),
service_affecting=False)
fmApi.set_fault(fault)
if utils.get_system_type() == sysinv_constants.TIS_AIO_BUILD:
print("\nApplying worker manifests for %s. " %
(utils.get_controller_hostname()))
print("Node will reboot on completion.")
sysinv.do_worker_config_complete(utils.get_controller_hostname())
# show in-progress log on console every 30 seconds
# until self reboot or timeout
time.sleep(30)
for i in range(1, 10):
print("worker manifest apply in progress ... ")
time.sleep(30)
raise RestoreFail("Timeout running worker manifests, "
"reboot did not occur")
return RESTORE_COMPLETE