Remove Armada if there are no Armada apps

Added a script which runs in upgrades to 23.09. It checks if there are Armada apps uploaded/applied or helm v2 releases. If not, it stops and removes Armada resources, including helm release, namespace, manifest directory and docker image. Added helper functions to sysinv.common.utils. Test Plan: PASS armada removed when there are no armada apps PASS armada untouched when there are armada apps PASS tested on centos and debian PASS upgrade DX/SX 22.06 -> 22.12 Story: 2010560 Task: 47274 Depends-On: https://review.opendev.org/c/starlingx/config/+/869094 Signed-off-by: Leonardo Fagundes Luz Serrano <Leonardo.FagundesLuzSerrano@windriver.com> Change-Id: I02dbdb3d2c41d765cb8f733c11cd3e695ad5e552
2022-11-09 21:18:10 -03:00 · 2022-11-09 21:18:10 -03:00 · 2cb4f215a9
parent c937f46ece
commit 2cb4f215a9
2 changed files with 339 additions and 0 deletions
--- a/controllerconfig/controllerconfig/upgrade-scripts/76-remove-armada-if-unused.py
+++ b/controllerconfig/controllerconfig/upgrade-scripts/76-remove-armada-if-unused.py
@ -0,0 +1,301 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2022 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# The purpose of this script is to check for armada apps uploaded/applied
+# or helm v2 releases, and, if none are found, remove armada, including:
+# - armada helm v3 release
+# - armada kubernetes namespace
+# - armada manifest directory
+# - armada docker image
+
+import os
+import psutil
+import ruamel.yaml as yaml
+import sys
+import threading
+from eventlet.green import subprocess
+from time import sleep
+
+from controllerconfig.common import log
+from sysinv.common import exception
+from sysinv.common import utils as common_utils
+from sysinv.helm import utils as helm_utils
+
+from sysinv.common.kubernetes import KUBERNETES_ADMIN_CONF
+from tsconfig.tsconfig import PLATFORM_PATH
+
+
+# Module-level logger; log.configure() sets up the controllerconfig logging.
+LOG = log.get_logger(__name__)
+log.configure()
+
+# This script should only execute in the following upgrade conditions:
+# main() acts only when its from_release, to_release and action arguments
+# are each found in the corresponding list below.
+ACCEPTED_FROM = ['21.12', '22.06']
+ACCEPTED_TO = ['22.12']
+ACCEPTED_ACTIONS = ['activate']
+
+# Directory checked for uploaded/applied armada apps and removed afterwards
+ARMADA_MANIFEST_DIRECTORY = os.path.join(PLATFORM_PATH, 'armada')
+# Kubernetes namespace and helm v3 release name used by armada
+ARMADA_NS = 'armada'
+ARMADA_RELEASE_NAME = 'armada'
+
+# Defaults used by wait_cmd_output()
+TIMEOUT = 180  # timeout in seconds for armada pods to terminate
+TIME_STEP = 15  # wait X seconds between checks
+
+
+def run_cmd(cmd, interrupt_on_error=False, env=None):
+    "A wrapper for common_utils.trycmd()"
+
+    out, err = common_utils.trycmd(*cmd.split(), env=env)
+
+    if err:
+        if env:
+            err += "\nEnv: {}".format(env)
+
+        if interrupt_on_error:
+            raise Exception(err)
+        else:
+            LOG.debug(err)
+
+    return out, err
+
+
+def wait_cmd_output(cmd, expected_output, timeout=TIMEOUT, step=TIME_STEP,
+                    interrupt_on_error=True):
+    "Executes cmd until output matches 'expected_output' or a timeout."
+
+    LOG.debug('Wait for output of "%s" to match "%s"' % (cmd, expected_output))
+
+    time_elapsed = 0
+    while time_elapsed < timeout:
+        output, _ = run_cmd(cmd, interrupt_on_error=interrupt_on_error)
+        if output == expected_output:
+            return time_elapsed
+        sleep(step)
+        time_elapsed += step
+
+    msg = 'Timeout waiting for output of cmd "%s" to match "%s"' \
+          % (cmd, expected_output)
+    raise Exception(msg)
+
+
+def kill_process_and_descendants(proc):
+    # function to kill a process and its children processes
+    for child in psutil.Process(proc.pid).children(recursive=True):
+        child.kill()
+    proc.kill()
+
+
+def retrieve_helm_v2_releases():
+    """Query helm v2 (through helmv2-cli) for its releases.
+
+    :returns: dict mapping each release name to a {namespace: revision}
+              dict; empty if the command printed nothing.
+    :raises exception.HelmTillerFailure: on any failure. Errors raised
+        inside the try block (including the HelmTillerFailure ones) are
+        caught by the trailing 'except Exception' and re-raised with the
+        prefix "Failed to retrieve helmv2 releases: ". Callers in this
+        script match on the text of these messages.
+    """
+    # Run with the kubernetes admin config and /usr/local/sbin on the PATH
+    env = os.environ.copy()
+    env['PATH'] = '/usr/local/sbin:' + env['PATH']
+    env['KUBECONFIG'] = KUBERNETES_ADMIN_CONF
+    helm_list = subprocess.Popen(
+        ['helmv2-cli', '--',
+         'helm',
+         'list', '--output', 'yaml', '--tiller-connection-timeout', '5'],
+        env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+        universal_newlines=True)
+    # Watchdog: kill the command and its children if it is still running
+    # after 20 seconds.
+    timer = threading.Timer(20, kill_process_and_descendants, [helm_list])
+
+    try:
+        timer.start()
+        out, err = helm_list.communicate()
+        if helm_list.returncode != 0:
+            # An error message on stderr takes precedence
+            if err:
+                raise exception.HelmTillerFailure(reason=err)
+
+            # killing the subprocesses with kill() when timer expires
+            # returns EBADF because the pipe is closed, but no error
+            # string on stderr.
+            if helm_list.returncode == -9:
+                raise exception.HelmTillerFailure(
+                    reason="helmv2-cli -- helm list operation timed out after "
+                           "20 seconds. Terminated by threading timer.")
+            raise exception.HelmTillerFailure(
+                reason="helmv2-cli -- helm list operation failed without "
+                       "error message, errno=%s" % helm_list.returncode)
+
+        deployed_releases = {}
+        if out:
+            output = yaml.safe_load(out)
+            releases = output.get('Releases', {})
+            for r in releases:
+                r_name = r.get('Name')
+                r_version = r.get('Revision')
+                r_namespace = r.get('Namespace')
+
+                # Group by release name: {name: {namespace: revision}}
+                deployed_releases.setdefault(r_name, {}).update(
+                    {r_namespace: r_version})
+
+        return deployed_releases
+    except Exception as e:
+        raise exception.HelmTillerFailure(
+            reason="Failed to retrieve helmv2 releases: %s" % e)
+    finally:
+        # Always disarm the watchdog, whether or not it already fired
+        timer.cancel()
+
+
+def is_armada_required():
+    """
+    Check for armada manifests or helm v2 releases.
+    Return True if any are found,
+    False otherwise (including if helm v2 was already removed).
+    """
+
+    # Check if there armada apps uploaded/applied, exit if there are any
+    if os.path.exists(ARMADA_MANIFEST_DIRECTORY):
+        for sw_version in os.listdir(ARMADA_MANIFEST_DIRECTORY):
+            directory = os.path.join(ARMADA_MANIFEST_DIRECTORY, sw_version)
+            if os.listdir(directory):
+                LOG.debug("Armada apps found: " + str(os.listdir(directory)))
+                return True
+
+    # Check for releases in helm v2, exit if there are any
+    try:
+        helm_v2_releases = retrieve_helm_v2_releases()
+        if helm_v2_releases:
+            LOG.debug("helm v2 releases found: %s" % (list(helm_v2_releases)))
+            return True
+    except Exception as e:
+        # Don't touch armada if helm v2 query fails for unknown reason.
+        # If armada was already removed, exception message will be:
+        # "helm list operation failed without error message"
+        # and it's okay to continue.
+        if 'operation failed' not in str(e):
+            raise Exception("Error listing helm v2 releases: %s" % e)
+
+    return False
+
+
+def remove_armada_resources():
+    """
+    Remove Armada helm release and namespace.
+    Note: removing the HR terminates pods and secrets.
+    """
+
+    # Remove armada helm v3 release
+    try:
+        if ARMADA_RELEASE_NAME in helm_utils.retrieve_helm_v3_releases():
+            helm_utils.delete_helm_v3_release(
+                ARMADA_RELEASE_NAME, namespace=ARMADA_NS)
+        else:
+            LOG.warning("Helm v3 release %s not found." % ARMADA_RELEASE_NAME)
+    except Exception as e:
+        # Couldn't remove HR, so don't touch anything else.
+        raise Exception("Could not remove Armada helm release: %s" % e)
+
+    # Wait for kubernetes armada namespace to have no resources,
+    # fail after a timeout
+    LOG.debug("Waiting for resources to terminate...")
+    cmd = "kubectl get all -n %s -o name --kubeconfig %s" \
+          % (ARMADA_NS, KUBERNETES_ADMIN_CONF)
+    time_elapsed = wait_cmd_output(cmd=cmd, expected_output="")
+    LOG.debug("Took about {} seconds".format(time_elapsed))
+
+    # Remove armada namespace
+    cmd = "kubectl delete namespace %s --kubeconfig %s --ignore-not-found" \
+          % (ARMADA_NS, KUBERNETES_ADMIN_CONF)
+    run_cmd(cmd)
+
+    return True
+
+
+def remove_armada_manifest_directory():
+
+    cmd = "sudo rm -rf %s" % (ARMADA_MANIFEST_DIRECTORY)
+    _, stderr = run_cmd(cmd)
+    if stderr:
+        LOG.warning("Could not remove %s" % (ARMADA_MANIFEST_DIRECTORY))
+        return False
+
+    return True
+
+
+def remove_armada_docker_image():
+    """
+    Loads the keystone admin environment variables and uses 'system' commands
+    to remove the armada docker image in the local registry.
+
+    Return True if successful, False otherwise.
+    """
+
+    # Get env
+    keystone_env = common_utils.get_keystone_admin_env()
+    if not keystone_env:
+        LOG.warning("While trying to remove armada image from docker registry,"
+                    " could not get keystone admin env to run system commands")
+        return False
+
+    # Get image name
+    cmd = "system registry-image-list"
+    output, _ = run_cmd(cmd, env=keystone_env)
+    if not output:
+        LOG.warning("Failed to remove armada docker image. "
+                    "'%s' did not return an output" % cmd)
+        return False
+    output = output.replace('|', '').replace('+', '').split()
+    for line in output:
+        if 'armada' in line:
+            image_name = line
+            break
+    else:
+        LOG.debug("Could not find armada image in docker registry.")
+        return True
+
+    # Get image tag
+    cmd = "system registry-image-tags %s" % image_name
+    output, _ = run_cmd(cmd, env=keystone_env)
+    if not output.strip():
+        LOG.warning("Armada image already deleted, but still appears on "
+                    "'system registry-image-list'")
+        return True
+    image_tag = output.replace('|', '').replace('+', '').split()[-2]
+
+    cmd = "system registry-image-delete %s:%s" % (image_name, image_tag)
+    run_cmd(cmd, env=keystone_env)
+
+    cmd = "system registry-garbage-collect"
+    run_cmd(cmd, env=keystone_env)
+
+    return True
+
+
+def main():
+    if len(sys.argv) != 4:
+        error_msg = "Invalid arguments: %s" % (sys.argv)
+        print(error_msg)
+        LOG.error(error_msg)
+        return 1
+
+    script_name, from_release, to_release, action = sys.argv
+
+    LOG.info("%s invoked with from_release = %s to_release = %s action = %s"
+             % (script_name, from_release, to_release, action))
+
+    if from_release in ACCEPTED_FROM and to_release in ACCEPTED_TO \
+            and action in ACCEPTED_ACTIONS:
+
+        try:
+            if is_armada_required():
+                LOG.info("Armada is in use. It will not be removed.")
+                return 0
+
+            LOG.info("Armada is not in use. It will be removed.")
+
+            remove_armada_resources()
+            remove_armada_manifest_directory()
+            remove_armada_docker_image()
+
+            LOG.info("Armada removed.")
+
+        except Exception as e:
+            print(e)
+            LOG.error("An error occured while trying to remove armada:")
+            LOG.exception(e)
+
+        return 0
+
+
+# Run main() only when executed as a script; its return value becomes the
+# process exit status.
+if __name__ == "__main__":
+    sys.exit(main())
--- a/sysinv/sysinv/sysinv/sysinv/common/utils.py
+++ b/sysinv/sysinv/sysinv/sysinv/common/utils.py
@ -183,6 +183,7 @@ def exception_msg(exception):
    return str(exception)


+# TODO(lfagunde): Should be able to specify the delay between retries.
 def execute(*cmd, **kwargs):
    """Helper method to execute command with optional retry.

@ -201,6 +202,7 @@ def execute(*cmd, **kwargs):
    :param attempts:           How many times to retry cmd.
    :param run_as_root:        True | False. Defaults to False. If set to True,
                               the command is run with rootwrap.
+    :param env:                Mapping with environment variables.

    :raises exception.SysinvException: on receiving unknown arguments
    :raises exception.ProcessExecutionError:
@ -220,6 +222,7 @@ def execute(*cmd, **kwargs):
    attempts = kwargs.pop('attempts', 1)
    run_as_root = kwargs.pop('run_as_root', False)
    shell = kwargs.pop('shell', False)
+    env = kwargs.pop('env', {})

    if len(kwargs):
        raise exception.SysinvException(_('Got unknown keyword args '
@ -248,6 +251,7 @@ def execute(*cmd, **kwargs):
                                   stdout=_PIPE,
                                   stderr=_PIPE,
                                   close_fds=close_fds,
+                                   env=env,
                                   preexec_fn=preexec_fn,
                                   shell=shell)
            result = None
@ -3767,3 +3771,37 @@ def is_filesystem_enabled(dbapi, host_id_or_uuid, fs_name):
        if fs.name == fs_name:
            return True
    return False
+
+
+def get_keystone_admin_env(openrc_file="/etc/platform/openrc"):
+    """
+    Extracts the keystone admin user env variables from :openrc_file:
+    Returns the mapping if successful, otherwise returns None
+    """
+
+    env = {"TERM": "linux"}
+
+    with open(openrc_file, 'r') as f:
+        for line in f.readlines():
+            if line.startswith("export"):
+                try:
+                    key, value = line.split()[1].split('=')
+                    env[key] = value
+                except Exception:
+                    if 'OS_PASSWORD' in line:
+                        os_password_line = line
+                    else:
+                        LOG.exception("Failed to get keystone admin env."
+                                      "Line with unexpected formatting:" + line)
+                        return None
+
+    # Getting the OS_PASSWORD requires running a keyring command
+    try:
+        cmd = "bash " + os_password_line.split('`')[1].replace("TERM=linux", "")
+        out, _ = execute(*cmd.split(), env=env)
+        env["OS_PASSWORD"] = out.strip()
+    except Exception:
+        LOG.exception("Failed to get keystone admin env. Couldn't parse OS_PASSWORD")
+        return None
+
+    return env