0bbb7fb9d7
IPSec requires the private key of system-local-ca to be RSA, and while the use of other key types is under consideration, it still needs to be implemented. Also, other key types were never validated for other services that use certificates issued from system-local-ca, such as LDAP, Docker Registry, and so on. Considering this, this review includes an upgrade precheck that prevents a user from upgrading from a system with non-RSA keys. This precheck needs to be re-evaluated in the future if we support other types of keys. Test Plan: PASS: With an stx 8 system with an ECC key in system-local-ca: - Loaded designer iso for stx 10; - Ran 'software deploy precheck'; - Observed that a message is displayed to the user, informing that only RSA keys are supported and describing the procedure to update system-local-ca. PASS: With an stx 8 system with an RSA key in system-local-ca: - Loaded designer iso for stx 10; - Ran 'software deploy precheck'; - Observed that the precheck passes. No message regarding system-local-ca's private key is displayed. Story: 2010940 Task: 51238 Change-Id: I6315b6018e755cf80d7e1f6a01909fb5ddb15ba4 Signed-off-by: Marcelo de Castro Loebens <Marcelo.DeCastroLoebens@windriver.com>
486 lines
20 KiB
Python
486 lines
20 KiB
Python
#!/usr/bin/python3
|
|
# -*- encoding: utf-8 -*-
|
|
#
|
|
# vim: tabstop=4 shiftwidth=4 softtabstop=4
|
|
#
|
|
# Copyright (c) 2023-2024 Wind River Systems, Inc.
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
|
|
"""
|
|
Run platform upgrade deploy precheck as a standalone executable
|
|
"""
|
|
|
|
import argparse
|
|
import base64
|
|
import json
|
|
import logging as LOG
|
|
import os
|
|
import re
|
|
import requests
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
|
|
from lxml import etree as ElementTree
|
|
from tsconfig.tsconfig import SW_VERSION
|
|
|
|
import upgrade_utils
|
|
|
|
|
|
# Kubernetes versions accepted by the upgrade precheck.
# TODO(heitormatsui) keep updated for every release
SUPPORTED_K8S_VERSIONS = [
    'v1.24.4',
    'v1.25.3',
    'v1.26.1',
    'v1.27.5',
    'v1.28.4',
    'v1.29.2',
]

# Exit codes returned by main()
RC_SUCCESS = 0
RC_UNHEALTHY = 3

# Release states compared against the 'state' field of each release entry
STATE_AVAILABLE = "available"
STATE_DEPLOYED = "deployed"
|
|
|
|
class HealthCheck(object):
    """This class represents a general health check object
    that uses sysinv-client to run system health checks"""

    SUCCESS_MSG = 'OK'
    FAIL_MSG = 'Fail'

    # Extracts the release from the script location, e.g.
    # ".../rel-24.09.0/..." -> "24.09.0". Raw string with escaped dots so
    # that only literal "." separators are accepted.
    _RELEASE_PATH_RE = re.compile(r"^.*/rel-(\d\d\.\d\d\.\d+)/")

    def __init__(self, config):
        """
        Store the configuration and create the sysinv client
        :param config: dict with the parsed script parameters
        :raises ValueError: if this script is not located under a
                            rel-<MM.mm.p> directory
        """
        self._config = config

        # get target release from script directory location
        self._target_release = self._parse_target_release(__file__)
        self._major_release = self._target_release.rsplit(".", 1)[0]

        # get sysinv token, endpoint and client
        self._sysinv_token, self._sysinv_endpoint = \
            upgrade_utils.get_token_endpoint(config, service_type="platform")
        self._sysinv_client = upgrade_utils.get_sysinv_client(self._sysinv_token,
                                                              self._sysinv_endpoint)

    @staticmethod
    def _parse_target_release(script_path):
        """
        Extract the target release from a path containing a rel-<MM.mm.p> directory
        :param script_path: path of the precheck script
        :return: release string, e.g. "24.09.0"
        :raises ValueError: if the path has no rel-<MM.mm.p> directory component
        """
        match = HealthCheck._RELEASE_PATH_RE.match(script_path)
        if match is None:
            raise ValueError("Unable to determine target release from path: %s"
                             % script_path)
        return match.group(1)

    def _check_license(self, version):
        """
        Validates the current license is valid for the specified version
        :param version: version to be checked against installed license
        :return: True is license is valid for version, False otherwise
        """
        license_dict = self._sysinv_client.license.show()
        if license_dict["error"]:
            return False

        # create temp file with license content to run verify-license binary against it
        with tempfile.NamedTemporaryFile(mode="w", delete=True) as license_file:
            license_file.write(license_dict["content"])
            # The binary opens the file by name: flush the buffered content
            # first, otherwise it may read an empty or truncated file.
            license_file.flush()
            try:
                subprocess.check_call(["/usr/bin/verify-license",  # pylint: disable=not-callable
                                       license_file.name,
                                       version])
            except subprocess.CalledProcessError:
                return False
        return True

    # TODO(heitormatsui): implement patch precheck targeted against USM
    # and implement patch precheck for subcloud
    def _check_required_patches_state(self, required_patches, patch_health_check=False):
        """
        Check if the required patches are in 'deployed' state, if patch_health_check is
        True, the required_patches can be in 'available' state as well.
        :param required_patches: list of patches to be checked
        :param patch_health_check: boolean if is a patch or upgrade health check
        :return: boolean indicating success/failure and list of patches
                 that are not in the 'deployed' or 'available' state, in the
                 order they were given
        """
        # A missing/empty "releases" entry is treated as "no releases known"
        releases = json.loads(self._config.get("releases") or "[]")
        allowed_patches = {
            release['release_id'] for release in releases
            if release['state'] == STATE_DEPLOYED or
            (patch_health_check and release['state'] == STATE_AVAILABLE)
        }

        # dict.fromkeys() drops duplicates while keeping a stable order
        missing_patches = [patch for patch in dict.fromkeys(required_patches)
                           if patch not in allowed_patches]
        return not missing_patches, missing_patches

    def run_general_health_check(self):
        """
        Run general health check using sysinv client
        :return: tuple (health_ok, output) where output is the text returned
                 by the health API plus the patch-current status line(s)
        """
        force = self._config.get("force", False)

        # alarm IDs sent to the health API in the alarm_ignore_list parameter
        alarm_ignore_list = ["900.201"]
        api_cmd = self._sysinv_endpoint + "/health/kube-upgrade"

        if force:
            api_cmd += '/relaxed'

        if alarm_ignore_list:
            api_cmd += f'?alarm_ignore_list={alarm_ignore_list}'

        output = upgrade_utils.call_api(self._sysinv_token, 'GET', api_cmd)

        # check hosts are patch current
        deploy_in_progress = json.loads(self._config.get("deploy_in_progress", "{}"))
        if deploy_in_progress:
            from_load = deploy_in_progress["from_release"]
            to_load = deploy_in_progress["to_release"]
            output += 'All hosts are patch current: [%s]\n' % HealthCheck.FAIL_MSG
            output += 'Deployment in progress: %s to %s\n' % (from_load, to_load)
        else:
            output += 'All hosts are patch current: [%s]\n' % HealthCheck.SUCCESS_MSG

        # any failed item reported in the output makes the system unhealthy
        health_ok = not deploy_in_progress and HealthCheck.FAIL_MSG not in output
        return health_ok, output
|
|
|
|
|
|
class UpgradeHealthCheck(HealthCheck):
    """This class represents a upgrade-specific health check object
    that verifies if system is in a valid state for upgrade"""

    @staticmethod
    def _status_label(success):
        """Return the OK/Fail label printed in the report for a check result"""
        return HealthCheck.SUCCESS_MSG if success else HealthCheck.FAIL_MSG

    # TODO(heitormatsui): switch from using upgrade metadata xml to
    # the new USM metadata format
    def _check_valid_upgrade_path(self):
        """
        Checks if active release to specified release is a valid upgrade path
        :return: tuple (success, active_release, required_patches) where
                 required_patches is the list declared for the active release
                 in the upgrade metadata (empty if none or not supported)
        """
        # Get active release
        isystem = self._sysinv_client.isystem.list()[0]
        active_release = isystem.software_version

        # supported_release is a dict with {release: required_patch}
        supported_releases = {}

        # Parse upgrade metadata file for supported upgrade paths
        root = ElementTree.parse(
            "/var/www/pages/feed/rel-%s/upgrades/metadata.xml" % self._major_release)
        supported_upgrades = root.find("supported_upgrades")
        # Metadata without a supported_upgrades section declares no upgrade path
        upgrades = supported_upgrades.findall("upgrade") \
            if supported_upgrades is not None else []
        for upgrade in upgrades:
            version = upgrade.find("version")
            if version is None:
                # entry without a version cannot match any release
                continue
            required_patch = upgrade.find("required_patch")
            supported_releases[version.text] = \
                [required_patch.text] if required_patch is not None else []

        success = active_release in supported_releases
        return success, active_release, supported_releases.get(active_release, [])

    # TODO(heitormatsui) do we need this check on USM? Remove if we don't
    def _check_active_is_controller_0(self):
        """Checks that active controller is controller-0"""
        return any(
            host.hostname == "controller-0" and
            "Controller-Active" in host.capabilities["Personality"]
            for host in self._sysinv_client.ihost.list()
        )

    def _check_kube_version(self, supported_versions):
        """
        Check if active k8s version is in a list of supported versions
        :param supported_versions: list of supported k8s versions
        :return: boolean indicating success/failure and active k8s version
                 (None when no version is in 'active' state)
        """
        kube_versions = self._sysinv_client.kube_version.list()
        active_version = next(
            (kv.version for kv in kube_versions if kv.state == "active"), None)
        return active_version in supported_versions, active_version

    def _run_local_issuer_check(self):
        """
        Check that system-local-ca's private key is RSA and build the report text
        The key is not inspected when the distributed cloud role is 'subcloud'.
        :return: tuple (success, output) where output is empty on success
        """
        # TODO(mdecastr) Plan is to add support to ECC key, this verification need to
        # be re evaluated in future releases if the support is implemented.
        if upgrade_utils.get_distributed_cloud_role() == 'subcloud':
            # system-local-ca in subclouds either match the systemcontroller's,
            # or it will be changed to match in upgrade activation
            LOG.info("Checking system-local-ca's private key is not required for subclouds.")
            success = True
        else:
            success = self._check_local_issuer_rsa_private_key()

        if success:
            LOG.info("system-local-ca has a valid private key.")
            return True, ""

        LOG.error("system-local-ca's private key is either not RSA or not valid.")
        output = 'Platform Issuer: [%s]\n' % (HealthCheck.FAIL_MSG)
        output += ('-> Platform Issuer (system-local-ca) TLS private key is not valid. '
                   'Only RSA keys are supported.\n'
                   ' Please perform the \'Update system-local-ca or Migrate Platform '
                   'Certificates to use Cert Manager\'\n'
                   ' procedure to update the Platform Issuer, providing a valid RSA '
                   'cert/key to be used by the issuer.\n')
        return False, output

    def run_health_check(self):
        """
        Run specific upgrade health checks
        :return: tuple (health_ok, output)
        """

        # run health check for 22.12
        # TODO(ShawnLi): remove this once upgrade from 22.12 is deprecated
        if SW_VERSION == '22.12':
            return self.run_health_check_in_from_release()

        health_ok = True
        output = ""

        # check if it is a valid upgrade path
        success, active_release, required_patches = self._check_valid_upgrade_path()
        output += 'Valid upgrade path from release %s to %s: [%s]\n' \
            % (active_release, self._major_release, self._status_label(success))
        health_ok = health_ok and success

        # check if required patches are deployed
        success, missing_patches = self._check_required_patches_state(required_patches)
        output += 'Required patches are applied: [%s]\n' % self._status_label(success)
        if not success:
            output += '-> Patches not applied: [%s]\n' % ', '.join(missing_patches)
        health_ok = health_ok and success

        # check if k8s version is valid
        success, active_version = self._check_kube_version(SUPPORTED_K8S_VERSIONS)
        output += 'Active kubernetes version [%s] is a valid supported version: [%s]\n' \
            % (active_version, self._status_label(success))

        if not active_version:
            output += ('-> Failed to get version info. Upgrade kubernetes to one of the '
                       'supported versions [%s] and ensure that the kubernetes version '
                       'information is available in the kubeadm configmap.\n'
                       'See "system kube-version-list"\n' % ", ".join(SUPPORTED_K8S_VERSIONS))
        elif not success:
            output += ('-> Upgrade active kubernetes version [%s] to one of the '
                       'supported versions [%s]. See "system kube-version-list"\n' %
                       (active_version, ", ".join(SUPPORTED_K8S_VERSIONS)))
        health_ok = health_ok and success

        # TODO(heitormatsui) Do we need the following check on USM?
        # The load is only imported to controller-0. An upgrade can only
        # be started when controller-0 is active.
        success = self._check_active_is_controller_0()
        output += 'Active controller is controller-0: [%s]\n' % self._status_label(success)
        health_ok = health_ok and success

        # check installed license
        # NOTE(nicodemos): We just need to check the license for major release
        success = self._check_license(self._major_release)
        output += 'Installed license is valid: [%s]\n' % self._status_label(success)
        health_ok = health_ok and success

        # Check if system-local-ca's private key is RSA
        success, issuer_output = self._run_local_issuer_check()
        output += issuer_output
        health_ok = health_ok and success

        return health_ok, output

    def run_health_check_in_from_release(self):
        """
        Run the health check in 22.12 release environment
        :return: tuple (success, output)
        """

        health_ok = True
        output = ""

        success, active_release, required_patches = self._check_valid_upgrade_path()
        output += 'Valid upgrade path from release %s to %s: [%s]\n' \
            % (active_release, self._major_release, self._status_label(success))
        health_ok = health_ok and success

        # check if required patches are deployed
        success, missing_patches = self._check_required_patches(required_patches)
        output += 'Required patches are applied: [%s]\n' % self._status_label(success)
        if not success:
            output += '-> Patches not applied: [%s]\n' % ', '.join(missing_patches)
        health_ok = health_ok and success

        # check if system-local-ca's private key is RSA
        success, issuer_output = self._run_local_issuer_check()
        output += issuer_output
        health_ok = health_ok and success

        return health_ok, output

    def _check_required_patches(self, required_patches):
        """
        Check if required patches are applied using the patching API
        :param required_patches: list of patches to be checked
        :return: tuple (success, missing_patches); when the patching API
                 cannot be queried, success is False and the list holds a
                 single error message instead of patch names
        """
        try:
            patch_token, patch_endpoint = upgrade_utils.get_token_endpoint(
                self._config, service_type="patching")
            patch_endpoint += "/v1/query/"
            response = requests.get(patch_endpoint, headers={
                "X-Auth-Token": patch_token}, timeout=10)
        except Exception as e:  # pylint: disable=broad-except
            # Return a list (not a bare string): callers join the items with
            # ', ' and a string would be split into single characters.
            return False, ["Failed to connect to patching API: %s" % e]

        try:
            query_patches = response.json()['pd']
        except (ValueError, KeyError, TypeError) as e:
            # non-JSON body or unexpected payload: report instead of crashing
            return False, ["Invalid response from patching API: %s" % e]

        applied_patches = {
            patch_key for patch_key, patch in query_patches.items()
            if patch.get('patchstate') in {'Applied', 'Committed'}
        }

        missing_patches = [patch for patch in required_patches
                           if patch not in applied_patches]
        return not missing_patches, missing_patches

    def _check_local_issuer_rsa_private_key(self):
        """
        Check if the private key stored in the system-local-ca secret is RSA
        :return: result of upgrade_utils.is_tls_key_rsa for the decoded key,
                 False if the secret or key cannot be retrieved or decoded
        """
        secret = upgrade_utils.get_secret_data_yaml('system-local-ca', 'cert-manager')
        if secret is None or secret == '':
            LOG.error("Error while retrieving system-local-ca's secret data.")
            return False

        key_b64 = secret.get('data', {}).get('tls.key', None)
        if key_b64 is None:
            LOG.error("Could not retrieve system-local-ca private key.")
            return False

        try:
            key_pem = base64.b64decode(key_b64).decode('utf-8')
        except (ValueError, TypeError):
            # binascii.Error and UnicodeDecodeError are both ValueError
            # subclasses; an undecodable key is reported as not valid.
            LOG.error("Could not decode system-local-ca private key.")
            return False
        return upgrade_utils.is_tls_key_rsa(key_pem)
|
|
|
|
|
|
class PatchHealthCheck(HealthCheck):
    """This class represents a patch-specific health check object
    that verifies if system is in valid state to apply a patch"""

    def _get_required_patches(self):
        """
        Get required patches for a target release
        :return: list with the "requires" entries of the first release whose
                 sw_version equals the target release, empty list if none
        """
        # A missing/empty "releases" entry is treated as "no releases known",
        # matching the "[]" default used by the argument parser.
        releases = json.loads(self._config.get("releases") or "[]")
        for release in releases:
            if release["sw_version"] == self._target_release:
                return list(release["requires"])
        return []

    def run_health_check(self):
        """
        Run specific patch health checks
        :return: tuple (health_ok, output)
        """
        # check required patches for target release
        required_patches = self._get_required_patches()
        health_ok, missing_patches = self._check_required_patches_state(required_patches, True)

        output = 'Required patches are deployed or available: [%s]\n' \
            % (HealthCheck.SUCCESS_MSG if health_ok else HealthCheck.FAIL_MSG)
        if not health_ok:
            output += '-> Patches not deployed or available: [%s]\n' \
                % ', '.join(missing_patches)

        return health_ok, output
|
|
|
|
|
|
def parse_config(args=None):
    """Parse the parameters passed to the script

    :param args: list of command line arguments; sys.argv is used when None
    :return: dict mapping each option name to its parsed value
    """
    parser = argparse.ArgumentParser(
        description="Run health checks to verify if the system "
                    "meets the requirements to deploy a specific "
                    "release.")

    # mandatory authentication options: (flag, help text)
    required_options = (
        ("--auth_url", "Authentication URL"),
        ("--username", "Username"),
        ("--password", "Password"),
        ("--project_name", "Project Name"),
        ("--user_domain_name", "User Domain Name"),
        ("--project_domain_name", "Project Domain Name"),
    )
    for flag, help_text in required_options:
        parser.add_argument(flag, help=help_text, required=True)

    parser.add_argument("--region_name", help="Region Name", default="RegionOne")

    # boolean switches, False unless given on the command line
    switches = (
        ("--force", "Ignore non-critical health checks"),
        ("--patch", "Set precheck to run against a patch release"),
    )
    for flag, help_text in switches:
        parser.add_argument(flag, help=help_text, action="store_true")

    # JSON payloads passed as strings
    parser.add_argument("--releases", help="Releases", default="[]")
    parser.add_argument("--deploy_in_progress",
                        help="check hosts are patch current",
                        default="{}")

    # if args was not passed will use sys.argv by default
    return vars(parser.parse_args(args))
|
|
|
|
|
|
def main(argv=None):
    """Run the general and release-specific health checks and print the report

    :param argv: list of command line arguments; sys.argv is used when None
    :return: RC_SUCCESS when both checks pass, RC_UNHEALTHY otherwise
    """
    config = parse_config(argv)

    # pick the release-specific checker: patch release or major upgrade
    checker_class = PatchHealthCheck if config.get("patch", False) else UpgradeHealthCheck
    health_check = checker_class(config)

    # execute general health check
    general_health_ok, general_output = health_check.run_general_health_check()
    # execute release-specific health check
    specific_health_ok, specific_output = health_check.run_health_check()

    # print both reports, removing extra line breaks/blank spaces around them
    print(general_output.strip() + "\n" + specific_output.strip())

    if general_health_ok and specific_health_ok:
        return RC_SUCCESS
    return RC_UNHEALTHY
|
|
|
|
|
|
if __name__ == "__main__":
    # send log records to /var/log/software.log at INFO level
    upgrade_utils.configure_logging("/var/log/software.log", log_level=LOG.INFO)
    # the process exit status is the health check return code
    exit_code = main()
    sys.exit(exit_code)
|