Create software deploy precheck endpoint

This commit:
- Creates the "software deploy precheck" endpoint
- Implements a couple of TODOs in the precheck code
- Replaces some hard-coded values with constants
- Replaces the "system" commands used to retrieve system health
  with sysinv client calls
- Creates separate health check classes for general and
  upgrade-specific health checks
- Renames standalone precheck module to "deploy-precheck"
- Allows the precheck module to receive keystone credentials
  as parameters so that the precheck can be run against a
  remote host (Use Case: system controller running precheck
  against subclouds; note that DC support will be added
  in a future commit)
- Creates a utility module that can be imported by standalone
  USM upgrade scripts (copied to the ISO by [1])

Test Plan
PASS: run "software deploy precheck" and verify that it returns
      correctly, and that the health check output is valid
      given the current system status

Story: 2010676
Task: 48902

Depends-on: https://review.opendev.org/c/starlingx/update/+/898474
Relates-to: [1] https://review.opendev.org/c/starlingx/tools/+/897484

Change-Id: I4d04804c1b1e147f486434df23a291c6d315842b
Signed-off-by: Heitor Matsui <heitorvieira.matsui@windriver.com>
This commit is contained in:
Heitor Matsui 2023-09-28 11:37:50 -03:00
parent 36b39028b2
commit f9dcfcb091
9 changed files with 447 additions and 190 deletions

View File

@ -757,8 +757,25 @@ def release_upload_dir_req(args):
def deploy_precheck_req(args):
    """Issue a "deploy precheck" request to the software API.

    :param args: parsed command-line arguments; ``deployment``,
                 ``region_name`` and ``debug`` are read from it
    :return: the value returned by ``check_rc`` for the API response
    """
    # The placeholder "print(args.deployment); return 1" was dropped: it
    # returned before the request below could ever be issued.

    # args.deployment is a string
    deployment = args.deployment

    # args.region_name is a string
    region_name = args.region_name

    # Issue deploy_precheck request; the region is passed through "params"
    # so that requests takes care of encoding the query string
    url = "http://%s/software/deploy_precheck/%s" % (api_addr, deployment)
    headers = {}
    append_auth_token_if_required(headers)
    req = requests.post(url, headers=headers,
                        params={"region_name": region_name})

    if args.debug:
        print_result_debug(req)
    else:
        print_software_op_result(req)

    return check_rc(req)
def deploy_start_req(args):
@ -1148,7 +1165,11 @@ def register_deploy_commands(commands):
cmd.set_defaults(cmd='precheck')
cmd.set_defaults(func=deploy_precheck_req)
cmd.add_argument('deployment',
help='Verify prerequisite conditions are met for specified deployment')
help='Verify if prerequisites are met for this Deployment ID')
cmd.add_argument('--region_name',
default='RegionOne',
required=False,
help='Run precheck against a subcloud')
# --- software deploy start --------------------------
cmd = sub_cmds.add_parser(

View File

@ -60,8 +60,10 @@ override_dh_install:
${ROOT}/etc/goenabled.d/software_check_goenabled.sh
install -m 444 service-files/software.logrotate \
${ROOT}/etc/logrotate.d/software
install -m 755 scripts/platform-upgrade-precheck \
${ROOT}/usr/sbin/platform-upgrade-precheck
install -m 755 scripts/deploy-precheck \
${ROOT}/usr/sbin/deploy-precheck
install -m 444 scripts/upgrade_utils.py \
${ROOT}/usr/sbin/upgrade_utils.py
install -m 444 ${METADATA_FILE} \
${ROOT}/etc/software/${METADATA_FILE}
dh_install

View File

@ -0,0 +1,254 @@
#!/usr/bin/python3
# -*- encoding: utf-8 -*-
#
# vim: tabstop=4 shiftwidth=4 softtabstop=4
#
# Copyright (c) 2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
"""
Run platform upgrade deploy precheck as a standalone executable
"""
import os
import re
import requests
import subprocess
import sys
import tempfile
from lxml import etree as ElementTree
import upgrade_utils
class HealthCheck(object):
    """General health check that queries system health through the
    sysinv client.

    On construction a token/endpoint pair is requested for the "platform"
    service (used to build the sysinv client) and another one for the
    "usm" service.
    """

    SUCCESS_MSG = 'OK'
    FAIL_MSG = 'Fail'

    def __init__(self, config):
        """
        :param config: configuration dictionary handed over unchanged to
                       upgrade_utils.get_token_endpoint
        """
        # "platform" service: token, endpoint and the sysinv client itself
        platform_auth = upgrade_utils.get_token_endpoint(
            config, service_type="platform")
        self._sysinv_token, self._sysinv_endpoint = platform_auth
        self._sysinv_client = upgrade_utils.get_sysinv_client(
            self._sysinv_token, self._sysinv_endpoint)

        # "usm" service: token and endpoint only
        usm_auth = upgrade_utils.get_token_endpoint(
            config, service_type="usm")
        self._software_token, self._software_endpoint = usm_auth

    def run_health_check(self):
        """Run the kube-upgrade health query of the sysinv client.

        :return: tuple (healthy, report); healthy is False when the
                 report contains FAIL_MSG
        """
        report = self._sysinv_client.health.get_kube_upgrade() + "\n"
        healthy = HealthCheck.FAIL_MSG not in report
        return healthy, report
class UpgradeHealthCheck(HealthCheck):
    """Upgrade-specific health check.

    Verifies whether the system is in a valid state to be upgraded to the
    release this script is shipped with; the target release is taken from
    the "rel-<MM.mm>" directory found in the script path. Uses the
    ``_sysinv_client``, ``_software_endpoint`` and ``_software_token``
    attributes of the instance.
    """

    def _check_valid_upgrade_path(self):
        """Checks if active release to specified release is a valid upgrade path

        :return: tuple (success, active_release, required_patch) where
                 required_patch is the patch listed in metadata.xml for the
                 active release, or None
        """
        # Get active release
        isystem = self._sysinv_client.isystem.list()[0]
        active_release = isystem.software_version

        # supported_releases is a dict with {release: required_patch}
        supported_releases = dict()

        # Parse upgrade metadata file for supported upgrade paths
        root = ElementTree.parse("%s/metadata.xml" % os.path.dirname(__file__))
        supported_upgrades = root.find("supported_upgrades")
        # a metadata file without the section means no supported path,
        # not a crash
        upgrade_root = supported_upgrades.findall("upgrade") \
            if supported_upgrades is not None else []
        for upgrade in upgrade_root:
            version = upgrade.find("version")
            if version is None:
                continue
            required_patch = upgrade.find("required_patch")
            supported_releases[version.text] = \
                required_patch.text if required_patch is not None else None

        success = active_release in supported_releases
        return success, active_release, supported_releases.get(active_release, None)

    # TODO(heitormatsui): implement patch precheck targeted against USM
    #  and implement patch precheck for subcloud
    def _check_required_patch(self, release, required_patch):
        """Checks if required patch for the supported release is installed

        :param release: release whose applied patches are queried
        :param required_patch: patch id that must be applied, or None
        :return: tuple (success, missing_patches); when the query cannot be
                 completed, success is False and the required patch list is
                 returned as missing
        """
        url = self._software_endpoint + '/query?show=applied&release=%s' % release
        headers = {"X-Auth-Token": self._software_token}
        required_patch = [required_patch] if required_patch else []

        try:
            response = requests.get(url, headers=headers, timeout=10)
        except requests.exceptions.RequestException:
            print("Could not check required patches...")
            return False, required_patch

        if response.status_code != 200:
            print("Could not check required patches...")
            return False, required_patch

        try:
            applied_patches = set(response.json()["sd"])
        except (ValueError, KeyError, TypeError):
            # unparsable body or unexpected payload layout
            print("Could not check required patches...")
            return False, required_patch

        missing_patch = [patch for patch in required_patch
                         if patch not in applied_patches]
        return not missing_patch, missing_patch

    # TODO(heitormatsui) do we need this check on USM? Remove if we don't
    def _check_active_is_controller_0(self):
        """Checks that active controller is controller-0"""
        controllers = self._sysinv_client.ihost.list()
        for controller in controllers:
            if controller.hostname == "controller-0" and \
                    "Controller-Active" in controller.capabilities["Personality"]:
                return True
        return False

    def _check_license(self, version):
        """Validates the current license is valid for the specified version

        :param version: release version handed to the verify-license binary
        :return: True if verify-license exits successfully, False otherwise
        """
        license_dict = self._sysinv_client.license.show()
        if license_dict["error"]:
            return False

        # create temp file with license content to run verify-license binary against it
        with tempfile.NamedTemporaryFile(mode="w", delete=True) as license_file:
            try:
                license_file.write(license_dict["content"])
                # the binary opens the file by name, so the buffered
                # content must be on disk before it runs
                license_file.flush()
                subprocess.check_call(["/usr/bin/verify-license",  # pylint: disable=not-callable
                                       license_file.name,
                                       version])
            except (subprocess.CalledProcessError, OSError):
                # non-zero exit code, or the binary could not be executed
                return False
        return True

    def _check_kube_version(self):
        """Check if active k8s version is the latest available

        :return: tuple (success, active_version, latest_version); both
                 versions are None when sysinv returns no version at all
        """
        kube_versions = self._sysinv_client.kube_version.list()
        if not kube_versions:
            return False, None, None

        active_version = None
        # the last entry of the list is taken as the latest version
        latest_version = kube_versions[-1].version
        for kv in kube_versions:
            if kv.state == "active":
                active_version = kv.version
                break
        success = active_version == latest_version
        return success, active_version, latest_version

    def run_health_check(self):
        """Run all upgrade-specific checks.

        :return: tuple (health_ok, output) where output holds one line per
                 executed check
        """
        health_ok = True
        output = ""

        # get target release from script directory location
        script_path = os.path.abspath(__file__)
        release_match = re.match(r"^.*/rel-(\d\d\.\d\d)/", script_path)
        if release_match is None:
            output += 'Unable to determine target release from script location %s: [%s]\n' \
                      % (script_path, HealthCheck.FAIL_MSG)
            return False, output
        upgrade_release = release_match.group(1)

        # check installed license
        success = self._check_license(upgrade_release)
        output += 'License valid for upgrade: [%s]\n' \
                  % (HealthCheck.SUCCESS_MSG if success else HealthCheck.FAIL_MSG)
        health_ok = health_ok and success

        # check if it is a valid upgrade path
        success, active_release, required_patch = self._check_valid_upgrade_path()
        output += 'Valid upgrade path from release %s to %s: [%s]\n' \
                  % (active_release, upgrade_release,
                     HealthCheck.SUCCESS_MSG if success else HealthCheck.FAIL_MSG)
        health_ok = health_ok and success

        # check if required patches are applied/committed if is a valid upgrade path
        if success:
            success, missing_patches = self._check_required_patch(active_release, required_patch)
            output += 'Required patches are applied: [%s]\n' \
                      % (HealthCheck.SUCCESS_MSG if success else HealthCheck.FAIL_MSG)
            if not success:
                output += 'Patches not applied: %s\n' \
                          % ', '.join(missing_patches)
            health_ok = health_ok and success
        else:
            # newline added so that the next check starts on its own line
            output += 'Invalid upgrade path, skipping required patches check...\n'

        # check k8s version is the latest available
        success, active_version, latest_version = self._check_kube_version()
        if success:
            output += 'Active kubernetes version is the latest supported version: [%s]\n' \
                      % HealthCheck.SUCCESS_MSG
        elif active_version:
            output += 'Upgrade kubernetes to the latest version: [%s]. ' \
                      'See "system kube-version-list"\n' % latest_version
        else:
            output += 'Failed to get version info. Upgrade kubernetes to' \
                      ' the latest version (%s) and ensure that the ' \
                      'kubernetes version information is available in ' \
                      ' the kubeadm configmap.\n' \
                      'Also see "system kube-version-list"\n' % latest_version
        health_ok = health_ok and success

        # TODO(heitormatsui) Do we need the following check on USM?
        # The load is only imported to controller-0. An upgrade can only
        # be started when controller-0 is active.
        is_controller_0 = self._check_active_is_controller_0()
        success = is_controller_0
        output += \
            'Active controller is controller-0: [%s]\n' \
            % (HealthCheck.SUCCESS_MSG if success else HealthCheck.FAIL_MSG)
        health_ok = health_ok and success

        return health_ok, output
def parse_config(args):
    """Build the keystone configuration from command-line style arguments.

    Every argument after the first one is expected in "--key=value" form.
    Only the keystone keys listed below are kept; other keys are ignored.

    :param args: argument list whose first element is skipped (program name)
    :return: dict with all required keystone keys, or None when args is
             empty, an argument has no "=", or a required key is missing
    """
    if not args:
        return None

    required_keystone_config = ["auth_url", "username", "password", "project_name",
                                "user_domain_name", "project_domain_name", "region_name"]
    config = dict()
    for arg in args[1:]:
        # split on the first "=" only, so values may contain "=" themselves
        key, sep, value = arg.partition("=")
        if not sep:
            # str.find() returns -1 instead of raising, so a missing "="
            # has to be detected explicitly
            print("Invalid parameter format: %s" % arg)
            return None
        key = key.lstrip("-")
        if key in required_keystone_config:
            config[key] = value

    if not all(cf in config for cf in required_keystone_config):
        return None

    return config
def main(argv=None):
    """Run the general and the upgrade-specific health checks.

    :param argv: argument list handed to parse_config
    :return: 0 when both health checks pass, 1 otherwise (including when
             the keystone configuration cannot be parsed)
    """
    config = parse_config(argv)
    if not config:
        usage = ("Please provide keystone_authtoken configuration.\n"
                 "usage: deploy-precheck --auth_url=<auth_url> --username=<username> "
                 "--password=<password> --project_name=<project_name> --user_domain_name=<user_domain_name> "
                 "--project_domain_name=<project_domain_name> --region_name=<region_name>")
        print(usage)
        return 1

    # both checkers are created before any check is executed
    general_checker = HealthCheck(config)
    upgrade_checker = UpgradeHealthCheck(config)

    # general health check first, upgrade-specific health check second
    general_ok, general_report = general_checker.run_health_check()
    upgrade_ok, upgrade_report = upgrade_checker.run_health_check()

    # drop the last character of the general report (extra line break)
    # before joining it with the upgrade report
    print(general_report[:-1] + upgrade_report)

    return 0 if (general_ok and upgrade_ok) else 1


if __name__ == "__main__":
    sys.exit(main(sys.argv))

View File

@ -1,184 +0,0 @@
#!/usr/bin/python3
# -*- encoding: utf-8 -*-
#
# vim: tabstop=4 shiftwidth=4 softtabstop=4
#
# Copyright (c) 2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
"""
Run platform upgrade precheck from sysinv as a standalone executable
"""
import os
import socket
import subprocess
import sys
class HealthUpgrade(object):
    """Platform upgrade precheck based on "system" CLI commands.

    The environment for the CLI calls is built from the "export OS_*"
    lines of /etc/platform/openrc, with OS_PASSWORD read from the keyring.
    """

    SUCCESS_MSG = 'OK'
    FAIL_MSG = 'Fail'

    def __init__(self):
        env = {}
        with open("/etc/platform/openrc", "r") as f:
            for line in f:
                if "export OS_" not in line:
                    continue
                # split on the first "=" only, so values containing "="
                # are kept whole; lines without "=" are ignored
                name, sep, value = line.strip().partition("=")
                if not sep:
                    continue
                # remove the literal "export " prefix (str.lstrip would
                # strip a set of characters, not the prefix)
                if name.startswith("export "):
                    name = name[len("export "):]
                env[name.strip()] = value.strip()
        try:
            env["OS_PASSWORD"] = subprocess.check_output(
                ["keyring", "get", "CGCS", "admin"],
                text=True).strip()
        except subprocess.CalledProcessError as exc:
            raise Exception("Unable to get auth information") from exc
        self._env = env

    # TODO(heitormatsui): implement load precheck for the new software
    #  management framework when API/database are available
    def _check_imported_load(self):
        """Checks if there is a valid load imported for upgrade"""
        success, upgrade_version = True, "23.09.0"
        return success, upgrade_version

    # TODO(heitormatsui): implement patch precheck for the new software
    #  management framework when API/database are available
    def _check_required_patches(self, upgrade_version):
        """Checks if required patches for the imported load are installed"""
        return True, []

    def _check_active_is_controller_0(self):
        """Checks that active controller is controller-0"""
        return socket.gethostname() == "controller-0"

    def _check_license(self, version):
        """Validates the current license is valid for the specified version"""
        check_binary = "/usr/bin/verify-license"
        license_file = '/etc/platform/.license'
        with open(os.devnull, "w") as fnull:
            try:
                subprocess.check_call([check_binary, license_file, version],  # pylint: disable=not-callable
                                      stdout=fnull, stderr=fnull)
            except (subprocess.CalledProcessError, OSError):
                # non-zero exit code, or the binary could not be executed
                return False
        return True

    @staticmethod
    def _parse_kube_versions(output):
        """Extract (active_version, latest_version) from the table printed
        by "system kube-version-list"; both are None if it has no rows."""
        # output comes in table format, remove headers and last line
        kubernetes_versions = output.split("\n")[3:-2]
        if not kubernetes_versions:
            return None, None
        # latest version is the last line on the table
        latest_version = kubernetes_versions[-1].split("|")[1].strip()
        active_version = None
        for version in kubernetes_versions:
            if "active" in version:
                active_version = version.split("|")[1].strip()
                break
        return active_version, latest_version

    def _check_kube_version(self):
        """Check if the active kubernetes version is the latest one listed.

        :return: tuple (success, active_version, latest_version); always
                 three values, with None versions when they are unknown
        """
        try:
            output = subprocess.check_output(["system", "kube-version-list", "--nowrap"],  # pylint: disable=not-callable
                                             env=self._env, text=True)
        except (subprocess.CalledProcessError, OSError):
            # keep the 3-tuple shape the caller unpacks
            return False, None, None

        active_version, latest_version = self._parse_kube_versions(output)
        success = active_version is not None and active_version == latest_version
        return success, active_version, latest_version

    def get_system_health(self):
        """Run the general health check.

        :return: tuple (health_ok, output)
        """
        try:
            # "system health-query-kube-upgrade" runs all the required general health checks for
            # upgrade, that consists on the basic prechecks + k8s nodes/pods/applications prechecks
            output = subprocess.check_output(["system", "health-query-kube-upgrade"],  # pylint: disable=not-callable
                                             env=self._env, text=True)
        except (subprocess.CalledProcessError, OSError):
            return False, "Error running general health check"

        if HealthUpgrade.FAIL_MSG in output:
            return False, output
        return True, output

    def get_system_health_upgrade(self):
        """Run the upgrade-specific health checks.

        :return: tuple (health_ok, output)
        """
        health_ok = True
        output = ""

        # check k8s version
        success, active_version, latest_version = self._check_kube_version()
        if success:
            output += 'Active kubernetes version is the latest supported version: [%s]\n' \
                      % HealthUpgrade.SUCCESS_MSG
        elif active_version:
            output += 'Upgrade kubernetes to the latest version: [%s]. ' \
                      'See "system kube-version-list"\n' % (latest_version)
        else:
            output += 'Failed to get version info. Upgrade kubernetes to' \
                      ' the latest version (%s) and ensure that the ' \
                      'kubernetes version information is available in ' \
                      ' the kubeadm configmap.\n' \
                      'Also see "system kube-version-list"\n' % (latest_version)
        health_ok = health_ok and success

        # check imported load
        success, upgrade_version = self._check_imported_load()
        health_ok = health_ok and success
        if not success:
            output += 'No imported load found. Unable to test further\n'
            return health_ok, output

        # check patches for imported load
        success, missing_patches = self._check_required_patches(upgrade_version)
        output += 'Required patches are applied: [%s]\n' \
                  % (HealthUpgrade.SUCCESS_MSG if success else HealthUpgrade.FAIL_MSG)
        if not success:
            output += 'Patches not applied: %s\n' \
                      % ', '.join(missing_patches)
        health_ok = health_ok and success

        # check installed license
        success = self._check_license(upgrade_version)
        output += 'License valid for upgrade: [%s]\n' \
                  % (HealthUpgrade.SUCCESS_MSG if success else HealthUpgrade.FAIL_MSG)
        health_ok = health_ok and success

        # The load is only imported to controller-0. An upgrade can only
        # be started when controller-0 is active.
        is_controller_0 = self._check_active_is_controller_0()
        success = is_controller_0
        output += \
            'Active controller is controller-0: [%s]\n' \
            % (HealthUpgrade.SUCCESS_MSG if success else HealthUpgrade.FAIL_MSG)
        health_ok = health_ok and success

        return health_ok, output
def main(argv):
    """Run the general and the upgrade health checks and print the result.

    :param argv: command-line arguments (not used by this function)
    :return: 0 when both health checks pass, 1 otherwise
    """
    checker = HealthUpgrade()

    # general health check first, upgrade health check second
    general_ok, general_report = checker.get_system_health()
    upgrade_ok, upgrade_report = checker.get_system_health_upgrade()

    # drop the last character of the general report (extra line break)
    # before joining it with the upgrade report
    print(general_report[:-1] + upgrade_report)

    return 0 if (general_ok and upgrade_ok) else 1


if __name__ == "__main__":
    sys.exit(main(sys.argv))

View File

@ -0,0 +1,72 @@
#
# Copyright (c) 2023 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# This is a utility module used by standalone USM upgrade scripts
# that run in the FROM-side context but use the TO-side code base
#
from keystoneauth1 import exceptions
from keystoneauth1 import identity
from keystoneauth1 import session
def get_token_endpoint(config, service_type="platform"):
    """Returns a token and an endpoint for a service

    :param config: A configuration dictionary containing the
                   authentication credentials; "region_name" is also
                   read from it to look up the endpoint
    :param service_type: The service to get the related token
                         and endpoint
    :returns: tuple (token, endpoint); for the "usm" service type
              "/software" is appended to the endpoint
    :raises Exception: if a credential key is missing, the request is
                       unauthorized, or token/endpoint cannot be obtained
    """
    required_user_keys = ['auth_url',
                          'username',
                          'password',
                          'project_name',
                          'user_domain_name',
                          'project_domain_name']
    if not all(key in config for key in required_user_keys):
        raise Exception("Missing required key(s) to authenticate to Keystone")

    try:
        auth = identity.Password(
            auth_url=config["auth_url"],
            username=config["username"],
            password=config["password"],
            project_name=config["project_name"],
            user_domain_name=config["user_domain_name"],
            project_domain_name=config["project_domain_name"]
        )
        sess = session.Session(auth=auth)
        token = sess.get_token()
        endpoint = sess.get_endpoint(service_type=service_type,
                                     region_name=config["region_name"],
                                     interface="public")
    except exceptions.http.Unauthorized:
        raise Exception("Failed to authenticate to Keystone. Request unauthorized")
    except Exception as e:
        # "%" (not ",") so the error text is actually part of the message
        raise Exception("Failed to get token and endpoint. Error: %s" % str(e))

    if service_type == "usm":
        if endpoint is None:
            # avoid an opaque TypeError on the concatenation below
            raise Exception("Failed to get endpoint for service type %s" % service_type)
        endpoint += "/software"

    return token, endpoint
def get_sysinv_client(token, endpoint):
    """Builds a sysinv (cgtsclient) client for the given token/endpoint

    :param token: auth token
    :param endpoint: service endpoint
    :returns: the client created by cgtsclient
    :raises ImportError: if an ImportError happens while importing or
                         creating the client
    :raises Exception: for any other failure while creating the client
    """
    # cgtsclient is imported lazily, inside the guarded section, so that
    # an import failure is reported with the message below
    try:
        from cgtsclient import client
        sysinv_client = client.Client(version='1', endpoint=endpoint,
                                      token=token, timeout=600)
    except ImportError:
        raise ImportError("Failed to import cgtsclient")
    except Exception as e:
        raise Exception("Failed to get sysinv client. Error: %s" % str(e))
    return sysinv_client

View File

@ -98,6 +98,16 @@ class SoftwareAPIController(object):
return result
@expose('json')
@expose('query.xml', content_type='application/xml')
def deploy_precheck(self, *args, **kwargs):
    """Run the deploy precheck for the deployment given in the URL path.

    :param args: positional URL segments; the first one is the deployment
    :param kwargs: query parameters, forwarded to the precheck API
    :return: the result of sc.software_deploy_precheck_api, or a dict
             with an "error" entry when the request cannot be served
    """
    # a request without a deployment segment would otherwise fail with
    # an IndexError instead of an error payload
    if not args:
        return dict(error="Error: deployment must be specified")

    try:
        result = sc.software_deploy_precheck_api(args[0], **kwargs)
    except SoftwareError as e:
        return dict(error="Error: %s" % str(e))

    return result
@expose('json')
@expose('query.xml', content_type='application/xml')
def deploy_start(self, *args, **kwargs):

View File

@ -25,7 +25,7 @@ mgmt_if = None
nodetype = None
platform_conf_mtime = 0
software_conf_mtime = 0
software_conf = '/etc/software/software.conf'
software_conf = constants.SOFTWARE_CONFIG_FILE_LOCAL
# setup a shareable config
CONF = cfg.CONF

View File

@ -20,6 +20,7 @@ ADDRESS_VERSION_IPV6 = 6
CONTROLLER_FLOATING_HOSTNAME = "controller"
SOFTWARE_STORAGE_DIR = "/opt/software"
SOFTWARE_CONFIG_FILE_LOCAL = "/etc/software/software.conf"
AVAILABLE_DIR = "%s/metadata/available" % SOFTWARE_STORAGE_DIR
UNAVAILABLE_DIR = "%s/metadata/unavailable" % SOFTWARE_STORAGE_DIR
@ -100,3 +101,12 @@ SIG_EXTENSION = ".sig"
PATCH_EXTENSION = ".patch"
SUPPORTED_UPLOAD_FILE_EXT = [ISO_EXTENSION, SIG_EXTENSION, PATCH_EXTENSION]
SCRATCH_DIR = "/scratch"
RELEASE_METADATA_FILE = "STX_%s_GA-metadata.xml"
CONTROLLER_HOSTNAME = 'controller'
CONTROLLER_0_HOSTNAME = '%s-0' % CONTROLLER_HOSTNAME
CONTROLLER_1_HOSTNAME = '%s-1' % CONTROLLER_HOSTNAME
# Precheck constants
LICENSE_FILE = "/etc/platform/.license"
VERIFY_LICENSE_BINARY = "/usr/bin/verify-license"

View File

@ -1786,6 +1786,78 @@ class PatchController(PatchService):
msg = "Failed to delete the restart script for %s" % patch_id
LOG.exception(msg)
def software_deploy_precheck_api(self, deployment: str, **kwargs) -> dict:
    """
    Verify if system is capable to upgrade to a specified deployment

    :param deployment: key of the release in self.release_data.metadata
    :param kwargs: optional "region_name" (defaults to "RegionOne")
    :return: dict of info, warning and error messages
    """
    msg_info = ""
    msg_warning = ""
    msg_error = ""

    # We need to verify that the software release exists
    release = self.release_data.metadata.get(deployment, None)
    if not release:
        msg = "Software release version corresponding to the specified deployment " \
              "%s does not exist." % deployment
        LOG.error(msg)
        msg_error += msg + "\n"
        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    # Check if software release directory location exists
    release_version = utils.get_major_release_version(release["sw_version"])
    deployment_dir = "%s/rel-%s" % (constants.FEED_OSTREE_BASE_DIR, release_version)
    if not os.path.isdir(deployment_dir):
        msg = "Upgrade files for deployment %s are not present on the system, " \
              "cannot proceed with the precheck." % deployment
        LOG.error(msg)
        msg_error += msg + "\n"
        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    # region is a request parameter, not part of the config file, so a
    # missing value must not be reported as a config parsing error
    region_name = kwargs.get("region_name", "RegionOne")

    # parse local config file to pass parameters to precheck script
    try:
        cp = configparser.ConfigParser()
        cp.read(constants.SOFTWARE_CONFIG_FILE_LOCAL)
        ks_section = cp["keystone_authtoken"]
        auth_url = ks_section["auth_url"]
        username = ks_section["username"]
        password = ks_section["password"]
        project_name = ks_section["project_name"]
        user_domain_name = ks_section["user_domain_name"]
        project_domain_name = ks_section["project_domain_name"]
    except Exception as e:
        msg = "Error parsing config file: %s." % str(e)
        LOG.error(msg)
        msg_error += msg + "\n"
        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    # TODO(heitormatsui) if different region was passed as parameter then
    #  need to discover the subcloud auth_url to pass to precheck script
    if region_name != "RegionOne":
        pass

    # Call precheck from the deployment files
    # NOTE(review): the password is passed as a command-line argument,
    # which is usually visible in the process list - consider another
    # channel (environment or stdin) together with the precheck script
    precheck_script = "%s/upgrades/deploy-precheck" % deployment_dir
    try:
        precheck_return = subprocess.run(
            [precheck_script,
             "--auth_url=%s" % auth_url,
             "--username=%s" % username,
             "--password=%s" % password,
             "--project_name=%s" % project_name,
             "--user_domain_name=%s" % user_domain_name,
             "--project_domain_name=%s" % project_domain_name,
             "--region_name=%s" % region_name],
            stderr=subprocess.STDOUT,
            stdout=subprocess.PIPE,
            check=False,
            text=True,
        )
    except OSError as e:
        # script missing or not executable
        msg = "Failed to run precheck script %s: %s." % (precheck_script, str(e))
        LOG.error(msg)
        msg_error += msg + "\n"
        return dict(info=msg_info, warning=msg_warning, error=msg_error)

    if precheck_return.returncode != 0:
        # never report a failed precheck with an empty error message
        msg_error += precheck_return.stdout or \
            "Precheck script failed with return code %s.\n" % precheck_return.returncode
    else:
        msg_info += precheck_return.stdout

    return dict(info=msg_info, warning=msg_warning, error=msg_error)
def software_deploy_start_api(self, deployment: str, **kwargs) -> dict:
"""
Start deployment by applying the changes to the feed ostree